git clone https://github.com/our_new_public_git/court_parser
pip install -r requirements.txt
import os
import sys
import json
import pandas as pd
# Add the project root directory to the Python path.
# NOTE: os.path.dirname() is applied to the literal string '__file__' (not the
# __file__ variable), which yields '', so project_root resolves to the parent
# of the current working directory.
project_root = os.path.abspath(os.path.join(os.path.dirname('__file__'), '..'))
sys.path.append(project_root)
# Load a sample court decision stored in JSON format
file_path = "data/raw/sample_court_decision.json"
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)
# Load a dataframe with sample decisions (semicolon-separated CSV)
csv_path = 'data/raw/sample_decisions/sample_decisions.csv'
df = pd.read_csv(csv_path, sep=';')
# Import the class that extracts articles
from src.articles import ArticlesExtractor
# Initialize an instance of the extractor
articles_extractor = ArticlesExtractor()
# Example 1: Process single string
test_string = "Губаев Борис Магомедович - ст.159 ч.2 УК РФ"
result = articles_extractor.process_string(test_string)
print("Single string processing result:")
print(result)
[{
'person': 'Person 1',
'articles': [{'article': '159', 'part': '2', 'subpart': None}],
'code_type': 'CRIMINAL'
}]
# Example 2: Run the extractor on the 'names' field of the loaded JSON object
json_names = data['names']
result = articles_extractor.process_string(json_names)
print("JSON field processing result:", result, sep="\n")
[{
'person': 'Person 1',
'articles': [{'article': '116.1', 'part': '2', 'subpart': None}],
'code_type': 'CRIMINAL'
}]
# Example 3: Run the extractor over the dataframe rows whose 'names' is not NA
has_names = df['names'].notna()
results = articles_extractor.process_dataframe(df[has_names], 'names')
print("DataFrame processing results (first row):", results[0], sep="\n")
DataFrame processing results (first row):
[{
'person': 'Person 1',
'articles': [{'article': '116.1', 'part': '2', 'subpart': None}],
'code_type': 'CRIMINAL'
}]
from src.gender import GenderExtractor
gender_extractor = GenderExtractor(russian_names_db=False)
# Example 1: Extract gender from a single string
# (the literal is split across two lines only for readability)
text = (
    "Волостных Владислав Витальевич - ст.291 ч.3; ст.222 ч.1; "
    "ст.290 ч.5 п.в; ст.290 ч.5 п.в; ст.290 ч.5 п.в УК РФ"
)
result = gender_extractor.extract_genders(text)
print("Single string gender extraction result:", result, sep="\n")
[('Волостных Владислав Витальевич', 'M')]
# Example 2: Extract gender from JSON object
# Defendant gender
defendant = gender_extractor.extract_genders(data['names'])
print("Defendant gender extraction result:")
print(defendant)
# Judge gender
judge = gender_extractor.extract_genders(data['judge'])
print("Judge gender extraction result:")
print(judge)
Defendant gender extraction result:
[('Петров Николай Сергеевич', 'M')]
Judge gender extraction result:
[('Бабич Светлана Николаевна', 'F')]
# Example 3: Processing dataframe - adding gender columns
def _genders_or_none(cell):
    """Return the genders extracted from a truthy cell, otherwise None."""
    # NOTE(review): NaN is truthy, so cells that pandas read as NaN are still
    # passed to the extractor - confirm this is intended.
    if not cell:
        return None
    return gender_extractor.extract_genders(cell)


df['judge_gender'] = df['judge'].apply(_genders_or_none)
df['defendant_gender'] = df['names'].apply(_genders_or_none)
print(
    "DataFrame processing results (first row):",
    f"Judge gender: {df['judge_gender'].iloc[0]}",
    f"Defendant gender: {df['defendant_gender'].iloc[0]}",
    sep="\n",
)
DataFrame processing results (first row):
Judge gender: [('Рысков А Н', 'M')]
Defendant gender: [('Юсупов Гаяз Ризванович', 'M')]
from src.districts import MunicipalityExtractor
municipality_extractor = MunicipalityExtractor()
# Example 1: Look up the municipality for a single court code.
# The result is unpacked into region, municipality and OKTMO.
court_code = "61RS0006"
region, municipality, oktmo = municipality_extractor.get_municipality(court_code)
print(
    "Single court code processing result:",
    f"Court code: {court_code}",
    f"Region: {region}",
    f"Municipality: {municipality}",
    f"OKTMO: {oktmo}",
    sep="\n",
)
Single court code processing result:
Court code: 61RS0006
Region: Ростовская область
Municipality: Ростов-на-Дону
OKTMO: 60701000001
def get_court_code(cui):
    """Extract the court code from a CUI (Case Unique Identifier).

    Args:
        cui: CUI string; the court code is the part before the first '-'.

    Returns:
        The text before the first '-' in ``cui``, or ``cui`` unchanged when
        it contains no '-'.
    """
    # partition() yields the whole string as the head when '-' is absent,
    # so no separate branch is needed.
    return cui.partition('-')[0]
# Example 2: Derive the court code from the JSON object's CUI, then look up
# its municipality
json_cui = data['cui']
court_code = get_court_code(json_cui)
region, municipality, oktmo = municipality_extractor.get_municipality(court_code)
print(
    "JSON CUI processing result:",
    f"Court code: {court_code}",
    f"Region: {region}",
    f"Municipality: {municipality}",
    f"OKTMO: {oktmo}",
    sep="\n",
)
JSON CUI processing result:
Court code: 21RS0006
Region: Чувашская
Municipality: Канашский городской округ
OKTMO: 97707000
# Example 3: Processing dataframe using built-in method
def _court_code_or_none(cui):
    """Return the court code for a truthy CUI cell, otherwise None."""
    if not cui:
        return None
    return get_court_code(cui)


df['court_code'] = df['cui'].apply(_court_code_or_none)
df = municipality_extractor.process_dataframe(df, 'court_code')
print(
    "DataFrame processing results (first row):",
    f"Court code: {df['court_code'].iloc[0]}",
    f"Region: {df['region'].iloc[0]}",
    f"Municipality: {df['municipality'].iloc[0]}",
    f"OKTMO: {df['oktmo'].iloc[0]}",
    sep="\n",
)
DataFrame processing results (first row):
Court code: 21RS0006
Region: Чувашская
Municipality: Канашский городской округ
OKTMO: 97707000
from src.punishments import PunishmentExtractor
punishment_extractor = PunishmentExtractor(api_key="")  # Insert your API key here
# Example 1: Extract punishments from single string
input_string = """Шестакова Александра Владимировича признать виновным в совершении преступлений, предусмотренных п. «з» ч.2 ст.111, п.«а» ч.3 ст.158 Уголовного кодекса Российской Федерации и назначить ему наказание: - по п. «з» ч.2 ст.111УК РФ – в виде лишения свободы насрок три года; - по п. «а» ч.3 ст.158 УК РФ – в виде лишения свободы на срок два года. На основании ч.3 ст. 69 Уголовного кодекса РоссийскойФедерации по совокупности преступлений путем частичного сложения наказаний окончательно назначить Шестакову Александру Владимировичу наказаниев виде лишения свободы на срок четыре года."""
# find_punishments returns a pair; the second element holds the extracted data
initial_string, res = punishment_extractor.find_punishments(input_string)
print("Single string punishment extraction result:")
print(res)
Single string punishment extraction result:
{'individual_charges': [
{'article': '111', 'part': '2', 'punishment_type': 'лишение свободы', 'duration': '3 года'},
{'article': '158', 'part': '3', 'punishment_type': 'лишение свободы', 'duration': '2 года'}
],
'final_sentence': {'punishment_type': 'лишение свободы', 'duration': '4 года'}}
# Example 2: Run the punishment extractor on the 'text' field of the JSON object
decision_text = data['text']
initial_string, res = punishment_extractor.find_punishments(decision_text)
print("JSON text processing result:")
print("Extracted punishments:", res)
JSON text processing result:
Extracted punishments: {'individual_charges': [...], 'final_sentence': {...}}
# Example 3: Processing dataframe
# Note: This extractor requires API key for processing
def _punishments_or_none(text):
    """Return the second element of find_punishments() for a truthy cell, else None."""
    if not text:
        return None
    return punishment_extractor.find_punishments(text)[1]


df['punishments'] = df['text'].apply(_punishments_or_none)
print(
    "DataFrame processing results (first row):",
    f"Punishments: {df['punishments'].iloc[0]}",
    sep="\n",
)
DataFrame processing results (first row):
Punishments: {'individual_charges': [{'article': '159', 'punishment_type': 'штраф', 'amount': '50000'}], 'final_sentence': {'punishment_type': 'штраф', 'amount': '50000'}}
Subscribe to our newsletter!
You will receive our monthly newsletter with the most relevant Russia-related research news
INSTITUTE FOR EUROPEAN, RUSSIAN, AND EURASIAN STUDIES
1957 E St., NW, Suite 412,
Washington, DC 20052