git clone https://github.com/our_new_public_git/tg_api_parser.git  # TODO: replace with the real repository URL
cd tg_api_parser
pip install -r requirements.txt
TG_API_ID=1234567
TG_API_HASH=abc123def456...
# Importing the parser class
from tg_api_data_parser import TgApiParser
chat_id = -1001582532081  # Replace with the ID of the chat you want to parse.
# Simple initialization - uses credentials from .env file
parser = TgApiParser(chat_id=chat_id)
# Advanced initialization with custom settings
parser = TgApiParser(
    chat_id=chat_id,
    api_id=12345,  # override .env credentials
    api_hash="your_hash",  # override .env credentials
    output_path="./my_downloads"  # custom download location
)
# Download all available messages
await parser.load_data()
# Download only the most recent 2000 messages
await parser.load_data(limit=2000)
# Be more persistent with rate limits
await parser.load_data(limit=1000, retries=5)
# Access the loaded messages
print(f"Downloaded {len(parser.messages)} messages")
# Get the first message
first_message = parser.messages[0]
print(f"First message: {first_message.text}")
print(f"Sent by: {first_message.from_user.username}")
print(f"Date: {first_message.date}")
# Extract messages into a structured format
parser.extract_messages()
# Access the structured data
print(f"Content messages: {len(parser.content_messages)}")
print(f"Service events: {len(parser.member_actions)}")
# View the structure of the first message
if parser.content_messages:
    first_msg = parser.content_messages[0]
    print(f"Message keys: {list(first_msg.keys())}")
# Convert the list of message dictionaries to a pandas DataFrame for analysis
content_messages_table = pd.DataFrame(parser.content_messages)
# Display the first 10 rows to inspect the data structure and content
content_messages_table.head(10)
# Save only content messages to CSV
parser.save_chat()
# Save both content messages and service events
parser.save_chat(save_actions=True)
# Download all media files from messages
await parser.get_files()
# Download with custom retry settings
await parser.get_files(retries=5)
python -m tg_api_parser.main -c CHAT_ID [OPTIONS]
# Download all messages from a channel (no media files)
python -m tg_api_data_parser -c @my_channel
# Download 1000 most recent messages with media files
python -m tg_api_data_parser -c @my_channel -l 1000 -m
# Full export: all messages, media files, and service messages (joins/leaves) to custom folder
python -m tg_api_data_parser -c @my_channel -f ./my_data -s -m
# Quick export: 500 most recent messages only (no media, no service messages)
python -m tg_api_data_parser -c @my_channel -l 500
Subscribe to our newsletter!
You will receive our monthly newsletter with the most relevant Russia-related research news.