HH/apps/social/tests/test_twitter.py
"""
Test script for Twitter/X comment scraper.
This script demonstrates how to use the Twitter scraper to extract replies
from a specified user's tweets.
"""
import os
import sys

import django

# Set up Django before importing any project modules. This file lives at
# HH/apps/social/tests/test_twitter.py, so the project root is four
# dirname() calls above __file__.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))))))
django.setup()

from django.conf import settings

from apps.social.scrapers import TwitterScraper


def test_twitter_scraper():
    """Test the Twitter scraper with configuration from Django settings."""
    # Configuration - pulled from settings/base.py via Django settings.
    bearer_token = getattr(settings, 'TWITTER_BEARER_TOKEN', None)
    username = getattr(settings, 'TWITTER_USERNAME', 'elonmusk')

    if not bearer_token:
        print("❌ ERROR: TWITTER_BEARER_TOKEN not found in Django settings")
        print("\nPlease set TWITTER_BEARER_TOKEN in your .env file:")
        print("TWITTER_BEARER_TOKEN=your_twitter_bearer_token_here")
        print("\nTo get a bearer token:")
        print("1. Go to https://developer.twitter.com/en/portal/dashboard")
        print("2. Create a project and app")
        print("3. Get your bearer token from the Keys and tokens section")
        return
print("=" * 80)
print("🐦 TWITTER/X COMMENT SCRAPER TEST")
print("=" * 80)
# Initialize scraper
print(f"\n📝 Initializing Twitter scraper for @{username}...")
scraper_config = {
'bearer_token': bearer_token,
'username': username
}
try:
scraper = TwitterScraper(scraper_config)
print("✅ Scraper initialized successfully")
except Exception as e:
print(f"❌ Error initializing scraper: {e}")
return
    # Scrape comments
    print(f"\n🚀 Starting to scrape replies from @{username}...")
    print(" - Maximum tweets: 50")
    print(" - Maximum replies per tweet: 100")
    print()

    try:
        comments = scraper.scrape_comments(
            username=username,
            max_tweets=50,
            max_replies_per_tweet=100,
        )
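
        # Each record returned by scrape_comments() appears to be a dict of
        # the shape below (inferred from the fields this script reads; not a
        # documented schema):
        #   {'comment_id': ..., 'author': ..., 'published_at': ...,
        #    'post_id': ..., 'like_count': ..., 'comments': <reply text>,
        #    'raw_data': {'original_tweet_text': ...}}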

        if not comments:
            print("⚠️ No comments found")
            print("\nPossible reasons:")
            print(" - User has no public tweets")
            print(" - No replies found on tweets")
            print(" - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(comments)} comments!")

        # Display sample comments
        print("\n" + "=" * 80)
        print("📊 SAMPLE COMMENTS (showing first 5)")
        print("=" * 80)
        for i, comment in enumerate(comments[:5], 1):
            print(f"\n--- Comment {i} ---")
            print(f"ID: {comment['comment_id']}")
            print(f"Author: {comment['author']}")
            print(f"Published: {comment['published_at']}")
            print(f"Original Tweet ID: {comment['post_id']}")
            print(f"Likes: {comment['like_count']}")
            print(f"Text: {comment['comments'][:100]}...")
            if comment.get('raw_data'):
                print(f"Original Tweet: {comment['raw_data'].get('original_tweet_text', 'N/A')[:80]}...")

        # Statistics
        print("\n" + "=" * 80)
        print("📈 STATISTICS")
        print("=" * 80)
        print(f"Total comments: {len(comments)}")
        print(f"Unique authors: {len(set(c['author'] for c in comments))}")
        print(f"Total likes on all comments: {sum(c['like_count'] for c in comments)}")

        # Save to CSV; pandas is imported lazily because it is only needed
        # for this export step.
        import pandas as pd

        df = pd.DataFrame(comments)
        csv_filename = f"{username}_twitter_comments.csv"
        df.to_csv(csv_filename, index=False)
        print(f"\n💾 Comments saved to: {csv_filename}")
    except Exception as e:
        print(f"❌ Error scraping Twitter: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    test_twitter_scraper()
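
# To run this test manually (a sketch: the script puts the project root on
# sys.path itself, so the working directory mainly determines where the CSV
# is written; the path is assumed from the header above):
#   python apps/social/tests/test_twitter.py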