"""
|
|
Test script for Twitter/X comment scraper.
|
|
|
|
This script demonstrates how to use the Twitter scraper to extract replies
|
|
from a specified user's tweets.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import django
|
|
|
|
# Setup Django
|
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
|
django.setup()
|
|
|
|
from apps.social.scrapers import TwitterScraper
|
|
from django.conf import settings
|
|
|
|
|
|
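# Based on the fields this script reads below, scrape_comments() is assumed to
# return a list of dicts shaped roughly like the following (inferred from this
# script's usage, not confirmed against TwitterScraper's own documentation):
#
#     {
#         'comment_id': '...',     # ID of the reply tweet
#         'post_id': '...',        # ID of the original tweet being replied to
#         'author': '...',         # handle of the reply's author
#         'published_at': '...',   # reply timestamp
#         'like_count': 0,         # number of likes on the reply
#         'comments': '...',       # text of the reply itself
#         'raw_data': {'original_tweet_text': '...'},  # extra metadata
#     }

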
def test_twitter_scraper():
    """
    Test the Twitter scraper with configuration from Django settings.
    """
    # Configuration - pulled from settings/base.py via Django settings
    bearer_token = getattr(settings, 'TWITTER_BEARER_TOKEN', None)
    username = getattr(settings, 'TWITTER_USERNAME', 'elonmusk')
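
    # These settings are assumed to be populated from the environment (.env)
    # in settings/base.py; the exact mechanism there is an assumption, but it
    # would be something along these lines:
    #
    #     TWITTER_BEARER_TOKEN = os.environ.get('TWITTER_BEARER_TOKEN')
    #     TWITTER_USERNAME = os.environ.get('TWITTER_USERNAME', 'elonmusk')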

    if not bearer_token:
        print("❌ ERROR: TWITTER_BEARER_TOKEN not found in environment variables")
        print("\nPlease set TWITTER_BEARER_TOKEN in your .env file:")
        print("TWITTER_BEARER_TOKEN=your_twitter_bearer_token_here")
        print("\nTo get a bearer token:")
        print("1. Go to https://developer.twitter.com/en/portal/dashboard")
        print("2. Create a project and app")
        print("3. Get your bearer token from the Keys and tokens section")
        return

    print("=" * 80)
    print("🐦 TWITTER/X COMMENT SCRAPER TEST")
    print("=" * 80)

    # Initialize scraper
    print(f"\n📝 Initializing Twitter scraper for @{username}...")
    scraper_config = {
        'bearer_token': bearer_token,
        'username': username,
    }

    try:
        scraper = TwitterScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape comments
    print(f"\n🚀 Starting to scrape replies from @{username}...")
    print(" - Maximum tweets: 50")
    print(" - Maximum replies per tweet: 100")
    print()

    try:
        comments = scraper.scrape_comments(
            username=username,
            max_tweets=50,
            max_replies_per_tweet=100,
        )

        if not comments:
            print("⚠️ No comments found")
            print("\nPossible reasons:")
            print(" - User has no public tweets")
            print(" - No replies found on tweets")
            print(" - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(comments)} comments!")

        # Display sample comments
        print("\n" + "=" * 80)
        print("📊 SAMPLE COMMENTS (showing first 5)")
        print("=" * 80)

        for i, comment in enumerate(comments[:5], 1):
            print(f"\n--- Comment {i} ---")
            print(f"ID: {comment['comment_id']}")
            print(f"Author: {comment['author']}")
            print(f"Published: {comment['published_at']}")
            print(f"Original Tweet ID: {comment['post_id']}")
            print(f"Likes: {comment['like_count']}")
            print(f"Text: {comment['comments'][:100]}...")
            if comment.get('raw_data'):
                print(f"Original Tweet: {comment['raw_data'].get('original_tweet_text', 'N/A')[:80]}...")

        # Statistics
        print("\n" + "=" * 80)
        print("📈 STATISTICS")
        print("=" * 80)
        print(f"Total comments: {len(comments)}")
        print(f"Unique authors: {len(set(c['author'] for c in comments))}")
        print(f"Total likes on all comments: {sum(c['like_count'] for c in comments)}")

        # Save to CSV (pandas is imported lazily so the checks above can run
        # even if pandas is not installed)
        import pandas as pd
        df = pd.DataFrame(comments)
        csv_filename = f"{username}_twitter_comments.csv"
        df.to_csv(csv_filename, index=False)
        print(f"\n💾 Comments saved to: {csv_filename}")

    except Exception as e:
        print(f"❌ Error scraping Twitter: {e}")
        import traceback
        traceback.print_exc()


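# Note: despite the `test_` prefix, this is a standalone demo script rather
# than a pytest/Django test case; it is meant to be run directly (the
# sys.path setup above assumes this file sits three directory levels below
# the project root).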
if __name__ == "__main__":
|
|
test_twitter_scraper()
|