121 lines
4.1 KiB
Python
121 lines
4.1 KiB
Python
"""
|
|
Test script for LinkedIn comment scraper.
|
|
|
|
This script demonstrates how to use the LinkedIn scraper to extract comments
|
|
from a specified organization's posts.
|
|
"""
|
|
|
|
import os
import sys
import django

# Setup Django before importing anything from the project:
# 1) point Django at the project's settings module,
# 2) put the project root (three directory levels above this file) on
#    sys.path so 'config.settings' and 'apps.*' are importable,
# 3) run django.setup() to initialize the app registry.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
django.setup()

# These imports must come AFTER django.setup(); importing app code or
# django.conf.settings earlier would fail with an ImproperlyConfigured error.
from apps.social.scrapers import LinkedInScraper
from django.conf import settings
|
|
|
|
|
|
def test_linkedin_scraper(max_posts=50, max_comments_per_post=100):
    """
    Test the LinkedIn scraper with configuration from Django settings.

    Reads LINKEDIN_ACCESS_TOKEN / LINKEDIN_ORGANIZATION_ID from settings,
    scrapes comments from the organization's posts, prints a small sample
    and summary statistics, and saves all comments to a CSV file.

    Args:
        max_posts: Maximum number of organization posts to scan (default 50).
        max_comments_per_post: Maximum comments fetched per post (default 100).
    """
    # Configuration - pulled from settings/base.py via Django settings
    access_token = getattr(settings, 'LINKEDIN_ACCESS_TOKEN', None)
    organization_id = getattr(settings, 'LINKEDIN_ORGANIZATION_ID', 'urn:li:organization:1337')

    if not access_token:
        _print_missing_token_help()
        return

    print("=" * 80)
    print("💼 LINKEDIN COMMENT SCRAPER TEST")
    print("=" * 80)

    # Initialize scraper
    print(f"\n📝 Initializing LinkedIn scraper for {organization_id}...")
    scraper_config = {
        'access_token': access_token,
        'organization_id': organization_id
    }

    try:
        scraper = LinkedInScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        # Broad catch is deliberate in a manual test script: report and stop.
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape comments; the printed limits come from the same parameters that
    # are passed to the API call, so the status text can never drift.
    print("\n🚀 Starting to scrape comments from organization posts...")
    print(f" - Maximum posts: {max_posts}")
    print(f" - Maximum comments per post: {max_comments_per_post}")
    print()

    try:
        comments = scraper.scrape_comments(
            organization_id=organization_id,
            max_posts=max_posts,
            max_comments_per_post=max_comments_per_post
        )

        if not comments:
            _print_no_comments_help()
            return

        print(f"✅ Successfully scraped {len(comments)} comments!")

        _display_sample_comments(comments)
        _print_statistics(comments)
        _save_comments_csv(comments, organization_id)

    except Exception as e:
        # Keep the full traceback visible so API/auth failures are debuggable.
        print(f"❌ Error scraping LinkedIn: {e}")
        import traceback
        traceback.print_exc()


def _print_missing_token_help():
    """Print setup instructions when no LinkedIn access token is configured."""
    print("❌ ERROR: LINKEDIN_ACCESS_TOKEN not found in environment variables")
    print("\nPlease set LINKEDIN_ACCESS_TOKEN in your .env file:")
    print("LINKEDIN_ACCESS_TOKEN=your_linkedin_access_token_here")
    print("\nTo get an access token:")
    print("1. Go to https://www.linkedin.com/developers/")
    print("2. Create an application")
    print("3. Get your access token from the OAuth 2.0 flow")


def _print_no_comments_help():
    """Print the likely causes when the scrape returns no comments."""
    print("⚠️ No comments found")
    print("\nPossible reasons:")
    print(" - Organization has no public posts")
    print(" - No comments found on posts")
    print(" - Invalid access token or organization ID")
    print(" - API rate limit reached")


def _display_sample_comments(comments):
    """Pretty-print the first five scraped comment dicts."""
    print("\n" + "=" * 80)
    print("📊 SAMPLE COMMENTS (showing first 5)")
    print("=" * 80)

    for i, comment in enumerate(comments[:5], 1):
        print(f"\n--- Comment {i} ---")
        print(f"ID: {comment['comment_id']}")
        print(f"Author: {comment['author']}")
        print(f"Published: {comment['published_at']}")
        print(f"Post ID: {comment['post_id']}")
        print(f"Likes: {comment['like_count']}")
        # NOTE(review): comment text lives under the 'comments' key — matches
        # the scraper's output schema; verify against LinkedInScraper.
        print(f"Text: {comment['comments'][:100]}...")
        if comment.get('raw_data'):
            print(f"Raw Data: {str(comment['raw_data'])[:80]}...")


def _print_statistics(comments):
    """Print aggregate statistics over all scraped comments."""
    print("\n" + "=" * 80)
    print("📈 STATISTICS")
    print("=" * 80)
    print(f"Total comments: {len(comments)}")
    print(f"Unique authors: {len(set(c['author'] for c in comments))}")
    print(f"Total likes on all comments: {sum(c['like_count'] for c in comments)}")


def _save_comments_csv(comments, organization_id):
    """Save the scraped comments to a CSV named after the organization id."""
    import pandas as pd  # local import: only needed when a scrape succeeds
    df = pd.DataFrame(comments)
    # Strip the URN prefix so the filename is just the numeric org id.
    csv_filename = f"{organization_id.replace('urn:li:organization:', '')}_linkedin_comments.csv"
    df.to_csv(csv_filename, index=False)
    print(f"\n💾 Comments saved to: {csv_filename}")
|
|
|
|
|
|
# Allow running this module directly as a manual test script.
if __name__ == "__main__":
    test_linkedin_scraper()
|