HH/apps/social/tests/test_linkedin.py
2026-01-12 12:27:29 +03:00

121 lines
4.1 KiB
Python

"""
Test script for LinkedIn comment scraper.
This script demonstrates how to use the LinkedIn scraper to extract comments
from a specified organization's posts.
"""
import os
import sys
import django
# Setup Django
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
django.setup()
from apps.social.scrapers import LinkedInScraper
from django.conf import settings
def test_linkedin_scraper():
    """
    Smoke-test the LinkedIn scraper end to end using Django settings.

    Reads LINKEDIN_ACCESS_TOKEN and LINKEDIN_ORGANIZATION_ID from settings,
    scrapes up to 50 posts (100 comments each), prints a 5-comment sample
    plus aggregate statistics, and saves all comments to a CSV file in the
    current working directory. Prints guidance and returns early when the
    access token is missing or the scraper cannot be initialized.
    """
    # Configuration - pulled from settings/base.py via Django settings
    access_token = getattr(settings, 'LINKEDIN_ACCESS_TOKEN', None)
    organization_id = getattr(settings, 'LINKEDIN_ORGANIZATION_ID', 'urn:li:organization:1337')

    # Without a token there is nothing to test: print setup help and bail out.
    if not access_token:
        print("❌ ERROR: LINKEDIN_ACCESS_TOKEN not found in environment variables")
        print("\nPlease set LINKEDIN_ACCESS_TOKEN in your .env file:")
        print("LINKEDIN_ACCESS_TOKEN=your_linkedin_access_token_here")
        print("\nTo get an access token:")
        print("1. Go to https://www.linkedin.com/developers/")
        print("2. Create an application")
        print("3. Get your access token from the OAuth 2.0 flow")
        return

    print("=" * 80)
    print("💼 LINKEDIN COMMENT SCRAPER TEST")
    print("=" * 80)

    # Initialize scraper
    print(f"\n📝 Initializing LinkedIn scraper for {organization_id}...")
    scraper_config = {
        'access_token': access_token,
        'organization_id': organization_id
    }
    try:
        scraper = LinkedInScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        # Initialization failure (bad config, auth error, ...) is terminal.
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape comments
    # NOTE: was an f-string with no placeholders; plain literal is equivalent.
    print("\n🚀 Starting to scrape comments from organization posts...")
    print(" - Maximum posts: 50")
    print(" - Maximum comments per post: 100")
    print()

    try:
        comments = scraper.scrape_comments(
            organization_id=organization_id,
            max_posts=50,
            max_comments_per_post=100
        )

        # An empty result is not an error for a scraper test — explain why
        # it might legitimately happen and stop.
        if not comments:
            print("⚠️ No comments found")
            print("\nPossible reasons:")
            print(" - Organization has no public posts")
            print(" - No comments found on posts")
            print(" - Invalid access token or organization ID")
            print(" - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(comments)} comments!")

        # Display sample comments
        print("\n" + "=" * 80)
        print("📊 SAMPLE COMMENTS (showing first 5)")
        print("=" * 80)
        for i, comment in enumerate(comments[:5], 1):
            print(f"\n--- Comment {i} ---")
            print(f"ID: {comment['comment_id']}")
            print(f"Author: {comment['author']}")
            print(f"Published: {comment['published_at']}")
            print(f"Post ID: {comment['post_id']}")
            print(f"Likes: {comment['like_count']}")
            # 'comments' is the text field of a single comment record
            # (scraper's naming) — truncated to keep console output short.
            print(f"Text: {comment['comments'][:100]}...")
            if comment.get('raw_data'):
                print(f"Raw Data: {str(comment['raw_data'])[:80]}...")

        # Statistics
        print("\n" + "=" * 80)
        print("📈 STATISTICS")
        print("=" * 80)
        print(f"Total comments: {len(comments)}")
        print(f"Unique authors: {len(set(c['author'] for c in comments))}")
        print(f"Total likes on all comments: {sum(c['like_count'] for c in comments)}")

        # Save to CSV — local import so the rest of the script runs even
        # where pandas is not installed.
        import pandas as pd
        df = pd.DataFrame(comments)
        # File name is the numeric organization id, e.g. "1337_linkedin_comments.csv".
        csv_filename = f"{organization_id.replace('urn:li:organization:', '')}_linkedin_comments.csv"
        df.to_csv(csv_filename, index=False)
        print(f"\n💾 Comments saved to: {csv_filename}")

    except Exception as e:
        # Broad catch is deliberate in a demo script: report and show the
        # traceback instead of crashing with a bare stack dump.
        print(f"❌ Error scraping LinkedIn: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
test_linkedin_scraper()