121 lines
4.1 KiB
Python
121 lines
4.1 KiB
Python
"""
|
|
Test script for LinkedIn comment scraper.
|
|
|
|
This script demonstrates how to use the LinkedIn scraper to extract comments
|
|
from a specified organization's posts.
|
|
"""
|
|
|
|
import os
import sys
import django

# Setup Django before importing anything from the project:
# 1) point Django at the project's settings module,
# 2) put the project root (three directory levels above this file) on
#    sys.path so 'config.settings' and 'apps.*' are importable,
# 3) run django.setup() to initialize the app registry.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
django.setup()

# These imports must come AFTER django.setup(); importing app code or
# django.conf.settings earlier would fail with an ImproperlyConfigured error.
from apps.social.scrapers import LinkedInScraper
from django.conf import settings
|
|
|
|
|
|
def test_linkedin_scraper(max_posts=50, max_comments_per_post=100):
    """
    Test the LinkedIn scraper with configuration from Django settings.

    Reads LINKEDIN_ACCESS_TOKEN / LINKEDIN_ORGANIZATION_ID from settings,
    scrapes comments from the organization's posts, prints a small sample
    and summary statistics, and saves all comments to a CSV file.

    Args:
        max_posts: Maximum number of organization posts to scan (default 50).
        max_comments_per_post: Maximum comments fetched per post (default 100).
    """
    # Configuration - pulled from settings/base.py via Django settings
    access_token = getattr(settings, 'LINKEDIN_ACCESS_TOKEN', None)
    organization_id = getattr(settings, 'LINKEDIN_ORGANIZATION_ID', 'urn:li:organization:1337')

    if not access_token:
        _print_missing_token_help()
        return

    print("=" * 80)
    print("💼 LINKEDIN COMMENT SCRAPER TEST")
    print("=" * 80)

    # Initialize scraper
    print(f"\n📝 Initializing LinkedIn scraper for {organization_id}...")
    scraper_config = {
        'access_token': access_token,
        'organization_id': organization_id
    }

    try:
        scraper = LinkedInScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        # Broad catch is deliberate in a manual test script: report and stop.
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape comments; the printed limits come from the same parameters that
    # are passed to the API call, so the status text can never drift.
    print("\n🚀 Starting to scrape comments from organization posts...")
    print(f" - Maximum posts: {max_posts}")
    print(f" - Maximum comments per post: {max_comments_per_post}")
    print()

    try:
        comments = scraper.scrape_comments(
            organization_id=organization_id,
            max_posts=max_posts,
            max_comments_per_post=max_comments_per_post
        )

        if not comments:
            _print_no_comments_help()
            return

        print(f"✅ Successfully scraped {len(comments)} comments!")

        _display_sample_comments(comments)
        _print_statistics(comments)
        _save_comments_csv(comments, organization_id)

    except Exception as e:
        # Keep the full traceback visible so API/auth failures are debuggable.
        print(f"❌ Error scraping LinkedIn: {e}")
        import traceback
        traceback.print_exc()


def _print_missing_token_help():
    """Print setup instructions when no LinkedIn access token is configured."""
    print("❌ ERROR: LINKEDIN_ACCESS_TOKEN not found in environment variables")
    print("\nPlease set LINKEDIN_ACCESS_TOKEN in your .env file:")
    print("LINKEDIN_ACCESS_TOKEN=your_linkedin_access_token_here")
    print("\nTo get an access token:")
    print("1. Go to https://www.linkedin.com/developers/")
    print("2. Create an application")
    print("3. Get your access token from the OAuth 2.0 flow")


def _print_no_comments_help():
    """Print the likely causes when the scrape returns no comments."""
    print("⚠️ No comments found")
    print("\nPossible reasons:")
    print(" - Organization has no public posts")
    print(" - No comments found on posts")
    print(" - Invalid access token or organization ID")
    print(" - API rate limit reached")


def _display_sample_comments(comments):
    """Pretty-print the first five scraped comment dicts."""
    print("\n" + "=" * 80)
    print("📊 SAMPLE COMMENTS (showing first 5)")
    print("=" * 80)

    for i, comment in enumerate(comments[:5], 1):
        print(f"\n--- Comment {i} ---")
        print(f"ID: {comment['comment_id']}")
        print(f"Author: {comment['author']}")
        print(f"Published: {comment['published_at']}")
        print(f"Post ID: {comment['post_id']}")
        print(f"Likes: {comment['like_count']}")
        # NOTE(review): comment text lives under the 'comments' key — matches
        # the scraper's output schema; verify against LinkedInScraper.
        print(f"Text: {comment['comments'][:100]}...")
        if comment.get('raw_data'):
            print(f"Raw Data: {str(comment['raw_data'])[:80]}...")


def _print_statistics(comments):
    """Print aggregate statistics over all scraped comments."""
    print("\n" + "=" * 80)
    print("📈 STATISTICS")
    print("=" * 80)
    print(f"Total comments: {len(comments)}")
    print(f"Unique authors: {len(set(c['author'] for c in comments))}")
    print(f"Total likes on all comments: {sum(c['like_count'] for c in comments)}")


def _save_comments_csv(comments, organization_id):
    """Save the scraped comments to a CSV named after the organization id."""
    import pandas as pd  # local import: only needed when a scrape succeeds
    df = pd.DataFrame(comments)
    # Strip the URN prefix so the filename is just the numeric org id.
    csv_filename = f"{organization_id.replace('urn:li:organization:', '')}_linkedin_comments.csv"
    df.to_csv(csv_filename, index=False)
    print(f"\n💾 Comments saved to: {csv_filename}")
|
|
|
|
|
|
# Allow running this module directly as a manual test script.
if __name__ == "__main__":
    test_linkedin_scraper()
|