"""
Test script for LinkedIn comment scraper.

This script demonstrates how to use the LinkedIn scraper to extract comments
from a specified organization's posts.
"""
import os
import sys
import traceback

import django

# Setup Django. The settings module and the project root (three directory
# levels above this file) must be in place before django.setup() runs, and
# django.setup() must run before the project imports below.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
)
django.setup()

from apps.social.scrapers import LinkedInScraper  # noqa: E402
from django.conf import settings  # noqa: E402

# Horizontal rule used to frame each section of the console report.
SEPARATOR = "=" * 80

# Number of comments shown in the sample section and maximum number of
# characters of comment text / raw data printed per sample.
SAMPLE_SIZE = 5
TEXT_PREVIEW_CHARS = 100
RAW_PREVIEW_CHARS = 80


def _print_missing_token_help():
    """Print instructions for configuring LINKEDIN_ACCESS_TOKEN."""
    print("❌ ERROR: LINKEDIN_ACCESS_TOKEN not found in environment variables")
    print("\nPlease set LINKEDIN_ACCESS_TOKEN in your .env file:")
    print("LINKEDIN_ACCESS_TOKEN=your_linkedin_access_token_here")
    print("\nTo get an access token:")
    print("1. Go to https://www.linkedin.com/developers/")
    print("2. Create an application")
    print("3. Get your access token from the OAuth 2.0 flow")


def _print_sample_comments(comments):
    """Print the first ``SAMPLE_SIZE`` comments in a readable layout."""
    print("\n" + SEPARATOR)
    print(f"📊 SAMPLE COMMENTS (showing first {SAMPLE_SIZE})")
    print(SEPARATOR)

    for i, comment in enumerate(comments[:SAMPLE_SIZE], 1):
        print(f"\n--- Comment {i} ---")
        print(f"ID: {comment['comment_id']}")
        print(f"Author: {comment['author']}")
        print(f"Published: {comment['published_at']}")
        print(f"Post ID: {comment['post_id']}")
        print(f"Likes: {comment['like_count']}")
        # The comment body is stored under the 'comments' key.
        print(f"Text: {comment['comments'][:TEXT_PREVIEW_CHARS]}...")
        # 'raw_data' is optional, so it is only shown when present and truthy.
        if comment.get('raw_data'):
            print(f"Raw Data: {str(comment['raw_data'])[:RAW_PREVIEW_CHARS]}...")


def _print_statistics(comments):
    """Print aggregate counts (comments, unique authors, total likes)."""
    print("\n" + SEPARATOR)
    print("📈 STATISTICS")
    print(SEPARATOR)
    print(f"Total comments: {len(comments)}")
    print(f"Unique authors: {len({c['author'] for c in comments})}")
    print(f"Total likes on all comments: {sum(c['like_count'] for c in comments)}")


def _save_comments_csv(comments, organization_id):
    """Write *comments* to ``<organization id>_linkedin_comments.csv``.

    The file is created in the current working directory. Returns the
    file name that was written.
    """
    # Imported here, at the point of use, as in the original script: pandas
    # is only needed once there are comments to export.
    import pandas as pd

    df = pd.DataFrame(comments)
    csv_filename = (
        f"{organization_id.replace('urn:li:organization:', '')}"
        "_linkedin_comments.csv"
    )
    df.to_csv(csv_filename, index=False)
    return csv_filename


def test_linkedin_scraper(max_posts=50, max_comments_per_post=100):
    """
    Test the LinkedIn scraper with configuration from Django settings.

    Reads ``LINKEDIN_ACCESS_TOKEN`` and ``LINKEDIN_ORGANIZATION_ID`` from
    Django settings, scrapes comments from the organization's posts, prints a
    sample and summary statistics, and saves all comments to a CSV file.

    Args:
        max_posts: Value passed to the scraper as ``max_posts``.
        max_comments_per_post: Value passed to the scraper as
            ``max_comments_per_post``.

    Returns:
        None. Progress and errors are reported on stdout; exceptions raised
        while initializing the scraper or scraping are caught and printed
        rather than propagated.
    """
    # Configuration - pulled from settings/base.py via Django settings
    access_token = getattr(settings, 'LINKEDIN_ACCESS_TOKEN', None)
    organization_id = getattr(
        settings, 'LINKEDIN_ORGANIZATION_ID', 'urn:li:organization:1337'
    )

    if not access_token:
        _print_missing_token_help()
        return

    print(SEPARATOR)
    print("💼 LINKEDIN COMMENT SCRAPER TEST")
    print(SEPARATOR)

    # Initialize scraper
    print(f"\n📝 Initializing LinkedIn scraper for {organization_id}...")
    scraper_config = {
        'access_token': access_token,
        'organization_id': organization_id,
    }

    try:
        scraper = LinkedInScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape comments
    print("\n🚀 Starting to scrape comments from organization posts...")
    print(f" - Maximum posts: {max_posts}")
    print(f" - Maximum comments per post: {max_comments_per_post}")
    print()

    # Top-level boundary of a manual test script: any failure while scraping,
    # reporting or exporting is printed with its traceback instead of crashing.
    try:
        comments = scraper.scrape_comments(
            organization_id=organization_id,
            max_posts=max_posts,
            max_comments_per_post=max_comments_per_post,
        )

        if not comments:
            print("⚠️ No comments found")
            print("\nPossible reasons:")
            print(" - Organization has no public posts")
            print(" - No comments found on posts")
            print(" - Invalid access token or organization ID")
            print(" - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(comments)} comments!")

        # Display sample comments
        _print_sample_comments(comments)

        # Statistics
        _print_statistics(comments)

        # Save to CSV
        csv_filename = _save_comments_csv(comments, organization_id)
        print(f"\n💾 Comments saved to: {csv_filename}")

    except Exception as e:
        print(f"❌ Error scraping LinkedIn: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    test_linkedin_scraper()