"""
Test script for Google Reviews scraper.

This script demonstrates how to use the Google Reviews scraper to extract
reviews from the locations of a specified Google My Business account.
"""

import os
import sys

import django

# Setup Django: the settings module must be declared and the project root
# (three directories up from this file) must be importable BEFORE
# django.setup() so that `apps.social` and `config.settings` resolve.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
django.setup()

# These imports require django.setup() to have run, hence their placement
# after the setup block rather than at the very top of the file.
from apps.social.scrapers import GoogleReviewsScraper  # noqa: E402
from django.conf import settings  # noqa: E402
def test_google_reviews_scraper():
    """
    Test the Google Reviews scraper with configuration from Django settings.

    Reads credential/token file paths from settings, initializes
    GoogleReviewsScraper, scrapes up to 100 reviews per location, then
    prints sample reviews and statistics, exports the results to CSV,
    and shows example queries. Returns None; all output goes to stdout.
    """
    # Configuration - pulled from settings/base.py via Django settings
    credentials_file = getattr(settings, 'GOOGLE_CREDENTIALS_FILE', 'client_secret.json')
    token_file = getattr(settings, 'GOOGLE_TOKEN_FILE', 'token.json')

    if not os.path.exists(credentials_file):
        print("❌ ERROR: GOOGLE_CREDENTIALS_FILE not found")
        print(f"\nExpected file: {credentials_file}")
        print("\nPlease download your client_secret.json from Google Cloud Console:")
        print("1. Go to https://console.cloud.google.com/")
        print("2. Create a new project or select existing")
        print("3. Enable Google My Business API")
        print("4. Create OAuth 2.0 credentials")
        print("5. Download client_secret.json")
        return

    print("=" * 80)
    print("⭐ GOOGLE REVIEWS SCRAPER TEST")
    print("=" * 80)

    # Initialize scraper
    print("\n📝 Initializing Google Reviews scraper...")
    scraper_config = {
        'credentials_file': credentials_file,
        'token_file': token_file
    }

    try:
        scraper = GoogleReviewsScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape reviews
    print("\n🚀 Starting to scrape Google Reviews...")
    print(" - Maximum reviews per location: 100")
    print(" - All locations will be scraped")
    print()

    try:
        reviews = scraper.scrape_comments(max_reviews_per_location=100)

        if not reviews:
            print("⚠️ No reviews found")
            print("\nPossible reasons:")
            print(" - No locations associated with your Google My Business account")
            print(" - Locations have no reviews")
            print(" - Invalid credentials or insufficient permissions")
            print(" - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(reviews)} reviews!")

        # reviews is guaranteed non-empty past this point
        _display_sample_reviews(reviews)
        _display_statistics(reviews)
        _export_to_csv(reviews)
        _print_query_examples()

    except Exception as e:
        # Broad catch is deliberate at this top-level test boundary:
        # print the error plus a full traceback for debugging.
        print(f"❌ Error scraping Google Reviews: {e}")
        import traceback
        traceback.print_exc()


def _display_sample_reviews(reviews):
    """Print details of the first five scraped reviews."""
    print("\n" + "=" * 80)
    print("📊 SAMPLE REVIEWS (showing first 5)")
    print("=" * 80)

    for i, review in enumerate(reviews[:5], 1):
        print(f"\n--- Review {i} ---")
        print(f"ID: {review['comment_id']}")
        print(f"Author: {review['author']}")
        print(f"Published: {review['published_at']}")
        print(f"Location: {review['raw_data']['location_display_name']}")
        print(f"Rating: {review['raw_data'].get('star_rating', 'N/A')}")
        print(f"Reply: {'Yes' if review['reply_count'] > 0 else 'No'}")
        # Truncate long review text to keep the console output readable
        print(f"Text: {review['comments'][:100]}...")
        if review.get('raw_data', {}).get('reply_comment'):
            print(f"Business Reply: {review['raw_data']['reply_comment'][:100]}...")


def _display_statistics(reviews):
    """Print aggregate statistics: totals, per-location/per-rating counts, replies.

    `reviews` must be non-empty (the reply-percentage line divides by its length).
    """
    print("\n" + "=" * 80)
    print("📈 STATISTICS")
    print("=" * 80)
    print(f"Total reviews: {len(reviews)}")
    print(f"Unique reviewers: {len(set(r['author'] for r in reviews))}")

    # Location distribution
    print("\nReviews by Location:")
    location_stats = {}
    for review in reviews:
        location = review['raw_data']['location_display_name'] or 'Unknown'
        location_stats[location] = location_stats.get(location, 0) + 1

    for location, count in sorted(location_stats.items()):
        print(f" - {location}: {count} reviews")

    # Rating distribution
    print("\nRating Distribution:")
    rating_stats = {}
    for review in reviews:
        rating = review['raw_data'].get('star_rating', 'N/A')
        rating_stats[rating] = rating_stats.get(rating, 0) + 1

    for rating, count in sorted(rating_stats.items()):
        print(f" - {rating} stars: {count} reviews")

    # Reply statistics
    reviews_with_replies = sum(1 for r in reviews if r['reply_count'] > 0)
    print(f"\nReviews with business replies: {reviews_with_replies} ({reviews_with_replies/len(reviews)*100:.1f}%)")


def _export_to_csv(reviews):
    """Flatten the scraped reviews into a DataFrame and save them as CSV."""
    # Imported lazily so the rest of the script works without pandas installed
    import pandas as pd

    df = pd.DataFrame(reviews)
    csv_filename = 'google_reviews_export.csv'

    # Add readable columns derived from the nested raw_data payload
    df['location_name'] = df['raw_data'].apply(lambda x: x.get('location_display_name', ''))
    df['star_rating'] = df['raw_data'].apply(lambda x: x.get('star_rating', ''))
    df['has_reply'] = df['reply_count'].apply(lambda x: 'Yes' if x > 0 else 'No')

    df.to_csv(csv_filename, index=False)
    print(f"\n💾 Reviews saved to: {csv_filename}")


def _print_query_examples():
    """Print example SQL and Django ORM queries for filtering reviews by location."""
    print("\n" + "=" * 80)
    print("🔍 QUERY BY LOCATION")
    print("=" * 80)
    print("You can query reviews by location using the raw_data field:")
    print("\nExample SQL query:")
    print(" SELECT * FROM social_socialmediacomment")
    print(" WHERE platform = 'google_reviews'")
    print(" AND json_extract(raw_data, '$.location_display_name') = 'Your Location Name';")
    print("\nExample Django query:")
    print(" from social.models import SocialMediaComment")
    print(" location_reviews = SocialMediaComment.objects.filter(")
    print(" platform='google_reviews',")
    print(" raw_data__location_display_name='Your Location Name'")
    print(" )")
if __name__ == "__main__":
    # Allow running this module directly as a standalone test script.
    test_google_reviews_scraper()