"""
Test script for the Google Reviews scraper.

This script demonstrates how to use the Google Reviews scraper to extract
reviews from the locations of a Google My Business account, print a short
report about them, and export them to a CSV file.
"""

import os
import sys
import traceback
from collections import Counter

import django

# Setup Django. This is deliberately kept ahead of the project imports below,
# which presumably rely on the extra sys.path entry and on configured settings.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
)
django.setup()

from apps.social.scrapers import GoogleReviewsScraper  # noqa: E402
from django.conf import settings  # noqa: E402

# Upper bound passed to the scraper for every location.
MAX_REVIEWS_PER_LOCATION = 100
# Number of reviews printed in the "sample" section.
SAMPLE_SIZE = 5
# Maximum number of characters shown for review / reply text.
PREVIEW_LENGTH = 100
# File the scraped reviews are exported to (relative to the working directory).
CSV_FILENAME = 'google_reviews_export.csv'
# Horizontal rule used to frame section titles.
SEPARATOR = "=" * 80


def _raw_data(review):
    """Return the review's ``raw_data`` dict, or ``{}`` when it is missing or None."""
    return review.get('raw_data') or {}


def _has_reply(review):
    """Return True when the review's ``reply_count`` is a positive number."""
    return (review.get('reply_count') or 0) > 0


def _preview(text, limit=PREVIEW_LENGTH):
    """Return *text* cut to *limit* characters, adding '...' only if it was cut."""
    # Rating-only reviews may carry no text at all; treat None as empty.
    text = text or ''
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def _print_section(title):
    """Print a section title framed by separator lines, preceded by a blank line."""
    print("\n" + SEPARATOR)
    print(title)
    print(SEPARATOR)


def _print_sample_reviews(reviews):
    """Print the key fields of the first ``SAMPLE_SIZE`` reviews."""
    _print_section(f"📊 SAMPLE REVIEWS (showing first {SAMPLE_SIZE})")

    for index, review in enumerate(reviews[:SAMPLE_SIZE], 1):
        raw = _raw_data(review)
        print(f"\n--- Review {index} ---")
        # comment_id / author / published_at are accessed strictly on purpose:
        # a missing core field should surface as an error, not be papered over.
        print(f"ID: {review['comment_id']}")
        print(f"Author: {review['author']}")
        print(f"Published: {review['published_at']}")
        print(f"Location: {raw.get('location_display_name') or 'Unknown'}")
        print(f"Rating: {raw.get('star_rating', 'N/A')}")
        print(f"Reply: {'Yes' if _has_reply(review) else 'No'}")
        print(f"Text: {_preview(review.get('comments'))}")

        reply_comment = raw.get('reply_comment')
        if reply_comment:
            print(f"Business Reply: {_preview(reply_comment)}")


def _print_statistics(reviews):
    """Print totals plus per-location, per-rating and reply statistics.

    *reviews* must be non-empty (the reply percentage divides by its length).
    """
    _print_section("📈 STATISTICS")
    print(f"Total reviews: {len(reviews)}")
    print(f"Unique reviewers: {len({r['author'] for r in reviews})}")

    # Location distribution
    print("\nReviews by Location:")
    location_stats = Counter(
        _raw_data(r).get('location_display_name') or 'Unknown' for r in reviews
    )
    # Sort on the string form so mixed key types cannot raise TypeError.
    for location, count in sorted(location_stats.items(), key=lambda kv: str(kv[0])):
        print(f" - {location}: {count} reviews")

    # Rating distribution
    print("\nRating Distribution:")
    rating_stats = Counter()
    for review in reviews:
        rating = _raw_data(review).get('star_rating')
        rating_stats['N/A' if rating is None else rating] += 1
    # Ratings may mix numbers/strings with the 'N/A' placeholder; compare as str.
    for rating, count in sorted(rating_stats.items(), key=lambda kv: str(kv[0])):
        print(f" - {rating} stars: {count} reviews")

    # Reply statistics
    reviews_with_replies = sum(1 for r in reviews if _has_reply(r))
    reply_share = reviews_with_replies / len(reviews) * 100
    print(f"\nReviews with business replies: {reviews_with_replies} ({reply_share:.1f}%)")


def _export_to_csv(reviews, csv_filename=CSV_FILENAME):
    """Write *reviews* to *csv_filename*, adding three flattened helper columns."""
    # Imported lazily, as in the original script, so pandas is only required
    # when the export step is actually reached.
    import pandas as pd

    df = pd.DataFrame(reviews)

    # Add readable columns; tolerate rows whose raw_data is not a dict.
    raw = df['raw_data'].apply(lambda value: value if isinstance(value, dict) else {})
    df['location_name'] = raw.apply(lambda value: value.get('location_display_name', ''))
    df['star_rating'] = raw.apply(lambda value: value.get('star_rating', ''))
    df['has_reply'] = df['reply_count'].apply(
        lambda value: 'Yes' if (value or 0) > 0 else 'No'
    )

    df.to_csv(csv_filename, index=False)
    print(f"\n💾 Reviews saved to: {csv_filename}")


def _print_query_examples():
    """Print example SQL and Django ORM queries for filtering reviews by location."""
    _print_section("🔍 QUERY BY LOCATION")
    print("You can query reviews by location using the raw_data field:")
    print("\nExample SQL query:")
    print(" SELECT * FROM social_socialmediacomment")
    print(" WHERE platform = 'google_reviews'")
    print(" AND json_extract(raw_data, '$.location_display_name') = 'Your Location Name';")
    print("\nExample Django query:")
    print(" from social.models import SocialMediaComment")
    print(" location_reviews = SocialMediaComment.objects.filter(")
    print(" platform='google_reviews',")
    print(" raw_data__location_display_name='Your Location Name'")
    print(" )")


def test_google_reviews_scraper():
    """
    Run the Google Reviews scraper end to end and report the results.

    Reads ``GOOGLE_CREDENTIALS_FILE`` and ``GOOGLE_TOKEN_FILE`` from Django
    settings (falling back to ``client_secret.json`` / ``token.json``), builds
    a ``GoogleReviewsScraper``, scrapes up to ``MAX_REVIEWS_PER_LOCATION``
    reviews per location, prints a sample and statistics, and exports
    everything to ``CSV_FILENAME``.

    Each review is read as a dict with the keys ``comment_id``, ``author``,
    ``published_at``, ``comments``, ``reply_count`` and ``raw_data``.

    Returns:
        None. All outcomes, including failures, are reported on stdout; errors
        raised while scraping or reporting are caught and printed with a
        traceback rather than propagated.
    """
    # Configuration - pulled from Django settings, with local-file fallbacks
    credentials_file = getattr(settings, 'GOOGLE_CREDENTIALS_FILE', 'client_secret.json')
    token_file = getattr(settings, 'GOOGLE_TOKEN_FILE', 'token.json')

    if not os.path.exists(credentials_file):
        print("❌ ERROR: GOOGLE_CREDENTIALS_FILE not found")
        print(f"\nExpected file: {credentials_file}")
        print("\nPlease download your client_secret.json from Google Cloud Console:")
        print("1. Go to https://console.cloud.google.com/")
        print("2. Create a new project or select existing")
        print("3. Enable Google My Business API")
        print("4. Create OAuth 2.0 credentials")
        print("5. Download client_secret.json")
        return

    print(SEPARATOR)
    print("⭐ GOOGLE REVIEWS SCRAPER TEST")
    print(SEPARATOR)

    # Initialize scraper
    print("\n📝 Initializing Google Reviews scraper...")
    scraper_config = {
        'credentials_file': credentials_file,
        'token_file': token_file,
    }
    try:
        scraper = GoogleReviewsScraper(scraper_config)
    except Exception as e:
        print(f"❌ Error initializing scraper: {e}")
        return
    print("✅ Scraper initialized successfully")

    # Scrape reviews
    print("\n🚀 Starting to scrape Google Reviews...")
    print(f" - Maximum reviews per location: {MAX_REVIEWS_PER_LOCATION}")
    print(" - All locations will be scraped")
    print()

    # Top-level boundary of a diagnostic script: report any failure with its
    # traceback instead of letting it escape.
    try:
        reviews = scraper.scrape_comments(
            max_reviews_per_location=MAX_REVIEWS_PER_LOCATION
        )

        if not reviews:
            print("⚠️ No reviews found")
            print("\nPossible reasons:")
            print(" - No locations associated with your Google My Business account")
            print(" - Locations have no reviews")
            print(" - Invalid credentials or insufficient permissions")
            print(" - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(reviews)} reviews!")

        _print_sample_reviews(reviews)
        _print_statistics(reviews)
        _export_to_csv(reviews)
        _print_query_examples()

    except Exception as e:
        print(f"❌ Error scraping Google Reviews: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    test_google_reviews_scraper()