HH/apps/social/tests/test_google_reviews.py
"""
Test script for the Google Reviews scraper.

This script demonstrates how to use the Google Reviews scraper to extract reviews
from the locations associated with a Google My Business account.
"""
import os
import sys

import django

# Setup Django: add the project root (four levels up from this file) to sys.path
# so that 'config' and 'apps' are importable, then initialise the settings.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
)
django.setup()

from apps.social.scrapers import GoogleReviewsScraper
from django.conf import settings


def test_google_reviews_scraper():
    """
    Test the Google Reviews scraper with configuration from Django settings.
    """
    # Configuration - pulled from settings/base.py via Django settings
    credentials_file = getattr(settings, 'GOOGLE_CREDENTIALS_FILE', 'client_secret.json')
    token_file = getattr(settings, 'GOOGLE_TOKEN_FILE', 'token.json')
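
    # The settings referenced above are assumed to live in settings/base.py; a
    # minimal sketch (names taken from this script, values purely illustrative):
    #   GOOGLE_CREDENTIALS_FILE = BASE_DIR / 'client_secret.json'
    #   GOOGLE_TOKEN_FILE = BASE_DIR / 'token.json'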

    if not os.path.exists(credentials_file):
        print("❌ ERROR: GOOGLE_CREDENTIALS_FILE not found")
        print(f"\nExpected file: {credentials_file}")
        print("\nPlease download your client_secret.json from Google Cloud Console:")
        print("1. Go to https://console.cloud.google.com/")
        print("2. Create a new project or select an existing one")
        print("3. Enable the Google My Business API")
        print("4. Create OAuth 2.0 credentials")
        print("5. Download client_secret.json")
        return
print("=" * 80)
print("⭐ GOOGLE REVIEWS SCRAPER TEST")
print("=" * 80)
# Initialize scraper
print(f"\n📝 Initializing Google Reviews scraper...")
scraper_config = {
'credentials_file': credentials_file,
'token_file': token_file
}
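
    # Note: the credentials/token file pair suggests the scraper runs an OAuth 2.0
    # consent flow on first use and caches the resulting token in token_file; that
    # is an assumption about GoogleReviewsScraper, not something this script checks.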
    try:
        scraper = GoogleReviewsScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape reviews
    print("\n🚀 Starting to scrape Google Reviews...")
    print("  - Maximum reviews per location: 100")
    print("  - All locations will be scraped")
    print()

    try:
        reviews = scraper.scrape_comments(max_reviews_per_location=100)
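
        # Each review is expected to be a dict shaped roughly like the fields used
        # below (inferred from this script, not a documented contract):
        #   {
        #       'comment_id': ..., 'author': ..., 'published_at': ...,
        #       'comments': '<review text>', 'reply_count': 0 or more,
        #       'raw_data': {'location_display_name': ..., 'star_rating': ...,
        #                    'reply_comment': ...},
        #   }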

        if not reviews:
            print("⚠️ No reviews found")
            print("\nPossible reasons:")
            print("  - No locations associated with your Google My Business account")
            print("  - Locations have no reviews")
            print("  - Invalid credentials or insufficient permissions")
            print("  - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(reviews)} reviews!")

        # Display sample reviews
        print("\n" + "=" * 80)
        print("📊 SAMPLE REVIEWS (showing first 5)")
        print("=" * 80)
        for i, review in enumerate(reviews[:5], 1):
            print(f"\n--- Review {i} ---")
            print(f"ID: {review['comment_id']}")
            print(f"Author: {review['author']}")
            print(f"Published: {review['published_at']}")
            print(f"Location: {review['raw_data']['location_display_name']}")
            print(f"Rating: {review['raw_data'].get('star_rating', 'N/A')}")
            print(f"Reply: {'Yes' if review['reply_count'] > 0 else 'No'}")
            print(f"Text: {review['comments'][:100]}...")
            if review.get('raw_data', {}).get('reply_comment'):
                print(f"Business Reply: {review['raw_data']['reply_comment'][:100]}...")

        # Statistics
        print("\n" + "=" * 80)
        print("📈 STATISTICS")
        print("=" * 80)
        print(f"Total reviews: {len(reviews)}")
        print(f"Unique reviewers: {len(set(r['author'] for r in reviews))}")

        # Location distribution
        print("\nReviews by Location:")
        location_stats = {}
        for review in reviews:
            # .get() so a review without a location still falls back to 'Unknown'
            location = review['raw_data'].get('location_display_name') or 'Unknown'
            location_stats[location] = location_stats.get(location, 0) + 1
        for location, count in sorted(location_stats.items()):
            print(f"  - {location}: {count} reviews")

        # Rating distribution
        print("\nRating Distribution:")
        rating_stats = {}
        for review in reviews:
            rating = review['raw_data'].get('star_rating', 'N/A')
            rating_stats[rating] = rating_stats.get(rating, 0) + 1
        # Sort on the string form so numeric ratings and the 'N/A' fallback can coexist
        for rating, count in sorted(rating_stats.items(), key=lambda item: str(item[0])):
            print(f"  - {rating} stars: {count} reviews")

        # Reply statistics
        reviews_with_replies = sum(1 for r in reviews if r['reply_count'] > 0)
        print(f"\nReviews with business replies: {reviews_with_replies} "
              f"({reviews_with_replies / len(reviews) * 100:.1f}%)")

        # Save to CSV (pandas is only needed for this export, hence the local import)
        import pandas as pd

        df = pd.DataFrame(reviews)
        csv_filename = 'google_reviews_export.csv'

        # Add readable columns flattened out of the nested raw_data dict
        df['location_name'] = df['raw_data'].apply(lambda x: x.get('location_display_name', ''))
        df['star_rating'] = df['raw_data'].apply(lambda x: x.get('star_rating', ''))
        df['has_reply'] = df['reply_count'].apply(lambda x: 'Yes' if x > 0 else 'No')
        df.to_csv(csv_filename, index=False)
        print(f"\n💾 Reviews saved to: {csv_filename}")

        # Query by location example
        print("\n" + "=" * 80)
        print("🔍 QUERY BY LOCATION")
        print("=" * 80)
        print("You can query reviews by location using the raw_data field:")
        print("\nExample SQL query:")
        print("  SELECT * FROM social_socialmediacomment")
        print("  WHERE platform = 'google_reviews'")
        print("  AND json_extract(raw_data, '$.location_display_name') = 'Your Location Name';")
        print("\nExample Django query:")
        print("  from social.models import SocialMediaComment")
        print("  location_reviews = SocialMediaComment.objects.filter(")
        print("      platform='google_reviews',")
        print("      raw_data__location_display_name='Your Location Name'")
        print("  )")

    except Exception as e:
        print(f"❌ Error scraping Google Reviews: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    test_google_reviews_scraper()