"""
Test script for Google Reviews scraper.

This script demonstrates how to use the Google Reviews scraper to extract
reviews from the locations of a specified Google My Business account.
"""

import os
import sys

import django

# Setup Django: the settings module must be declared and the project root
# (three directories up from this file) must be importable BEFORE
# django.setup() so that `apps.social` and `config.settings` resolve.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
django.setup()

# These imports require django.setup() to have run, hence their placement
# after the setup block rather than at the very top of the file.
from apps.social.scrapers import GoogleReviewsScraper  # noqa: E402
from django.conf import settings  # noqa: E402
def test_google_reviews_scraper():
    """
    Test the Google Reviews scraper with configuration from Django settings.

    Reads credential/token file paths from settings, initializes
    GoogleReviewsScraper, scrapes up to 100 reviews per location, then
    prints sample reviews and statistics, exports the results to CSV,
    and shows example queries. Returns None; all output goes to stdout.
    """
    # Configuration - pulled from settings/base.py via Django settings
    credentials_file = getattr(settings, 'GOOGLE_CREDENTIALS_FILE', 'client_secret.json')
    token_file = getattr(settings, 'GOOGLE_TOKEN_FILE', 'token.json')

    if not os.path.exists(credentials_file):
        print("❌ ERROR: GOOGLE_CREDENTIALS_FILE not found")
        print(f"\nExpected file: {credentials_file}")
        print("\nPlease download your client_secret.json from Google Cloud Console:")
        print("1. Go to https://console.cloud.google.com/")
        print("2. Create a new project or select existing")
        print("3. Enable Google My Business API")
        print("4. Create OAuth 2.0 credentials")
        print("5. Download client_secret.json")
        return

    print("=" * 80)
    print("⭐ GOOGLE REVIEWS SCRAPER TEST")
    print("=" * 80)

    # Initialize scraper
    print("\n📝 Initializing Google Reviews scraper...")
    scraper_config = {
        'credentials_file': credentials_file,
        'token_file': token_file
    }

    try:
        scraper = GoogleReviewsScraper(scraper_config)
        print("✅ Scraper initialized successfully")
    except Exception as e:
        print(f"❌ Error initializing scraper: {e}")
        return

    # Scrape reviews
    print("\n🚀 Starting to scrape Google Reviews...")
    print(" - Maximum reviews per location: 100")
    print(" - All locations will be scraped")
    print()

    try:
        reviews = scraper.scrape_comments(max_reviews_per_location=100)

        if not reviews:
            print("⚠️ No reviews found")
            print("\nPossible reasons:")
            print(" - No locations associated with your Google My Business account")
            print(" - Locations have no reviews")
            print(" - Invalid credentials or insufficient permissions")
            print(" - API rate limit reached")
            return

        print(f"✅ Successfully scraped {len(reviews)} reviews!")

        # reviews is guaranteed non-empty past this point
        _display_sample_reviews(reviews)
        _display_statistics(reviews)
        _export_to_csv(reviews)
        _print_query_examples()

    except Exception as e:
        # Broad catch is deliberate at this top-level test boundary:
        # print the error plus a full traceback for debugging.
        print(f"❌ Error scraping Google Reviews: {e}")
        import traceback
        traceback.print_exc()


def _display_sample_reviews(reviews):
    """Print details of the first five scraped reviews."""
    print("\n" + "=" * 80)
    print("📊 SAMPLE REVIEWS (showing first 5)")
    print("=" * 80)

    for i, review in enumerate(reviews[:5], 1):
        print(f"\n--- Review {i} ---")
        print(f"ID: {review['comment_id']}")
        print(f"Author: {review['author']}")
        print(f"Published: {review['published_at']}")
        print(f"Location: {review['raw_data']['location_display_name']}")
        print(f"Rating: {review['raw_data'].get('star_rating', 'N/A')}")
        print(f"Reply: {'Yes' if review['reply_count'] > 0 else 'No'}")
        # Truncate long review text to keep the console output readable
        print(f"Text: {review['comments'][:100]}...")
        if review.get('raw_data', {}).get('reply_comment'):
            print(f"Business Reply: {review['raw_data']['reply_comment'][:100]}...")


def _display_statistics(reviews):
    """Print aggregate statistics: totals, per-location/per-rating counts, replies.

    `reviews` must be non-empty (the reply-percentage line divides by its length).
    """
    print("\n" + "=" * 80)
    print("📈 STATISTICS")
    print("=" * 80)
    print(f"Total reviews: {len(reviews)}")
    print(f"Unique reviewers: {len(set(r['author'] for r in reviews))}")

    # Location distribution
    print("\nReviews by Location:")
    location_stats = {}
    for review in reviews:
        location = review['raw_data']['location_display_name'] or 'Unknown'
        location_stats[location] = location_stats.get(location, 0) + 1

    for location, count in sorted(location_stats.items()):
        print(f" - {location}: {count} reviews")

    # Rating distribution
    print("\nRating Distribution:")
    rating_stats = {}
    for review in reviews:
        rating = review['raw_data'].get('star_rating', 'N/A')
        rating_stats[rating] = rating_stats.get(rating, 0) + 1

    for rating, count in sorted(rating_stats.items()):
        print(f" - {rating} stars: {count} reviews")

    # Reply statistics
    reviews_with_replies = sum(1 for r in reviews if r['reply_count'] > 0)
    print(f"\nReviews with business replies: {reviews_with_replies} ({reviews_with_replies/len(reviews)*100:.1f}%)")


def _export_to_csv(reviews):
    """Flatten the scraped reviews into a DataFrame and save them as CSV."""
    # Imported lazily so the rest of the script works without pandas installed
    import pandas as pd

    df = pd.DataFrame(reviews)
    csv_filename = 'google_reviews_export.csv'

    # Add readable columns derived from the nested raw_data payload
    df['location_name'] = df['raw_data'].apply(lambda x: x.get('location_display_name', ''))
    df['star_rating'] = df['raw_data'].apply(lambda x: x.get('star_rating', ''))
    df['has_reply'] = df['reply_count'].apply(lambda x: 'Yes' if x > 0 else 'No')

    df.to_csv(csv_filename, index=False)
    print(f"\n💾 Reviews saved to: {csv_filename}")


def _print_query_examples():
    """Print example SQL and Django ORM queries for filtering reviews by location."""
    print("\n" + "=" * 80)
    print("🔍 QUERY BY LOCATION")
    print("=" * 80)
    print("You can query reviews by location using the raw_data field:")
    print("\nExample SQL query:")
    print(" SELECT * FROM social_socialmediacomment")
    print(" WHERE platform = 'google_reviews'")
    print(" AND json_extract(raw_data, '$.location_display_name') = 'Your Location Name';")
    print("\nExample Django query:")
    print(" from social.models import SocialMediaComment")
    print(" location_reviews = SocialMediaComment.objects.filter(")
    print(" platform='google_reviews',")
    print(" raw_data__location_display_name='Your Location Name'")
    print(" )")
if __name__ == "__main__":
    # Allow running this module directly as a standalone test script.
    test_google_reviews_scraper()