""" Analysis service for orchestrating AI-powered comment analysis. Coordinates between SocialMediaComment model and OpenRouter service. """ import logging from typing import List, Dict, Any, Optional from decimal import Decimal from datetime import datetime, timedelta from django.conf import settings from django.utils import timezone from django.db import models from ..models import SocialMediaComment from .openrouter_service import OpenRouterService logger = logging.getLogger(__name__) class AnalysisService: """ Service for managing AI analysis of social media comments. Handles batching, filtering, and updating comments with analysis results. """ def __init__(self): """Initialize the analysis service.""" self.openrouter_service = OpenRouterService() self.batch_size = getattr(settings, 'ANALYSIS_BATCH_SIZE', 10) if not self.openrouter_service.is_configured(): logger.warning("OpenRouter service not properly configured") else: logger.info(f"Analysis service initialized (batch_size: {self.batch_size})") def analyze_pending_comments( self, limit: Optional[int] = None, platform: Optional[str] = None, hours_ago: Optional[int] = None ) -> Dict[str, Any]: """ Analyze comments that haven't been analyzed yet. Args: limit: Maximum number of comments to analyze platform: Filter by platform (optional) hours_ago: Only analyze comments scraped in the last N hours Returns: Dictionary with analysis statistics """ if not self.openrouter_service.is_configured(): logger.error("OpenRouter service not configured") return { 'success': False, 'error': 'OpenRouter service not configured', 'analyzed': 0, 'failed': 0, 'skipped': 0 } # Build queryset for unanalyzed comments (check if ai_analysis is empty) # Using Q() for complex filtering (NULL or empty dict) from django.db.models import Q queryset = SocialMediaComment.objects.filter( Q(ai_analysis__isnull=True) | Q(ai_analysis={}) ) if platform: queryset = queryset.filter(platform=platform) if hours_ago: cutoff_time = timezone.now() - timedelta(hours=hours_ago) queryset = queryset.filter(scraped_at__gte=cutoff_time) if limit: queryset = queryset[:limit] # Fetch comments comments = list(queryset) if not comments: logger.info("No pending comments to analyze") return { 'success': True, 'analyzed': 0, 'failed': 0, 'skipped': 0, 'message': 'No pending comments to analyze' } logger.info(f"Found {len(comments)} pending comments to analyze") # Process in batches analyzed_count = 0 failed_count = 0 skipped_count = 0 for i in range(0, len(comments), self.batch_size): batch = comments[i:i + self.batch_size] logger.info(f"Processing batch {i//self.batch_size + 1} ({len(batch)} comments)") # Prepare batch for API batch_data = [ { 'id': comment.id, 'text': comment.comments } for comment in batch ] # Analyze batch result = self.openrouter_service.analyze_comments(batch_data) if result.get('success'): # Update comments with analysis results for analysis in result.get('analyses', []): try: comment_id = analysis.get('comment_id') comment = SocialMediaComment.objects.get(id=comment_id) # Build new bilingual analysis structure ai_analysis = { 'sentiment': analysis.get('sentiment', {}), 'summaries': analysis.get('summaries', {}), 'keywords': analysis.get('keywords', {}), 'topics': analysis.get('topics', {}), 'entities': analysis.get('entities', []), 'emotions': analysis.get('emotions', {}), 'metadata': { **result.get('metadata', {}), 'analyzed_at': timezone.now().isoformat() } } # Update with bilingual analysis structure comment.ai_analysis = ai_analysis comment.save() analyzed_count += 1 logger.debug(f"Updated comment {comment_id} with bilingual analysis") except SocialMediaComment.DoesNotExist: logger.warning(f"Comment {analysis.get('comment_id')} not found") failed_count += 1 except Exception as e: logger.error(f"Error updating comment {comment_id}: {e}") failed_count += 1 else: error = result.get('error', 'Unknown error') logger.error(f"Batch analysis failed: {error}") failed_count += len(batch) # Calculate skipped (comments that were analyzed during processing) skipped_count = len(comments) - analyzed_count - failed_count logger.info( f"Analysis complete: {analyzed_count} analyzed, " f"{failed_count} failed, {skipped_count} skipped" ) return { 'success': True, 'analyzed': analyzed_count, 'failed': failed_count, 'skipped': skipped_count, 'total': len(comments) } def analyze_comments_by_platform(self, platform: str, limit: int = 100) -> Dict[str, Any]: """ Analyze comments from a specific platform. Args: platform: Platform name (e.g., 'youtube', 'facebook') limit: Maximum number of comments to analyze Returns: Dictionary with analysis statistics """ logger.info(f"Analyzing comments from platform: {platform}") return self.analyze_pending_comments(limit=limit, platform=platform) def analyze_recent_comments(self, hours: int = 24, limit: int = 100) -> Dict[str, Any]: """ Analyze comments scraped in the last N hours. Args: hours: Number of hours to look back limit: Maximum number of comments to analyze Returns: Dictionary with analysis statistics """ logger.info(f"Analyzing comments from last {hours} hours") return self.analyze_pending_comments(limit=limit, hours_ago=hours) def get_analysis_statistics( self, platform: Optional[str] = None, days: int = 30 ) -> Dict[str, Any]: """ Get statistics about analyzed comments using ai_analysis structure. Args: platform: Filter by platform (optional) days: Number of days to look back Returns: Dictionary with analysis statistics """ cutoff_date = timezone.now() - timedelta(days=days) queryset = SocialMediaComment.objects.filter( scraped_at__gte=cutoff_date ) if platform: queryset = queryset.filter(platform=platform) total_comments = queryset.count() # Count analyzed comments (those with ai_analysis populated) analyzed_comments = 0 sentiment_counts = {'positive': 0, 'negative': 0, 'neutral': 0} confidence_scores = [] for comment in queryset: if comment.ai_analysis: analyzed_comments += 1 sentiment = comment.ai_analysis.get('sentiment', {}).get('classification', {}).get('en', 'neutral') if sentiment in sentiment_counts: sentiment_counts[sentiment] += 1 confidence = comment.ai_analysis.get('sentiment', {}).get('confidence', 0) if confidence: confidence_scores.append(confidence) # Calculate average confidence avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0 return { 'total_comments': total_comments, 'analyzed_comments': analyzed_comments, 'unanalyzed_comments': total_comments - analyzed_comments, 'analysis_rate': (analyzed_comments / total_comments * 100) if total_comments > 0 else 0, 'sentiment_distribution': sentiment_counts, 'average_confidence': float(avg_confidence), 'platform': platform or 'all' } def reanalyze_comment(self, comment_id: int) -> Dict[str, Any]: """ Re-analyze a specific comment. Args: comment_id: ID of the comment to re-analyze Returns: Dictionary with result """ try: comment = SocialMediaComment.objects.get(id=comment_id) except SocialMediaComment.DoesNotExist: return { 'success': False, 'error': f'Comment {comment_id} not found' } if not self.openrouter_service.is_configured(): return { 'success': False, 'error': 'OpenRouter service not configured' } # Prepare single comment for analysis batch_data = [{'id': comment.id, 'text': comment.comments}] # Analyze result = self.openrouter_service.analyze_comments(batch_data) if result.get('success'): analysis = result.get('analyses', [{}])[0] if result.get('analyses') else {} # Build new bilingual analysis structure ai_analysis = { 'sentiment': analysis.get('sentiment', {}), 'summaries': analysis.get('summaries', {}), 'keywords': analysis.get('keywords', {}), 'topics': analysis.get('topics', {}), 'entities': analysis.get('entities', []), 'emotions': analysis.get('emotions', {}), 'metadata': { **result.get('metadata', {}), 'analyzed_at': timezone.now().isoformat() } } # Update comment with bilingual analysis structure comment.ai_analysis = ai_analysis comment.save() sentiment_en = ai_analysis.get('sentiment', {}).get('classification', {}).get('en') confidence_val = ai_analysis.get('sentiment', {}).get('confidence', 0) return { 'success': True, 'comment_id': comment_id, 'sentiment': sentiment_en, 'confidence': float(confidence_val) } else: return { 'success': False, 'error': result.get('error', 'Unknown error') } def get_top_keywords( self, platform: Optional[str] = None, limit: int = 20, days: int = 30 ) -> List[Dict[str, Any]]: """ Get most common keywords from analyzed comments using ai_analysis structure. Args: platform: Filter by platform (optional) limit: Maximum number of keywords to return days: Number of days to look back Returns: List of keyword dictionaries with 'keyword' and 'count' keys """ cutoff_date = timezone.now() - timedelta(days=days) queryset = SocialMediaComment.objects.filter( scraped_at__gte=cutoff_date, ai_analysis__isnull=False ).exclude(ai_analysis={}) if platform: queryset = queryset.filter(platform=platform) # Count keywords from ai_analysis keyword_counts = {} for comment in queryset: keywords_en = comment.ai_analysis.get('keywords', {}).get('en', []) for keyword in keywords_en: keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1 # Sort by count and return top N sorted_keywords = sorted( keyword_counts.items(), key=lambda x: x[1], reverse=True )[:limit] return [ {'keyword': keyword, 'count': count} for keyword, count in sorted_keywords ]