"""
Management command to run AI analysis on complaints within a date range.

This command will find complaints created within a specified date range and
run AI analysis on them. Useful for processing historical complaints that were
imported without AI analysis.

Complaints that already carry an ``ai_analysis`` entry in their metadata are
skipped unless ``--force`` is given.

Usage:
    # Analyze complaints from specific date range
    python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31

    # Analyze only complaints without existing AI analysis
    python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --skip-analyzed

    # Force re-analysis of all complaints (even those already analyzed)
    python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --force

    # Analyze specific complaints by ID
    python manage.py analyze_complaints_ai --complaint-ids uuid1 uuid2 uuid3

    # Limit number of complaints to analyze
    python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --limit 50

    # Dry run to see what would be analyzed
    python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --dry-run
"""
import logging
from datetime import datetime, time
from typing import List, Optional

from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone

from apps.complaints.models import Complaint

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Select complaints by ID or creation date and run (or queue) AI analysis.

    By default each complaint is queued via ``analyze_complaint_with_ai.delay``;
    with ``--sync`` the task function is called directly in this process.
    """

    help = "Run AI analysis on complaints within a date range"

    # Format accepted by --from-date / --to-date.
    DATE_FORMAT = '%Y-%m-%d'
    # Number of complaints listed by --dry-run.
    PREVIEW_SIZE = 10
    # Fallback when no batch size is supplied (mirrors the argparse default).
    DEFAULT_BATCH_SIZE = 10

    def add_arguments(self, parser):
        """Register the command-line options on ``parser``."""
        # Date range arguments
        parser.add_argument(
            '--from-date',
            type=str,
            help='Start date (YYYY-MM-DD format)'
        )
        parser.add_argument(
            '--to-date',
            type=str,
            help='End date (YYYY-MM-DD format)'
        )

        # Specific complaint IDs
        parser.add_argument(
            '--complaint-ids',
            nargs='+',
            type=str,
            help='Specific complaint UUIDs to analyze'
        )

        # Filtering options
        parser.add_argument(
            '--skip-analyzed',
            action='store_true',
            help=(
                'Skip complaints that already have AI analysis '
                '(this is already the default unless --force is given)'
            )
        )
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force re-analysis even if complaint already has AI analysis'
        )

        # Limit and batch options
        parser.add_argument(
            '--limit',
            type=int,
            help='Maximum number of complaints to analyze'
        )
        parser.add_argument(
            '--batch-size',
            type=int,
            default=10,
            help='Number of complaints to process in each batch (default: 10)'
        )

        # Other options
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be analyzed without actually running analysis'
        )
        parser.add_argument(
            '--sync',
            action='store_true',
            help='Run analysis synchronously (not as Celery tasks) - WARNING: May be slow'
        )

    def handle(self, *args, **options):
        """Validate options, select the complaints and analyze or preview them.

        Raises:
            CommandError: if neither ``--complaint-ids`` nor a full date range
                is supplied, or if ``--batch-size`` is not positive.
        """
        self.dry_run = options['dry_run']
        self.sync = options['sync']
        self.skip_analyzed = options['skip_analyzed']
        self.force = options['force']

        # Validate arguments
        if not options['complaint_ids'] and (not options['from_date'] or not options['to_date']):
            raise CommandError(
                "You must provide either --complaint-ids OR both --from-date and --to-date"
            )

        batch_size = options.get('batch_size')
        if batch_size is None:
            batch_size = self.DEFAULT_BATCH_SIZE
        if batch_size < 1:
            raise CommandError("--batch-size must be a positive integer")

        queryset = self._build_queryset(options)

        # A single COUNT query answers both "is there anything?" and "how many?".
        total_count = queryset.count()
        if not total_count:
            self.stdout.write(self.style.WARNING("No complaints found matching criteria"))
            return

        self.stdout.write(f"Found {total_count} complaints to analyze")

        if self.dry_run:
            self.stdout.write(self.style.WARNING("\nDRY RUN - Showing first 10 complaints:"))
            for complaint in queryset[:self.PREVIEW_SIZE]:
                has_analysis = 'Yes' if self._has_ai_analysis(complaint) else 'No'
                # Guard against a missing title so the preview never crashes.
                title = (complaint.title or '')[:50]
                self.stdout.write(
                    f"  - {complaint.reference_number}: {title}... [AI: {has_analysis}]"
                )
            if total_count > self.PREVIEW_SIZE:
                self.stdout.write(f"  ... and {total_count - self.PREVIEW_SIZE} more")
            return

        # Process complaints
        self._process_complaints(queryset, batch_size)

    @staticmethod
    def _has_ai_analysis(complaint):
        """Return True when ``complaint.metadata`` contains an ``ai_analysis`` key."""
        return bool(complaint.metadata) and 'ai_analysis' in complaint.metadata

    def _build_queryset(self, options):
        """Build the complaint queryset described by ``options``.

        ``--complaint-ids`` takes precedence over the date range. The date
        range is inclusive of the whole ``--to-date`` day. Unless ``--force``
        was given, complaints whose metadata already has an ``ai_analysis``
        key are excluded. A non-zero ``--limit`` truncates the result.

        Raises:
            CommandError: on malformed dates, an inverted date range, or a
                negative ``--limit``.
        """
        limit = options.get('limit')
        if limit is not None and limit < 0:
            raise CommandError("--limit must not be negative")

        # Start with specific IDs or date range
        if options['complaint_ids']:
            queryset = Complaint.objects.filter(id__in=options['complaint_ids'])
        else:
            try:
                first_day = datetime.strptime(options['from_date'], self.DATE_FORMAT)
                last_day = datetime.strptime(options['to_date'], self.DATE_FORMAT)
            except ValueError as err:
                raise CommandError("Dates must be in YYYY-MM-DD format") from err

            if first_day > last_day:
                raise CommandError("--from-date must not be after --to-date")

            # time.max (23:59:59.999999) keeps the final second of the end day
            # in range; truncating at 23:59:59 would drop sub-second timestamps.
            from_date = timezone.make_aware(first_day)
            to_date = timezone.make_aware(datetime.combine(last_day.date(), time.max))

            queryset = Complaint.objects.filter(
                created_at__gte=from_date,
                created_at__lte=to_date
            )

        # Already-analyzed complaints are skipped by default; --skip-analyzed
        # only makes that explicit, and --force overrides both.
        if not self.force:
            queryset = queryset.exclude(metadata__has_key='ai_analysis')

        # --limit 0 (or no --limit) means "no limit".
        if limit:
            queryset = queryset[:limit]

        return queryset

    def _process_complaints(self, queryset, batch_size):
        """Analyze or queue every complaint in ``queryset`` and print a summary.

        ``batch_size`` is the number of rows fetched from the database per
        chunk and also the interval at which progress lines are printed.
        Failures on individual complaints are logged and counted; they do not
        abort the run.
        """
        total = queryset.count()
        processed = 0
        success = 0
        failed = 0
        skipped = 0

        self.stdout.write(f"\nProcessing {total} complaints...")
        self.stdout.write("=" * 80)

        for complaint in queryset.iterator(chunk_size=batch_size):
            processed += 1

            # Show progress on the first item and after each full batch
            if processed == 1 or processed % batch_size == 0:
                self.stdout.write(
                    f"\nProgress: {processed}/{total} ({(processed/total)*100:.1f}%)"
                )

            try:
                # Re-check on the loaded row (unless force)
                if not self.force and self._has_ai_analysis(complaint):
                    self.stdout.write(
                        f"  Skipping {complaint.reference_number}: Already analyzed"
                    )
                    skipped += 1
                    continue

                self.stdout.write(f"  Analyzing {complaint.reference_number}...", ending='')

                # NOTE(review): this relative import resolves against the
                # package containing this command module - confirm that is
                # where the tasks module actually lives.
                from .tasks import analyze_complaint_with_ai

                if self.sync:
                    # Run synchronously
                    result = analyze_complaint_with_ai(str(complaint.id))
                    if result and result.get('status') == 'success':
                        self.stdout.write(self.style.SUCCESS(" OK"))
                        success += 1
                    else:
                        self.stdout.write(self.style.ERROR(" FAILED"))
                        failed += 1
                else:
                    # Queue as Celery task
                    analyze_complaint_with_ai.delay(str(complaint.id))
                    self.stdout.write(self.style.SUCCESS(" QUEUED"))
                    success += 1

            except Exception as e:
                # Best effort: record the failure and carry on with the rest.
                self.stdout.write(self.style.ERROR(f" ERROR: {str(e)}"))
                logger.exception("Error analyzing complaint %s: %s", complaint.id, e)
                failed += 1

        # Print summary
        self.stdout.write("\n" + "=" * 80)
        self.stdout.write(self.style.SUCCESS("ANALYSIS COMPLETE"))
        self.stdout.write("=" * 80)
        self.stdout.write(f"Total complaints: {total}")
        self.stdout.write(self.style.SUCCESS(f"Successfully queued/processed: {success}"))
        self.stdout.write(self.style.WARNING(f"Skipped (already analyzed): {skipped}"))
        self.stdout.write(self.style.ERROR(f"Failed: {failed}"))

        if not self.sync:
            self.stdout.write("\nNote: Analysis is running asynchronously via Celery.")
            self.stdout.write("Check Celery worker logs for progress.")