HH/apps/complaints/management/commands/analyze_complaints_ai.py
2026-03-28 14:03:56 +03:00

238 lines
9.0 KiB
Python

"""
Management command to run AI analysis on complaints within a date range.
This command will find complaints created within a specified date range
and run AI analysis on them. Useful for processing historical complaints
that were imported without AI analysis.
Usage:
# Analyze complaints from specific date range
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31
# Analyze only complaints without existing AI analysis
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --skip-analyzed
# Force re-analysis of all complaints (even those already analyzed)
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --force
# Analyze specific complaints by ID
python manage.py analyze_complaints_ai --complaint-ids uuid1 uuid2 uuid3
# Limit number of complaints to analyze
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --limit 50
# Dry run to see what would be analyzed
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --dry-run
"""
import logging
from datetime import datetime, time
from typing import List, Optional
from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone
from apps.complaints.models import Complaint
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Run AI analysis on complaints selected by a date range or explicit IDs.

    Complaints already carrying an ``ai_analysis`` key in their ``metadata``
    JSON field are skipped unless ``--force`` is given.  Analysis is queued
    as Celery tasks by default; ``--sync`` runs it inline in this process.
    """

    help = "Run AI analysis on complaints within a date range"

    def add_arguments(self, parser):
        """Register CLI options for selection, filtering and execution mode."""
        # Date-range selection (required unless --complaint-ids is given)
        parser.add_argument(
            '--from-date',
            type=str,
            help='Start date (YYYY-MM-DD format)'
        )
        parser.add_argument(
            '--to-date',
            type=str,
            help='End date (YYYY-MM-DD format)'
        )
        # Explicit selection by primary key
        parser.add_argument(
            '--complaint-ids',
            nargs='+',
            type=str,
            help='Specific complaint UUIDs to analyze'
        )
        # Filtering options.  NOTE: already-analyzed complaints are excluded
        # by default, so --skip-analyzed is effectively implied; only --force
        # changes the selection (kept for CLI backward compatibility).
        parser.add_argument(
            '--skip-analyzed',
            action='store_true',
            help='Skip complaints that already have AI analysis (default: False)'
        )
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force re-analysis even if complaint already has AI analysis'
        )
        # Limit and batch options
        parser.add_argument(
            '--limit',
            type=int,
            help='Maximum number of complaints to analyze'
        )
        parser.add_argument(
            '--batch-size',
            type=int,
            default=10,
            help='Number of complaints to process in each batch (default: 10)'
        )
        # Other options
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be analyzed without actually running analysis'
        )
        parser.add_argument(
            '--sync',
            action='store_true',
            help='Run analysis synchronously (not as Celery tasks) - WARNING: May be slow'
        )

    def handle(self, *args, **options):
        """Entry point: validate options, build the queryset, then process it."""
        self.dry_run = options['dry_run']
        self.sync = options['sync']
        self.skip_analyzed = options['skip_analyzed']
        self.force = options['force']

        # Either explicit IDs or a complete date range must be supplied.
        if not options['complaint_ids'] and (not options['from_date'] or not options['to_date']):
            raise CommandError(
                "You must provide either --complaint-ids OR both --from-date and --to-date"
            )

        queryset = self._build_queryset(options)

        # One COUNT query covers both the emptiness check and the total
        # (previously exists() + count() issued two queries).
        total_count = queryset.count()
        if not total_count:
            self.stdout.write(self.style.WARNING("No complaints found matching criteria"))
            return

        self.stdout.write(f"Found {total_count} complaints to analyze")

        if self.dry_run:
            self.stdout.write(self.style.WARNING("\nDRY RUN - Showing first 10 complaints:"))
            for complaint in queryset[:10]:
                has_analysis = 'Yes' if complaint.metadata and 'ai_analysis' in complaint.metadata else 'No'
                self.stdout.write(f" - {complaint.reference_number}: {complaint.title[:50]}... [AI: {has_analysis}]")
            if total_count > 10:
                self.stdout.write(f" ... and {total_count - 10} more")
            return

        self._process_complaints(queryset, options.get('batch_size', 10))

    def _build_queryset(self, options):
        """Build the complaint queryset from CLI options.

        Selects by explicit IDs or by creation-date range, excludes
        already-analyzed complaints unless ``--force`` is set, and slices
        to ``--limit`` when given.

        Raises:
            CommandError: if a date is not in YYYY-MM-DD format.
        """
        if options['complaint_ids']:
            queryset = Complaint.objects.filter(id__in=options['complaint_ids'])
        else:
            try:
                from_date = datetime.strptime(options['from_date'], '%Y-%m-%d')
                to_date = datetime.strptime(options['to_date'], '%Y-%m-%d')
            except ValueError:
                raise CommandError("Dates must be in YYYY-MM-DD format")
            # Include the entire final day.  BUG FIX: the previous
            # replace(hour=23, minute=59, second=59) excluded rows created
            # in the last second's sub-second range (e.g. 23:59:59.500).
            to_date = datetime.combine(to_date.date(), time.max)
            from_date = timezone.make_aware(from_date)
            to_date = timezone.make_aware(to_date)
            queryset = Complaint.objects.filter(
                created_at__gte=from_date,
                created_at__lte=to_date
            )

        # The original if/elif branches both applied the identical
        # exclude(), so the only effective switch is --force: by default
        # already-analyzed complaints are skipped at the DB level.
        if not self.force:
            queryset = queryset.exclude(metadata__has_key='ai_analysis')

        # Slicing must come last: a sliced queryset cannot be filtered.
        if options.get('limit'):
            queryset = queryset[:options['limit']]
        return queryset

    def _process_complaints(self, queryset, batch_size):
        """Analyze each complaint inline (--sync) or queue it via Celery.

        ``batch_size`` is accepted for interface compatibility but is not
        currently used: rows are streamed one at a time via ``iterator()``.
        """
        # BUG FIX: the original relative import (`from .tasks import ...`)
        # resolved to management.commands.tasks, which does not exist; the
        # Celery tasks module lives at the app level alongside models.
        # NOTE(review): confirm the tasks module path against the project.
        # Imported lazily (per-call, not module level) so the command module
        # can be loaded without Celery configured; hoisted out of the loop
        # branches that each repeated it.
        from apps.complaints.tasks import analyze_complaint_with_ai

        total = queryset.count()
        processed = success = failed = skipped = 0

        self.stdout.write(f"\nProcessing {total} complaints...")
        self.stdout.write("=" * 80)

        for complaint in queryset.iterator():
            processed += 1
            # Progress line on the first row and every 10th thereafter.
            if processed % 10 == 0 or processed == 1:
                self.stdout.write(f"\nProgress: {processed}/{total} ({(processed/total)*100:.1f}%)")
            try:
                # Defensive re-check: the queryset already excludes analyzed
                # rows unless --force, but metadata may change mid-run.
                if not self.force and complaint.metadata and 'ai_analysis' in complaint.metadata:
                    self.stdout.write(f" Skipping {complaint.reference_number}: Already analyzed")
                    skipped += 1
                    continue
                self.stdout.write(f" Analyzing {complaint.reference_number}...", ending='')
                if self.sync:
                    # Run synchronously in this process.
                    result = analyze_complaint_with_ai(str(complaint.id))
                    if result and result.get('status') == 'success':
                        self.stdout.write(self.style.SUCCESS(" OK"))
                        success += 1
                    else:
                        self.stdout.write(self.style.ERROR(" FAILED"))
                        failed += 1
                else:
                    # Queue as a Celery task; success here means "queued".
                    analyze_complaint_with_ai.delay(str(complaint.id))
                    self.stdout.write(self.style.SUCCESS(" QUEUED"))
                    success += 1
            except Exception as e:
                # Best-effort batch job: log and continue with the next row.
                self.stdout.write(self.style.ERROR(f" ERROR: {str(e)}"))
                logger.error(f"Error analyzing complaint {complaint.id}: {e}", exc_info=True)
                failed += 1

        # Summary
        self.stdout.write("\n" + "=" * 80)
        self.stdout.write(self.style.SUCCESS("ANALYSIS COMPLETE"))
        self.stdout.write("=" * 80)
        self.stdout.write(f"Total complaints: {total}")
        self.stdout.write(self.style.SUCCESS(f"Successfully queued/processed: {success}"))
        self.stdout.write(self.style.WARNING(f"Skipped (already analyzed): {skipped}"))
        self.stdout.write(self.style.ERROR(f"Failed: {failed}"))
        if not self.sync:
            self.stdout.write("\nNote: Analysis is running asynchronously via Celery.")
            self.stdout.write("Check Celery worker logs for progress.")