"""
|
|
Management command to run AI analysis on complaints within a date range.
|
|
|
|
This command will find complaints created within a specified date range
|
|
and run AI analysis on them. Useful for processing historical complaints
|
|
that were imported without AI analysis.
|
|
|
|
Usage:
|
|
# Analyze complaints from specific date range
|
|
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31
|
|
|
|
# Analyze only complaints without existing AI analysis
|
|
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --skip-analyzed
|
|
|
|
# Force re-analysis of all complaints (even those already analyzed)
|
|
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --force
|
|
|
|
# Analyze specific complaints by ID
|
|
python manage.py analyze_complaints_ai --complaint-ids uuid1 uuid2 uuid3
|
|
|
|
# Limit number of complaints to analyze
|
|
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --limit 50
|
|
|
|
# Dry run to see what would be analyzed
|
|
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --dry-run
|
|
"""
|
|
import logging
from datetime import datetime, time
from typing import List, Optional

from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone

from apps.complaints.models import Complaint

# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Run AI analysis on complaints selected by date range or explicit IDs.

    Each selected complaint is handed to the ``analyze_complaint_with_ai``
    task — queued via Celery by default, or executed inline with ``--sync``.
    Unless ``--force`` is given, complaints whose ``metadata`` already
    contains an ``ai_analysis`` key are excluded.
    """

    help = "Run AI analysis on complaints within a date range"

    def add_arguments(self, parser):
        """Register CLI options for selection, filtering, and execution mode."""
        # Date range arguments
        parser.add_argument(
            '--from-date',
            type=str,
            help='Start date (YYYY-MM-DD format)'
        )
        parser.add_argument(
            '--to-date',
            type=str,
            help='End date (YYYY-MM-DD format)'
        )

        # Specific complaint IDs (alternative to a date range)
        parser.add_argument(
            '--complaint-ids',
            nargs='+',
            type=str,
            help='Specific complaint UUIDs to analyze'
        )

        # Filtering options
        parser.add_argument(
            '--skip-analyzed',
            action='store_true',
            help='Skip complaints that already have AI analysis (default: False)'
        )
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force re-analysis even if complaint already has AI analysis'
        )

        # Limit and batch options
        parser.add_argument(
            '--limit',
            type=int,
            help='Maximum number of complaints to analyze'
        )
        parser.add_argument(
            '--batch-size',
            type=int,
            default=10,
            help='Number of complaints to process in each batch (default: 10)'
        )

        # Other options
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be analyzed without actually running analysis'
        )
        parser.add_argument(
            '--sync',
            action='store_true',
            help='Run analysis synchronously (not as Celery tasks) - WARNING: May be slow'
        )

    def handle(self, *args, **options):
        """Entry point: validate options, build the queryset, and process it."""
        self.dry_run = options['dry_run']
        self.sync = options['sync']
        self.skip_analyzed = options['skip_analyzed']
        self.force = options['force']

        # Either explicit IDs or a complete date range is required.
        if not options['complaint_ids'] and (not options['from_date'] or not options['to_date']):
            raise CommandError(
                "You must provide either --complaint-ids OR both --from-date and --to-date"
            )

        queryset = self._build_queryset(options)

        # Count once and reuse it — the original issued both exists() and
        # count(), which is two queries for one piece of information.
        total_count = queryset.count()
        if not total_count:
            self.stdout.write(self.style.WARNING("No complaints found matching criteria"))
            return

        self.stdout.write(f"Found {total_count} complaints to analyze")

        if self.dry_run:
            # Preview only: list up to 10 matches, then bail without analyzing.
            self.stdout.write(self.style.WARNING("\nDRY RUN - Showing first 10 complaints:"))
            for complaint in queryset[:10]:
                has_analysis = 'Yes' if complaint.metadata and 'ai_analysis' in complaint.metadata else 'No'
                self.stdout.write(f" - {complaint.reference_number}: {complaint.title[:50]}... [AI: {has_analysis}]")
            if total_count > 10:
                self.stdout.write(f" ... and {total_count - 10} more")
            return

        # Process complaints
        self._process_complaints(queryset, options.get('batch_size', 10))

    def _build_queryset(self, options):
        """Build the complaint queryset from CLI options.

        Selection is by explicit UUIDs (--complaint-ids) when given,
        otherwise by an inclusive created_at date range. Already-analyzed
        complaints are excluded unless --force is set, and --limit is
        applied last as a SQL LIMIT.
        """
        if options['complaint_ids']:
            queryset = Complaint.objects.filter(id__in=options['complaint_ids'])
        else:
            # Parse and validate the date range.
            try:
                from_date = datetime.strptime(options['from_date'], '%Y-%m-%d')
                to_date = datetime.strptime(options['to_date'], '%Y-%m-%d')
                # Extend to_date to the very end of the day. The original
                # used .replace(hour=23, minute=59, second=59), which
                # dropped rows created in the final second's microseconds.
                to_date = datetime.combine(to_date.date(), time.max)
            except ValueError as exc:
                raise CommandError("Dates must be in YYYY-MM-DD format") from exc

            # Make timezone-aware in the project's current timezone.
            from_date = timezone.make_aware(from_date)
            to_date = timezone.make_aware(to_date)

            queryset = Complaint.objects.filter(
                created_at__gte=from_date,
                created_at__lte=to_date
            )

        # Skip already-analyzed complaints unless --force is given. The
        # original had separate branches for --skip-analyzed and the
        # default, but both applied the identical exclusion, so
        # --skip-analyzed was (and remains) implied whenever --force is
        # absent.
        if not self.force:
            queryset = queryset.exclude(
                metadata__has_key='ai_analysis'
            )

        # Apply limit last so it caps the filtered result set.
        if options.get('limit'):
            queryset = queryset[:options['limit']]

        return queryset

    def _process_complaints(self, queryset, batch_size):
        """Dispatch analysis for each complaint and print a final summary.

        ``batch_size`` sets the DB fetch chunk size for the iteration; the
        original accepted it but never used it.
        """
        total = queryset.count()
        processed = 0
        success = 0
        failed = 0
        skipped = 0

        self.stdout.write(f"\nProcessing {total} complaints...")
        self.stdout.write("=" * 80)

        # Import once instead of re-importing on every loop iteration.
        # NOTE(review): this relative import resolves to a `tasks` module
        # inside the management-command package — confirm that is the
        # intended location (tasks usually live at the app level).
        from .tasks import analyze_complaint_with_ai

        # chunk_size controls rows fetched per DB round-trip, putting the
        # previously-unused batch_size argument to work.
        for complaint in queryset.iterator(chunk_size=batch_size):
            processed += 1

            # Show progress on the first item and every tenth thereafter.
            if processed % 10 == 0 or processed == 1:
                self.stdout.write(f"\nProgress: {processed}/{total} ({(processed/total)*100:.1f}%)")

            try:
                # Re-check here even though the queryset usually excludes
                # analyzed complaints: an explicit --complaint-ids selection
                # bypasses that exclusion only when --force is set.
                if not self.force and complaint.metadata and 'ai_analysis' in complaint.metadata:
                    self.stdout.write(f" Skipping {complaint.reference_number}: Already analyzed")
                    skipped += 1
                    continue

                self.stdout.write(f" Analyzing {complaint.reference_number}...", ending='')

                if self.sync:
                    # Run the task function inline (slow, but easy to debug).
                    result = analyze_complaint_with_ai(str(complaint.id))
                    if result and result.get('status') == 'success':
                        self.stdout.write(self.style.SUCCESS(" OK"))
                        success += 1
                    else:
                        self.stdout.write(self.style.ERROR(" FAILED"))
                        failed += 1
                else:
                    # Queue as a Celery task; completion is not awaited here.
                    analyze_complaint_with_ai.delay(str(complaint.id))
                    self.stdout.write(self.style.SUCCESS(" QUEUED"))
                    success += 1

            except Exception as e:
                # Per-complaint failures must not abort the batch; record
                # the full traceback in the log and keep going.
                self.stdout.write(self.style.ERROR(f" ERROR: {str(e)}"))
                logger.error(f"Error analyzing complaint {complaint.id}: {e}", exc_info=True)
                failed += 1

        # Print summary
        self.stdout.write("\n" + "=" * 80)
        self.stdout.write(self.style.SUCCESS("ANALYSIS COMPLETE"))
        self.stdout.write("=" * 80)
        self.stdout.write(f"Total complaints: {total}")
        self.stdout.write(self.style.SUCCESS(f"Successfully queued/processed: {success}"))
        self.stdout.write(self.style.WARNING(f"Skipped (already analyzed): {skipped}"))
        self.stdout.write(self.style.ERROR(f"Failed: {failed}"))

        if not self.sync:
            self.stdout.write("\nNote: Analysis is running asynchronously via Celery.")
            self.stdout.write("Check Celery worker logs for progress.")