HH/apps/complaints/management/commands/analyze_complaints_ai.py
2026-03-28 14:03:56 +03:00

238 lines
9.0 KiB
Python

"""
Management command to run AI analysis on complaints within a date range.
This command will find complaints created within a specified date range
and run AI analysis on them. Useful for processing historical complaints
that were imported without AI analysis.
Usage:
# Analyze complaints from specific date range
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31
# Analyze only complaints without existing AI analysis
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --skip-analyzed
# Force re-analysis of all complaints (even those already analyzed)
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --force
# Analyze specific complaints by ID
python manage.py analyze_complaints_ai --complaint-ids uuid1 uuid2 uuid3
# Limit number of complaints to analyze
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --limit 50
# Dry run to see what would be analyzed
python manage.py analyze_complaints_ai --from-date 2022-08-01 --to-date 2022-12-31 --dry-run
"""
import logging
from datetime import datetime, time
from typing import List, Optional
from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone
from apps.complaints.models import Complaint
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Run AI analysis on complaints selected by a date range or explicit IDs.

    Complaints already carrying an ``ai_analysis`` key in their ``metadata``
    JSON field are skipped unless ``--force`` is given.  Analysis is queued
    as Celery tasks by default; ``--sync`` runs it inline in this process.
    """

    help = "Run AI analysis on complaints within a date range"

    def add_arguments(self, parser):
        """Register CLI options for selection, filtering and execution mode."""
        # Date-range selection (required unless --complaint-ids is given)
        parser.add_argument(
            '--from-date',
            type=str,
            help='Start date (YYYY-MM-DD format)'
        )
        parser.add_argument(
            '--to-date',
            type=str,
            help='End date (YYYY-MM-DD format)'
        )
        # Explicit selection by primary key
        parser.add_argument(
            '--complaint-ids',
            nargs='+',
            type=str,
            help='Specific complaint UUIDs to analyze'
        )
        # Filtering options.  NOTE: already-analyzed complaints are excluded
        # by default, so --skip-analyzed is effectively implied; only --force
        # changes the selection (kept for CLI backward compatibility).
        parser.add_argument(
            '--skip-analyzed',
            action='store_true',
            help='Skip complaints that already have AI analysis (default: False)'
        )
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force re-analysis even if complaint already has AI analysis'
        )
        # Limit and batch options
        parser.add_argument(
            '--limit',
            type=int,
            help='Maximum number of complaints to analyze'
        )
        parser.add_argument(
            '--batch-size',
            type=int,
            default=10,
            help='Number of complaints to process in each batch (default: 10)'
        )
        # Other options
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be analyzed without actually running analysis'
        )
        parser.add_argument(
            '--sync',
            action='store_true',
            help='Run analysis synchronously (not as Celery tasks) - WARNING: May be slow'
        )

    def handle(self, *args, **options):
        """Entry point: validate options, build the queryset, then process it."""
        self.dry_run = options['dry_run']
        self.sync = options['sync']
        self.skip_analyzed = options['skip_analyzed']
        self.force = options['force']

        # Either explicit IDs or a complete date range must be supplied.
        if not options['complaint_ids'] and (not options['from_date'] or not options['to_date']):
            raise CommandError(
                "You must provide either --complaint-ids OR both --from-date and --to-date"
            )

        queryset = self._build_queryset(options)

        # One COUNT query covers both the emptiness check and the total
        # (previously exists() + count() issued two queries).
        total_count = queryset.count()
        if not total_count:
            self.stdout.write(self.style.WARNING("No complaints found matching criteria"))
            return

        self.stdout.write(f"Found {total_count} complaints to analyze")

        if self.dry_run:
            self.stdout.write(self.style.WARNING("\nDRY RUN - Showing first 10 complaints:"))
            for complaint in queryset[:10]:
                has_analysis = 'Yes' if complaint.metadata and 'ai_analysis' in complaint.metadata else 'No'
                self.stdout.write(f" - {complaint.reference_number}: {complaint.title[:50]}... [AI: {has_analysis}]")
            if total_count > 10:
                self.stdout.write(f" ... and {total_count - 10} more")
            return

        self._process_complaints(queryset, options.get('batch_size', 10))

    def _build_queryset(self, options):
        """Build the complaint queryset from CLI options.

        Selects by explicit IDs or by creation-date range, excludes
        already-analyzed complaints unless ``--force`` is set, and slices
        to ``--limit`` when given.

        Raises:
            CommandError: if a date is not in YYYY-MM-DD format.
        """
        if options['complaint_ids']:
            queryset = Complaint.objects.filter(id__in=options['complaint_ids'])
        else:
            try:
                from_date = datetime.strptime(options['from_date'], '%Y-%m-%d')
                to_date = datetime.strptime(options['to_date'], '%Y-%m-%d')
            except ValueError:
                raise CommandError("Dates must be in YYYY-MM-DD format")
            # Include the entire final day.  BUG FIX: the previous
            # replace(hour=23, minute=59, second=59) excluded rows created
            # in the last second's sub-second range (e.g. 23:59:59.500).
            to_date = datetime.combine(to_date.date(), time.max)
            from_date = timezone.make_aware(from_date)
            to_date = timezone.make_aware(to_date)
            queryset = Complaint.objects.filter(
                created_at__gte=from_date,
                created_at__lte=to_date
            )

        # The original if/elif branches both applied the identical
        # exclude(), so the only effective switch is --force: by default
        # already-analyzed complaints are skipped at the DB level.
        if not self.force:
            queryset = queryset.exclude(metadata__has_key='ai_analysis')

        # Slicing must come last: a sliced queryset cannot be filtered.
        if options.get('limit'):
            queryset = queryset[:options['limit']]
        return queryset

    def _process_complaints(self, queryset, batch_size):
        """Analyze each complaint inline (--sync) or queue it via Celery.

        ``batch_size`` is accepted for interface compatibility but is not
        currently used: rows are streamed one at a time via ``iterator()``.
        """
        # BUG FIX: the original relative import (`from .tasks import ...`)
        # resolved to management.commands.tasks, which does not exist; the
        # Celery tasks module lives at the app level alongside models.
        # NOTE(review): confirm the tasks module path against the project.
        # Imported lazily (per-call, not module level) so the command module
        # can be loaded without Celery configured; hoisted out of the loop
        # branches that each repeated it.
        from apps.complaints.tasks import analyze_complaint_with_ai

        total = queryset.count()
        processed = success = failed = skipped = 0

        self.stdout.write(f"\nProcessing {total} complaints...")
        self.stdout.write("=" * 80)

        for complaint in queryset.iterator():
            processed += 1
            # Progress line on the first row and every 10th thereafter.
            if processed % 10 == 0 or processed == 1:
                self.stdout.write(f"\nProgress: {processed}/{total} ({(processed/total)*100:.1f}%)")
            try:
                # Defensive re-check: the queryset already excludes analyzed
                # rows unless --force, but metadata may change mid-run.
                if not self.force and complaint.metadata and 'ai_analysis' in complaint.metadata:
                    self.stdout.write(f" Skipping {complaint.reference_number}: Already analyzed")
                    skipped += 1
                    continue
                self.stdout.write(f" Analyzing {complaint.reference_number}...", ending='')
                if self.sync:
                    # Run synchronously in this process.
                    result = analyze_complaint_with_ai(str(complaint.id))
                    if result and result.get('status') == 'success':
                        self.stdout.write(self.style.SUCCESS(" OK"))
                        success += 1
                    else:
                        self.stdout.write(self.style.ERROR(" FAILED"))
                        failed += 1
                else:
                    # Queue as a Celery task; success here means "queued".
                    analyze_complaint_with_ai.delay(str(complaint.id))
                    self.stdout.write(self.style.SUCCESS(" QUEUED"))
                    success += 1
            except Exception as e:
                # Best-effort batch job: log and continue with the next row.
                self.stdout.write(self.style.ERROR(f" ERROR: {str(e)}"))
                logger.error(f"Error analyzing complaint {complaint.id}: {e}", exc_info=True)
                failed += 1

        # Summary
        self.stdout.write("\n" + "=" * 80)
        self.stdout.write(self.style.SUCCESS("ANALYSIS COMPLETE"))
        self.stdout.write("=" * 80)
        self.stdout.write(f"Total complaints: {total}")
        self.stdout.write(self.style.SUCCESS(f"Successfully queued/processed: {success}"))
        self.stdout.write(self.style.WARNING(f"Skipped (already analyzed): {skipped}"))
        self.stdout.write(self.style.ERROR(f"Failed: {failed}"))
        if not self.sync:
            self.stdout.write("\nNote: Analysis is running asynchronously via Celery.")
            self.stdout.write("Check Celery worker logs for progress.")