# HH/apps/physicians/management/commands/backfill_physician_monthly_ratings.py

"""
Management command to backfill monthly physician ratings from existing
individual ratings that have not yet been aggregated.

Usage:
    python manage.py backfill_physician_monthly_ratings
    python manage.py backfill_physician_monthly_ratings --year 2026 --month 4
    python manage.py backfill_physician_monthly_ratings --dry-run
"""

import logging
from typing import Optional

from django.core.management.base import BaseCommand

from apps.physicians.adapter import DoctorRatingAdapter
from apps.physicians.models import PhysicianIndividualRating
from apps.organizations.models import Hospital

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Backfill monthly physician rating aggregates.

    Finds ``PhysicianIndividualRating`` rows with ``is_aggregated=False``,
    groups them by (year, month, hospital) and calls
    ``DoctorRatingAdapter.aggregate_monthly_ratings`` once per group.
    With ``--dry-run`` only the per-group counts are printed.
    """

    help = "Backfill PhysicianMonthlyRating aggregates from unaggregated PhysicianIndividualRating records"

    def add_arguments(self, parser):
        """Register the optional period/hospital filters and the dry-run flag."""
        parser.add_argument("--year", type=int, help="Specific year to backfill (e.g. 2026)")
        parser.add_argument("--month", type=int, help="Specific month to backfill (1-12)")
        parser.add_argument("--hospital-id", type=str, help="Optional hospital UUID to limit backfill")
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would be aggregated without saving changes",
        )

    @staticmethod
    def _period_option_error(year: Optional[int], month: Optional[int]) -> Optional[str]:
        """Return an error message for an out-of-range year/month, else ``None``."""
        if year is not None and not 1 <= year <= 9999:
            return f"Invalid --year {year}: expected a value between 1 and 9999"
        if month is not None and not 1 <= month <= 12:
            return f"Invalid --month {month}: expected a value between 1 and 12"
        return None

    @staticmethod
    def _find_hospital(hospital_id: str):
        """Return the hospital with the given id, or ``None`` if it cannot be found.

        A malformed id (one the primary-key field cannot parse) is treated
        the same as a missing hospital instead of crashing the command.
        """
        # Local import: keeps this block self-contained without touching the
        # module-level import list.
        from django.core.exceptions import ValidationError

        try:
            return Hospital.objects.get(id=hospital_id)
        except (Hospital.DoesNotExist, ValidationError, ValueError):
            return None

    def handle(self, *args, **options):
        """Run the backfill.

        Options read: ``year``, ``month``, ``hospital_id`` (all optional
        filters) and ``dry_run``. Invalid filters are reported on stdout and
        the command returns without doing any work.
        """
        year: Optional[int] = options.get("year")
        month: Optional[int] = options.get("month")
        hospital_id: Optional[str] = options.get("hospital_id")
        dry_run: bool = options["dry_run"]

        # Validate explicitly: a plain truthiness test would make
        # ``--month 0`` silently mean "all months".
        option_error = self._period_option_error(year, month)
        if option_error is not None:
            self.stdout.write(self.style.ERROR(option_error))
            return

        hospital = None
        if hospital_id:
            hospital = self._find_hospital(hospital_id)
            if hospital is None:
                self.stdout.write(self.style.ERROR(f"Hospital {hospital_id} not found"))
                return

        # Build queryset of unaggregated records
        qs = PhysicianIndividualRating.objects.filter(is_aggregated=False)
        if year is not None:
            qs = qs.filter(rating_date__year=year)
        if month is not None:
            qs = qs.filter(rating_date__month=month)
        if hospital is not None:
            qs = qs.filter(hospital=hospital)

        total_unaggregated = qs.count()
        if total_unaggregated == 0:
            self.stdout.write(self.style.SUCCESS("No unaggregated individual ratings found."))
            return

        self.stdout.write(f"Found {total_unaggregated} unaggregated individual rating(s).")

        # Determine distinct periods/hospitals to process. Materialised once
        # because the rows are needed both for the hospital lookup and the loop.
        periods = list(
            qs.values("rating_date__year", "rating_date__month", "hospital")
            .distinct()
            .order_by("rating_date__year", "rating_date__month", "hospital")
        )

        # Resolve every hospital up front instead of one query per period.
        if hospital is not None:
            hospitals_by_id = {hospital.pk: hospital}
        else:
            wanted_ids = {row["hospital"] for row in periods if row["hospital"] is not None}
            hospitals_by_id = Hospital.objects.in_bulk(list(wanted_ids))

        total_aggregated = 0
        total_skipped = 0
        total_errors = 0

        for period in periods:
            p_year = period["rating_date__year"]
            p_month = period["rating_date__month"]
            p_hospital_id = period["hospital"]

            # NOTE(review): ratings without a hospital are aggregated with
            # hospital=None; confirm the adapter does not interpret that as
            # "all hospitals".
            p_hospital = None
            if p_hospital_id is not None:
                p_hospital = hospitals_by_id.get(p_hospital_id)
                if p_hospital is None:
                    self.stdout.write(
                        self.style.WARNING(f"Skipping {p_year}-{p_month:02d} hospital {p_hospital_id} (not found)")
                    )
                    continue

            label = f"{p_year}-{p_month:02d}"
            if p_hospital:
                label += f" ({p_hospital.name})"

            if dry_run:
                count = qs.filter(
                    rating_date__year=p_year,
                    rating_date__month=p_month,
                    hospital=p_hospital_id,
                ).count()
                self.stdout.write(f"[DRY RUN] Would aggregate {count} rating(s) for {label}")
                continue

            self.stdout.write(f"Aggregating {label} ...")
            # The adapter result is read as a mapping with "aggregated" (int),
            # "errors" (sized iterable) and an optional "skipped_unlinked".
            results = DoctorRatingAdapter.aggregate_monthly_ratings(year=p_year, month=p_month, hospital=p_hospital)
            period_errors = results["errors"]
            period_skipped = results.get("skipped_unlinked", 0)

            total_aggregated += results["aggregated"]
            total_skipped += period_skipped
            total_errors += len(period_errors)

            self.stdout.write(
                f"  -> Aggregated: {results['aggregated']}, "
                f"Skipped (unlinked): {period_skipped}, "
                f"Errors: {len(period_errors)}"
            )
            # Show the individual errors, not just how many there were.
            for error in period_errors:
                self.stdout.write(self.style.ERROR(f"     ! {error}"))

        if dry_run:
            self.stdout.write(self.style.NOTICE("\nDry run complete. No changes were saved."))
            return

        # Do not paint the summary as a success when some periods reported errors.
        summary_style = self.style.WARNING if total_errors else self.style.SUCCESS
        self.stdout.write(
            summary_style(
                f"\nBackfill complete: {total_aggregated} monthly record(s) updated, "
                f"{total_skipped} unlinked rating(s) skipped, "
                f"{total_errors} error(s)."
            )
        )