# HH/apps/complaints/management/commands/import_historical_complaints.py

"""
Import historical complaints from Excel (2022-2024).

Usage:
    # Test import (dry run)
    python manage.py import_historical_complaints "Complaints Report - 2024.xlsx" --sheet="January 2024" --dry-run

    # Actual import for a single sheet
    python manage.py import_historical_complaints "Complaints Report - 2024.xlsx" --sheet="January 2024"
"""

import logging
import re
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone

from apps.organizations.models import Hospital, Location, MainSection, SubSection, Staff
from apps.complaints.models import Complaint, ComplaintCategory
from apps.accounts.models import User

from .complaint_taxonomy_mapping import (
    DOMAIN_MAPPING,
    CATEGORY_MAPPING,
    SUBCATEGORY_MAPPING,
    CLASSIFICATION_MAPPING,
    get_mapped_category,
    is_taxonomy_mapped,
)
from .complaint_source_mapping import resolve_px_source

# Module-level logger; row failures and user-creation events are reported through it.
logger = logging.getLogger(__name__)

# Default hospital code for all imported complaints
DEFAULT_HOSPITAL_CODE = "NUZHA"

# Column mapping: field_name -> column_number (1-based).
# Each trailing comment quotes the sheet's header text, with an English gloss
# where the header is Arabic. Row extraction subtracts 1 from these numbers when
# indexing the value tuples yielded by ``iter_rows(values_only=True)``.
COLUMN_MAPPING = {
    "complaint_num": 3,  # "رقم الشكوى" - complaint number
    "mrn": 4,  # "رقم الملف" - file number
    "source": 5,  # "جهة الشكوى" - complaint source
    "location_name": 6,  # "الموقع" - location
    "main_dept_name": 7,  # "القسم الرئيس" - main department
    "sub_dept_name": 8,  # "القسم الفرعي" - sub-department
    "date_received": 9,  # "تاريخ إستلام الشكوى" - date the complaint was received
    "data_entry_person": 10,  # "المدخل" - Data Entry Person
    "accused_staff_id": 48,  # ID (Employee ID)
    "accused_staff_name": 49,  # "اسم الشخص المشتكى عليه - ان وجد" - name of the person complained about, if any
    "domain": 50,  # Domain
    "category": 51,  # Category
    "subcategory": 52,  # Sub-Category
    "classification": 53,  # Classification
    "description_ar": 54,  # "محتوى الشكوى (عربي)" - complaint content (Arabic)
    "description_en": 55,  # "محتوى الشكوى (English)" - complaint content (English)
    "satisfaction": 56,  # Satisfied/Dissatisfied
    "rightful_side": 57,  # The Rightful Side
    # Timeline columns (listed by meaning, so the column numbers are not in ascending order)
    "date_sent": 20,  # "تم ارسال الشكوى" - Complaint Sent/Activated
    "first_reminder": 24,  # First Reminder Sent
    "second_reminder": 28,  # Second Reminder Sent
    "escalated_date": 32,  # Escalated
    "closed_date": 37,  # Closed
    "resolved_date": 44,  # Resolved
    "response_date": 41,  # "تاريخ الرد" - Response Date (for explanation received)
}

# Month mapping for reference numbers: upper-cased month token taken from the
# sheet name -> two-digit month code. Tokens missing from this table make the
# reference-number builder fall back to "00".
MONTH_MAP = {
    # Full month names (2023-2025 format)
    "JANUARY": "01",
    "FEBRUARY": "02",
    "MARCH": "03",
    "APRIL": "04",
    "MAY": "05",
    "JUNE": "06",
    "JULY": "07",
    "AUGUST": "08",
    "SEPTEMBER": "09",
    "OCTOBER": "10",
    "NOVEMBER": "11",
    "DECEMBER": "12",
    # Three-letter short names (2022 format, e.g. "AUG 2022 "). All twelve months
    # are listed so an abbreviated sheet name never degrades to month "00";
    # "MAY" is already covered by the full-name entry above.
    "JAN": "01",
    "FEB": "02",
    "MAR": "03",
    "APR": "04",
    "JUN": "06",
    "JUL": "07",
    "AUG": "08",
    "SEP": "09",
    "OCT": "10",
    "NOV": "11",
    "DEC": "12",
}


class Command(BaseCommand):
    help = "Import historical complaints from Excel (2022-2024)"

    def add_arguments(self, parser):
        parser.add_argument("excel_file", type=str, help="Path to the Excel file")
        parser.add_argument(
            "--sheet", type=str, default="AUG 2022 ", help='Sheet name to import (default: "AUG 2022 ")'
        )
        parser.add_argument("--dry-run", action="store_true", help="Preview without saving to database")
        parser.add_argument("--start-row", type=int, default=3, help="First data row (default: 3, skipping header)")

    def handle(self, *args, **options):
        """Run the import for one worksheet and print the summary report.

        Reads ``excel_file``, ``sheet``, ``dry_run`` and ``start_row`` from
        *options*, loads the default hospital, opens the workbook in read-only
        mode, processes the requested sheet and finally prints the report.

        Raises:
            CommandError: if the default hospital does not exist, openpyxl is
                not installed, the workbook cannot be loaded, or the requested
                sheet is not in the workbook.
        """
        self.excel_file = options["excel_file"]
        self.sheet_name = options["sheet"]
        self.dry_run = options["dry_run"]
        self.start_row = options["start_row"]

        # Load hospital
        self.hospital = self._load_hospital()
        if not self.hospital:
            raise CommandError(f'Hospital with code "{DEFAULT_HOSPITAL_CODE}" not found')

        self.stdout.write(self.style.SUCCESS(f"Using hospital: {self.hospital.name}"))

        # Import openpyxl on its own so only a genuinely missing package yields
        # the "install openpyxl" hint.
        try:
            import openpyxl
        except ImportError as exc:
            raise CommandError("openpyxl is required. Install with: pip install openpyxl") from exc

        # Load Excel workbook
        try:
            self.wb = openpyxl.load_workbook(self.excel_file, read_only=True, data_only=True)
        except Exception as exc:
            raise CommandError(f"Error loading Excel file: {exc}") from exc

        # A read-only workbook keeps the underlying file open until close() is
        # called, so release it whatever happens while processing.
        try:
            # Check sheet exists
            if self.sheet_name not in self.wb.sheetnames:
                available = ", ".join(self.wb.sheetnames)
                raise CommandError(f'Sheet "{self.sheet_name}" not found. Available: {available}')

            self.ws = self.wb[self.sheet_name]
            self.stdout.write(f"Processing sheet: {self.sheet_name}")

            # Statistics tracking
            self.stats = {
                "processed": 0,
                "success": 0,
                "failed": 0,
                "skipped_duplicate": 0,
                "skipped_unmapped_taxonomy": 0,
            }
            self.errors = []
            self.unmapped_taxonomy = set()
            self.unmatched_locations = set()
            self.unmatched_departments = set()

            # Cache for used reference numbers to avoid DB queries
            self.used_refs = set()

            # Process rows
            self._process_sheet()
        finally:
            self.wb.close()

        # Generate report
        self._print_report()

    def _load_hospital(self) -> Optional[Hospital]:
        """Fetch the Hospital whose code is DEFAULT_HOSPITAL_CODE, or None if there is none."""
        try:
            hospital = Hospital.objects.get(code=DEFAULT_HOSPITAL_CODE)
        except Hospital.DoesNotExist:
            hospital = None
        return hospital

    def _process_sheet(self):
        """Import every data row of the selected worksheet.

        Rows are read with ``iter_rows(values_only=True)`` from ``self.start_row``
        up to row 5000. A row without a complaint number is treated as empty and
        skipped. A row whose complaint number cannot be turned into a reference
        number is skipped with a logged warning. Every other row is handed to
        ``_import_row``; any exception raised while handling a row is counted in
        ``self.stats["failed"]``, recorded in ``self.errors`` and logged, and the
        import continues with the next row.
        """
        rows = self.ws.iter_rows(min_row=self.start_row, max_row=5000, values_only=True)
        for row_num, row in enumerate(rows, start=self.start_row):
            # Reset per row so the error handler can never report the complaint
            # number of a previous row when extraction itself fails.
            row_data: Dict = {}
            try:
                row_data = self._extract_row_data_from_values(row)

                # Skip empty rows
                if not row_data.get("complaint_num"):
                    continue

                self.stats["processed"] += 1

                # Validate complaint number and build reference
                try:
                    ref_num = self._get_unique_reference_number(row_data["complaint_num"])
                except (ValueError, TypeError) as exc:
                    # Still skipped (not counted as failed), but leave a trace
                    # instead of dropping the row silently.
                    logger.warning(
                        "Skipping row %s: cannot build a reference number from %r (%s)",
                        row_num,
                        row_data["complaint_num"],
                        exc,
                    )
                    continue

                self._import_row(row_data, ref_num)
                self.stats["success"] += 1

            except Exception as e:
                self.stats["failed"] += 1
                self.errors.append(
                    {
                        "row": row_num,
                        "complaint_num": row_data.get("complaint_num"),
                        "error": str(e),
                    }
                )
                logger.error("Error processing row %s: %s", row_num, e, exc_info=True)

    @staticmethod
    def _to_aware(value):
        """Return *value* as a timezone-aware datetime; ``None`` passes through."""
        if value is not None and timezone.is_naive(value):
            return timezone.make_aware(value)
        return value

    def _import_row(self, row_data: Dict, ref_num: str) -> None:
        """Resolve one extracted row and, unless in dry-run mode, save it as a Complaint.

        All lookups run in dry-run mode as well, so the report still lists
        unmapped taxonomy values and unmatched locations/departments. Nothing
        is written to the database in dry-run mode, and that includes the
        data-entry user.
        """
        taxonomy_values = (
            row_data.get("domain"),
            row_data.get("category"),
            row_data.get("subcategory"),
            row_data.get("classification"),
        )

        # Resolve taxonomy - allow unmapped (will be backfilled later via AI)
        taxonomy = self._resolve_taxonomy(*taxonomy_values)
        if not is_taxonomy_mapped(*taxonomy_values):
            self._log_unmapped_taxonomy(row_data)

        # Resolve source
        px_source = resolve_px_source(row_data.get("source"))

        # Resolve location and departments
        location = self._resolve_location(row_data.get("location_name"))
        main_section = self._resolve_section(row_data.get("main_dept_name"))
        subsection = self._resolve_subsection(row_data.get("sub_dept_name"))

        # Determine status
        status = self._determine_status(row_data)

        # Historical creation time; falls back to "now" when the received date
        # is missing or unparseable.
        created_at = self._to_aware(self._parse_datetime(row_data.get("date_received")) or timezone.now())

        # Timeline dates are made aware the same way as created_at, so no naive
        # datetimes reach the model fields.
        timeline = {
            field: self._to_aware(self._parse_datetime(row_data.get(field)))
            for field in (
                "date_sent",
                "first_reminder",
                "second_reminder",
                "escalated_date",
                "closed_date",
                "resolved_date",
                "response_date",
            )
        }
        date_sent = timeline["date_sent"]

        # Resolve accused staff
        accused_staff = self._resolve_staff_by_id(row_data.get("accused_staff_id"))

        # Map rightful side to resolution outcome
        rightful_side = str(row_data.get("rightful_side") or "").lower().strip()
        resolution_outcome = rightful_side if rightful_side in ("patient", "hospital", "other") else ""

        if self.dry_run:
            return

        # Only touched on a real import: this lookup may create a User row.
        assigned_to_user = self._get_or_create_data_entry_user(row_data.get("data_entry_person"))

        with transaction.atomic():
            complaint = Complaint.objects.create(
                reference_number=ref_num,
                hospital=self.hospital,
                location=location,
                main_section=main_section,
                subsection=subsection,
                title=self._build_title(row_data),
                description=self._build_description(row_data),
                patient_name="Unknown",
                national_id="",
                relation_to_patient="patient",
                staff=accused_staff,
                staff_name=row_data.get("accused_staff_name") or "",
                domain=taxonomy.get("domain"),
                category=taxonomy.get("category"),
                subcategory_obj=taxonomy.get("subcategory"),
                classification_obj=taxonomy.get("classification"),
                status=status,
                assigned_to=assigned_to_user,
                resolved_by=assigned_to_user,
                resolution_outcome=resolution_outcome,
                # Timeline fields
                activated_at=date_sent,
                reminder_sent_at=timeline["first_reminder"],
                second_reminder_sent_at=timeline["second_reminder"],
                escalated_at=timeline["escalated_date"],
                closed_at=timeline["closed_date"],
                resolved_at=timeline["resolved_date"],
                # Explanation tracking
                explanation_requested=bool(date_sent),
                explanation_requested_at=date_sent,
                explanation_received_at=timeline["response_date"],
                due_at=created_at + timedelta(hours=48),
                source=px_source,
                metadata=self._build_metadata(row_data, ref_num),
            )

            # Update created_at to historical date (can't set during create due to auto_now_add)
            Complaint.objects.filter(pk=complaint.pk).update(created_at=created_at)

    def _extract_row_data(self, row_num: int) -> Dict:
        """Read one worksheet row cell by cell into a field-keyed dict (kept for compatibility)."""
        extracted = {}
        for field_name in COLUMN_MAPPING:
            raw = self.ws.cell(row_num, COLUMN_MAPPING[field_name]).value
            if raw and field_name == "classification":
                # Drop Excel range artifacts such as "AX5:BA5" from the classification text.
                raw = re.sub(r"[A-Z]+\d+:[A-Z]+\d+", "", str(raw)).strip()
            extracted[field_name] = raw
        return extracted

    def _extract_row_data_from_values(self, row: tuple) -> Dict:
        """Map a values tuple from ``iter_rows`` onto the COLUMN_MAPPING field names.

        Columns that lie beyond the end of *row* yield ``None``.
        """
        width = len(row)
        extracted = {}
        for field_name, column in COLUMN_MAPPING.items():
            # COLUMN_MAPPING is 1-based; tuple positions are 0-based.
            index = column - 1
            value = None if index >= width else row[index]
            if value and field_name == "classification":
                # Drop Excel range artifacts such as "AX5:BA5" from the classification text.
                value = re.sub(r"[A-Z]+\d+:[A-Z]+\d+", "", str(value)).strip()
            extracted[field_name] = value
        return extracted

    def _build_reference_number(self, complaint_num) -> str:
        """Compose ``CMP-YYYY-MM-NNNN`` from the sheet name and the complaint number.

        The first whitespace-separated token of the sheet name is the month
        (looked up upper-cased in MONTH_MAP, "00" when unknown). The last token
        is the year, defaulting to "2022" when the name has a single token.
        ``int(complaint_num)`` is zero-padded to four digits.
        """
        tokens = self.sheet_name.strip().split()
        month_code = MONTH_MAP.get(tokens[0].upper(), "00")
        if len(tokens) > 1:
            year = tokens[-1]
        else:
            year = "2022"
        sequence = int(complaint_num)
        return f"CMP-{year}-{month_code}-{sequence:04d}"

    def _get_unique_reference_number(self, complaint_num) -> str:
        """Return a reference number unused in this run and in the database.

        The plain reference is tried first, then the same reference with the
        suffixes ``-A`` through ``-Z``. The chosen value is remembered in
        ``self.used_refs``; picking a suffixed value also increments
        ``self.stats["skipped_duplicate"]``.

        Raises:
            ValueError: when the plain reference and all 26 suffixed forms are taken.
        """
        base_ref = self._build_reference_number(complaint_num)

        def is_taken(candidate):
            # In-memory cache first; the database is only queried on a cache miss.
            return candidate in self.used_refs or Complaint.objects.filter(reference_number=candidate).exists()

        if not is_taken(base_ref):
            self.used_refs.add(base_ref)
            return base_ref

        for code in range(ord("A"), ord("Z") + 1):
            candidate = f"{base_ref}-{chr(code)}"
            if is_taken(candidate):
                continue
            self.used_refs.add(candidate)
            self.stats["skipped_duplicate"] += 1  # counts references that needed a suffix
            return candidate

        raise ValueError(f"Cannot generate unique reference for {base_ref}")

    def _resolve_taxonomy(self, domain, category, subcategory, classification) -> Dict:
        """Look up the ComplaintCategory for each taxonomy level.

        Returns a dict with the keys ``domain``, ``category``, ``subcategory``
        and ``classification``; a level that cannot be resolved maps to ``None``.
        """
        levels = (
            ("domain", domain, DOMAIN_MAPPING),
            ("category", category, CATEGORY_MAPPING),
            ("subcategory", subcategory, SUBCATEGORY_MAPPING),
            ("classification", classification, CLASSIFICATION_MAPPING),
        )
        resolved = {}
        for key, raw_value, mapping in levels:
            mapped_id = get_mapped_category(raw_value, mapping)
            resolved[key] = self._get_category_by_uuid(mapped_id)
        return resolved

    def _get_category_by_uuid(self, uuid: str) -> Optional[ComplaintCategory]:
        """Get ComplaintCategory by UUID."""
        if not uuid:
            return None
        try:
            return ComplaintCategory.objects.get(id=uuid)
        except ComplaintCategory.DoesNotExist:
            return None

    def _parse_datetime(self, value) -> Optional[datetime]:
        """Parse datetime from various formats."""
        if not value:
            return None
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            try:
                return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
            except ValueError:
                try:
                    return datetime.strptime(value, "%Y-%m-%d")
                except ValueError:
                    return None
        return None

    def _resolve_location(self, name_ar: str) -> Optional[Location]:
        """Resolve location by Arabic name."""
        if not name_ar:
            return None
        location = Location.objects.filter(name_ar=name_ar).first()
        if not location:
            self.unmatched_locations.add(name_ar)
        return location

    def _resolve_section(self, name_ar: str) -> Optional[MainSection]:
        """Resolve main section/department by Arabic name."""
        if not name_ar:
            return None
        # Try Section model
        section = MainSection.objects.filter(name_ar=name_ar).first()
        if not section:
            self.unmatched_departments.add(name_ar)
        return section

    def _resolve_subsection(self, name_ar: str) -> Optional[SubSection]:
        """Resolve subsection by Arabic name."""
        if not name_ar:
            return None
        return SubSection.objects.filter(name_ar=name_ar).first()

    def _resolve_staff_by_id(self, employee_id: str) -> Optional[Staff]:
        """Resolve staff by employee ID."""
        if not employee_id:
            return None
        try:
            return Staff.objects.get(employee_id=str(employee_id))
        except Staff.DoesNotExist:
            return None

    def _get_or_create_data_entry_user(self, arabic_name: str) -> Optional[User]:
        """
        Create or get PX-Staff user from Arabic data entry person name.

        Transliterates Arabic name to Latin username using first and last name only.
        Stores full Arabic name in first_name field.

        Args:
            arabic_name: Arabic name from Excel (e.g., "أحمد محمد عبدالله")

        Returns:
            User object or None if name is empty
        """
        if not arabic_name:
            return None

        try:
            from unidecode import unidecode
        except ImportError:
            logger.error("unidecode library not installed. Run: pip install unidecode")
            return None

        # Split name and get first and last parts only
        parts = arabic_name.split()
        if len(parts) >= 2:
            first_name = parts[0]
            last_name = parts[-1]
        else:
            first_name = arabic_name
            last_name = "staff"

        # Transliterate to Latin for username
        username_first = unidecode(first_name).lower().strip()
        username_last = unidecode(last_name).lower().strip()

        # Clean username (remove special chars, spaces)
        username_first = re.sub(r"[^a-z0-9]", "", username_first)
        username_last = re.sub(r"[^a-z0-9]", "", username_last)

        if not username_first:
            username_first = "user"
        if not username_last:
            username_last = "staff"

        username = f"{username_first}.{username_last}"

        # Check if user already exists
        user = User.objects.filter(username=username).first()
        if user:
            return user

        # Check for similar users (same first name part)
        similar_user = User.objects.filter(username__startswith=username_first, first_name=arabic_name).first()
        if similar_user:
            return similar_user

        # Create new user
        try:
            # Generate unique email
            email = f"{username}@alhammadi.med.sa"
            user = User(
                username=username,
                first_name=arabic_name,  # Full Arabic name
                last_name="",
                email=email,
                is_active=True,
            )
            user.save()
            logger.info(f"Created new PX-Staff user: {username} ({arabic_name})")
            return user
        except Exception as e:
            logger.error(f"Error creating user {username}: {e}")
            # Try with numbered suffix if username exists
            for i in range(2, 100):
                try:
                    email = f"{username}{i}@alhammadi.med.sa"
                    user = User(
                        username=f"{username}{i}",
                        first_name=arabic_name,
                        last_name="",
                        email=email,
                        is_active=True,
                    )
                    user.save()
                    logger.info(f"Created new PX-Staff user: {username}{i} ({arabic_name})")
                    return user
                except Exception as e2:
                    logger.error(f"Error creating user {username}{i}: {e2}")
                    continue
            return None

    def _determine_status(self, row_data: Dict) -> str:
        """Determine complaint status from timeline dates."""
        if row_data.get("closed_date"):
            return "closed"
        elif row_data.get("resolved_date"):
            return "resolved"
        elif row_data.get("escalated_date"):
            return "in_progress"
        else:
            return "open"

    def _build_title(self, row_data: Dict) -> str:
        """Build complaint title from description."""
        desc = row_data.get("description_en") or row_data.get("description_ar") or ""
        return desc[:500] if desc else "No description"

    def _build_description(self, row_data: Dict) -> str:
        """Build complaint description (English preferred)."""
        desc_en = row_data.get("description_en") or ""
        desc_ar = row_data.get("description_ar") or ""

        if desc_en and desc_ar:
            return f"{desc_en}\n\n[Arabic]:\n{desc_ar}"
        return desc_en or desc_ar or "No description provided"

    def _build_metadata(self, row_data: Dict, ref_num: str) -> Dict:
        """Build metadata dictionary."""
        return {
            "import_source": "historical_excel_2022",
            "imported_at": datetime.now().isoformat(),
            "original_sheet": self.sheet_name,
            "reference_number": ref_num,
            "original_complaint_num": row_data.get("complaint_num"),
            "mrn": row_data.get("mrn"),
            "source": row_data.get("source"),
            "satisfaction": row_data.get("satisfaction"),
            "original_staff_name": row_data.get("accused_staff"),
            "original_location": row_data.get("location_name"),
            "original_departments": {
                "main": row_data.get("main_dept_name"),
                "sub": row_data.get("sub_dept_name"),
            },
            "taxonomy": {
                "domain": row_data.get("domain"),
                "category": row_data.get("category"),
                "subcategory": row_data.get("subcategory"),
                "classification": row_data.get("classification"),
            },
            "timeline": {
                "received": str(row_data.get("date_received")) if row_data.get("date_received") else None,
                "sent": str(row_data.get("date_sent")) if row_data.get("date_sent") else None,
                "first_reminder": str(row_data.get("first_reminder")) if row_data.get("first_reminder") else None,
                "escalated": str(row_data.get("escalated_date")) if row_data.get("escalated_date") else None,
                "closed": str(row_data.get("closed_date")) if row_data.get("closed_date") else None,
                "resolved": str(row_data.get("resolved_date")) if row_data.get("resolved_date") else None,
            },
        }

    def _log_unmapped_taxonomy(self, row_data: Dict):
        """Log unmapped taxonomy items."""
        items = [
            row_data.get("domain"),
            row_data.get("category"),
            row_data.get("subcategory"),
            row_data.get("classification"),
        ]
        for item in items:
            if item:
                self.unmapped_taxonomy.add(item)

    def _print_report(self):
        """Print import summary report."""
        self.stdout.write("\n" + "=" * 80)
        self.stdout.write(self.style.SUCCESS("IMPORT REPORT"))
        self.stdout.write("=" * 80)

        self.stdout.write(f"\nSheet: {self.sheet_name}")
        self.stdout.write(f"Mode: {'DRY RUN' if self.dry_run else 'ACTUAL IMPORT'}")

        self.stdout.write("\n--- Statistics ---")
        self.stdout.write(f"Total rows processed: {self.stats['processed']}")
        self.stdout.write(self.style.SUCCESS(f"Successfully imported: {self.stats['success']}"))
        self.stdout.write(self.style.WARNING(f"Skipped (duplicates): {self.stats['skipped_duplicate']}"))
        self.stdout.write(self.style.ERROR(f"Failed: {self.stats['failed']}"))

        if self.unmapped_taxonomy:
            self.stdout.write("\n--- Unmapped Taxonomy Items ---")
            self.stdout.write("Add these to complaint_taxonomy_mapping.py:")
            for item in sorted(self.unmapped_taxonomy):
                self.stdout.write(f"  - {item}")

        if self.unmatched_locations:
            self.stdout.write("\n--- Unmatched Locations ---")
            self.stdout.write("No Location found with these name_ar values:")
            for loc in sorted(self.unmatched_locations):
                self.stdout.write(f"  - {loc}")

        if self.unmatched_departments:
            self.stdout.write("\n--- Unmatched Departments ---")
            self.stdout.write("No MainSection/SubSection found with these name_ar values:")
            for dept in sorted(self.unmatched_departments):
                self.stdout.write(f"  - {dept}")

        if self.errors:
            self.stdout.write("\n--- Errors ---")
            self.stdout.write(f"Total errors: {len(self.errors)}")
            for error in self.errors[:10]:  # Show first 10
                self.stdout.write(
                    self.style.ERROR(f"Row {error['row']} (Complaint #{error['complaint_num']}): {error['error']}")
                )
            if len(self.errors) > 10:
                self.stdout.write(f"... and {len(self.errors) - 10} more errors")

        self.stdout.write("\n" + "=" * 80)

        if self.dry_run:
            self.stdout.write(self.style.WARNING("\nThis was a DRY RUN. No data was saved."))
            self.stdout.write("Run without --dry-run to perform actual import.")