HH/apps/complaints/management/commands/import_historical_complaints.py
2026-03-28 14:03:56 +03:00

615 lines
25 KiB
Python

"""
Import historical complaints from Excel (Aug-Dec 2022).
Usage:
# Test import (AUG 2022 only, dry run)
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="AUG 2022 " --dry-run
# Actual import (AUG 2022)
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="AUG 2022 "
# Import all months
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="SEP 2022 "
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="OCT 2022"
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="NOV 2022"
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="DEC 2022"
"""
import logging
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone
from apps.organizations.models import Hospital, Location, MainSection, SubSection, Staff
from apps.complaints.models import Complaint, ComplaintCategory
from apps.accounts.models import User
from .complaint_taxonomy_mapping import (
DOMAIN_MAPPING,
CATEGORY_MAPPING,
SUBCATEGORY_MAPPING,
CLASSIFICATION_MAPPING,
get_mapped_category,
is_taxonomy_mapped,
)
logger = logging.getLogger(__name__)
# Default hospital code for all imported complaints; the Hospital record must
# already exist in the database (the command aborts otherwise).
DEFAULT_HOSPITAL_CODE = "NUZHA-DEV"
# Column mapping: field_name -> column_number (1-based, openpyxl convention).
# The quoted Arabic strings are the literal column headers in the spreadsheet.
COLUMN_MAPPING = {
    "complaint_num": 3,  # "رقم الشكوى" (complaint number)
    "mrn": 4,  # "رقم الملف" (file / medical record number)
    "source": 5,  # "جهة الشكوى" (complaint source)
    "location_name": 6,  # "الموقع" (location)
    "main_dept_name": 7,  # "القسم الرئيس" (main department)
    "sub_dept_name": 8,  # "القسم الفرعي" (sub department)
    "date_received": 9,  # "تاريخ إستلام الشكوى" (date complaint received)
    "data_entry_person": 10,  # "المدخل" (data entry person)
    "accused_staff_id": 48,  # ID (employee ID)
    "accused_staff_name": 49,  # "اسم الشخص المشتكى عليه - ان وجد" (accused person's name, if any)
    "domain": 50,  # Domain
    "category": 51,  # Category
    "subcategory": 52,  # Sub-Category
    "classification": 53,  # Classification
    "description_ar": 54,  # "محتوى الشكوى" (complaint content, Arabic)
    "description_en": 55,  # complaint content (English)
    "satisfaction": 56,  # Satisfied/Dissatisfied
    "rightful_side": 57,  # The Rightful Side
    # Timeline columns (note: deliberately not in ascending column order)
    "date_sent": 20,  # "تم ارسال الشكوى" (complaint sent / activated)
    "first_reminder": 24,  # First Reminder Sent
    "second_reminder": 28,  # Second Reminder Sent
    "escalated_date": 32,  # Escalated
    "closed_date": 37,  # Closed
    "resolved_date": 44,  # Resolved
    "response_date": 41,  # "تاريخ الرد" (response date - for explanation received)
}
# Month-name -> two-digit month code, used by _build_reference_number when
# deriving CMP-YYYY-MM-NNNN reference numbers from sheet names such as
# "AUG 2022 " or "January 2023".
MONTH_MAP = {
    # Full month names (2023-2025 format)
    "JANUARY": "01",
    "FEBRUARY": "02",
    "MARCH": "03",
    "APRIL": "04",
    "MAY": "05",
    "JUNE": "06",
    "JULY": "07",
    "AUGUST": "08",
    "SEPTEMBER": "09",
    "OCTOBER": "10",
    "NOVEMBER": "11",
    "DECEMBER": "12",
    # Short names (2022 format for backward compatibility)
    "AUG": "08",
    "SEP": "09",
    "OCT": "10",
    "NOV": "11",
    "DEC": "12",
}
class Command(BaseCommand):
    """Import historical complaints from an Excel workbook into Complaint rows.

    One sheet (month) is imported per invocation; see the module docstring
    for usage examples.
    """

    help = "Import historical complaints from Excel (Aug-Dec 2022)"

    def add_arguments(self, parser):
        """Register the CLI options accepted by this command."""
        parser.add_argument("excel_file", type=str, help="Path to the Excel file")
        parser.add_argument(
            "--sheet", type=str, default="AUG 2022 ", help='Sheet name to import (default: "AUG 2022 ")'
        )
        parser.add_argument("--dry-run", action="store_true", help="Preview without saving to database")
        parser.add_argument("--start-row", type=int, default=3, help="First data row (default: 3, skipping header)")
    def handle(self, *args, **options):
        """Entry point: validate inputs, load the workbook, import, report.

        Raises CommandError when the hospital, the workbook, or the requested
        sheet cannot be found/loaded.
        """
        self.excel_file = options["excel_file"]
        self.sheet_name = options["sheet"]
        self.dry_run = options["dry_run"]
        self.start_row = options["start_row"]
        # Load hospital (all imported complaints are attached to it)
        self.hospital = self._load_hospital()
        if not self.hospital:
            raise CommandError(f'Hospital with code "{DEFAULT_HOSPITAL_CODE}" not found')
        self.stdout.write(self.style.SUCCESS(f"Using hospital: {self.hospital.name}"))
        # Load Excel workbook; openpyxl is imported lazily so the rest of the
        # app does not require it.
        try:
            import openpyxl

            self.wb = openpyxl.load_workbook(self.excel_file)
        except ImportError:
            raise CommandError("openpyxl is required. Install with: pip install openpyxl")
        except Exception as e:
            raise CommandError(f"Error loading Excel file: {e}")
        # Check sheet exists
        if self.sheet_name not in self.wb.sheetnames:
            available = ", ".join(self.wb.sheetnames)
            raise CommandError(f'Sheet "{self.sheet_name}" not found. Available: {available}')
        self.ws = self.wb[self.sheet_name]
        self.stdout.write(f"Processing sheet: {self.sheet_name}")
        self.stdout.write(f"Total rows: {self.ws.max_row}")
        # Statistics tracking (mutated by _process_sheet, read by _print_report)
        self.stats = {
            "processed": 0,
            "success": 0,
            "failed": 0,
            "skipped_duplicate": 0,
            "skipped_unmapped_taxonomy": 0,
        }
        self.errors = []
        self.unmapped_taxonomy = set()
        self.unmatched_locations = set()
        self.unmatched_departments = set()
        # Process rows
        self._process_sheet()
        # Generate report
        self._print_report()
def _load_hospital(self) -> Optional[Hospital]:
"""Load default hospital by code."""
try:
return Hospital.objects.get(code=DEFAULT_HOSPITAL_CODE)
except Hospital.DoesNotExist:
return None
def _process_sheet(self):
"""Process all rows in the sheet."""
row_num = self.start_row
while row_num <= self.ws.max_row:
try:
# Extract row data
row_data = self._extract_row_data(row_num)
# Skip empty rows
if not row_data.get("complaint_num"):
row_num += 1
continue
self.stats["processed"] += 1
# Check for duplicate
ref_num = self._build_reference_number(row_data["complaint_num"])
if Complaint.objects.filter(reference_number=ref_num).exists():
self.stats["skipped_duplicate"] += 1
row_num += 1
continue
# Resolve taxonomy - skip if unmapped
taxonomy = self._resolve_taxonomy(
row_data.get("domain"),
row_data.get("category"),
row_data.get("subcategory"),
row_data.get("classification"),
)
if not is_taxonomy_mapped(
row_data.get("domain"),
row_data.get("category"),
row_data.get("subcategory"),
row_data.get("classification"),
):
self.stats["skipped_unmapped_taxonomy"] += 1
self._log_unmapped_taxonomy(row_data)
row_num += 1
continue
# Resolve location and departments
location = self._resolve_location(row_data.get("location_name"))
main_section = self._resolve_section(row_data.get("main_dept_name"))
subsection = self._resolve_subsection(row_data.get("sub_dept_name"))
# Determine status
status = self._determine_status(row_data)
# Parse date_received for created_at
date_received = row_data.get("date_received")
created_at = timezone.now() # Default fallback
if date_received:
if isinstance(date_received, str):
try:
created_at = datetime.strptime(date_received, "%Y-%m-%d %H:%M:%S")
except ValueError:
try:
created_at = datetime.strptime(date_received, "%Y-%m-%d")
except ValueError:
pass
elif isinstance(date_received, datetime):
created_at = date_received
# Get or create data entry person user
data_entry_person = row_data.get("data_entry_person")
assigned_to_user = self._get_or_create_data_entry_user(data_entry_person)
# Parse timeline dates
date_sent = self._parse_datetime(row_data.get("date_sent"))
first_reminder = self._parse_datetime(row_data.get("first_reminder"))
second_reminder = self._parse_datetime(row_data.get("second_reminder"))
escalated_date = self._parse_datetime(row_data.get("escalated_date"))
closed_date = self._parse_datetime(row_data.get("closed_date"))
resolved_date = self._parse_datetime(row_data.get("resolved_date"))
response_date = self._parse_datetime(row_data.get("response_date"))
# Determine explanation tracking
explanation_requested = bool(date_sent)
explanation_requested_at = date_sent
explanation_received_at = response_date
# Resolve accused staff
accused_staff_id = row_data.get("accused_staff_id")
accused_staff = self._resolve_staff_by_id(accused_staff_id)
# Map rightful side to resolution outcome
rightful_side = row_data.get("rightful_side", "").lower().strip()
resolution_outcome = ""
if rightful_side in ["patient", "hospital", "other"]:
resolution_outcome = rightful_side
if not self.dry_run:
# Create complaint
with transaction.atomic():
complaint = Complaint.objects.create(
reference_number=ref_num,
hospital=self.hospital,
location=location,
main_section=main_section,
subsection=subsection,
title=self._build_title(row_data),
description=self._build_description(row_data),
patient_name="Unknown",
national_id="",
relation_to_patient="patient",
staff=accused_staff,
staff_name=row_data.get("accused_staff_name") or "",
domain=taxonomy.get("domain"),
category=taxonomy.get("category"),
subcategory_obj=taxonomy.get("subcategory"),
classification_obj=taxonomy.get("classification"),
status=status,
assigned_to=assigned_to_user,
resolved_by=assigned_to_user,
resolution_outcome=resolution_outcome,
# Timeline fields
activated_at=date_sent,
reminder_sent_at=first_reminder,
second_reminder_sent_at=second_reminder,
escalated_at=escalated_date,
closed_at=closed_date,
resolved_at=resolved_date,
# Explanation tracking
explanation_requested=explanation_requested,
explanation_requested_at=explanation_requested_at,
explanation_received_at=explanation_received_at,
metadata=self._build_metadata(row_data, ref_num),
)
# Update created_at to historical date (can't set during create due to auto_now_add)
Complaint.objects.filter(pk=complaint.pk).update(created_at=created_at)
self.stats["success"] += 1
except Exception as e:
self.stats["failed"] += 1
self.errors.append(
{
"row": row_num,
"complaint_num": row_data.get("complaint_num") if "row_data" in locals() else None,
"error": str(e),
}
)
logger.error(f"Error processing row {row_num}: {e}", exc_info=True)
row_num += 1
def _extract_row_data(self, row_num: int) -> Dict:
"""Extract data from Excel row."""
data = {}
for field, col in COLUMN_MAPPING.items():
cell_value = self.ws.cell(row_num, col).value
# Clean classification field (remove Excel artifacts like "AX5:BA5")
if field == "classification" and cell_value:
cell_value = re.sub(r"[A-Z]+\d+:[A-Z]+\d+", "", str(cell_value)).strip()
data[field] = cell_value
return data
def _build_reference_number(self, complaint_num) -> str:
"""Build reference number: CMP-YYYY-MM-NNNN."""
# Parse year and month from sheet name (e.g., "January 2023 " -> year=2023, month=January)
sheet_parts = self.sheet_name.strip().split()
year = sheet_parts[-1] if len(sheet_parts) > 1 else "2022"
month_part = sheet_parts[0].upper()
month_code = MONTH_MAP.get(month_part, "00")
return f"CMP-{year}-{month_code}-{int(complaint_num):04d}"
def _resolve_taxonomy(self, domain, category, subcategory, classification) -> Dict:
"""Resolve taxonomy to ComplaintCategory objects."""
return {
"domain": self._get_category_by_uuid(get_mapped_category(domain, DOMAIN_MAPPING)),
"category": self._get_category_by_uuid(get_mapped_category(category, CATEGORY_MAPPING)),
"subcategory": self._get_category_by_uuid(get_mapped_category(subcategory, SUBCATEGORY_MAPPING)),
"classification": self._get_category_by_uuid(get_mapped_category(classification, CLASSIFICATION_MAPPING)),
}
def _get_category_by_uuid(self, uuid: str) -> Optional[ComplaintCategory]:
"""Get ComplaintCategory by UUID."""
if not uuid:
return None
try:
return ComplaintCategory.objects.get(id=uuid)
except ComplaintCategory.DoesNotExist:
return None
def _parse_datetime(self, value) -> Optional[datetime]:
"""Parse datetime from various formats."""
if not value:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
try:
return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
except ValueError:
try:
return datetime.strptime(value, "%Y-%m-%d")
except ValueError:
return None
return None
def _resolve_location(self, name_ar: str) -> Optional[Location]:
"""Resolve location by Arabic name."""
if not name_ar:
return None
location = Location.objects.filter(name_ar=name_ar).first()
if not location:
self.unmatched_locations.add(name_ar)
return location
def _resolve_section(self, name_ar: str) -> Optional[MainSection]:
"""Resolve main section/department by Arabic name."""
if not name_ar:
return None
# Try Section model
section = MainSection.objects.filter(name_ar=name_ar).first()
if not section:
self.unmatched_departments.add(name_ar)
return section
def _resolve_subsection(self, name_ar: str) -> Optional[SubSection]:
"""Resolve subsection by Arabic name."""
if not name_ar:
return None
return SubSection.objects.filter(name_ar=name_ar).first()
def _resolve_staff_by_id(self, employee_id: str) -> Optional[Staff]:
"""Resolve staff by employee ID."""
if not employee_id:
return None
try:
return Staff.objects.get(employee_id=str(employee_id))
except Staff.DoesNotExist:
return None
def _get_or_create_data_entry_user(self, arabic_name: str) -> Optional[User]:
"""
Create or get PX-Coordinator user from Arabic data entry person name.
Transliterates Arabic name to Latin username using first and last name only.
Stores full Arabic name in first_name field.
Args:
arabic_name: Arabic name from Excel (e.g., "أحمد محمد عبدالله")
Returns:
User object or None if name is empty
"""
if not arabic_name:
return None
try:
from unidecode import unidecode
except ImportError:
logger.error("unidecode library not installed. Run: pip install unidecode")
return None
# Split name and get first and last parts only
parts = arabic_name.split()
if len(parts) >= 2:
first_name = parts[0]
last_name = parts[-1]
else:
first_name = arabic_name
last_name = "coordinator"
# Transliterate to Latin for username
username_first = unidecode(first_name).lower().strip()
username_last = unidecode(last_name).lower().strip()
# Clean username (remove special chars, spaces)
username_first = re.sub(r"[^a-z0-9]", "", username_first)
username_last = re.sub(r"[^a-z0-9]", "", username_last)
if not username_first:
username_first = "user"
if not username_last:
username_last = "coordinator"
username = f"{username_first}.{username_last}"
# Check if user already exists
user = User.objects.filter(username=username).first()
if user:
return user
# Check for similar users (same first name part)
similar_user = User.objects.filter(username__startswith=username_first, first_name=arabic_name).first()
if similar_user:
return similar_user
# Create new user
try:
# Generate unique email
email = f"{username}@alhammadi.med.sa"
user = User(
username=username,
first_name=arabic_name, # Full Arabic name
last_name="",
email=email,
is_active=True,
)
user.save()
logger.info(f"Created new PX-Coordinator user: {username} ({arabic_name})")
return user
except Exception as e:
logger.error(f"Error creating user {username}: {e}")
# Try with numbered suffix if username exists
for i in range(2, 100):
try:
email = f"{username}{i}@alhammadi.med.sa"
user = User(
username=f"{username}{i}",
first_name=arabic_name,
last_name="",
email=email,
is_active=True,
)
user.save()
logger.info(f"Created new PX-Coordinator user: {username}{i} ({arabic_name})")
return user
except Exception as e2:
logger.error(f"Error creating user {username}{i}: {e2}")
continue
return None
def _determine_status(self, row_data: Dict) -> str:
"""Determine complaint status from timeline dates."""
if row_data.get("closed_date"):
return "closed"
elif row_data.get("resolved_date"):
return "resolved"
elif row_data.get("escalated_date"):
return "in_progress"
else:
return "open"
def _build_title(self, row_data: Dict) -> str:
"""Build complaint title from description."""
desc = row_data.get("description_en") or row_data.get("description_ar") or ""
return desc[:500] if desc else "No description"
def _build_description(self, row_data: Dict) -> str:
"""Build complaint description (English preferred)."""
desc_en = row_data.get("description_en") or ""
desc_ar = row_data.get("description_ar") or ""
if desc_en and desc_ar:
return f"{desc_en}\n\n[Arabic]:\n{desc_ar}"
return desc_en or desc_ar or "No description provided"
def _build_metadata(self, row_data: Dict, ref_num: str) -> Dict:
"""Build metadata dictionary."""
return {
"import_source": "historical_excel_2022",
"imported_at": datetime.now().isoformat(),
"original_sheet": self.sheet_name,
"reference_number": ref_num,
"original_complaint_num": row_data.get("complaint_num"),
"mrn": row_data.get("mrn"),
"source": row_data.get("source"),
"satisfaction": row_data.get("satisfaction"),
"original_staff_name": row_data.get("accused_staff"),
"original_location": row_data.get("location_name"),
"original_departments": {
"main": row_data.get("main_dept_name"),
"sub": row_data.get("sub_dept_name"),
},
"taxonomy": {
"domain": row_data.get("domain"),
"category": row_data.get("category"),
"subcategory": row_data.get("subcategory"),
"classification": row_data.get("classification"),
},
"timeline": {
"received": str(row_data.get("date_received")) if row_data.get("date_received") else None,
"sent": str(row_data.get("date_sent")) if row_data.get("date_sent") else None,
"first_reminder": str(row_data.get("first_reminder")) if row_data.get("first_reminder") else None,
"escalated": str(row_data.get("escalated_date")) if row_data.get("escalated_date") else None,
"closed": str(row_data.get("closed_date")) if row_data.get("closed_date") else None,
"resolved": str(row_data.get("resolved_date")) if row_data.get("resolved_date") else None,
},
}
def _log_unmapped_taxonomy(self, row_data: Dict):
"""Log unmapped taxonomy items."""
items = [
row_data.get("domain"),
row_data.get("category"),
row_data.get("subcategory"),
row_data.get("classification"),
]
for item in items:
if item:
self.unmapped_taxonomy.add(item)
def _print_report(self):
"""Print import summary report."""
self.stdout.write("\n" + "=" * 80)
self.stdout.write(self.style.SUCCESS("IMPORT REPORT"))
self.stdout.write("=" * 80)
self.stdout.write(f"\nSheet: {self.sheet_name}")
self.stdout.write(f"Mode: {'DRY RUN' if self.dry_run else 'ACTUAL IMPORT'}")
self.stdout.write("\n--- Statistics ---")
self.stdout.write(f"Total rows processed: {self.stats['processed']}")
self.stdout.write(self.style.SUCCESS(f"Successfully imported: {self.stats['success']}"))
self.stdout.write(self.style.WARNING(f"Skipped (duplicates): {self.stats['skipped_duplicate']}"))
self.stdout.write(self.style.WARNING(f"Skipped (unmapped taxonomy): {self.stats['skipped_unmapped_taxonomy']}"))
self.stdout.write(self.style.ERROR(f"Failed: {self.stats['failed']}"))
if self.unmapped_taxonomy:
self.stdout.write("\n--- Unmapped Taxonomy Items ---")
self.stdout.write("Add these to complaint_taxonomy_mapping.py:")
for item in sorted(self.unmapped_taxonomy):
self.stdout.write(f" - {item}")
if self.unmatched_locations:
self.stdout.write("\n--- Unmatched Locations ---")
self.stdout.write("No Location found with these name_ar values:")
for loc in sorted(self.unmatched_locations):
self.stdout.write(f" - {loc}")
if self.unmatched_departments:
self.stdout.write("\n--- Unmatched Departments ---")
self.stdout.write("No MainSection/SubSection found with these name_ar values:")
for dept in sorted(self.unmatched_departments):
self.stdout.write(f" - {dept}")
if self.errors:
self.stdout.write("\n--- Errors ---")
self.stdout.write(f"Total errors: {len(self.errors)}")
for error in self.errors[:10]: # Show first 10
self.stdout.write(
self.style.ERROR(f"Row {error['row']} (Complaint #{error['complaint_num']}): {error['error']}")
)
if len(self.errors) > 10:
self.stdout.write(f"... and {len(self.errors) - 10} more errors")
self.stdout.write("\n" + "=" * 80)
if self.dry_run:
self.stdout.write(self.style.WARNING("\nThis was a DRY RUN. No data was saved."))
self.stdout.write("Run without --dry-run to perform actual import.")