""" Import historical complaints from Excel (Aug-Dec 2022). Usage: # Test import (AUG 2022 only, dry run) python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="AUG 2022 " --dry-run # Actual import (AUG 2022) python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="AUG 2022 " # Import all months python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="SEP 2022 " python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="OCT 2022" python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="NOV 2022" python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="DEC 2022" """ import logging import re from datetime import datetime from typing import Dict, List, Optional, Tuple from django.core.management.base import BaseCommand, CommandError from django.db import transaction from django.utils import timezone from apps.organizations.models import Hospital, Location, MainSection, SubSection, Staff from apps.complaints.models import Complaint, ComplaintCategory from apps.accounts.models import User from .complaint_taxonomy_mapping import ( DOMAIN_MAPPING, CATEGORY_MAPPING, SUBCATEGORY_MAPPING, CLASSIFICATION_MAPPING, get_mapped_category, is_taxonomy_mapped, ) logger = logging.getLogger(__name__) # Default hospital code for all imported complaints DEFAULT_HOSPITAL_CODE = "NUZHA-DEV" # Column mapping: field_name -> column_number (1-based) COLUMN_MAPPING = { "complaint_num": 3, # رقم الشكوى "mrn": 4, # رقم الملف "source": 5, # جهة الشكوى "location_name": 6, # الموقع "main_dept_name": 7, # القسم الرئيس "sub_dept_name": 8, # القسم الفرعي "date_received": 9, # تاريخ إستلام الشكوى "data_entry_person": 10, # المدخل (Data Entry Person) "accused_staff_id": 48, # ID (Employee ID) "accused_staff_name": 49, # اسم الشخص المشتكى عليه - ان وجد "domain": 50, # Domain "category": 51, # Category "subcategory": 52, # Sub-Category "classification": 53, # Classification "description_ar": 54, # محتوى الشكوى (عربي) "description_en": 55, # محتوى الشكوى (English) "satisfaction": 56, # Satisfied/Dissatisfied "rightful_side": 57, # The Rightful Side # Timeline columns "date_sent": 20, # تم ارسال الشكوى (Complaint Sent/Activated) "first_reminder": 24, # First Reminder Sent "second_reminder": 28, # Second Reminder Sent "escalated_date": 32, # Escalated "closed_date": 37, # Closed "resolved_date": 44, # Resolved "response_date": 41, # تاريخ الرد (Response Date - for explanation received) } # Month mapping for reference numbers MONTH_MAP = { # Full month names (2023-2025 format) "JANUARY": "01", "FEBRUARY": "02", "MARCH": "03", "APRIL": "04", "MAY": "05", "JUNE": "06", "JULY": "07", "AUGUST": "08", "SEPTEMBER": "09", "OCTOBER": "10", "NOVEMBER": "11", "DECEMBER": "12", # Short names (2022 format for backward compatibility) "AUG": "08", "SEP": "09", "OCT": "10", "NOV": "11", "DEC": "12", } class Command(BaseCommand): help = "Import historical complaints from Excel (Aug-Dec 2022)" def add_arguments(self, parser): parser.add_argument("excel_file", type=str, help="Path to the Excel file") parser.add_argument( "--sheet", type=str, default="AUG 2022 ", help='Sheet name to import (default: "AUG 2022 ")' ) parser.add_argument("--dry-run", action="store_true", help="Preview without saving to database") parser.add_argument("--start-row", type=int, default=3, help="First data row (default: 3, skipping header)") def handle(self, *args, **options): self.excel_file = options["excel_file"] self.sheet_name = options["sheet"] self.dry_run = options["dry_run"] self.start_row = options["start_row"] # Load hospital self.hospital = self._load_hospital() if not self.hospital: raise CommandError(f'Hospital with code "{DEFAULT_HOSPITAL_CODE}" not found') self.stdout.write(self.style.SUCCESS(f"Using hospital: {self.hospital.name}")) # Load Excel workbook try: import openpyxl self.wb = openpyxl.load_workbook(self.excel_file) except ImportError: raise CommandError("openpyxl is required. Install with: pip install openpyxl") except Exception as e: raise CommandError(f"Error loading Excel file: {e}") # Check sheet exists if self.sheet_name not in self.wb.sheetnames: available = ", ".join(self.wb.sheetnames) raise CommandError(f'Sheet "{self.sheet_name}" not found. Available: {available}') self.ws = self.wb[self.sheet_name] self.stdout.write(f"Processing sheet: {self.sheet_name}") self.stdout.write(f"Total rows: {self.ws.max_row}") # Statistics tracking self.stats = { "processed": 0, "success": 0, "failed": 0, "skipped_duplicate": 0, "skipped_unmapped_taxonomy": 0, } self.errors = [] self.unmapped_taxonomy = set() self.unmatched_locations = set() self.unmatched_departments = set() # Process rows self._process_sheet() # Generate report self._print_report() def _load_hospital(self) -> Optional[Hospital]: """Load default hospital by code.""" try: return Hospital.objects.get(code=DEFAULT_HOSPITAL_CODE) except Hospital.DoesNotExist: return None def _process_sheet(self): """Process all rows in the sheet.""" row_num = self.start_row while row_num <= self.ws.max_row: try: # Extract row data row_data = self._extract_row_data(row_num) # Skip empty rows if not row_data.get("complaint_num"): row_num += 1 continue self.stats["processed"] += 1 # Check for duplicate ref_num = self._build_reference_number(row_data["complaint_num"]) if Complaint.objects.filter(reference_number=ref_num).exists(): self.stats["skipped_duplicate"] += 1 row_num += 1 continue # Resolve taxonomy - skip if unmapped taxonomy = self._resolve_taxonomy( row_data.get("domain"), row_data.get("category"), row_data.get("subcategory"), row_data.get("classification"), ) if not is_taxonomy_mapped( row_data.get("domain"), row_data.get("category"), row_data.get("subcategory"), row_data.get("classification"), ): self.stats["skipped_unmapped_taxonomy"] += 1 self._log_unmapped_taxonomy(row_data) row_num += 1 continue # Resolve location and departments location = self._resolve_location(row_data.get("location_name")) main_section = self._resolve_section(row_data.get("main_dept_name")) subsection = self._resolve_subsection(row_data.get("sub_dept_name")) # Determine status status = self._determine_status(row_data) # Parse date_received for created_at date_received = row_data.get("date_received") created_at = timezone.now() # Default fallback if date_received: if isinstance(date_received, str): try: created_at = datetime.strptime(date_received, "%Y-%m-%d %H:%M:%S") except ValueError: try: created_at = datetime.strptime(date_received, "%Y-%m-%d") except ValueError: pass elif isinstance(date_received, datetime): created_at = date_received # Get or create data entry person user data_entry_person = row_data.get("data_entry_person") assigned_to_user = self._get_or_create_data_entry_user(data_entry_person) # Parse timeline dates date_sent = self._parse_datetime(row_data.get("date_sent")) first_reminder = self._parse_datetime(row_data.get("first_reminder")) second_reminder = self._parse_datetime(row_data.get("second_reminder")) escalated_date = self._parse_datetime(row_data.get("escalated_date")) closed_date = self._parse_datetime(row_data.get("closed_date")) resolved_date = self._parse_datetime(row_data.get("resolved_date")) response_date = self._parse_datetime(row_data.get("response_date")) # Determine explanation tracking explanation_requested = bool(date_sent) explanation_requested_at = date_sent explanation_received_at = response_date # Resolve accused staff accused_staff_id = row_data.get("accused_staff_id") accused_staff = self._resolve_staff_by_id(accused_staff_id) # Map rightful side to resolution outcome rightful_side = row_data.get("rightful_side", "").lower().strip() resolution_outcome = "" if rightful_side in ["patient", "hospital", "other"]: resolution_outcome = rightful_side if not self.dry_run: # Create complaint with transaction.atomic(): complaint = Complaint.objects.create( reference_number=ref_num, hospital=self.hospital, location=location, main_section=main_section, subsection=subsection, title=self._build_title(row_data), description=self._build_description(row_data), patient_name="Unknown", national_id="", relation_to_patient="patient", staff=accused_staff, staff_name=row_data.get("accused_staff_name") or "", domain=taxonomy.get("domain"), category=taxonomy.get("category"), subcategory_obj=taxonomy.get("subcategory"), classification_obj=taxonomy.get("classification"), status=status, assigned_to=assigned_to_user, resolved_by=assigned_to_user, resolution_outcome=resolution_outcome, # Timeline fields activated_at=date_sent, reminder_sent_at=first_reminder, second_reminder_sent_at=second_reminder, escalated_at=escalated_date, closed_at=closed_date, resolved_at=resolved_date, # Explanation tracking explanation_requested=explanation_requested, explanation_requested_at=explanation_requested_at, explanation_received_at=explanation_received_at, metadata=self._build_metadata(row_data, ref_num), ) # Update created_at to historical date (can't set during create due to auto_now_add) Complaint.objects.filter(pk=complaint.pk).update(created_at=created_at) self.stats["success"] += 1 except Exception as e: self.stats["failed"] += 1 self.errors.append( { "row": row_num, "complaint_num": row_data.get("complaint_num") if "row_data" in locals() else None, "error": str(e), } ) logger.error(f"Error processing row {row_num}: {e}", exc_info=True) row_num += 1 def _extract_row_data(self, row_num: int) -> Dict: """Extract data from Excel row.""" data = {} for field, col in COLUMN_MAPPING.items(): cell_value = self.ws.cell(row_num, col).value # Clean classification field (remove Excel artifacts like "AX5:BA5") if field == "classification" and cell_value: cell_value = re.sub(r"[A-Z]+\d+:[A-Z]+\d+", "", str(cell_value)).strip() data[field] = cell_value return data def _build_reference_number(self, complaint_num) -> str: """Build reference number: CMP-YYYY-MM-NNNN.""" # Parse year and month from sheet name (e.g., "January 2023 " -> year=2023, month=January) sheet_parts = self.sheet_name.strip().split() year = sheet_parts[-1] if len(sheet_parts) > 1 else "2022" month_part = sheet_parts[0].upper() month_code = MONTH_MAP.get(month_part, "00") return f"CMP-{year}-{month_code}-{int(complaint_num):04d}" def _resolve_taxonomy(self, domain, category, subcategory, classification) -> Dict: """Resolve taxonomy to ComplaintCategory objects.""" return { "domain": self._get_category_by_uuid(get_mapped_category(domain, DOMAIN_MAPPING)), "category": self._get_category_by_uuid(get_mapped_category(category, CATEGORY_MAPPING)), "subcategory": self._get_category_by_uuid(get_mapped_category(subcategory, SUBCATEGORY_MAPPING)), "classification": self._get_category_by_uuid(get_mapped_category(classification, CLASSIFICATION_MAPPING)), } def _get_category_by_uuid(self, uuid: str) -> Optional[ComplaintCategory]: """Get ComplaintCategory by UUID.""" if not uuid: return None try: return ComplaintCategory.objects.get(id=uuid) except ComplaintCategory.DoesNotExist: return None def _parse_datetime(self, value) -> Optional[datetime]: """Parse datetime from various formats.""" if not value: return None if isinstance(value, datetime): return value if isinstance(value, str): try: return datetime.strptime(value, "%Y-%m-%d %H:%M:%S") except ValueError: try: return datetime.strptime(value, "%Y-%m-%d") except ValueError: return None return None def _resolve_location(self, name_ar: str) -> Optional[Location]: """Resolve location by Arabic name.""" if not name_ar: return None location = Location.objects.filter(name_ar=name_ar).first() if not location: self.unmatched_locations.add(name_ar) return location def _resolve_section(self, name_ar: str) -> Optional[MainSection]: """Resolve main section/department by Arabic name.""" if not name_ar: return None # Try Section model section = MainSection.objects.filter(name_ar=name_ar).first() if not section: self.unmatched_departments.add(name_ar) return section def _resolve_subsection(self, name_ar: str) -> Optional[SubSection]: """Resolve subsection by Arabic name.""" if not name_ar: return None return SubSection.objects.filter(name_ar=name_ar).first() def _resolve_staff_by_id(self, employee_id: str) -> Optional[Staff]: """Resolve staff by employee ID.""" if not employee_id: return None try: return Staff.objects.get(employee_id=str(employee_id)) except Staff.DoesNotExist: return None def _get_or_create_data_entry_user(self, arabic_name: str) -> Optional[User]: """ Create or get PX-Coordinator user from Arabic data entry person name. Transliterates Arabic name to Latin username using first and last name only. Stores full Arabic name in first_name field. Args: arabic_name: Arabic name from Excel (e.g., "أحمد محمد عبدالله") Returns: User object or None if name is empty """ if not arabic_name: return None try: from unidecode import unidecode except ImportError: logger.error("unidecode library not installed. Run: pip install unidecode") return None # Split name and get first and last parts only parts = arabic_name.split() if len(parts) >= 2: first_name = parts[0] last_name = parts[-1] else: first_name = arabic_name last_name = "coordinator" # Transliterate to Latin for username username_first = unidecode(first_name).lower().strip() username_last = unidecode(last_name).lower().strip() # Clean username (remove special chars, spaces) username_first = re.sub(r"[^a-z0-9]", "", username_first) username_last = re.sub(r"[^a-z0-9]", "", username_last) if not username_first: username_first = "user" if not username_last: username_last = "coordinator" username = f"{username_first}.{username_last}" # Check if user already exists user = User.objects.filter(username=username).first() if user: return user # Check for similar users (same first name part) similar_user = User.objects.filter(username__startswith=username_first, first_name=arabic_name).first() if similar_user: return similar_user # Create new user try: # Generate unique email email = f"{username}@alhammadi.med.sa" user = User( username=username, first_name=arabic_name, # Full Arabic name last_name="", email=email, is_active=True, ) user.save() logger.info(f"Created new PX-Coordinator user: {username} ({arabic_name})") return user except Exception as e: logger.error(f"Error creating user {username}: {e}") # Try with numbered suffix if username exists for i in range(2, 100): try: email = f"{username}{i}@alhammadi.med.sa" user = User( username=f"{username}{i}", first_name=arabic_name, last_name="", email=email, is_active=True, ) user.save() logger.info(f"Created new PX-Coordinator user: {username}{i} ({arabic_name})") return user except Exception as e2: logger.error(f"Error creating user {username}{i}: {e2}") continue return None def _determine_status(self, row_data: Dict) -> str: """Determine complaint status from timeline dates.""" if row_data.get("closed_date"): return "closed" elif row_data.get("resolved_date"): return "resolved" elif row_data.get("escalated_date"): return "in_progress" else: return "open" def _build_title(self, row_data: Dict) -> str: """Build complaint title from description.""" desc = row_data.get("description_en") or row_data.get("description_ar") or "" return desc[:500] if desc else "No description" def _build_description(self, row_data: Dict) -> str: """Build complaint description (English preferred).""" desc_en = row_data.get("description_en") or "" desc_ar = row_data.get("description_ar") or "" if desc_en and desc_ar: return f"{desc_en}\n\n[Arabic]:\n{desc_ar}" return desc_en or desc_ar or "No description provided" def _build_metadata(self, row_data: Dict, ref_num: str) -> Dict: """Build metadata dictionary.""" return { "import_source": "historical_excel_2022", "imported_at": datetime.now().isoformat(), "original_sheet": self.sheet_name, "reference_number": ref_num, "original_complaint_num": row_data.get("complaint_num"), "mrn": row_data.get("mrn"), "source": row_data.get("source"), "satisfaction": row_data.get("satisfaction"), "original_staff_name": row_data.get("accused_staff"), "original_location": row_data.get("location_name"), "original_departments": { "main": row_data.get("main_dept_name"), "sub": row_data.get("sub_dept_name"), }, "taxonomy": { "domain": row_data.get("domain"), "category": row_data.get("category"), "subcategory": row_data.get("subcategory"), "classification": row_data.get("classification"), }, "timeline": { "received": str(row_data.get("date_received")) if row_data.get("date_received") else None, "sent": str(row_data.get("date_sent")) if row_data.get("date_sent") else None, "first_reminder": str(row_data.get("first_reminder")) if row_data.get("first_reminder") else None, "escalated": str(row_data.get("escalated_date")) if row_data.get("escalated_date") else None, "closed": str(row_data.get("closed_date")) if row_data.get("closed_date") else None, "resolved": str(row_data.get("resolved_date")) if row_data.get("resolved_date") else None, }, } def _log_unmapped_taxonomy(self, row_data: Dict): """Log unmapped taxonomy items.""" items = [ row_data.get("domain"), row_data.get("category"), row_data.get("subcategory"), row_data.get("classification"), ] for item in items: if item: self.unmapped_taxonomy.add(item) def _print_report(self): """Print import summary report.""" self.stdout.write("\n" + "=" * 80) self.stdout.write(self.style.SUCCESS("IMPORT REPORT")) self.stdout.write("=" * 80) self.stdout.write(f"\nSheet: {self.sheet_name}") self.stdout.write(f"Mode: {'DRY RUN' if self.dry_run else 'ACTUAL IMPORT'}") self.stdout.write("\n--- Statistics ---") self.stdout.write(f"Total rows processed: {self.stats['processed']}") self.stdout.write(self.style.SUCCESS(f"Successfully imported: {self.stats['success']}")) self.stdout.write(self.style.WARNING(f"Skipped (duplicates): {self.stats['skipped_duplicate']}")) self.stdout.write(self.style.WARNING(f"Skipped (unmapped taxonomy): {self.stats['skipped_unmapped_taxonomy']}")) self.stdout.write(self.style.ERROR(f"Failed: {self.stats['failed']}")) if self.unmapped_taxonomy: self.stdout.write("\n--- Unmapped Taxonomy Items ---") self.stdout.write("Add these to complaint_taxonomy_mapping.py:") for item in sorted(self.unmapped_taxonomy): self.stdout.write(f" - {item}") if self.unmatched_locations: self.stdout.write("\n--- Unmatched Locations ---") self.stdout.write("No Location found with these name_ar values:") for loc in sorted(self.unmatched_locations): self.stdout.write(f" - {loc}") if self.unmatched_departments: self.stdout.write("\n--- Unmatched Departments ---") self.stdout.write("No MainSection/SubSection found with these name_ar values:") for dept in sorted(self.unmatched_departments): self.stdout.write(f" - {dept}") if self.errors: self.stdout.write("\n--- Errors ---") self.stdout.write(f"Total errors: {len(self.errors)}") for error in self.errors[:10]: # Show first 10 self.stdout.write( self.style.ERROR(f"Row {error['row']} (Complaint #{error['complaint_num']}): {error['error']}") ) if len(self.errors) > 10: self.stdout.write(f"... and {len(self.errors) - 10} more errors") self.stdout.write("\n" + "=" * 80) if self.dry_run: self.stdout.write(self.style.WARNING("\nThis was a DRY RUN. No data was saved.")) self.stdout.write("Run without --dry-run to perform actual import.")