# HH/apps/complaints/management/commands/import_historical_complaints.py
# 2026-04-19 10:53:12 +03:00
#
# 653 lines
# 27 KiB
# Python

"""
Import historical complaints from Excel (2022-2024).
Usage:
# Test import (dry run)
python manage.py import_historical_complaints "Complaints Report - 2024.xlsx" --sheet="January 2024" --dry-run
# Actual import for a single sheet
python manage.py import_historical_complaints "Complaints Report - 2024.xlsx" --sheet="January 2024"
"""
import logging
import re
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone
from apps.organizations.models import Hospital, Location, MainSection, SubSection, Staff
from apps.complaints.models import Complaint, ComplaintCategory
from apps.accounts.models import User
from .complaint_taxonomy_mapping import (
DOMAIN_MAPPING,
CATEGORY_MAPPING,
SUBCATEGORY_MAPPING,
CLASSIFICATION_MAPPING,
get_mapped_category,
is_taxonomy_mapped,
)
from .complaint_source_mapping import resolve_px_source
logger = logging.getLogger(__name__)
# Code of the hospital every imported complaint is attached to.
DEFAULT_HOSPITAL_CODE = "NUZHA"

# Worksheet layout: complaint field name -> 1-based column number.
COLUMN_MAPPING = dict(
    # --- identification and origin ---
    complaint_num=3,  # complaint number
    mrn=4,  # patient file number (MRN)
    source=5,  # complaint source
    location_name=6,  # location
    main_dept_name=7,  # main department
    sub_dept_name=8,  # sub-department
    date_received=9,  # date the complaint was received
    data_entry_person=10,  # data entry person
    # --- accused staff, taxonomy and content ---
    accused_staff_id=48,  # ID (Employee ID)
    accused_staff_name=49,  # name of the person complained about, if any
    domain=50,  # Domain
    category=51,  # Category
    subcategory=52,  # Sub-Category
    classification=53,  # Classification
    description_ar=54,  # complaint content (Arabic)
    description_en=55,  # complaint content (English)
    satisfaction=56,  # Satisfied/Dissatisfied
    rightful_side=57,  # The Rightful Side
    # --- timeline columns ---
    date_sent=20,  # complaint sent/activated
    first_reminder=24,  # First Reminder Sent
    second_reminder=28,  # Second Reminder Sent
    escalated_date=32,  # Escalated
    closed_date=37,  # Closed
    resolved_date=44,  # Resolved
    response_date=41,  # response date (explanation received)
)

# Sheet-name month token (upper case) -> two-digit month used in reference numbers.
# Full month names are the 2023-2025 sheet format.
MONTH_MAP = {
    month_name: f"{position:02d}"
    for position, month_name in enumerate(
        (
            "JANUARY",
            "FEBRUARY",
            "MARCH",
            "APRIL",
            "MAY",
            "JUNE",
            "JULY",
            "AUGUST",
            "SEPTEMBER",
            "OCTOBER",
            "NOVEMBER",
            "DECEMBER",
        ),
        start=1,
    )
}
# Short names (2022 sheet format), kept for backward compatibility.
MONTH_MAP.update(AUG="08", SEP="09", OCT="10", NOV="11", DEC="12")
# Django management command that imports one worksheet of a historical
# complaints workbook; the methods below read the sheet, resolve related
# records and create Complaint rows.
class Command(BaseCommand):
help = "Import historical complaints from Excel (2022-2024)"
def add_arguments(self, parser):
    """Register the command-line arguments on ``parser``.

    Arguments: the positional ``excel_file`` plus the ``--sheet``,
    ``--dry-run`` and ``--start-row`` options.
    """
    argument_specs = (
        (("excel_file",), {"type": str, "help": "Path to the Excel file"}),
        (
            ("--sheet",),
            {"type": str, "default": "AUG 2022 ", "help": 'Sheet name to import (default: "AUG 2022 ")'},
        ),
        (("--dry-run",), {"action": "store_true", "help": "Preview without saving to database"}),
        (
            ("--start-row",),
            {"type": int, "default": 3, "help": "First data row (default: 3, skipping header)"},
        ),
    )
    for flags, settings in argument_specs:
        parser.add_argument(*flags, **settings)
def handle(self, *args, **options):
    """Import one worksheet and print the summary report.

    Reads ``excel_file``, ``sheet``, ``dry_run`` and ``start_row`` from
    ``options``, loads the default hospital, opens the workbook read-only,
    processes the selected sheet and prints the report.

    Raises:
        CommandError: if the default hospital does not exist, openpyxl is
            not installed, the workbook cannot be loaded, or the requested
            sheet is not in the workbook.
    """
    self.excel_file = options["excel_file"]
    self.sheet_name = options["sheet"]
    self.dry_run = options["dry_run"]
    self.start_row = options["start_row"]

    # Load hospital
    self.hospital = self._load_hospital()
    if not self.hospital:
        raise CommandError(f'Hospital with code "{DEFAULT_HOSPITAL_CODE}" not found')
    self.stdout.write(self.style.SUCCESS(f"Using hospital: {self.hospital.name}"))

    # Import separately from loading so that only a missing package is
    # reported as "openpyxl is required".
    try:
        import openpyxl
    except ImportError as exc:
        raise CommandError("openpyxl is required. Install with: pip install openpyxl") from exc

    try:
        self.wb = openpyxl.load_workbook(self.excel_file, read_only=True, data_only=True)
    except Exception as exc:
        raise CommandError(f"Error loading Excel file: {exc}") from exc

    # A read-only workbook keeps the file open until close() is called, so
    # release it on every exit path.
    try:
        # Check sheet exists
        if self.sheet_name not in self.wb.sheetnames:
            available = ", ".join(self.wb.sheetnames)
            raise CommandError(f'Sheet "{self.sheet_name}" not found. Available: {available}')
        self.ws = self.wb[self.sheet_name]
        self.stdout.write(f"Processing sheet: {self.sheet_name}")

        # Statistics tracking
        self.stats = {
            "processed": 0,
            "success": 0,
            "failed": 0,
            "skipped_duplicate": 0,
            "skipped_unmapped_taxonomy": 0,
        }
        self.errors = []
        self.unmapped_taxonomy = set()
        self.unmatched_locations = set()
        self.unmatched_departments = set()
        # Reference numbers handed out during this run (checked before the DB)
        self.used_refs = set()

        # Process rows
        self._process_sheet()
    finally:
        self.wb.close()

    # Generate report
    self._print_report()
def _load_hospital(self) -> Optional[Hospital]:
    """Return the hospital whose code is DEFAULT_HOSPITAL_CODE, or None if it does not exist."""
    hospitals = Hospital.objects
    try:
        hospital = hospitals.get(code=DEFAULT_HOSPITAL_CODE)
    except Hospital.DoesNotExist:
        hospital = None
    return hospital
def _process_sheet(self):
    """Import every data row of the selected worksheet.

    Rows are read with ``iter_rows`` from ``self.start_row`` up to row 5000.
    A row with an empty complaint-number cell is ignored. A row whose
    complaint number cannot be turned into a reference number is logged and
    skipped. Every other row is resolved against taxonomy, source, location,
    department and staff records and, unless ``self.dry_run`` is set, saved
    as a ``Complaint``.

    In dry-run mode nothing is written: the complaint is not created and the
    data-entry user is not created either. The row still counts as a success
    so the preview shows how many rows would be imported.

    Any exception raised while handling a row is appended to ``self.errors``,
    counted in ``self.stats["failed"]`` and logged; processing then continues
    with the next row.
    """

    def to_aware(moment):
        """Make a naive datetime timezone-aware; None and aware values pass through."""
        if moment is not None and timezone.is_naive(moment):
            return timezone.make_aware(moment)
        return moment

    rows = self.ws.iter_rows(min_row=self.start_row, max_row=5000, values_only=True)
    for row_num, row in enumerate(rows, start=self.start_row):
        # Reset per row so the error handler never reports the previous
        # row's complaint number when extraction itself fails.
        row_data = {}
        try:
            # Extract row data
            row_data = self._extract_row_data_from_values(row)

            # Skip empty rows
            if not row_data.get("complaint_num"):
                continue
            self.stats["processed"] += 1

            # Validate complaint number and build reference
            try:
                ref_num = self._get_unique_reference_number(row_data["complaint_num"])
            except (ValueError, TypeError) as ref_error:
                logger.warning(
                    "Skipping row %s: cannot build reference number from %r (%s)",
                    row_num,
                    row_data["complaint_num"],
                    ref_error,
                )
                continue

            # Resolve taxonomy - allow unmapped (will be backfilled later via AI)
            raw_taxonomy = (
                row_data.get("domain"),
                row_data.get("category"),
                row_data.get("subcategory"),
                row_data.get("classification"),
            )
            taxonomy = self._resolve_taxonomy(*raw_taxonomy)
            if not is_taxonomy_mapped(*raw_taxonomy):
                self._log_unmapped_taxonomy(row_data)

            # Resolve source
            px_source = resolve_px_source(row_data.get("source"))

            # Resolve location and departments
            location = self._resolve_location(row_data.get("location_name"))
            main_section = self._resolve_section(row_data.get("main_dept_name"))
            subsection = self._resolve_subsection(row_data.get("sub_dept_name"))

            # Determine status
            status = self._determine_status(row_data)

            # Historical creation date; falls back to "now" when the cell is
            # empty or cannot be parsed.
            created_at = to_aware(self._parse_datetime(row_data.get("date_received"))) or timezone.now()

            # Timeline dates are made aware like created_at so they are not
            # saved as naive datetimes.
            date_sent = to_aware(self._parse_datetime(row_data.get("date_sent")))
            first_reminder = to_aware(self._parse_datetime(row_data.get("first_reminder")))
            second_reminder = to_aware(self._parse_datetime(row_data.get("second_reminder")))
            escalated_date = to_aware(self._parse_datetime(row_data.get("escalated_date")))
            closed_date = to_aware(self._parse_datetime(row_data.get("closed_date")))
            resolved_date = to_aware(self._parse_datetime(row_data.get("resolved_date")))
            response_date = to_aware(self._parse_datetime(row_data.get("response_date")))

            # Determine explanation tracking
            explanation_requested = bool(date_sent)
            explanation_requested_at = date_sent
            explanation_received_at = response_date

            # Resolve accused staff
            accused_staff = self._resolve_staff_by_id(row_data.get("accused_staff_id"))

            # Map rightful side to resolution outcome
            rightful_side = str(row_data.get("rightful_side") or "").lower().strip()
            resolution_outcome = ""
            if rightful_side in ["patient", "hospital", "other"]:
                resolution_outcome = rightful_side

            if not self.dry_run:
                # Resolved only for real imports because it may INSERT a user.
                # Kept outside the atomic block so a failed user insert cannot
                # break the complaint's transaction.
                assigned_to_user = self._get_or_create_data_entry_user(row_data.get("data_entry_person"))

                # Create complaint
                with transaction.atomic():
                    complaint = Complaint.objects.create(
                        reference_number=ref_num,
                        hospital=self.hospital,
                        location=location,
                        main_section=main_section,
                        subsection=subsection,
                        title=self._build_title(row_data),
                        description=self._build_description(row_data),
                        patient_name="Unknown",
                        national_id="",
                        relation_to_patient="patient",
                        staff=accused_staff,
                        staff_name=row_data.get("accused_staff_name") or "",
                        domain=taxonomy.get("domain"),
                        category=taxonomy.get("category"),
                        subcategory_obj=taxonomy.get("subcategory"),
                        classification_obj=taxonomy.get("classification"),
                        status=status,
                        assigned_to=assigned_to_user,
                        resolved_by=assigned_to_user,
                        resolution_outcome=resolution_outcome,
                        # Timeline fields
                        activated_at=date_sent,
                        reminder_sent_at=first_reminder,
                        second_reminder_sent_at=second_reminder,
                        escalated_at=escalated_date,
                        closed_at=closed_date,
                        resolved_at=resolved_date,
                        # Explanation tracking
                        explanation_requested=explanation_requested,
                        explanation_requested_at=explanation_requested_at,
                        explanation_received_at=explanation_received_at,
                        due_at=created_at + timedelta(hours=48),
                        source=px_source,
                        metadata=self._build_metadata(row_data, ref_num),
                    )
                    # Update created_at to historical date (can't set during create due to auto_now_add)
                    Complaint.objects.filter(pk=complaint.pk).update(created_at=created_at)

            self.stats["success"] += 1
        except Exception as e:
            self.stats["failed"] += 1
            self.errors.append(
                {
                    "row": row_num,
                    "complaint_num": row_data.get("complaint_num"),
                    "error": str(e),
                }
            )
            logger.exception("Error processing row %s: %s", row_num, e)
def _extract_row_data(self, row_num: int) -> Dict:
    """Read the mapped columns of worksheet row ``row_num`` cell by cell (kept for compatibility)."""

    def read_field(field, col):
        value = self.ws.cell(row_num, col).value
        # Strip Excel range artifacts such as "AX5:BA5" from the classification text.
        if value and field == "classification":
            value = re.sub(r"[A-Z]+\d+:[A-Z]+\d+", "", str(value)).strip()
        return value

    return {field: read_field(field, col) for field, col in COLUMN_MAPPING.items()}
def _extract_row_data_from_values(self, row: tuple) -> Dict:
    """Build the field -> value dict for one ``iter_rows`` values tuple.

    Columns beyond the end of ``row`` yield None.
    """
    width = len(row)
    extracted = {}
    for field, col in COLUMN_MAPPING.items():
        index = col - 1  # COLUMN_MAPPING is 1-based, the tuple is 0-based
        value = None if index >= width else row[index]
        # Strip Excel range artifacts such as "AX5:BA5" from the classification text.
        if value and field == "classification":
            value = re.sub(r"[A-Z]+\d+:[A-Z]+\d+", "", str(value)).strip()
        extracted[field] = value
    return extracted
def _build_reference_number(self, complaint_num) -> str:
    """Return ``CMP-YYYY-MM-NNNN`` for ``complaint_num``.

    Year and month come from ``self.sheet_name`` (e.g. "January 2023 "):
    the first token is the month, the last token the year. A single-token
    sheet name uses year "2022"; an unknown month token gives month "00".
    """
    tokens = self.sheet_name.split()
    if len(tokens) > 1:
        year = tokens[-1]
    else:
        year = "2022"
    month_token = tokens[0].upper()
    month_code = MONTH_MAP.get(month_token, "00")
    sequence = int(complaint_num)
    return "CMP-{}-{}-{:04d}".format(year, month_code, sequence)
def _get_unique_reference_number(self, complaint_num) -> str:
    """Return a reference number not yet used in this run or in the database.

    The base reference is tried first, then the suffixes ``-A`` to ``-Z``.
    The chosen value is added to ``self.used_refs``; choosing a suffixed
    value also increments ``self.stats["skipped_duplicate"]``.

    Raises:
        ValueError: if the base reference and all 26 suffixes are taken.
    """

    def is_taken(reference):
        # The in-memory set is checked first so the DB is only queried on a miss.
        return reference in self.used_refs or Complaint.objects.filter(reference_number=reference).exists()

    base_ref = self._build_reference_number(complaint_num)
    if not is_taken(base_ref):
        self.used_refs.add(base_ref)
        return base_ref

    for letter_code in range(ord("A"), ord("Z") + 1):
        candidate = f"{base_ref}-{chr(letter_code)}"
        if is_taken(candidate):
            continue
        self.used_refs.add(candidate)
        self.stats["skipped_duplicate"] += 1  # Actually suffix added
        return candidate

    # All single-letter suffixes are exhausted.
    raise ValueError(f"Cannot generate unique reference for {base_ref}")
def _resolve_taxonomy(self, domain, category, subcategory, classification) -> Dict:
    """Map the four raw taxonomy values to ComplaintCategory objects (or None).

    The result has the keys "domain", "category", "subcategory" and
    "classification".
    """
    levels = (
        ("domain", domain, DOMAIN_MAPPING),
        ("category", category, CATEGORY_MAPPING),
        ("subcategory", subcategory, SUBCATEGORY_MAPPING),
        ("classification", classification, CLASSIFICATION_MAPPING),
    )
    resolved = {}
    for level, raw_value, mapping in levels:
        mapped_id = get_mapped_category(raw_value, mapping)
        resolved[level] = self._get_category_by_uuid(mapped_id)
    return resolved
def _get_category_by_uuid(self, uuid: str) -> Optional[ComplaintCategory]:
    """Return the ComplaintCategory with id ``uuid``, or None when ``uuid`` is empty or no row matches."""
    category = None
    if uuid:
        try:
            category = ComplaintCategory.objects.get(id=uuid)
        except ComplaintCategory.DoesNotExist:
            category = None
    return category
def _parse_datetime(self, value) -> Optional[datetime]:
"""Parse datetime from various formats."""
if not value:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
try:
return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
except ValueError:
try:
return datetime.strptime(value, "%Y-%m-%d")
except ValueError:
return None
return None
def _resolve_location(self, name_ar: str) -> Optional[Location]:
    """Return the first Location whose ``name_ar`` equals ``name_ar``.

    An empty name returns None. A name with no match is added to
    ``self.unmatched_locations``.
    """
    if not name_ar:
        return None
    found = Location.objects.filter(name_ar=name_ar).first()
    if found:
        return found
    self.unmatched_locations.add(name_ar)
    return found
def _resolve_section(self, name_ar: str) -> Optional[MainSection]:
    """Return the first MainSection whose ``name_ar`` equals ``name_ar``.

    An empty name returns None. A name with no match is added to
    ``self.unmatched_departments``.
    """
    if not name_ar:
        return None
    found = MainSection.objects.filter(name_ar=name_ar).first()
    if found:
        return found
    self.unmatched_departments.add(name_ar)
    return found
def _resolve_subsection(self, name_ar: str) -> Optional[SubSection]:
    """Return the first SubSection whose ``name_ar`` equals ``name_ar``.

    An empty name returns None. A name with no match is added to
    ``self.unmatched_departments``, the same set used for main sections,
    so it appears in the report's unmatched-departments section.
    """
    if not name_ar:
        return None
    subsection = SubSection.objects.filter(name_ar=name_ar).first()
    if not subsection:
        self.unmatched_departments.add(name_ar)
    return subsection
def _resolve_staff_by_id(self, employee_id: str) -> Optional[Staff]:
    """Return the Staff record with the given employee ID, or None.

    Args:
        employee_id: Raw cell value; may be a string or a number.

    Returns:
        The matching Staff, or None when the ID is empty or no Staff has it.
    """
    if not employee_id:
        return None
    # A numeric cell can arrive as a float (12345.0); str() would give
    # "12345.0", which cannot match an ID stored as "12345".
    if isinstance(employee_id, float) and employee_id.is_integer():
        employee_id = int(employee_id)
    lookup_id = str(employee_id).strip()
    if not lookup_id:
        return None
    try:
        return Staff.objects.get(employee_id=lookup_id)
    except Staff.DoesNotExist:
        return None
def _get_or_create_data_entry_user(self, arabic_name: str) -> Optional[User]:
    """Get or create the PX-Staff user for a data entry person's Arabic name.

    The username is ``<first>.<last>``, built by transliterating the first
    and last word of the name to Latin and keeping only ``a-z0-9``. The
    full Arabic name is stored in ``first_name``.

    Lookup order: exact username; then a user whose username starts with
    the transliterated first name and whose ``first_name`` equals the
    Arabic name; otherwise a new user is created. If saving fails, the
    usernames ``<username>2`` .. ``<username>99`` are tried in turn.

    Args:
        arabic_name: Name from the sheet (e.g. "أحمد محمد عبدالله").

    Returns:
        The User, or None when the name is empty or whitespace-only,
        ``unidecode`` is not installed, or no user could be created.
    """
    # Normalise first: a blank or padded cell must not create a
    # "user.staff" account, and names that differ only in spacing should
    # match the same user.
    arabic_name = " ".join(str(arabic_name or "").split())
    if not arabic_name:
        return None

    try:
        from unidecode import unidecode
    except ImportError:
        logger.error("unidecode library not installed. Run: pip install unidecode")
        return None

    # Split name and get first and last parts only
    parts = arabic_name.split()
    if len(parts) >= 2:
        first_name, last_name = parts[0], parts[-1]
    else:
        first_name, last_name = arabic_name, "staff"

    # Transliterate to Latin and drop everything except a-z0-9
    username_first = re.sub(r"[^a-z0-9]", "", unidecode(first_name).lower()) or "user"
    username_last = re.sub(r"[^a-z0-9]", "", unidecode(last_name).lower()) or "staff"
    username = f"{username_first}.{username_last}"

    # Check if user already exists
    user = User.objects.filter(username=username).first()
    if user:
        return user

    # Check for similar users (same first name part)
    similar_user = User.objects.filter(username__startswith=username_first, first_name=arabic_name).first()
    if similar_user:
        return similar_user

    # Create new user: plain username first, then numbered fallbacks.
    candidates = [username] + [f"{username}{i}" for i in range(2, 100)]
    for candidate in candidates:
        try:
            user = User(
                username=candidate,
                first_name=arabic_name,  # Full Arabic name
                last_name="",
                email=f"{candidate}@alhammadi.med.sa",
                is_active=True,
            )
            user.save()
        except Exception as exc:
            # Best effort: log the failure and try the next candidate.
            logger.error(f"Error creating user {candidate}: {exc}")
            continue
        logger.info(f"Created new PX-Staff user: {candidate} ({arabic_name})")
        return user
    return None
def _determine_status(self, row_data: Dict) -> str:
"""Determine complaint status from timeline dates."""
if row_data.get("closed_date"):
return "closed"
elif row_data.get("resolved_date"):
return "resolved"
elif row_data.get("escalated_date"):
return "in_progress"
else:
return "open"
def _build_title(self, row_data: Dict) -> str:
"""Build complaint title from description."""
desc = row_data.get("description_en") or row_data.get("description_ar") or ""
return desc[:500] if desc else "No description"
def _build_description(self, row_data: Dict) -> str:
"""Build complaint description (English preferred)."""
desc_en = row_data.get("description_en") or ""
desc_ar = row_data.get("description_ar") or ""
if desc_en and desc_ar:
return f"{desc_en}\n\n[Arabic]:\n{desc_ar}"
return desc_en or desc_ar or "No description provided"
def _build_metadata(self, row_data: Dict, ref_num: str) -> Dict:
"""Build metadata dictionary."""
return {
"import_source": "historical_excel_2022",
"imported_at": datetime.now().isoformat(),
"original_sheet": self.sheet_name,
"reference_number": ref_num,
"original_complaint_num": row_data.get("complaint_num"),
"mrn": row_data.get("mrn"),
"source": row_data.get("source"),
"satisfaction": row_data.get("satisfaction"),
"original_staff_name": row_data.get("accused_staff"),
"original_location": row_data.get("location_name"),
"original_departments": {
"main": row_data.get("main_dept_name"),
"sub": row_data.get("sub_dept_name"),
},
"taxonomy": {
"domain": row_data.get("domain"),
"category": row_data.get("category"),
"subcategory": row_data.get("subcategory"),
"classification": row_data.get("classification"),
},
"timeline": {
"received": str(row_data.get("date_received")) if row_data.get("date_received") else None,
"sent": str(row_data.get("date_sent")) if row_data.get("date_sent") else None,
"first_reminder": str(row_data.get("first_reminder")) if row_data.get("first_reminder") else None,
"escalated": str(row_data.get("escalated_date")) if row_data.get("escalated_date") else None,
"closed": str(row_data.get("closed_date")) if row_data.get("closed_date") else None,
"resolved": str(row_data.get("resolved_date")) if row_data.get("resolved_date") else None,
},
}
def _log_unmapped_taxonomy(self, row_data: Dict):
"""Log unmapped taxonomy items."""
items = [
row_data.get("domain"),
row_data.get("category"),
row_data.get("subcategory"),
row_data.get("classification"),
]
for item in items:
if item:
self.unmapped_taxonomy.add(item)
def _print_report(self):
    """Print the import summary to ``self.stdout``.

    Sections: sheet and mode, counters from ``self.stats``, then the
    unmapped taxonomy values, unmatched locations, unmatched departments
    and the first 10 entries of ``self.errors`` (each only when
    non-empty), and finally a dry-run reminder when applicable.
    """
    write = self.stdout.write
    divider = "=" * 80

    def write_listing(title, hint, values):
        """Print one titled bullet list; nothing is printed for an empty collection."""
        if not values:
            return
        write(f"\n--- {title} ---")
        write(hint)
        # key=str keeps sorting safe when cells mix text and numbers.
        for value in sorted(values, key=str):
            write(f" - {value}")

    write("\n" + divider)
    write(self.style.SUCCESS("IMPORT REPORT"))
    write(divider)
    write(f"\nSheet: {self.sheet_name}")
    write(f"Mode: {'DRY RUN' if self.dry_run else 'ACTUAL IMPORT'}")

    write("\n--- Statistics ---")
    write(f"Total rows processed: {self.stats['processed']}")
    write(self.style.SUCCESS(f"Successfully imported: {self.stats['success']}"))
    # This counter is incremented when a duplicate reference number gets a
    # letter suffix; those rows are imported, not skipped.
    write(
        self.style.WARNING(
            f"Duplicate reference numbers (imported with suffix): {self.stats['skipped_duplicate']}"
        )
    )
    write(self.style.ERROR(f"Failed: {self.stats['failed']}"))

    write_listing(
        "Unmapped Taxonomy Items",
        "Add these to complaint_taxonomy_mapping.py:",
        self.unmapped_taxonomy,
    )
    write_listing(
        "Unmatched Locations",
        "No Location found with these name_ar values:",
        self.unmatched_locations,
    )
    write_listing(
        "Unmatched Departments",
        "No MainSection/SubSection found with these name_ar values:",
        self.unmatched_departments,
    )

    if self.errors:
        write("\n--- Errors ---")
        write(f"Total errors: {len(self.errors)}")
        for error in self.errors[:10]:  # Show first 10
            write(self.style.ERROR(f"Row {error['row']} (Complaint #{error['complaint_num']}): {error['error']}"))
        if len(self.errors) > 10:
            write(f"... and {len(self.errors) - 10} more errors")

    write("\n" + divider)
    if self.dry_run:
        write(self.style.WARNING("\nThis was a DRY RUN. No data was saved."))
        write("Run without --dry-run to perform actual import.")