HH/apps/complaints/management/commands/import_historical_complaints.py
2026-03-28 14:03:56 +03:00

615 lines
25 KiB
Python

"""
Import historical complaints from Excel (Aug-Dec 2022).
Usage:
# Test import (AUG 2022 only, dry run)
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="AUG 2022 " --dry-run
# Actual import (AUG 2022)
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="AUG 2022 "
# Import all months
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="SEP 2022 "
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="OCT 2022"
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="NOV 2022"
python manage.py import_historical_complaints "Complaints Report - 2022.xlsx" --sheet="DEC 2022"
"""
import logging
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone
from apps.organizations.models import Hospital, Location, MainSection, SubSection, Staff
from apps.complaints.models import Complaint, ComplaintCategory
from apps.accounts.models import User
from .complaint_taxonomy_mapping import (
DOMAIN_MAPPING,
CATEGORY_MAPPING,
SUBCATEGORY_MAPPING,
CLASSIFICATION_MAPPING,
get_mapped_category,
is_taxonomy_mapped,
)
logger = logging.getLogger(__name__)
# Default hospital code for all imported complaints; the Hospital record must
# already exist in the database (the command aborts otherwise).
DEFAULT_HOSPITAL_CODE = "NUZHA-DEV"
# Column mapping: field_name -> column_number (1-based, openpyxl convention).
# The quoted Arabic strings are the literal column headers in the spreadsheet.
COLUMN_MAPPING = {
    "complaint_num": 3,  # "رقم الشكوى" (complaint number)
    "mrn": 4,  # "رقم الملف" (file / medical record number)
    "source": 5,  # "جهة الشكوى" (complaint source)
    "location_name": 6,  # "الموقع" (location)
    "main_dept_name": 7,  # "القسم الرئيس" (main department)
    "sub_dept_name": 8,  # "القسم الفرعي" (sub department)
    "date_received": 9,  # "تاريخ إستلام الشكوى" (date complaint received)
    "data_entry_person": 10,  # "المدخل" (data entry person)
    "accused_staff_id": 48,  # ID (employee ID)
    "accused_staff_name": 49,  # "اسم الشخص المشتكى عليه - ان وجد" (accused person's name, if any)
    "domain": 50,  # Domain
    "category": 51,  # Category
    "subcategory": 52,  # Sub-Category
    "classification": 53,  # Classification
    "description_ar": 54,  # "محتوى الشكوى" (complaint content, Arabic)
    "description_en": 55,  # complaint content (English)
    "satisfaction": 56,  # Satisfied/Dissatisfied
    "rightful_side": 57,  # The Rightful Side
    # Timeline columns (note: deliberately not in ascending column order)
    "date_sent": 20,  # "تم ارسال الشكوى" (complaint sent / activated)
    "first_reminder": 24,  # First Reminder Sent
    "second_reminder": 28,  # Second Reminder Sent
    "escalated_date": 32,  # Escalated
    "closed_date": 37,  # Closed
    "resolved_date": 44,  # Resolved
    "response_date": 41,  # "تاريخ الرد" (response date - for explanation received)
}
# Month-name -> two-digit month code, used by _build_reference_number when
# deriving CMP-YYYY-MM-NNNN reference numbers from sheet names such as
# "AUG 2022 " or "January 2023".
MONTH_MAP = {
    # Full month names (2023-2025 format)
    "JANUARY": "01",
    "FEBRUARY": "02",
    "MARCH": "03",
    "APRIL": "04",
    "MAY": "05",
    "JUNE": "06",
    "JULY": "07",
    "AUGUST": "08",
    "SEPTEMBER": "09",
    "OCTOBER": "10",
    "NOVEMBER": "11",
    "DECEMBER": "12",
    # Short names (2022 format for backward compatibility)
    "AUG": "08",
    "SEP": "09",
    "OCT": "10",
    "NOV": "11",
    "DEC": "12",
}
class Command(BaseCommand):
    """Import historical complaints from an Excel workbook into Complaint rows.

    One sheet (month) is imported per invocation; see the module docstring
    for usage examples.
    """

    help = "Import historical complaints from Excel (Aug-Dec 2022)"

    def add_arguments(self, parser):
        """Register the CLI options accepted by this command."""
        parser.add_argument("excel_file", type=str, help="Path to the Excel file")
        parser.add_argument(
            "--sheet", type=str, default="AUG 2022 ", help='Sheet name to import (default: "AUG 2022 ")'
        )
        parser.add_argument("--dry-run", action="store_true", help="Preview without saving to database")
        parser.add_argument("--start-row", type=int, default=3, help="First data row (default: 3, skipping header)")
    def handle(self, *args, **options):
        """Entry point: validate inputs, load the workbook, import, report.

        Raises CommandError when the hospital, the workbook, or the requested
        sheet cannot be found/loaded.
        """
        self.excel_file = options["excel_file"]
        self.sheet_name = options["sheet"]
        self.dry_run = options["dry_run"]
        self.start_row = options["start_row"]
        # Load hospital (all imported complaints are attached to it)
        self.hospital = self._load_hospital()
        if not self.hospital:
            raise CommandError(f'Hospital with code "{DEFAULT_HOSPITAL_CODE}" not found')
        self.stdout.write(self.style.SUCCESS(f"Using hospital: {self.hospital.name}"))
        # Load Excel workbook; openpyxl is imported lazily so the rest of the
        # app does not require it.
        try:
            import openpyxl

            self.wb = openpyxl.load_workbook(self.excel_file)
        except ImportError:
            raise CommandError("openpyxl is required. Install with: pip install openpyxl")
        except Exception as e:
            raise CommandError(f"Error loading Excel file: {e}")
        # Check sheet exists
        if self.sheet_name not in self.wb.sheetnames:
            available = ", ".join(self.wb.sheetnames)
            raise CommandError(f'Sheet "{self.sheet_name}" not found. Available: {available}')
        self.ws = self.wb[self.sheet_name]
        self.stdout.write(f"Processing sheet: {self.sheet_name}")
        self.stdout.write(f"Total rows: {self.ws.max_row}")
        # Statistics tracking (mutated by _process_sheet, read by _print_report)
        self.stats = {
            "processed": 0,
            "success": 0,
            "failed": 0,
            "skipped_duplicate": 0,
            "skipped_unmapped_taxonomy": 0,
        }
        self.errors = []
        self.unmapped_taxonomy = set()
        self.unmatched_locations = set()
        self.unmatched_departments = set()
        # Process rows
        self._process_sheet()
        # Generate report
        self._print_report()
def _load_hospital(self) -> Optional[Hospital]:
"""Load default hospital by code."""
try:
return Hospital.objects.get(code=DEFAULT_HOSPITAL_CODE)
except Hospital.DoesNotExist:
return None
def _process_sheet(self):
"""Process all rows in the sheet."""
row_num = self.start_row
while row_num <= self.ws.max_row:
try:
# Extract row data
row_data = self._extract_row_data(row_num)
# Skip empty rows
if not row_data.get("complaint_num"):
row_num += 1
continue
self.stats["processed"] += 1
# Check for duplicate
ref_num = self._build_reference_number(row_data["complaint_num"])
if Complaint.objects.filter(reference_number=ref_num).exists():
self.stats["skipped_duplicate"] += 1
row_num += 1
continue
# Resolve taxonomy - skip if unmapped
taxonomy = self._resolve_taxonomy(
row_data.get("domain"),
row_data.get("category"),
row_data.get("subcategory"),
row_data.get("classification"),
)
if not is_taxonomy_mapped(
row_data.get("domain"),
row_data.get("category"),
row_data.get("subcategory"),
row_data.get("classification"),
):
self.stats["skipped_unmapped_taxonomy"] += 1
self._log_unmapped_taxonomy(row_data)
row_num += 1
continue
# Resolve location and departments
location = self._resolve_location(row_data.get("location_name"))
main_section = self._resolve_section(row_data.get("main_dept_name"))
subsection = self._resolve_subsection(row_data.get("sub_dept_name"))
# Determine status
status = self._determine_status(row_data)
# Parse date_received for created_at
date_received = row_data.get("date_received")
created_at = timezone.now() # Default fallback
if date_received:
if isinstance(date_received, str):
try:
created_at = datetime.strptime(date_received, "%Y-%m-%d %H:%M:%S")
except ValueError:
try:
created_at = datetime.strptime(date_received, "%Y-%m-%d")
except ValueError:
pass
elif isinstance(date_received, datetime):
created_at = date_received
# Get or create data entry person user
data_entry_person = row_data.get("data_entry_person")
assigned_to_user = self._get_or_create_data_entry_user(data_entry_person)
# Parse timeline dates
date_sent = self._parse_datetime(row_data.get("date_sent"))
first_reminder = self._parse_datetime(row_data.get("first_reminder"))
second_reminder = self._parse_datetime(row_data.get("second_reminder"))
escalated_date = self._parse_datetime(row_data.get("escalated_date"))
closed_date = self._parse_datetime(row_data.get("closed_date"))
resolved_date = self._parse_datetime(row_data.get("resolved_date"))
response_date = self._parse_datetime(row_data.get("response_date"))
# Determine explanation tracking
explanation_requested = bool(date_sent)
explanation_requested_at = date_sent
explanation_received_at = response_date
# Resolve accused staff
accused_staff_id = row_data.get("accused_staff_id")
accused_staff = self._resolve_staff_by_id(accused_staff_id)
# Map rightful side to resolution outcome
rightful_side = row_data.get("rightful_side", "").lower().strip()
resolution_outcome = ""
if rightful_side in ["patient", "hospital", "other"]:
resolution_outcome = rightful_side
if not self.dry_run:
# Create complaint
with transaction.atomic():
complaint = Complaint.objects.create(
reference_number=ref_num,
hospital=self.hospital,
location=location,
main_section=main_section,
subsection=subsection,
title=self._build_title(row_data),
description=self._build_description(row_data),
patient_name="Unknown",
national_id="",
relation_to_patient="patient",
staff=accused_staff,
staff_name=row_data.get("accused_staff_name") or "",
domain=taxonomy.get("domain"),
category=taxonomy.get("category"),
subcategory_obj=taxonomy.get("subcategory"),
classification_obj=taxonomy.get("classification"),
status=status,
assigned_to=assigned_to_user,
resolved_by=assigned_to_user,
resolution_outcome=resolution_outcome,
# Timeline fields
activated_at=date_sent,
reminder_sent_at=first_reminder,
second_reminder_sent_at=second_reminder,
escalated_at=escalated_date,
closed_at=closed_date,
resolved_at=resolved_date,
# Explanation tracking
explanation_requested=explanation_requested,
explanation_requested_at=explanation_requested_at,
explanation_received_at=explanation_received_at,
metadata=self._build_metadata(row_data, ref_num),
)
# Update created_at to historical date (can't set during create due to auto_now_add)
Complaint.objects.filter(pk=complaint.pk).update(created_at=created_at)
self.stats["success"] += 1
except Exception as e:
self.stats["failed"] += 1
self.errors.append(
{
"row": row_num,
"complaint_num": row_data.get("complaint_num") if "row_data" in locals() else None,
"error": str(e),
}
)
logger.error(f"Error processing row {row_num}: {e}", exc_info=True)
row_num += 1
def _extract_row_data(self, row_num: int) -> Dict:
"""Extract data from Excel row."""
data = {}
for field, col in COLUMN_MAPPING.items():
cell_value = self.ws.cell(row_num, col).value
# Clean classification field (remove Excel artifacts like "AX5:BA5")
if field == "classification" and cell_value:
cell_value = re.sub(r"[A-Z]+\d+:[A-Z]+\d+", "", str(cell_value)).strip()
data[field] = cell_value
return data
def _build_reference_number(self, complaint_num) -> str:
"""Build reference number: CMP-YYYY-MM-NNNN."""
# Parse year and month from sheet name (e.g., "January 2023 " -> year=2023, month=January)
sheet_parts = self.sheet_name.strip().split()
year = sheet_parts[-1] if len(sheet_parts) > 1 else "2022"
month_part = sheet_parts[0].upper()
month_code = MONTH_MAP.get(month_part, "00")
return f"CMP-{year}-{month_code}-{int(complaint_num):04d}"
def _resolve_taxonomy(self, domain, category, subcategory, classification) -> Dict:
"""Resolve taxonomy to ComplaintCategory objects."""
return {
"domain": self._get_category_by_uuid(get_mapped_category(domain, DOMAIN_MAPPING)),
"category": self._get_category_by_uuid(get_mapped_category(category, CATEGORY_MAPPING)),
"subcategory": self._get_category_by_uuid(get_mapped_category(subcategory, SUBCATEGORY_MAPPING)),
"classification": self._get_category_by_uuid(get_mapped_category(classification, CLASSIFICATION_MAPPING)),
}
def _get_category_by_uuid(self, uuid: str) -> Optional[ComplaintCategory]:
"""Get ComplaintCategory by UUID."""
if not uuid:
return None
try:
return ComplaintCategory.objects.get(id=uuid)
except ComplaintCategory.DoesNotExist:
return None
def _parse_datetime(self, value) -> Optional[datetime]:
"""Parse datetime from various formats."""
if not value:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
try:
return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
except ValueError:
try:
return datetime.strptime(value, "%Y-%m-%d")
except ValueError:
return None
return None
def _resolve_location(self, name_ar: str) -> Optional[Location]:
"""Resolve location by Arabic name."""
if not name_ar:
return None
location = Location.objects.filter(name_ar=name_ar).first()
if not location:
self.unmatched_locations.add(name_ar)
return location
def _resolve_section(self, name_ar: str) -> Optional[MainSection]:
"""Resolve main section/department by Arabic name."""
if not name_ar:
return None
# Try Section model
section = MainSection.objects.filter(name_ar=name_ar).first()
if not section:
self.unmatched_departments.add(name_ar)
return section
def _resolve_subsection(self, name_ar: str) -> Optional[SubSection]:
"""Resolve subsection by Arabic name."""
if not name_ar:
return None
return SubSection.objects.filter(name_ar=name_ar).first()
def _resolve_staff_by_id(self, employee_id: str) -> Optional[Staff]:
"""Resolve staff by employee ID."""
if not employee_id:
return None
try:
return Staff.objects.get(employee_id=str(employee_id))
except Staff.DoesNotExist:
return None
def _get_or_create_data_entry_user(self, arabic_name: str) -> Optional[User]:
"""
Create or get PX-Coordinator user from Arabic data entry person name.
Transliterates Arabic name to Latin username using first and last name only.
Stores full Arabic name in first_name field.
Args:
arabic_name: Arabic name from Excel (e.g., "أحمد محمد عبدالله")
Returns:
User object or None if name is empty
"""
if not arabic_name:
return None
try:
from unidecode import unidecode
except ImportError:
logger.error("unidecode library not installed. Run: pip install unidecode")
return None
# Split name and get first and last parts only
parts = arabic_name.split()
if len(parts) >= 2:
first_name = parts[0]
last_name = parts[-1]
else:
first_name = arabic_name
last_name = "coordinator"
# Transliterate to Latin for username
username_first = unidecode(first_name).lower().strip()
username_last = unidecode(last_name).lower().strip()
# Clean username (remove special chars, spaces)
username_first = re.sub(r"[^a-z0-9]", "", username_first)
username_last = re.sub(r"[^a-z0-9]", "", username_last)
if not username_first:
username_first = "user"
if not username_last:
username_last = "coordinator"
username = f"{username_first}.{username_last}"
# Check if user already exists
user = User.objects.filter(username=username).first()
if user:
return user
# Check for similar users (same first name part)
similar_user = User.objects.filter(username__startswith=username_first, first_name=arabic_name).first()
if similar_user:
return similar_user
# Create new user
try:
# Generate unique email
email = f"{username}@alhammadi.med.sa"
user = User(
username=username,
first_name=arabic_name, # Full Arabic name
last_name="",
email=email,
is_active=True,
)
user.save()
logger.info(f"Created new PX-Coordinator user: {username} ({arabic_name})")
return user
except Exception as e:
logger.error(f"Error creating user {username}: {e}")
# Try with numbered suffix if username exists
for i in range(2, 100):
try:
email = f"{username}{i}@alhammadi.med.sa"
user = User(
username=f"{username}{i}",
first_name=arabic_name,
last_name="",
email=email,
is_active=True,
)
user.save()
logger.info(f"Created new PX-Coordinator user: {username}{i} ({arabic_name})")
return user
except Exception as e2:
logger.error(f"Error creating user {username}{i}: {e2}")
continue
return None
def _determine_status(self, row_data: Dict) -> str:
"""Determine complaint status from timeline dates."""
if row_data.get("closed_date"):
return "closed"
elif row_data.get("resolved_date"):
return "resolved"
elif row_data.get("escalated_date"):
return "in_progress"
else:
return "open"
def _build_title(self, row_data: Dict) -> str:
"""Build complaint title from description."""
desc = row_data.get("description_en") or row_data.get("description_ar") or ""
return desc[:500] if desc else "No description"
def _build_description(self, row_data: Dict) -> str:
"""Build complaint description (English preferred)."""
desc_en = row_data.get("description_en") or ""
desc_ar = row_data.get("description_ar") or ""
if desc_en and desc_ar:
return f"{desc_en}\n\n[Arabic]:\n{desc_ar}"
return desc_en or desc_ar or "No description provided"
def _build_metadata(self, row_data: Dict, ref_num: str) -> Dict:
"""Build metadata dictionary."""
return {
"import_source": "historical_excel_2022",
"imported_at": datetime.now().isoformat(),
"original_sheet": self.sheet_name,
"reference_number": ref_num,
"original_complaint_num": row_data.get("complaint_num"),
"mrn": row_data.get("mrn"),
"source": row_data.get("source"),
"satisfaction": row_data.get("satisfaction"),
"original_staff_name": row_data.get("accused_staff"),
"original_location": row_data.get("location_name"),
"original_departments": {
"main": row_data.get("main_dept_name"),
"sub": row_data.get("sub_dept_name"),
},
"taxonomy": {
"domain": row_data.get("domain"),
"category": row_data.get("category"),
"subcategory": row_data.get("subcategory"),
"classification": row_data.get("classification"),
},
"timeline": {
"received": str(row_data.get("date_received")) if row_data.get("date_received") else None,
"sent": str(row_data.get("date_sent")) if row_data.get("date_sent") else None,
"first_reminder": str(row_data.get("first_reminder")) if row_data.get("first_reminder") else None,
"escalated": str(row_data.get("escalated_date")) if row_data.get("escalated_date") else None,
"closed": str(row_data.get("closed_date")) if row_data.get("closed_date") else None,
"resolved": str(row_data.get("resolved_date")) if row_data.get("resolved_date") else None,
},
}
def _log_unmapped_taxonomy(self, row_data: Dict):
"""Log unmapped taxonomy items."""
items = [
row_data.get("domain"),
row_data.get("category"),
row_data.get("subcategory"),
row_data.get("classification"),
]
for item in items:
if item:
self.unmapped_taxonomy.add(item)
def _print_report(self):
"""Print import summary report."""
self.stdout.write("\n" + "=" * 80)
self.stdout.write(self.style.SUCCESS("IMPORT REPORT"))
self.stdout.write("=" * 80)
self.stdout.write(f"\nSheet: {self.sheet_name}")
self.stdout.write(f"Mode: {'DRY RUN' if self.dry_run else 'ACTUAL IMPORT'}")
self.stdout.write("\n--- Statistics ---")
self.stdout.write(f"Total rows processed: {self.stats['processed']}")
self.stdout.write(self.style.SUCCESS(f"Successfully imported: {self.stats['success']}"))
self.stdout.write(self.style.WARNING(f"Skipped (duplicates): {self.stats['skipped_duplicate']}"))
self.stdout.write(self.style.WARNING(f"Skipped (unmapped taxonomy): {self.stats['skipped_unmapped_taxonomy']}"))
self.stdout.write(self.style.ERROR(f"Failed: {self.stats['failed']}"))
if self.unmapped_taxonomy:
self.stdout.write("\n--- Unmapped Taxonomy Items ---")
self.stdout.write("Add these to complaint_taxonomy_mapping.py:")
for item in sorted(self.unmapped_taxonomy):
self.stdout.write(f" - {item}")
if self.unmatched_locations:
self.stdout.write("\n--- Unmatched Locations ---")
self.stdout.write("No Location found with these name_ar values:")
for loc in sorted(self.unmatched_locations):
self.stdout.write(f" - {loc}")
if self.unmatched_departments:
self.stdout.write("\n--- Unmatched Departments ---")
self.stdout.write("No MainSection/SubSection found with these name_ar values:")
for dept in sorted(self.unmatched_departments):
self.stdout.write(f" - {dept}")
if self.errors:
self.stdout.write("\n--- Errors ---")
self.stdout.write(f"Total errors: {len(self.errors)}")
for error in self.errors[:10]: # Show first 10
self.stdout.write(
self.style.ERROR(f"Row {error['row']} (Complaint #{error['complaint_num']}): {error['error']}")
)
if len(self.errors) > 10:
self.stdout.write(f"... and {len(self.errors) - 10} more errors")
self.stdout.write("\n" + "=" * 80)
if self.dry_run:
self.stdout.write(self.style.WARNING("\nThis was a DRY RUN. No data was saved."))
self.stdout.write("Run without --dry-run to perform actual import.")