# hospital-management/patients/data_generators/saudi_claims_generator.py
# Marwan Alwali 610e165e17 update
# 2025-09-04 19:19:52 +03:00
#
# 593 lines
# 26 KiB
# Python

"""
Saudi-influenced Insurance Claims Data Generator
This module generates realistic insurance claims data tailored for the Saudi healthcare system,
including local insurance providers, medical facilities, and healthcare practices.
"""
import random
import uuid
from datetime import datetime, timedelta
from decimal import Decimal
from django.utils import timezone
from django.contrib.auth import get_user_model
User = get_user_model()
class SaudiClaimsDataGenerator:
    """
    Generate synthetic insurance-claim records styled after the Saudi healthcare system.

    Every value is produced with the ``random`` module from the lookup tables
    defined on this class, so the output is plausible-looking test data and not
    real patient, provider or insurer information.

    Claims are returned as plain dictionaries. The only instance state is the
    set of claim numbers already issued, which keeps claim numbers unique per
    generator instance.
    """

    # Saudi Insurance Companies
    SAUDI_INSURANCE_COMPANIES = [
        'Tawuniya (The Company for Cooperative Insurance)',
        'Bupa Arabia for Cooperative Insurance',
        'Malath Cooperative Insurance & Reinsurance Company',
        'Saudi Enaya Cooperative Insurance Company',
        'Allianz Saudi Fransi Cooperative Insurance Company',
        'AXA Cooperative Insurance Company',
        'Arabian Shield Cooperative Insurance Company',
        'Gulf Union Alahlia Cooperative Insurance Company',
        'Solidarity Saudi Takaful Company',
        'Al Rajhi Takaful',
        'Weqaya Takaful Insurance & Reinsurance Company',
        'Sanad Cooperative Insurance & Reinsurance Company',
        'United Cooperative Assurance Company',
        'Buruj Cooperative Insurance Company',
        'Wataniya Insurance Company',
        'Saudi Re for Cooperative Reinsurance Company',
        'Amana Cooperative Insurance Company',
        'Ace Arabia Cooperative Insurance Company',
        'Al-Ahlia Insurance Company',
        'Mediterranean & Gulf Insurance & Reinsurance Company',
    ]

    # Saudi Healthcare Providers (Doctors)
    SAUDI_HEALTHCARE_PROVIDERS = [
        'د. أحمد محمد العبدالله',  # Dr. Ahmed Mohammed Al-Abdullah
        'د. فاطمة علي الزهراني',  # Dr. Fatima Ali Al-Zahrani
        'د. محمد عبدالرحمن القحطاني',  # Dr. Mohammed Abdulrahman Al-Qahtani
        'د. نورا سعد الغامدي',  # Dr. Nora Saad Al-Ghamdi
        'د. خالد يوسف الشهري',  # Dr. Khalid Youssef Al-Shahri
        'د. عائشة حسن الحربي',  # Dr. Aisha Hassan Al-Harbi
        'د. عبدالله سليمان المطيري',  # Dr. Abdullah Sulaiman Al-Mutairi
        'د. مريم أحمد الدوسري',  # Dr. Maryam Ahmed Al-Dosari
        'د. سعد محمد العتيبي',  # Dr. Saad Mohammed Al-Otaibi
        'د. هند عبدالعزيز الراشد',  # Dr. Hind Abdulaziz Al-Rashid
        'د. عمر فهد الخالدي',  # Dr. Omar Fahad Al-Khalidi
        'د. سارة عبدالرحمن الفيصل',  # Dr. Sarah Abdulrahman Al-Faisal
        'د. يوسف علي البقمي',  # Dr. Youssef Ali Al-Baqami
        'د. ليلى محمد الجبير',  # Dr. Layla Mohammed Al-Jubair
        'د. فيصل عبدالله السديري',  # Dr. Faisal Abdullah Al-Sudairi
        'د. رنا سعود الأحمد',  # Dr. Rana Saud Al-Ahmad
        'د. طارق حسام الدين الأنصاري',  # Dr. Tariq Hussamuddin Al-Ansari
        'د. إيمان عبدالمحسن الشمري',  # Dr. Iman Abdulmohsen Al-Shamri
        'د. ماجد فواز العسيري',  # Dr. Majed Fawaz Al-Asiri
        'د. دانا محمد الفهد',  # Dr. Dana Mohammed Al-Fahad
    ]

    # Saudi Healthcare Facilities
    # (English comments are literal translations of the Arabic literal.)
    SAUDI_HEALTHCARE_FACILITIES = [
        'مستشفى الملك فيصل التخصصي ومركز الأبحاث',  # King Faisal Specialist Hospital & Research Centre
        'مستشفى الملك فهد الطبي',  # King Fahad Medical Hospital
        'مستشفى الملك عبدالعزيز الجامعي',  # King Abdulaziz University Hospital
        'مستشفى الملك خالد الجامعي',  # King Khalid University Hospital
        'مستشفى الأمير سلطان العسكري',  # Prince Sultan Military Hospital
        'المستشفى السعودي الألماني',  # Saudi German Hospital
        'مستشفى دلة',  # Dallah Hospital
        'مستشفى الحبيب',  # Al Habib Hospital
        'مستشفى المملكة',  # Al Mamlaka Hospital
        'مستشفى الدكتور سليمان الحبيب',  # Dr. Sulaiman Al Habib Hospital
        'مستشفى الموسى التخصصي',  # Al Moosa Specialist Hospital
        'مستشفى بقشان',  # Bagshan Hospital
        'مستشفى الأهلي',  # Al Ahli Hospital
        'مستشفى سعد التخصصي',  # Saad Specialist Hospital
        'مستشفى الملك فهد للحرس الوطني',  # King Fahad Hospital - National Guard
        'مستشفى الأمير محمد بن عبدالعزيز',  # Prince Mohammed bin Abdulaziz Hospital
        'مستشفى الملك سعود',  # King Saud Hospital
        'مستشفى الولادة والأطفال',  # Maternity and Children Hospital
        'مستشفى العيون التخصصي',  # Specialized Eye Hospital
        'مركز الأورام الطبي',  # Medical Oncology Center
    ]

    # Common Saudi Medical Conditions (ICD-10 codes with Arabic descriptions).
    # 'prevalence' is used only as a relative sampling weight; the values do
    # not need to sum to 1.
    SAUDI_MEDICAL_CONDITIONS = [
        {
            'code': 'E11.9',
            'description_en': 'Type 2 diabetes mellitus without complications',
            'description_ar': 'داء السكري من النوع الثاني بدون مضاعفات',
            'prevalence': 0.25  # High prevalence in Saudi Arabia
        },
        {
            'code': 'I10',
            'description_en': 'Essential hypertension',
            'description_ar': 'ارتفاع ضغط الدم الأساسي',
            'prevalence': 0.20
        },
        {
            'code': 'E78.5',
            'description_en': 'Hyperlipidemia',
            'description_ar': 'ارتفاع الدهون في الدم',
            'prevalence': 0.18
        },
        {
            'code': 'M79.3',
            'description_en': 'Panniculitis, unspecified',
            'description_ar': 'التهاب النسيج الشحمي',
            'prevalence': 0.15
        },
        {
            'code': 'J45.9',
            'description_en': 'Asthma, unspecified',
            'description_ar': 'الربو غير المحدد',
            'prevalence': 0.12
        },
        {
            'code': 'K21.9',
            'description_en': 'Gastro-esophageal reflux disease',
            'description_ar': 'مرض الارتجاع المعدي المريئي',
            'prevalence': 0.10
        },
        {
            'code': 'M25.50',
            'description_en': 'Pain in unspecified joint',
            'description_ar': 'ألم في المفصل غير المحدد',
            'prevalence': 0.08
        },
        {
            'code': 'N18.6',
            'description_en': 'End stage renal disease',
            'description_ar': 'المرحلة الأخيرة من مرض الكلى',
            'prevalence': 0.06
        },
        {
            'code': 'F32.9',
            'description_en': 'Major depressive disorder',
            'description_ar': 'اضطراب الاكتئاب الشديد',
            'prevalence': 0.05
        },
        {
            'code': 'Z51.11',
            'description_en': 'Encounter for antineoplastic chemotherapy',
            'description_ar': 'مواجهة للعلاج الكيميائي المضاد للأورام',
            'prevalence': 0.04
        },
    ]

    # Common Procedures (CPT codes); 'cost_range' is (min, max) in Saudi Riyals.
    SAUDI_MEDICAL_PROCEDURES = [
        {
            'code': '99213',
            'description': 'Office visit - established patient',
            'description_ar': 'زيارة العيادة - مريض منتظم',
            'cost_range': (150, 300)
        },
        {
            'code': '99214',
            'description': 'Office visit - established patient, moderate complexity',
            'description_ar': 'زيارة العيادة - مريض منتظم، تعقيد متوسط',
            'cost_range': (200, 400)
        },
        {
            'code': '80053',
            'description': 'Comprehensive metabolic panel',
            'description_ar': 'فحص الأيض الشامل',
            'cost_range': (100, 200)
        },
        {
            'code': '85025',
            'description': 'Blood count; complete (CBC)',
            'description_ar': 'تعداد الدم الكامل',
            'cost_range': (50, 120)
        },
        {
            'code': '71020',
            'description': 'Chest X-ray',
            'description_ar': 'أشعة سينية للصدر',
            'cost_range': (80, 150)
        },
        {
            'code': '93000',
            'description': 'Electrocardiogram',
            'description_ar': 'تخطيط القلب الكهربائي',
            'cost_range': (75, 150)
        },
        {
            'code': '76700',
            'description': 'Abdominal ultrasound',
            'description_ar': 'الموجات فوق الصوتية للبطن',
            'cost_range': (200, 400)
        },
        {
            'code': '45378',
            'description': 'Colonoscopy',
            'description_ar': 'تنظير القولون',
            'cost_range': (800, 1500)
        },
        {
            'code': '47562',
            'description': 'Laparoscopic cholecystectomy',
            'description_ar': 'استئصال المرارة بالمنظار',
            'cost_range': (5000, 8000)
        },
        {
            'code': '66984',
            'description': 'Cataract surgery',
            'description_ar': 'جراحة الساد',
            'cost_range': (3000, 6000)
        },
    ]

    # Saudi Names for generating realistic patient data
    SAUDI_FIRST_NAMES_MALE = [
        'محمد', 'أحمد', 'عبدالله', 'عبدالرحمن', 'علي', 'سعد', 'فهد', 'خالد',
        'عبدالعزيز', 'سلطان', 'فيصل', 'عمر', 'يوسف', 'إبراهيم', 'حسن', 'طارق',
        'ماجد', 'نواف', 'بندر', 'تركي', 'مشعل', 'وليد', 'صالح', 'عادل'
    ]
    SAUDI_FIRST_NAMES_FEMALE = [
        'فاطمة', 'عائشة', 'نورا', 'سارة', 'مريم', 'هند', 'ليلى', 'رنا', 'دانا',
        'ريم', 'أمل', 'منى', 'سمر', 'لمى', 'غادة', 'نهى', 'إيمان', 'خديجة',
        'زينب', 'رقية', 'جواهر', 'شهد', 'روان', 'لين'
    ]
    SAUDI_FAMILY_NAMES = [
        'العبدالله', 'الأحمد', 'المحمد', 'العلي', 'الزهراني', 'الغامدي', 'القحطاني',
        'الشهري', 'الحربي', 'المطيري', 'الدوسري', 'العتيبي', 'الراشد', 'الخالدي',
        'الفيصل', 'البقمي', 'الجبير', 'السديري', 'الأنصاري', 'الشمري', 'العسيري',
        'الفهد', 'السعود', 'آل سعود', 'الملك', 'الأمير', 'الشيخ', 'العثمان',
        'الصالح', 'الحسن', 'الحسين', 'الطيار', 'الرشيد', 'الفارس'
    ]

    # --- Internal tuning tables -------------------------------------------

    # Monetary amounts are rounded to two decimal places.
    _TWO_PLACES = Decimal('0.01')

    # Inclusive range of the random 6-digit part of a claim number.
    _CLAIM_SEQUENCE_RANGE = (100000, 999999)

    # A claim's DRAFT date is never older than this many days.
    _MAX_CLAIM_AGE_DAYS = 180

    # Billed-amount multiplier per claim type; unlisted types use 1.0.
    _CLAIM_TYPE_MULTIPLIERS = {
        'EMERGENCY': 1.5,
        'INPATIENT': 2.0,
        'SURGICAL': 3.0,
        'MATERNITY': 2.5,
        'DENTAL': 0.8,
        'VISION': 0.6,
        'PHARMACY': 0.3,
        'PREVENTIVE': 0.5,
    }

    # (min, max) share of the billed amount the insurer approves, per claim type.
    _APPROVAL_RATES = {
        'PREVENTIVE': (0.95, 1.0),
        'MEDICAL': (0.80, 0.95),
        'EMERGENCY': (0.90, 1.0),
        'INPATIENT': (0.85, 0.95),
        'SURGICAL': (0.75, 0.90),
        'DENTAL': (0.70, 0.85),
        'VISION': (0.60, 0.80),
        'PHARMACY': (0.85, 0.95),
        'MATERNITY': (0.90, 1.0),
    }
    _DEFAULT_APPROVAL_RATE = (0.75, 0.90)

    # Fixed figure reported by get_saudi_insurance_statistics(); it is an
    # assumed constant, not computed from the claims.
    _AVERAGE_PROCESSING_TIME_DAYS = 7.5

    def __init__(self):
        """Initialize the Saudi claims data generator."""
        # Claim numbers already issued by this instance (guarantees uniqueness).
        self.generated_claim_numbers = set()

    def generate_saudi_id(self):
        """Return a random 10-digit string shaped like a Saudi ID or Iqama number.

        The first digit is '1' (Saudi) or '2' (resident). The remaining nine
        digits, including the trailing "check digit", are purely random: no
        checksum is computed, so the value is not guaranteed to pass a real
        ID validator.
        """
        prefix = random.choice(['1', '2'])
        # Eight random body digits + one random check digit = 10 digits in total.
        middle = ''.join(str(random.randint(0, 9)) for _ in range(8))
        check_digit = str(random.randint(0, 9))
        return prefix + middle + check_digit

    def generate_claim_number(self):
        """Return a claim number of the form ``CLM<year><6 digits>`` unique to this instance.

        Raises:
            RuntimeError: if every sequence number for the current year has
                already been issued by this generator (instead of looping forever).
        """
        prefix = f"CLM{datetime.now().year}"
        low, high = self._CLAIM_SEQUENCE_RANGE
        capacity = high - low + 1

        # Cheap pre-check: exhaustion is only possible once the set is at
        # least as large as the sequence space, so the O(n) scan is skipped
        # in normal use.
        if len(self.generated_claim_numbers) >= capacity:
            used = sum(1 for number in self.generated_claim_numbers
                       if number.startswith(prefix))
            if used >= capacity:
                raise RuntimeError(
                    f"All claim numbers with prefix {prefix} have been used"
                )

        while True:
            claim_number = f"{prefix}{random.randint(low, high)}"
            if claim_number not in self.generated_claim_numbers:
                self.generated_claim_numbers.add(claim_number)
                return claim_number

    def generate_authorization_number(self):
        """Generate a prior authorization number (``AUTH`` + 6 random digits)."""
        return f"AUTH{random.randint(100000, 999999)}"

    def generate_provider_license(self):
        """Generate a Saudi medical license number (``SML`` + 5 random digits)."""
        return f"SML{random.randint(10000, 99999)}"

    def generate_facility_license(self):
        """Generate a MOH facility license number (``MOH`` + 6 random digits)."""
        return f"MOH{random.randint(100000, 999999)}"

    def select_weighted_condition(self):
        """Pick one entry of ``SAUDI_MEDICAL_CONDITIONS``, weighted by its 'prevalence'.

        The returned dict is the shared class-level entry; callers must not
        mutate it.
        """
        conditions = self.SAUDI_MEDICAL_CONDITIONS
        weights = [condition['prevalence'] for condition in conditions]
        return random.choices(conditions, weights=weights)[0]

    def generate_secondary_diagnoses(self, primary_condition, count=None):
        """Return up to ``count`` distinct secondary diagnoses, never the primary one.

        Args:
            primary_condition: condition dict whose 'code' is excluded.
            count: number of diagnoses wanted; when ``None`` a value in 0-3 is
                drawn (weights 0.4/0.3/0.2/0.1). It is clamped to the number
                of conditions available.

        Returns:
            List of dicts with 'code', 'description' (English) and
            'description_ar'. Selection is uniform; despite the name, the
            diagnoses are not clinically related to the primary condition.
        """
        if count is None:
            count = random.choices([0, 1, 2, 3], weights=[0.4, 0.3, 0.2, 0.1])[0]

        candidates = [
            condition for condition in self.SAUDI_MEDICAL_CONDITIONS
            if condition['code'] != primary_condition['code']
        ]
        # sample() draws without replacement, so no diagnosis repeats.
        how_many = max(0, min(count, len(candidates)))
        return [
            {
                'code': condition['code'],
                'description': condition['description_en'],
                'description_ar': condition['description_ar'],
            }
            for condition in random.sample(candidates, how_many)
        ]

    def generate_procedures(self, condition, count=None):
        """Return ``count`` randomly chosen procedures, each with a random cost.

        Args:
            condition: accepted for interface compatibility; currently unused,
                so procedures are NOT tailored to the condition.
            count: number of procedures; when ``None`` a value in 1-3 is drawn
                (weights 0.6/0.3/0.1). The same procedure may appear twice.

        Returns:
            List of dicts with 'code', 'description', 'description_ar' and
            'cost' (float SAR within the procedure's 'cost_range', rounded to
            two decimals).
        """
        if count is None:
            count = random.choices([1, 2, 3], weights=[0.6, 0.3, 0.1])[0]

        procedures = []
        for _ in range(count):
            procedure = random.choice(self.SAUDI_MEDICAL_PROCEDURES)
            procedures.append({
                'code': procedure['code'],
                'description': procedure['description'],
                'description_ar': procedure['description_ar'],
                # Money has two decimal places; avoid 15-digit float noise.
                'cost': round(random.uniform(*procedure['cost_range']), 2),
            })
        return procedures

    def calculate_saudi_costs(self, procedures, claim_type='MEDICAL', *, denied=False):
        """Derive the monetary fields of a claim, in Saudi Riyals.

        Args:
            procedures: iterable of dicts carrying a numeric 'cost'.
            claim_type: selects the billing multiplier and approval-rate band;
                unknown types use multiplier 1.0 and the default band.
            denied: when true the insurer approves nothing, so approved, paid
                and patient-responsibility amounts are zero and the discount
                equals the billed amount.

        Returns:
            Dict of ``Decimal`` values with two decimal places:
            'billed_amount', 'approved_amount', 'paid_amount',
            'patient_responsibility', 'discount_amount'. The values are
            internally consistent:
            ``paid + patient_responsibility == approved`` and
            ``billed - approved == discount``.
        """
        two_places = self._TWO_PLACES
        base_cost = sum(proc['cost'] for proc in procedures)
        multiplier = self._CLAIM_TYPE_MULTIPLIERS.get(claim_type, 1.0)

        # Round each figure as soon as it is known and derive the rest from
        # the rounded values; rounding them independently can leave the
        # totals off by 0.01.
        billed_amount = Decimal(str(base_cost * multiplier)).quantize(two_places)

        if denied:
            approved_amount = Decimal('0.00')
            patient_responsibility = Decimal('0.00')
        else:
            min_rate, max_rate = self._APPROVAL_RATES.get(
                claim_type, self._DEFAULT_APPROVAL_RATE
            )
            approval_rate = random.uniform(min_rate, max_rate)
            approved_amount = (
                billed_amount * Decimal(str(approval_rate))
            ).quantize(two_places)

            # Patient responsibility (copay/deductible): 10-25% of the approved amount.
            copay_percentage = random.uniform(0.10, 0.25)
            patient_responsibility = (
                approved_amount * Decimal(str(copay_percentage))
            ).quantize(two_places)

        return {
            'billed_amount': billed_amount,
            'approved_amount': approved_amount,
            'paid_amount': approved_amount - patient_responsibility,
            'patient_responsibility': patient_responsibility,
            'discount_amount': billed_amount - approved_amount,
        }

    def generate_claim_status_progression(self):
        """Generate a claim's status history and return its final status.

        85% of claims follow DRAFT -> SUBMITTED -> UNDER_REVIEW -> APPROVED -> PAID.
        The remaining 15% are DENIED after review, and 30% of those are
        appealed and eventually PAID.

        Returns:
            ``(final_status, status_dates)`` where ``status_dates`` maps each
            status to a naive ``datetime``. Consecutive statuses are 1-14 days
            apart, the DRAFT date is at most 180 days old and no date lies in
            the future. In the appeal flow 'UNDER_REVIEW' occurs twice; the
            dict keeps the later (appeal review) date.
        """
        statuses = ['DRAFT', 'SUBMITTED', 'UNDER_REVIEW', 'APPROVED', 'PAID']
        if random.random() < 0.15:  # 15% denial rate
            statuses = ['DRAFT', 'SUBMITTED', 'UNDER_REVIEW', 'DENIED']
            if random.random() < 0.3:  # 30% of denied claims are appealed
                statuses.extend(['APPEALED', 'UNDER_REVIEW', 'APPROVED', 'PAID'])

        # Draw the gaps first so the start can be placed far enough in the
        # past for the whole history to have already happened.
        gaps = [random.randint(1, 14) for _ in statuses[1:]]
        elapsed = sum(gaps)
        start_offset = random.randint(elapsed, max(elapsed, self._MAX_CLAIM_AGE_DAYS))

        moment = datetime.now() - timedelta(days=start_offset)
        status_dates = {statuses[0]: moment}
        for status, gap in zip(statuses[1:], gaps):
            moment = moment + timedelta(days=gap)
            status_dates[status] = moment
        return statuses[-1], status_dates

    def generate_denial_info(self):
        """Return a random denial as ``{'reason': <Arabic text>, 'code': 'Dnnn'}``.

        Each reason has one fixed code, so equal reasons always carry equal codes.
        """
        denials = [
            ('D001', 'خدمة غير مغطاة بالبوليصة'),  # Service not covered by policy
            ('D002', 'مطلوب تصريح مسبق'),  # Prior authorization required
            ('D003', 'معلومات ناقصة'),  # Incomplete information
            ('D004', 'مقدم خدمة خارج الشبكة'),  # Out of network provider
            ('D005', 'تجاوز الحد الأقصى السنوي'),  # Annual limit exceeded
            ('D006', 'خدمة تجميلية غير ضرورية طبياً'),  # Cosmetic service not medically necessary
            ('D007', 'تكرار في المطالبة'),  # Duplicate claim
            ('D008', 'انتهاء صلاحية البوليصة'),  # Policy expired
            ('D009', 'خدمة مستثناة'),  # Excluded service
            ('D010', 'مطلوب تقرير طبي إضافي'),  # Additional medical report required
        ]
        # Choose the pair as a unit; picking reason and code separately would
        # attach unrelated codes to reasons.
        code, reason = random.choice(denials)
        return {
            'reason': reason,
            'code': code
        }

    def generate_attachments(self, claim_type):
        """Return the list of attachment descriptors for a claim.

        The list always starts with the medical report, invoice and insurance
        card, followed by any documents specific to ``claim_type`` and then
        0-2 randomly chosen extra documents. Each item is a new dict with
        'type' and 'name' keys.
        """
        base_attachments = [
            {'type': 'MEDICAL_REPORT', 'name': 'تقرير طبي.pdf'},
            {'type': 'INVOICE', 'name': 'فاتورة.pdf'},
            {'type': 'INSURANCE_CARD', 'name': 'بطاقة التأمين.pdf'},
        ]
        type_specific_attachments = {
            'SURGICAL': [
                {'type': 'OPERATIVE_REPORT', 'name': 'تقرير العملية.pdf'},
                {'type': 'AUTHORIZATION', 'name': 'تصريح مسبق.pdf'},
            ],
            'EMERGENCY': [
                {'type': 'DISCHARGE_SUMMARY', 'name': 'ملخص الخروج.pdf'},
            ],
            'PHARMACY': [
                {'type': 'PRESCRIPTION', 'name': 'وصفة طبية.pdf'},
            ],
            'RADIOLOGY': [
                {'type': 'RADIOLOGY_REPORT', 'name': 'تقرير الأشعة.pdf'},
            ],
            'DIAGNOSTIC': [
                {'type': 'LAB_RESULT', 'name': 'نتائج المختبر.pdf'},
            ],
        }
        additional_docs = [
            {'type': 'REFERRAL', 'name': 'خطاب تحويل.pdf'},
            {'type': 'ID_COPY', 'name': 'نسخة الهوية.pdf'},
            {'type': 'OTHER', 'name': 'مستند إضافي.pdf'},
        ]

        attachments = list(base_attachments)
        attachments.extend(type_specific_attachments.get(claim_type, []))
        # Add random additional documents
        attachments.extend(random.sample(additional_docs, random.randint(0, 2)))
        return attachments

    @staticmethod
    def _to_aware_midnight(moment):
        """Return midnight of ``moment``'s calendar day, made aware via Django's ``timezone.make_aware``."""
        return timezone.make_aware(
            datetime.combine(moment.date(), datetime.min.time())
        )

    def generate_single_claim(self, patient, insurance_info, created_by=None):
        """Generate a single insurance claim as a dictionary.

        Args:
            patient: stored unchanged under 'patient'.
            insurance_info: stored unchanged under 'insurance_info'.
            created_by: stored unchanged under 'created_by'.

        Returns:
            Dict with the claim fields. 'submitted_date', 'processed_date',
            'payment_date', 'denial_reason' and 'denial_code' are present only
            when they apply to the generated status history.

        Consistency rules applied to the random data:
            * a DENIED claim has zero approved/paid amounts;
            * the service date is never later than the first status date;
            * 'processed_date' is the date of the approval/denial decision
              and is also set for PAID claims.
        """
        # Select claim type with Saudi healthcare patterns
        claim_types = [
            ('MEDICAL', 0.35),
            ('OUTPATIENT', 0.25),
            ('PHARMACY', 0.15),
            ('DIAGNOSTIC', 0.10),
            ('EMERGENCY', 0.05),
            ('INPATIENT', 0.04),
            ('PREVENTIVE', 0.03),
            ('DENTAL', 0.02),
            ('SURGICAL', 0.01),
        ]
        claim_type = random.choices(
            [name for name, _ in claim_types],
            weights=[weight for _, weight in claim_types]
        )[0]

        # Generate medical information
        primary_condition = self.select_weighted_condition()
        secondary_conditions = self.generate_secondary_diagnoses(primary_condition)
        procedures = self.generate_procedures(primary_condition)

        # The outcome must be known before pricing: a denied claim has
        # nothing approved or paid.
        final_status, status_dates = self.generate_claim_status_progression()
        costs = self.calculate_saudi_costs(
            procedures, claim_type, denied=(final_status == 'DENIED')
        )

        # Service date (1-180 days ago), but never after the claim was first
        # recorded: a claim cannot precede the service it bills for.
        service_date = datetime.now().date() - timedelta(days=random.randint(1, 180))
        if status_dates:
            service_date = min(service_date, min(status_dates.values()).date())

        # NOTE(review): 'saudi_id_number' is random per claim rather than taken
        # from ``patient``, so one patient gets a different ID on every claim.
        claim_data = {
            'claim_number': self.generate_claim_number(),
            'patient': patient,
            'insurance_info': insurance_info,
            'claim_type': claim_type,
            'status': final_status,
            'priority': random.choices(
                ['LOW', 'NORMAL', 'HIGH', 'URGENT', 'EMERGENCY'],
                weights=[0.1, 0.6, 0.2, 0.08, 0.02]
            )[0],
            'service_date': service_date,
            'service_provider': random.choice(self.SAUDI_HEALTHCARE_PROVIDERS),
            'service_provider_license': self.generate_provider_license(),
            'facility_name': random.choice(self.SAUDI_HEALTHCARE_FACILITIES),
            'facility_license': self.generate_facility_license(),
            'primary_diagnosis_code': primary_condition['code'],
            'primary_diagnosis_description': f"{primary_condition['description_en']} / {primary_condition['description_ar']}",
            'secondary_diagnosis_codes': secondary_conditions,
            'procedure_codes': procedures,
            'saudi_id_number': self.generate_saudi_id(),
            'insurance_card_number': f"IC{random.randint(100000000, 999999999)}",
            'authorization_number': self.generate_authorization_number() if random.random() < 0.3 else None,
            'notes': f"مطالبة تأمينية لـ {claim_type.lower()} - تم إنشاؤها تلقائياً",
            'attachments': self.generate_attachments(claim_type),
            'created_by': created_by,
            **costs
        }

        # Add status-specific dates
        if 'SUBMITTED' in status_dates:
            claim_data['submitted_date'] = self._to_aware_midnight(status_dates['SUBMITTED'])

        # The decision date is the moment the claim was approved or denied;
        # a PAID claim was decided when it was approved.
        if final_status in ('APPROVED', 'PARTIALLY_APPROVED', 'DENIED'):
            decision_moment = status_dates.get(final_status)
        elif final_status == 'PAID':
            decision_moment = status_dates.get('APPROVED')
        else:
            decision_moment = None
        if decision_moment is not None:
            claim_data['processed_date'] = self._to_aware_midnight(decision_moment)

        if final_status == 'PAID' and 'PAID' in status_dates:
            claim_data['payment_date'] = self._to_aware_midnight(status_dates['PAID'])

        # Add denial information if claim is denied
        if final_status == 'DENIED':
            denial_info = self.generate_denial_info()
            claim_data['denial_reason'] = denial_info['reason']
            claim_data['denial_code'] = denial_info['code']
        return claim_data

    def generate_multiple_claims(self, patients_with_insurance, num_claims=100, created_by=None):
        """Generate ``num_claims`` claims for randomly chosen patients.

        Args:
            patients_with_insurance: sequence of ``(patient, insurance_info)``
                pairs; one is chosen at random for every claim.
            num_claims: number of claims to generate.
            created_by: forwarded to ``generate_single_claim``.

        Returns:
            List of claim dicts; empty when there are no patients to pick from.
        """
        if not patients_with_insurance:
            # Nothing to attach claims to; avoid random.choice's IndexError.
            return []

        claims_data = []
        for _ in range(num_claims):
            patient, insurance_info = random.choice(patients_with_insurance)
            claims_data.append(
                self.generate_single_claim(patient, insurance_info, created_by)
            )
        return claims_data

    def get_saudi_insurance_statistics(self, claims):
        """Summarize a list of claim dicts.

        Args:
            claims: claim dicts providing 'status', 'billed_amount',
                'approved_amount' and 'paid_amount'.

        Returns:
            ``{}`` for an empty input, otherwise a dict with claim counts,
            approval/denial rates in percent and billed/approved/paid totals
            in SAR as floats. 'average_processing_time_days' is a fixed
            constant (7.5), not derived from ``claims``.
        """
        total_claims = len(claims)
        if total_claims == 0:
            return {}

        approved_statuses = ('APPROVED', 'PARTIALLY_APPROVED', 'PAID')
        pending_statuses = ('SUBMITTED', 'UNDER_REVIEW')
        approved_claims = sum(1 for c in claims if c['status'] in approved_statuses)
        denied_claims = sum(1 for c in claims if c['status'] == 'DENIED')
        pending_claims = sum(1 for c in claims if c['status'] in pending_statuses)

        total_billed = sum(float(c['billed_amount']) for c in claims)
        total_approved = sum(float(c['approved_amount']) for c in claims)
        total_paid = sum(float(c['paid_amount']) for c in claims)

        return {
            'total_claims': total_claims,
            'approved_claims': approved_claims,
            'denied_claims': denied_claims,
            'pending_claims': pending_claims,
            'approval_rate': (approved_claims / total_claims) * 100,
            'denial_rate': (denied_claims / total_claims) * 100,
            'total_billed_sar': total_billed,
            'total_approved_sar': total_approved,
            'total_paid_sar': total_paid,
            'average_claim_amount_sar': total_billed / total_claims,
            'average_processing_time_days': self._AVERAGE_PROCESSING_TIME_DAYS,  # Typical for Saudi market
        }