"""
Saudi-influenced Insurance Claims Data Generator

This module generates realistic insurance claims data tailored for the Saudi
healthcare system, including local insurance providers, medical facilities,
and healthcare practices.
"""

import random
import uuid
from datetime import datetime, timedelta
from decimal import ROUND_HALF_UP, Decimal

from django.contrib.auth import get_user_model
from django.utils import timezone

User = get_user_model()


class SaudiClaimsDataGenerator:
    """
    Generates realistic insurance claims data for Saudi healthcare system.

    All values are synthetic: identifiers are random digits, names are picked
    from the fixed lists below, and amounts are drawn from per-procedure cost
    ranges. The generator only builds plain ``dict`` payloads; it performs no
    database writes itself.
    """

    # Saudi Insurance Companies
    SAUDI_INSURANCE_COMPANIES = [
        'Tawuniya (The Company for Cooperative Insurance)',
        'Bupa Arabia for Cooperative Insurance',
        'Malath Cooperative Insurance & Reinsurance Company',
        'Saudi Enaya Cooperative Insurance Company',
        'Allianz Saudi Fransi Cooperative Insurance Company',
        'AXA Cooperative Insurance Company',
        'Arabian Shield Cooperative Insurance Company',
        'Gulf Union Alahlia Cooperative Insurance Company',
        'Solidarity Saudi Takaful Company',
        'Al Rajhi Takaful',
        'Weqaya Takaful Insurance & Reinsurance Company',
        'Sanad Cooperative Insurance & Reinsurance Company',
        'United Cooperative Assurance Company',
        'Buruj Cooperative Insurance Company',
        'Wataniya Insurance Company',
        'Saudi Re for Cooperative Reinsurance Company',
        'Amana Cooperative Insurance Company',
        'Ace Arabia Cooperative Insurance Company',
        'Al-Ahlia Insurance Company',
        'Mediterranean & Gulf Insurance & Reinsurance Company',
    ]

    # Saudi Healthcare Providers (Doctors)
    SAUDI_HEALTHCARE_PROVIDERS = [
        'د. أحمد محمد العبدالله',  # Dr. Ahmed Mohammed Al-Abdullah
        'د. فاطمة علي الزهراني',  # Dr. Fatima Ali Al-Zahrani
        'د. محمد عبدالرحمن القحطاني',  # Dr. Mohammed Abdulrahman Al-Qahtani
        'د. نورا سعد الغامدي',  # Dr. Nora Saad Al-Ghamdi
        'د. خالد يوسف الشهري',  # Dr. Khalid Youssef Al-Shahri
        'د. عائشة حسن الحربي',  # Dr. Aisha Hassan Al-Harbi
        'د. عبدالله سليمان المطيري',  # Dr. Abdullah Sulaiman Al-Mutairi
        'د. مريم أحمد الدوسري',  # Dr. Maryam Ahmed Al-Dosari
        'د. سعد محمد العتيبي',  # Dr. Saad Mohammed Al-Otaibi
        'د. هند عبدالعزيز الراشد',  # Dr. Hind Abdulaziz Al-Rashid
        'د. عمر فهد الخالدي',  # Dr. Omar Fahad Al-Khalidi
        'د. سارة عبدالرحمن الفيصل',  # Dr. Sarah Abdulrahman Al-Faisal
        'د. يوسف علي البقمي',  # Dr. Youssef Ali Al-Baqami
        'د. ليلى محمد الجبير',  # Dr. Layla Mohammed Al-Jubair
        'د. فيصل عبدالله السديري',  # Dr. Faisal Abdullah Al-Sudairi
        'د. رنا سعود الأحمد',  # Dr. Rana Saud Al-Ahmad
        'د. طارق حسام الدين الأنصاري',  # Dr. Tariq Hussamuddin Al-Ansari
        'د. إيمان عبدالمحسن الشمري',  # Dr. Iman Abdulmohsen Al-Shamri
        'د. ماجد فواز العسيري',  # Dr. Majed Fawaz Al-Asiri
        'د. دانا محمد الفهد',  # Dr. Dana Mohammed Al-Fahad
    ]

    # Saudi Healthcare Facilities
    SAUDI_HEALTHCARE_FACILITIES = [
        'مستشفى الملك فيصل التخصصي ومركز الأبحاث',  # King Faisal Specialist Hospital & Research Centre
        'مستشفى الملك فهد الطبي',  # King Fahad Medical City
        'مستشفى الملك عبدالعزيز الجامعي',  # King Abdulaziz University Hospital
        'مستشفى الملك خالد الجامعي',  # King Khalid University Hospital
        'مستشفى الأمير سلطان العسكري',  # Prince Sultan Military Hospital
        'المستشفى السعودي الألماني',  # Saudi German Hospital
        'مستشفى دلة',  # Dallah Hospital
        'مستشفى الحبيب',  # Al Habib Medical Group
        'مستشفى المملكة',  # Al Mamlaka Hospital
        'مستشفى الدكتور سليمان الحبيب',  # Dr. Sulaiman Al Habib Hospital
        'مستشفى الموسى التخصصي',  # Al Moosa Specialist Hospital
        'مستشفى بقشان',  # Bagshan Hospital
        'مستشفى الأهلي',  # Al Ahli Hospital
        'مستشفى سعد التخصصي',  # Saad Specialist Hospital
        'مستشفى الملك فهد للحرس الوطني',  # King Fahad Hospital - National Guard
        'مستشفى الأمير محمد بن عبدالعزيز',  # Prince Mohammed bin Abdulaziz Hospital
        'مستشفى الملك سعود',  # King Saud Hospital
        'مستشفى الولادة والأطفال',  # Maternity and Children Hospital
        'مستشفى العيون التخصصي',  # Specialized Eye Hospital
        'مركز الأورام الطبي',  # Medical Oncology Center
    ]

    # Common Saudi Medical Conditions (ICD-10 codes with Arabic descriptions).
    # 'prevalence' is used only as a relative sampling weight.
    SAUDI_MEDICAL_CONDITIONS = [
        {
            'code': 'E11.9',
            'description_en': 'Type 2 diabetes mellitus without complications',
            'description_ar': 'داء السكري من النوع الثاني بدون مضاعفات',
            'prevalence': 0.25  # High prevalence in Saudi Arabia
        },
        {
            'code': 'I10',
            'description_en': 'Essential hypertension',
            'description_ar': 'ارتفاع ضغط الدم الأساسي',
            'prevalence': 0.20
        },
        {
            'code': 'E78.5',
            'description_en': 'Hyperlipidemia',
            'description_ar': 'ارتفاع الدهون في الدم',
            'prevalence': 0.18
        },
        {
            'code': 'M79.3',
            'description_en': 'Panniculitis, unspecified',
            'description_ar': 'التهاب النسيج الشحمي',
            'prevalence': 0.15
        },
        {
            'code': 'J45.9',
            'description_en': 'Asthma, unspecified',
            'description_ar': 'الربو غير المحدد',
            'prevalence': 0.12
        },
        {
            'code': 'K21.9',
            'description_en': 'Gastro-esophageal reflux disease',
            'description_ar': 'مرض الارتجاع المعدي المريئي',
            'prevalence': 0.10
        },
        {
            'code': 'M25.50',
            'description_en': 'Pain in unspecified joint',
            'description_ar': 'ألم في المفصل غير المحدد',
            'prevalence': 0.08
        },
        {
            'code': 'N18.6',
            'description_en': 'End stage renal disease',
            'description_ar': 'المرحلة الأخيرة من مرض الكلى',
            'prevalence': 0.06
        },
        {
            'code': 'F32.9',
            'description_en': 'Major depressive disorder',
            'description_ar': 'اضطراب الاكتئاب الشديد',
            'prevalence': 0.05
        },
        {
            'code': 'Z51.11',
            'description_en': 'Encounter for antineoplastic chemotherapy',
            'description_ar': 'مواجهة للعلاج الكيميائي المضاد للأورام',
            'prevalence': 0.04
        },
    ]

    # Common Procedures (CPT codes); 'cost_range' is (min, max) in SAR.
    SAUDI_MEDICAL_PROCEDURES = [
        {
            'code': '99213',
            'description': 'Office visit - established patient',
            'description_ar': 'زيارة العيادة - مريض منتظم',
            'cost_range': (150, 300)
        },
        {
            'code': '99214',
            'description': 'Office visit - established patient, moderate complexity',
            'description_ar': 'زيارة العيادة - مريض منتظم، تعقيد متوسط',
            'cost_range': (200, 400)
        },
        {
            'code': '80053',
            'description': 'Comprehensive metabolic panel',
            'description_ar': 'فحص الأيض الشامل',
            'cost_range': (100, 200)
        },
        {
            'code': '85025',
            'description': 'Blood count; complete (CBC)',
            'description_ar': 'تعداد الدم الكامل',
            'cost_range': (50, 120)
        },
        {
            'code': '71020',
            'description': 'Chest X-ray',
            'description_ar': 'أشعة سينية للصدر',
            'cost_range': (80, 150)
        },
        {
            'code': '93000',
            'description': 'Electrocardiogram',
            'description_ar': 'تخطيط القلب الكهربائي',
            'cost_range': (75, 150)
        },
        {
            'code': '76700',
            'description': 'Abdominal ultrasound',
            'description_ar': 'الموجات فوق الصوتية للبطن',
            'cost_range': (200, 400)
        },
        {
            'code': '45378',
            'description': 'Colonoscopy',
            'description_ar': 'تنظير القولون',
            'cost_range': (800, 1500)
        },
        {
            'code': '47562',
            'description': 'Laparoscopic cholecystectomy',
            'description_ar': 'استئصال المرارة بالمنظار',
            'cost_range': (5000, 8000)
        },
        {
            'code': '66984',
            'description': 'Cataract surgery',
            'description_ar': 'جراحة الساد',
            'cost_range': (3000, 6000)
        },
    ]

    # Saudi Names for generating realistic patient data
    SAUDI_FIRST_NAMES_MALE = [
        'محمد', 'أحمد', 'عبدالله', 'عبدالرحمن', 'علي', 'سعد', 'فهد', 'خالد',
        'عبدالعزيز', 'سلطان', 'فيصل', 'عمر', 'يوسف', 'إبراهيم', 'حسن', 'طارق',
        'ماجد', 'نواف', 'بندر', 'تركي', 'مشعل', 'وليد', 'صالح', 'عادل'
    ]

    SAUDI_FIRST_NAMES_FEMALE = [
        'فاطمة', 'عائشة', 'نورا', 'سارة', 'مريم', 'هند', 'ليلى', 'رنا',
        'دانا', 'ريم', 'أمل', 'منى', 'سمر', 'لمى', 'غادة', 'نهى',
        'إيمان', 'خديجة', 'زينب', 'رقية', 'جواهر', 'شهد', 'روان', 'لين'
    ]

    SAUDI_FAMILY_NAMES = [
        'العبدالله', 'الأحمد', 'المحمد', 'العلي', 'الزهراني', 'الغامدي',
        'القحطاني', 'الشهري', 'الحربي', 'المطيري', 'الدوسري', 'العتيبي',
        'الراشد', 'الخالدي', 'الفيصل', 'البقمي', 'الجبير', 'السديري',
        'الأنصاري', 'الشمري', 'العسيري', 'الفهد', 'السعود', 'آل سعود',
        'الملك', 'الأمير', 'الشيخ', 'العثمان', 'الصالح', 'الحسن',
        'الحسين', 'الطيار', 'الرشيد', 'الفارس'
    ]

    # Smallest currency step used when rounding SAR amounts (two decimals).
    _SAR_QUANTUM = Decimal('0.01')

    def __init__(self):
        """Initialize the Saudi claims data generator."""
        # Claim numbers already handed out by this instance; used to keep
        # generate_claim_number() results unique per generator.
        self.generated_claim_numbers = set()

    @classmethod
    def _to_sar(cls, amount):
        """Round a Decimal amount to two decimal places (half-up)."""
        return amount.quantize(cls._SAR_QUANTUM, rounding=ROUND_HALF_UP)

    @staticmethod
    def _aware_midnight(moment):
        """Return a timezone-aware datetime at 00:00 on the day of ``moment``."""
        return timezone.make_aware(
            datetime.combine(moment.date(), datetime.min.time())
        )

    def generate_saudi_id(self):
        """
        Generate a realistic-looking Saudi ID or Iqama number.

        Returns:
            A 10-character digit string: a leading '1' or '2', eight random
            digits, and a random final digit. The final digit is NOT a real
            checksum, so the value will not pass official ID validation.
        """
        # Saudi ID: 1 for Saudi, 2 for resident
        prefix = random.choice(['1', '2'])

        # Next 8 digits
        middle = ''.join(str(random.randint(0, 9)) for _ in range(8))

        # Check digit (simplified: random rather than computed)
        check_digit = str(random.randint(0, 9))

        return prefix + middle + check_digit

    def generate_claim_number(self):
        """
        Generate a claim number that this instance has not produced before.

        Returns:
            A string of the form ``CLM<year><6-digit sequence>``.

        Note:
            Uniqueness is tracked only in ``self.generated_claim_numbers``.
            The loop retries until an unused number is found, so it would not
            terminate if all 900,000 sequences for the year were used up.
        """
        year = datetime.now().year
        while True:
            sequence = random.randint(100000, 999999)
            claim_number = f"CLM{year}{sequence}"

            if claim_number not in self.generated_claim_numbers:
                self.generated_claim_numbers.add(claim_number)
                return claim_number

    def generate_authorization_number(self):
        """Generate a prior authorization number (``AUTH`` + 6 random digits)."""
        return f"AUTH{random.randint(100000, 999999)}"

    def generate_provider_license(self):
        """Generate a Saudi medical license number (``SML`` + 5 random digits)."""
        return f"SML{random.randint(10000, 99999)}"

    def generate_facility_license(self):
        """Generate a MOH facility license number (``MOH`` + 6 random digits)."""
        return f"MOH{random.randint(100000, 999999)}"

    def select_weighted_condition(self):
        """
        Select a medical condition based on prevalence weights.

        Returns:
            One entry of ``SAUDI_MEDICAL_CONDITIONS`` (the dict itself, not a
            copy), chosen with probability proportional to its 'prevalence'.
        """
        conditions = self.SAUDI_MEDICAL_CONDITIONS
        weights = [condition['prevalence'] for condition in conditions]
        return random.choices(conditions, weights=weights)[0]

    def generate_secondary_diagnoses(self, primary_condition, count=None):
        """
        Generate secondary diagnoses distinct from the primary condition.

        Args:
            primary_condition: Condition dict whose 'code' must be excluded.
            count: Number of diagnoses wanted. When ``None``, 0-3 is drawn
                with weights 0.4/0.3/0.2/0.1. Values larger than the number
                of remaining conditions are capped.

        Returns:
            A list of dicts with 'code', 'description' and 'description_ar',
            without duplicates.
        """
        if count is None:
            count = random.choices([0, 1, 2, 3], weights=[0.4, 0.3, 0.2, 0.1])[0]

        available_conditions = [
            c for c in self.SAUDI_MEDICAL_CONDITIONS
            if c['code'] != primary_condition['code']
        ]

        # Sampling without replacement guarantees distinct diagnoses; the
        # clamp keeps oversized or negative counts from raising.
        how_many = max(0, min(count, len(available_conditions)))
        return [
            {
                'code': condition['code'],
                'description': condition['description_en'],
                'description_ar': condition['description_ar'],
            }
            for condition in random.sample(available_conditions, how_many)
        ]

    def generate_procedures(self, condition, count=None):
        """
        Generate a list of procedures for a claim.

        Args:
            condition: Accepted for interface compatibility; the current
                implementation does not use it when picking procedures.
            count: Number of procedures. When ``None``, 1-3 is drawn with
                weights 0.6/0.3/0.1.

        Returns:
            A list of dicts with 'code', 'description', 'description_ar' and
            a float 'cost' in SAR (two decimals) drawn uniformly from the
            procedure's cost range. The same procedure may appear twice.
        """
        if count is None:
            count = random.choices([1, 2, 3], weights=[0.6, 0.3, 0.1])[0]

        procedures = []
        for _ in range(count):
            procedure = random.choice(self.SAUDI_MEDICAL_PROCEDURES)
            procedures.append({
                'code': procedure['code'],
                'description': procedure['description'],
                'description_ar': procedure['description_ar'],
                # Rounded so the stored cost is a valid currency amount.
                'cost': round(random.uniform(*procedure['cost_range']), 2),
            })

        return procedures

    def calculate_saudi_costs(self, procedures, claim_type='MEDICAL'):
        """
        Calculate claim amounts in Saudi Riyals.

        Args:
            procedures: Iterable of dicts, each with a numeric 'cost'.
            claim_type: Claim type key; unknown types use a cost multiplier
                of 1.0 and an approval-rate range of 75-90%.

        Returns:
            A dict of ``Decimal`` values rounded to two places with keys
            'billed_amount', 'approved_amount', 'paid_amount',
            'patient_responsibility' and 'discount_amount'. The values are
            mutually consistent: paid + patient_responsibility == approved
            and billed - approved == discount.
        """
        base_cost = sum(proc['cost'] for proc in procedures)

        # Adjust for claim type
        multipliers = {
            'EMERGENCY': 1.5,
            'INPATIENT': 2.0,
            'SURGICAL': 3.0,
            'MATERNITY': 2.5,
            'DENTAL': 0.8,
            'VISION': 0.6,
            'PHARMACY': 0.3,
            'PREVENTIVE': 0.5,
        }
        multiplier = multipliers.get(claim_type, 1.0)
        billed_amount = self._to_sar(Decimal(str(base_cost * multiplier)))

        # Insurance approval rates (min, max) per claim type
        approval_rates = {
            'PREVENTIVE': (0.95, 1.0),
            'MEDICAL': (0.80, 0.95),
            'EMERGENCY': (0.90, 1.0),
            'INPATIENT': (0.85, 0.95),
            'SURGICAL': (0.75, 0.90),
            'DENTAL': (0.70, 0.85),
            'VISION': (0.60, 0.80),
            'PHARMACY': (0.85, 0.95),
            'MATERNITY': (0.90, 1.0),
        }
        min_rate, max_rate = approval_rates.get(claim_type, (0.75, 0.90))
        approval_rate = random.uniform(min_rate, max_rate)
        approved_amount = self._to_sar(billed_amount * Decimal(str(approval_rate)))

        # Patient responsibility (copay/deductible): 10-25% of approved
        copay_percentage = random.uniform(0.10, 0.25)
        patient_responsibility = self._to_sar(
            approved_amount * Decimal(str(copay_percentage))
        )

        # Derive the remaining figures from the already-rounded ones so the
        # totals reconcile exactly instead of drifting by a rounding step.
        paid_amount = approved_amount - patient_responsibility
        discount_amount = billed_amount - approved_amount

        return {
            'billed_amount': billed_amount,
            'approved_amount': approved_amount,
            'paid_amount': paid_amount,
            'patient_responsibility': patient_responsibility,
            'discount_amount': discount_amount,
        }

    def generate_claim_status_progression(self):
        """
        Generate a claim status progression with a date per status.

        About 15% of claims are denied; about 30% of those are appealed and
        then approved and paid. Consecutive statuses are 1-14 days apart.
        The first status falls within the last 180 days and the last status
        is never dated in the future.

        Returns:
            ``(final_status, status_dates)`` where ``final_status`` is 'PAID'
            or 'DENIED' and ``status_dates`` maps status name to a naive
            ``datetime``. For appealed claims 'UNDER_REVIEW' occurs twice in
            the progression, so its entry holds the later (post-appeal) date.
        """
        # Some claims may be denied or require appeals
        if random.random() < 0.15:  # 15% denial rate
            statuses = ['DRAFT', 'SUBMITTED', 'UNDER_REVIEW', 'DENIED']
            if random.random() < 0.3:  # 30% of denied claims are appealed
                statuses.extend(['APPEALED', 'UNDER_REVIEW', 'APPROVED', 'PAID'])
        else:
            statuses = ['DRAFT', 'SUBMITTED', 'UNDER_REVIEW', 'APPROVED', 'PAID']

        # Draw the gaps first so the start can be placed far enough in the
        # past for the whole progression to finish no later than "now".
        gaps = [random.randint(1, 14) for _ in statuses[1:]]
        total_days = sum(gaps)
        start_offset = random.randint(total_days, max(total_days, 180))

        current = datetime.now() - timedelta(days=start_offset)
        status_dates = {statuses[0]: current}
        for status, gap in zip(statuses[1:], gaps):
            current = current + timedelta(days=gap)
            status_dates[status] = current

        return statuses[-1], status_dates

    def generate_denial_info(self):
        """
        Generate denial information for denied claims.

        Returns:
            A dict with an Arabic 'reason' and a 'code'. The two are drawn
            independently, so a code is not tied to a particular reason.
        """
        denial_reasons = [
            'خدمة غير مغطاة بالبوليصة',  # Service not covered by policy
            'مطلوب تصريح مسبق',  # Prior authorization required
            'معلومات ناقصة',  # Incomplete information
            'مقدم خدمة خارج الشبكة',  # Out of network provider
            'تجاوز الحد الأقصى السنوي',  # Annual limit exceeded
            'خدمة تجميلية غير ضرورية طبياً',  # Cosmetic service not medically necessary
            'تكرار في المطالبة',  # Duplicate claim
            'انتهاء صلاحية البوليصة',  # Policy expired
            'خدمة مستثناة',  # Excluded service
            'مطلوب تقرير طبي إضافي',  # Additional medical report required
        ]

        denial_codes = ['D001', 'D002', 'D003', 'D004', 'D005',
                        'D006', 'D007', 'D008', 'D009', 'D010']

        return {
            'reason': random.choice(denial_reasons),
            'code': random.choice(denial_codes)
        }

    def generate_attachments(self, claim_type):
        """
        Generate document attachment descriptors for a claim.

        Args:
            claim_type: Claim type; some types add type-specific documents.

        Returns:
            A list of ``{'type', 'name'}`` dicts: three base documents, any
            documents specific to ``claim_type``, and 0-2 random extras.
        """
        base_attachments = [
            {'type': 'MEDICAL_REPORT', 'name': 'تقرير طبي.pdf'},
            {'type': 'INVOICE', 'name': 'فاتورة.pdf'},
            {'type': 'INSURANCE_CARD', 'name': 'بطاقة التأمين.pdf'},
        ]

        type_specific_attachments = {
            'SURGICAL': [
                {'type': 'OPERATIVE_REPORT', 'name': 'تقرير العملية.pdf'},
                {'type': 'AUTHORIZATION', 'name': 'تصريح مسبق.pdf'},
            ],
            'EMERGENCY': [
                {'type': 'DISCHARGE_SUMMARY', 'name': 'ملخص الخروج.pdf'},
            ],
            'PHARMACY': [
                {'type': 'PRESCRIPTION', 'name': 'وصفة طبية.pdf'},
            ],
            'RADIOLOGY': [
                {'type': 'RADIOLOGY_REPORT', 'name': 'تقرير الأشعة.pdf'},
            ],
            'DIAGNOSTIC': [
                {'type': 'LAB_RESULT', 'name': 'نتائج المختبر.pdf'},
            ],
        }

        attachments = list(base_attachments)
        attachments.extend(type_specific_attachments.get(claim_type, []))

        # Add random additional documents
        additional_docs = [
            {'type': 'REFERRAL', 'name': 'خطاب تحويل.pdf'},
            {'type': 'ID_COPY', 'name': 'نسخة الهوية.pdf'},
            {'type': 'OTHER', 'name': 'مستند إضافي.pdf'},
        ]
        num_additional = random.randint(0, 2)
        attachments.extend(random.sample(additional_docs, num_additional))

        return attachments

    def generate_single_claim(self, patient, insurance_info, created_by=None):
        """
        Generate a single insurance claim payload.

        Args:
            patient: Patient object stored as-is under 'patient'.
            insurance_info: Insurance object stored as-is under
                'insurance_info'.
            created_by: Optional user stored as-is under 'created_by'.

        Returns:
            A dict describing the claim. 'submitted_date', 'processed_date'
            and 'payment_date' are timezone-aware datetimes at midnight taken
            from the status timeline; 'processed_date' is the date of the
            latest approve/deny decision, and 'service_date' is on or up to
            14 days before the claim was drafted. 'denial_reason' and
            'denial_code' are present only when the final status is 'DENIED'.
        """
        # Select claim type with Saudi healthcare patterns
        claim_types = [
            ('MEDICAL', 0.35),
            ('OUTPATIENT', 0.25),
            ('PHARMACY', 0.15),
            ('DIAGNOSTIC', 0.10),
            ('EMERGENCY', 0.05),
            ('INPATIENT', 0.04),
            ('PREVENTIVE', 0.03),
            ('DENTAL', 0.02),
            ('SURGICAL', 0.01),
        ]
        claim_type = random.choices(
            [ct[0] for ct in claim_types],
            weights=[ct[1] for ct in claim_types]
        )[0]

        # Generate medical information
        primary_condition = self.select_weighted_condition()
        secondary_conditions = self.generate_secondary_diagnoses(primary_condition)
        procedures = self.generate_procedures(primary_condition)

        # Calculate costs
        # NOTE(review): denied claims keep the non-zero approved/paid amounts
        # computed here - confirm whether consumers expect zeros instead.
        costs = self.calculate_saudi_costs(procedures, claim_type)

        # Generate status and dates
        final_status, status_dates = self.generate_claim_status_progression()

        # Anchor the service date to the claim timeline so the service never
        # happens after the claim was drafted, submitted or paid.
        if 'DRAFT' in status_dates:
            anchor_day = status_dates['DRAFT'].date()
        else:
            anchor_day = min(status_dates.values()).date()
        service_date = anchor_day - timedelta(days=random.randint(0, 14))

        # Generate claim data
        claim_data = {
            'claim_number': self.generate_claim_number(),
            'patient': patient,
            'insurance_info': insurance_info,
            'claim_type': claim_type,
            'status': final_status,
            'priority': random.choices(
                ['LOW', 'NORMAL', 'HIGH', 'URGENT', 'EMERGENCY'],
                weights=[0.1, 0.6, 0.2, 0.08, 0.02]
            )[0],
            'service_date': service_date,
            'service_provider': random.choice(self.SAUDI_HEALTHCARE_PROVIDERS),
            'service_provider_license': self.generate_provider_license(),
            'facility_name': random.choice(self.SAUDI_HEALTHCARE_FACILITIES),
            'facility_license': self.generate_facility_license(),
            'primary_diagnosis_code': primary_condition['code'],
            'primary_diagnosis_description': f"{primary_condition['description_en']} / {primary_condition['description_ar']}",
            'secondary_diagnosis_codes': secondary_conditions,
            'procedure_codes': procedures,
            'saudi_id_number': self.generate_saudi_id(),
            'insurance_card_number': f"IC{random.randint(100000000, 999999999)}",
            'authorization_number': self.generate_authorization_number() if random.random() < 0.3 else None,
            'notes': f"مطالبة تأمينية لـ {claim_type.lower()} - تم إنشاؤها تلقائياً",
            'attachments': self.generate_attachments(claim_type),
            'created_by': created_by,
            **costs
        }

        # Add status-specific dates
        if 'SUBMITTED' in status_dates:
            claim_data['submitted_date'] = self._aware_midnight(status_dates['SUBMITTED'])

        # The processed date is the most recent decision on the timeline, so
        # it is set for paid claims too and never falls after the payment.
        decision_dates = [
            status_dates[status]
            for status in ('APPROVED', 'PARTIALLY_APPROVED', 'DENIED')
            if status in status_dates
        ]
        if decision_dates:
            claim_data['processed_date'] = self._aware_midnight(max(decision_dates))

        if final_status == 'PAID' and 'PAID' in status_dates:
            claim_data['payment_date'] = self._aware_midnight(status_dates['PAID'])

        # Add denial information if claim is denied
        if final_status == 'DENIED':
            denial_info = self.generate_denial_info()
            claim_data['denial_reason'] = denial_info['reason']
            claim_data['denial_code'] = denial_info['code']

        return claim_data

    def generate_multiple_claims(self, patients_with_insurance, num_claims=100, created_by=None):
        """
        Generate multiple insurance claims.

        Args:
            patients_with_insurance: Non-empty sequence of
                ``(patient, insurance_info)`` pairs to draw from at random.
            num_claims: Number of claims to generate.
            created_by: Optional user passed through to every claim.

        Returns:
            A list of claim dicts as produced by ``generate_single_claim``.

        Raises:
            IndexError: If ``patients_with_insurance`` is empty while
                ``num_claims`` is positive.
        """
        claims_data = []

        for _ in range(num_claims):
            # Select random patient with insurance
            patient, insurance_info = random.choice(patients_with_insurance)
            claims_data.append(
                self.generate_single_claim(patient, insurance_info, created_by)
            )

        return claims_data

    def get_saudi_insurance_statistics(self, claims):
        """
        Summarize a collection of generated claims.

        Args:
            claims: Sequence of claim dicts with 'status', 'billed_amount',
                'approved_amount' and 'paid_amount'.

        Returns:
            An empty dict when ``claims`` is empty; otherwise a dict of
            counts, percentage rates and float SAR totals.
            'average_processing_time_days' is a fixed constant (7.5), not a
            value computed from the claims.
        """
        total_claims = len(claims)

        if total_claims == 0:
            return {}

        approved_statuses = ('APPROVED', 'PARTIALLY_APPROVED', 'PAID')
        pending_statuses = ('SUBMITTED', 'UNDER_REVIEW')

        # Calculate statistics
        approved_claims = sum(1 for c in claims if c['status'] in approved_statuses)
        denied_claims = sum(1 for c in claims if c['status'] == 'DENIED')
        pending_claims = sum(1 for c in claims if c['status'] in pending_statuses)

        total_billed = sum(float(c['billed_amount']) for c in claims)
        total_approved = sum(float(c['approved_amount']) for c in claims)
        total_paid = sum(float(c['paid_amount']) for c in claims)

        return {
            'total_claims': total_claims,
            'approved_claims': approved_claims,
            'denied_claims': denied_claims,
            'pending_claims': pending_claims,
            'approval_rate': (approved_claims / total_claims) * 100,
            'denial_rate': (denied_claims / total_claims) * 100,
            'total_billed_sar': total_billed,
            'total_approved_sar': total_approved,
            'total_paid_sar': total_paid,
            'average_claim_amount_sar': total_billed / total_claims,
            'average_processing_time_days': 7.5,  # Typical for Saudi market
        }