"""
Enhanced staff matching with fuzzy matching and improved accuracy.

This module provides improved staff matching functions with:
- Fuzzy string matching (similarity ratio from ``difflib.SequenceMatcher``)
- Better handling of name variations
- Matching against original full name field
- Improved confidence scoring
"""
import logging
from difflib import SequenceMatcher
from typing import Optional, Dict, Any, Tuple, List, Union

from django.db.models import Q

logger = logging.getLogger(__name__)

# Honorifics (English and Arabic) stripped from the front of a name.
_TITLES = (
    'dr.', 'dr', 'mr.', 'mr', 'mrs.', 'mrs', 'ms.', 'ms',
    'د.', 'السيد', 'السيدة', 'الدكتور',
)


def fuzzy_match_ratio(str1: str, str2: str) -> float:
    """
    Calculate a case-insensitive similarity ratio using difflib.

    Args:
        str1: First string
        str2: Second string

    Returns:
        Float from 0.0 to 1.0 representing similarity. Returns 0.0 when
        either argument is empty or not a string, so that missing data is
        never reported as a match.
    """
    if not isinstance(str1, str) or not isinstance(str2, str):
        return 0.0
    if not str1 or not str2:
        # SequenceMatcher reports 1.0 for two empty strings; an empty name
        # carries no information, so treat it as "no similarity".
        return 0.0
    return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()


def normalize_name(name: str) -> str:
    """
    Normalize name for better matching.

    - Convert to lowercase
    - Remove common leading titles (English and Arabic)
    - Remove hyphens (Al-Shammari -> AlShammari)
    - Collapse runs of whitespace into single spaces

    Args:
        name: Raw name; may be empty or None.

    Returns:
        The normalized name, or "" when *name* is empty/None.
    """
    if not name:
        return ""

    name = name.strip().lower()

    for title in _TITLES:
        if not name.startswith(title):
            continue
        rest = name[len(title):]
        # Only strip when the title is a whole token: it is the entire
        # string, it is followed by whitespace, or it carries its own
        # trailing period. Otherwise 'mr' would eat the start of
        # "mrs. smith" and 'dr' the start of "drake".
        if not rest or rest[0].isspace() or title.endswith('.'):
            name = rest.strip()

    # Remove hyphens for better matching (Al-Shammari -> AlShammari)
    name = name.replace('-', '')

    # Collapse any run of whitespace to a single space
    return ' '.join(name.split())


def _in_department(staff, dept_id: Optional[str]) -> bool:
    """Return True when *staff* belongs to the department with id *dept_id*."""
    return bool(
        dept_id
        and staff.department
        and str(staff.department.id) == dept_id
    )


def _find_exact_matches(all_staff, staff_name: str, dept_id: Optional[str]) -> List[Tuple[Any, float, str]]:
    """Layer 1: collect (staff, confidence, method) for exact name matches."""
    found = []
    seen_ids = set()

    def record(staff, confidence: float, method: str) -> bool:
        # A staff member may satisfy several exact rules; keep the first hit.
        staff_id = str(staff.id)
        if staff_id in seen_ids:
            return False
        seen_ids.add(staff_id)
        found.append((staff, confidence, method))
        return True

    # 1a. Exact match on first_name + last_name (English)
    words = staff_name.split()
    if len(words) >= 2:
        first_name = words[0]
        last_name = ' '.join(words[1:])
        first_lower = first_name.lower()
        last_lower = last_name.lower()
        for staff in all_staff:
            if (staff.first_name or "").lower() != first_lower:
                continue
            if (staff.last_name or "").lower() != last_lower:
                continue
            in_dept = _in_department(staff, dept_id)
            confidence = 0.95 if in_dept else 0.90
            method = f"Exact English match in {'correct' if in_dept else 'any'} department"
            if record(staff, confidence, method):
                logger.info(
                    "EXACT MATCH (EN): %s %s == %s %s",
                    staff.first_name, staff.last_name, first_name, last_name,
                )

    # 1b. Exact match on full Arabic name
    for staff in all_staff:
        full_arabic = f"{staff.first_name_ar or ''} {staff.last_name_ar or ''}".strip()
        if full_arabic != staff_name:
            continue
        in_dept = _in_department(staff, dept_id)
        confidence = 0.95 if in_dept else 0.90
        method = f"Exact Arabic match in {'correct' if in_dept else 'any'} department"
        if record(staff, confidence, method):
            logger.info("EXACT MATCH (AR): %s == %s", full_arabic, staff_name)

    # 1c. Exact match on 'name' field (original full name)
    name_lower = staff_name.lower()
    for staff in all_staff:
        if staff.name and staff.name.lower() == name_lower:
            if record(staff, 0.93, "Exact match on original name field"):
                logger.info("EXACT MATCH (name field): %s == %s", staff.name, staff_name)

    return found


def _find_fuzzy_matches(all_staff, staff_name: str, dept_id: Optional[str],
                        fuzzy_threshold: float) -> List[Tuple[Any, float, str]]:
    """Layer 2: collect staff whose best name variant is similar enough."""
    found = []
    for staff in all_staff:
        # Try different name combinations; empty/None fields are skipped.
        name_combinations = [
            f"{staff.first_name or ''} {staff.last_name or ''}".strip(),
            f"{staff.first_name_ar or ''} {staff.last_name_ar or ''}".strip(),
            staff.name or "",
            staff.first_name,
            staff.last_name,
            staff.first_name_ar,
            staff.last_name_ar,
        ]

        best_ratio = 0.0
        best_match_name = ""
        for combo in name_combinations:
            if not combo:
                continue
            ratio = fuzzy_match_ratio(staff_name, combo)
            if ratio > best_ratio:
                best_ratio = ratio
                best_match_name = combo

        if best_ratio >= fuzzy_threshold:
            # Scale down slightly for fuzzy, then reward a department hit.
            dept_bonus = 0.05 if _in_department(staff, dept_id) else 0.0
            confidence = best_ratio * 0.85 + dept_bonus
            method = f"Fuzzy match ({best_ratio:.2f}) on '{best_match_name}'"
            found.append((staff, confidence, method))
            logger.info("FUZZY MATCH (%.2f): %s ~ %s", best_ratio, best_match_name, staff_name)

    return found


def _find_partial_matches(all_staff, staff_name: str, dept_id: Optional[str]) -> List[Tuple[Any, float, str]]:
    """Layer 3: collect staff for whom at least two input words appear in a name field."""
    input_words = [
        word
        for word in (normalize_name(part) for part in staff_name.split())
        if word
    ]
    total_words = len(input_words)

    found = []
    for staff in all_staff:
        # Normalize each field once per staff member, not once per word.
        normalized_fields = [
            normalize_name(field)
            for field in (
                staff.first_name,
                staff.last_name,
                staff.first_name_ar,
                staff.last_name_ar,
                staff.name or "",
            )
        ]

        # A word counts when it equals, or is contained in, any name field.
        match_count = sum(
            1
            for word in input_words
            if any(word in field for field in normalized_fields)
        )

        # Require at least 2 matching words (so total_words >= 2 here).
        if match_count >= 2:
            confidence = 0.60 + (match_count / total_words) * 0.15
            if _in_department(staff, dept_id):
                confidence += 0.05
            method = f"Partial match ({match_count}/{total_words} words)"
            found.append((staff, confidence, method))
            logger.info(
                "PARTIAL MATCH (%s/%s): %s %s",
                match_count, total_words, staff.first_name, staff.last_name,
            )

    return found


def match_staff_from_name_enhanced(
    staff_name: str,
    hospital_id: str,
    department_name: Optional[str] = None,
    return_all: bool = False,
    fuzzy_threshold: float = 0.65
) -> Tuple[Union[List[Dict[str, Any]], str], float, str]:
    """
    Enhanced staff matching with fuzzy matching and better accuracy.

    Matching runs in layers and stops at the first layer that yields a
    result: exact matches, then fuzzy matches, then partial/word matches.

    Args:
        staff_name: Name extracted from complaint (without titles)
        hospital_id: Hospital ID to search within
        department_name: Optional department name to prioritize matching
        return_all: If True, return all matching staff. If False, return
            single best match.
        fuzzy_threshold: Minimum similarity ratio for fuzzy matches
            (0.0 to 1.0)

    Returns:
        If return_all=True:
            Tuple of (matches_list, confidence_score, matching_method)
        If return_all=False:
            Tuple of (staff_id, confidence_score, matching_method)
        When nothing matches (or no name is given) the first element is an
        empty list and the confidence is 0.0, regardless of return_all.
    """
    from apps.organizations.models import Staff, Department

    if not staff_name or not staff_name.strip():
        return [], 0.0, "No staff name provided"

    staff_name = staff_name.strip()

    # Resolve the department, if one was specified. The id is stored as a
    # string because every comparison below is against str(department.id).
    dept_id = None
    if department_name:
        department = Department.objects.filter(
            hospital_id=hospital_id,
            name__iexact=department_name,
            status='active'
        ).first()
        if department:
            dept_id = str(department.id)

    # Fetch all active staff of this hospital for in-memory matching;
    # select_related avoids one extra query per staff.department access.
    all_staff = list(
        Staff.objects.filter(
            hospital_id=hospital_id,
            status='active'
        ).select_related('department')
    )

    # LAYER 1: exact matches
    candidates = _find_exact_matches(all_staff, staff_name, dept_id)

    # LAYER 2: fuzzy matches (if no exact)
    if not candidates:
        logger.info("No exact matches found, trying fuzzy matching for: %s", staff_name)
        candidates = _find_fuzzy_matches(all_staff, staff_name, dept_id, fuzzy_threshold)

    # LAYER 3: partial/word matches (if no fuzzy)
    if not candidates:
        logger.info("No fuzzy matches found, trying partial/word matching for: %s", staff_name)
        candidates = _find_partial_matches(all_staff, staff_name, dept_id)

    if not candidates:
        logger.warning("No staff match found for name: '%s'", staff_name)
        return [], 0.0, "No match found"

    matches = [
        create_match_dict(staff, confidence, method, staff_name)
        for staff, confidence, method in candidates
    ]

    # Sort by confidence (descending); the sort is stable, so ties keep
    # their discovery order.
    matches.sort(key=lambda match: match['confidence'], reverse=True)

    best = matches[0]
    best_confidence = best['confidence']
    best_method = best['matching_method']

    logger.info(
        "Returning %d match(es) for '%s'. Best: %s (confidence: %.2f, method: %s)",
        len(matches), staff_name, best['name_en'], best_confidence, best_method,
    )

    if not return_all:
        return str(best['id']), best_confidence, best_method
    return matches, best_confidence, best_method


def create_match_dict(staff, confidence: float, method: str, source_name: str) -> Dict[str, Any]:
    """
    Create a match dictionary for a staff member.

    Args:
        staff: Staff model instance
        confidence: Confidence score (0.0 to 1.0)
        method: Description of matching method
        source_name: Original input name that was matched

    Returns:
        Dictionary with match details. 'name_ar' is "" unless both Arabic
        name parts are set; department fields are None when the staff
        member has no department.
    """
    department = staff.department
    has_arabic_name = staff.first_name_ar and staff.last_name_ar
    return {
        'id': str(staff.id),
        'name_en': f"{staff.first_name} {staff.last_name}",
        'name_ar': f"{staff.first_name_ar} {staff.last_name_ar}" if has_arabic_name else "",
        'original_name': staff.name or "",
        'job_title': staff.job_title,
        'specialization': staff.specialization,
        'department': department.name if department else None,
        'department_id': str(department.id) if department else None,
        'confidence': confidence,
        'matching_method': method,
        'source_name': source_name
    }


def test_enhanced_matching():
    """Run the enhanced matching function against sample names and print the results."""
    from apps.organizations.models import Hospital

    print("\n" + "=" * 80)
    print("🧪 TESTING ENHANCED STAFF MATCHING")
    print("=" * 80)

    hospital = Hospital.objects.first()
    if not hospital:
        print("❌ No hospitals found")
        return

    # Test cases
    test_cases = [
        # Exact matches (existing staff)
        ("Omar Al-Harbi", "Should match exact"),
        ("Ahmed Al-Farsi", "Should match exact"),
        ("محمد الرشيد", "Should match Arabic exact"),
        # Fuzzy matches (variations)
        ("Omar Al Harbi", "Should match without hyphen"),
        ("Omar Alharbi", "Should match fuzzy"),
        ("احمد الفارسي", "Should match Arabic fuzzy"),
        # Partial matches
        ("Omar", "Should match first name"),
        ("Al-Harbi", "Should match last name"),
        # Non-existent (for testing suggestions)
        ("Ibrahim Abdulaziz Al-Shammari", "Non-existent staff"),
    ]

    for name, description in test_cases:
        print(f"\n🔍 Testing: '{name}'")
        print(f" Expected: {description}")

        matches, confidence, method = match_staff_from_name_enhanced(
            staff_name=name,
            hospital_id=str(hospital.id),
            return_all=True,
            fuzzy_threshold=0.65
        )

        if matches:
            print(f" ✅ Found {len(matches)} match(es)")
            print(f" Best confidence: {confidence:.2f}")
            print(f" Method: {method}")
            for i, match in enumerate(matches[:3], 1):
                print(f" {i}. {match['name_en']} ({match['name_ar']}) - {match['confidence']:.2f}")
                if match['original_name']:
                    print(f" Original: {match['original_name']}")
        else:
            print(" ❌ No matches found")
            print(f" Confidence: {confidence:.2f}")
            print(f" Method: {method}")


if __name__ == '__main__':
    import os
    import django

    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.dev')
    django.setup()
    test_enhanced_matching()