371 lines
14 KiB
Python
371 lines
14 KiB
Python
"""
|
|
Enhanced staff matching with fuzzy matching and improved accuracy.
|
|
|
|
This module provides improved staff matching functions with:
|
|
- Fuzzy string matching (difflib SequenceMatcher similarity ratio)
|
|
- Better handling of name variations
|
|
- Matching against original full name field
|
|
- Improved confidence scoring
|
|
"""
|
|
import logging
|
|
from typing import Optional, Dict, Any, Tuple, List
|
|
|
|
from django.db.models import Q
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def fuzzy_match_ratio(str1: str, str2: str) -> float:
    """
    Calculate a case-insensitive similarity ratio using difflib.

    Args:
        str1: First string
        str2: Second string

    Returns:
        Float from 0.0 to 1.0 representing similarity. Inputs that cannot be
        compared as text (for example ``None``) yield 0.0 instead of raising.
    """
    from difflib import SequenceMatcher

    try:
        return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()
    except (AttributeError, TypeError):
        # Only the "not a string" failures are treated as "no similarity";
        # anything else is a genuine bug and is allowed to propagate.
        return 0.0
|
|
|
|
|
|
# Honorifics removed from the front of a name when they appear as a whole word.
_TITLE_TOKENS = frozenset({
    'dr.', 'dr', 'mr.', 'mr', 'mrs.', 'mrs', 'ms.', 'ms',
    'د.', 'السيد', 'السيدة', 'الدكتور',
})

# Dotted honorifics that may be glued to the name without a space ("dr.ahmed").
_GLUED_TITLE_PREFIXES = ('dr.', 'mr.', 'mrs.', 'ms.', 'د.')


def normalize_name(name: str) -> str:
    """
    Normalize name for better matching.

    - Convert to lowercase
    - Remove leading titles (English and Arabic)
    - Remove hyphens (Al-Shammari -> alshammari)
    - Collapse runs of whitespace to single spaces

    Args:
        name: Raw name; ``None`` or an empty string is accepted.

    Returns:
        The normalized name, or "" when nothing is left.
    """
    if not name:
        return ""

    # split() trims the ends and collapses every whitespace run in one pass.
    tokens = name.strip().lower().split()

    # Strip titles only at word boundaries: a plain prefix test would turn
    # "drake" into "ake" and let 'السيد' cut the longer 'السيدة' in half.
    while tokens:
        head = tokens[0]
        if head in _TITLE_TOKENS:
            del tokens[0]
            continue
        glued = next(
            (prefix for prefix in _GLUED_TITLE_PREFIXES if head.startswith(prefix)),
            None,
        )
        if glued is None:
            break
        # The remainder is never empty: a bare title was handled above.
        tokens[0] = head[len(glued):]

    # Drop hyphens, then discard tokens that consisted only of hyphens so no
    # double space is left behind.
    cleaned = (token.replace('-', '') for token in tokens)
    return ' '.join(token for token in cleaned if token)
|
|
|
|
|
|
def match_staff_from_name_enhanced(
    staff_name: str,
    hospital_id: str,
    department_name: Optional[str] = None,
    return_all: bool = False,
    fuzzy_threshold: float = 0.65
) -> Tuple[list, float, str]:
    """
    Enhanced staff matching with fuzzy matching and better accuracy.

    Active staff of the hospital are checked in three layers; a later layer
    only runs when the earlier ones produced nothing:

    1. Exact matches on English first+last name, Arabic full name, or the
       original ``name`` field.
    2. Fuzzy matches whose best similarity ratio reaches ``fuzzy_threshold``.
    3. Partial matches where at least two normalized input words occur in
       the staff member's name fields.

    Args:
        staff_name: Name extracted from complaint (without titles)
        hospital_id: Hospital ID to search within
        department_name: Optional department name; staff in that (active)
            department get a higher confidence
        return_all: If True, return all matching staff. If False, return single best match.
        fuzzy_threshold: Minimum similarity ratio for fuzzy matches (0.0 to 1.0)

    Returns:
        If return_all=True: Tuple of (matches_list, confidence_score, matching_method)
        If return_all=False: Tuple of (staff_id, confidence_score, matching_method)
        When nothing matches (or the name is blank) the first element is an
        empty list and the confidence is 0.0, regardless of ``return_all``.
    """
    from apps.organizations.models import Staff, Department

    if not staff_name or not staff_name.strip():
        return [], 0.0, "No staff name provided"

    staff_name = staff_name.strip()

    # Resolve the requested department. The id is stringified once because
    # every comparison below is against str(staff.department.id); keeping
    # the raw id would make those comparisons fail for int/UUID keys.
    dept_id = None
    if department_name:
        department = Department.objects.filter(
            hospital_id=hospital_id,
            name__iexact=department_name,
            status='active'
        ).first()
        if department:
            dept_id = str(department.id)

    # Fetch all active staff of this hospital once; matching is in memory.
    # NOTE(review): staff.department is read per staff below - if it is a
    # foreign key, select_related('department') would avoid extra queries.
    all_staff = list(Staff.objects.filter(
        hospital_id=hospital_id,
        status='active'
    ))

    matches = []
    seen_ids = set()

    def in_requested_department(staff) -> bool:
        """Return True when a department was requested and staff belongs to it."""
        return bool(
            dept_id and staff.department and str(staff.department.id) == dept_id
        )

    def add_match(staff, confidence: float, method: str) -> bool:
        """Record staff unless already recorded; return True when added."""
        staff_key = str(staff.id)
        if staff_key in seen_ids:
            return False
        seen_ids.add(staff_key)
        matches.append(create_match_dict(staff, confidence, method, staff_name))
        return True

    # ========================================
    # LAYER 1: EXACT MATCHES
    # ========================================

    # 1a. Exact match on first_name + last_name (English)
    words = staff_name.split()
    if len(words) >= 2:
        first_name = words[0]
        last_name = ' '.join(words[1:])
        wanted_first = first_name.lower()
        wanted_last = last_name.lower()

        for staff in all_staff:
            # "or ''" keeps a missing name part from raising on .lower().
            if (staff.first_name or "").lower() != wanted_first:
                continue
            if (staff.last_name or "").lower() != wanted_last:
                continue

            same_dept = in_requested_department(staff)
            confidence = 0.95 if same_dept else 0.90
            method = f"Exact English match in {'correct' if same_dept else 'any'} department"

            if add_match(staff, confidence, method):
                logger.info(
                    "EXACT MATCH (EN): %s %s == %s %s",
                    staff.first_name, staff.last_name, first_name, last_name,
                )

    # 1b. Exact match on full Arabic name
    for staff in all_staff:
        # Missing parts become '' so they are not rendered as "None".
        full_arabic = f"{staff.first_name_ar or ''} {staff.last_name_ar or ''}".strip()
        if full_arabic != staff_name:
            continue

        same_dept = in_requested_department(staff)
        confidence = 0.95 if same_dept else 0.90
        method = f"Exact Arabic match in {'correct' if same_dept else 'any'} department"

        if add_match(staff, confidence, method):
            logger.info("EXACT MATCH (AR): %s == %s", full_arabic, staff_name)

    # 1c. Exact match on 'name' field (original full name)
    wanted_full = staff_name.lower()
    for staff in all_staff:
        if staff.name and staff.name.lower() == wanted_full:
            if add_match(staff, 0.93, "Exact match on original name field"):
                logger.info(
                    "EXACT MATCH (name field): %s == %s", staff.name, staff_name
                )

    # ========================================
    # LAYER 2: FUZZY MATCHES (if no exact)
    # ========================================

    if not matches:
        logger.info(
            "No exact matches found, trying fuzzy matching for: %s", staff_name
        )

        for staff in all_staff:
            first_en = staff.first_name or ""
            last_en = staff.last_name or ""
            first_ar = staff.first_name_ar or ""
            last_ar = staff.last_name_ar or ""

            # Candidate spellings; combined forms are stripped so a missing
            # half does not leave a stray space that lowers the ratio.
            name_combinations = [
                f"{first_en} {last_en}".strip(),
                f"{first_ar} {last_ar}".strip(),
                staff.name or "",
                first_en,
                last_en,
                first_ar,
                last_ar,
            ]

            best_ratio = 0.0
            best_match_name = ""
            for combo in name_combinations:
                if not combo:
                    continue
                ratio = fuzzy_match_ratio(staff_name, combo)
                if ratio > best_ratio:
                    best_ratio = ratio
                    best_match_name = combo

            if best_ratio >= fuzzy_threshold:
                dept_bonus = 0.05 if in_requested_department(staff) else 0.0
                # Scale down slightly so a fuzzy hit never outranks an exact one.
                confidence = best_ratio * 0.85 + dept_bonus
                method = f"Fuzzy match ({best_ratio:.2f}) on '{best_match_name}'"

                if add_match(staff, confidence, method):
                    logger.info(
                        "FUZZY MATCH (%.2f): %s ~ %s",
                        best_ratio, best_match_name, staff_name,
                    )

    # ========================================
    # LAYER 3: PARTIAL/WORD MATCHES
    # ========================================

    if not matches:
        logger.info(
            "No fuzzy matches found, trying partial/word matching for: %s",
            staff_name,
        )

        # Normalize each input word once; words that normalize to '' are dropped.
        input_words = [
            word for word in map(normalize_name, staff_name.split()) if word
        ]
        total_words = len(input_words)

        # At least two matching words are required, so shorter inputs cannot match.
        if total_words >= 2:
            for staff in all_staff:
                # Normalize the staff fields once per staff, not once per word.
                normalized_fields = [
                    normalize_name(value)
                    for value in (
                        staff.first_name,
                        staff.last_name,
                        staff.first_name_ar,
                        staff.last_name_ar,
                        staff.name,
                    )
                ]

                # A word counts when it equals or is contained in any field
                # (equality is a special case of containment).
                match_count = sum(
                    1 for word in input_words
                    if any(word in field for field in normalized_fields)
                )

                if match_count >= 2:
                    confidence = 0.60 + (match_count / total_words) * 0.15
                    if in_requested_department(staff):
                        confidence += 0.05

                    method = f"Partial match ({match_count}/{total_words} words)"

                    if add_match(staff, confidence, method):
                        logger.info(
                            "PARTIAL MATCH (%d/%d): %s %s",
                            match_count, total_words,
                            staff.first_name, staff.last_name,
                        )

    # ========================================
    # FINAL: SORT AND RETURN
    # ========================================

    if not matches:
        logger.warning("No staff match found for name: '%s'", staff_name)
        return [], 0.0, "No match found"

    # Sort by confidence (descending); the sort is stable, so ties keep the
    # order in which they were found.
    matches.sort(key=lambda x: x['confidence'], reverse=True)
    best = matches[0]
    best_confidence = best['confidence']
    best_method = best['matching_method']

    logger.info(
        "Returning %d match(es) for '%s'. Best: %s (confidence: %.2f, method: %s)",
        len(matches), staff_name, best['name_en'], best_confidence, best_method,
    )

    if return_all:
        return matches, best_confidence, best_method
    return str(best['id']), best_confidence, best_method
|
|
|
|
|
|
def create_match_dict(staff, confidence: float, method: str, source_name: str) -> Dict[str, Any]:
    """
    Build the plain-dict description of one matched staff member.

    Args:
        staff: Staff model instance
        confidence: Confidence score (0.0 to 1.0)
        method: Description of matching method
        source_name: Original input name that was matched

    Returns:
        Dictionary with the staff id, English/Arabic/original names, job
        details, department name and id (None when the staff has no
        department), plus the confidence, method and source name passed in.
    """
    # The Arabic name is only reported when both parts are present.
    if staff.first_name_ar and staff.last_name_ar:
        arabic_name = f"{staff.first_name_ar} {staff.last_name_ar}"
    else:
        arabic_name = ""

    department = staff.department
    if department:
        department_label = department.name
        department_key = str(department.id)
    else:
        department_label = None
        department_key = None

    return {
        'id': str(staff.id),
        'name_en': f"{staff.first_name} {staff.last_name}",
        'name_ar': arabic_name,
        'original_name': staff.name or "",
        'job_title': staff.job_title,
        'specialization': staff.specialization,
        'department': department_label,
        'department_id': department_key,
        'confidence': confidence,
        'matching_method': method,
        'source_name': source_name,
    }
|
|
|
|
|
|
def test_enhanced_matching():
    """
    Manually exercise the enhanced matcher and print the outcome.

    Uses the first Hospital row as the search scope, runs a fixed list of
    sample names through match_staff_from_name_enhanced with
    return_all=True, and prints up to three matches per name.
    """
    from apps.organizations.models import Staff, Hospital

    banner = "=" * 80
    print("\n" + banner)
    print("🧪 TESTING ENHANCED STAFF MATCHING")
    print(banner)

    hospital = Hospital.objects.first()
    if not hospital:
        print("❌ No hospitals found")
        return

    hospital_key = str(hospital.id)

    # (input name, what the run is expected to demonstrate)
    scenarios = (
        # Exact matches (existing staff)
        ("Omar Al-Harbi", "Should match exact"),
        ("Ahmed Al-Farsi", "Should match exact"),
        ("محمد الرشيد", "Should match Arabic exact"),

        # Fuzzy matches (variations)
        ("Omar Al Harbi", "Should match without hyphen"),
        ("Omar Alharbi", "Should match fuzzy"),
        ("احمد الفارسي", "Should match Arabic fuzzy"),

        # Partial matches
        ("Omar", "Should match first name"),
        ("Al-Harbi", "Should match last name"),

        # Non-existent (for testing suggestions)
        ("Ibrahim Abdulaziz Al-Shammari", "Non-existent staff"),
    )

    for name, description in scenarios:
        print(f"\n🔍 Testing: '{name}'")
        print(f" Expected: {description}")

        matches, confidence, method = match_staff_from_name_enhanced(
            staff_name=name,
            hospital_id=hospital_key,
            return_all=True,
            fuzzy_threshold=0.65
        )

        if not matches:
            print(" ❌ No matches found")
            print(f" Confidence: {confidence:.2f}")
            print(f" Method: {method}")
            continue

        print(f" ✅ Found {len(matches)} match(es)")
        print(f" Best confidence: {confidence:.2f}")
        print(f" Method: {method}")

        # Only the three strongest candidates are listed.
        for rank, match in enumerate(matches[:3], start=1):
            print(f" {rank}. {match['name_en']} ({match['name_ar']}) - {match['confidence']:.2f}")
            original = match['original_name']
            if original:
                print(f" Original: {original}")
|
|
|
|
|
|
# Script entry point: bootstrap Django, then run the manual matching check.
if __name__ == '__main__':
    import os
    import django
    # Only sets the settings module when the environment has not already
    # chosen one; it must be in place before django.setup() is called.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.dev')
    django.setup()

    # The check imports project models, so it can only run after setup().
    test_enhanced_matching()
|