# HH/apps/complaints/tasks_enhanced.py

"""
Enhanced staff matching with fuzzy matching and improved accuracy.

This module provides improved staff matching functions with:
- Fuzzy string matching (difflib SequenceMatcher similarity ratio)
- Better handling of name variations
- Matching against original full name field
- Improved confidence scoring
"""
import logging
from typing import Optional, Dict, Any, Tuple, List

from django.db.models import Q

logger = logging.getLogger(__name__)


def fuzzy_match_ratio(str1: str, str2: str) -> float:
    """
    Calculate a case-insensitive similarity ratio between two strings.

    The ratio is computed with difflib.SequenceMatcher on the lowercased
    inputs.

    Args:
        str1: First string
        str2: Second string

    Returns:
        Float from 0.0 to 1.0 representing similarity. Returns 0.0 when
        either argument is not a string (for example None).
    """
    from difflib import SequenceMatcher

    # Explicit guard instead of a blanket ``except Exception``: missing or
    # non-string values still score 0.0, but genuine programming errors are
    # no longer silently swallowed.
    if not isinstance(str1, str) or not isinstance(str2, str):
        return 0.0

    return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()


def normalize_name(name: str) -> str:
    """
    Normalize name for better matching.

    - Convert to lowercase
    - Collapse every run of whitespace (spaces, tabs, newlines) to one space
    - Remove leading titles (English and Arabic), matched as whole tokens
    - Remove hyphens (Al-Shammari -> alshammari)

    Args:
        name: Raw name; may be empty or None.

    Returns:
        The normalized name, or "" when ``name`` is empty/None or consists
        only of titles.
    """
    if not name:
        return ""

    # Common titles (both English and Arabic)
    titles = ('dr.', 'dr', 'mr.', 'mr', 'mrs.', 'mrs', 'ms.', 'ms',
              'د.', 'السيد', 'السيدة', 'الدكتور')
    # Longest first, so 'mrs.' is tried before 'mr' and 'السيدة' before
    # 'السيد'; otherwise the shorter title leaves a stray suffix behind.
    ordered_titles = sorted(titles, key=len, reverse=True)

    name = ' '.join(name.lower().split())

    # Keep stripping until no leading title remains ("Mr. Dr. X" -> "x").
    stripped = True
    while stripped and name:
        stripped = False
        for title in ordered_titles:
            if not name.startswith(title):
                continue
            rest = name[len(title):]
            # An undotted title must be a whole word: 'dr' is a title in
            # "dr ahmed" but not in "drake". Dotted titles may be glued to
            # the name ("dr.ahmed").
            if rest and not title.endswith('.') and not rest[0].isspace():
                continue
            name = rest.strip()
            stripped = True
            break

    # Remove hyphens for better matching (Al-Shammari -> alshammari)
    name = name.replace('-', '')

    # Hyphen removal can leave doubled spaces ("al - harbi"); collapse again.
    return ' '.join(name.split())


def _staff_in_department(staff, dept_id: Optional[str]) -> bool:
    """Return True when ``dept_id`` is set and ``staff`` belongs to that department."""
    return bool(
        dept_id
        and staff.department
        and str(staff.department.id) == dept_id
    )


def _join_name_parts(*parts: Optional[str]) -> str:
    """Join the non-empty name parts with single spaces, skipping None/blank values."""
    return ' '.join(part for part in parts if part).strip()


def match_staff_from_name_enhanced(
    staff_name: str,
    hospital_id: str,
    department_name: Optional[str] = None,
    return_all: bool = False,
    fuzzy_threshold: float = 0.65
) -> Tuple[Any, float, str]:
    """
    Enhanced staff matching with fuzzy matching and better accuracy.

    Active staff of the hospital are checked in three layers; a later layer
    only runs when the earlier ones produced no match:

    1. Exact match on English first/last name, full Arabic name, or the
       original ``name`` field.
    2. Fuzzy match (``fuzzy_match_ratio``) against several name combinations.
    3. Partial match: at least two normalized input words are contained in
       the staff member's normalized name fields.

    Staff in the requested department get a higher confidence (0.95 instead
    of 0.90 for exact name matches, +0.05 for fuzzy/partial matches).

    Args:
        staff_name: Name extracted from complaint (without titles)
        hospital_id: Hospital ID to search within
        department_name: Optional department name to prioritize matching
        return_all: If True, return all matching staff. If False, return single best match.
        fuzzy_threshold: Minimum similarity ratio for fuzzy matches (0.0 to 1.0)

    Returns:
        If return_all=True: Tuple of (matches_list, confidence_score, matching_method)
        If return_all=False: Tuple of (staff_id, confidence_score, matching_method)
        When nothing matches (or no name is given) the first element is an
        empty list and the confidence is 0.0, regardless of ``return_all``.
    """
    from apps.organizations.models import Staff, Department

    if not staff_name or not staff_name.strip():
        return [], 0.0, "No staff name provided"

    staff_name = staff_name.strip()

    # Resolve the department (if any). The id is stored as a string because
    # every comparison below is made against ``str(staff.department.id)``;
    # keeping the raw id would make those comparisons fail for non-string
    # primary keys and the department bonus would never apply.
    dept_id = None
    if department_name:
        department = Department.objects.filter(
            hospital_id=hospital_id,
            name__iexact=department_name,
            status='active'
        ).first()
        if department:
            dept_id = str(department.id)

    # Fetch all active staff of this hospital once; matching runs in Python.
    # NOTE(review): ``staff.department`` is read per staff member below; if it
    # is a foreign key, ``select_related('department')`` would avoid extra
    # queries - confirm against the Staff model.
    all_staff = list(Staff.objects.filter(
        hospital_id=hospital_id,
        status='active'
    ))

    matches: List[Dict[str, Any]] = []
    seen_ids = set()

    def _record(staff, confidence: float, method: str) -> bool:
        """Append a match for ``staff`` unless it is already listed."""
        staff_key = str(staff.id)
        if staff_key in seen_ids:
            return False
        seen_ids.add(staff_key)
        matches.append(create_match_dict(staff, confidence, method, staff_name))
        return True

    # ========================================
    # LAYER 1: EXACT MATCHES
    # ========================================

    # 1a. Exact match on first_name + last_name (English)
    words = staff_name.split()
    if len(words) >= 2:
        first_name = words[0]
        last_name = ' '.join(words[1:])

        for staff in all_staff:
            if (staff.first_name or '').lower() != first_name.lower():
                continue
            if (staff.last_name or '').lower() != last_name.lower():
                continue

            in_dept = _staff_in_department(staff, dept_id)
            confidence = 0.95 if in_dept else 0.90
            method = f"Exact English match in {'correct' if in_dept else 'any'} department"

            if _record(staff, confidence, method):
                logger.info(f"EXACT MATCH (EN): {staff.first_name} {staff.last_name} == {first_name} {last_name}")

    # 1b. Exact match on full Arabic name
    for staff in all_staff:
        full_arabic = _join_name_parts(staff.first_name_ar, staff.last_name_ar)
        if full_arabic == staff_name:
            in_dept = _staff_in_department(staff, dept_id)
            confidence = 0.95 if in_dept else 0.90
            method = f"Exact Arabic match in {'correct' if in_dept else 'any'} department"

            if _record(staff, confidence, method):
                logger.info(f"EXACT MATCH (AR): {full_arabic} == {staff_name}")

    # 1c. Exact match on 'name' field (original full name)
    for staff in all_staff:
        if staff.name and staff.name.lower() == staff_name.lower():
            confidence = 0.93
            method = "Exact match on original name field"

            if _record(staff, confidence, method):
                logger.info(f"EXACT MATCH (name field): {staff.name} == {staff_name}")

    # ========================================
    # LAYER 2: FUZZY MATCHES (if no exact)
    # ========================================

    if not matches:
        logger.info(f"No exact matches found, trying fuzzy matching for: {staff_name}")

        for staff in all_staff:
            # Try different name combinations; empty ones are skipped below
            name_combinations = [
                _join_name_parts(staff.first_name, staff.last_name),
                _join_name_parts(staff.first_name_ar, staff.last_name_ar),
                staff.name or "",
                staff.first_name,
                staff.last_name,
                staff.first_name_ar,
                staff.last_name_ar,
            ]

            # Keep the best-scoring combination for this staff member
            best_ratio = 0.0
            best_match_name = ""

            for combo in name_combinations:
                if not combo:
                    continue
                ratio = fuzzy_match_ratio(staff_name, combo)
                if ratio > best_ratio:
                    best_ratio = ratio
                    best_match_name = combo

            if best_ratio >= fuzzy_threshold:
                # Adjust confidence based on match quality and department
                dept_bonus = 0.05 if _staff_in_department(staff, dept_id) else 0.0
                confidence = best_ratio * 0.85 + dept_bonus  # Scale down slightly for fuzzy

                method = f"Fuzzy match ({best_ratio:.2f}) on '{best_match_name}'"

                if _record(staff, confidence, method):
                    logger.info(f"FUZZY MATCH ({best_ratio:.2f}): {best_match_name} ~ {staff_name}")

    # ========================================
    # LAYER 3: PARTIAL/WORD MATCHES
    # ========================================

    if not matches:
        logger.info(f"No fuzzy matches found, trying partial/word matching for: {staff_name}")

        # Normalized, non-empty words of the input name
        input_words = [
            word
            for word in (normalize_name(part) for part in staff_name.split())
            if word
        ]
        total_words = len(input_words)

        for staff in all_staff:
            # Normalize each name field once per staff member
            normalized_fields = [
                normalize_name(field)
                for field in (
                    staff.first_name,
                    staff.last_name,
                    staff.first_name_ar,
                    staff.last_name_ar,
                    staff.name,
                )
            ]

            # A word counts when it equals or is contained in any name field
            match_count = sum(
                1
                for word in input_words
                if any(word in field for field in normalized_fields)
            )

            # Require at least 2 matching words
            if match_count >= 2:
                confidence = 0.60 + (match_count / total_words) * 0.15
                dept_bonus = 0.05 if _staff_in_department(staff, dept_id) else 0.0
                confidence += dept_bonus

                method = f"Partial match ({match_count}/{total_words} words)"

                if _record(staff, confidence, method):
                    logger.info(f"PARTIAL MATCH ({match_count}/{total_words}): {staff.first_name} {staff.last_name}")

    # ========================================
    # FINAL: SORT AND RETURN
    # ========================================

    if not matches:
        logger.warning(f"No staff match found for name: '{staff_name}'")
        return [], 0.0, "No match found"

    # Sort by confidence (descending)
    matches.sort(key=lambda x: x['confidence'], reverse=True)
    best_confidence = matches[0]['confidence']
    best_method = matches[0]['matching_method']

    logger.info(
        f"Returning {len(matches)} match(es) for '{staff_name}'. "
        f"Best: {matches[0]['name_en']} (confidence: {best_confidence:.2f}, method: {best_method})"
    )

    if not return_all:
        return str(matches[0]['id']), best_confidence, best_method
    return matches, best_confidence, best_method


def create_match_dict(staff, confidence: float, method: str, source_name: str) -> Dict[str, Any]:
    """
    Build the dictionary that describes one staff match.

    Args:
        staff: Staff model instance
        confidence: Confidence score (0.0 to 1.0)
        method: Description of matching method
        source_name: Original input name that was matched

    Returns:
        Dictionary with the staff id, English/Arabic/original names, job
        details, department name and id (None when the staff member has no
        department), plus the confidence, method and source name.
    """
    # Arabic name is only reported when both parts are present
    if staff.first_name_ar and staff.last_name_ar:
        arabic_name = f"{staff.first_name_ar} {staff.last_name_ar}"
    else:
        arabic_name = ""

    dept = staff.department
    if dept:
        dept_name = dept.name
        dept_key = str(dept.id)
    else:
        dept_name = None
        dept_key = None

    entry: Dict[str, Any] = {}
    entry['id'] = str(staff.id)
    entry['name_en'] = f"{staff.first_name} {staff.last_name}"
    entry['name_ar'] = arabic_name
    entry['original_name'] = staff.name or ""
    entry['job_title'] = staff.job_title
    entry['specialization'] = staff.specialization
    entry['department'] = dept_name
    entry['department_id'] = dept_key
    entry['confidence'] = confidence
    entry['matching_method'] = method
    entry['source_name'] = source_name
    return entry


def test_enhanced_matching():
    """Run sample names through the enhanced matcher and print the outcome.

    Uses the first Hospital in the database; prints a notice and returns
    when there is none. For every sample name the number of matches, the
    best confidence/method and up to three matches are printed.
    """
    from apps.organizations.models import Staff, Hospital

    banner = "=" * 80
    print("\n" + banner)
    print("🧪 TESTING ENHANCED STAFF MATCHING")
    print(banner)

    hospital = Hospital.objects.first()
    if not hospital:
        print("❌ No hospitals found")
        return

    hospital_key = str(hospital.id)

    # (input name, expectation shown to the operator)
    scenarios = (
        # Exact matches (existing staff)
        ("Omar Al-Harbi", "Should match exact"),
        ("Ahmed Al-Farsi", "Should match exact"),
        ("محمد الرشيد", "Should match Arabic exact"),
        # Fuzzy matches (variations)
        ("Omar Al Harbi", "Should match without hyphen"),
        ("Omar Alharbi", "Should match fuzzy"),
        ("احمد الفارسي", "Should match Arabic fuzzy"),
        # Partial matches
        ("Omar", "Should match first name"),
        ("Al-Harbi", "Should match last name"),
        # Non-existent (for testing suggestions)
        ("Ibrahim Abdulaziz Al-Shammari", "Non-existent staff"),
    )

    for candidate, expectation in scenarios:
        print(f"\n🔍 Testing: '{candidate}'")
        print(f"   Expected: {expectation}")

        found, score, how = match_staff_from_name_enhanced(
            staff_name=candidate,
            hospital_id=hospital_key,
            return_all=True,
            fuzzy_threshold=0.65,
        )

        if not found:
            print("   ❌ No matches found")
            print(f"   Confidence: {score:.2f}")
            print(f"   Method: {how}")
            continue

        print(f"   ✅ Found {len(found)} match(es)")
        print(f"   Best confidence: {score:.2f}")
        print(f"   Method: {how}")

        # Show at most the three best matches
        for rank, entry in enumerate(found[:3], 1):
            print(f"   {rank}. {entry['name_en']} ({entry['name_ar']}) - {entry['confidence']:.2f}")
            original = entry['original_name']
            if original:
                print(f"      Original: {original}")


# Script entry point: configure Django, then run the manual matching check.
if __name__ == '__main__':
    import os
    import django
    # setdefault only applies when DJANGO_SETTINGS_MODULE is not already set
    # in the environment, so an explicit setting wins over the dev default.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.dev')
    # Must run before test_enhanced_matching(), which imports project models.
    django.setup()

    test_enhanced_matching()