# HH/apps/core/ai_service.py

"""
AI Service - Base class for all AI interactions using OpenRouter

This module provides a unified interface for AI operations using the
OpenRouter API directly via httpx.

Features:
- Complaint analysis (severity, priority classification)
- Chat completion for general AI tasks
- Sentiment analysis
- Entity extraction
- Language detection
"""

import json
import logging
from typing import Dict, List, Optional, Any

import httpx
from django.conf import settings
from django.core.cache import cache

logger = logging.getLogger(__name__)


class AIServiceError(Exception):
    """Error raised by the AI service when an AI request cannot be completed."""


class AIService:
    """
    Base AI Service class using OpenRouter API directly.

    This is the single source of truth for all AI interactions in the application.
    """

    # Base URL that the chat-completions path is appended to.
    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
    # SECURITY: do not commit a real credential here. _get_api_key() reads
    # settings.OPENROUTER_API_KEY first and only falls back to this attribute
    # when that setting is missing or empty. A key used to be hard-coded on
    # this line; it has been exposed through source control and should be
    # revoked and replaced by a value supplied through settings.
    OPENROUTER_API_KEY = ""

    # Fallbacks used when the corresponding AI_* setting is not provided.
    DEFAULT_MODEL = "google/gemini-2.5-flash-lite"
    DEFAULT_TEMPERATURE = 0.3
    DEFAULT_MAX_TOKENS = 500
    # Timeout value handed to the HTTP client by chat_completion().
    DEFAULT_TIMEOUT = 30

    # Labels used for complaint severity and priority.
    SEVERITY_CHOICES = ["low", "medium", "high", "critical"]
    PRIORITY_CHOICES = ["low", "medium", "high"]

    @classmethod
    def _get_api_key(cls) -> str:
        """Return the OpenRouter API key, preferring the Django setting.

        ``settings.OPENROUTER_API_KEY`` is used when it is set to a truthy
        value; otherwise the class-level ``OPENROUTER_API_KEY`` is returned.
        """
        configured_key = getattr(settings, "OPENROUTER_API_KEY", None)
        if configured_key:
            return configured_key
        return cls.OPENROUTER_API_KEY

    @classmethod
    def _strip_model_prefix(cls, model: str) -> str:
        if model.startswith("openrouter/"):
            return model[len("openrouter/"):]
        return model

    @classmethod
    def _openrouter_completion(
        cls,
        model: str,
        messages: list,
        temperature: float = 0.3,
        max_tokens: int = 500,
        timeout: int = 30,
        response_format: Optional[Dict] = None,
    ) -> str:
        """
        Call the OpenRouter chat completions endpoint and return the reply text.

        Args:
            model: Model identifier; a leading ``openrouter/`` prefix is removed.
            messages: Chat messages, sent unchanged as the ``messages`` field.
            temperature: Sampling temperature sent with the request.
            max_tokens: Value sent as ``max_tokens``.
            timeout: Timeout passed to ``httpx.Client``.
            response_format: Optional mapping sent as ``response_format``;
                left out of the payload when falsy.

        Returns:
            The ``content`` string of the first choice's message.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
            AIServiceError: If the response body has no usable first choice or
                its message content is null.
        """
        model = cls._strip_model_prefix(model)
        api_key = cls._get_api_key()
        url = f"{cls.OPENROUTER_BASE_URL}/chat/completions"

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if response_format:
            payload["response_format"] = response_format

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        logger.info("AI Request: model=%s, temp=%s", model, temperature)

        with httpx.Client(timeout=timeout) as client:
            resp = client.post(url, headers=headers, json=payload)
            resp.raise_for_status()
            data = resp.json()

        # A successful HTTP status does not guarantee a completion in the body,
        # so validate the shape instead of letting a bare KeyError/IndexError
        # escape without context.
        try:
            content = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as exc:
            # NOTE(review): presumably the body carries an "error" object in
            # this case - include it in the message when present.
            detail = data.get("error") if isinstance(data, dict) else None
            raise AIServiceError(
                f"Unexpected OpenRouter response (no completion choice): {detail or 'missing choices'}"
            ) from exc

        # A null content would otherwise crash on len() below and violate the
        # declared ``str`` return type.
        if content is None:
            raise AIServiceError("OpenRouter returned a null message content")

        logger.info("AI Response: length=%s", len(content))
        return content

    @classmethod
    def _get_model(cls) -> str:
        """Return ``settings.AI_MODEL``, or ``DEFAULT_MODEL`` when it is missing or empty."""
        # The None default makes a missing setting fall back instead of raising
        # AttributeError, matching how _get_api_key reads its setting.
        return getattr(settings, "AI_MODEL", None) or cls.DEFAULT_MODEL

    @classmethod
    def _get_temperature(cls) -> float:
        """
        Return the sampling temperature from ``settings.AI_TEMPERATURE``.

        Falls back to ``DEFAULT_TEMPERATURE`` when the setting is missing,
        ``None`` or an empty string. A configured value of ``0`` is honoured
        (it is not treated as "unset").

        Raises:
            ValueError: If the configured value cannot be converted to float.
        """
        configured = getattr(settings, "AI_TEMPERATURE", None)
        if configured is None or configured == "":
            return cls.DEFAULT_TEMPERATURE
        # Deliberately no ``or default`` here: 0.0 is a valid temperature and
        # must not be replaced by the default.
        return float(configured)

    @classmethod
    def _get_max_tokens(cls) -> int:
        """
        Return the token limit from ``settings.AI_MAX_TOKENS``.

        Falls back to ``DEFAULT_MAX_TOKENS`` when the setting is missing,
        ``None``, an empty string, or converts to ``0``.

        Raises:
            ValueError: If the configured value cannot be converted to int.
        """
        configured = getattr(settings, "AI_MAX_TOKENS", None)
        if configured is None or configured == "":
            return cls.DEFAULT_MAX_TOKENS
        return int(configured) or cls.DEFAULT_MAX_TOKENS

    @classmethod
    def _get_complaint_categories(cls) -> List[str]:
        """Return the English names of all ComplaintCategory rows as a list."""
        from apps.complaints.models import ComplaintCategory

        # Materialise the lazy QuerySet so the return value matches the
        # List[str] annotation, as _get_hospital_departments already does.
        return list(ComplaintCategory.objects.all().values_list("name_en", flat=True))

    @classmethod
    def _get_complaint_sub_categories(cls, category) -> List[str]:
        """
        Return the English names of the children of the named category.

        Args:
            category: English name (``name_en``) of the parent category.

        Returns:
            List of child ``name_en`` values; an empty list when *category* is
            falsy, no category with that name exists, or the lookup fails
            (the failure is logged).
        """
        from apps.complaints.models import ComplaintCategory

        if not category:
            return []

        try:
            category_obj = ComplaintCategory.objects.filter(name_en=category).first()
            if category_obj:
                # list() runs the query here, inside the try block, so database
                # errors are caught below instead of surfacing later when the
                # caller iterates a lazy QuerySet.
                return list(
                    ComplaintCategory.objects.filter(parent=category_obj).values_list("name_en", flat=True)
                )
        except Exception as e:
            logger.error(f"Error fetching subcategories: {e}")
        return []

    @classmethod
    def _get_all_categories_with_subcategories(cls) -> Dict[str, List[str]]:
        """
        Map each parent-less category name to the names of its direct children.

        Returns:
            ``{parent name_en: [child name_en, ...]}``. Categories without
            children map to an empty list. If a lookup fails, the error is
            logged and whatever was collected so far is returned.
        """
        from apps.complaints.models import ComplaintCategory

        grouped = {}
        try:
            # Roots are the rows that have no parent.
            for category in ComplaintCategory.objects.filter(parent__isnull=True):
                child_names = ComplaintCategory.objects.filter(parent=category).values_list("name_en", flat=True)
                grouped[category.name_en] = list(child_names)
        except Exception as e:
            logger.error(f"Error fetching categories with subcategories: {e}")

        return grouped

    @classmethod
    def _get_taxonomy_hierarchy(cls) -> Dict:
        """
        Build the 4-level taxonomy tree (domain > category > subcategory >
        classification) from the active ComplaintCategory rows.

        Returns:
            ``{"domains": [...]}`` where every domain, category and subcategory
            entry has ``code``, ``name_en``, ``name_ar`` and a list of children
            under ``categories`` / ``subcategories`` / ``classifications``
            respectively. Classification entries carry only ``code``,
            ``name_en`` and ``name_ar``. For the three upper levels a missing
            code is replaced by the upper-cased English name; a missing Arabic
            name becomes ``""``.

            If anything fails while loading, the error is logged and the
            domains completed so far are returned.
        """
        from apps.complaints.models import ComplaintCategory

        def active_children(parent_node, child_level):
            # Active rows of the given level directly below parent_node.
            return ComplaintCategory.objects.filter(
                parent=parent_node, level=child_level, is_active=True
            ).order_by("order")

        def describe(node, children_key):
            # Shared shape of the three upper levels, with an empty child list.
            return {
                "code": node.code or node.name_en.upper(),
                "name_en": node.name_en,
                "name_ar": node.name_ar or "",
                children_key: [],
            }

        result = {"domains": []}

        try:
            levels = ComplaintCategory.LevelChoices
            top_level = ComplaintCategory.objects.filter(level=levels.DOMAIN, is_active=True).order_by(
                "domain_type", "order"
            )

            for domain in top_level:
                domain_entry = describe(domain, "categories")

                for category in active_children(domain, levels.CATEGORY):
                    category_entry = describe(category, "subcategories")

                    for subcategory in active_children(category, levels.SUBCATEGORY):
                        subcategory_entry = describe(subcategory, "classifications")

                        # Leaves have no code fallback and no child list.
                        for leaf in active_children(subcategory, levels.CLASSIFICATION):
                            subcategory_entry["classifications"].append(
                                {
                                    "code": leaf.code,
                                    "name_en": leaf.name_en,
                                    "name_ar": leaf.name_ar or "",
                                }
                            )

                        category_entry["subcategories"].append(subcategory_entry)

                    domain_entry["categories"].append(category_entry)

                # A domain is published only once its whole subtree is built.
                result["domains"].append(domain_entry)

            logger.info(f"Taxonomy hierarchy loaded: {len(result['domains'])} domains")

        except Exception as e:
            logger.error(f"Error fetching taxonomy hierarchy: {e}")

        return result

    @classmethod
    def _find_category_by_name_or_code(
        cls, name_or_code: str, level: int, parent_id: str = None, fuzzy_threshold: float = 0.85
    ) -> dict:
        """
        Find a ComplaintCategory by name (English/Arabic) or code with fuzzy matching.

        Args:
            name_or_code: The name or code to search for
            level: The level of category to find (1-4)
            parent_id: Optional parent category ID for hierarchical search
            fuzzy_threshold: Minimum similarity ratio for fuzzy matching (0.0 to 1.0)

        Returns:
            Dictionary with category details or None if not found:
            {
                'id': str,
                'code': str,
                'name_en': str,
                'name_ar': str,
                'level': int,
                'parent_id': str or None,
                'confidence': float
            }
        """
        from apps.complaints.models import ComplaintCategory
        from difflib import SequenceMatcher

        if not name_or_code or not name_or_code.strip():
            return None

        search_term = name_or_code.strip().lower()
        matches = []

        # Build base query
        query = ComplaintCategory.objects.filter(level=level, is_active=True)
        if parent_id:
            query = query.filter(parent_id=parent_id)

        categories = list(query)

        # Try exact matches first (English name, Arabic name, code)
        for cat in categories:
            # Exact match on code
            if cat.code and cat.code.lower() == search_term:
                return {
                    "id": str(cat.id),
                    "code": cat.code,
                    "name_en": cat.name_en,
                    "name_ar": cat.name_ar or "",
                    "level": cat.level,
                    "parent_id": str(cat.parent_id) if cat.parent else None,
                    "confidence": 1.0,
                    "match_type": "exact_code",
                }

            # Exact match on English name
            if cat.name_en.lower() == search_term:
                return {
                    "id": str(cat.id),
                    "code": cat.code or "",
                    "name_en": cat.name_en,
                    "name_ar": cat.name_ar or "",
                    "level": cat.level,
                    "parent_id": str(cat.parent_id) if cat.parent else None,
                    "confidence": 0.95,
                    "match_type": "exact_name_en",
                }

            # Exact match on Arabic name
            if cat.name_ar and cat.name_ar.lower() == search_term:
                return {
                    "id": str(cat.id),
                    "code": cat.code or "",
                    "name_en": cat.name_en,
                    "name_ar": cat.name_ar,
                    "level": cat.level,
                    "parent_id": str(cat.parent_id) if cat.parent else None,
                    "confidence": 0.95,
                    "match_type": "exact_name_ar",
                }

        # No exact match found, try fuzzy matching
        for cat in categories:
            # Try English name
            ratio_en = SequenceMatcher(None, search_term, cat.name_en.lower()).ratio()
            if ratio_en >= fuzzy_threshold:
                matches.append(
                    {
                        "id": str(cat.id),
                        "code": cat.code or "",
                        "name_en": cat.name_en,
                        "name_ar": cat.name_ar or "",
                        "level": cat.level,
                        "parent_id": str(cat.parent_id) if cat.parent else None,
                        "confidence": ratio_en * 0.85,  # Lower confidence for fuzzy matches
                        "match_type": "fuzzy_name_en",
                    }
                )

            # Try Arabic name
            if cat.name_ar:
                ratio_ar = SequenceMatcher(None, search_term, cat.name_ar.lower()).ratio()
                if ratio_ar >= fuzzy_threshold:
                    # Avoid duplicate matches
                    if not any(m["id"] == str(cat.id) for m in matches):
                        matches.append(
                            {
                                "id": str(cat.id),
                                "code": cat.code or "",
                                "name_en": cat.name_en,
                                "name_ar": cat.name_ar,
                                "level": cat.level,
                                "parent_id": str(cat.parent_id) if cat.parent else None,
                                "confidence": ratio_ar * 0.85,
                                "match_type": "fuzzy_name_ar",
                            }
                        )

        # Sort by confidence and return best match
        if matches:
            matches.sort(key=lambda x: x["confidence"], reverse=True)
            logger.info(
                f"Fuzzy match found for '{name_or_code}': {matches[0]['name_en']} (confidence: {matches[0]['confidence']:.2f})"
            )
            return matches[0]

        logger.warning(f"No match found for taxonomy term: '{name_or_code}' (level: {level})")
        return None

    @classmethod
    def _map_ai_taxonomy_to_db(cls, taxonomy_data: Dict) -> Dict:
        """
        Map an AI taxonomy classification onto ComplaintCategory records.

        The four levels are resolved top-down (domain, category, subcategory,
        classification). Each level is looked up with
        ``_find_category_by_name_or_code`` using the AI-provided ``code`` (or
        ``name_en`` when there is no code), restricted to children of the
        level resolved before it. A level is only attempted when its parent
        level was resolved.

        Args:
            taxonomy_data: Dictionary from the AI keyed by ``domain``,
                ``category``, ``subcategory`` and ``classification``. Each
                value is normally a dict with ``code`` / ``name_en``; a bare
                string is accepted and used directly as the search term.

        Returns:
            Dictionary with one entry per level holding the match returned by
            ``_find_category_by_name_or_code`` (or ``None``), plus ``errors``:
            a list of human-readable messages for levels that could not be
            mapped.
        """
        from apps.complaints.models import ComplaintCategory

        result = {"domain": None, "category": None, "subcategory": None, "classification": None, "errors": []}

        # The payload comes from a model response, so do not assume its shape.
        if not isinstance(taxonomy_data, dict):
            if taxonomy_data is not None:
                result["errors"].append(
                    f"Invalid taxonomy data: expected a dict, got {type(taxonomy_data).__name__}"
                )
            taxonomy_data = {}

        levels = ComplaintCategory.LevelChoices
        # (result key, label used in error messages, database level), top-down.
        chain = (
            ("domain", "Domain", levels.DOMAIN),
            ("category", "Category", levels.CATEGORY),
            ("subcategory", "Subcategory", levels.SUBCATEGORY),
            ("classification", "Classification", levels.CLASSIFICATION),
        )

        parent_key = None
        for key, label, level in chain:
            parent = result[parent_key] if parent_key else None
            node = taxonomy_data.get(key)

            # The domain has no parent; every other level needs a resolved one.
            if node and (parent_key is None or parent):
                if isinstance(node, dict):
                    # Try code first, then name.
                    raw_term = node.get("code") or node.get("name_en")
                elif isinstance(node, str):
                    raw_term = node
                else:
                    raw_term = None
                    result["errors"].append(f"{label} has unsupported type: {type(node).__name__}")

                # The lookup helper calls str methods on the term.
                search_term = str(raw_term) if raw_term else None
                if search_term:
                    result[key] = cls._find_category_by_name_or_code(
                        name_or_code=search_term,
                        level=level,
                        parent_id=parent["id"] if parent else None,
                    )
                    if not result[key]:
                        if parent:
                            result["errors"].append(
                                f"{label} not found: {search_term} (under {parent_key}: {parent['name_en']})"
                            )
                        else:
                            result["errors"].append(f"{label} not found: {search_term}")

            parent_key = key

        logger.info(
            f"Taxonomy mapping complete: domain={result['domain']}, category={result['category']}, subcategory={result['subcategory']}, classification={result['classification']}, errors={len(result['errors'])}"
        )

        return result

    @classmethod
    def _get_hospital_departments(cls, hospital_id: int) -> List[str]:
        """
        Return the names of the departments of a hospital whose status is "active".

        Any error during the lookup is logged and an empty list is returned.
        """
        from apps.organizations.models import Department

        names: List[str] = []
        try:
            active_departments = Department.objects.filter(hospital_id=hospital_id, status="active")
            names.extend(active_departments.values_list("name", flat=True))
        except Exception as e:
            logger.error(f"Error fetching hospital departments: {e}")
            return []
        return names

    @classmethod
    def chat_completion(
        cls,
        prompt: str,
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        system_prompt: Optional[str] = None,
        response_format: Optional[str] = None,
    ) -> str:
        """
        Perform a chat completion through the OpenRouter API.

        Args:
            prompt: User prompt.
            model: AI model (the configured model is used if not provided).
            temperature: Sampling temperature; the configured value is used
                only when this is ``None``, so ``0`` is honoured.
            max_tokens: Maximum tokens to generate (the configured value is
                used when this is ``None`` or ``0``).
            system_prompt: Optional system prompt, sent before the user prompt.
            response_format: Optional format name (for example ``'text'`` or
                ``'json_object'``); sent as ``{"type": response_format}``.

        Returns:
            Generated text response.

        Raises:
            AIServiceError: If anything fails while preparing or performing
                the request; the original exception is attached as the cause.
        """
        try:
            model_name = model or cls._get_model()
            temp = temperature if temperature is not None else cls._get_temperature()
            max_tok = max_tokens or cls._get_max_tokens()
            timeout = cls.DEFAULT_TIMEOUT

            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})

            rf = {"type": response_format} if response_format else None

            return cls._openrouter_completion(
                model=model_name,
                messages=messages,
                temperature=temp,
                max_tokens=max_tok,
                timeout=timeout,
                response_format=rf,
            )

        except Exception as e:
            logger.error(f"AI service error: {str(e)}")
            # Chain explicitly so the underlying failure stays visible as the cause.
            raise AIServiceError(f"Failed to get AI response: {str(e)}") from e

    @classmethod
    def analyze_complaint(
        cls,
        title: Optional[str] = None,
        description: str = "",
        category: Optional[str] = None,
        hospital_id: Optional[int] = None,
        use_taxonomy: bool = True,
    ) -> Dict[str, Any]:
        """
        Analyze a complaint and determine type (complaint vs appreciation), title, severity, priority,
        4-level SHCT taxonomy (Domain, Category, Subcategory, Classification), and department.

        Args:
            title: Complaint title (optional, will be generated if not provided)
            description: Complaint description
            category: Complaint category (deprecated, kept for backward compatibility)
            hospital_id: Hospital ID to fetch departments
            use_taxonomy: Whether to use 4-level SHCT taxonomy classification (default: True)

        Returns:
            Dictionary with analysis:
            {
                'complaint_type': 'complaint' | 'appreciation',  # Type of feedback
                'title_en': str,  # Generated or provided title (English)
                'title_ar': str,  # Generated or provided title (Arabic)
                'short_description_en': str,  # 2-3 sentence summary (English)
                'short_description_ar': str,  # 2-3 sentence summary (Arabic)
                'suggested_action_en': str,  # Suggested action (English)
                'suggested_action_ar': str,  # Suggested action (Arabic)
                'severity': 'low' | 'medium' | 'high' | 'critical',
                'priority': 'low' | 'medium' | 'high',
                'category': str,  # Legacy category name (deprecated, kept for backward compatibility)
                'subcategory': str,  # Legacy subcategory name (deprecated, kept for backward compatibility)
                'department': str,  # Name of department
                'taxonomy': {  # NEW: 4-level SHCT taxonomy classification
                    'domain': {
                        'code': 'CLINICAL',
                        'name_en': 'Clinical',
                        'name_ar': 'سريري',
                        'confidence': 0.95
                    },
                    'category': {
                        'code': 'QUALITY',
                        'name_en': 'Quality',
                        'name_ar': 'الجودة',
                        'confidence': 0.88
                    },
                    'subcategory': {
                        'code': 'EXAMINATION',
                        'name_en': 'Examination',
                        'name_ar': 'الفحص',
                        'confidence': 0.82
                    },
                    'classification': {
                        'code': 'exam_not_performed',
                        'name_en': 'Examination not performed',
                        'name_ar': 'لم يتم إجراء الفحص',
                        'confidence': 0.75
                    }
                },
                'staff_names': list,  # All staff names mentioned
                'primary_staff_name': str,  # Primary staff name
                'reasoning_en': str,  # Explanation for classification (English)
                'reasoning_ar': str  # Explanation for classification (Arabic)
            }
        """
        # Check cache first
        cache_key = f"complaint_analysis:{hash(str(title) + description + str(hospital_id) + str(use_taxonomy))}"
        cached_result = cache.get(cache_key)
        if cached_result:
            logger.info("Using cached complaint analysis")
            return cached_result

        # Get 4-level SHCT taxonomy hierarchy
        taxonomy_hierarchy = cls._get_taxonomy_hierarchy()

        # Format taxonomy for prompt
        taxonomy_text = cls._format_taxonomy_for_prompt(taxonomy_hierarchy)

        # Get hospital departments if hospital_id is provided
        departments_text = ""
        if hospital_id:
            departments = cls._get_hospital_departments(hospital_id)
            if departments:
                departments_text = f"\nAvailable Departments for this hospital:\n"
                for dept in departments:
                    departments_text += f"- {dept}\n"
                departments_text += "\n"

        # Build prompt
        title_text = f"Complaint Title: {title}\n" if title else ""
        prompt = f"""Analyze this healthcare complaint and classify it using the 4-level SHCT taxonomy.

            Complaint Description: {description}
            {title_text}{departments_text}Severity Classification (choose one):
            - low: Minor issues, no impact on patient care, routine matters
            - medium: Moderate issues, some patient dissatisfaction, not urgent
            - high: Serious issues, significant patient impact, requires timely attention
            - critical: Emergency, immediate threat to patient safety, requires instant action

            Priority Classification (choose one):
            - low: Can be addressed within 1-2 weeks
            - medium: Should be addressed within 3-5 days
            - high: Requires immediate attention (within 24 hours)

            4-Level SHCT Taxonomy Hierarchy:
            {taxonomy_text}

            Instructions:
            1. If no title is provided, generate a concise title (max 10 words) that summarizes the complaint in BOTH English and Arabic
            2. Generate a brief_summary (exactly 2-3 words) that serves as a quick tag/label for the complaint in BOTH English and Arabic. Examples: "Wait Time", "Staff Attitude", "Medication Error", "Billing Issue", "Facility Cleanliness", "Privacy Concern"
            3. Generate a short_description (2-3 sentences) that captures the main issue and context in BOTH English and Arabic
            3. Classify the complaint using the 4-level SHCT taxonomy:
               a. Select the most appropriate DOMAIN (Level 1)
               b. Select the most appropriate CATEGORY within that domain (Level 2)
               c. Select the most appropriate SUBCATEGORY within that category (Level 3)
               d. Select the most appropriate CLASSIFICATION within that subcategory (Level 4)
               e. Use the CODE and NAME from the taxonomy above - DO NOT invent new categories
            4. For each taxonomy level, assign a confidence score (0.0 to 1.0) reflecting how certain you are
            5. Select the most appropriate department from the hospital's departments (if available)
            6. Extract ALL staff members mentioned in the complaint (physicians, nurses, etc.)
            7. Return ALL staff names WITHOUT titles (Dr., Nurse, دكتور, ممرض, etc.)
            8. Identify the PRIMARY staff member (the one most relevant to the complaint)
            9. If no staff is mentioned, return empty arrays for staff names
            10. Generate 3-5 suggested_actions as a JSON list, each with:
                - action: Specific, actionable step
                - priority: high|medium|low
                - category: clinical_quality|patient_safety|service_quality|staff_behavior|facility|process_improvement|other
                Provide all actions in BOTH English and Arabic

            IMPORTANT: ALL TEXT FIELDS MUST BE PROVIDED IN BOTH ENGLISH AND ARABIC
            - title: Provide in both English and Arabic
            - short_description: Provide in both English and Arabic
            - suggested_actions: Provide as a list with English and Arabic for each action
            - reasoning: Provide in both English and Arabic

            Provide your analysis in JSON format:
            {{
                "title_en": "concise title in English summarizing the complaint (max 10 words)",
                "title_ar": "العنوان بالعربية",
                "brief_summary_en": "exactly 2-3 word tag/label in English (e.g., 'Wait Time', 'Staff Attitude', 'Billing Issue')",
                "brief_summary_ar": "وصف مختصر من 2-3 كلمات بالعربية",
                "short_description_en": "2-3 sentence summary in English of the complaint that captures the main issue and context",
                "short_description_ar": "ملخص من 2-3 جمل بالعربية",
                "severity": "low|medium|high|critical",
                "priority": "low|medium|high",
                "category": "exact category name from Level 2 of taxonomy (for backward compatibility)",
                "subcategory": "exact subcategory name from Level 3 of taxonomy (for backward compatibility)",
                "department": "exact department name from the hospital's departments, or empty string if not applicable",
                "staff_names": ["name1", "name2", "name3"],
                "primary_staff_name": "name of PRIMARY staff member (the one most relevant to the complaint), or empty string if no staff mentioned",
                "suggested_actions": [
                    {{
                        "action_en": "Specific actionable step in English",
                        "action_ar": "خطوة محددة بالعربية",
                        "priority": "high|medium|low",
                        "category": "clinical_quality|patient_safety|service_quality|staff_behavior|facility|process_improvement|other"
                    }},
                    {{
                        "action_en": "Another action in English",
                        "action_ar": "إجراء آخر بالعربية",
                        "priority": "medium",
                        "category": "process_improvement"
                    }}
                ],
                "reasoning_en": "Brief explanation in English of your classification (2-3 sentences)",
                "reasoning_ar": "شرح مختصر بالعربية",
                "taxonomy": {{
                    "domain": {{
                        "code": "exact code from taxonomy (e.g., CLINICAL)",
                        "name_en": "exact English name from taxonomy",
                        "name_ar": "exact Arabic name from taxonomy",
                        "confidence": 0.95
                    }},
                    "category": {{
                        "code": "exact code from taxonomy (e.g., QUALITY)",
                        "name_en": "exact English name from taxonomy",
                        "name_ar": "exact Arabic name from taxonomy",
                        "confidence": 0.88
                    }},
                    "subcategory": {{
                        "code": "exact code from taxonomy (e.g., EXAMINATION)",
                        "name_en": "exact English name from taxonomy",
                        "name_ar": "exact Arabic name from taxonomy",
                        "confidence": 0.82
                    }},
                    "classification": {{
                        "code": "exact code from taxonomy (e.g., exam_not_performed)",
                        "name_en": "exact English name from taxonomy",
                        "name_ar": "exact Arabic name from taxonomy",
                        "confidence": 0.75
                    }}
                }}
            }}"""

        system_prompt = """You are a healthcare complaint analysis expert fluent in both English and Arabic.
            Your job is to classify complaints using the 4-level SHCT taxonomy (Domain, Category, Subcategory, Classification).
            Always use EXACT names and codes from the provided taxonomy - do not invent new categories.
            Be conservative - when in doubt, choose a higher severity/priority.
            Generate clear, concise titles that accurately summarize the complaint in BOTH English and Arabic.
            Provide all text fields in both languages.
            Assign realistic confidence scores based on how clearly the complaint fits each taxonomy level.

            ARABIC LANGUAGE REQUIREMENTS:
            - Use Modern Standard Arabic (Fusha) - الفصحى
            - Use formal, professional medical terminology
            - Ensure Arabic text is culturally appropriate for Saudi Arabian healthcare context
            - Arabic translations should be natural and fluent, not literal word-for-word translations
            - Use proper Arabic grammar and sentence structure
            - Avoid mixing Arabic and English characters (no Arabizi)
            - If the complaint is in Arabic, improve and professionalize the Arabic text while keeping the meaning
            - If the complaint is in English, provide high-quality professional Arabic translations"""

        try:
            response = cls.chat_completion(
                prompt=prompt,
                system_prompt=system_prompt,
                response_format="json_object",
                temperature=0.2,  # Lower temperature for consistent classification
            )

            # Parse JSON response
            result = json.loads(response)

            # Detect complaint type
            complaint_type = cls._detect_complaint_type(description + " " + (title or ""))
            result["complaint_type"] = complaint_type

            # Map AI taxonomy to database objects
            if use_taxonomy and "taxonomy" in result:
                taxonomy_mapping = cls._map_ai_taxonomy_to_db(result["taxonomy"])
                # Replace AI taxonomy IDs with database IDs
                result["taxonomy_mapping"] = taxonomy_mapping
                result["taxonomy"] = result["taxonomy"]  # Keep original AI response

            # Use provided title if available, otherwise use AI-generated title
            if title:
                result["title"] = title

            # Validate severity
            if result.get("severity") not in cls.SEVERITY_CHOICES:
                result["severity"] = "medium"
                logger.warning(f"Invalid severity, defaulting to medium")

            # Validate priority
            if result.get("priority") not in cls.PRIORITY_CHOICES:
                result["priority"] = "medium"
                logger.warning(f"Invalid priority, defaulting to medium")

            # Validate Arabic text quality
            result = cls._validate_arabic_text(result)

            # Ensure title exists (for backward compatibility)
            if not result.get("title"):
                result["title"] = "Complaint"

            # Cache result for 1 hour
            cache.set(cache_key, result, timeout=3600)

            logger.info(
                f"Complaint analyzed: title={result['title']}, severity={result['severity']}, "
                f"priority={result['priority']}, taxonomy={result.get('taxonomy', {}).get('domain', {}).get('name_en', 'N/A')}"
            )
            return result

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse AI response: {e}")
            # Return defaults
            return {
                "title": title or "Complaint",
                "title_en": title or "Complaint",
                "title_ar": title or "شكوى",
                "short_description_en": description[:200] if description else "",
                "short_description_ar": description[:200] if description else "",
                "severity": "medium",
                "priority": "medium",
                "category": "other",
                "subcategory": "",
                "department": "",
                "staff_names": [],
                "primary_staff_name": "",
                "suggested_action_en": "",
                "suggested_action_ar": "",
                "reasoning_en": "AI analysis failed, using default values",
                "reasoning_ar": "فشل تحليل الذكاء الاصطناعي، استخدام القيم الافتراضية",
                "taxonomy": None,
                "taxonomy_mapping": None,
            }
        except AIServiceError as e:
            logger.error(f"AI service error: {e}")
            return {
                "title": title or "Complaint",
                "title_en": title or "Complaint",
                "title_ar": title or "شكوى",
                "short_description_en": description[:200] if description else "",
                "short_description_ar": description[:200] if description else "",
                "severity": "medium",
                "priority": "medium",
                "category": "other",
                "subcategory": "",
                "department": "",
                "staff_names": [],
                "primary_staff_name": "",
                "suggested_action_en": "",
                "suggested_action_ar": "",
                "reasoning_en": f"AI service unavailable: {str(e)}",
                "reasoning_ar": f"خدمة الذكاء الاصطناعي غير متوفرة: {str(e)}",
                "taxonomy": None,
                "taxonomy_mapping": None,
            }

    @classmethod
    def analyze_inquiry(
        cls,
        subject: str,
        message: str,
        category: Optional[str] = None,
        hospital_id: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Analyze an inquiry to determine priority, department, taxonomy, and generate bilingual summaries.

        Args:
            subject: Inquiry subject/title
            message: Inquiry message/body
            category: Inquiry category (appointment, billing, etc.)
            hospital_id: Hospital ID to fetch departments

        Returns:
            Dictionary with analysis:
            {
                'title_en': str,
                'title_ar': str,
                'brief_summary_en': str,
                'brief_summary_ar': str,
                'short_description_en': str,
                'short_description_ar': str,
                'priority': 'low' | 'medium' | 'high',
                'department': str,
                'reasoning_en': str,
                'reasoning_ar': str,
                'taxonomy': dict (4-level SHCT taxonomy),
                'taxonomy_mapping': dict (database FK IDs),
            }
        """
        departments_text = ""
        if hospital_id:
            departments = cls._get_hospital_departments(hospital_id)
            if departments:
                departments_text = f"\nAvailable Departments for this hospital:\n"
                for dept in departments:
                    departments_text += f"- {dept}\n"
                departments_text += "\n"

        taxonomy_hierarchy = cls._get_taxonomy_hierarchy()
        taxonomy_text = cls._format_taxonomy_for_prompt(taxonomy_hierarchy)

        prompt = f"""Analyze this healthcare inquiry and classify it appropriately.

Inquiry Subject: {subject}
Inquiry Message: {message}
Category: {category or "Not specified"}
{departments_text}Priority Classification (choose one):
- low: General information, routine questions, can be addressed within 1-2 weeks
- medium: Needs attention, moderate urgency, should be addressed within 3-5 days
- high: Urgent inquiry, requires immediate attention (within 24 hours)

Instructions:
1. Generate a concise title (max 10 words) that summarizes the inquiry in BOTH English and Arabic
2. Generate a brief_summary (exactly 2-3 words) as a quick tag/label in BOTH English and Arabic
3. Generate a short_description (2-3 sentences) capturing the main inquiry in BOTH English and Arabic
4. Select the most appropriate priority level
5. Select the most appropriate department from the hospital's departments (if available)
6. Classify using the 4-level SHCT taxonomy below
7. Provide reasoning for your classification in BOTH English and Arabic

SHCT Taxonomy:
{taxonomy_text}

IMPORTANT: ALL TEXT FIELDS MUST BE PROVIDED IN BOTH ENGLISH AND ARABIC

Provide your analysis in JSON format:
{{
    "title_en": "concise title in English summarizing the inquiry (max 10 words)",
    "title_ar": "العنوان بالعربية",
    "brief_summary_en": "exactly 2-3 word tag/label in English",
    "brief_summary_ar": "وصف مختصر من 2-3 كلمات بالعربية",
    "short_description_en": "2-3 sentence summary in English of the inquiry",
    "short_description_ar": "ملخص من 2-3 جمل بالعربية",
    "priority": "low|medium|high",
    "department": "exact department name from the hospital's departments, or empty string if not applicable",
    "reasoning_en": "Brief explanation in English of your classification (2-3 sentences)",
    "reasoning_ar": "شرح مختصر بالعربية",
    "taxonomy": {{
        "domain": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}},
        "category": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}},
        "subcategory": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}},
        "classification": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}}
    }}
}}"""

        system_prompt = """You are a healthcare inquiry analysis expert fluent in both English and Arabic.
Your job is to analyze patient inquiries, determine priority, suggest appropriate department, classify using SHCT taxonomy, and generate bilingual summaries.
Be accurate in priority assessment - err on the side of higher priority for sensitive medical topics.
Generate clear, concise summaries that accurately capture the inquiry in BOTH English and Arabic.

ARABIC LANGUAGE REQUIREMENTS:
- Use Modern Standard Arabic (Fusha) - الفصحى
- Use formal, professional medical terminology
- Ensure Arabic text is culturally appropriate for Saudi Arabian healthcare context
- Arabic translations should be natural and fluent, not literal word-for-word translations

Always use EXACT names and codes from the provided taxonomy - do not invent new categories."""

        try:
            response = cls.chat_completion(
                prompt=prompt,
                system_prompt=system_prompt,
                response_format="json_object",
                temperature=0.2,
            )

            result = json.loads(response)

            if result.get("priority") not in cls.PRIORITY_CHOICES:
                result["priority"] = "medium"
                logger.warning(f"Invalid priority for inquiry, defaulting to medium")

            result = cls._validate_arabic_text(result)

            if "taxonomy" in result:
                taxonomy_mapping = cls._map_ai_taxonomy_to_db(result["taxonomy"])
                result["taxonomy_mapping"] = taxonomy_mapping

            logger.info(f"Inquiry analyzed: title={result.get('title_en')}, priority={result.get('priority')}")
            return result

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse inquiry AI response: {e}")
            return {
                "title_en": subject,
                "title_ar": subject,
                "brief_summary_en": "Inquiry",
                "brief_summary_ar": "استفسار",
                "short_description_en": message[:200] if message else "",
                "short_description_ar": message[:200] if message else "",
                "priority": "medium",
                "department": "",
                "reasoning_en": "AI analysis failed, using default values",
                "reasoning_ar": "فشل تحليل الذكاء الاصطناعي، استخدام القيم الافتراضية",
                "taxonomy": None,
                "taxonomy_mapping": None,
            }
        except AIServiceError as e:
            logger.error(f"AI service error for inquiry: {e}")
            return {
                "title_en": subject,
                "title_ar": subject,
                "brief_summary_en": "Inquiry",
                "brief_summary_ar": "استفسار",
                "short_description_en": message[:200] if message else "",
                "short_description_ar": message[:200] if message else "",
                "priority": "medium",
                "department": "",
                "reasoning_en": f"AI service unavailable: {str(e)}",
                "reasoning_ar": f"خدمة الذكاء الاصطناعي غير متوفرة: {str(e)}",
                "taxonomy": None,
                "taxonomy_mapping": None,
            }

    @classmethod
    def analyze_observation(
        cls,
        title: Optional[str] = None,
        description: str = "",
        hospital_id: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Analyze a staff observation to determine severity, category, and generate bilingual summaries
        and suggested PX actions for quality improvement.

        Successful analyses are cached for one hour under a key derived from
        (title, description, hospital_id). This method does not raise for the
        failure modes it knows about: if the AI service reports an error, or the
        reply is not a JSON object, a default analysis with ``severity="medium"``
        is returned (and not cached).

        Args:
            title: Observation title (optional, will be generated if not provided)
            description: Observation description
            hospital_id: Hospital ID to fetch departments

        Returns:
            Dictionary with analysis:
            {
                'title_en': str,
                'title_ar': str,
                'brief_summary_en': str,
                'brief_summary_ar': str,
                'short_description_en': str,
                'short_description_ar': str,
                'severity': 'low' | 'medium' | 'high' | 'critical',
                'category': str,
                'department': str,
                'suggested_actions': [
                    {'action_en': str, 'action_ar': str, 'priority': str, 'category': str}
                ],
                'suggested_action_en': str,
                'suggested_action_ar': str,
                'reasoning_en': str,
                'reasoning_ar': str,
            }
            The dictionary also carries 'title' (the caller's title when given,
            otherwise whatever the model supplied, otherwise "Observation") and,
            when the reply contains 'taxonomy', a 'taxonomy_mapping' entry.
        """
        import hashlib

        def default_analysis(reasoning_en: str, reasoning_ar: str) -> Dict[str, Any]:
            """Build the analysis returned when no usable AI reply is available."""
            excerpt = description[:200] if description else ""
            return {
                "title": title or "Observation",
                "title_en": title or "Observation",
                "title_ar": title or "ملاحظة",
                "brief_summary_en": "Observation",
                "brief_summary_ar": "ملاحظة",
                "short_description_en": excerpt,
                "short_description_ar": excerpt,
                "severity": "medium",
                "category": "Other",
                "department": "",
                "suggested_actions": [],
                "suggested_action_en": "",
                "suggested_action_ar": "",
                "reasoning_en": reasoning_en,
                "reasoning_ar": reasoning_ar,
                "taxonomy": None,
                "taxonomy_mapping": None,
            }

        # Built-in hash() of a str is salted per interpreter process, so a key
        # built from it differs between workers and across restarts and a shared
        # cache would never be hit. A SHA-256 digest is stable, and JSON-encoding
        # the three inputs keeps them separated (a plain concatenation lets
        # different inputs produce the same text).
        key_material = json.dumps([title, description, hospital_id], default=str)
        cache_key = f"observation_analysis:{hashlib.sha256(key_material.encode('utf-8')).hexdigest()}"
        cached_result = cache.get(cache_key)
        if cached_result:
            logger.info("Using cached observation analysis")
            return cached_result

        departments_text = ""
        if hospital_id:
            departments = cls._get_hospital_departments(hospital_id)
            if departments:
                departments_text = (
                    "\nAvailable Departments for this hospital:\n"
                    + "".join(f"- {dept}\n" for dept in departments)
                    + "\n"
                )

        title_text = f"Observation Title: {title}\n" if title else ""

        taxonomy_hierarchy = cls._get_taxonomy_hierarchy()
        taxonomy_text = cls._format_taxonomy_for_prompt(taxonomy_hierarchy)

        prompt = f"""Analyze this healthcare staff observation and classify it appropriately.

            Observation Description: {description}
            {title_text}{departments_text}Severity Classification (choose one):
            - low: Minor issues, no impact on patient care or safety, routine matters
            - medium: Moderate issues, potential for patient dissatisfaction, not urgent
            - high: Serious issues, significant impact on patient care or safety, requires timely attention
            - critical: Emergency, immediate threat to patient safety, requires instant action

            Observation Categories (choose one):
            - Patient Safety (سلامة المرضى)
            - Clinical Quality (الجودة السريرية)
            - Infection Control (مكافحة العدوى)
            - Medication Safety (سلامة الأدوية)
            - Equipment & Devices (المعدات والأجهزة)
            - Facility & Environment (المرافق والبيئة)
            - Staff Behavior (سلوك الموظفين)
            - Communication (التواصل)
            - Documentation (التوثيق)
            - Process & Workflow (العمليات وسير العمل)
            - Security (الأمن)
            - IT & Systems (تقنية المعلومات والأنظمة)
            - Housekeeping (التدبير المنزلي)
            - Food Services (خدمات الطعام)
            - Other (أخرى)

            Instructions:
            1. If no title is provided, generate a concise title (max 10 words) that summarizes the observation in BOTH English and Arabic
            2. Generate a brief_summary (exactly 2-3 words) that serves as a quick tag/label for the observation in BOTH English and Arabic. Examples: "Fall Risk", "Hand Hygiene", "Equipment Failure", "Cleanliness Issue"
            3. Generate a short_description (2-3 sentences) that captures the main observation and context in BOTH English and Arabic
            4. Classify the observation using the appropriate category from the list above
            5. Classify severity accurately - be conservative, when in doubt choose higher severity for patient safety
            6. Select the most appropriate department from the hospital's departments (if available)
            7. Generate 3-5 suggested_actions as improvement actions (PX Actions), each with:
               - action: Specific, actionable improvement step
               - priority: high|medium|low
               - category: patient_safety|clinical_quality|service_quality|staff_behavior|facility|process_improvement|infection_control|medication_safety|other
               Provide all actions in BOTH English and Arabic
            8. Provide reasoning for your classification in BOTH English and Arabic
            9. Classify using the 4-level SHCT taxonomy below

            SHCT Taxonomy:
            {taxonomy_text}

            IMPORTANT: ALL TEXT FIELDS MUST BE PROVIDED IN BOTH ENGLISH AND ARABIC
            - title: Provide in both English and Arabic
            - brief_summary: Provide in both English and Arabic
            - short_description: Provide in both English and Arabic
            - suggested_actions: Provide as a list with English and Arabic for each action
            - reasoning: Provide in both English and Arabic

            Provide your analysis in JSON format:
            {{
                "title_en": "concise title in English summarizing the observation (max 10 words)",
                "title_ar": "العنوان بالعربية",
                "brief_summary_en": "exactly 2-3 word tag/label in English (e.g., 'Fall Risk', 'Hand Hygiene')",
                "brief_summary_ar": "وصف مختصر من 2-3 كلمات بالعربية",
                "short_description_en": "2-3 sentence summary in English of the observation that captures the main issue and context",
                "short_description_ar": "ملخص من 2-3 جمل بالعربية",
                "severity": "low|medium|high|critical",
                "category": "exact category name from the list above",
                "department": "exact department name from the hospital's departments, or empty string if not applicable",
                "suggested_actions": [
                    {{
                        "action_en": "Specific actionable improvement step in English",
                        "action_ar": "خطوة محددة للتحسين بالعربية",
                        "priority": "high|medium|low",
                        "category": "patient_safety|clinical_quality|service_quality|staff_behavior|facility|process_improvement|infection_control|medication_safety|other"
                    }},
                    {{
                        "action_en": "Another action in English",
                        "action_ar": "إجراء آخر بالعربية",
                        "priority": "medium",
                        "category": "process_improvement"
                    }}
                ],
                "suggested_action_en": "top suggested action in English (backward compatibility)",
                "suggested_action_ar": "أهم إجراء مقترح بالعربية (لل توافق مع الأنظمة السابقة)",
                "reasoning_en": "Brief explanation in English of your classification (2-3 sentences)",
                "reasoning_ar": "شرح مختصر بالعربية",
                "taxonomy": {{
                    "domain": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}},
                    "category": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}},
                    "subcategory": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}},
                    "classification": {{"code": "exact code from taxonomy", "name_en": "exact English name", "name_ar": "exact Arabic name", "confidence": 0.0}}
                }}
            }}"""

        system_prompt = """You are a healthcare observation analysis expert fluent in both English and Arabic.
            Your job is to analyze staff-reported observations in a healthcare setting, determine severity,
            classify the observation category, classify using SHCT taxonomy, and suggest actionable improvement steps (PX Actions).
            Be conservative with severity - when in doubt, choose higher severity for patient safety concerns.
            Generate clear, concise summaries that accurately capture the observation in BOTH English and Arabic.
            Suggested actions should be specific, actionable, and focused on quality improvement.
            Always use EXACT names and codes from the provided taxonomy - do not invent new categories.

            ARABIC LANGUAGE REQUIREMENTS:
            - Use Modern Standard Arabic (Fusha) - الفصحى
            - Use formal, professional medical terminology
            - Ensure Arabic text is culturally appropriate for Saudi Arabian healthcare context
            - Arabic translations should be natural and fluent, not literal word-for-word translations
            - Use proper Arabic grammar and sentence structure
            - Avoid mixing Arabic and English characters (no Arabizi)
            - If the observation is in Arabic, improve and professionalize the Arabic text while keeping the meaning
            - If the observation is in English, provide high-quality professional Arabic translations"""

        try:
            response = cls.chat_completion(
                prompt=prompt,
                system_prompt=system_prompt,
                response_format="json_object",
                temperature=0.2,
            )

            result = json.loads(response)

            # json.loads also accepts top-level lists, strings and numbers; the
            # rest of this method needs a mapping, so treat anything else as an
            # unusable reply rather than crashing on item assignment.
            if not isinstance(result, dict):
                logger.error(
                    "Failed to parse observation AI response: expected a JSON object, got %s",
                    type(result).__name__,
                )
                return default_analysis(
                    "AI analysis failed, using default values",
                    "فشل تحليل الذكاء الاصطناعي، استخدام القيم الافتراضية",
                )

            # A caller-supplied title always wins over the generated one.
            if title:
                result["title"] = title

            if result.get("severity") not in cls.SEVERITY_CHOICES:
                result["severity"] = "medium"
                logger.warning("Invalid severity for observation, defaulting to medium")

            result = cls._validate_arabic_text(result)

            if not result.get("title"):
                result["title"] = "Observation"

            # Backfill the single-action fields from the first suggested action,
            # but only when that entry really is a mapping: the model may return
            # a bare string or a non-list value here.
            actions = result.get("suggested_actions")
            if (
                not result.get("suggested_action_en")
                and isinstance(actions, list)
                and actions
                and isinstance(actions[0], dict)
            ):
                first_action = actions[0]
                result["suggested_action_en"] = first_action.get("action_en", "")
                result["suggested_action_ar"] = first_action.get("action_ar", "")

            if "taxonomy" in result:
                result["taxonomy_mapping"] = cls._map_ai_taxonomy_to_db(result["taxonomy"])

            # Cache result for 1 hour
            cache.set(cache_key, result, timeout=3600)

            logger.info(
                "Observation analyzed: title=%s, severity=%s, category=%s",
                result.get("title_en"),
                result.get("severity"),
                result.get("category", "N/A"),
            )
            return result

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse observation AI response: {e}")
            return default_analysis(
                "AI analysis failed, using default values",
                "فشل تحليل الذكاء الاصطناعي، استخدام القيم الافتراضية",
            )
        except AIServiceError as e:
            logger.error(f"AI service error for observation: {e}")
            return default_analysis(
                f"AI service unavailable: {str(e)}",
                f"خدمة الذكاء الاصطناعي غير متوفرة: {str(e)}",
            )

    @classmethod
    def _format_taxonomy_for_prompt(cls, taxonomy_hierarchy: Dict) -> str:
        """
        Format taxonomy hierarchy for AI prompt.

        Args:
            taxonomy_hierarchy: Dictionary from _get_taxonomy_hierarchy()

        Returns:
            Formatted string representation of taxonomy
        """
        text = ""

        for domain in taxonomy_hierarchy.get("domains", []):
            text += f"\nDOMAIN: {domain['code']} - {domain['name_en']} ({domain['name_ar']})\n"

            for category in domain.get("categories", []):
                text += f"  CATEGORY: {category['code']} - {category['name_en']} ({category['name_ar']})\n"

                for subcategory in category.get("subcategories", []):
                    text += f"    SUBCATEGORY: {subcategory['code']} - {subcategory['name_en']} ({subcategory['name_ar']})\n"

                    for classification in subcategory.get("classifications", []):
                        text += f"      CLASSIFICATION: {classification['code']} - {classification['name_en']} ({classification['name_ar']})\n"

        return text

    @classmethod
    def _validate_arabic_text(cls, result: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate and clean Arabic text fields to ensure quality.

        Args:
            result: Dictionary containing AI analysis results

        Returns:
            Validated and cleaned result dictionary
        """
        import re

        def is_valid_arabic(text: str) -> bool:
            """Check if text contains valid Arabic characters"""
            if not text:
                return False
            # Check for Arabic Unicode range (including Arabic Presentation Forms)
            arabic_pattern = re.compile(r"[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]")
            return bool(arabic_pattern.search(text))

        def clean_arabic_text(text: str) -> str:
            """Clean and normalize Arabic text"""
            if not text:
                return ""
            # Remove excessive whitespace
            text = re.sub(r"\s+", " ", text).strip()
            # Remove mixed Arabic-English words (Arabizi)
            # This is a basic check - more sophisticated detection could be added
            return text

        # Fields to validate
        arabic_fields = ["title_ar", "brief_summary_ar", "short_description_ar", "reasoning_ar"]

        for field in arabic_fields:
            if field in result and result[field]:
                # Clean the text
                result[field] = clean_arabic_text(result[field])

                # Check if valid Arabic
                if not is_valid_arabic(result[field]):
                    logger.warning(f"Field {field} does not contain valid Arabic text")
                    # Generate fallback if needed
                    if field == "title_ar" and result.get("title_en"):
                        result[field] = f"[AR] {result['title_en']}"  # Mark for manual translation
                    elif field == "brief_summary_ar" and result.get("brief_summary_en"):
                        result[field] = f"[AR] {result['brief_summary_en']}"
                    elif field == "short_description_ar" and result.get("short_description_en"):
                        result[field] = f"[AR] {result['short_description_en']}"
                    elif field == "reasoning_ar" and result.get("reasoning_en"):
                        result[field] = f"[AR] {result['reasoning_en']}"

        # Validate suggested actions
        if "suggested_actions" in result and isinstance(result["suggested_actions"], list):
            for action in result["suggested_actions"]:
                if "action_ar" in action and action["action_ar"]:
                    action["action_ar"] = clean_arabic_text(action["action_ar"])
                    if not is_valid_arabic(action["action_ar"]):
                        logger.warning("Suggested action does not contain valid Arabic text")
                        if "action_en" in action:
                            action["action_ar"] = f"[AR] {action['action_en']}"

        # Validate taxonomy Arabic names
        if "taxonomy" in result and result["taxonomy"]:
            for level in ["domain", "category", "subcategory", "classification"]:
                if level in result["taxonomy"] and result["taxonomy"][level]:
                    level_data = result["taxonomy"][level]
                    if "name_ar" in level_data and level_data["name_ar"]:
                        level_data["name_ar"] = clean_arabic_text(level_data["name_ar"])
                        if not is_valid_arabic(level_data["name_ar"]):
                            logger.warning(f"Taxonomy {level} does not contain valid Arabic name")

        return result

    @classmethod
    def classify_sentiment(cls, text: str) -> Dict[str, Any]:
        """
        Classify sentiment of text.

        The model reply is parsed as JSON and normalised before it is returned,
        so callers can compare ``score`` numerically and rely on ``sentiment``
        being one of the three documented labels.

        Args:
            text: Text to analyze

        Returns:
            Dictionary with sentiment analysis:
            {
                'sentiment': 'positive' | 'neutral' | 'negative',
                'score': float,  # -1.0 to 1.0
                'confidence': float  # 0.0 to 1.0
            }
            Any additional keys the model returned are preserved. If the AI call
            fails or the reply is not a JSON object, a neutral result with zero
            score and confidence is returned instead of raising.
        """

        def _bounded(value: Any, low: float, high: float) -> float:
            """Coerce value to float and clamp it to [low, high]; 0.0 if not numeric."""
            try:
                number = float(value)
            except (TypeError, ValueError):
                return 0.0
            if number != number:  # NaN is the only float unequal to itself
                return 0.0
            return max(low, min(high, number))

        prompt = f"""Analyze the sentiment of this text:

        {text}

        Provide your analysis in JSON format:
        {{
            "sentiment": "positive|neutral|negative",
            "score": float,  # -1.0 (very negative) to 1.0 (very positive)
            "confidence": float  # 0.0 to 1.0
        }}"""

        system_prompt = """You are a sentiment analysis expert.
        Analyze the emotional tone of the text accurately."""

        fallback = {"sentiment": "neutral", "score": 0.0, "confidence": 0.0}

        try:
            response = cls.chat_completion(
                prompt=prompt, system_prompt=system_prompt, response_format="json_object", temperature=0.1
            )
            result = json.loads(response)
        except (json.JSONDecodeError, AIServiceError) as e:
            logger.error("Sentiment analysis failed: %s", e)
            return fallback

        # Valid JSON is not necessarily an object (it may be a list, string, number...).
        if not isinstance(result, dict):
            logger.error("Sentiment analysis failed: %s", "response is not a JSON object")
            return fallback

        sentiment = result.get("sentiment")
        if isinstance(sentiment, str):
            sentiment = sentiment.strip().lower()
        if sentiment not in ("positive", "neutral", "negative"):
            logger.warning("Invalid sentiment %r, defaulting to neutral", sentiment)
            sentiment = "neutral"

        result["sentiment"] = sentiment
        result["score"] = _bounded(result.get("score", 0.0), -1.0, 1.0)
        result["confidence"] = _bounded(result.get("confidence", 0.0), 0.0, 1.0)
        return result

    @classmethod
    def categorize_comment(cls, text: str) -> Dict[str, Any]:
        """
        Categorize a survey comment into one of five categories.

        Args:
            text: Comment text to categorize (supports English and Arabic)

        Returns:
            Dictionary with:
            {
                'comment_category': 'negative' | 'positive' | 'suggestion' | 'appreciation' | 'complaint',
                'confidence': float,  # 0.0 to 1.0
                'reasoning': str  # Brief explanation
            }
        """
        prompt = f"""Categorize this patient feedback comment into exactly ONE of these five categories:

Categories:
- "negative": General negative feedback, dissatisfaction, or criticism (not a formal complaint)
- "positive": General positive feedback, satisfaction, or praise (not specific appreciation)
- "suggestion": A specific recommendation or idea for improvement
- "appreciation": Specific thanks, gratitude, or recognition of staff/service
- "complaint": A formal grievance, specific problem report, or issue requiring action

Comment:
{text}

Respond in JSON format:
{{
    "comment_category": "negative|positive|suggestion|appreciation|complaint",
    "confidence": float,
    "reasoning": "brief explanation"
}}"""

        system_prompt = """You are a healthcare feedback classification expert.
        Categorize patient comments accurately. A complaint requires a specific problem that needs resolution.
        An appreciation is specific gratitude toward staff. A suggestion is a constructive recommendation.
        Positive/negative are general sentiment without specific actionable content."""

        try:
            response = cls.chat_completion(
                prompt=prompt, system_prompt=system_prompt, response_format="json_object", temperature=0.1
            )
            result = json.loads(response)
            valid = {"negative", "positive", "suggestion", "appreciation", "complaint"}
            if result.get("comment_category") not in valid:
                result["comment_category"] = "negative"
            return result
        except (json.JSONDecodeError, AIServiceError) as e:
            logger.error(f"Comment categorization failed: {e}")
            return {"comment_category": "negative", "confidence": 0.0, "reasoning": "Fallback classification"}

    @classmethod
    def analyze_emotion(cls, text: str) -> Dict[str, Any]:
        """
        Analyze emotion in text to identify primary emotion and intensity.

        Args:
            text: Text to analyze (supports English and Arabic)

        Returns:
            Dictionary with emotion analysis:
            {
                'emotion': 'anger' | 'sadness' | 'confusion' | 'fear' | 'neutral',
                'intensity': float,  # 0.0 to 1.0 (how strong the emotion is)
                'confidence': float  # 0.0 to 1.0 (how confident AI is)
            }
            All three keys are always present: an unknown emotion becomes
            'neutral', and intensity/confidence are coerced to floats and clamped
            to [0.0, 1.0] (0.0 when missing or not numeric). If the AI call fails
            or the reply is not a JSON object, the neutral fallback is returned.
        """
        prompt = f"""Analyze the primary emotion in this text (supports English and Arabic):

        {text}

        Identify the PRIMARY emotion from these options:
        - anger: Strong feelings of displeasure, hostility, or rage
        - sadness: Feelings of sorrow, grief, or unhappiness
        - confusion: Lack of understanding, bewilderment, or uncertainty
        - fear: Feelings of anxiety, worry, or being afraid
        - neutral: No strong emotion detected

        Provide your analysis in JSON format:
        {{
            "emotion": "anger|sadness|confusion|fear|neutral",
            "intensity": float,  # 0.0 (very weak) to 1.0 (extremely strong)
            "confidence": float  # 0.0 to 1.0 (how confident you are)
        }}

        Examples:
        - "This is unacceptable! I demand to speak to management!" -> emotion: "anger", intensity: 0.9
        - "I'm very disappointed with the care my father received" -> emotion: "sadness", intensity: 0.7
        - "I don't understand what happened, can you explain?" -> emotion: "confusion", intensity: 0.5
        - "I'm worried about the side effects of this medication" -> emotion: "fear", intensity: 0.6
        - "I would like to report a minor issue" -> emotion: "neutral", intensity: 0.2
        """

        system_prompt = """You are an emotion analysis expert fluent in both English and Arabic.
        Analyze the text to identify the PRIMARY emotion and its intensity.
        Be accurate in distinguishing between different emotions.
        Provide intensity scores that reflect how strongly the emotion is expressed (0.0 to 1.0)."""

        fallback = {"emotion": "neutral", "intensity": 0.0, "confidence": 0.0}

        try:
            response = cls.chat_completion(
                prompt=prompt, system_prompt=system_prompt, response_format="json_object", temperature=0.1
            )
            result = json.loads(response)
        except (json.JSONDecodeError, AIServiceError) as e:
            logger.error("Emotion analysis failed: %s", e)
            return fallback

        # Valid JSON that is not an object cannot carry the expected keys.
        if not isinstance(result, dict):
            logger.error("Emotion analysis failed: %s", "response is not a JSON object")
            return fallback

        def _unit_interval(field: str) -> float:
            """Read result[field] as a float clamped to [0.0, 1.0]; 0.0 if unusable."""
            raw = result.get(field, 0.0)
            try:
                value = float(raw)
            except (TypeError, ValueError):
                logger.warning("%s is not numeric (%r), defaulting to 0.0", field.capitalize(), raw)
                return 0.0
            if value != value:  # NaN is the only float unequal to itself
                logger.warning("%s is not numeric (%r), defaulting to 0.0", field.capitalize(), raw)
                return 0.0
            if not (0.0 <= value <= 1.0):
                value = max(0.0, min(1.0, value))
                logger.warning("%s out of range, clamping to %s", field.capitalize(), value)
            return value

        # Validate emotion
        valid_emotions = ["anger", "sadness", "confusion", "fear", "neutral"]
        emotion = result.get("emotion")
        if isinstance(emotion, str):
            emotion = emotion.strip().lower()
        if emotion not in valid_emotions:
            emotion = "neutral"
            logger.warning("Invalid emotion detected, defaulting to neutral")
        result["emotion"] = emotion

        # Always write the coerced numbers back so the result matches the documented shape,
        # even when the model omitted a key or returned the number as a string.
        intensity = _unit_interval("intensity")
        confidence = _unit_interval("confidence")
        result["intensity"] = intensity
        result["confidence"] = confidence

        logger.info("Emotion analysis: %s, intensity=%s, confidence=%s", emotion, intensity, confidence)
        return result

    @classmethod
    def extract_entities(cls, text: str) -> List[Dict[str, str]]:
        """
        Extract named entities (with emphasis on person names) from text.

        The prompt asks the model to strip titles such as 'Dr.' or 'Nurse' so the
        returned names are clean.

        Args:
            text: Text to scan for entities (the prompt targets Arabic and English)

        Returns:
            List of entity dictionaries as returned by the model, expected in the
            form {'text': str, 'type': str}. An empty list is returned when the
            AI call fails, the reply is not a JSON object, or it carries no
            'entities' list. Entries that are not dictionaries are dropped.
        """
        prompt = f"""Extract named entities from this text:
        "{text}"

        Focus heavily on PERSON names.
        IMPORTANT: Extract the clean name only. Remove titles like 'Dr.', 'Nurse', 'Mr.', 'Professor', 'دكتور', 'ممرض'.

        Provide entities in JSON format:
        {{
            "entities": [
                {{"text": "Name", "type": "PERSON"}},
                {{"text": "DepartmentName", "type": "ORGANIZATION"}}
            ]
        }}"""

        system_prompt = (
            "You are an expert in bilingual NER (Arabic and English). Extract formal names for database lookup."
        )

        try:
            response = cls.chat_completion(
                prompt=prompt, system_prompt=system_prompt, response_format="json_object", temperature=0.0
            )
            payload = json.loads(response)
        except (json.JSONDecodeError, AIServiceError) as e:
            # Still best-effort (empty list), but leave a trace instead of failing silently.
            logger.error("Entity extraction failed: %s", e)
            return []

        entities = payload.get("entities") if isinstance(payload, dict) else None
        if not isinstance(entities, list):
            return []

        return [entity for entity in entities if isinstance(entity, dict)]

    @classmethod
    def generate_summary(cls, text: str, max_length: int = 200) -> str:
        """
        Generate a summary of text.

        Args:
            text: Text to summarize
            max_length: Maximum length of summary, in characters

        Returns:
            Summary text, never longer than ``max_length`` characters. The model
            is only asked to respect the limit, so its reply is truncated here as
            well. If the AI call fails or the reply is blank, the first
            ``max_length`` characters of ``text`` are returned instead.
        """
        prompt = f"""Summarize this text in {max_length} characters or less:

        {text}"""

        system_prompt = """You are a text summarization expert.
            Create a concise summary that captures the main points."""

        try:
            response = cls.chat_completion(prompt=prompt, system_prompt=system_prompt, temperature=0.3, max_tokens=150)
        except AIServiceError as e:
            logger.error("Summary generation failed: %s", e)
            return text[:max_length]

        summary = response.strip()
        if not summary:
            # A blank reply is no more useful than a failed call; use the same fallback.
            logger.warning("Summary generation returned an empty response, using truncated text")
            return text[:max_length]

        # Enforce the documented limit on the success path too, not only on the fallback.
        return summary[:max_length]

    @classmethod
    def create_px_action_from_complaint(cls, complaint) -> Dict[str, Any]:
        """
        Generate PX Action data from a complaint using AI analysis.

        Args:
            complaint: Complaint model instance. This method reads its ``title``,
                ``description``, ``category`` (using ``category.name_en`` when a
                category is set), ``severity`` and ``priority`` attributes.

        Returns:
            Dictionary with PX Action data:
            {
                'title': str,
                'description': str,
                'category': str,
                'priority': str,
                'severity': str,
                'reasoning': str
            }
            Invalid or missing fields in the AI reply are replaced with values
            derived from the complaint. If the AI call fails or the reply is not
            a JSON object, the whole dictionary is built from the complaint.
        """
        # Get complaint data
        title = complaint.title
        description = complaint.description
        complaint_category = complaint.category.name_en if complaint.category else "other"
        severity = complaint.severity
        priority = complaint.priority

        def _fallback(reasoning: str) -> Dict[str, Any]:
            """Build the PX Action purely from complaint data."""
            return {
                "title": f"Address: {title}",
                "description": f"Resolve the complaint: {description}",
                "category": cls._map_category_to_action_category(complaint_category),
                "priority": priority,
                "severity": severity,
                "reasoning": reasoning,
            }

        # Build prompt for AI to generate action details
        prompt = f"""Generate a PX Action from this complaint:

        Complaint Title: {title}
        Complaint Description: {description}
        Complaint Category: {complaint_category}
        Severity: {severity}
        Priority: {priority}

        Available PX Action Categories:
        - clinical_quality: Issues related to medical care quality, diagnosis, treatment
        - patient_safety: Issues that could harm patients, safety violations, risks
        - service_quality: Issues with service delivery, wait times, customer service
        - staff_behavior: Issues with staff professionalism, attitude, conduct
        - facility: Issues with facilities, equipment, environment, cleanliness
        - process_improvement: Issues with processes, workflows, procedures
        - other: General issues that don't fit specific categories

        Instructions:
        1. Generate a clear, action-oriented title for the PX Action (max 15 words)
        2. Create a detailed description that explains what needs to be done
        3. Select the most appropriate PX Action category from the list above
        4. Keep the same severity and priority as the complaint
        5. Provide reasoning for your choices

        Provide your response in JSON format:
        {{
            "title": "Action-oriented title (max 15 words)",
            "description": "Detailed description of what needs to be done to address this complaint",
            "category": "exact category name from the list above",
            "priority": "low|medium|high",
            "severity": "low|medium|high|critical",
            "reasoning": "Brief explanation of why this category and action are appropriate"
        }}"""

        system_prompt = """You are a healthcare quality improvement expert.
        Generate PX Actions that are actionable, specific, and focused on improvement.
        The action should clearly state what needs to be done to address the complaint.
        Be specific and practical in your descriptions."""

        try:
            response = cls.chat_completion(
                prompt=prompt, system_prompt=system_prompt, response_format="json_object", temperature=0.3
            )
            result = json.loads(response)
        except json.JSONDecodeError as e:
            logger.error("Failed to parse AI response: %s", e)
            return _fallback("AI generation failed, using complaint data as fallback")
        except AIServiceError as e:
            logger.error("AI service error: %s", e)
            return _fallback(f"AI service unavailable: {str(e)}")

        # Valid JSON that is not an object cannot carry the expected keys.
        if not isinstance(result, dict):
            logger.error("Failed to parse AI response: %s", "expected a JSON object")
            return _fallback("AI generation failed, using complaint data as fallback")

        # Validate category
        valid_categories = [
            "clinical_quality",
            "patient_safety",
            "service_quality",
            "staff_behavior",
            "facility",
            "process_improvement",
            "other",
        ]
        if result.get("category") not in valid_categories:
            # Fallback: map complaint category to action category
            result["category"] = cls._map_category_to_action_category(complaint_category)

        # Validate severity
        if result.get("severity") not in cls.SEVERITY_CHOICES:
            result["severity"] = severity  # Use complaint severity as fallback

        # Validate priority
        if result.get("priority") not in cls.PRIORITY_CHOICES:
            result["priority"] = priority  # Use complaint priority as fallback

        # The model may omit text fields; fill them from the complaint so the
        # documented keys are always present (and the log line below cannot fail).
        if not result.get("title"):
            result["title"] = f"Address: {title}"
        if not result.get("description"):
            result["description"] = f"Resolve the complaint: {description}"
        if not result.get("reasoning"):
            result["reasoning"] = ""

        logger.info("PX Action generated: title=%s, category=%s", result["title"], result["category"])
        return result

    @classmethod
    def _map_category_to_action_category(cls, complaint_category: str) -> str:
        """
        Map complaint category to PX Action category.

        Args:
            complaint_category: Complaint category name

        Returns:
            PX Action category name
        """
        # Normalize category name (lowercase, remove spaces)
        category_lower = complaint_category.lower().replace(" ", "_")

        # Mapping dictionary
        mapping = {
            # Clinical categories
            "clinical": "clinical_quality",
            "medical": "clinical_quality",
            "diagnosis": "clinical_quality",
            "treatment": "clinical_quality",
            "care": "clinical_quality",
            # Safety categories
            "safety": "patient_safety",
            "infection": "patient_safety",
            "risk": "patient_safety",
            "dangerous": "patient_safety",
            # Service quality
            "service": "service_quality",
            "wait": "service_quality",
            "waiting": "service_quality",
            "appointment": "service_quality",
            "scheduling": "service_quality",
            # Staff behavior
            "staff": "staff_behavior",
            "behavior": "staff_behavior",
            "attitude": "staff_behavior",
            "rude": "staff_behavior",
            "communication": "staff_behavior",
            # Facility
            "facility": "facility",
            "environment": "facility",
            "clean": "facility",
            "cleanliness": "facility",
            "equipment": "facility",
            "room": "facility",
            "bathroom": "facility",
            # Process
            "process": "process_improvement",
            "workflow": "process_improvement",
            "procedure": "process_improvement",
            "policy": "process_improvement",
        }

        # Check for partial matches
        for key, value in mapping.items():
            if key in category_lower:
                return value

        # Default to 'other' if no match found
        return "other"

    @classmethod
    def _detect_complaint_type(cls, text: str) -> str:
        """
        Detect if the text is a complaint or appreciation using sentiment and keywords.

        Keyword hits (English and Arabic) are counted first. The AI sentiment is
        then consulted: a clearly positive result with at least as many
        appreciation hits as complaint hits yields 'appreciation', and a clearly
        negative result yields 'complaint'. Otherwise the keyword counts decide,
        with ties and no-signal cases defaulting to 'complaint'.

        Args:
            text: Text to analyze; ``None``, empty or whitespace-only text is
                classified as 'complaint' without calling the AI service

        Returns:
            'complaint' or 'appreciation'
        """
        # Nothing to analyse: skip the AI call and use the documented default.
        if not text or not text.strip():
            return "complaint"

        # Keywords for appreciation (English and Arabic)
        appreciation_keywords_en = [
            "thank",
            "thanks",
            "excellent",
            "great",
            "wonderful",
            "amazing",
            "appreciate",
            "commend",
            "outstanding",
            "fantastic",
            "brilliant",
            "professional",
            "caring",
            "helpful",
            "friendly",
            "good",
            "nice",
            "impressive",
            "exceptional",
            "superb",
            "pleased",
            "satisfied",
        ]
        appreciation_keywords_ar = [
            "شكرا",
            "ممتاز",
            "رائع",
            "بارك",
            "مدهش",
            "عظيم",
            "أقدر",
            "شكر",
            "متميز",
            "مهني",
            "رعاية",
            "مفيد",
            "ودود",
            "جيد",
            "لطيف",
            "مبهر",
            "استثنائي",
            "سعيد",
            "رضا",
            "احترافية",
            "خدمة ممتازة",
        ]

        # Keywords for complaints (English and Arabic)
        complaint_keywords_en = [
            "problem",
            "issue",
            "complaint",
            "bad",
            "terrible",
            "awful",
            "disappointed",
            "unhappy",
            "poor",
            "worst",
            "unacceptable",
            "rude",
            "slow",
            "delay",
            "wait",
            "neglect",
            "ignore",
            "angry",
            "frustrated",
            "dissatisfied",
            "concern",
            "worried",
        ]
        complaint_keywords_ar = [
            "مشكلة",
            "مشاكل",
            "سيء",
            "مخيب",
            "سيء للغاية",
            "تعيس",
            "ضعيف",
            "أسوأ",
            "غير مقبول",
            "فظ",
            "بطيء",
            "تأخير",
            "انتظار",
            "إهمال",
            "تجاهل",
            "غاضب",
            "محبط",
            "غير راضي",
            "قلق",
        ]

        text_lower = text.lower()

        # Count keyword matches.
        # NOTE: this is plain substring matching, so overlapping keywords (e.g. "thank"
        # and "thanks") each count, and a keyword also matches inside longer words
        # (e.g. "issue" inside "tissue").
        appreciation_count = sum(
            1 for keyword in appreciation_keywords_en + appreciation_keywords_ar if keyword in text_lower
        )
        complaint_count = sum(1 for keyword in complaint_keywords_en + complaint_keywords_ar if keyword in text_lower)

        # Get sentiment analysis
        try:
            sentiment_result = cls.classify_sentiment(text)
            sentiment = sentiment_result.get("sentiment", "neutral")

            # The score comes from model output; treat anything non-numeric as 0.0
            # so neither threshold below fires and the keyword counts decide.
            try:
                sentiment_score = float(sentiment_result.get("score", 0.0))
            except (TypeError, ValueError):
                sentiment_score = 0.0

            logger.info("Sentiment analysis: sentiment=%s, score=%s", sentiment, sentiment_score)

            # If sentiment is clearly positive and has appreciation keywords
            if sentiment == "positive" and sentiment_score > 0.5 and appreciation_count >= complaint_count:
                return "appreciation"

            # If sentiment is clearly negative
            if sentiment == "negative" and sentiment_score < -0.3:
                return "complaint"

        except Exception as e:
            # Deliberately best-effort: any sentiment failure falls through to keywords.
            logger.warning("Sentiment analysis failed, using keyword-based detection: %s", e)

        # Fallback to keyword-based detection; ties and no indicators default to complaint
        return "appreciation" if appreciation_count > complaint_count else "complaint"

    @classmethod
    def analyze_suggestion(
        cls,
        message: str,
        title: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Analyze a hospital suggestion and return a bilingual structured assessment.

        Args:
            message: Suggestion text
            title: Optional suggestion title, added to the prompt when given

        Returns:
            Dictionary with the keys requested from the model:
            'short_description_en', 'short_description_ar', 'category',
            'priority', 'suggested_actions' (list of dicts),
            'suggested_action_en', 'suggested_action_ar', 'reasoning_en',
            'reasoning_ar'. An unknown priority becomes 'medium' and an unknown
            category becomes 'other'. If the AI call fails or the reply is not a
            JSON object, a default dictionary built from ``message`` is returned.
        """

        def _fallback(reasoning_en: str, reasoning_ar: str) -> Dict[str, Any]:
            """Build the default assessment used when no usable AI reply exists."""
            excerpt = message[:200] if message else ""
            return {
                "short_description_en": excerpt,
                "short_description_ar": excerpt,
                "category": "other",
                "priority": "medium",
                "suggested_actions": [],
                "suggested_action_en": "",
                "suggested_action_ar": "",
                "reasoning_en": reasoning_en,
                "reasoning_ar": reasoning_ar,
            }

        title_text = f"Suggestion Title: {title}\n" if title else ""
        prompt = f"""Analyze this hospital suggestion/feedback and provide a structured assessment.

            Suggestion: {message}
            {title_text}
            Suggestion Categories (choose one):
            - clinical_care: Related to medical treatment, diagnosis, nursing care
            - facility: Related to buildings, rooms, cleanliness, parking, signage
            - staff_service: Related to staff behavior, attitude, responsiveness
            - communication: Related to information flow, signage, language barriers
            - technology: Related to IT systems, digital services, website, apps
            - food_service: Related to cafeteria, patient meals, nutrition
            - appointment: Related to scheduling, wait times, booking systems
            - process_improvement: Related to workflows, procedures, efficiency
            - other: Anything not covered above

            Instructions:
            1. Generate a short_description (2-3 sentences) summarizing the key points in BOTH English and Arabic
            2. Classify the category from the list above
            3. Assign a priority level based on potential impact
            4. Generate 1-3 suggested_actions as concrete improvement steps, each with:
               - action: Specific, actionable step the hospital can take
               - priority: high|medium|low
               - category: clinical_quality|patient_safety|service_quality|staff_behavior|facility|process_improvement|other
               Provide all actions in BOTH English and Arabic
            5. Provide reasoning for your classification in BOTH English and Arabic

            IMPORTANT: ALL TEXT FIELDS MUST BE PROVIDED IN BOTH ENGLISH AND ARABIC

            Provide your analysis in JSON format:
            {{
                "short_description_en": "2-3 sentence summary in English of the suggestion",
                "short_description_ar": "ملخص من 2-3 جمل بالعربية",
                "category": "clinical_care|facility|staff_service|communication|technology|food_service|appointment|process_improvement|other",
                "priority": "low|medium|high",
                "suggested_actions": [
                    {{
                        "action_en": "Specific actionable improvement step in English",
                        "action_ar": "خطوة محددة للتحسين بالعربية",
                        "priority": "high|medium|low",
                        "category": "clinical_quality|patient_safety|service_quality|staff_behavior|facility|process_improvement|other"
                    }}
                ],
                "suggested_action_en": "top suggested action in English",
                "suggested_action_ar": "أهم إجراء مقترح بالعربية",
                "reasoning_en": "Brief explanation in English (2-3 sentences)",
                "reasoning_ar": "شرح مختصر بالعربية"
            }}"""

        system_prompt = """You are a healthcare quality improvement expert fluent in both English and Arabic.
            Your job is to analyze patient/staff suggestions submitted to a hospital, classify them,
            and recommend actionable improvement steps.
            Focus on practical, implementable suggestions that can improve patient experience.

            ARABIC LANGUAGE REQUIREMENTS:
            - Use Modern Standard Arabic (Fusha) - الفصحى
            - Use formal, professional medical terminology
            - Ensure Arabic text is culturally appropriate for Saudi Arabian healthcare context
            - Arabic translations should be natural and fluent, not literal word-for-word translations
            - Use proper Arabic grammar and sentence structure"""

        try:
            response = cls.chat_completion(
                prompt=prompt,
                system_prompt=system_prompt,
                response_format="json_object",
                temperature=0.2,
            )
            result = json.loads(response)
        except json.JSONDecodeError as e:
            logger.error("Failed to parse suggestion AI response: %s", e)
            return _fallback(
                "AI analysis failed, using default values",
                "فشل تحليل الذكاء الاصطناعي، استخدام القيم الافتراضية",
            )
        except AIServiceError as e:
            logger.error("AI service error for suggestion: %s", e)
            return _fallback("AI service unavailable", "خدمة الذكاء الاصطناعي غير متوفرة")

        # Valid JSON that is not an object cannot carry the expected keys.
        if not isinstance(result, dict):
            logger.error("Failed to parse suggestion AI response: %s", "expected a JSON object")
            return _fallback(
                "AI analysis failed, using default values",
                "فشل تحليل الذكاء الاصطناعي، استخدام القيم الافتراضية",
            )

        if result.get("priority") not in ["low", "medium", "high"]:
            result["priority"] = "medium"

        # Same treatment as priority: only categories offered in the prompt are accepted.
        valid_categories = [
            "clinical_care",
            "facility",
            "staff_service",
            "communication",
            "technology",
            "food_service",
            "appointment",
            "process_improvement",
            "other",
        ]
        if result.get("category") not in valid_categories:
            logger.warning("Invalid suggestion category %r, defaulting to other", result.get("category"))
            result["category"] = "other"

        # Keep only well-formed action objects so the code below (and the Arabic
        # validation step) can safely treat every entry as a dict.
        raw_actions = result.get("suggested_actions")
        if isinstance(raw_actions, list):
            result["suggested_actions"] = [action for action in raw_actions if isinstance(action, dict)]
        else:
            result["suggested_actions"] = []

        result = cls._validate_arabic_text(result)

        actions = result.get("suggested_actions") or []
        if not result.get("suggested_action_en") and actions:
            # No top-level action supplied: promote the first listed action.
            first_action = actions[0]
            result["suggested_action_en"] = first_action.get("action_en", "")
            result["suggested_action_ar"] = first_action.get("action_ar", "")

        logger.info("Suggestion analyzed: category=%s, priority=%s", result.get("category"), result.get("priority"))
        return result


# Module-level AIService instance created at import time for callers that prefer
# an object over the class.
# NOTE(review): nothing here enforces a single instance, and the AIService methods
# visible in this part of the file are classmethods, so they can equally be called
# on the class itself — confirm whether any caller relies on this object.
ai_service = AIService()