# HH/apps/ai_engine/services.py

"""
AI Engine services - Sentiment analysis and NLP

This module provides AI services for:
- Sentiment analysis (positive, neutral, negative)
- Keyword extraction
- Entity recognition
- Emotion detection
- Language detection

Currently uses a stub implementation that can be replaced with:
- OpenAI API
- Azure Cognitive Services
- AWS Comprehend
- Custom ML models
"""
import re
import time
from decimal import Decimal
from typing import Dict, List, Optional, Tuple

from django.contrib.contenttypes.models import ContentType
from django.db import transaction

from .models import SentimentResult


class SentimentAnalysisService:
    """
    Sentiment analysis service with stub implementation.

    This service provides realistic sentiment analysis without external API calls.
    Replace the stub methods with real AI service calls when ready.

    The keyword tables below drive the stub. Entries must be lowercase because
    the methods lowercase the text before matching, and every list must stay
    free of repeated entries so that no word is weighted more than once.
    """

    # Positive keywords (English and Arabic), keyed by language code
    POSITIVE_KEYWORDS = {
        'en': [
            'excellent', 'great', 'good', 'wonderful', 'amazing', 'fantastic',
            'outstanding', 'superb', 'perfect', 'best', 'love', 'happy',
            'satisfied', 'pleased', 'thank', 'appreciate', 'helpful', 'kind',
            'professional', 'caring', 'friendly', 'clean', 'comfortable'
        ],
        'ar': [
            'ممتاز', 'رائع', 'جيد', 'جميل', 'مذهل',
            'متميز', 'مثالي', 'أفضل', 'أحب', 'سعيد',
            'راض', 'مسرور', 'شكر', 'أقدر', 'مفيد', 'لطيف',
            'محترف', 'مهتم', 'ودود', 'نظيف', 'مريح'
        ]
    }

    # Negative keywords (English and Arabic), keyed by language code
    NEGATIVE_KEYWORDS = {
        'en': [
            'bad', 'terrible', 'horrible', 'awful', 'poor', 'worst',
            'disappointed', 'unhappy', 'unsatisfied', 'angry', 'frustrated',
            'rude', 'unprofessional', 'dirty', 'uncomfortable', 'painful',
            'long wait', 'delayed', 'ignored', 'neglected', 'complaint'
        ],
        'ar': [
            'سيء', 'فظيع', 'مروع', 'ضعيف', 'أسوأ',
            'خائب', 'غير سعيد', 'غير راض', 'غاضب', 'محبط',
            'وقح', 'غير محترف', 'قذر', 'غير مريح', 'مؤلم',
            'انتظار طويل', 'متأخر', 'تجاهل', 'مهمل', 'شكوى'
        ]
    }

    # Emotion keywords: emotion name -> mixed English/Arabic trigger words
    EMOTION_KEYWORDS = {
        'joy': ['happy', 'joy', 'pleased', 'delighted', 'سعيد', 'فرح', 'مسرور'],
        'anger': ['angry', 'furious', 'mad', 'غاضب', 'غضب', 'حنق'],
        'sadness': ['sad', 'unhappy', 'disappointed', 'حزين', 'خائب', 'محبط'],
        'fear': ['afraid', 'scared', 'worried', 'خائف', 'قلق', 'مذعور'],
        'surprise': ['surprised', 'shocked', 'amazed', 'متفاجئ', 'مندهش', 'مذهول'],
    }

    @classmethod
    def detect_language(cls, text: str) -> str:
        """
        Detect language of text (English or Arabic).

        Simple detection based on character ranges.
        """
        # Count Arabic characters
        arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
        # Count English characters
        english_chars = len(re.findall(r'[a-zA-Z]', text))

        if arabic_chars > english_chars:
            return 'ar'
        return 'en'

    @classmethod
    def extract_keywords(cls, text: str, language: str, max_keywords: int = 10) -> List[str]:
        """
        Extract keywords from text.

        Stub implementation: Returns words that appear in positive/negative keyword lists.
        Replace with proper NLP keyword extraction (TF-IDF, RAKE, etc.)
        """
        text_lower = text.lower()
        keywords = []

        # Check positive keywords
        for keyword in cls.POSITIVE_KEYWORDS.get(language, []):
            if keyword in text_lower:
                keywords.append(keyword)

        # Check negative keywords
        for keyword in cls.NEGATIVE_KEYWORDS.get(language, []):
            if keyword in text_lower:
                keywords.append(keyword)

        return keywords[:max_keywords]

    @classmethod
    def extract_entities(cls, text: str, language: str) -> List[Dict[str, str]]:
        """
        Extract named entities from text.

        Stub implementation: Returns basic pattern matching.
        Replace with proper NER (spaCy, Stanford NER, etc.)
        """
        entities = []

        # Simple email detection
        emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
        for email in emails:
            entities.append({'text': email, 'type': 'EMAIL'})

        # Simple phone detection
        phones = re.findall(r'\b\d{10,}\b', text)
        for phone in phones:
            entities.append({'text': phone, 'type': 'PHONE'})

        return entities

    @classmethod
    def detect_emotions(cls, text: str) -> Dict[str, float]:
        """
        Detect emotions in text.

        Stub implementation: Returns emotion scores based on keyword matching.
        Replace with proper emotion detection model.
        """
        text_lower = text.lower()
        emotions = {}

        for emotion, keywords in cls.EMOTION_KEYWORDS.items():
            score = 0.0
            for keyword in keywords:
                if keyword in text_lower:
                    score += 0.2
            emotions[emotion] = min(score, 1.0)

        return emotions

    @classmethod
    def calculate_sentiment_score(cls, text: str, language: str) -> Tuple[str, float, float]:
        """
        Calculate sentiment score for text.

        Returns:
            Tuple of (sentiment, score, confidence)
            - sentiment: 'positive', 'neutral', or 'negative'
            - score: float from -1 (very negative) to 1 (very positive)
            - confidence: float from 0 to 1

        Stub implementation: Uses keyword matching.
        Replace with ML model (BERT, RoBERTa, etc.)
        """
        text_lower = text.lower()

        # Count positive and negative keywords
        positive_count = 0
        negative_count = 0

        for keyword in cls.POSITIVE_KEYWORDS.get(language, []):
            positive_count += text_lower.count(keyword)

        for keyword in cls.NEGATIVE_KEYWORDS.get(language, []):
            negative_count += text_lower.count(keyword)

        # Calculate score
        total_keywords = positive_count + negative_count

        if total_keywords == 0:
            # No sentiment keywords found - neutral
            return 'neutral', 0.0, 0.5

        # Calculate sentiment score (-1 to 1)
        score = (positive_count - negative_count) / max(total_keywords, 1)

        # Determine sentiment category
        if score > 0.2:
            sentiment = 'positive'
        elif score < -0.2:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        # Calculate confidence (higher when more keywords found)
        confidence = min(total_keywords / 10.0, 1.0)
        confidence = max(confidence, 0.3)  # Minimum confidence

        return sentiment, score, confidence

    @classmethod
    def analyze_text(
        cls,
        text: str,
        language: Optional[str] = None,
        extract_keywords: bool = True,
        extract_entities: bool = True,
        detect_emotions: bool = True
    ) -> Dict:
        """
        Perform complete sentiment analysis on text.

        Args:
            text: Text to analyze
            language: Language code ('en' or 'ar'), auto-detected if None
            extract_keywords: Whether to extract keywords
            extract_entities: Whether to extract entities
            detect_emotions: Whether to detect emotions

        Returns:
            Dictionary with analysis results
        """
        start_time = time.time()

        # Detect language if not provided
        if language is None:
            language = cls.detect_language(text)

        # Calculate sentiment
        sentiment, score, confidence = cls.calculate_sentiment_score(text, language)

        # Extract additional features
        keywords = []
        if extract_keywords:
            keywords = cls.extract_keywords(text, language)

        entities = []
        if extract_entities:
            entities = cls.extract_entities(text, language)

        emotions = {}
        if detect_emotions:
            emotions = cls.detect_emotions(text)

        # Calculate processing time
        processing_time_ms = int((time.time() - start_time) * 1000)

        return {
            'text': text,
            'language': language,
            'sentiment': sentiment,
            'sentiment_score': score,
            'confidence': confidence,
            'keywords': keywords,
            'entities': entities,
            'emotions': emotions,
            'ai_service': 'stub',
            'ai_model': 'keyword_matching_v1',
            'processing_time_ms': processing_time_ms,
        }

    @classmethod
    @transaction.atomic
    def analyze_and_save(
        cls,
        text: str,
        content_object,
        language: Optional[str] = None,
        **kwargs
    ) -> SentimentResult:
        """
        Analyze text and save result to database.

        Args:
            text: Text to analyze
            content_object: Django model instance to link to
            language: Language code ('en' or 'ar'), auto-detected if None
            **kwargs: Additional arguments for analyze_text

        Returns:
            SentimentResult instance
        """
        # Perform analysis
        analysis = cls.analyze_text(text, language, **kwargs)

        # Get content type
        content_type = ContentType.objects.get_for_model(content_object)

        # Create sentiment result
        sentiment_result = SentimentResult.objects.create(
            content_type=content_type,
            object_id=content_object.id,
            text=analysis['text'],
            language=analysis['language'],
            sentiment=analysis['sentiment'],
            sentiment_score=Decimal(str(analysis['sentiment_score'])),
            confidence=Decimal(str(analysis['confidence'])),
            keywords=analysis['keywords'],
            entities=analysis['entities'],
            emotions=analysis['emotions'],
            ai_service=analysis['ai_service'],
            ai_model=analysis['ai_model'],
            processing_time_ms=analysis['processing_time_ms'],
        )

        return sentiment_result

    @classmethod
    def analyze_batch(cls, texts: List[str], language: Optional[str] = None) -> List[Dict]:
        """
        Analyze multiple texts in batch.

        Args:
            texts: List of texts to analyze
            language: Language code ('en' or 'ar'), auto-detected if None

        Returns:
            List of analysis results
        """
        results = []
        for text in texts:
            result = cls.analyze_text(text, language)
            results.append(result)
        return results


class AIEngineService:
    """
    Main AI Engine service - facade for all AI capabilities.
    """

    # Sentiment sub-service, reachable as AIEngineService.sentiment.<method>
    sentiment = SentimentAnalysisService

    @classmethod
    def get_sentiment_for_object(cls, content_object) -> Optional[SentimentResult]:
        """
        Get the first stored sentiment result for an object.

        Args:
            content_object: Django model instance the result is linked to

        Returns:
            The first matching SentimentResult, or None when there is none.
        """
        content_type = ContentType.objects.get_for_model(content_object)
        # NOTE(review): first() follows the model's default ordering (primary
        # key when none is declared). This is only the "most recent" result
        # if SentimentResult orders newest-first - confirm against the model.
        return SentimentResult.objects.filter(
            content_type=content_type,
            # pk instead of id: works for models whose primary key field is
            # not named "id" as well.
            object_id=content_object.pk,
        ).first()

    @classmethod
    def get_sentiment_stats(cls, queryset=None) -> Dict:
        """
        Get sentiment statistics.

        All figures come from a single aggregate query, so the counts and
        averages describe the same snapshot of the data.

        Args:
            queryset: Optional SentimentResult queryset to filter; all
                results are used when omitted

        Returns:
            Dictionary with 'total', per-sentiment counts ('positive',
            'neutral', 'negative'), their percentages rounded to one decimal
            ('*_pct'), and 'avg_score' / 'avg_confidence' as floats. Every
            value is 0 when the queryset is empty.
        """
        from django.db.models import Avg, Count, Q

        if queryset is None:
            queryset = SentimentResult.objects.all()

        summary = queryset.aggregate(
            total_count=Count('pk'),
            positive_count=Count('pk', filter=Q(sentiment='positive')),
            neutral_count=Count('pk', filter=Q(sentiment='neutral')),
            negative_count=Count('pk', filter=Q(sentiment='negative')),
            score_avg=Avg('sentiment_score'),
            confidence_avg=Avg('confidence'),
        )

        total = summary['total_count']

        if not total:
            return {
                'total': 0,
                'positive': 0,
                'neutral': 0,
                'negative': 0,
                'positive_pct': 0,
                'neutral_pct': 0,
                'negative_pct': 0,
                'avg_score': 0,
                'avg_confidence': 0,
            }

        positive = summary['positive_count']
        neutral = summary['neutral_count']
        negative = summary['negative_count']

        return {
            'total': total,
            'positive': positive,
            'neutral': neutral,
            'negative': negative,
            'positive_pct': round((positive / total) * 100, 1),
            'neutral_pct': round((neutral / total) * 100, 1),
            'negative_pct': round((negative / total) * 100, 1),
            # Avg yields None when every value is NULL; report that as 0
            'avg_score': float(summary['score_avg'] or 0),
            'avg_confidence': float(summary['confidence_avg'] or 0),
        }