""" AI Engine services - Sentiment analysis and NLP This module provides AI services for: - Sentiment analysis (positive, neutral, negative) - Keyword extraction - Entity recognition - Emotion detection - Language detection Currently uses a stub implementation that can be replaced with: - OpenAI API - Azure Cognitive Services - AWS Comprehend - Custom ML models """ import re import time from decimal import Decimal from typing import Dict, List, Optional, Tuple from django.contrib.contenttypes.models import ContentType from django.db import transaction from .models import SentimentResult class SentimentAnalysisService: """ Sentiment analysis service with stub implementation. This service provides realistic sentiment analysis without external API calls. Replace the stub methods with real AI service calls when ready. """ # Positive keywords (English and Arabic) POSITIVE_KEYWORDS = { 'en': [ 'excellent', 'great', 'good', 'wonderful', 'amazing', 'fantastic', 'outstanding', 'superb', 'perfect', 'best', 'love', 'happy', 'satisfied', 'pleased', 'thank', 'appreciate', 'helpful', 'kind', 'professional', 'caring', 'friendly', 'clean', 'comfortable' ], 'ar': [ 'ممتاز', 'رائع', 'جيد', 'جميل', 'مذهل', 'رائع', 'متميز', 'ممتاز', 'مثالي', 'أفضل', 'أحب', 'سعيد', 'راض', 'مسرور', 'شكر', 'أقدر', 'مفيد', 'لطيف', 'محترف', 'مهتم', 'ودود', 'نظيف', 'مريح' ] } # Negative keywords (English and Arabic) NEGATIVE_KEYWORDS = { 'en': [ 'bad', 'terrible', 'horrible', 'awful', 'poor', 'worst', 'disappointed', 'unhappy', 'unsatisfied', 'angry', 'frustrated', 'rude', 'unprofessional', 'dirty', 'uncomfortable', 'painful', 'long wait', 'delayed', 'ignored', 'neglected', 'complaint' ], 'ar': [ 'سيء', 'فظيع', 'مروع', 'سيء', 'ضعيف', 'أسوأ', 'خائب', 'غير سعيد', 'غير راض', 'غاضب', 'محبط', 'وقح', 'غير محترف', 'قذر', 'غير مريح', 'مؤلم', 'انتظار طويل', 'متأخر', 'تجاهل', 'مهمل', 'شكوى' ] } # Emotion keywords EMOTION_KEYWORDS = { 'joy': ['happy', 'joy', 'pleased', 'delighted', 'سعيد', 'فرح', 'مسرور'], 'anger': ['angry', 'furious', 'mad', 'غاضب', 'غضب', 'حنق'], 'sadness': ['sad', 'unhappy', 'disappointed', 'حزين', 'خائب', 'محبط'], 'fear': ['afraid', 'scared', 'worried', 'خائف', 'قلق', 'مذعور'], 'surprise': ['surprised', 'shocked', 'amazed', 'متفاجئ', 'مندهش', 'مذهول'], } @classmethod def detect_language(cls, text: str) -> str: """ Detect language of text (English or Arabic). Simple detection based on character ranges. """ # Count Arabic characters arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text)) # Count English characters english_chars = len(re.findall(r'[a-zA-Z]', text)) if arabic_chars > english_chars: return 'ar' return 'en' @classmethod def extract_keywords(cls, text: str, language: str, max_keywords: int = 10) -> List[str]: """ Extract keywords from text. Stub implementation: Returns words that appear in positive/negative keyword lists. Replace with proper NLP keyword extraction (TF-IDF, RAKE, etc.) """ text_lower = text.lower() keywords = [] # Check positive keywords for keyword in cls.POSITIVE_KEYWORDS.get(language, []): if keyword in text_lower: keywords.append(keyword) # Check negative keywords for keyword in cls.NEGATIVE_KEYWORDS.get(language, []): if keyword in text_lower: keywords.append(keyword) return keywords[:max_keywords] @classmethod def extract_entities(cls, text: str, language: str) -> List[Dict[str, str]]: """ Extract named entities from text. Stub implementation: Returns basic pattern matching. Replace with proper NER (spaCy, Stanford NER, etc.) """ entities = [] # Simple email detection emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text) for email in emails: entities.append({'text': email, 'type': 'EMAIL'}) # Simple phone detection phones = re.findall(r'\b\d{10,}\b', text) for phone in phones: entities.append({'text': phone, 'type': 'PHONE'}) return entities @classmethod def detect_emotions(cls, text: str) -> Dict[str, float]: """ Detect emotions in text. Stub implementation: Returns emotion scores based on keyword matching. Replace with proper emotion detection model. """ text_lower = text.lower() emotions = {} for emotion, keywords in cls.EMOTION_KEYWORDS.items(): score = 0.0 for keyword in keywords: if keyword in text_lower: score += 0.2 emotions[emotion] = min(score, 1.0) return emotions @classmethod def calculate_sentiment_score(cls, text: str, language: str) -> Tuple[str, float, float]: """ Calculate sentiment score for text. Returns: Tuple of (sentiment, score, confidence) - sentiment: 'positive', 'neutral', or 'negative' - score: float from -1 (very negative) to 1 (very positive) - confidence: float from 0 to 1 Stub implementation: Uses keyword matching. Replace with ML model (BERT, RoBERTa, etc.) """ text_lower = text.lower() # Count positive and negative keywords positive_count = 0 negative_count = 0 for keyword in cls.POSITIVE_KEYWORDS.get(language, []): positive_count += text_lower.count(keyword) for keyword in cls.NEGATIVE_KEYWORDS.get(language, []): negative_count += text_lower.count(keyword) # Calculate score total_keywords = positive_count + negative_count if total_keywords == 0: # No sentiment keywords found - neutral return 'neutral', 0.0, 0.5 # Calculate sentiment score (-1 to 1) score = (positive_count - negative_count) / max(total_keywords, 1) # Determine sentiment category if score > 0.2: sentiment = 'positive' elif score < -0.2: sentiment = 'negative' else: sentiment = 'neutral' # Calculate confidence (higher when more keywords found) confidence = min(total_keywords / 10.0, 1.0) confidence = max(confidence, 0.3) # Minimum confidence return sentiment, score, confidence @classmethod def analyze_text( cls, text: str, language: Optional[str] = None, extract_keywords: bool = True, extract_entities: bool = True, detect_emotions: bool = True ) -> Dict: """ Perform complete sentiment analysis on text. Args: text: Text to analyze language: Language code ('en' or 'ar'), auto-detected if None extract_keywords: Whether to extract keywords extract_entities: Whether to extract entities detect_emotions: Whether to detect emotions Returns: Dictionary with analysis results """ start_time = time.time() # Detect language if not provided if language is None: language = cls.detect_language(text) # Calculate sentiment sentiment, score, confidence = cls.calculate_sentiment_score(text, language) # Extract additional features keywords = [] if extract_keywords: keywords = cls.extract_keywords(text, language) entities = [] if extract_entities: entities = cls.extract_entities(text, language) emotions = {} if detect_emotions: emotions = cls.detect_emotions(text) # Calculate processing time processing_time_ms = int((time.time() - start_time) * 1000) return { 'text': text, 'language': language, 'sentiment': sentiment, 'sentiment_score': score, 'confidence': confidence, 'keywords': keywords, 'entities': entities, 'emotions': emotions, 'ai_service': 'stub', 'ai_model': 'keyword_matching_v1', 'processing_time_ms': processing_time_ms, } @classmethod @transaction.atomic def analyze_and_save( cls, text: str, content_object, language: Optional[str] = None, **kwargs ) -> SentimentResult: """ Analyze text and save result to database. Args: text: Text to analyze content_object: Django model instance to link to language: Language code ('en' or 'ar'), auto-detected if None **kwargs: Additional arguments for analyze_text Returns: SentimentResult instance """ # Perform analysis analysis = cls.analyze_text(text, language, **kwargs) # Get content type content_type = ContentType.objects.get_for_model(content_object) # Create sentiment result sentiment_result = SentimentResult.objects.create( content_type=content_type, object_id=content_object.id, text=analysis['text'], language=analysis['language'], sentiment=analysis['sentiment'], sentiment_score=Decimal(str(analysis['sentiment_score'])), confidence=Decimal(str(analysis['confidence'])), keywords=analysis['keywords'], entities=analysis['entities'], emotions=analysis['emotions'], ai_service=analysis['ai_service'], ai_model=analysis['ai_model'], processing_time_ms=analysis['processing_time_ms'], ) return sentiment_result @classmethod def analyze_batch(cls, texts: List[str], language: Optional[str] = None) -> List[Dict]: """ Analyze multiple texts in batch. Args: texts: List of texts to analyze language: Language code ('en' or 'ar'), auto-detected if None Returns: List of analysis results """ results = [] for text in texts: result = cls.analyze_text(text, language) results.append(result) return results class AIEngineService: """ Main AI Engine service - facade for all AI capabilities. """ sentiment = SentimentAnalysisService @classmethod def get_sentiment_for_object(cls, content_object) -> Optional[SentimentResult]: """ Get the most recent sentiment result for an object. """ content_type = ContentType.objects.get_for_model(content_object) return SentimentResult.objects.filter( content_type=content_type, object_id=content_object.id ).first() @classmethod def get_sentiment_stats(cls, queryset=None) -> Dict: """ Get sentiment statistics. Args: queryset: Optional SentimentResult queryset to filter Returns: Dictionary with statistics """ if queryset is None: queryset = SentimentResult.objects.all() total = queryset.count() if total == 0: return { 'total': 0, 'positive': 0, 'neutral': 0, 'negative': 0, 'positive_pct': 0, 'neutral_pct': 0, 'negative_pct': 0, 'avg_score': 0, 'avg_confidence': 0, } positive = queryset.filter(sentiment='positive').count() neutral = queryset.filter(sentiment='neutral').count() negative = queryset.filter(sentiment='negative').count() # Calculate averages from django.db.models import Avg avg_score = queryset.aggregate(Avg('sentiment_score'))['sentiment_score__avg'] or 0 avg_confidence = queryset.aggregate(Avg('confidence'))['confidence__avg'] or 0 return { 'total': total, 'positive': positive, 'neutral': neutral, 'negative': negative, 'positive_pct': round((positive / total) * 100, 1), 'neutral_pct': round((neutral / total) * 100, 1), 'negative_pct': round((negative / total) * 100, 1), 'avg_score': float(avg_score), 'avg_confidence': float(avg_confidence), }