HH/apps/ai_engine/services.py
2025-12-24 14:10:18 +03:00

401 lines
13 KiB
Python

"""
AI Engine services - Sentiment analysis and NLP
This module provides AI services for:
- Sentiment analysis (positive, neutral, negative)
- Keyword extraction
- Entity recognition
- Emotion detection
- Language detection
Currently uses a stub implementation that can be replaced with:
- OpenAI API
- Azure Cognitive Services
- AWS Comprehend
- Custom ML models
"""
import re
import time
from decimal import Decimal
from typing import Dict, List, Optional, Tuple
from django.contrib.contenttypes.models import ContentType
from django.db import transaction
from .models import SentimentResult
class SentimentAnalysisService:
    """
    Sentiment analysis service with stub implementation.

    Everything here is keyword matching against the class-level keyword
    tables; no external API is called. Replace the stub methods with real
    AI service calls when ready.

    Matching is substring based, so inflected forms such as "thanks" still
    hit the keyword "thank". A keyword that lies inside a longer keyword
    occurrence is not counted separately: "unhappy" counts as "unhappy"
    only, never additionally as "happy".
    """

    # Positive keywords (English and Arabic)
    POSITIVE_KEYWORDS = {
        'en': [
            'excellent', 'great', 'good', 'wonderful', 'amazing', 'fantastic',
            'outstanding', 'superb', 'perfect', 'best', 'love', 'happy',
            'satisfied', 'pleased', 'thank', 'appreciate', 'helpful', 'kind',
            'professional', 'caring', 'friendly', 'clean', 'comfortable'
        ],
        'ar': [
            'ممتاز', 'رائع', 'جيد', 'جميل', 'مذهل', 'رائع',
            'متميز', 'ممتاز', 'مثالي', 'أفضل', 'أحب', 'سعيد',
            'راض', 'مسرور', 'شكر', 'أقدر', 'مفيد', 'لطيف',
            'محترف', 'مهتم', 'ودود', 'نظيف', 'مريح'
        ]
    }

    # Negative keywords (English and Arabic)
    NEGATIVE_KEYWORDS = {
        'en': [
            'bad', 'terrible', 'horrible', 'awful', 'poor', 'worst',
            'disappointed', 'unhappy', 'unsatisfied', 'angry', 'frustrated',
            'rude', 'unprofessional', 'dirty', 'uncomfortable', 'painful',
            'long wait', 'delayed', 'ignored', 'neglected', 'complaint'
        ],
        'ar': [
            'سيء', 'فظيع', 'مروع', 'سيء', 'ضعيف', 'أسوأ',
            'خائب', 'غير سعيد', 'غير راض', 'غاضب', 'محبط',
            'وقح', 'غير محترف', 'قذر', 'غير مريح', 'مؤلم',
            'انتظار طويل', 'متأخر', 'تجاهل', 'مهمل', 'شكوى'
        ]
    }

    # Emotion keywords
    EMOTION_KEYWORDS = {
        'joy': ['happy', 'joy', 'pleased', 'delighted', 'سعيد', 'فرح', 'مسرور'],
        'anger': ['angry', 'furious', 'mad', 'غاضب', 'غضب', 'حنق'],
        'sadness': ['sad', 'unhappy', 'disappointed', 'حزين', 'خائب', 'محبط'],
        'fear': ['afraid', 'scared', 'worried', 'خائف', 'قلق', 'مذعور'],
        'surprise': ['surprised', 'shocked', 'amazed', 'متفاجئ', 'مندهش', 'مذهول'],
    }

    # Patterns compiled once at class creation instead of on every call.
    _ARABIC_CHAR_RE = re.compile(r'[\u0600-\u06FF]')
    _LATIN_CHAR_RE = re.compile(r'[a-zA-Z]')
    # The TLD class is [A-Za-z]; a '|' inside a character class is a literal
    # pipe, not an alternation, so it must not appear there.
    _EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
    _PHONE_RE = re.compile(r'\b\d{10,}\b')

    @staticmethod
    def _keyword_hits(text: str, keywords: List[str]) -> List[str]:
        """
        Return every keyword occurrence found in the lower-cased text.

        The text is scanned left to right and, at each position, the longest
        keyword is tried first. Occurrences never overlap, so a keyword that
        is nested inside a longer matched keyword is not reported.

        Args:
            text: Text to scan
            keywords: Candidate keywords (duplicates and empty strings are ignored)

        Returns:
            One list entry per occurrence, in order of appearance
        """
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        ordered = sorted(
            dict.fromkeys(k for k in keywords if k), key=len, reverse=True
        )
        if not ordered:
            return []
        pattern = re.compile('|'.join(re.escape(k) for k in ordered))
        return pattern.findall(text.lower())

    @classmethod
    def detect_language(cls, text: str) -> str:
        """
        Detect language of text (English or Arabic).

        Counts characters in the Arabic block (U+0600-U+06FF) against ASCII
        letters. Returns 'ar' only when Arabic characters are the strict
        majority; ties and text with neither script return 'en'.
        """
        arabic_chars = len(cls._ARABIC_CHAR_RE.findall(text))
        english_chars = len(cls._LATIN_CHAR_RE.findall(text))
        return 'ar' if arabic_chars > english_chars else 'en'

    @classmethod
    def extract_keywords(cls, text: str, language: str, max_keywords: int = 10) -> List[str]:
        """
        Extract keywords from text.

        Stub implementation: returns the sentiment keywords of the given
        language that occur in the text, positives first, each at most once.
        Replace with proper NLP keyword extraction (TF-IDF, RAKE, etc.)

        Args:
            text: Text to analyze
            language: Language code used to select the keyword lists
            max_keywords: Maximum number of keywords to return

        Returns:
            List of matched keywords, empty for an unknown language
        """
        candidates = list(dict.fromkeys(
            cls.POSITIVE_KEYWORDS.get(language, [])
            + cls.NEGATIVE_KEYWORDS.get(language, [])
        ))
        found = set(cls._keyword_hits(text, candidates))
        return [keyword for keyword in candidates if keyword in found][:max_keywords]

    @classmethod
    def extract_entities(cls, text: str, language: str) -> List[Dict[str, str]]:
        """
        Extract named entities from text.

        Stub implementation: basic pattern matching for e-mail addresses and
        runs of ten or more digits (reported as phone numbers). The language
        argument is accepted but not used by the stub.
        Replace with proper NER (spaCy, Stanford NER, etc.)

        Returns:
            List of {'text': ..., 'type': 'EMAIL' | 'PHONE'} dicts, e-mails first
        """
        entities = [
            {'text': email, 'type': 'EMAIL'}
            for email in cls._EMAIL_RE.findall(text)
        ]
        entities.extend(
            {'text': phone, 'type': 'PHONE'}
            for phone in cls._PHONE_RE.findall(text)
        )
        return entities

    @classmethod
    def detect_emotions(cls, text: str) -> Dict[str, float]:
        """
        Detect emotions in text.

        Stub implementation: each emotion scores 0.2 per distinct keyword of
        that emotion present in the text, capped at 1.0. Every emotion in
        EMOTION_KEYWORDS appears in the result, with 0.0 when nothing matched.
        Replace with proper emotion detection model.
        """
        every_keyword = [
            keyword
            for keywords in cls.EMOTION_KEYWORDS.values()
            for keyword in keywords
        ]
        # One scan over all emotions so that e.g. "unhappy" (sadness) does
        # not also register as "happy" (joy).
        found = set(cls._keyword_hits(text, every_keyword))
        return {
            emotion: min(sum(1 for k in set(keywords) if k in found) / 5, 1.0)
            for emotion, keywords in cls.EMOTION_KEYWORDS.items()
        }

    @classmethod
    def calculate_sentiment_score(cls, text: str, language: str) -> Tuple[str, float, float]:
        """
        Calculate sentiment score for text.

        Stub implementation: uses keyword matching.
        Replace with ML model (BERT, RoBERTa, etc.)

        Returns:
            Tuple of (sentiment, score, confidence)
            - sentiment: 'positive', 'neutral', or 'negative'
            - score: float from -1 (very negative) to 1 (very positive)
            - confidence: float from 0.3 to 1; 0.5 when no keyword matched
        """
        positives = set(cls.POSITIVE_KEYWORDS.get(language, []))
        negatives = set(cls.NEGATIVE_KEYWORDS.get(language, []))

        # A single scan over both polarities: every occurrence is counted
        # once, and a positive word embedded in a negative keyword
        # ("happy" in "unhappy") no longer cancels that keyword out.
        hits = cls._keyword_hits(text, list(positives | negatives))
        positive_count = sum(1 for hit in hits if hit in positives)
        negative_count = sum(1 for hit in hits if hit in negatives)

        total_keywords = positive_count + negative_count
        if total_keywords == 0:
            # No sentiment keywords found - neutral
            return 'neutral', 0.0, 0.5

        score = (positive_count - negative_count) / total_keywords

        # Scores within +/-0.2 of zero are treated as neutral.
        if score > 0.2:
            sentiment = 'positive'
        elif score < -0.2:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        # Confidence grows with the number of keywords found, within [0.3, 1.0].
        confidence = max(min(total_keywords / 10.0, 1.0), 0.3)
        return sentiment, score, confidence

    @classmethod
    def analyze_text(
        cls,
        text: str,
        language: Optional[str] = None,
        extract_keywords: bool = True,
        extract_entities: bool = True,
        detect_emotions: bool = True
    ) -> Dict:
        """
        Perform complete sentiment analysis on text.

        Args:
            text: Text to analyze; None is treated as an empty string
            language: Language code ('en' or 'ar'), auto-detected if None
            extract_keywords: Whether to extract keywords
            extract_entities: Whether to extract entities
            detect_emotions: Whether to detect emotions

        Returns:
            Dictionary with analysis results
        """
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # when the wall clock is adjusted.
        started = time.perf_counter()

        if text is None:
            text = ''
        if language is None:
            language = cls.detect_language(text)

        sentiment, score, confidence = cls.calculate_sentiment_score(text, language)

        # The boolean parameters shadow the method names locally, so the
        # methods are reached through cls.
        keywords = cls.extract_keywords(text, language) if extract_keywords else []
        entities = cls.extract_entities(text, language) if extract_entities else []
        emotions = cls.detect_emotions(text) if detect_emotions else {}

        processing_time_ms = int((time.perf_counter() - started) * 1000)
        return {
            'text': text,
            'language': language,
            'sentiment': sentiment,
            'sentiment_score': score,
            'confidence': confidence,
            'keywords': keywords,
            'entities': entities,
            'emotions': emotions,
            'ai_service': 'stub',
            'ai_model': 'keyword_matching_v1',
            'processing_time_ms': processing_time_ms,
        }

    @classmethod
    def analyze_and_save(
        cls,
        text: str,
        content_object,
        language: Optional[str] = None,
        **kwargs
    ) -> 'SentimentResult':
        """
        Analyze text and save result to database.

        Args:
            text: Text to analyze
            content_object: Django model instance to link to
            language: Language code ('en' or 'ar'), auto-detected if None
            **kwargs: Additional arguments for analyze_text

        Returns:
            SentimentResult instance
        """
        # The analysis is pure computation, so it runs before the
        # transaction is opened; only the database work is atomic.
        analysis = cls.analyze_text(text, language, **kwargs)

        with transaction.atomic():
            content_type = ContentType.objects.get_for_model(content_object)
            return SentimentResult.objects.create(
                content_type=content_type,
                # pk rather than id: also works for models whose primary
                # key field is not named "id".
                object_id=content_object.pk,
                text=analysis['text'],
                language=analysis['language'],
                sentiment=analysis['sentiment'],
                # Go through str() so the Decimal carries the float's short
                # repr instead of its full binary expansion.
                sentiment_score=Decimal(str(analysis['sentiment_score'])),
                confidence=Decimal(str(analysis['confidence'])),
                keywords=analysis['keywords'],
                entities=analysis['entities'],
                emotions=analysis['emotions'],
                ai_service=analysis['ai_service'],
                ai_model=analysis['ai_model'],
                processing_time_ms=analysis['processing_time_ms'],
            )

    @classmethod
    def analyze_batch(cls, texts: List[str], language: Optional[str] = None) -> List[Dict]:
        """
        Analyze multiple texts in batch.

        Args:
            texts: List of texts to analyze
            language: Language code ('en' or 'ar'), auto-detected per text if None

        Returns:
            List of analysis results, in the same order as texts
        """
        return [cls.analyze_text(text, language) for text in texts]
class AIEngineService:
    """
    Main AI Engine service - facade for all AI capabilities.

    Sentiment analysis is exposed through the ``sentiment`` attribute; the
    class methods read stored SentimentResult rows.
    """

    sentiment = SentimentAnalysisService

    @classmethod
    def get_sentiment_for_object(cls, content_object) -> Optional[SentimentResult]:
        """
        Get the first sentiment result stored for an object.

        Args:
            content_object: Django model instance the result is linked to

        Returns:
            SentimentResult instance, or None when the object has none
        """
        content_type = ContentType.objects.get_for_model(content_object)
        # NOTE(review): first() follows the model's default ordering and falls
        # back to primary-key order when there is none. This is "the most
        # recent" result only if SentimentResult.Meta.ordering sorts newest
        # first - confirm against the model.
        return SentimentResult.objects.filter(
            content_type=content_type,
            # pk rather than id: also works for models whose primary key
            # field is not named "id".
            object_id=content_object.pk,
        ).first()

    @classmethod
    def get_sentiment_stats(cls, queryset=None) -> Dict:
        """
        Get sentiment statistics.

        Args:
            queryset: Optional SentimentResult queryset to filter

        Returns:
            Dictionary with the total, the count and percentage (one decimal)
            per sentiment, and the average score and confidence. All values
            are 0 when the queryset is empty.
        """
        from django.db.models import Avg, Count, Q

        if queryset is None:
            queryset = SentimentResult.objects.all()

        total = queryset.count()
        if total == 0:
            return {
                'total': 0,
                'positive': 0,
                'neutral': 0,
                'negative': 0,
                'positive_pct': 0,
                'neutral_pct': 0,
                'negative_pct': 0,
                'avg_score': 0,
                'avg_confidence': 0,
            }

        # One conditional aggregate replaces three filtered COUNT queries and
        # two separate AVG queries.
        stats = queryset.aggregate(
            positive=Count('pk', filter=Q(sentiment='positive')),
            neutral=Count('pk', filter=Q(sentiment='neutral')),
            negative=Count('pk', filter=Q(sentiment='negative')),
            avg_score=Avg('sentiment_score'),
            avg_confidence=Avg('confidence'),
        )
        positive = stats['positive']
        neutral = stats['neutral']
        negative = stats['negative']

        return {
            'total': total,
            'positive': positive,
            'neutral': neutral,
            'negative': negative,
            'positive_pct': round((positive / total) * 100, 1),
            'neutral_pct': round((neutral / total) * 100, 1),
            'negative_pct': round((negative / total) * 100, 1),
            # Avg yields None when every value is NULL; report 0 in that case.
            'avg_score': float(stats['avg_score'] or 0),
            'avg_confidence': float(stats['avg_confidence'] or 0),
        }