401 lines
13 KiB
Python
401 lines
13 KiB
Python
"""
|
|
AI Engine services - Sentiment analysis and NLP
|
|
|
|
This module provides AI services for:
|
|
- Sentiment analysis (positive, neutral, negative)
|
|
- Keyword extraction
|
|
- Entity recognition
|
|
- Emotion detection
|
|
- Language detection
|
|
|
|
Currently uses a stub implementation that can be replaced with:
|
|
- OpenAI API
|
|
- Azure Cognitive Services
|
|
- AWS Comprehend
|
|
- Custom ML models
|
|
"""
|
|
import re
import time
from decimal import Decimal
from typing import Dict, List, Optional, Tuple

from django.contrib.contenttypes.models import ContentType
from django.db import transaction

from .models import SentimentResult


class SentimentAnalysisService:
    """
    Sentiment analysis service with a keyword-matching stub implementation.

    This service provides realistic sentiment analysis without external API
    calls. Replace the stub methods with real AI service calls when ready.

    Keyword matching is substring based (so a stem such as ``thank`` also
    hits ``thanks``), but overlapping keywords are resolved longest-first:
    the ``happy`` inside ``unhappy`` is not counted as a separate hit.
    """

    # Positive keywords (English and Arabic).
    # Some Arabic entries repeat; lookups de-duplicate them, so a repeated
    # entry carries no extra weight.
    POSITIVE_KEYWORDS = {
        'en': [
            'excellent', 'great', 'good', 'wonderful', 'amazing', 'fantastic',
            'outstanding', 'superb', 'perfect', 'best', 'love', 'happy',
            'satisfied', 'pleased', 'thank', 'appreciate', 'helpful', 'kind',
            'professional', 'caring', 'friendly', 'clean', 'comfortable'
        ],
        'ar': [
            'ممتاز', 'رائع', 'جيد', 'جميل', 'مذهل', 'رائع',
            'متميز', 'ممتاز', 'مثالي', 'أفضل', 'أحب', 'سعيد',
            'راض', 'مسرور', 'شكر', 'أقدر', 'مفيد', 'لطيف',
            'محترف', 'مهتم', 'ودود', 'نظيف', 'مريح'
        ]
    }

    # Negative keywords (English and Arabic)
    NEGATIVE_KEYWORDS = {
        'en': [
            'bad', 'terrible', 'horrible', 'awful', 'poor', 'worst',
            'disappointed', 'unhappy', 'unsatisfied', 'angry', 'frustrated',
            'rude', 'unprofessional', 'dirty', 'uncomfortable', 'painful',
            'long wait', 'delayed', 'ignored', 'neglected', 'complaint'
        ],
        'ar': [
            'سيء', 'فظيع', 'مروع', 'سيء', 'ضعيف', 'أسوأ',
            'خائب', 'غير سعيد', 'غير راض', 'غاضب', 'محبط',
            'وقح', 'غير محترف', 'قذر', 'غير مريح', 'مؤلم',
            'انتظار طويل', 'متأخر', 'تجاهل', 'مهمل', 'شكوى'
        ]
    }

    # Emotion keywords
    EMOTION_KEYWORDS = {
        'joy': ['happy', 'joy', 'pleased', 'delighted', 'سعيد', 'فرح', 'مسرور'],
        'anger': ['angry', 'furious', 'mad', 'غاضب', 'غضب', 'حنق'],
        'sadness': ['sad', 'unhappy', 'disappointed', 'حزين', 'خائب', 'محبط'],
        'fear': ['afraid', 'scared', 'worried', 'خائف', 'قلق', 'مذعور'],
        'surprise': ['surprised', 'shocked', 'amazed', 'متفاجئ', 'مندهش', 'مذهول'],
    }

    # Patterns are compiled once at class creation instead of on every call.
    _ARABIC_CHAR_PATTERN = re.compile(r'[\u0600-\u06FF]')
    _LATIN_CHAR_PATTERN = re.compile(r'[a-zA-Z]')
    # The TLD class is [A-Za-z]; a '|' inside a character class is a literal
    # pipe, not an alternation, so it must not appear there.
    _EMAIL_PATTERN = re.compile(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    )
    _PHONE_PATTERN = re.compile(r'\b\d{10,}\b')

    # Placeholder written over text already claimed by a keyword. NUL never
    # occurs in a keyword, so masking cannot create a new match.
    _CLAIMED = '\x00'

    @classmethod
    def _count_keyword_hits(cls, text_lower: str, keywords: List[str]) -> Dict[str, int]:
        """
        Count keyword occurrences, letting longer keywords claim text first.

        Args:
            text_lower: Lower-cased text to search.
            keywords: Candidate keywords; duplicates and empty strings are
                ignored.

        Returns:
            Mapping of each keyword found to its number of occurrences.
            Text matched by a longer keyword is masked, so a shorter keyword
            contained in it (``happy`` in ``unhappy``) is not counted again.
        """
        remaining = text_lower
        hits: Dict[str, int] = {}
        # dict.fromkeys drops duplicates while keeping list order; the sort
        # is stable, so only length decides which keyword goes first.
        unique_keywords = [kw for kw in dict.fromkeys(keywords) if kw]
        for keyword in sorted(unique_keywords, key=len, reverse=True):
            occurrences = remaining.count(keyword)
            if occurrences:
                hits[keyword] = occurrences
                remaining = remaining.replace(keyword, cls._CLAIMED)
        return hits

    @classmethod
    def detect_language(cls, text: str) -> str:
        """
        Detect language of text (English or Arabic).

        Simple detection based on character ranges: returns ``'ar'`` only
        when Arabic-block characters outnumber ASCII letters, otherwise
        ``'en'`` (including for empty text or ties).
        """
        arabic_chars = len(cls._ARABIC_CHAR_PATTERN.findall(text))
        english_chars = len(cls._LATIN_CHAR_PATTERN.findall(text))
        return 'ar' if arabic_chars > english_chars else 'en'

    @classmethod
    def extract_keywords(cls, text: str, language: str, max_keywords: int = 10) -> List[str]:
        """
        Extract keywords from text.

        Stub implementation: returns the sentiment keywords found in the
        text, positive keywords first, each at most once.
        Replace with proper NLP keyword extraction (TF-IDF, RAKE, etc.)

        Args:
            text: Text to scan.
            language: Language code selecting the keyword lists; an unknown
                code yields an empty result.
            max_keywords: Maximum number of keywords to return.

        Returns:
            List of matched keywords, truncated to ``max_keywords``.
        """
        candidates = [
            *cls.POSITIVE_KEYWORDS.get(language, []),
            *cls.NEGATIVE_KEYWORDS.get(language, []),
        ]
        hits = cls._count_keyword_hits(text.lower(), candidates)
        found = [keyword for keyword in dict.fromkeys(candidates) if keyword in hits]
        return found[:max_keywords]

    @classmethod
    def extract_entities(cls, text: str, language: str) -> List[Dict[str, str]]:
        """
        Extract named entities from text.

        Stub implementation: pattern matching for e-mail addresses and runs
        of ten or more digits (reported as phone numbers). ``language`` is
        accepted for interface compatibility but not used by the stub.
        Replace with proper NER (spaCy, Stanford NER, etc.)

        Returns:
            List of ``{'text': ..., 'type': 'EMAIL' | 'PHONE'}`` dictionaries,
            e-mails first.
        """
        entities = [
            {'text': email, 'type': 'EMAIL'}
            for email in cls._EMAIL_PATTERN.findall(text)
        ]
        entities.extend(
            {'text': phone, 'type': 'PHONE'}
            for phone in cls._PHONE_PATTERN.findall(text)
        )
        return entities

    @classmethod
    def detect_emotions(cls, text: str) -> Dict[str, float]:
        """
        Detect emotions in text.

        Stub implementation: each emotion gains 0.2 per distinct keyword
        present in the text, capped at 1.0. Keywords of all emotions compete
        longest-first, so ``unhappy`` contributes to sadness only and not to
        joy via the embedded ``happy``.
        Replace with proper emotion detection model.

        Returns:
            Mapping of every emotion name to a score between 0.0 and 1.0.
        """
        all_keywords = [
            keyword
            for keywords in cls.EMOTION_KEYWORDS.values()
            for keyword in keywords
        ]
        hits = cls._count_keyword_hits(text.lower(), all_keywords)

        emotions = {}
        for emotion, keywords in cls.EMOTION_KEYWORDS.items():
            score = 0.0
            for keyword in keywords:
                if keyword in hits:
                    score += 0.2
            emotions[emotion] = min(score, 1.0)
        return emotions

    @classmethod
    def calculate_sentiment_score(cls, text: str, language: str) -> Tuple[str, float, float]:
        """
        Calculate sentiment score for text.

        Returns:
            Tuple of (sentiment, score, confidence)
            - sentiment: 'positive', 'neutral', or 'negative'
            - score: float from -1 (very negative) to 1 (very positive)
            - confidence: float from 0 to 1

        Stub implementation: uses keyword matching. Positive and negative
        keywords are matched together, longest-first, so a negative phrase
        that contains a positive word (``unhappy``, ``غير سعيد``) counts
        once as negative instead of cancelling itself out.
        Replace with ML model (BERT, RoBERTa, etc.)
        """
        positive_keywords = cls.POSITIVE_KEYWORDS.get(language, [])
        negative_keywords = cls.NEGATIVE_KEYWORDS.get(language, [])
        hits = cls._count_keyword_hits(
            text.lower(), [*positive_keywords, *negative_keywords]
        )

        # Sets collapse duplicated list entries so none is counted twice.
        positive_count = sum(hits.get(kw, 0) for kw in set(positive_keywords))
        negative_count = sum(hits.get(kw, 0) for kw in set(negative_keywords))
        total_keywords = positive_count + negative_count

        if total_keywords == 0:
            # No sentiment keywords found - neutral
            return 'neutral', 0.0, 0.5

        # Sentiment score in the range -1 to 1
        score = (positive_count - negative_count) / total_keywords

        if score > 0.2:
            sentiment = 'positive'
        elif score < -0.2:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        # Confidence grows with the number of keywords, clamped to [0.3, 1.0]
        confidence = max(min(total_keywords / 10.0, 1.0), 0.3)

        return sentiment, score, confidence

    @classmethod
    def analyze_text(
        cls,
        text: str,
        language: Optional[str] = None,
        extract_keywords: bool = True,
        extract_entities: bool = True,
        detect_emotions: bool = True
    ) -> Dict:
        """
        Perform complete sentiment analysis on text.

        Args:
            text: Text to analyze
            language: Language code ('en' or 'ar'), auto-detected if None
            extract_keywords: Whether to extract keywords
            extract_entities: Whether to extract entities
            detect_emotions: Whether to detect emotions

        Returns:
            Dictionary with analysis results: text, language, sentiment,
            sentiment_score, confidence, keywords, entities, emotions,
            ai_service, ai_model and processing_time_ms.
        """
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # when the system clock is adjusted mid-call.
        started = time.perf_counter()

        if language is None:
            language = cls.detect_language(text)

        sentiment, score, confidence = cls.calculate_sentiment_score(text, language)

        # The boolean parameters shadow the method names locally, so the
        # methods are always reached through ``cls``.
        keywords = cls.extract_keywords(text, language) if extract_keywords else []
        entities = cls.extract_entities(text, language) if extract_entities else []
        emotions = cls.detect_emotions(text) if detect_emotions else {}

        processing_time_ms = int((time.perf_counter() - started) * 1000)

        return {
            'text': text,
            'language': language,
            'sentiment': sentiment,
            'sentiment_score': score,
            'confidence': confidence,
            'keywords': keywords,
            'entities': entities,
            'emotions': emotions,
            'ai_service': 'stub',
            'ai_model': 'keyword_matching_v1',
            'processing_time_ms': processing_time_ms,
        }

    @classmethod
    def analyze_and_save(
        cls,
        text: str,
        content_object,
        language: Optional[str] = None,
        **kwargs
    ) -> "SentimentResult":
        """
        Analyze text and save result to database.

        Args:
            text: Text to analyze
            content_object: Django model instance to link to
            language: Language code ('en' or 'ar'), auto-detected if None
            **kwargs: Additional arguments for analyze_text

        Returns:
            SentimentResult instance
        """
        analysis = cls.analyze_text(text, language, **kwargs)

        # The content-type lookup and the insert run inside one atomic block.
        with transaction.atomic():
            content_type = ContentType.objects.get_for_model(content_object)
            sentiment_result = SentimentResult.objects.create(
                content_type=content_type,
                # ``pk`` works whatever the primary-key field is called.
                object_id=content_object.pk,
                text=analysis['text'],
                language=analysis['language'],
                sentiment=analysis['sentiment'],
                # Go through str() so the Decimal holds the printed float
                # value rather than its full binary expansion.
                sentiment_score=Decimal(str(analysis['sentiment_score'])),
                confidence=Decimal(str(analysis['confidence'])),
                keywords=analysis['keywords'],
                entities=analysis['entities'],
                emotions=analysis['emotions'],
                ai_service=analysis['ai_service'],
                ai_model=analysis['ai_model'],
                processing_time_ms=analysis['processing_time_ms'],
            )

        return sentiment_result

    @classmethod
    def analyze_batch(cls, texts: List[str], language: Optional[str] = None) -> List[Dict]:
        """
        Analyze multiple texts in batch.

        Args:
            texts: List of texts to analyze
            language: Language code ('en' or 'ar'), auto-detected per text
                if None

        Returns:
            List of analysis results, in the same order as ``texts``
        """
        return [cls.analyze_text(text, language) for text in texts]
class AIEngineService:
    """
    Main AI Engine service - facade for all AI capabilities.
    """

    # Sentiment analysis entry point, reachable as ``AIEngineService.sentiment``.
    sentiment = SentimentAnalysisService

    @classmethod
    def get_sentiment_for_object(cls, content_object) -> Optional[SentimentResult]:
        """
        Get the first stored sentiment result linked to an object.

        Args:
            content_object: Django model instance the results are linked to

        Returns:
            The first matching SentimentResult, or None when there is none.

        NOTE(review): no explicit ordering is applied here, so this is the
        most recent result only if the SentimentResult model's default
        ordering is newest-first - confirm against the model.
        """
        content_type = ContentType.objects.get_for_model(content_object)
        return SentimentResult.objects.filter(
            content_type=content_type,
            # ``pk`` works whatever the primary-key field is called.
            object_id=content_object.pk,
        ).first()

    @classmethod
    def get_sentiment_stats(cls, queryset=None) -> Dict:
        """
        Get sentiment statistics.

        Args:
            queryset: Optional SentimentResult queryset to filter; all
                results are used when None

        Returns:
            Dictionary with statistics: total, per-sentiment counts
            (positive, neutral, negative), their percentages rounded to one
            decimal place, and the average score and confidence. Every
            value is 0 when the queryset is empty.
        """
        from django.db.models import Avg, Count, Q

        if queryset is None:
            queryset = SentimentResult.objects.all()

        # One aggregate query: fewer round-trips than separate counts and
        # averages, and all figures come from the same snapshot of the table.
        summary = queryset.aggregate(
            total_count=Count('pk'),
            positive_count=Count('pk', filter=Q(sentiment='positive')),
            neutral_count=Count('pk', filter=Q(sentiment='neutral')),
            negative_count=Count('pk', filter=Q(sentiment='negative')),
            score_avg=Avg('sentiment_score'),
            confidence_avg=Avg('confidence'),
        )

        total = summary['total_count']

        if total == 0:
            return {
                'total': 0,
                'positive': 0,
                'neutral': 0,
                'negative': 0,
                'positive_pct': 0,
                'neutral_pct': 0,
                'negative_pct': 0,
                'avg_score': 0,
                'avg_confidence': 0,
            }

        positive = summary['positive_count']
        neutral = summary['neutral_count']
        negative = summary['negative_count']

        return {
            'total': total,
            'positive': positive,
            'neutral': neutral,
            'negative': negative,
            'positive_pct': round((positive / total) * 100, 1),
            'neutral_pct': round((neutral / total) * 100, 1),
            'negative_pct': round((negative / total) * 100, 1),
            # Avg yields None when every value is NULL; report that as 0.
            'avg_score': float(summary['score_avg'] or 0),
            'avg_confidence': float(summary['confidence_avg'] or 0),
        }