"""
|
|
OpenRouter API service for AI-powered comment analysis.
|
|
Handles authentication, requests, and response parsing for sentiment analysis,
|
|
keyword extraction, topic identification, and entity recognition.
|
|
"""
|
|
import logging
|
|
import json
|
|
from typing import Dict, List, Any, Optional
|
|
from decimal import Decimal
|
|
import httpx
|
|
|
|
from django.conf import settings
|
|
from django.utils import timezone
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class OpenRouterService:
    """
    Service for interacting with OpenRouter API to analyze comments.
    Provides sentiment analysis, keyword extraction, topic identification, and entity recognition.
    """

    # Fallback model id used when settings.OPENROUTER_MODEL is not set.
    DEFAULT_MODEL = "anthropic/claude-3-haiku"
    # Upper bound on tokens the model may generate per request.
    # NOTE(review): the prompt requests a large bilingual JSON document per
    # comment; 1024 tokens may truncate batch responses — confirm against
    # observed completion_tokens before raising.
    DEFAULT_MAX_TOKENS = 1024
    # Low temperature keeps the model's JSON output stable and deterministic.
    DEFAULT_TEMPERATURE = 0.1
|
|
|
def __init__(
|
|
self,
|
|
api_key: Optional[str] = None,
|
|
model: Optional[str] = None,
|
|
timeout: int = 30
|
|
):
|
|
"""
|
|
Initialize OpenRouter service.
|
|
|
|
Args:
|
|
api_key: OpenRouter API key (defaults to settings.OPENROUTER_API_KEY)
|
|
model: Model to use (defaults to settings.OPENROUTER_MODEL or DEFAULT_MODEL)
|
|
timeout: Request timeout in seconds
|
|
"""
|
|
self.api_key = api_key or getattr(settings, 'OPENROUTER_API_KEY', None)
|
|
self.model = model or getattr(settings, 'OPENROUTER_MODEL', self.DEFAULT_MODEL)
|
|
self.timeout = timeout
|
|
self.api_url = "https://openrouter.ai/api/v1/chat/completions"
|
|
|
|
if not self.api_key:
|
|
logger.warning(
|
|
"OpenRouter API key not configured. "
|
|
"Set OPENROUTER_API_KEY in your .env file."
|
|
)
|
|
|
|
logger.info(f"OpenRouter service initialized with model: {self.model}")
|
|
|
|
    def _build_analysis_prompt(self, comments: List[Dict[str, Any]]) -> str:
        """
        Build prompt for batch comment analysis with bilingual output.

        Args:
            comments: List of comment dictionaries with 'id' and 'text' keys

        Returns:
            Formatted prompt string instructing the model to return a single
            JSON document with one analysis entry per comment.
        """
        # Comments are labelled 1-based for the model ("Comment 1", ...).
        # NOTE(review): the example JSON below shows a 0-based "comment_index";
        # verify the model actually returns 0-based indices, otherwise the
        # index -> comment id mapping done in analyze_comments_async drifts
        # by one and drops the last comment.
        comments_text = "\n".join([
            f"Comment {i+1}: {c['text']}"
            for i, c in enumerate(comments)
        ])

        # Using regular string instead of f-string to avoid JSON brace escaping issues
        prompt = """You are a bilingual AI analyst specializing in social media sentiment analysis. Analyze the following comments and provide a COMPLETE bilingual analysis in BOTH English and Arabic.

Comments to analyze:
""" + comments_text + """

IMPORTANT REQUIREMENTS:
1. ALL analysis MUST be provided in BOTH English and Arabic
2. Use clear, modern Arabic that all Arabic speakers can understand
3. Detect comment's language and provide appropriate translations
4. Maintain accuracy and cultural appropriateness in both languages

For each comment, provide:

A. Sentiment Analysis (Bilingual)
- classification: {"en": "positive|neutral|negative", "ar": "إيجابي|محايد|سلبي"}
- score: number from -1.0 to 1.0
- confidence: number from 0.0 to 1.0

B. Summaries (Bilingual)
- en: 2-3 sentence English summary of comment's main points and sentiment
- ar: 2-3 sentence Arabic summary (ملخص بالعربية) with the same depth

C. Keywords (Bilingual - 5-7 each)
- en: list of English keywords
- ar: list of Arabic keywords

D. Topics (Bilingual - 3-5 each)
- en: list of English topics
- ar: list of Arabic topics

E. Entities (Bilingual)
- For each entity: {"text": {"en": "...", "ar": "..."}, "type": {"en": "PERSON|ORGANIZATION|LOCATION|BRAND|OTHER", "ar": "شخص|منظمة|موقع|علامة تجارية|أخرى"}}

F. Emotions
- Provide scores for: joy, anger, sadness, fear, surprise, disgust
- Each emotion: 0.0 to 1.0
- labels: {"emotion_name": {"en": "English label", "ar": "Arabic label"}}

Return ONLY valid JSON in this exact format:
{
"analyses": [
{
"comment_index": 0,
"sentiment": {
"classification": {"en": "positive", "ar": "إيجابي"},
"score": 0.85,
"confidence": 0.92
},
"summaries": {
"en": "The customer is very satisfied with the excellent service and fast delivery. They praised the staff's professionalism and product quality.",
"ar": "العميل راضٍ جداً عن الخدمة الممتازة والتسليم السريع. أشاد باحترافية الموظفين وجودة المنتج."
},
"keywords": {
"en": ["excellent service", "fast delivery", "professional", "quality"],
"ar": ["خدمة ممتازة", "تسليم سريع", "احترافي", "جودة"]
},
"topics": {
"en": ["customer service", "delivery speed", "staff professionalism"],
"ar": ["خدمة العملاء", "سرعة التسليم", "احترافية الموظفين"]
},
"entities": [
{
"text": {"en": "Amazon", "ar": "أمازون"},
"type": {"en": "ORGANIZATION", "ar": "منظمة"}
}
],
"emotions": {
"joy": 0.9,
"anger": 0.05,
"sadness": 0.0,
"fear": 0.0,
"surprise": 0.15,
"disgust": 0.0,
"labels": {
"joy": {"en": "Joy/Happiness", "ar": "فرح/سعادة"},
"anger": {"en": "Anger", "ar": "غضب"},
"sadness": {"en": "Sadness", "ar": "حزن"},
"fear": {"en": "Fear", "ar": "خوف"},
"surprise": {"en": "Surprise", "ar": "مفاجأة"},
"disgust": {"en": "Disgust", "ar": "اشمئزاز"}
}
}
}
]
}
"""
        return prompt
|
|
|
|
async def analyze_comments_async(self, comments: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""
|
|
Analyze a batch of comments using OpenRouter API (async).
|
|
|
|
Args:
|
|
comments: List of comment dictionaries with 'id' and 'text' keys
|
|
|
|
Returns:
|
|
Dictionary with success status and analysis results
|
|
"""
|
|
logger.info("=" * 80)
|
|
logger.info("STARTING OPENROUTER API ANALYSIS")
|
|
logger.info("=" * 80)
|
|
|
|
if not self.api_key:
|
|
logger.error("API KEY NOT CONFIGURED")
|
|
return {
|
|
'success': False,
|
|
'error': 'OpenRouter API key not configured'
|
|
}
|
|
|
|
logger.info(f"API Key: {self.api_key[:20]}...{self.api_key[-4:]}")
|
|
|
|
if not comments:
|
|
logger.warning("No comments to analyze")
|
|
return {
|
|
'success': True,
|
|
'analyses': []
|
|
}
|
|
|
|
try:
|
|
logger.info(f"Building prompt for {len(comments)} comments...")
|
|
prompt = self._build_analysis_prompt(comments)
|
|
logger.info(f"Prompt length: {len(prompt)} characters")
|
|
|
|
headers = {
|
|
'Authorization': f'Bearer {self.api_key}',
|
|
'Content-Type': 'application/json',
|
|
'HTTP-Referer': getattr(settings, 'SITE_URL', 'http://localhost'),
|
|
'X-Title': 'Social Media Comment Analyzer'
|
|
}
|
|
|
|
logger.info(f"Request headers prepared: {list(headers.keys())}")
|
|
|
|
payload = {
|
|
'model': self.model,
|
|
'messages': [
|
|
{
|
|
'role': 'system',
|
|
'content': 'You are an expert social media sentiment analyzer. Always respond with valid JSON only.'
|
|
},
|
|
{
|
|
'role': 'user',
|
|
'content': prompt
|
|
}
|
|
],
|
|
'max_tokens': self.DEFAULT_MAX_TOKENS,
|
|
'temperature': self.DEFAULT_TEMPERATURE
|
|
}
|
|
|
|
logger.info(f"Request payload prepared:")
|
|
logger.info(f" - Model: {payload['model']}")
|
|
logger.info(f" - Max tokens: {payload['max_tokens']}")
|
|
logger.info(f" - Temperature: {payload['temperature']}")
|
|
logger.info(f" - Messages: {len(payload['messages'])}")
|
|
logger.info(f" - Payload size: {len(json.dumps(payload))} bytes")
|
|
|
|
logger.info("-" * 80)
|
|
logger.info("SENDING HTTP REQUEST TO OPENROUTER API")
|
|
logger.info("-" * 80)
|
|
logger.info(f"URL: {self.api_url}")
|
|
logger.info(f"Timeout: {self.timeout}s")
|
|
|
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
response = await client.post(
|
|
self.api_url,
|
|
headers=headers,
|
|
json=payload
|
|
)
|
|
|
|
logger.info("-" * 80)
|
|
logger.info("RESPONSE RECEIVED")
|
|
logger.info("-" * 80)
|
|
logger.info(f"Status Code: {response.status_code}")
|
|
logger.info(f"Status Reason: {response.reason_phrase}")
|
|
logger.info(f"HTTP Version: {response.http_version}")
|
|
logger.info(f"Headers: {dict(response.headers)}")
|
|
|
|
# Get raw response text BEFORE any parsing
|
|
raw_content = response.text
|
|
logger.info(f"Raw response length: {len(raw_content)} characters")
|
|
|
|
# Log first and last parts of response for debugging
|
|
logger.debug("-" * 80)
|
|
logger.debug("RAW RESPONSE CONTENT (First 500 chars):")
|
|
logger.debug(raw_content[:500])
|
|
logger.debug("-" * 80)
|
|
logger.debug("RAW RESPONSE CONTENT (Last 500 chars):")
|
|
logger.debug(raw_content[-500:] if len(raw_content) > 500 else raw_content)
|
|
logger.debug("-" * 80)
|
|
|
|
response.raise_for_status()
|
|
|
|
logger.info("Response status OK (200), attempting to parse JSON...")
|
|
|
|
data = response.json()
|
|
logger.info(f"Successfully parsed JSON response")
|
|
logger.info(f"Response structure: {list(data.keys()) if isinstance(data, dict) else type(data)}")
|
|
|
|
# Extract analysis from response
|
|
if 'choices' in data and len(data['choices']) > 0:
|
|
logger.info(f"Found {len(data['choices'])} choices in response")
|
|
content = data['choices'][0]['message']['content']
|
|
logger.info(f"Content message length: {len(content)} characters")
|
|
|
|
# Parse JSON response
|
|
try:
|
|
# Clean up response in case there's any extra text
|
|
logger.info("Cleaning response content...")
|
|
content = content.strip()
|
|
logger.info(f"After strip: {len(content)} chars")
|
|
|
|
# Remove markdown code blocks if present
|
|
if content.startswith('```json'):
|
|
logger.info("Detected ```json prefix, removing...")
|
|
content = content[7:]
|
|
elif content.startswith('```'):
|
|
logger.info("Detected ``` prefix, removing...")
|
|
content = content[3:]
|
|
|
|
if content.endswith('```'):
|
|
logger.info("Detected ``` suffix, removing...")
|
|
content = content[:-3]
|
|
|
|
content = content.strip()
|
|
logger.info(f"After cleaning: {len(content)} chars")
|
|
|
|
logger.debug("-" * 80)
|
|
logger.debug("CLEANED CONTENT (First 300 chars):")
|
|
logger.debug(content[:300])
|
|
logger.debug("-" * 80)
|
|
|
|
logger.info("Attempting to parse JSON...")
|
|
analysis_data = json.loads(content)
|
|
logger.info("JSON parsed successfully!")
|
|
logger.info(f"Analysis data keys: {list(analysis_data.keys()) if isinstance(analysis_data, dict) else type(analysis_data)}")
|
|
|
|
if 'analyses' in analysis_data:
|
|
logger.info(f"Found {len(analysis_data['analyses'])} analyses")
|
|
|
|
# Map comment indices back to IDs
|
|
analyses = []
|
|
for idx, analysis in enumerate(analysis_data.get('analyses', [])):
|
|
comment_idx = analysis.get('comment_index', 0)
|
|
if comment_idx < len(comments):
|
|
comment_id = comments[comment_idx]['id']
|
|
logger.debug(f" Analysis {idx+1}: comment_index={comment_idx}, comment_id={comment_id}")
|
|
analyses.append({
|
|
'comment_id': comment_id,
|
|
**analysis
|
|
})
|
|
|
|
# Extract metadata
|
|
metadata = {
|
|
'model': self.model,
|
|
'prompt_tokens': data.get('usage', {}).get('prompt_tokens', 0),
|
|
'completion_tokens': data.get('usage', {}).get('completion_tokens', 0),
|
|
'total_tokens': data.get('usage', {}).get('total_tokens', 0),
|
|
'analyzed_at': timezone.now().isoformat()
|
|
}
|
|
|
|
logger.info(f"Metadata: {metadata}")
|
|
logger.info("=" * 80)
|
|
logger.info("ANALYSIS COMPLETED SUCCESSFULLY")
|
|
logger.info("=" * 80)
|
|
|
|
return {
|
|
'success': True,
|
|
'analyses': analyses,
|
|
'metadata': metadata
|
|
}
|
|
|
|
except json.JSONDecodeError as e:
|
|
logger.error("=" * 80)
|
|
logger.error("JSON PARSE ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Error: {e}")
|
|
logger.error(f"Error position: Line {e.lineno}, Column {e.colno}")
|
|
logger.error(f"Error message: {e.msg}")
|
|
logger.error("-" * 80)
|
|
logger.error("FULL CONTENT THAT FAILED TO PARSE:")
|
|
logger.error("-" * 80)
|
|
logger.error(content)
|
|
logger.error("-" * 80)
|
|
logger.error("CHARACTER AT ERROR POSITION:")
|
|
logger.error("-" * 80)
|
|
if hasattr(e, 'pos') and e.pos:
|
|
start = max(0, e.pos - 100)
|
|
end = min(len(content), e.pos + 100)
|
|
logger.error(content[start:end])
|
|
logger.error(f"^ (error at position {e.pos})")
|
|
|
|
return {
|
|
'success': False,
|
|
'error': f'Invalid JSON response from API: {str(e)}'
|
|
}
|
|
else:
|
|
logger.error(f"No choices found in response. Response keys: {list(data.keys()) if isinstance(data, dict) else type(data)}")
|
|
return {
|
|
'success': False,
|
|
'error': 'No analysis returned from API'
|
|
}
|
|
|
|
except httpx.HTTPStatusError as e:
|
|
logger.error("=" * 80)
|
|
logger.error("HTTP STATUS ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Status Code: {e.response.status_code}")
|
|
logger.error(f"Response Text: {e.response.text}")
|
|
return {
|
|
'success': False,
|
|
'error': f'API error: {e.response.status_code} - {str(e)}'
|
|
}
|
|
except httpx.RequestError as e:
|
|
logger.error("=" * 80)
|
|
logger.error("HTTP REQUEST ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Error: {str(e)}")
|
|
return {
|
|
'success': False,
|
|
'error': f'Request failed: {str(e)}'
|
|
}
|
|
except Exception as e:
|
|
logger.error("=" * 80)
|
|
logger.error("UNEXPECTED ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Error Type: {type(e).__name__}")
|
|
logger.error(f"Error Message: {str(e)}")
|
|
logger.error("=" * 80)
|
|
logger.error("FULL TRACEBACK:", exc_info=True)
|
|
logger.error("=" * 80)
|
|
return {
|
|
'success': False,
|
|
'error': f'Unexpected error: {str(e)}'
|
|
}
|
|
|
|
def analyze_comments(self, comments: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""
|
|
Analyze a batch of comments using OpenRouter API (synchronous wrapper).
|
|
|
|
Args:
|
|
comments: List of comment dictionaries with 'id' and 'text' keys
|
|
|
|
Returns:
|
|
Dictionary with success status and analysis results
|
|
"""
|
|
import asyncio
|
|
|
|
try:
|
|
# Run async function in event loop
|
|
loop = asyncio.get_event_loop()
|
|
except RuntimeError:
|
|
# No event loop exists, create new one
|
|
loop = asyncio.new_event_loop()
|
|
asyncio.set_event_loop(loop)
|
|
|
|
return loop.run_until_complete(self.analyze_comments_async(comments))
|
|
|
|
def is_configured(self) -> bool:
|
|
"""Check if service is properly configured."""
|
|
return bool(self.api_key)
|