"""
|
|
OpenRouter API service for AI-powered comment analysis.
|
|
Handles authentication, requests, and response parsing for sentiment analysis,
|
|
keyword extraction, topic identification, and entity recognition.
|
|
"""
|
|
import logging
|
|
import json
|
|
from typing import Dict, List, Any, Optional
|
|
from decimal import Decimal
|
|
import httpx
|
|
|
|
from django.conf import settings
|
|
from django.utils import timezone
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class OpenRouterService:
    """
    Service for interacting with OpenRouter API to analyze comments.
    Provides sentiment analysis, keyword extraction, topic identification, and entity recognition.
    """

    # Fallback model id used when settings.OPENROUTER_MODEL is not set.
    DEFAULT_MODEL = "anthropic/claude-3-haiku"
    # Upper bound on tokens the model may generate per request.
    # NOTE(review): the prompt requests a large bilingual JSON document per
    # comment; 1024 tokens may truncate batch responses — confirm against
    # observed completion_tokens before raising.
    DEFAULT_MAX_TOKENS = 1024
    # Low temperature keeps the model's JSON output stable and deterministic.
    DEFAULT_TEMPERATURE = 0.1
|
|
|
def __init__(
|
|
self,
|
|
api_key: Optional[str] = None,
|
|
model: Optional[str] = None,
|
|
timeout: int = 30
|
|
):
|
|
"""
|
|
Initialize OpenRouter service.
|
|
|
|
Args:
|
|
api_key: OpenRouter API key (defaults to settings.OPENROUTER_API_KEY)
|
|
model: Model to use (defaults to settings.OPENROUTER_MODEL or DEFAULT_MODEL)
|
|
timeout: Request timeout in seconds
|
|
"""
|
|
self.api_key = api_key or getattr(settings, 'OPENROUTER_API_KEY', None)
|
|
self.model = model or getattr(settings, 'OPENROUTER_MODEL', self.DEFAULT_MODEL)
|
|
self.timeout = timeout
|
|
self.api_url = "https://openrouter.ai/api/v1/chat/completions"
|
|
|
|
if not self.api_key:
|
|
logger.warning(
|
|
"OpenRouter API key not configured. "
|
|
"Set OPENROUTER_API_KEY in your .env file."
|
|
)
|
|
|
|
logger.info(f"OpenRouter service initialized with model: {self.model}")
|
|
|
|
    def _build_analysis_prompt(self, comments: List[Dict[str, Any]]) -> str:
        """
        Build prompt for batch comment analysis with bilingual output.

        Args:
            comments: List of comment dictionaries with 'id' and 'text' keys

        Returns:
            Formatted prompt string instructing the model to return a single
            JSON document with one analysis entry per comment.
        """
        # Comments are labelled 1-based for the model ("Comment 1", ...).
        # NOTE(review): the example JSON below shows a 0-based "comment_index";
        # verify the model actually returns 0-based indices, otherwise the
        # index -> comment id mapping done in analyze_comments_async drifts
        # by one and drops the last comment.
        comments_text = "\n".join([
            f"Comment {i+1}: {c['text']}"
            for i, c in enumerate(comments)
        ])

        # Using regular string instead of f-string to avoid JSON brace escaping issues
        prompt = """You are a bilingual AI analyst specializing in social media sentiment analysis. Analyze the following comments and provide a COMPLETE bilingual analysis in BOTH English and Arabic.

Comments to analyze:
""" + comments_text + """

IMPORTANT REQUIREMENTS:
1. ALL analysis MUST be provided in BOTH English and Arabic
2. Use clear, modern Arabic that all Arabic speakers can understand
3. Detect comment's language and provide appropriate translations
4. Maintain accuracy and cultural appropriateness in both languages

For each comment, provide:

A. Sentiment Analysis (Bilingual)
- classification: {"en": "positive|neutral|negative", "ar": "إيجابي|محايد|سلبي"}
- score: number from -1.0 to 1.0
- confidence: number from 0.0 to 1.0

B. Summaries (Bilingual)
- en: 2-3 sentence English summary of comment's main points and sentiment
- ar: 2-3 sentence Arabic summary (ملخص بالعربية) with the same depth

C. Keywords (Bilingual - 5-7 each)
- en: list of English keywords
- ar: list of Arabic keywords

D. Topics (Bilingual - 3-5 each)
- en: list of English topics
- ar: list of Arabic topics

E. Entities (Bilingual)
- For each entity: {"text": {"en": "...", "ar": "..."}, "type": {"en": "PERSON|ORGANIZATION|LOCATION|BRAND|OTHER", "ar": "شخص|منظمة|موقع|علامة تجارية|أخرى"}}

F. Emotions
- Provide scores for: joy, anger, sadness, fear, surprise, disgust
- Each emotion: 0.0 to 1.0
- labels: {"emotion_name": {"en": "English label", "ar": "Arabic label"}}

Return ONLY valid JSON in this exact format:
{
"analyses": [
{
"comment_index": 0,
"sentiment": {
"classification": {"en": "positive", "ar": "إيجابي"},
"score": 0.85,
"confidence": 0.92
},
"summaries": {
"en": "The customer is very satisfied with the excellent service and fast delivery. They praised the staff's professionalism and product quality.",
"ar": "العميل راضٍ جداً عن الخدمة الممتازة والتسليم السريع. أشاد باحترافية الموظفين وجودة المنتج."
},
"keywords": {
"en": ["excellent service", "fast delivery", "professional", "quality"],
"ar": ["خدمة ممتازة", "تسليم سريع", "احترافي", "جودة"]
},
"topics": {
"en": ["customer service", "delivery speed", "staff professionalism"],
"ar": ["خدمة العملاء", "سرعة التسليم", "احترافية الموظفين"]
},
"entities": [
{
"text": {"en": "Amazon", "ar": "أمازون"},
"type": {"en": "ORGANIZATION", "ar": "منظمة"}
}
],
"emotions": {
"joy": 0.9,
"anger": 0.05,
"sadness": 0.0,
"fear": 0.0,
"surprise": 0.15,
"disgust": 0.0,
"labels": {
"joy": {"en": "Joy/Happiness", "ar": "فرح/سعادة"},
"anger": {"en": "Anger", "ar": "غضب"},
"sadness": {"en": "Sadness", "ar": "حزن"},
"fear": {"en": "Fear", "ar": "خوف"},
"surprise": {"en": "Surprise", "ar": "مفاجأة"},
"disgust": {"en": "Disgust", "ar": "اشمئزاز"}
}
}
}
]
}
"""
        return prompt
|
|
|
|
async def analyze_comments_async(self, comments: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""
|
|
Analyze a batch of comments using OpenRouter API (async).
|
|
|
|
Args:
|
|
comments: List of comment dictionaries with 'id' and 'text' keys
|
|
|
|
Returns:
|
|
Dictionary with success status and analysis results
|
|
"""
|
|
logger.info("=" * 80)
|
|
logger.info("STARTING OPENROUTER API ANALYSIS")
|
|
logger.info("=" * 80)
|
|
|
|
if not self.api_key:
|
|
logger.error("API KEY NOT CONFIGURED")
|
|
return {
|
|
'success': False,
|
|
'error': 'OpenRouter API key not configured'
|
|
}
|
|
|
|
logger.info(f"API Key: {self.api_key[:20]}...{self.api_key[-4:]}")
|
|
|
|
if not comments:
|
|
logger.warning("No comments to analyze")
|
|
return {
|
|
'success': True,
|
|
'analyses': []
|
|
}
|
|
|
|
try:
|
|
logger.info(f"Building prompt for {len(comments)} comments...")
|
|
prompt = self._build_analysis_prompt(comments)
|
|
logger.info(f"Prompt length: {len(prompt)} characters")
|
|
|
|
headers = {
|
|
'Authorization': f'Bearer {self.api_key}',
|
|
'Content-Type': 'application/json',
|
|
'HTTP-Referer': getattr(settings, 'SITE_URL', 'http://localhost'),
|
|
'X-Title': 'Social Media Comment Analyzer'
|
|
}
|
|
|
|
logger.info(f"Request headers prepared: {list(headers.keys())}")
|
|
|
|
payload = {
|
|
'model': self.model,
|
|
'messages': [
|
|
{
|
|
'role': 'system',
|
|
'content': 'You are an expert social media sentiment analyzer. Always respond with valid JSON only.'
|
|
},
|
|
{
|
|
'role': 'user',
|
|
'content': prompt
|
|
}
|
|
],
|
|
'max_tokens': self.DEFAULT_MAX_TOKENS,
|
|
'temperature': self.DEFAULT_TEMPERATURE
|
|
}
|
|
|
|
logger.info(f"Request payload prepared:")
|
|
logger.info(f" - Model: {payload['model']}")
|
|
logger.info(f" - Max tokens: {payload['max_tokens']}")
|
|
logger.info(f" - Temperature: {payload['temperature']}")
|
|
logger.info(f" - Messages: {len(payload['messages'])}")
|
|
logger.info(f" - Payload size: {len(json.dumps(payload))} bytes")
|
|
|
|
logger.info("-" * 80)
|
|
logger.info("SENDING HTTP REQUEST TO OPENROUTER API")
|
|
logger.info("-" * 80)
|
|
logger.info(f"URL: {self.api_url}")
|
|
logger.info(f"Timeout: {self.timeout}s")
|
|
|
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
response = await client.post(
|
|
self.api_url,
|
|
headers=headers,
|
|
json=payload
|
|
)
|
|
|
|
logger.info("-" * 80)
|
|
logger.info("RESPONSE RECEIVED")
|
|
logger.info("-" * 80)
|
|
logger.info(f"Status Code: {response.status_code}")
|
|
logger.info(f"Status Reason: {response.reason_phrase}")
|
|
logger.info(f"HTTP Version: {response.http_version}")
|
|
logger.info(f"Headers: {dict(response.headers)}")
|
|
|
|
# Get raw response text BEFORE any parsing
|
|
raw_content = response.text
|
|
logger.info(f"Raw response length: {len(raw_content)} characters")
|
|
|
|
# Log first and last parts of response for debugging
|
|
logger.debug("-" * 80)
|
|
logger.debug("RAW RESPONSE CONTENT (First 500 chars):")
|
|
logger.debug(raw_content[:500])
|
|
logger.debug("-" * 80)
|
|
logger.debug("RAW RESPONSE CONTENT (Last 500 chars):")
|
|
logger.debug(raw_content[-500:] if len(raw_content) > 500 else raw_content)
|
|
logger.debug("-" * 80)
|
|
|
|
response.raise_for_status()
|
|
|
|
logger.info("Response status OK (200), attempting to parse JSON...")
|
|
|
|
data = response.json()
|
|
logger.info(f"Successfully parsed JSON response")
|
|
logger.info(f"Response structure: {list(data.keys()) if isinstance(data, dict) else type(data)}")
|
|
|
|
# Extract analysis from response
|
|
if 'choices' in data and len(data['choices']) > 0:
|
|
logger.info(f"Found {len(data['choices'])} choices in response")
|
|
content = data['choices'][0]['message']['content']
|
|
logger.info(f"Content message length: {len(content)} characters")
|
|
|
|
# Parse JSON response
|
|
try:
|
|
# Clean up response in case there's any extra text
|
|
logger.info("Cleaning response content...")
|
|
content = content.strip()
|
|
logger.info(f"After strip: {len(content)} chars")
|
|
|
|
# Remove markdown code blocks if present
|
|
if content.startswith('```json'):
|
|
logger.info("Detected ```json prefix, removing...")
|
|
content = content[7:]
|
|
elif content.startswith('```'):
|
|
logger.info("Detected ``` prefix, removing...")
|
|
content = content[3:]
|
|
|
|
if content.endswith('```'):
|
|
logger.info("Detected ``` suffix, removing...")
|
|
content = content[:-3]
|
|
|
|
content = content.strip()
|
|
logger.info(f"After cleaning: {len(content)} chars")
|
|
|
|
logger.debug("-" * 80)
|
|
logger.debug("CLEANED CONTENT (First 300 chars):")
|
|
logger.debug(content[:300])
|
|
logger.debug("-" * 80)
|
|
|
|
logger.info("Attempting to parse JSON...")
|
|
analysis_data = json.loads(content)
|
|
logger.info("JSON parsed successfully!")
|
|
logger.info(f"Analysis data keys: {list(analysis_data.keys()) if isinstance(analysis_data, dict) else type(analysis_data)}")
|
|
|
|
if 'analyses' in analysis_data:
|
|
logger.info(f"Found {len(analysis_data['analyses'])} analyses")
|
|
|
|
# Map comment indices back to IDs
|
|
analyses = []
|
|
for idx, analysis in enumerate(analysis_data.get('analyses', [])):
|
|
comment_idx = analysis.get('comment_index', 0)
|
|
if comment_idx < len(comments):
|
|
comment_id = comments[comment_idx]['id']
|
|
logger.debug(f" Analysis {idx+1}: comment_index={comment_idx}, comment_id={comment_id}")
|
|
analyses.append({
|
|
'comment_id': comment_id,
|
|
**analysis
|
|
})
|
|
|
|
# Extract metadata
|
|
metadata = {
|
|
'model': self.model,
|
|
'prompt_tokens': data.get('usage', {}).get('prompt_tokens', 0),
|
|
'completion_tokens': data.get('usage', {}).get('completion_tokens', 0),
|
|
'total_tokens': data.get('usage', {}).get('total_tokens', 0),
|
|
'analyzed_at': timezone.now().isoformat()
|
|
}
|
|
|
|
logger.info(f"Metadata: {metadata}")
|
|
logger.info("=" * 80)
|
|
logger.info("ANALYSIS COMPLETED SUCCESSFULLY")
|
|
logger.info("=" * 80)
|
|
|
|
return {
|
|
'success': True,
|
|
'analyses': analyses,
|
|
'metadata': metadata
|
|
}
|
|
|
|
except json.JSONDecodeError as e:
|
|
logger.error("=" * 80)
|
|
logger.error("JSON PARSE ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Error: {e}")
|
|
logger.error(f"Error position: Line {e.lineno}, Column {e.colno}")
|
|
logger.error(f"Error message: {e.msg}")
|
|
logger.error("-" * 80)
|
|
logger.error("FULL CONTENT THAT FAILED TO PARSE:")
|
|
logger.error("-" * 80)
|
|
logger.error(content)
|
|
logger.error("-" * 80)
|
|
logger.error("CHARACTER AT ERROR POSITION:")
|
|
logger.error("-" * 80)
|
|
if hasattr(e, 'pos') and e.pos:
|
|
start = max(0, e.pos - 100)
|
|
end = min(len(content), e.pos + 100)
|
|
logger.error(content[start:end])
|
|
logger.error(f"^ (error at position {e.pos})")
|
|
|
|
return {
|
|
'success': False,
|
|
'error': f'Invalid JSON response from API: {str(e)}'
|
|
}
|
|
else:
|
|
logger.error(f"No choices found in response. Response keys: {list(data.keys()) if isinstance(data, dict) else type(data)}")
|
|
return {
|
|
'success': False,
|
|
'error': 'No analysis returned from API'
|
|
}
|
|
|
|
except httpx.HTTPStatusError as e:
|
|
logger.error("=" * 80)
|
|
logger.error("HTTP STATUS ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Status Code: {e.response.status_code}")
|
|
logger.error(f"Response Text: {e.response.text}")
|
|
return {
|
|
'success': False,
|
|
'error': f'API error: {e.response.status_code} - {str(e)}'
|
|
}
|
|
except httpx.RequestError as e:
|
|
logger.error("=" * 80)
|
|
logger.error("HTTP REQUEST ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Error: {str(e)}")
|
|
return {
|
|
'success': False,
|
|
'error': f'Request failed: {str(e)}'
|
|
}
|
|
except Exception as e:
|
|
logger.error("=" * 80)
|
|
logger.error("UNEXPECTED ERROR")
|
|
logger.error("=" * 80)
|
|
logger.error(f"Error Type: {type(e).__name__}")
|
|
logger.error(f"Error Message: {str(e)}")
|
|
logger.error("=" * 80)
|
|
logger.error("FULL TRACEBACK:", exc_info=True)
|
|
logger.error("=" * 80)
|
|
return {
|
|
'success': False,
|
|
'error': f'Unexpected error: {str(e)}'
|
|
}
|
|
|
|
def analyze_comments(self, comments: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""
|
|
Analyze a batch of comments using OpenRouter API (synchronous wrapper).
|
|
|
|
Args:
|
|
comments: List of comment dictionaries with 'id' and 'text' keys
|
|
|
|
Returns:
|
|
Dictionary with success status and analysis results
|
|
"""
|
|
import asyncio
|
|
|
|
try:
|
|
# Run async function in event loop
|
|
loop = asyncio.get_event_loop()
|
|
except RuntimeError:
|
|
# No event loop exists, create new one
|
|
loop = asyncio.new_event_loop()
|
|
asyncio.set_event_loop(loop)
|
|
|
|
return loop.run_until_complete(self.analyze_comments_async(comments))
|
|
|
|
def is_configured(self) -> bool:
|
|
"""Check if service is properly configured."""
|
|
return bool(self.api_key)
|