HH/apps/social/services/linkedin.py
2026-02-12 15:09:48 +03:00

466 lines
17 KiB
Python

import datetime
import hashlib
import hmac
import secrets
import time
from urllib.parse import quote, urlencode

import requests
from django.conf import settings
from django.utils import timezone

from apps.social.models import SocialAccount
from apps.social.utils.linkedin import LinkedInConstants
class LinkedInAPIError(Exception):
    """Raised when a LinkedIn API call fails or returns an error response."""
class LinkedInService:
    """Service class for LinkedIn API interactions (RestLi 2.0).

    All methods are static; the authenticated account object is passed in
    explicitly wherever credentials are needed.
    """

    # ==========================================
    # HELPER METHODS
    # ==========================================

    @staticmethod
    def _get_headers(token):
        """Generate headers for LinkedIn API requests."""
        return {
            "Authorization": f"Bearer {token}",
            "Linkedin-Version": LinkedInConstants.API_VERSION,
            "X-Restli-Protocol-Version": "2.0.0",
            "Content-Type": "application/json"
        }

    @staticmethod
    def _normalize_urn(platform_id, urn_type="organization"):
        """
        Normalize a platform ID to proper URN format.

        Args:
            platform_id: Either a bare ID or a full URN.
            urn_type: Type of URN (organization, person, etc.).

        Returns:
            Properly formatted URN string.

        Raises:
            ValueError: If platform_id is empty.
        """
        if not platform_id:
            raise ValueError("platform_id cannot be empty")
        # If it already looks like a URN (contains colons), return it as-is.
        # This prevents corrupting 'urn:li:share:123' into
        # 'urn:li:organization:urn:li:share:123'.
        if ":" in platform_id:
            return platform_id
        return f"urn:li:{urn_type}:{platform_id}"

    @staticmethod
    def _encode_urn(urn):
        """URL-encode a URN for use in API request paths (colons included)."""
        return quote(urn, safe='')

    @staticmethod
    def _parse_timestamp(time_ms):
        """
        Convert a LinkedIn timestamp (milliseconds since epoch) to a
        timezone-aware datetime.

        Args:
            time_ms: Timestamp in milliseconds.

        Returns:
            Timezone-aware datetime object, or the current time if the
            value is missing or unparseable (best-effort parser).
        """
        if not time_ms:
            return timezone.now()
        try:
            # LinkedIn returns milliseconds; divide by 1000 for seconds.
            return datetime.datetime.fromtimestamp(
                time_ms / 1000.0,
                tz=datetime.timezone.utc
            )
        # TypeError added: a non-numeric value (e.g. a string) previously
        # escaped this best-effort parser instead of falling back.
        except (TypeError, ValueError, OSError):
            return timezone.now()

    # ==========================================
    # AUTHENTICATION
    # ==========================================

    @staticmethod
    def get_auth_url(state=None):
        """Generate the LinkedIn OAuth authorization URL.

        Args:
            state: Opaque CSRF token echoed back on the redirect. When not
                provided, a cryptographically random value is generated.

        Returns:
            Fully-formed authorization URL string.
        """
        params = {
            "response_type": "code",
            "client_id": settings.LINKEDIN_CLIENT_ID,
            "redirect_uri": settings.LINKEDIN_REDIRECT_URI,
            "scope": " ".join(LinkedInConstants.SCOPES),
            # Security fix: the previous hard-coded fallback
            # "random_state_123" made the state parameter predictable,
            # defeating its CSRF-protection purpose.
            "state": state or secrets.token_urlsafe(16),
        }
        return f"{LinkedInConstants.AUTH_URL}?{urlencode(params)}"

    @staticmethod
    def exchange_code_for_token(code):
        """Exchange an authorization code for an access token.

        Raises:
            LinkedInAPIError: If the token endpoint returns non-200.
        """
        payload = {
            "grant_type": "authorization_code",
            "code": code,
            "redirect_uri": settings.LINKEDIN_REDIRECT_URI,
            "client_id": settings.LINKEDIN_CLIENT_ID,
            "client_secret": settings.LINKEDIN_CLIENT_SECRET
        }
        # timeout added for consistency with _make_request: a hung token
        # endpoint would otherwise block the caller indefinitely.
        response = requests.post(LinkedInConstants.TOKEN_URL, data=payload, timeout=30)
        if response.status_code != 200:
            raise LinkedInAPIError(f"Token Exchange Failed: {response.text}")
        return response.json()

    @staticmethod
    def refresh_access_token(account):
        """Refresh the access token if it expires within 15 minutes.

        Marks the account inactive when the refresh is rejected.

        Returns:
            The (possibly refreshed) access token string.

        Raises:
            LinkedInAPIError: If the account is inactive or the refresh fails.
        """
        if not account.is_active:
            raise LinkedInAPIError("Account is inactive")
        # Refresh if token expires within 15 minutes
        if timezone.now() >= account.expires_at - datetime.timedelta(minutes=15):
            payload = {
                "grant_type": "refresh_token",
                "refresh_token": account.refresh_token,
                "client_id": settings.LINKEDIN_CLIENT_ID,
                "client_secret": settings.LINKEDIN_CLIENT_SECRET,
            }
            # timeout added for consistency with _make_request.
            response = requests.post(LinkedInConstants.TOKEN_URL, data=payload, timeout=30)
            if response.status_code != 200:
                # Deactivate so later calls fail fast instead of re-trying
                # a dead refresh token.
                account.is_active = False
                account.save()
                raise LinkedInAPIError(f"Refresh Token Expired: {response.text}")
            data = response.json()
            account.access_token = data['access_token']
            account.expires_at = timezone.now() + datetime.timedelta(seconds=data['expires_in'])
            # LinkedIn may rotate the refresh token; persist it when present.
            if 'refresh_token' in data:
                account.refresh_token = data['refresh_token']
            account.save()
        return account.access_token

    # ==========================================
    # API REQUEST HANDLER
    # ==========================================

    @staticmethod
    def _make_request(account, method, url, payload=None, retry_count=0):
        """Make an authenticated API request with rate-limit handling.

        Args:
            account: Account providing credentials (token auto-refreshed).
            method: "GET", "POST" or "DELETE".
            url: Fully-qualified endpoint URL.
            payload: Query params (GET/DELETE) or JSON body (POST).
            retry_count: Internal recursion depth for 429 retries.

        Returns:
            Decoded JSON dict; {} for 404 or empty response bodies.

        Raises:
            LinkedInAPIError: On HTTP errors, network failures, or when the
                rate-limit retry budget is exhausted.
            ValueError: For an unsupported HTTP method.
        """
        token = LinkedInService.refresh_access_token(account)
        headers = LinkedInService._get_headers(token)
        try:
            if method == "GET":
                response = requests.get(url, headers=headers, params=payload, timeout=30)
            elif method == "POST":
                response = requests.post(url, headers=headers, json=payload, timeout=30)
            elif method == "DELETE":
                response = requests.delete(url, headers=headers, params=payload, timeout=30)
            else:
                raise ValueError(f"Unsupported HTTP method: {method}")
            # Handle rate limiting with a bounded recursive retry.
            if response.status_code == 429:
                if retry_count >= LinkedInConstants.MAX_RETRIES:
                    raise LinkedInAPIError("Max retries exceeded for rate limit")
                # NOTE(review): assumes the reset header is an epoch-seconds
                # value -- confirm against the live API.
                reset_time = int(response.headers.get('X-RateLimit-Reset', time.time() + 60))
                sleep_duration = max(1, reset_time - int(time.time()))
                print(f"[Rate Limit] Sleeping for {sleep_duration}s (attempt {retry_count + 1})")
                time.sleep(sleep_duration)
                return LinkedInService._make_request(account, method, url, payload, retry_count + 1)
            # Treat 404 as "resource not found" -> empty result.
            if response.status_code == 404:
                return {}
            if response.status_code >= 400:
                raise LinkedInAPIError(f"API Error {response.status_code}: {response.text}")
            # Robustness fix: 2xx responses with no body (e.g. 204 from a
            # DELETE) are not valid JSON and would raise in .json().
            if not response.content:
                return {}
            return response.json()
        except requests.exceptions.RequestException as e:
            raise LinkedInAPIError(f"Request failed: {str(e)}") from e

    # ==========================================
    # POSTS API
    # ==========================================

    @staticmethod
    def fetch_posts(account, count=50):
        """
        Fetch organization posts using the new Posts API.

        Returns post objects containing full URNs (e.g., urn:li:share:123).
        Errors are logged and an empty (or partial) list is returned.
        """
        posts = []
        org_urn = LinkedInService._normalize_urn(account.platform_id, "organization")
        params = {
            "author": org_urn,
            "q": "author",
            "count": min(count, LinkedInConstants.MAX_PAGE_SIZE),
            "sortBy": "LAST_MODIFIED"
        }
        try:
            data = LinkedInService._make_request(
                account,
                "GET",
                f"{LinkedInConstants.BASE_URL}/posts",  # versioned endpoint
                payload=params
            )
            posts = data.get('elements', [])
        except LinkedInAPIError as e:
            print(f"Error fetching posts: {e}")
        return posts

    # ==========================================
    # COMMENTS API
    # ==========================================

    @staticmethod
    def _fetch_comment_batch(account, encoded_urn, count, start):
        """Fetch one page of comments for a post.

        Shared by the three public comment-fetching methods; returns [] on
        API error or when the response contains no elements.
        """
        params = {"count": count, "start": start}
        try:
            data = LinkedInService._make_request(
                account,
                "GET",
                f"{LinkedInConstants.BASE_URL}/socialActions/{encoded_urn}/comments",
                payload=params
            )
        except LinkedInAPIError as e:
            print(f"Error fetching comments: {e}")
            return []
        if not data or 'elements' not in data:
            return []
        return data.get('elements', [])

    @staticmethod
    def fetch_all_comments(account, post_urn):
        """
        Fetch ALL comments for a post (for complete historical sync).

        Args:
            post_urn: Must be a full URN (e.g., urn:li:share:123).

        Returns:
            List of comment dicts, each tagged with its 'post_urn'.
        """
        comments = []
        start = 0
        batch_size = LinkedInConstants.DEFAULT_PAGE_SIZE
        encoded_urn = LinkedInService._encode_urn(post_urn)
        while True:
            elements = LinkedInService._fetch_comment_batch(
                account, encoded_urn, batch_size, start
            )
            if not elements:
                break
            for comment in elements:
                # Tag each comment with its parent post for downstream sync.
                comment['post_urn'] = post_urn
                comments.append(comment)
            # A short page means the collection is exhausted.
            if len(elements) < batch_size:
                break
            start += batch_size
            time.sleep(0.3)  # gentle pacing between pages
        return comments

    @staticmethod
    def fetch_comments_limited(account, post_urn, limit=200):
        """Fetch a limited number of the most recent comments."""
        comments = []
        start = 0
        batch_size = LinkedInConstants.DEFAULT_PAGE_SIZE
        encoded_urn = LinkedInService._encode_urn(post_urn)
        while len(comments) < limit:
            # Never request more than we still need.
            current_batch = min(batch_size, limit - len(comments))
            elements = LinkedInService._fetch_comment_batch(
                account, encoded_urn, current_batch, start
            )
            if not elements:
                break
            for comment in elements:
                comment['post_urn'] = post_urn
                comments.append(comment)
            if len(elements) < current_batch:
                break
            start += current_batch
            time.sleep(0.3)  # gentle pacing between pages
        return comments

    @staticmethod
    def fetch_comments_delta(account, post_urn, since_timestamp=None):
        """Fetch only NEW comments since a specific timestamp.

        NOTE(review): assumes the API returns pages ordered newest-first
        (newest at elements[0], oldest at elements[-1]) -- confirm against
        the live API.
        """
        comments = []
        start = 0
        batch_size = LinkedInConstants.DEFAULT_PAGE_SIZE
        encoded_urn = LinkedInService._encode_urn(post_urn)
        while True:
            elements = LinkedInService._fetch_comment_batch(
                account, encoded_urn, batch_size, start
            )
            if not elements:
                break
            # Optimization: if even the newest comment in this batch is not
            # newer than the cutoff, every later page is older too -- stop.
            newest_in_batch = elements[0].get('created', {}).get('time')
            if since_timestamp and newest_in_batch:
                newest_dt = LinkedInService._parse_timestamp(newest_in_batch)
                if newest_dt <= since_timestamp:
                    break
            # If the oldest item in the batch crosses the cutoff, keep only
            # the strictly-newer comments from this page and stop paginating.
            if since_timestamp:
                oldest_in_batch = elements[-1].get('created', {}).get('time')
                if oldest_in_batch:
                    oldest_dt = LinkedInService._parse_timestamp(oldest_in_batch)
                    if oldest_dt <= since_timestamp:
                        for comment in elements:
                            c_time_ms = comment.get('created', {}).get('time')
                            if c_time_ms:
                                c_dt = LinkedInService._parse_timestamp(c_time_ms)
                                if c_dt > since_timestamp:
                                    comment['post_urn'] = post_urn
                                    comments.append(comment)
                        break
            # Entire page is newer than the cutoff (or no cutoff given).
            for comment in elements:
                comment['post_urn'] = post_urn
                comments.append(comment)
            if len(elements) < batch_size:
                break
            start += batch_size
            time.sleep(0.3)  # gentle pacing between pages
        return comments

    @staticmethod
    def fetch_single_comment(account, post_urn, comment_id):
        """
        Fetch a specific comment by ID (efficient for webhook processing).

        Returns the comment dict tagged with 'post_urn', or None on error
        or when the comment was not found.
        """
        encoded_post_urn = LinkedInService._encode_urn(post_urn)
        url = f"{LinkedInConstants.BASE_URL}/socialActions/{encoded_post_urn}/comments/{comment_id}"
        try:
            data = LinkedInService._make_request(account, "GET", url)
            if data:
                data['post_urn'] = post_urn
                return data
        except LinkedInAPIError as e:
            print(f"Error fetching comment {comment_id}: {e}")
        return None

    # ==========================================
    # COMMENT ACTIONS
    # ==========================================

    @staticmethod
    def post_reply(account, parent_urn, text):
        """
        Reply to a post or comment as the organization.

        Args:
            parent_urn: URN of the post (urn:li:share:...) or
                comment (urn:li:comment:...).
            text: Plain-text body of the reply.

        Returns:
            The API response dict for the created comment.
        """
        encoded_parent_urn = LinkedInService._encode_urn(parent_urn)
        url = f"{LinkedInConstants.BASE_URL}/socialActions/{encoded_parent_urn}/comments"
        org_urn = LinkedInService._normalize_urn(account.platform_id, "organization")
        payload = {
            "actor": org_urn,
            "message": {
                "text": text
            }
        }
        return LinkedInService._make_request(account, "POST", url, payload=payload)

    # @staticmethod
    # def delete_comment(account, post_urn, comment_id):
    #     """
    #     Delete a comment.
    #     Note: The 'actor' is NOT passed as a query parameter in the new API.
    #     It is derived from the OAuth Access Token.
    #     """
    #     encoded_post_urn = LinkedInService._encode_urn(post_urn)
    #     url = f"{LinkedInConstants.BASE_URL}/socialActions/{encoded_post_urn}/comments/{comment_id}"
    #     LinkedInService._make_request(account, "DELETE", url, payload={})
    #     return True

    # ==========================================
    # WEBHOOK UTILITIES
    # ==========================================

    @staticmethod
    def calculate_hmac_sha256(secret_key, message):
        """Calculate an HMAC-SHA256 hex digest; accepts str or bytes inputs."""
        if isinstance(message, str):
            message = message.encode('utf-8')
        if isinstance(secret_key, str):
            secret_key = secret_key.encode('utf-8')
        return hmac.new(secret_key, message, hashlib.sha256).hexdigest()

    @staticmethod
    def verify_webhook_signature(received_signature, body_raw, client_secret):
        """Verify a webhook signature for authenticity.

        Uses a constant-time comparison to avoid timing side channels.
        Returns False for missing signature or body.
        """
        if not received_signature or not body_raw:
            return False
        calculated_digest = LinkedInService.calculate_hmac_sha256(client_secret, body_raw)
        expected_signature = f"hmacsha256={calculated_digest}"
        return hmac.compare_digest(received_signature, expected_signature)