# HH/apps/social/tasks.py
"""
Celery scheduled tasks for social media comment scraping and analysis.
"""
import logging
from datetime import timedelta
from typing import Optional

from celery import shared_task
from celery.schedules import crontab
from django.conf import settings
from django.utils import timezone

from .services import CommentService
from .services.analysis_service import AnalysisService

logger = logging.getLogger(__name__)

# Analysis settings
ANALYSIS_BATCH_SIZE = 10  # Number of comments to analyze per batch


@shared_task
def scrape_all_platforms():
    """
    Scheduled task to scrape all configured social media platforms.

    This task is scheduled using Celery Beat. After scraping, it
    automatically queues analysis for pending comments.

    Usage: Schedule this task to run at regular intervals (e.g., daily, hourly).

    Returns:
        Dictionary with results from each platform.
    """
    logger.info("Starting scheduled scrape for all platforms")
    try:
        service = CommentService()
        results = service.scrape_and_save()
        logger.info(f"Completed scheduled scrape. Results: {results}")

        # Automatically queue analysis for pending comments
        analyze_pending_comments.delay(limit=ANALYSIS_BATCH_SIZE)
        logger.info("Queued analysis task for pending comments")

        return results
    except Exception as e:
        logger.error(f"Error in scheduled scrape task: {e}")
        raise
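

# The scrape tasks in this module are meant to be driven by Celery Beat (hence
# the crontab import above). A minimal sketch of a beat configuration follows;
# it belongs in settings.py or celery.py, and the schedule names, timings, and
# dotted task paths below are assumptions, not part of this project's config:
#
#     CELERY_BEAT_SCHEDULE = {
#         'scrape-all-platforms-nightly': {
#             'task': 'apps.social.tasks.scrape_all_platforms',
#             'schedule': crontab(hour=2, minute=0),  # every day at 02:00
#         },
#         'analyze-pending-hourly': {
#             'task': 'apps.social.tasks.analyze_pending_comments',
#             'schedule': crontab(minute=0),  # at the top of every hour
#             'kwargs': {'limit': 100},
#         },
#     }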


@shared_task
def scrape_youtube_comments(channel_id: Optional[str] = None):
    """
    Scheduled task to scrape YouTube comments.

    Args:
        channel_id: Optional YouTube channel ID (uses the default from
            settings if not provided).

    Returns:
        Dictionary with 'total' and 'comments'.
    """
    logger.info("Starting scheduled YouTube scrape")
    try:
        service = CommentService()
        result = service.scrape_youtube(channel_id=channel_id, save_to_db=True)
        logger.info(f"Completed YouTube scrape. Total comments: {len(result)}")

        # Automatically queue analysis for pending comments
        analyze_pending_comments.delay(limit=ANALYSIS_BATCH_SIZE)
        logger.info("Queued analysis task for pending comments")

        return {'total': len(result), 'comments': result}
    except Exception as e:
        logger.error(f"Error in YouTube scrape task: {e}")
        raise
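

# Any of the platform tasks can also be triggered ad hoc, e.g. from a Django
# shell or a management command. The channel ID here is a placeholder, not a
# real default:
#
#     scrape_youtube_comments.delay(channel_id="UCxxxxxxxxxxxxxxxxxxxxxx")
#
# Called with no argument, each task falls back to the account configured in
# settings.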


@shared_task
def scrape_facebook_comments(page_id: Optional[str] = None):
    """
    Scheduled task to scrape Facebook comments.

    Args:
        page_id: Optional Facebook page ID (uses the default from settings
            if not provided).

    Returns:
        Dictionary with 'total' and 'comments'.
    """
    logger.info("Starting scheduled Facebook scrape")
    try:
        service = CommentService()
        result = service.scrape_facebook(page_id=page_id, save_to_db=True)
        logger.info(f"Completed Facebook scrape. Total comments: {len(result)}")

        # Automatically queue analysis for pending comments
        analyze_pending_comments.delay(limit=ANALYSIS_BATCH_SIZE)
        logger.info("Queued analysis task for pending comments")

        return {'total': len(result), 'comments': result}
    except Exception as e:
        logger.error(f"Error in Facebook scrape task: {e}")
        raise


@shared_task
def scrape_instagram_comments(account_id: Optional[str] = None):
    """
    Scheduled task to scrape Instagram comments.

    Args:
        account_id: Optional Instagram account ID (uses the default from
            settings if not provided).

    Returns:
        Dictionary with 'total' and 'comments'.
    """
    logger.info("Starting scheduled Instagram scrape")
    try:
        service = CommentService()
        result = service.scrape_instagram(account_id=account_id, save_to_db=True)
        logger.info(f"Completed Instagram scrape. Total comments: {len(result)}")

        # Automatically queue analysis for pending comments
        analyze_pending_comments.delay(limit=ANALYSIS_BATCH_SIZE)
        logger.info("Queued analysis task for pending comments")

        return {'total': len(result), 'comments': result}
    except Exception as e:
        logger.error(f"Error in Instagram scrape task: {e}")
        raise


@shared_task
def scrape_twitter_comments(username: Optional[str] = None):
    """
    Scheduled task to scrape Twitter/X comments (replies).

    Args:
        username: Optional Twitter username (uses the default from settings
            if not provided).

    Returns:
        Dictionary with 'total' and 'comments'.
    """
    logger.info("Starting scheduled Twitter/X scrape")
    try:
        service = CommentService()
        result = service.scrape_twitter(username=username, save_to_db=True)
        logger.info(f"Completed Twitter/X scrape. Total comments: {len(result)}")

        # Automatically queue analysis for pending comments
        analyze_pending_comments.delay(limit=ANALYSIS_BATCH_SIZE)
        logger.info("Queued analysis task for pending comments")

        return {'total': len(result), 'comments': result}
    except Exception as e:
        logger.error(f"Error in Twitter/X scrape task: {e}")
        raise


@shared_task
def scrape_linkedin_comments(organization_id: Optional[str] = None):
    """
    Scheduled task to scrape LinkedIn comments from organization posts.

    Args:
        organization_id: Optional LinkedIn organization URN (uses the default
            from settings if not provided).

    Returns:
        Dictionary with 'total' and 'comments'.
    """
    logger.info("Starting scheduled LinkedIn scrape")
    try:
        service = CommentService()
        result = service.scrape_linkedin(organization_id=organization_id, save_to_db=True)
        logger.info(f"Completed LinkedIn scrape. Total comments: {len(result)}")

        # Automatically queue analysis for pending comments
        analyze_pending_comments.delay(limit=ANALYSIS_BATCH_SIZE)
        logger.info("Queued analysis task for pending comments")

        return {'total': len(result), 'comments': result}
    except Exception as e:
        logger.error(f"Error in LinkedIn scrape task: {e}")
        raise


@shared_task
def scrape_google_reviews(location_names: Optional[list] = None):
    """
    Scheduled task to scrape Google Reviews from business locations.

    Args:
        location_names: Optional list of location names to scrape (uses all
            locations if not provided).

    Returns:
        Dictionary with 'total' and 'reviews'.
    """
    logger.info("Starting scheduled Google Reviews scrape")
    try:
        service = CommentService()
        result = service.scrape_google_reviews(location_names=location_names, save_to_db=True)
        logger.info(f"Completed Google Reviews scrape. Total reviews: {len(result)}")

        # Automatically queue analysis for pending comments
        analyze_pending_comments.delay(limit=ANALYSIS_BATCH_SIZE)
        logger.info("Queued analysis task for pending comments")

        return {'total': len(result), 'reviews': result}
    except Exception as e:
        logger.error(f"Error in Google Reviews scrape task: {e}")
        raise


# ============================================================================
# AI Analysis Tasks
# ============================================================================

@shared_task
def analyze_pending_comments(limit: int = 100):
    """
    Scheduled task to analyze all pending (unanalyzed) comments.

    Args:
        limit: Maximum number of comments to analyze in one run.

    Returns:
        Dictionary with analysis statistics.
    """
    if not getattr(settings, 'ANALYSIS_ENABLED', True):
        logger.info("Comment analysis is disabled")
        return {'success': False, 'message': 'Analysis disabled'}

    logger.info("Starting scheduled comment analysis")
    try:
        service = AnalysisService()
        results = service.analyze_pending_comments(limit=limit)
        logger.info(f"Completed comment analysis. Results: {results}")

        # Check whether pending comments remain and queue another batch if so.
        # A comment is pending when its ai_analysis field is NULL or an empty
        # dict; a single query with Q objects covers both cases.
        from django.db.models import Q

        from .models import SocialMediaComment

        pending_count = SocialMediaComment.objects.filter(
            Q(ai_analysis__isnull=True) | Q(ai_analysis={})
        ).count()

        # Queue another batch if any pending comments remain, capped at the
        # configured batch size.
        if pending_count > 0:
            logger.info(f"Found {pending_count} pending comments, queuing next batch")
            analyze_pending_comments.delay(limit=min(pending_count, ANALYSIS_BATCH_SIZE))

        return results
    except Exception as e:
        logger.error(f"Error in comment analysis task: {e}", exc_info=True)
        raise
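

# A retry-enabled variant, sketched for the case where the analysis backend
# is rate-limited or flaky. Illustrative only: the task name, retry count,
# and delay are assumptions, not part of the original module.
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def analyze_pending_comments_with_retry(self, limit: int = 100):
    """Sketch: run pending-comment analysis, retrying on transient errors."""
    try:
        return AnalysisService().analyze_pending_comments(limit=limit)
    except Exception as exc:
        # Re-queue this task up to max_retries times, waiting
        # default_retry_delay seconds between attempts.
        raise self.retry(exc=exc)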


@shared_task
def analyze_recent_comments(hours: int = 24, limit: int = 100):
    """
    Scheduled task to analyze comments scraped in the last N hours.

    Args:
        hours: Number of hours to look back.
        limit: Maximum number of comments to analyze.

    Returns:
        Dictionary with analysis statistics.
    """
    if not getattr(settings, 'ANALYSIS_ENABLED', True):
        logger.info("Comment analysis is disabled")
        return {'success': False, 'message': 'Analysis disabled'}

    logger.info(f"Starting analysis for comments from last {hours} hours")
    try:
        service = AnalysisService()
        results = service.analyze_recent_comments(hours=hours, limit=limit)
        logger.info(f"Completed recent comment analysis. Results: {results}")
        return results
    except Exception as e:
        logger.error(f"Error in recent comment analysis task: {e}", exc_info=True)
        raise
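

# Chaining sketch (illustrative only): run a platform scrape and then analyze
# just the fresh comments. The immutable signature (.si) ignores the scrape
# task's return value and simply sequences the two steps:
#
#     from celery import chain
#
#     chain(
#         scrape_youtube_comments.s(),
#         analyze_recent_comments.si(hours=1, limit=100),
#     ).apply_async()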


@shared_task
def analyze_platform_comments(platform: str, limit: int = 100):
    """
    Scheduled task to analyze comments from a specific platform.

    Args:
        platform: Platform name (e.g., 'youtube', 'facebook', 'instagram').
        limit: Maximum number of comments to analyze.

    Returns:
        Dictionary with analysis statistics.
    """
    if not getattr(settings, 'ANALYSIS_ENABLED', True):
        logger.info("Comment analysis is disabled")
        return {'success': False, 'message': 'Analysis disabled'}

    logger.info(f"Starting analysis for {platform} comments")
    try:
        service = AnalysisService()
        results = service.analyze_comments_by_platform(platform=platform, limit=limit)
        logger.info(f"Completed {platform} comment analysis. Results: {results}")
        return results
    except Exception as e:
        logger.error(f"Error in {platform} comment analysis task: {e}", exc_info=True)
        raise
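

# Fan-out sketch: queue one per-platform analysis task for every platform at
# once using celery.group. Illustrative only; the helper name and platform
# list are assumptions derived from the scrapers defined above.
def analyze_all_platforms(limit: int = 100):
    """Sketch: queue analyze_platform_comments for each known platform."""
    from celery import group

    platforms = ['youtube', 'facebook', 'instagram', 'twitter', 'linkedin']
    job = group(
        analyze_platform_comments.s(platform=p, limit=limit) for p in platforms
    )
    # apply_async() sends all signatures to the broker at once and returns a
    # GroupResult handle for tracking completion.
    return job.apply_async()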