""" Physicians Celery Tasks Background tasks for: - Processing doctor rating import jobs - Monthly aggregation of ratings - Ranking updates """ import logging from celery import shared_task from django.utils import timezone from apps.organizations.models import Hospital from .adapter import DoctorRatingAdapter from .models import DoctorRatingImportJob, PhysicianIndividualRating, PhysicianMonthlyRating logger = logging.getLogger(__name__) @shared_task(bind=True, max_retries=3, default_retry_delay=60) def process_doctor_rating_job(self, job_id: str): """ Process a doctor rating import job in the background. This task is called when a bulk import is queued (from API or CSV upload). """ try: job = DoctorRatingImportJob.objects.get(id=job_id) except DoctorRatingImportJob.DoesNotExist: logger.error(f"Doctor rating import job {job_id} not found") return {'error': 'Job not found'} try: # Update job status job.status = DoctorRatingImportJob.JobStatus.PROCESSING job.started_at = timezone.now() job.save() logger.info(f"Starting doctor rating import job {job_id}: {job.total_records} records") # Get raw data records = job.raw_data hospital = job.hospital # Process through adapter results = DoctorRatingAdapter.process_bulk_ratings( records=records, hospital=hospital, job=job ) logger.info(f"Completed doctor rating import job {job_id}: " f"{results['success']} success, {results['failed']} failed") return { 'job_id': job_id, 'total': results['total'], 'success': results['success'], 'failed': results['failed'], 'skipped': results['skipped'], 'staff_matched': results['staff_matched'] } except Exception as exc: logger.error(f"Error processing doctor rating job {job_id}: {str(exc)}", exc_info=True) # Update job status job.status = DoctorRatingImportJob.JobStatus.FAILED job.error_message = str(exc) job.completed_at = timezone.now() job.save() # Retry raise self.retry(exc=exc) @shared_task(bind=True, max_retries=3, default_retry_delay=60) def aggregate_monthly_ratings_task(self, year: int, month: int, hospital_id: str = None): """ Aggregate individual ratings into monthly summaries. Args: year: Year to aggregate month: Month to aggregate (1-12) hospital_id: Optional hospital ID to filter by """ try: logger.info(f"Starting monthly aggregation for {year}-{month:02d}") hospital = None if hospital_id: try: hospital = Hospital.objects.get(id=hospital_id) except Hospital.DoesNotExist: logger.error(f"Hospital {hospital_id} not found") return {'error': 'Hospital not found'} # Run aggregation results = DoctorRatingAdapter.aggregate_monthly_ratings( year=year, month=month, hospital=hospital ) logger.info(f"Completed monthly aggregation for {year}-{month:02d}: " f"{results['aggregated']} physicians aggregated") # Calculate rankings after aggregation if hospital: update_hospital_rankings.delay(year, month, hospital_id) else: # Update rankings for all hospitals for h in Hospital.objects.filter(status='active'): update_hospital_rankings.delay(year, month, str(h.id)) return { 'year': year, 'month': month, 'hospital_id': hospital_id, 'aggregated': results['aggregated'], 'errors': len(results['errors']) } except Exception as exc: logger.error(f"Error aggregating monthly ratings: {str(exc)}", exc_info=True) raise self.retry(exc=exc) @shared_task(bind=True, max_retries=3, default_retry_delay=60) def update_hospital_rankings(self, year: int, month: int, hospital_id: str): """ Update hospital and department rankings for physicians. This should be called after monthly aggregation is complete. """ try: from django.db.models import Window, F from django.db.models.functions import RowNumber hospital = Hospital.objects.get(id=hospital_id) logger.info(f"Updating rankings for {hospital.name} - {year}-{month:02d}") # Get all ratings for this hospital and period ratings = PhysicianMonthlyRating.objects.filter( staff__hospital=hospital, year=year, month=month ).select_related('staff', 'staff__department') # Update hospital rankings (order by average_rating desc) hospital_rankings = list(ratings.order_by('-average_rating')) for rank, rating in enumerate(hospital_rankings, start=1): rating.hospital_rank = rank rating.save(update_fields=['hospital_rank']) # Update department rankings from apps.organizations.models import Department departments = Department.objects.filter(hospital=hospital) for dept in departments: dept_ratings = ratings.filter(staff__department=dept).order_by('-average_rating') for rank, rating in enumerate(dept_ratings, start=1): rating.department_rank = rank rating.save(update_fields=['department_rank']) logger.info(f"Updated rankings for {hospital.name}: " f"{len(hospital_rankings)} physicians ranked") return { 'hospital_id': hospital_id, 'hospital_name': hospital.name, 'year': year, 'month': month, 'total_ranked': len(hospital_rankings) } except Exception as exc: logger.error(f"Error updating rankings: {str(exc)}", exc_info=True) raise self.retry(exc=exc) @shared_task def auto_aggregate_daily(): """ Daily task to automatically aggregate unaggregated ratings. This task should be scheduled to run daily to keep monthly ratings up-to-date. """ try: logger.info("Starting daily auto-aggregation of doctor ratings") # Find months with unaggregated ratings unaggregated = PhysicianIndividualRating.objects.filter( is_aggregated=False ).values('rating_date__year', 'rating_date__month').distinct() aggregated_count = 0 for item in unaggregated: year = item['rating_date__year'] month = item['rating_date__month'] # Aggregate for each hospital separately hospitals_with_ratings = PhysicianIndividualRating.objects.filter( is_aggregated=False, rating_date__year=year, rating_date__month=month ).values_list('hospital', flat=True).distinct() for hospital_id in hospitals_with_ratings: results = DoctorRatingAdapter.aggregate_monthly_ratings( year=year, month=month, hospital_id=hospital_id ) aggregated_count += results['aggregated'] logger.info(f"Daily auto-aggregation complete: {aggregated_count} physicians updated") return { 'aggregated_count': aggregated_count } except Exception as e: logger.error(f"Error in daily auto-aggregation: {str(e)}", exc_info=True) return {'error': str(e)} @shared_task def cleanup_old_import_jobs(days: int = 30): """ Clean up old completed import jobs and their raw data. Args: days: Delete jobs older than this many days """ from datetime import timedelta cutoff_date = timezone.now() - timedelta(days=days) old_jobs = DoctorRatingImportJob.objects.filter( created_at__lt=cutoff_date, status__in=[ DoctorRatingImportJob.JobStatus.COMPLETED, DoctorRatingImportJob.JobStatus.FAILED ] ) count = old_jobs.count() # Clear raw data first to save space for job in old_jobs: if job.raw_data: job.raw_data = [] job.save(update_fields=['raw_data']) logger.info(f"Cleaned up {count} old doctor rating import jobs") return {'cleaned_count': count}