""" Physicians Celery Tasks Background tasks for: - Processing doctor rating import jobs - Monthly aggregation of ratings - Ranking updates """ import logging from celery import shared_task from django.utils import timezone from apps.organizations.models import Hospital from .adapter import DoctorRatingAdapter from .models import DoctorRatingImportJob, PhysicianIndividualRating, PhysicianMonthlyRating logger = logging.getLogger(__name__) @shared_task(bind=True, max_retries=3, default_retry_delay=60) def process_doctor_rating_job(self, job_id: str): """ Process a doctor rating import job in the background. This task is called when a bulk import is queued (from API or CSV upload). """ try: job = DoctorRatingImportJob.objects.get(id=job_id) except DoctorRatingImportJob.DoesNotExist: logger.error(f"Doctor rating import job {job_id} not found") return {"error": "Job not found"} try: # Update job status job.status = DoctorRatingImportJob.JobStatus.PROCESSING job.started_at = timezone.now() job.save() logger.info(f"Starting doctor rating import job {job_id}: {job.total_records} records") # Get raw data records = job.raw_data hospital = job.hospital # Process through adapter results = DoctorRatingAdapter.process_bulk_ratings(records=records, hospital=hospital, job=job) logger.info( f"Completed doctor rating import job {job_id}: {results['success']} success, {results['failed']} failed" ) return { "job_id": job_id, "total": results["total"], "success": results["success"], "failed": results["failed"], "skipped": results["skipped"], "staff_matched": results["staff_matched"], } except Exception as exc: logger.error(f"Error processing doctor rating job {job_id}: {str(exc)}", exc_info=True) # Update job status job.status = DoctorRatingImportJob.JobStatus.FAILED job.error_message = str(exc) job.completed_at = timezone.now() job.save() # Retry raise self.retry(exc=exc) @shared_task(bind=True, max_retries=3, default_retry_delay=60) def aggregate_monthly_ratings_task(self, year: int, month: int, hospital_id: str = None): """ Aggregate individual ratings into monthly summaries. Args: year: Year to aggregate month: Month to aggregate (1-12) hospital_id: Optional hospital ID to filter by """ try: logger.info(f"Starting monthly aggregation for {year}-{month:02d}") hospital = None if hospital_id: try: hospital = Hospital.objects.get(id=hospital_id) except Hospital.DoesNotExist: logger.error(f"Hospital {hospital_id} not found") return {"error": "Hospital not found"} # Run aggregation results = DoctorRatingAdapter.aggregate_monthly_ratings(year=year, month=month, hospital=hospital) logger.info( f"Completed monthly aggregation for {year}-{month:02d}: {results['aggregated']} physicians aggregated" ) # Calculate rankings after aggregation if hospital: update_hospital_rankings.delay(year, month, hospital_id) else: # Update rankings for all hospitals for h in Hospital.objects.filter(status="active"): update_hospital_rankings.delay(year, month, str(h.id)) return { "year": year, "month": month, "hospital_id": hospital_id, "aggregated": results["aggregated"], "errors": len(results["errors"]), } except Exception as exc: logger.error(f"Error aggregating monthly ratings: {str(exc)}", exc_info=True) raise self.retry(exc=exc) @shared_task(bind=True, max_retries=3, default_retry_delay=60) def update_hospital_rankings(self, year: int, month: int, hospital_id: str): """ Update hospital and department rankings for physicians. This should be called after monthly aggregation is complete. """ try: from django.db.models import Window, F from django.db.models.functions import RowNumber hospital = Hospital.objects.get(id=hospital_id) logger.info(f"Updating rankings for {hospital.name} - {year}-{month:02d}") # Get all ratings for this hospital and period ratings = PhysicianMonthlyRating.objects.filter( staff__hospital=hospital, year=year, month=month ).select_related("staff", "staff__department") # Update hospital rankings (order by average_rating desc) hospital_rankings = list(ratings.order_by("-average_rating")) for rank, rating in enumerate(hospital_rankings, start=1): rating.hospital_rank = rank rating.save(update_fields=["hospital_rank"]) # Update department rankings from apps.organizations.models import Department departments = Department.objects.filter(hospital=hospital) for dept in departments: dept_ratings = ratings.filter(staff__department=dept).order_by("-average_rating") for rank, rating in enumerate(dept_ratings, start=1): rating.department_rank = rank rating.save(update_fields=["department_rank"]) logger.info(f"Updated rankings for {hospital.name}: {len(hospital_rankings)} physicians ranked") return { "hospital_id": hospital_id, "hospital_name": hospital.name, "year": year, "month": month, "total_ranked": len(hospital_rankings), } except Exception as exc: logger.error(f"Error updating rankings: {str(exc)}", exc_info=True) raise self.retry(exc=exc) @shared_task def auto_aggregate_daily(): """ Daily task to automatically aggregate unaggregated ratings. This task should be scheduled to run daily to keep monthly ratings up-to-date. """ try: logger.info("Starting daily auto-aggregation of doctor ratings") # Find months with unaggregated ratings unaggregated = ( PhysicianIndividualRating.objects.filter(is_aggregated=False) .values("rating_date__year", "rating_date__month") .distinct() ) aggregated_count = 0 for item in unaggregated: year = item["rating_date__year"] month = item["rating_date__month"] # Aggregate for each hospital separately hospitals_with_ratings = ( PhysicianIndividualRating.objects.filter( is_aggregated=False, rating_date__year=year, rating_date__month=month ) .values_list("hospital", flat=True) .distinct() ) for hospital_id in hospitals_with_ratings: results = DoctorRatingAdapter.aggregate_monthly_ratings(year=year, month=month, hospital_id=hospital_id) aggregated_count += results["aggregated"] logger.info(f"Daily auto-aggregation complete: {aggregated_count} physicians updated") return {"aggregated_count": aggregated_count} except Exception as e: logger.error(f"Error in daily auto-aggregation: {str(e)}", exc_info=True) return {"error": str(e)} @shared_task def cleanup_old_import_jobs(days: int = 30): """ Clean up old completed import jobs and their raw data. Args: days: Delete jobs older than this many days """ from datetime import timedelta cutoff_date = timezone.now() - timedelta(days=days) old_jobs = DoctorRatingImportJob.objects.filter( created_at__lt=cutoff_date, status__in=[DoctorRatingImportJob.JobStatus.COMPLETED, DoctorRatingImportJob.JobStatus.FAILED], ) count = old_jobs.count() # Clear raw data first to save space for job in old_jobs: if job.raw_data: job.raw_data = [] job.save(update_fields=["raw_data"]) logger.info(f"Cleaned up {count} old doctor rating import jobs") return {"cleaned_count": count} @shared_task(bind=True, max_retries=3, default_retry_delay=300) def fetch_his_doctor_ratings_monthly(self): """ Monthly task to fetch doctor ratings from HIS API. Runs on the 1st of each month to fetch the previous month's ratings. Example: On March 1st, fetches all ratings from February 1-28/29. This task runs at 1:00 AM on the 1st of each month, before the aggregation task which runs at 2:00 AM. """ from datetime import datetime from calendar import monthrange try: # Calculate previous month now = timezone.now() if now.month == 1: target_year = now.year - 1 target_month = 12 else: target_year = now.year target_month = now.month - 1 month_label = f"{target_year}-{target_month:02d}" logger.info(f"Starting monthly HIS doctor rating fetch for {month_label}") # Calculate date range for the month from_date = datetime(target_year, target_month, 1) last_day = monthrange(target_year, target_month)[1] to_date = datetime(target_year, target_month, last_day, 23, 59, 59) # Initialize HIS client from apps.integrations.services.his_client import HISClient client = HISClient() # Fetch ratings from HIS his_data = client.fetch_doctor_ratings(from_date, to_date) if not his_data: logger.error("Failed to fetch data from HIS API") return {"success": False, "error": "Failed to fetch data from HIS API", "month": month_label} if his_data.get("Code") != 200: error_msg = his_data.get("Message", "Unknown error") logger.error(f"HIS API error: {error_msg}") return {"success": False, "error": f"HIS API error: {error_msg}", "month": month_label} ratings_list = his_data.get("FetchDoctorRatingMAPI1List", []) if not ratings_list: logger.info(f"No ratings found for {month_label}") return { "success": True, "month": month_label, "total_ratings": 0, "message": "No ratings found for this period", } logger.info(f"Fetched {len(ratings_list)} ratings from HIS for {month_label}") # Create import job for tracking first_hospital = Hospital.objects.first() if first_hospital: job = DoctorRatingImportJob.objects.create( name=f"Monthly HIS Import - {month_label}", status=DoctorRatingImportJob.JobStatus.PROCESSING, source=DoctorRatingImportJob.JobSource.HIS_API, hospital=first_hospital, total_records=len(ratings_list), started_at=timezone.now(), ) else: job = None logger.warning("No hospitals found, creating ratings without import job") # Process ratings stats = { "total": len(ratings_list), "success": 0, "failed": 0, "duplicates": 0, "staff_matched": 0, } for idx, rating_data in enumerate(ratings_list, 1): try: # Find hospital by name hospital_name = rating_data.get("HospitalName", "") hospital = Hospital.objects.filter(name__iexact=hospital_name).first() if not hospital: hospital = Hospital.objects.filter(name__icontains=hospital_name).first() if not hospital: stats["failed"] += 1 logger.warning(f"Hospital not found: {hospital_name}") continue # Process the rating result = DoctorRatingAdapter.process_his_rating_record(rating_data, hospital) if result["is_duplicate"]: stats["duplicates"] += 1 elif result["success"]: stats["success"] += 1 if result["staff_matched"]: stats["staff_matched"] += 1 else: stats["failed"] += 1 logger.warning(f"Failed to process rating: {result.get('message')}") # Update job progress every 100 records if job and idx % 100 == 0: job.processed_count = idx job.success_count = stats["success"] job.failed_count = stats["failed"] job.save() logger.info(f"Progress: {idx}/{stats['total']} processed") except Exception as e: stats["failed"] += 1 logger.error(f"Error processing rating {idx}: {e}", exc_info=True) # Finalize job if job: job.processed_count = stats["total"] job.success_count = stats["success"] job.failed_count = stats["failed"] job.completed_at = timezone.now() if stats["failed"] == 0: job.status = DoctorRatingImportJob.JobStatus.COMPLETED elif stats["success"] == 0: job.status = DoctorRatingImportJob.JobStatus.FAILED else: job.status = DoctorRatingImportJob.JobStatus.PARTIAL job.results = {"stats": stats} job.save() logger.info( f"Completed monthly HIS doctor rating fetch for {month_label}: " f"{stats['success']} success, {stats['failed']} failed, {stats['duplicates']} duplicates" ) return { "success": True, "month": month_label, "total_ratings": stats["total"], "success_count": stats["success"], "failed_count": stats["failed"], "duplicate_count": stats["duplicates"], "staff_matched_count": stats["staff_matched"], } except Exception as exc: logger.error(f"Error in monthly HIS doctor rating fetch: {exc}", exc_info=True) # Retry the task raise self.retry(exc=exc)