HH/apps/physicians/tasks.py
2026-03-28 14:03:56 +03:00

413 lines
14 KiB
Python

"""
Physicians Celery Tasks
Background tasks for:
- Processing doctor rating import jobs
- Monthly aggregation of ratings
- Ranking updates
"""
import logging
from typing import Optional

from celery import shared_task
from django.utils import timezone

from apps.organizations.models import Hospital

from .adapter import DoctorRatingAdapter
from .models import DoctorRatingImportJob, PhysicianIndividualRating, PhysicianMonthlyRating
logger = logging.getLogger(__name__)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def process_doctor_rating_job(self, job_id: str):
    """
    Process a doctor rating import job in the background.

    This task is called when a bulk import is queued (from API or CSV upload).

    Args:
        job_id: Primary key of the DoctorRatingImportJob to process.

    Returns:
        dict with per-job counters on success, or {"error": ...} when the
        job record does not exist.

    Raises:
        Retries on any processing exception; re-raises the original error
        once retries are exhausted.
    """
    try:
        job = DoctorRatingImportJob.objects.get(id=job_id)
    except DoctorRatingImportJob.DoesNotExist:
        logger.error(f"Doctor rating import job {job_id} not found")
        return {"error": "Job not found"}
    try:
        # Mark the job as running so API consumers can observe progress.
        job.status = DoctorRatingImportJob.JobStatus.PROCESSING
        job.started_at = timezone.now()
        job.save()
        logger.info(f"Starting doctor rating import job {job_id}: {job.total_records} records")
        # The raw payload was stored on the job when it was queued.
        records = job.raw_data
        hospital = job.hospital
        # The adapter performs validation, staff matching and persistence;
        # presumably it also sets the job's final (COMPLETED) status — confirm
        # in DoctorRatingAdapter.process_bulk_ratings.
        results = DoctorRatingAdapter.process_bulk_ratings(records=records, hospital=hospital, job=job)
        logger.info(
            f"Completed doctor rating import job {job_id}: {results['success']} success, {results['failed']} failed"
        )
        return {
            "job_id": job_id,
            "total": results["total"],
            "success": results["success"],
            "failed": results["failed"],
            "skipped": results["skipped"],
            "staff_matched": results["staff_matched"],
        }
    except Exception as exc:
        logger.error(f"Error processing doctor rating job {job_id}: {str(exc)}", exc_info=True)
        # FIX: previously the job was flagged FAILED (with completed_at) on
        # every attempt and then retried, so the job misreported a terminal
        # FAILED state while retries were still pending.  Only finalize the
        # job once retries are exhausted; otherwise leave it for the next
        # attempt, which flips it back to PROCESSING.
        if self.request.retries >= self.max_retries:
            job.status = DoctorRatingImportJob.JobStatus.FAILED
            job.error_message = str(exc)
            job.completed_at = timezone.now()
            job.save()
            raise
        raise self.retry(exc=exc)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def aggregate_monthly_ratings_task(self, year: int, month: int, hospital_id: Optional[str] = None):
    """
    Aggregate individual ratings into monthly summaries.

    Args:
        year: Year to aggregate.
        month: Month to aggregate (1-12).
        hospital_id: Optional hospital ID to restrict aggregation to a single
            hospital; when omitted, all hospitals are aggregated and rankings
            are refreshed for every active hospital.

    Returns:
        dict summarising the aggregation, or {"error": ...} when the given
        hospital does not exist.
    """
    try:
        logger.info(f"Starting monthly aggregation for {year}-{month:02d}")
        hospital = None
        if hospital_id:
            try:
                hospital = Hospital.objects.get(id=hospital_id)
            except Hospital.DoesNotExist:
                logger.error(f"Hospital {hospital_id} not found")
                return {"error": "Hospital not found"}
        # Run aggregation through the adapter (accepts hospital=None to mean
        # "all hospitals").
        results = DoctorRatingAdapter.aggregate_monthly_ratings(year=year, month=month, hospital=hospital)
        logger.info(
            f"Completed monthly aggregation for {year}-{month:02d}: {results['aggregated']} physicians aggregated"
        )
        # Rankings depend on the freshly written monthly rows, so queue the
        # ranking refresh only after aggregation has finished.
        if hospital:
            update_hospital_rankings.delay(year, month, hospital_id)
        else:
            # Update rankings for all hospitals
            for h in Hospital.objects.filter(status="active"):
                update_hospital_rankings.delay(year, month, str(h.id))
        return {
            "year": year,
            "month": month,
            "hospital_id": hospital_id,
            "aggregated": results["aggregated"],
            "errors": len(results["errors"]),
        }
    except Exception as exc:
        logger.error(f"Error aggregating monthly ratings: {str(exc)}", exc_info=True)
        raise self.retry(exc=exc)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def update_hospital_rankings(self, year: int, month: int, hospital_id: str):
    """
    Update hospital and department rankings for physicians.

    This should be called after monthly aggregation is complete.

    Args:
        year: Year of the period being ranked.
        month: Month of the period being ranked (1-12).
        hospital_id: Hospital whose physicians should be (re)ranked.

    Returns:
        dict describing the ranking run (hospital, period, count ranked).
    """
    # FIX: removed unused imports (django.db.models.Window / F and
    # RowNumber) — the rankings below are computed in Python, not via
    # window functions.
    try:
        hospital = Hospital.objects.get(id=hospital_id)
        logger.info(f"Updating rankings for {hospital.name} - {year}-{month:02d}")
        # All monthly rating rows for this hospital and period.
        ratings = PhysicianMonthlyRating.objects.filter(
            staff__hospital=hospital, year=year, month=month
        ).select_related("staff", "staff__department")
        # Hospital-wide ranking: rank 1 = highest average rating.
        hospital_rankings = list(ratings.order_by("-average_rating"))
        for rank, rating in enumerate(hospital_rankings, start=1):
            rating.hospital_rank = rank
            rating.save(update_fields=["hospital_rank"])
        # Per-department ranking within the same hospital/period.  Staff with
        # no department simply receive no department_rank.
        from apps.organizations.models import Department
        departments = Department.objects.filter(hospital=hospital)
        for dept in departments:
            dept_ratings = ratings.filter(staff__department=dept).order_by("-average_rating")
            for rank, rating in enumerate(dept_ratings, start=1):
                rating.department_rank = rank
                rating.save(update_fields=["department_rank"])
        logger.info(f"Updated rankings for {hospital.name}: {len(hospital_rankings)} physicians ranked")
        return {
            "hospital_id": hospital_id,
            "hospital_name": hospital.name,
            "year": year,
            "month": month,
            "total_ranked": len(hospital_rankings),
        }
    except Exception as exc:
        logger.error(f"Error updating rankings: {str(exc)}", exc_info=True)
        raise self.retry(exc=exc)
@shared_task
def auto_aggregate_daily():
    """
    Daily task to automatically aggregate unaggregated ratings.

    This task should be scheduled to run daily to keep monthly ratings
    up-to-date.

    Returns:
        dict with the number of physicians aggregated, or {"error": ...}
        on failure (this task deliberately does not retry).
    """
    try:
        logger.info("Starting daily auto-aggregation of doctor ratings")
        # Distinct (year, month) periods that still have unaggregated ratings.
        unaggregated = (
            PhysicianIndividualRating.objects.filter(is_aggregated=False)
            .values("rating_date__year", "rating_date__month")
            .distinct()
        )
        aggregated_count = 0
        for item in unaggregated:
            year = item["rating_date__year"]
            month = item["rating_date__month"]
            # Aggregate for each hospital separately
            hospitals_with_ratings = (
                PhysicianIndividualRating.objects.filter(
                    is_aggregated=False, rating_date__year=year, rating_date__month=month
                )
                .values_list("hospital", flat=True)
                .distinct()
            )
            for hospital_id in hospitals_with_ratings:
                # FIX: the adapter is called elsewhere with a Hospital
                # instance via the `hospital` keyword (see
                # aggregate_monthly_ratings_task); passing a raw
                # `hospital_id` keyword was inconsistent with that call
                # signature.  Resolve the instance first.
                hospital = Hospital.objects.filter(id=hospital_id).first()
                if hospital is None:
                    logger.warning(f"Hospital {hospital_id} not found, skipping aggregation")
                    continue
                results = DoctorRatingAdapter.aggregate_monthly_ratings(year=year, month=month, hospital=hospital)
                aggregated_count += results["aggregated"]
        logger.info(f"Daily auto-aggregation complete: {aggregated_count} physicians updated")
        return {"aggregated_count": aggregated_count}
    except Exception as e:
        logger.error(f"Error in daily auto-aggregation: {str(e)}", exc_info=True)
        return {"error": str(e)}
@shared_task
def cleanup_old_import_jobs(days: int = 30):
    """
    Clear the raw payload of old completed/failed import jobs to save space.

    FIX: the previous docstring claimed jobs were deleted, but the code only
    empties `raw_data` — the job records themselves are retained (useful for
    auditing).  Documentation now matches the actual behavior.

    Args:
        days: Clean jobs older than this many days (default 30).

    Returns:
        dict with the number of old jobs considered for cleanup.
    """
    from datetime import timedelta
    cutoff_date = timezone.now() - timedelta(days=days)
    # Only terminal jobs are eligible; PROCESSING/PENDING jobs are untouched.
    old_jobs = DoctorRatingImportJob.objects.filter(
        created_at__lt=cutoff_date,
        status__in=[DoctorRatingImportJob.JobStatus.COMPLETED, DoctorRatingImportJob.JobStatus.FAILED],
    )
    count = old_jobs.count()
    # Drop the bulky raw payload; all other job fields stay intact.
    for job in old_jobs:
        if job.raw_data:
            job.raw_data = []
            job.save(update_fields=["raw_data"])
    logger.info(f"Cleaned up {count} old doctor rating import jobs")
    return {"cleaned_count": count}
@shared_task(bind=True, max_retries=3, default_retry_delay=300)
def fetch_his_doctor_ratings_monthly(self):
    """
    Monthly task to fetch doctor ratings from HIS API.
    Runs on the 1st of each month to fetch the previous month's ratings.
    Example: On March 1st, fetches all ratings from February 1-28/29.
    This task runs at 1:00 AM on the 1st of each month, before the
    aggregation task which runs at 2:00 AM.
    """
    from datetime import datetime
    from calendar import monthrange
    try:
        # Calculate previous month
        now = timezone.now()
        if now.month == 1:
            # January rolls back to December of the previous year.
            target_year = now.year - 1
            target_month = 12
        else:
            target_year = now.year
            target_month = now.month - 1
        month_label = f"{target_year}-{target_month:02d}"
        logger.info(f"Starting monthly HIS doctor rating fetch for {month_label}")
        # Calculate date range for the month
        # NOTE(review): these datetimes are naive — presumably the HIS API
        # expects local wall-clock timestamps; confirm against HISClient.
        from_date = datetime(target_year, target_month, 1)
        last_day = monthrange(target_year, target_month)[1]
        to_date = datetime(target_year, target_month, last_day, 23, 59, 59)
        # Initialize HIS client
        from apps.integrations.services.his_client import HISClient
        client = HISClient()
        # Fetch ratings from HIS
        his_data = client.fetch_doctor_ratings(from_date, to_date)
        if not his_data:
            logger.error("Failed to fetch data from HIS API")
            return {"success": False, "error": "Failed to fetch data from HIS API", "month": month_label}
        # The HIS response is an envelope carrying "Code"/"Message" fields.
        if his_data.get("Code") != 200:
            error_msg = his_data.get("Message", "Unknown error")
            logger.error(f"HIS API error: {error_msg}")
            return {"success": False, "error": f"HIS API error: {error_msg}", "month": month_label}
        ratings_list = his_data.get("FetchDoctorRatingMAPI1List", [])
        if not ratings_list:
            # An empty month is a successful (no-op) run, not an error.
            logger.info(f"No ratings found for {month_label}")
            return {
                "success": True,
                "month": month_label,
                "total_ratings": 0,
                "message": "No ratings found for this period",
            }
        logger.info(f"Fetched {len(ratings_list)} ratings from HIS for {month_label}")
        # Create import job for tracking
        # NOTE(review): the job is pinned to an arbitrary first hospital even
        # though individual records may belong to different hospitals (each
        # record is matched by HospitalName below) — verify this is intended.
        first_hospital = Hospital.objects.first()
        if first_hospital:
            job = DoctorRatingImportJob.objects.create(
                name=f"Monthly HIS Import - {month_label}",
                status=DoctorRatingImportJob.JobStatus.PROCESSING,
                source=DoctorRatingImportJob.JobSource.HIS_API,
                hospital=first_hospital,
                total_records=len(ratings_list),
                started_at=timezone.now(),
            )
        else:
            # With no hospitals in the DB the run proceeds untracked.
            job = None
            logger.warning("No hospitals found, creating ratings without import job")
        # Process ratings
        stats = {
            "total": len(ratings_list),
            "success": 0,
            "failed": 0,
            "duplicates": 0,
            "staff_matched": 0,
        }
        for idx, rating_data in enumerate(ratings_list, 1):
            try:
                # Find hospital by name
                hospital_name = rating_data.get("HospitalName", "")
                hospital = Hospital.objects.filter(name__iexact=hospital_name).first()
                if not hospital:
                    # Fall back to a substring match when exact name differs.
                    hospital = Hospital.objects.filter(name__icontains=hospital_name).first()
                if not hospital:
                    stats["failed"] += 1
                    logger.warning(f"Hospital not found: {hospital_name}")
                    continue
                # Process the rating
                result = DoctorRatingAdapter.process_his_rating_record(rating_data, hospital)
                if result["is_duplicate"]:
                    stats["duplicates"] += 1
                elif result["success"]:
                    stats["success"] += 1
                    if result["staff_matched"]:
                        stats["staff_matched"] += 1
                else:
                    stats["failed"] += 1
                    logger.warning(f"Failed to process rating: {result.get('message')}")
                # Update job progress every 100 records
                if job and idx % 100 == 0:
                    job.processed_count = idx
                    job.success_count = stats["success"]
                    job.failed_count = stats["failed"]
                    job.save()
                    logger.info(f"Progress: {idx}/{stats['total']} processed")
            except Exception as e:
                # Per-record failures are counted but never abort the batch.
                stats["failed"] += 1
                logger.error(f"Error processing rating {idx}: {e}", exc_info=True)
        # Finalize job
        if job:
            job.processed_count = stats["total"]
            job.success_count = stats["success"]
            job.failed_count = stats["failed"]
            job.completed_at = timezone.now()
            # Tri-state outcome: COMPLETED when nothing failed, FAILED when
            # nothing succeeded, PARTIAL for a mixed result.
            if stats["failed"] == 0:
                job.status = DoctorRatingImportJob.JobStatus.COMPLETED
            elif stats["success"] == 0:
                job.status = DoctorRatingImportJob.JobStatus.FAILED
            else:
                job.status = DoctorRatingImportJob.JobStatus.PARTIAL
            job.results = {"stats": stats}
            job.save()
        logger.info(
            f"Completed monthly HIS doctor rating fetch for {month_label}: "
            f"{stats['success']} success, {stats['failed']} failed, {stats['duplicates']} duplicates"
        )
        return {
            "success": True,
            "month": month_label,
            "total_ratings": stats["total"],
            "success_count": stats["success"],
            "failed_count": stats["failed"],
            "duplicate_count": stats["duplicates"],
            "staff_matched_count": stats["staff_matched"],
        }
    except Exception as exc:
        # NOTE(review): an exception escaping the main body leaves any created
        # job stuck in PROCESSING, and each retry creates a brand-new job —
        # consider finalizing the job before retrying.
        logger.error(f"Error in monthly HIS doctor rating fetch: {exc}", exc_info=True)
        # Retry the task
        raise self.retry(exc=exc)