""" Survey analytics utilities. This module provides reusable functions and classes for survey analytics: - Statistical analysis (correlation, skewness, kurtosis) - Analytics generation - Report generation (Markdown, HTML, JSON) These utilities can be used by: - Management commands (CLI) - API views (REST) - Other services """ import json import math from datetime import datetime, timedelta from decimal import Decimal from django.db.models import Avg, Count, StdDev, Q, F, Sum, ExpressionWrapper, IntegerField from django.utils import timezone from django.conf import settings from apps.surveys.models import SurveyTemplate, SurveyInstance, SurveyResponse, SurveyQuestion class StatisticalAnalyzer: """Statistical analysis helper class""" @staticmethod def calculate_mean(values): """Calculate mean of values""" if not values: return 0.0 return sum(values) / len(values) @staticmethod def calculate_stddev(values, mean=None): """Calculate standard deviation""" if not values: return 0.0 if mean is None: mean = StatisticalAnalyzer.calculate_mean(values) variance = sum((x - mean) ** 2 for x in values) / len(values) return math.sqrt(variance) @staticmethod def calculate_skewness(values, mean=None, stddev=None): """Calculate skewness (measure of asymmetry)""" if not values or len(values) < 3: return 0.0 if mean is None: mean = StatisticalAnalyzer.calculate_mean(values) if stddev is None: stddev = StatisticalAnalyzer.calculate_stddev(values, mean) if stddev == 0: return 0.0 n = len(values) skew = (n / ((n - 1) * (n - 2) * stddev ** 3)) * \ sum((x - mean) ** 3 for x in values) return skew @staticmethod def calculate_kurtosis(values, mean=None, stddev=None): """Calculate kurtosis (measure of tail heaviness)""" if not values or len(values) < 4: return 0.0 if mean is None: mean = StatisticalAnalyzer.calculate_mean(values) if stddev is None: stddev = StatisticalAnalyzer.calculate_stddev(values, mean) if stddev == 0: return 0.0 n = len(values) kurt = ((n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3) * stddev ** 4)) * \ sum((x - mean) ** 4 for x in values) - \ (3 * (n - 1) ** 2) / ((n - 2) * (n - 3)) return kurt @staticmethod def calculate_correlation(x_values, y_values): """Calculate Pearson correlation coefficient""" if not x_values or not y_values or len(x_values) != len(y_values): return 0.0 if len(x_values) < 2: return 0.0 mean_x = StatisticalAnalyzer.calculate_mean(x_values) mean_y = StatisticalAnalyzer.calculate_mean(y_values) stddev_x = StatisticalAnalyzer.calculate_stddev(x_values, mean_x) stddev_y = StatisticalAnalyzer.calculate_stddev(y_values, mean_y) if stddev_x == 0 or stddev_y == 0: return 0.0 n = len(x_values) covariance = sum((x_values[i] - mean_x) * (y_values[i] - mean_y) for i in range(n)) / n correlation = covariance / (stddev_x * stddev_y) return correlation class AnalyticsGenerator: """Main analytics generator class""" def calculate_analytics(self, template_name=None, start_date=None, end_date=None): """ Calculate comprehensive survey analytics. Args: template_name: Optional filter by survey template name start_date: Optional start date (datetime.date or datetime) end_date: Optional end date (datetime.date or datetime) Returns: dict: Complete analytics data """ # Parse date range if start_date is None: start_date = timezone.now() - timedelta(days=365) else: if not isinstance(start_date, datetime): start_date = timezone.make_aware(datetime.combine(start_date, datetime.min.time())) else: start_date = timezone.make_aware(start_date) if timezone.is_naive(start_date) else start_date if end_date is None: end_date = timezone.now() else: if not isinstance(end_date, datetime): end_date = timezone.make_aware(datetime.combine(end_date, datetime.max.time())) else: end_date = timezone.make_aware(end_date) if timezone.is_naive(end_date) else end_date # Get survey templates templates = SurveyTemplate.objects.filter(is_active=True) if template_name: templates = templates.filter(name__icontains=template_name) templates = templates.order_by('name') # Generate analytics data analytics_data = { 'report_generated_at': timezone.now().isoformat(), 'date_range': { 'start': start_date.isoformat(), 'end': end_date.isoformat() }, 'templates': [] } for template in templates: template_data = self._analyze_template(template, start_date, end_date) analytics_data['templates'].append(template_data) # Add overall summary analytics_data['summary'] = self._generate_summary(analytics_data['templates']) return analytics_data def _analyze_template(self, template, start_date, end_date): """Analyze a single survey template""" # Get all instances for this template instances = SurveyInstance.objects.filter( survey_template=template, created_at__gte=start_date, created_at__lte=end_date ) # Get completed instances completed_instances = instances.filter(status='completed') # Basic metrics total_sent = instances.count() total_completed = completed_instances.count() completion_rate = (total_completed / total_sent * 100) if total_sent > 0 else 0 # Score metrics avg_score = completed_instances.aggregate(avg=Avg('total_score'))['avg'] or 0 std_dev = completed_instances.aggregate(std=StdDev('total_score'))['std'] or 0 # Negative surveys negative_count = completed_instances.filter(is_negative=True).count() negative_rate = (negative_count / total_completed * 100) if total_completed > 0 else 0 # Score distribution score_distribution = self._get_score_distribution(completed_instances) # Status breakdown status_breakdown = dict( instances.values('status').annotate(count=Count('id')).values_list('status', 'count') ) # Channel performance channel_performance = self._get_channel_performance(instances) # Monthly trends monthly_trends = self._get_monthly_trends(completed_instances, start_date, end_date) # Engagement metrics engagement = self._get_engagement_metrics(completed_instances) # Patient contact metrics patient_contact = self._get_patient_contact_metrics(completed_instances) # Comments comments_data = self._get_comments_metrics(completed_instances) # Question-level analytics questions = self._analyze_questions(template, completed_instances) # Generate question rankings and insights template_data = { 'template_id': str(template.id), 'template_name': template.name, 'template_name_ar': template.name_ar, 'survey_type': template.survey_type, 'scoring_method': template.scoring_method, 'negative_threshold': float(template.negative_threshold), 'question_count': template.questions.count(), 'metrics': { 'total_sent': total_sent, 'total_completed': total_completed, 'completion_rate': round(completion_rate, 2), 'avg_score': round(float(avg_score), 2), 'std_deviation': round(float(std_dev), 2), 'negative_count': negative_count, 'negative_rate': round(negative_rate, 2), }, 'score_distribution': score_distribution, 'status_breakdown': status_breakdown, 'channel_performance': channel_performance, 'monthly_trends': monthly_trends, 'engagement_metrics': engagement, 'patient_contact_metrics': patient_contact, 'comments_metrics': comments_data, 'questions': questions } # Add rankings and insights template_data['rankings'] = self._generate_question_rankings(template_data) template_data['insights'] = self._generate_insights(template_data) return template_data def _get_score_distribution(self, instances): """Get score distribution""" distribution = { 'excellent': instances.filter(total_score__gte=4.5).count(), 'good': instances.filter(total_score__gte=3.5, total_score__lt=4.5).count(), 'average': instances.filter(total_score__gte=2.5, total_score__lt=3.5).count(), 'poor': instances.filter(total_score__lt=2.5).count(), } total = instances.count() for key in list(distribution.keys()): distribution[f'{key}_percent'] = round(distribution[key] / total * 100, 2) if total > 0 else 0 return distribution def _get_channel_performance(self, instances): """Get performance by delivery channel""" channels = ['sms', 'whatsapp', 'email'] performance = {} for channel in channels: channel_instances = instances.filter(delivery_channel=channel) total = channel_instances.count() completed = channel_instances.filter(status='completed').count() performance[channel] = { 'total_sent': total, 'completed': completed, 'completion_rate': round((completed / total * 100) if total > 0 else 0, 2), 'avg_score': round(float( channel_instances.filter(status='completed').aggregate( avg=Avg('total_score') )['avg'] or 0 ), 2) } return performance def _get_monthly_trends(self, instances, start_date, end_date): """Get monthly trends""" trends = [] current_date = start_date.replace(day=1) while current_date <= end_date: next_date = (current_date + timedelta(days=32)).replace(day=1) month_instances = instances.filter( completed_at__gte=current_date, completed_at__lt=next_date ) trends.append({ 'month': current_date.strftime('%Y-%m'), 'month_name': current_date.strftime('%B %Y'), 'count': month_instances.count(), 'avg_score': round(float( month_instances.aggregate(avg=Avg('total_score'))['avg'] or 0 ), 2), 'negative_count': month_instances.filter(is_negative=True).count() }) current_date = next_date return trends def _get_engagement_metrics(self, instances): """Get engagement metrics""" # Time to complete time_stats = instances.filter( time_spent_seconds__isnull=False ).aggregate( avg_time=Avg('time_spent_seconds'), min_time=Avg('time_spent_seconds'), max_time=Avg('time_spent_seconds') ) # Open count open_stats = instances.aggregate( avg_opens=Avg('open_count'), max_opens=Avg('open_count') ) return { 'avg_completion_time_seconds': round(float(time_stats['avg_time'] or 0), 2), 'min_completion_time_seconds': round(float(time_stats['min_time'] or 0), 2), 'max_completion_time_seconds': round(float(time_stats['max_time'] or 0), 2), 'avg_opens': round(float(open_stats['avg_opens'] or 0), 2), 'max_opens': round(float(open_stats['max_opens'] or 0), 2) } def _get_patient_contact_metrics(self, instances): """Get patient contact metrics for negative surveys""" negative_instances = instances.filter(is_negative=True) contacted = negative_instances.filter(patient_contacted=True).count() resolved = negative_instances.filter(issue_resolved=True).count() return { 'total_negative': negative_instances.count(), 'contacted': contacted, 'contacted_rate': round((contacted / negative_instances.count() * 100) if negative_instances.count() > 0 else 0, 2), 'resolved': resolved, 'resolved_rate': round((resolved / negative_instances.count() * 100) if negative_instances.count() > 0 else 0, 2) } def _get_comments_metrics(self, instances): """Get comments metrics""" with_comments = instances.exclude(comment='').count() return { 'with_comments': with_comments, 'comment_rate': round((with_comments / instances.count() * 100) if instances.count() > 0 else 0, 2), 'avg_comment_length': round(float( instances.exclude(comment='').annotate( length=ExpressionWrapper(F('comment'), output_field=IntegerField()) ).aggregate(avg=Avg('length'))['avg'] or 0 ), 2) } def _analyze_questions(self, template, instances): """Analyze each question in the template""" questions = template.questions.all().order_by('order') question_analytics = [] for question in questions: # Get responses for this question responses = SurveyResponse.objects.filter( survey_instance__in=instances, question=question ) response_count = responses.count() if question.question_type in ['rating', 'likert', 'nps']: # Numeric questions numeric_responses = responses.filter(numeric_value__isnull=False) avg_score = numeric_responses.aggregate(avg=Avg('numeric_value'))['avg'] or 0 std_dev = numeric_responses.aggregate(std=StdDev('numeric_value'))['std'] or 0 # Score distribution score_dist = self._get_question_score_distribution(numeric_responses) # Statistical analysis (skewness, kurtosis) stats = self._calculate_question_statistics(numeric_responses) # Correlation with overall survey score correlation = self._calculate_question_correlation(question, instances) # Performance by channel channel_performance = self._get_question_channel_performance(question, instances) # Monthly trends monthly_trends = self._get_question_monthly_trends( question, instances, self._get_date_range_from_instances(instances) ) question_data = { 'question_id': str(question.id), 'question_text': question.text, 'question_text_ar': question.text_ar, 'question_type': question.question_type, 'order': question.order, 'is_required': question.is_required, 'response_count': response_count, 'response_rate': round((response_count / instances.count() * 100) if instances.count() > 0 else 0, 2), 'avg_score': round(float(avg_score), 2), 'std_deviation': round(float(std_dev), 2), 'skewness': round(stats['skewness'], 3), 'kurtosis': round(stats['kurtosis'], 3), 'correlation_with_overall': round(correlation, 3), 'score_distribution': score_dist, 'channel_performance': channel_performance, 'monthly_trends': monthly_trends, 'has_text_responses': False } elif question.question_type in ['text', 'textarea']: # Text questions text_responses = responses.exclude(text_value='') question_data = { 'question_id': str(question.id), 'question_text': question.text, 'question_text_ar': question.text_ar, 'question_type': question.question_type, 'order': question.order, 'is_required': question.is_required, 'response_count': response_count, 'response_rate': round((response_count / instances.count() * 100) if instances.count() > 0 else 0, 2), 'text_response_count': text_responses.count(), 'avg_text_length': round(float( text_responses.annotate( length=ExpressionWrapper(F('text_value'), output_field=IntegerField()) ).aggregate(avg=Avg('length'))['avg'] or 0 ), 2), 'has_numeric_responses': False } elif question.question_type == 'multiple_choice': # Multiple choice choice_dist = self._get_choice_distribution(responses) question_data = { 'question_id': str(question.id), 'question_text': question.text, 'question_text_ar': question.text_ar, 'question_type': question.question_type, 'order': question.order, 'is_required': question.is_required, 'response_count': response_count, 'response_rate': round((response_count / instances.count() * 100) if instances.count() > 0 else 0, 2), 'choice_distribution': choice_dist, 'has_numeric_responses': False } else: question_data = { 'question_id': str(question.id), 'question_text': question.text, 'question_text_ar': question.text_ar, 'question_type': question.question_type, 'order': question.order, 'is_required': question.is_required, 'response_count': response_count, 'response_rate': round((response_count / instances.count() * 100) if instances.count() > 0 else 0, 2), } question_analytics.append(question_data) return question_analytics def _get_question_score_distribution(self, responses): """Get score distribution for a question""" distribution = {} scores = [1, 2, 3, 4, 5] for score in scores: count = responses.filter(numeric_value=score).count() distribution[f'score_{score}'] = count total = responses.count() for score in scores: distribution[f'score_{score}_percent'] = round( distribution[f'score_{score}'] / total * 100, 2 ) if total > 0 else 0 return distribution def _get_question_monthly_trends(self, question, instances, date_range): """Get monthly trends for a question""" start_date, end_date = date_range trends = [] current_date = start_date.replace(day=1) while current_date <= end_date: next_date = (current_date + timedelta(days=32)).replace(day=1) month_responses = SurveyResponse.objects.filter( survey_instance__in=instances, question=question, created_at__gte=current_date, created_at__lt=next_date ).filter(numeric_value__isnull=False) trends.append({ 'month': current_date.strftime('%Y-%m'), 'month_name': current_date.strftime('%B %Y'), 'count': month_responses.count(), 'avg_score': round(float( month_responses.aggregate(avg=Avg('numeric_value'))['avg'] or 0 ), 2) }) current_date = next_date return trends def _get_choice_distribution(self, responses): """Get distribution for multiple choice questions""" distribution = dict( responses.values('choice_value').annotate( count=Count('id') ).values_list('choice_value', 'count') ) total = responses.count() if total > 0: for choice in list(distribution.keys()): distribution[f'{choice}_percent'] = round( distribution[choice] / total * 100, 2 ) return distribution def _get_date_range_from_instances(self, instances): """Get date range from instances""" first = instances.order_by('completed_at').first() last = instances.order_by('-completed_at').first() if first and last: return first.completed_at, last.completed_at return timezone.now() - timedelta(days=365), timezone.now() def _calculate_question_statistics(self, responses): """Calculate statistical metrics for a question""" # Extract numeric values values = list(responses.values_list('numeric_value', flat=True)) values = [float(v) for v in values if v is not None] if not values: return {'skewness': 0.0, 'kurtosis': 0.0} # Calculate mean and std dev mean = StatisticalAnalyzer.calculate_mean(values) stddev = StatisticalAnalyzer.calculate_stddev(values, mean) # Calculate skewness and kurtosis skewness = StatisticalAnalyzer.calculate_skewness(values, mean, stddev) kurtosis = StatisticalAnalyzer.calculate_kurtosis(values, mean, stddev) return { 'skewness': skewness, 'kurtosis': kurtosis, 'mean': mean, 'stddev': stddev } def _calculate_question_correlation(self, question, instances): """Calculate correlation between question score and overall survey score""" # Get responses for this question along with survey scores question_responses = SurveyResponse.objects.filter( survey_instance__in=instances, question=question, numeric_value__isnull=False ).select_related('survey_instance') if question_responses.count() < 2: return 0.0 # Extract paired values question_scores = [] survey_scores = [] for response in question_responses: question_score = float(response.numeric_value) survey_score = float(response.survey_instance.total_score) if response.survey_instance.total_score else 0.0 question_scores.append(question_score) survey_scores.append(survey_score) # Calculate correlation return StatisticalAnalyzer.calculate_correlation(question_scores, survey_scores) def _get_question_channel_performance(self, question, instances): """Get question performance by delivery channel""" channels = ['sms', 'whatsapp', 'email'] performance = {} for channel in channels: channel_instances = instances.filter(delivery_channel=channel) channel_responses = SurveyResponse.objects.filter( survey_instance__in=channel_instances, question=question, numeric_value__isnull=False ) count = channel_responses.count() if count > 0: avg = channel_responses.aggregate(avg=Avg('numeric_value'))['avg'] or 0 performance[channel] = { 'response_count': count, 'avg_score': round(float(avg), 2) } else: performance[channel] = { 'response_count': 0, 'avg_score': 0.0 } return performance def _generate_question_rankings(self, template_data): """Generate question rankings based on various metrics""" numeric_questions = [ q for q in template_data['questions'] if q.get('has_numeric_responses', True) or 'avg_score' in q ] # Sort by average score sorted_by_score = sorted( numeric_questions, key=lambda x: x.get('avg_score', 0), reverse=True ) # Sort by correlation sorted_by_correlation = sorted( numeric_questions, key=lambda x: x.get('correlation_with_overall', 0), reverse=True ) # Sort by response rate (most skipped) sorted_by_response_rate = sorted( template_data['questions'], key=lambda x: x.get('response_rate', 0) ) return { 'top_5_by_score': [ { 'question': q['question_text'], 'order': q['order'], 'avg_score': q['avg_score'] } for q in sorted_by_score[:5] ], 'bottom_5_by_score': [ { 'question': q['question_text'], 'order': q['order'], 'avg_score': q['avg_score'] } for q in sorted_by_score[-5:] if sorted_by_score ], 'top_5_by_correlation': [ { 'question': q['question_text'], 'order': q['order'], 'correlation': q.get('correlation_with_overall', 0) } for q in sorted_by_correlation[:5] ], 'most_skipped_5': [ { 'question': q['question_text'], 'order': q['order'], 'response_rate': q['response_rate'] } for q in sorted_by_response_rate[:5] ] } def _generate_insights(self, template_data): """Generate actionable insights based on template analytics""" insights = [] metrics = template_data['metrics'] score_dist = template_data['score_distribution'] channel_perf = template_data['channel_performance'] rankings = template_data['rankings'] # Completion rate insights if metrics['completion_rate'] < 50: insights.append({ 'category': 'Engagement', 'severity': 'high', 'message': f'Low completion rate ({metrics["completion_rate"]}%). Consider improving survey timing and delivery channels.' }) elif metrics['completion_rate'] > 80: insights.append({ 'category': 'Engagement', 'severity': 'positive', 'message': f'Excellent completion rate ({metrics["completion_rate"]}%) showing strong patient engagement.' }) # Average score insights if metrics['avg_score'] < 3.0: insights.append({ 'category': 'Performance', 'severity': 'high', 'message': f'Below average performance ({metrics["avg_score"]}/5.0). Review worst performing questions for improvement opportunities.' }) elif metrics['avg_score'] >= 4.5: insights.append({ 'category': 'Performance', 'severity': 'positive', 'message': f'Outstanding performance ({metrics["avg_score"]}/5.0). Maintain current service levels.' }) # Negative rate insights if metrics['negative_rate'] > 20: insights.append({ 'category': 'Quality', 'severity': 'high', 'message': f'High negative survey rate ({metrics["negative_rate"]}%). Immediate action required to address patient concerns.' }) elif metrics['negative_rate'] < 5: insights.append({ 'category': 'Quality', 'severity': 'positive', 'message': f'Low negative survey rate ({metrics["negative_rate"]}%). Excellent patient satisfaction.' }) # Score distribution insights poor_percent = score_dist['poor_percent'] if poor_percent > 15: insights.append({ 'category': 'Distribution', 'severity': 'medium', 'message': f'High percentage of poor scores ({poor_percent}%). Investigate root causes of dissatisfaction.' }) excellent_percent = score_dist['excellent_percent'] if excellent_percent > 60: insights.append({ 'category': 'Distribution', 'severity': 'positive', 'message': f'Majority of responses are excellent ({excellent_percent}%). Outstanding service delivery.' }) # Channel performance insights best_channel = max(channel_perf.items(), key=lambda x: x[1]['completion_rate'])[0] worst_channel = min(channel_perf.items(), key=lambda x: x[1]['completion_rate'])[0] if channel_perf[best_channel]['completion_rate'] - channel_perf[worst_channel]['completion_rate'] > 30: insights.append({ 'category': 'Channels', 'severity': 'medium', 'message': f'Significant channel performance gap. {best_channel.capitalize()} performs much better ({channel_perf[best_channel]["completion_rate"]}%) than {worst_channel.capitalize()} ({channel_perf[worst_channel]["completion_rate"]}%).' }) # Question-specific insights if rankings['bottom_5_by_score']: worst_question = rankings['bottom_5_by_score'][0] if worst_question['avg_score'] < 3.0: insights.append({ 'category': 'Questions', 'severity': 'medium', 'message': f'Question {worst_question["order"]}: "{worst_question["question"]}" has lowest average score ({worst_question["avg_score"]}/5.0). Consider reviewing this service area.' }) if rankings['most_skipped_5']: most_skipped = rankings['most_skipped_5'][0] if most_skipped['response_rate'] < 70: insights.append({ 'category': 'Questions', 'severity': 'low', 'message': f'Question {most_skipped["order"]}: "{most_skipped["question"]}" has a low response rate ({most_skipped["response_rate"]}%). Consider making it optional or improving clarity.' }) # Patient contact metrics patient_contact = template_data.get('patient_contact_metrics', {}) if patient_contact.get('contacted_rate', 0) < 50 and patient_contact.get('total_negative', 0) > 0: insights.append({ 'category': 'Follow-up', 'severity': 'medium', 'message': f'Only {patient_contact["contacted_rate"]}% of negative survey patients have been contacted. Improve follow-up processes.' }) return insights def _generate_summary(self, templates_data): """Generate overall summary across all templates""" total_sent = sum(t['metrics']['total_sent'] for t in templates_data) total_completed = sum(t['metrics']['total_completed'] for t in templates_data) total_negative = sum(t['metrics']['negative_count'] for t in templates_data) # Calculate weighted average score weighted_score_sum = sum( t['metrics']['avg_score'] * t['metrics']['total_completed'] for t in templates_data ) weighted_avg_score = weighted_score_sum / total_completed if total_completed > 0 else 0 # Find best and worst performing templates sorted_by_score = sorted(templates_data, key=lambda x: x['metrics']['avg_score'], reverse=True) return { 'total_templates': len(templates_data), 'total_surveys_sent': total_sent, 'total_surveys_completed': total_completed, 'overall_completion_rate': round((total_completed / total_sent * 100) if total_sent > 0 else 0, 2), 'overall_avg_score': round(weighted_avg_score, 2), 'total_negative_surveys': total_negative, 'overall_negative_rate': round((total_negative / total_completed * 100) if total_completed > 0 else 0, 2), 'best_performing_template': { 'name': sorted_by_score[0]['template_name'], 'avg_score': sorted_by_score[0]['metrics']['avg_score'] }, 'worst_performing_template': { 'name': sorted_by_score[-1]['template_name'], 'avg_score': sorted_by_score[-1]['metrics']['avg_score'] } } class ReportGenerator: """Report generation helper class""" def generate_markdown_to_file(self, data, output_dir): """Generate Markdown report to file""" output_path = output_dir / 'survey_analytics_report.md' if hasattr(output_dir, 'joinpath') else \ f'{output_dir}/survey_analytics_report.md' markdown_content = self.generate_markdown(data) with open(output_path, 'w') as f: f.write(markdown_content) return output_path def generate_markdown(self, data): """Generate Markdown report content""" lines = [] lines.append('# Survey Analytics Report\n\n') lines.append(f'**Generated:** {data["report_generated_at"]}\n\n') lines.append(f'**Date Range:** {data["date_range"]["start"][:10]} to {data["date_range"]["end"][:10]}\n\n') # Summary lines.append('## Executive Summary\n\n') summary = data['summary'] lines.append(f'- **Total Survey Templates:** {summary["total_templates"]}\n') lines.append(f'- **Total Surveys Sent:** {summary["total_surveys_sent"]:,}\n') lines.append(f'- **Total Surveys Completed:** {summary["total_surveys_completed"]:,}\n') lines.append(f'- **Overall Completion Rate:** {summary["overall_completion_rate"]}%\n') lines.append(f'- **Overall Average Score:** {summary["overall_avg_score"]}/5.0\n') lines.append(f'- **Total Negative Surveys:** {summary["total_negative_surveys"]:,} ({summary["overall_negative_rate"]}%)\n\n') lines.append(f'### Best Performing Template\n') lines.append(f'**{summary["best_performing_template"]["name"]}**\n') lines.append(f'Average Score: {summary["best_performing_template"]["avg_score"]}/5.0\n\n') lines.append(f'### Worst Performing Template\n') lines.append(f'**{summary["worst_performing_template"]["name"]}**\n') lines.append(f'Average Score: {summary["worst_performing_template"]["avg_score"]}/5.0\n\n') # Template details for template_data in data['templates']: lines.append(f'## {template_data["template_name"]}\n\n') # Template metrics metrics = template_data['metrics'] lines.append('### Overview\n\n') lines.append(f'- **Survey Type:** {template_data["survey_type"]}\n') lines.append(f'- **Scoring Method:** {template_data["scoring_method"]}\n') lines.append(f'- **Questions:** {template_data["question_count"]}\n') lines.append(f'- **Total Sent:** {metrics["total_sent"]:,}\n') lines.append(f'- **Completed:** {metrics["total_completed"]:,}\n') lines.append(f'- **Completion Rate:** {metrics["completion_rate"]}%\n') lines.append(f'- **Average Score:** {metrics["avg_score"]}/5.0 (±{metrics["std_deviation"]})\n') lines.append(f'- **Negative Surveys:** {metrics["negative_count"]:,} ({metrics["negative_rate"]}%)\n\n') # Score distribution lines.append('### Score Distribution\n\n') lines.append('| Category | Count | Percentage |\n') lines.append('|----------|-------|------------|\n') for cat in ['excellent', 'good', 'average', 'poor']: count = template_data['score_distribution'][cat] percent = template_data['score_distribution'][f'{cat}_percent'] lines.append(f'| {cat.capitalize()} | {count} | {percent}% |\n') lines.append('\n') # Channel performance lines.append('### Channel Performance\n\n') lines.append('| Channel | Sent | Completed | Rate | Avg Score |\n') lines.append('|---------|-------|-----------|-------|----------|\n') for channel, perf in template_data['channel_performance'].items(): lines.append(f'| {channel.capitalize()} | {perf["total_sent"]} | {perf["completed"]} | {perf["completion_rate"]}% | {perf["avg_score"]} |\n') lines.append('\n') # Questions lines.append('### Question Analysis\n\n') for question in template_data['questions']: lines.append(f'#### Q{question["order"]}: {question["question_text"]}\n\n') lines.append(f'- **Type:** {question["question_type"]}\n') lines.append(f'- **Required:** {"Yes" if question["is_required"] else "No"}\n') lines.append(f'- **Response Rate:** {question["response_rate"]}% ({question["response_count"]} responses)\n') if 'avg_score' in question: lines.append(f'- **Average Score:** {question["avg_score"]}/5.0 (±{question["std_deviation"]})\n') if 'score_distribution' in question: lines.append('\n**Score Distribution:**\n\n') lines.append('| Score | Count | % |\n') lines.append('|-------|-------|----|\n') for i in range(1, 6): count = question['score_distribution'][f'score_{i}'] percent = question['score_distribution'][f'score_{i}_percent'] lines.append(f'| {i} | {count} | {percent}% |\n') lines.append('\n') lines.append('---\n\n') return ''.join(lines) def generate_html_to_file(self, data, output_dir): """Generate HTML report to file""" output_path = output_dir / 'survey_analytics_report.html' if hasattr(output_dir, 'joinpath') else \ f'{output_dir}/survey_analytics_report.html' html_content = self.generate_html(data) with open(output_path, 'w') as f: f.write(html_content) return output_path def generate_html(self, data): """Generate HTML content for report""" return f""" Survey Analytics Report

📊 Survey Analytics Report

Generated: {data['report_generated_at'][:19]}

Date Range: {data['date_range']['start'][:10]} to {data['date_range']['end'][:10]}

Executive Summary

Total Templates

{data['summary']['total_templates']}

Total Surveys Sent

{data['summary']['total_surveys_sent']:,}

Total Completed

{data['summary']['total_surveys_completed']:,}

Completion Rate

{data['summary']['overall_completion_rate']}%

Average Score

{data['summary']['overall_avg_score']}/5.0

Negative Surveys

{data['summary']['total_negative_surveys']:,}
{self._generate_template_sections_html(data)}
""" def _generate_template_sections_html(self, data): """Generate HTML sections for each template""" sections = [] for i, template_data in enumerate(data['templates']): section_id = f"template_{i}" # Score distribution chart dist_chart_id = f"scoreDist_{i}" monthly_chart_id = f"monthly_{i}" html = f"""

{template_data['template_name']}

Total Sent

{template_data['metrics']['total_sent']:,}

Completed

{template_data['metrics']['total_completed']:,}

Completion Rate

{template_data['metrics']['completion_rate']}%

Average Score

{template_data['metrics']['avg_score']}/5.0

Negative Surveys

{template_data['metrics']['negative_count']:,}

Question Analysis

""" for question in template_data['questions']: score = question.get('avg_score', 'N/A') score_class = 'positive' if isinstance(score, (int, float)) and score >= 4.0 else '' if isinstance(score, (int, float)): score = f"{score}/5.0" html += f""" """ html += """
Question Type Responses Response Rate Avg Score
Q{question['order']}: {question['question_text']} {question['question_type']} {question['response_count']} {question['response_rate']}% {score}
""" sections.append(html) return '\n'.join(sections) def _generate_charts_js(self, data): """Generate JavaScript for charts""" js = [] for i, template_data in enumerate(data['templates']): # Score distribution chart dist_chart_id = f"scoreDist_{i}" dist_data = template_data['score_distribution'] js.append(f""" // Score Distribution Chart for {template_data['template_name']} const {dist_chart_id} = new ApexCharts(document.querySelector("#{dist_chart_id}"), {{ series: [{dist_data['excellent']}, {dist_data['good']}, {dist_data['average']}, {dist_data['poor']}], chart: {{ type: 'donut', }}, labels: ['Excellent', 'Good', 'Average', 'Poor'], title: {{ text: 'Score Distribution' }}, colors: ['#28a745', '#17a2b8', '#ffc107', '#dc3545'] }}); {dist_chart_id}.render(); """) # Monthly trends chart monthly_chart_id = f"monthly_{i}" monthly_data = template_data['monthly_trends'] months = [t['month_name'] for t in monthly_data] scores = [t['avg_score'] for t in monthly_data] counts = [t['count'] for t in monthly_data] js.append(f""" // Monthly Trends Chart for {template_data['template_name']} const {monthly_chart_id} = new ApexCharts(document.querySelector("#{monthly_chart_id}"), {{ series: [{{ name: 'Average Score', data: {scores} }}, {{ name: 'Survey Count', data: {counts} }}], chart: {{ type: 'line', height: 400 }}, xaxis: {{ categories: {months} }}, title: {{ text: 'Monthly Trends' }}, yaxis: [{{ title: {{ text: 'Score' }} }}, {{ opposite: true, title: {{ text: 'Count' }} }}] }}); {monthly_chart_id}.render(); """) return '\n'.join(js) # Convenience functions for direct import def calculate_survey_analytics(template_name=None, start_date=None, end_date=None): """ Calculate comprehensive survey analytics. This function can be imported and used by API. Args: template_name: Optional filter by survey template name start_date: Optional start date (datetime.date or datetime) end_date: Optional end date (datetime.date or datetime) Returns: dict: Complete analytics data """ analyzer = AnalyticsGenerator() return analyzer.calculate_analytics(template_name, start_date, end_date) def generate_markdown_report(analytics_data): """ Generate Markdown report from analytics data. Args: analytics_data: Dictionary of analytics data Returns: str: Markdown formatted report """ report_gen = ReportGenerator() return report_gen.generate_markdown(analytics_data) def generate_html_report(analytics_data): """ Generate HTML report from analytics data. Args: analytics_data: Dictionary of analytics data Returns: str: HTML formatted report """ report_gen = ReportGenerator() return report_gen.generate_html(analytics_data) # ============================================================================ # ENHANCED MULTI-REPORT GENERATOR - Separate reports per survey type # ============================================================================ class MultiReportGenerator: """ Generates separate HTML reports for each survey template with enhanced question-level analysis. Creates a master index page linking all reports. """ def __init__(self, output_dir): self.output_dir = output_dir self.reports_generated = [] def generate_reports(self, analytics_data): """ Generate separate HTML reports for each survey template. Args: analytics_data: Output from calculate_survey_analytics() Returns: dict: Paths to generated reports and master index """ import os from datetime import datetime timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") reports_dir = os.path.join(self.output_dir, f"reports_{timestamp}") os.makedirs(reports_dir, exist_ok=True) generated_files = [] # Generate individual reports for each template for template_data in analytics_data['templates']: report_filename = self._sanitize_filename(template_data['template_name']) report_path = os.path.join(reports_dir, f"{report_filename}.html") html_content = self._generate_single_template_report( template_data, analytics_data['date_range'], analytics_data['report_generated_at'] ) with open(report_path, 'w', encoding='utf-8') as f: f.write(html_content) generated_files.append({ 'template_name': template_data['template_name'], 'template_id': template_data['template_id'], 'filename': f"{report_filename}.html", 'path': report_path, 'size': os.path.getsize(report_path) }) # Generate master index file index_path = os.path.join(reports_dir, "index.html") index_content = self._generate_master_index( generated_files, analytics_data['summary'], analytics_data['date_range'], analytics_data['report_generated_at'] ) with open(index_path, 'w', encoding='utf-8') as f: f.write(index_content) # Generate summary JSON for programmatic access summary_path = os.path.join(reports_dir, "summary.json") import json with open(summary_path, 'w', encoding='utf-8') as f: json.dump({ 'generated_at': analytics_data['report_generated_at'], 'date_range': analytics_data['date_range'], 'summary': analytics_data['summary'], 'reports': generated_files }, f, indent=2, default=str) return { 'reports_dir': reports_dir, 'index_path': index_path, 'summary_path': summary_path, 'individual_reports': generated_files } def _sanitize_filename(self, name): """Convert template name to safe filename""" import re # Remove special characters and replace spaces with underscores safe = re.sub(r'[^\w\s-]', '', name) safe = re.sub(r'[-\s]+', '_', safe) return safe.lower()[:50] def _generate_single_template_report(self, template_data, date_range, generated_at): """Generate detailed HTML report for a single survey template""" # Build question analysis sections question_sections = self._build_question_sections(template_data) # Build charts charts_js = self._build_question_charts_js(template_data) return f""" {template_data['template_name']} - Survey Analytics Report

{template_data['template_name']}

{date_range['start'][:10]} to {date_range['end'][:10]} | Generated: {generated_at[:19]}

Overview

Surveys Sent
{template_data['metrics']['total_sent']:,}
Completed
{template_data['metrics']['total_completed']:,}
{template_data['metrics']['completion_rate']}% completion
Average Score
{template_data['metrics']['avg_score']}/5.0
σ = {template_data['metrics']['std_deviation']}
Negative Surveys
{template_data['metrics']['negative_count']:,}
{template_data['metrics']['negative_rate']}% of total
Score Distribution
{self._build_distribution_bar(template_data['score_distribution'])}
{template_data['score_distribution']['excellent']}
Excellent ({template_data['score_distribution']['excellent_percent']}%)
{template_data['score_distribution']['good']}
Good ({template_data['score_distribution']['good_percent']}%)
{template_data['score_distribution']['average']}
Average ({template_data['score_distribution']['average_percent']}%)
{template_data['score_distribution']['poor']}
Poor ({template_data['score_distribution']['poor_percent']}%)

Question-Level Analysis

Detailed analysis of {len(template_data['questions'])} questions with statistical metrics, score distributions, and performance trends.

{question_sections}

Key Insights & Recommendations

Top Performing Questions
{self._build_top_questions_list(template_data['questions'], 'top')}
Questions Needing Attention
{self._build_top_questions_list(template_data['questions'], 'bottom')}

Channel Performance

{self._build_channel_cards(template_data.get('channel_performance', {}))}
""" def _build_question_sections(self, template_data): """Build detailed HTML sections for each question""" sections = [] for i, question in enumerate(template_data['questions'], 1): section = self._build_single_question_section(question, i) sections.append(section) return '\n'.join(sections) def _build_single_question_section(self, question, index): """Build HTML for a single question analysis""" question_type = question.get('question_type', 'unknown') # Build question-specific content if question_type in ['rating', 'likert', 'nps']: content = self._build_numeric_question_content(question) elif question_type in ['text', 'textarea']: content = self._build_text_question_content(question) elif question_type == 'multiple_choice': content = self._build_choice_question_content(question) else: content = '

Detailed analysis not available for this question type.

' # Score class for color coding avg_score = question.get('avg_score', 0) score_class = self._get_score_class(avg_score) return f"""
{index}
{question.get('question_text', 'Untitled Question')}
{f'

{question.get("question_text_ar", "")}

' if question.get('question_text_ar') else ''}
{question_type.upper()} {f'Required' if question.get('is_required') else ''}
{avg_score if avg_score else 'N/A'}
Average Score
{content}
{question.get('response_count', 0):,}
Responses
{question.get('response_rate', 0)}%
Response Rate
{f'''
{question.get('std_deviation', 0)}
Std Deviation
{question.get('correlation_with_overall', 0)}
Correlation
''' if question_type in ['rating', 'likert', 'nps'] else ''} {f'''
{question.get('text_response_count', 0)}
Text Responses
{question.get('avg_text_length', 0):.0f}
Avg Length
''' if question_type in ['text', 'textarea'] else ''}
{self._build_question_insights(question)}
""" def _build_numeric_question_content(self, question): """Build content for numeric/rating questions""" score_dist = question.get('score_distribution', {}) monthly_trends = question.get('monthly_trends', []) # Build distribution bar dist_html = '
' dist_html += '
Score Distribution
' dist_html += '
' colors = {1: '#ef4444', 2: '#f97316', 3: '#f59e0b', 4: '#10b981', 5: '#059669'} for score in [5, 4, 3, 2, 1]: count = score_dist.get(f'score_{score}', 0) percent = score_dist.get(f'score_{score}_percent', 0) if count > 0: dist_html += f'''
{f'{count}' if percent > 8 else ''}
''' dist_html += '
' dist_html += '
' for score in [1, 2, 3, 4, 5]: count = score_dist.get(f'score_{score}', 0) percent = score_dist.get(f'score_{score}_percent', 0) dist_html += f'{score}: {count} ({percent}%)' dist_html += '
' # Add monthly trends chart placeholder dist_html += f'''
Monthly Trends
''' return dist_html def _build_text_question_content(self, question): """Build content for text questions""" return f"""
This is a text-based question. {question.get('text_response_count', 0)} patients provided written feedback. Average response length: {question.get('avg_text_length', 0):.0f} characters.

Review individual survey responses to read patient comments for this question.

""" def _build_choice_question_content(self, question): """Build content for multiple choice questions""" choice_dist = question.get('choice_distribution', {}) html = '
Choice Distribution
' html += '' html += '' for choice, count in sorted(choice_dist.items(), key=lambda x: x[1], reverse=True): if not choice.endswith('_percent'): percent = choice_dist.get(f'{choice}_percent', 0) html += f'' html += '
ChoiceCountPercentage
{choice}{count}{percent}%
' return html def _build_question_insights(self, question): """Generate insights for a question""" insights = [] question_type = question.get('question_type', '') if question_type in ['rating', 'likert', 'nps']: avg_score = question.get('avg_score', 0) response_rate = question.get('response_rate', 0) correlation = question.get('correlation_with_overall', 0) if avg_score >= 4.5: insights.append(('Excellent Performance', '')) elif avg_score < 3.0: insights.append(('Needs Improvement', '')) if response_rate < 70: insights.append(('Low Response Rate', '')) if abs(correlation) > 0.7: direction = 'positively' if correlation > 0 else 'negatively' insights.append((f'Strong {direction} correlates with overall', '')) if insights: return '
' + ''.join([i[0] for i in insights]) + '
' return '' def _build_question_charts_js(self, template_data): """Build JavaScript for all question charts""" js = [] # Monthly trends chart for the survey monthly_data = template_data.get('monthly_trends', []) months = [t['month_name'] for t in monthly_data] scores = [t['avg_score'] for t in monthly_data] counts = [t['count'] for t in monthly_data] js.append(f""" // Monthly trends for survey new ApexCharts(document.querySelector("#monthlyTrendsChart"), {{ series: [ {{ name: 'Average Score', type: 'line', data: {scores} }}, {{ name: 'Survey Count', type: 'column', data: {counts} }} ], chart: {{ height: 400, type: 'line', toolbar: {{ show: true }} }}, stroke: {{ width: [3, 0] }}, xaxis: {{ categories: {months} }}, yaxis: [ {{ title: {{ text: 'Average Score' }}, min: 0, max: 5 }}, {{ opposite: true, title: {{ text: 'Count' }} }} ], colors: ['#2563eb', '#10b981'], title: {{ text: 'Monthly Survey Performance' }} }}).render(); """) # Channel performance chart channel_perf = template_data.get('channel_performance', {}) if channel_perf: channels = list(channel_perf.keys()) completion_rates = [channel_perf.get(ch, {}).get('completion_rate', 0) for ch in channels] avg_scores = [channel_perf.get(ch, {}).get('avg_score', 0) for ch in channels] js.append(f""" // Channel performance chart new ApexCharts(document.querySelector("#channelChart"), {{ series: [ {{ name: 'Completion Rate (%)', data: {completion_rates} }}, {{ name: 'Average Score', data: {avg_scores} }} ], chart: {{ type: 'bar', height: 350 }}, xaxis: {{ categories: {channels} }}, colors: ['#2563eb', '#10b981'], title: {{ text: 'Performance by Channel' }} }}).render(); """) # Question-level trend charts for question in template_data.get('questions', []): if question.get('question_type') in ['rating', 'likert', 'nps']: monthly_trends = question.get('monthly_trends', []) if monthly_trends: months = [t['month_name'] for t in monthly_trends] scores = [t['avg_score'] for t in monthly_trends] js.append(f""" new ApexCharts(document.querySelector("#questionChart_{question.get('question_id', 'unknown')}"), {{ series: [{{ data: {scores} }}], chart: {{ type: 'area', height: 200, sparkline: {{ enabled: true }} }}, stroke: {{ curve: 'smooth', width: 2 }}, fill: {{ opacity: 0.3 }}, colors: ['#2563eb'], tooltip: {{ fixed: {{ enabled: false }}, x: {{ show: false }} }} }}).render(); """) return '\n'.join(js) def _build_top_questions_list(self, questions, mode='top'): """Build list of top/bottom performing questions""" numeric_questions = [q for q in questions if q.get('avg_score') is not None] if not numeric_questions: return '

No numeric questions available.

' sorted_questions = sorted(numeric_questions, key=lambda x: x.get('avg_score', 0), reverse=(mode == 'top')) selected = sorted_questions[:5] html = '' return html def _build_channel_cards(self, channel_perf): """Build channel performance cards""" cards = [] channel_icons = { 'sms': 'bi-phone', 'whatsapp': 'bi-whatsapp', 'email': 'bi-envelope' } for channel, data in channel_perf.items(): icon = channel_icons.get(channel, 'bi-send') cards.append(f'''
{channel.upper()}
Sent
{data.get('total_sent', 0)}
Completed
{data.get('completed', 0)}
{data.get('completion_rate', 0)}% completion
Avg Score: {data.get('avg_score', 0)}
''') return '\n'.join(cards) if cards else '

No channel data available.

' def _build_distribution_bar(self, score_dist): """Build HTML for score distribution bar""" total = sum([ score_dist.get('excellent', 0), score_dist.get('good', 0), score_dist.get('average', 0), score_dist.get('poor', 0) ]) if total == 0: return '
No Data
' segments = [] colors = { 'excellent': '#10b981', 'good': '#06b6d4', 'average': '#f59e0b', 'poor': '#ef4444' } for key in ['excellent', 'good', 'average', 'poor']: count = score_dist.get(key, 0) if count > 0: percent = (count / total) * 100 segments.append(f'
') return '\n'.join(segments) def _get_score_class(self, score): """Get CSS class based on score""" if score is None: return 'text-muted' if score >= 4.5: return 'score-excellent' elif score >= 3.5: return 'score-good' elif score >= 2.5: return 'score-average' else: return 'score-poor' def _generate_master_index(self, reports, summary, date_range, generated_at): """Generate master index HTML linking all reports""" report_links = [] for report in reports: report_links.append(f'''
{report['template_name']}
{self._human_readable_size(report['size'])}
View Report
''') return f""" Survey Analytics Reports Index - PX360

📊 Survey Analytics Reports

Generated: {generated_at[:19]}
Period: {date_range['start'][:10]} to {date_range['end'][:10]}

{summary['total_templates']}
Survey Templates
{summary['total_surveys_sent']:,}
Surveys Sent
{summary['total_surveys_completed']:,}
Completed
{summary['overall_completion_rate']}%
Completion Rate

Available Reports

{''.join(report_links)}
Report Summary
Overall Average Score: {summary['overall_avg_score']}/5.0
Negative Survey Rate: {summary['overall_negative_rate']}%
Best Performing: {summary['best_performing_template']['name']} ({summary['best_performing_template']['avg_score']})
Total Reports: {len(reports)}
Report Directory: reports/
Worst Performing: {summary['worst_performing_template']['name']} ({summary['worst_performing_template']['avg_score']})
""" def _human_readable_size(self, size_bytes): """Convert bytes to human readable format""" for unit in ['B', 'KB', 'MB', 'GB']: if size_bytes < 1024.0: return f"{size_bytes:.1f} {unit}" size_bytes /= 1024.0 return f"{size_bytes:.1f} TB" # ============================================================================ # CONVENIENCE FUNCTIONS FOR ENHANCED REPORTS # ============================================================================ def generate_enhanced_survey_reports(template_name=None, start_date=None, end_date=None, output_dir=None): """ Generate enhanced survey analytics reports - one per survey template. Args: template_name: Optional filter by survey template name start_date: Optional start date end_date: Optional end date output_dir: Directory to save reports (default: settings.SURVEY_REPORTS_DIR or 'reports') Returns: dict: Paths to generated reports """ from django.conf import settings import os if output_dir is None: output_dir = getattr(settings, 'SURVEY_REPORTS_DIR', 'reports') # Calculate analytics analytics_data = calculate_survey_analytics(template_name, start_date, end_date) # Generate reports generator = MultiReportGenerator(output_dir) result = generator.generate_reports(analytics_data) return result def generate_single_template_report(template_id, start_date=None, end_date=None): """ Generate a detailed report for a single survey template. Args: template_id: UUID of the survey template start_date: Optional start date end_date: Optional end date Returns: str: HTML content of the report """ from apps.surveys.models import SurveyTemplate template = SurveyTemplate.objects.get(id=template_id) # Calculate analytics for this specific template analytics_data = calculate_survey_analytics( template_name=template.name, start_date=start_date, end_date=end_date ) # Generate single report generator = MultiReportGenerator('/tmp') # Find the template data template_data = None for t in analytics_data['templates']: if t['template_id'] == str(template_id): template_data = t break if not template_data: raise ValueError(f"No data found for template {template_id}") return generator._generate_single_template_report( template_data, analytics_data['date_range'], analytics_data['report_generated_at'] )