""" Survey analytics utilities. This module provides reusable functions and classes for survey analytics: - Statistical analysis (correlation, skewness, kurtosis) - Analytics generation - Report generation (Markdown, HTML, JSON) These utilities can be used by: - Management commands (CLI) - API views (REST) - Other services """ import json import math from datetime import datetime, timedelta from decimal import Decimal from django.db.models import Avg, Count, StdDev, Q, F, Sum, ExpressionWrapper, IntegerField from django.utils import timezone from django.conf import settings from apps.surveys.models import SurveyTemplate, SurveyInstance, SurveyResponse, SurveyQuestion class StatisticalAnalyzer: """Statistical analysis helper class""" @staticmethod def calculate_mean(values): """Calculate mean of values""" if not values: return 0.0 return sum(values) / len(values) @staticmethod def calculate_stddev(values, mean=None): """Calculate standard deviation""" if not values: return 0.0 if mean is None: mean = StatisticalAnalyzer.calculate_mean(values) variance = sum((x - mean) ** 2 for x in values) / len(values) return math.sqrt(variance) @staticmethod def calculate_skewness(values, mean=None, stddev=None): """Calculate skewness (measure of asymmetry)""" if not values or len(values) < 3: return 0.0 if mean is None: mean = StatisticalAnalyzer.calculate_mean(values) if stddev is None: stddev = StatisticalAnalyzer.calculate_stddev(values, mean) if stddev == 0: return 0.0 n = len(values) skew = (n / ((n - 1) * (n - 2) * stddev ** 3)) * \ sum((x - mean) ** 3 for x in values) return skew @staticmethod def calculate_kurtosis(values, mean=None, stddev=None): """Calculate kurtosis (measure of tail heaviness)""" if not values or len(values) < 4: return 0.0 if mean is None: mean = StatisticalAnalyzer.calculate_mean(values) if stddev is None: stddev = StatisticalAnalyzer.calculate_stddev(values, mean) if stddev == 0: return 0.0 n = len(values) kurt = ((n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3) * stddev ** 4)) * \ sum((x - mean) ** 4 for x in values) - \ (3 * (n - 1) ** 2) / ((n - 2) * (n - 3)) return kurt @staticmethod def calculate_correlation(x_values, y_values): """Calculate Pearson correlation coefficient""" if not x_values or not y_values or len(x_values) != len(y_values): return 0.0 if len(x_values) < 2: return 0.0 mean_x = StatisticalAnalyzer.calculate_mean(x_values) mean_y = StatisticalAnalyzer.calculate_mean(y_values) stddev_x = StatisticalAnalyzer.calculate_stddev(x_values, mean_x) stddev_y = StatisticalAnalyzer.calculate_stddev(y_values, mean_y) if stddev_x == 0 or stddev_y == 0: return 0.0 n = len(x_values) covariance = sum((x_values[i] - mean_x) * (y_values[i] - mean_y) for i in range(n)) / n correlation = covariance / (stddev_x * stddev_y) return correlation class AnalyticsGenerator: """Main analytics generator class""" def calculate_analytics(self, template_name=None, start_date=None, end_date=None): """ Calculate comprehensive survey analytics. 


class AnalyticsGenerator:
    """Main analytics generator class."""

    def calculate_analytics(self, template_name=None, start_date=None, end_date=None):
        """
        Calculate comprehensive survey analytics.

        Args:
            template_name: Optional filter by survey template name
            start_date: Optional start date (datetime.date or datetime)
            end_date: Optional end date (datetime.date or datetime)

        Returns:
            dict: Complete analytics data
        """
        # Parse the date range, defaulting to the last 365 days
        if start_date is None:
            start_date = timezone.now() - timedelta(days=365)
        elif not isinstance(start_date, datetime):
            start_date = timezone.make_aware(datetime.combine(start_date, datetime.min.time()))
        elif timezone.is_naive(start_date):
            start_date = timezone.make_aware(start_date)

        if end_date is None:
            end_date = timezone.now()
        elif not isinstance(end_date, datetime):
            end_date = timezone.make_aware(datetime.combine(end_date, datetime.max.time()))
        elif timezone.is_naive(end_date):
            end_date = timezone.make_aware(end_date)

        # Get the survey templates to analyze
        templates = SurveyTemplate.objects.filter(is_active=True)
        if template_name:
            templates = templates.filter(name__icontains=template_name)
        templates = templates.order_by('name')

        # Generate analytics data
        analytics_data = {
            'report_generated_at': timezone.now().isoformat(),
            'date_range': {
                'start': start_date.isoformat(),
                'end': end_date.isoformat(),
            },
            'templates': [],
        }

        for template in templates:
            template_data = self._analyze_template(template, start_date, end_date)
            analytics_data['templates'].append(template_data)

        # Add the overall summary
        analytics_data['summary'] = self._generate_summary(analytics_data['templates'])

        return analytics_data
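
    # Illustrative usage sketch (assumes at least one active SurveyTemplate
    # exists; the template name below is hypothetical):
    #
    #     from datetime import date
    #     generator = AnalyticsGenerator()
    #     data = generator.calculate_analytics(
    #         template_name='Discharge',
    #         start_date=date(2024, 1, 1),
    #         end_date=date(2024, 6, 30),
    #     )
    #     print(data['summary']['overall_completion_rate'])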

    def _analyze_template(self, template, start_date, end_date):
        """Analyze a single survey template."""
        # All instances for this template within the date range
        instances = SurveyInstance.objects.filter(
            survey_template=template,
            created_at__gte=start_date,
            created_at__lte=end_date,
        )

        # Completed instances
        completed_instances = instances.filter(status='completed')

        # Basic metrics
        total_sent = instances.count()
        total_completed = completed_instances.count()
        completion_rate = (total_completed / total_sent * 100) if total_sent > 0 else 0

        # Score metrics
        avg_score = completed_instances.aggregate(avg=Avg('total_score'))['avg'] or 0
        std_dev = completed_instances.aggregate(std=StdDev('total_score'))['std'] or 0

        # Negative surveys
        negative_count = completed_instances.filter(is_negative=True).count()
        negative_rate = (negative_count / total_completed * 100) if total_completed > 0 else 0

        # Score distribution
        score_distribution = self._get_score_distribution(completed_instances)

        # Status breakdown
        status_breakdown = dict(
            instances.values('status').annotate(count=Count('id')).values_list('status', 'count')
        )

        # Channel performance
        channel_performance = self._get_channel_performance(instances)

        # Monthly trends
        monthly_trends = self._get_monthly_trends(completed_instances, start_date, end_date)

        # Engagement metrics
        engagement = self._get_engagement_metrics(completed_instances)

        # Patient contact metrics
        patient_contact = self._get_patient_contact_metrics(completed_instances)

        # Comments
        comments_data = self._get_comments_metrics(completed_instances)

        # Question-level analytics
        questions = self._analyze_questions(template, completed_instances)

        template_data = {
            'template_id': str(template.id),
            'template_name': template.name,
            'template_name_ar': template.name_ar,
            'survey_type': template.survey_type,
            'scoring_method': template.scoring_method,
            'negative_threshold': float(template.negative_threshold),
            'question_count': template.questions.count(),
            'metrics': {
                'total_sent': total_sent,
                'total_completed': total_completed,
                'completion_rate': round(completion_rate, 2),
                'avg_score': round(float(avg_score), 2),
                'std_deviation': round(float(std_dev), 2),
                'negative_count': negative_count,
                'negative_rate': round(negative_rate, 2),
            },
            'score_distribution': score_distribution,
            'status_breakdown': status_breakdown,
            'channel_performance': channel_performance,
            'monthly_trends': monthly_trends,
            'engagement_metrics': engagement,
            'patient_contact_metrics': patient_contact,
            'comments_metrics': comments_data,
            'questions': questions,
        }

        # Rankings and insights are derived from the data assembled above
        template_data['rankings'] = self._generate_question_rankings(template_data)
        template_data['insights'] = self._generate_insights(template_data)

        return template_data

    def _get_score_distribution(self, instances):
        """Get the score distribution across fixed bands."""
        distribution = {
            'excellent': instances.filter(total_score__gte=4.5).count(),
            'good': instances.filter(total_score__gte=3.5, total_score__lt=4.5).count(),
            'average': instances.filter(total_score__gte=2.5, total_score__lt=3.5).count(),
            'poor': instances.filter(total_score__lt=2.5).count(),
        }
        total = instances.count()
        for key in list(distribution.keys()):
            distribution[f'{key}_percent'] = (
                round(distribution[key] / total * 100, 2) if total > 0 else 0
            )
        return distribution

    def _get_channel_performance(self, instances):
        """Get performance by delivery channel."""
        channels = ['sms', 'whatsapp', 'email']
        performance = {}
        for channel in channels:
            channel_instances = instances.filter(delivery_channel=channel)
            total = channel_instances.count()
            completed_qs = channel_instances.filter(status='completed')
            completed = completed_qs.count()
            performance[channel] = {
                'total_sent': total,
                'completed': completed,
                'completion_rate': round((completed / total * 100) if total > 0 else 0, 2),
                'avg_score': round(float(
                    completed_qs.aggregate(avg=Avg('total_score'))['avg'] or 0
                ), 2),
            }
        return performance

    def _get_monthly_trends(self, instances, start_date, end_date):
        """Get monthly trends."""
        trends = []
        current_date = start_date.replace(day=1)
        while current_date <= end_date:
            # Jump to the first day of the next month
            next_date = (current_date + timedelta(days=32)).replace(day=1)
            month_instances = instances.filter(
                completed_at__gte=current_date,
                completed_at__lt=next_date,
            )
            trends.append({
                'month': current_date.strftime('%Y-%m'),
                'month_name': current_date.strftime('%B %Y'),
                'count': month_instances.count(),
                'avg_score': round(float(
                    month_instances.aggregate(avg=Avg('total_score'))['avg'] or 0
                ), 2),
                'negative_count': month_instances.filter(is_negative=True).count(),
            })
            current_date = next_date
        return trends

    def _get_engagement_metrics(self, instances):
        """Get engagement metrics."""
        # Time to complete: Min/Max (not Avg) for the extremes
        time_stats = instances.filter(
            time_spent_seconds__isnull=False
        ).aggregate(
            avg_time=Avg('time_spent_seconds'),
            min_time=Min('time_spent_seconds'),
            max_time=Max('time_spent_seconds'),
        )

        # Open counts
        open_stats = instances.aggregate(
            avg_opens=Avg('open_count'),
            max_opens=Max('open_count'),
        )

        return {
            'avg_completion_time_seconds': round(float(time_stats['avg_time'] or 0), 2),
            'min_completion_time_seconds': round(float(time_stats['min_time'] or 0), 2),
            'max_completion_time_seconds': round(float(time_stats['max_time'] or 0), 2),
            'avg_opens': round(float(open_stats['avg_opens'] or 0), 2),
            'max_opens': round(float(open_stats['max_opens'] or 0), 2),
        }
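
    # Illustrative return shape for _get_channel_performance (all numbers are
    # hypothetical):
    #
    #     {
    #         'sms':      {'total_sent': 120, 'completed': 54,  'completion_rate': 45.0, 'avg_score': 4.1},
    #         'whatsapp': {'total_sent': 300, 'completed': 210, 'completion_rate': 70.0, 'avg_score': 4.3},
    #         'email':    {'total_sent': 80,  'completed': 20,  'completion_rate': 25.0, 'avg_score': 3.9},
    #     }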

    def _get_patient_contact_metrics(self, instances):
        """Get patient contact metrics for negative surveys."""
        negative_instances = instances.filter(is_negative=True)
        total_negative = negative_instances.count()
        contacted = negative_instances.filter(patient_contacted=True).count()
        resolved = negative_instances.filter(issue_resolved=True).count()

        return {
            'total_negative': total_negative,
            'contacted': contacted,
            'contacted_rate': round((contacted / total_negative * 100) if total_negative > 0 else 0, 2),
            'resolved': resolved,
            'resolved_rate': round((resolved / total_negative * 100) if total_negative > 0 else 0, 2),
        }

    def _get_comments_metrics(self, instances):
        """Get comments metrics."""
        total = instances.count()
        with_comments = instances.exclude(comment='').count()
        return {
            'with_comments': with_comments,
            'comment_rate': round((with_comments / total * 100) if total > 0 else 0, 2),
            # Length() computes the character length in the database
            'avg_comment_length': round(float(
                instances.exclude(comment='').annotate(
                    length=Length('comment')
                ).aggregate(avg=Avg('length'))['avg'] or 0
            ), 2),
        }
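
    # Illustrative return shapes for the two metric helpers above (values are
    # hypothetical):
    #
    #     {'total_negative': 12, 'contacted': 9, 'contacted_rate': 75.0,
    #      'resolved': 6, 'resolved_rate': 50.0}
    #     {'with_comments': 42, 'comment_rate': 21.0, 'avg_comment_length': 87.5}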

    def _analyze_questions(self, template, instances):
        """Analyze each question in the template."""
        questions = template.questions.all().order_by('order')
        question_analytics = []
        total_instances = instances.count()

        for question in questions:
            # Responses for this question
            responses = SurveyResponse.objects.filter(
                survey_instance__in=instances,
                question=question,
            )
            response_count = responses.count()

            # Fields shared by every question type
            question_data = {
                'question_id': str(question.id),
                'question_text': question.text,
                'question_text_ar': question.text_ar,
                'question_type': question.question_type,
                'order': question.order,
                'is_required': question.is_required,
                'response_count': response_count,
                'response_rate': round(
                    (response_count / total_instances * 100) if total_instances > 0 else 0, 2
                ),
            }

            if question.question_type in ['rating', 'likert', 'nps']:
                # Numeric questions
                numeric_responses = responses.filter(numeric_value__isnull=False)
                avg_score = numeric_responses.aggregate(avg=Avg('numeric_value'))['avg'] or 0
                std_dev = numeric_responses.aggregate(std=StdDev('numeric_value'))['std'] or 0

                # Score distribution
                score_dist = self._get_question_score_distribution(numeric_responses)

                # Statistical analysis (skewness, kurtosis)
                stats = self._calculate_question_statistics(numeric_responses)

                # Correlation with the overall survey score
                correlation = self._calculate_question_correlation(question, instances)

                # Performance by channel
                channel_performance = self._get_question_channel_performance(question, instances)

                # Monthly trends
                monthly_trends = self._get_question_monthly_trends(
                    question, instances, self._get_date_range_from_instances(instances)
                )

                question_data.update({
                    'avg_score': round(float(avg_score), 2),
                    'std_deviation': round(float(std_dev), 2),
                    'skewness': round(stats['skewness'], 3),
                    'kurtosis': round(stats['kurtosis'], 3),
                    'correlation_with_overall': round(correlation, 3),
                    'score_distribution': score_dist,
                    'channel_performance': channel_performance,
                    'monthly_trends': monthly_trends,
                    'has_text_responses': False,
                })

            elif question.question_type in ['text', 'textarea']:
                # Text questions
                text_responses = responses.exclude(text_value='')
                question_data.update({
                    'text_response_count': text_responses.count(),
                    'avg_text_length': round(float(
                        text_responses.annotate(
                            length=Length('text_value')
                        ).aggregate(avg=Avg('length'))['avg'] or 0
                    ), 2),
                    'has_numeric_responses': False,
                })

            elif question.question_type == 'multiple_choice':
                # Multiple choice questions
                question_data.update({
                    'choice_distribution': self._get_choice_distribution(responses),
                    'has_numeric_responses': False,
                })

            question_analytics.append(question_data)

        return question_analytics

    def _get_question_score_distribution(self, responses):
        """Get the 1-5 score distribution for a question."""
        distribution = {}
        scores = [1, 2, 3, 4, 5]
        for score in scores:
            distribution[f'score_{score}'] = responses.filter(numeric_value=score).count()
        total = responses.count()
        for score in scores:
            distribution[f'score_{score}_percent'] = (
                round(distribution[f'score_{score}'] / total * 100, 2) if total > 0 else 0
            )
        return distribution

    def _get_question_monthly_trends(self, question, instances, date_range):
        """Get monthly trends for a question."""
        start_date, end_date = date_range
        trends = []
        current_date = start_date.replace(day=1)
        while current_date <= end_date:
            next_date = (current_date + timedelta(days=32)).replace(day=1)
            month_responses = SurveyResponse.objects.filter(
                survey_instance__in=instances,
                question=question,
                created_at__gte=current_date,
                created_at__lt=next_date,
            ).filter(numeric_value__isnull=False)
            trends.append({
                'month': current_date.strftime('%Y-%m'),
                'month_name': current_date.strftime('%B %Y'),
                'count': month_responses.count(),
                'avg_score': round(float(
                    month_responses.aggregate(avg=Avg('numeric_value'))['avg'] or 0
                ), 2),
            })
            current_date = next_date
        return trends

    def _get_choice_distribution(self, responses):
        """Get the answer distribution for multiple choice questions."""
        distribution = dict(
            responses.values('choice_value').annotate(
                count=Count('id')
            ).values_list('choice_value', 'count')
        )
        total = responses.count()
        if total > 0:
            for choice in list(distribution.keys()):
                distribution[f'{choice}_percent'] = round(
                    distribution[choice] / total * 100, 2
                )
        return distribution

    def _get_date_range_from_instances(self, instances):
        """Get the completed-at date range covered by the instances."""
        first = instances.order_by('completed_at').first()
        last = instances.order_by('-completed_at').first()
        if first and last:
            return first.completed_at, last.completed_at
        return timezone.now() - timedelta(days=365), timezone.now()

    def _calculate_question_statistics(self, responses):
        """Calculate statistical metrics for a question."""
        # Extract numeric values
        values = list(responses.values_list('numeric_value', flat=True))
        values = [float(v) for v in values if v is not None]

        if not values:
            return {'skewness': 0.0, 'kurtosis': 0.0, 'mean': 0.0, 'stddev': 0.0}

        # Mean and standard deviation feed the higher moments
        mean = StatisticalAnalyzer.calculate_mean(values)
        stddev = StatisticalAnalyzer.calculate_stddev(values, mean)

        skewness = StatisticalAnalyzer.calculate_skewness(values, mean, stddev)
        kurtosis = StatisticalAnalyzer.calculate_kurtosis(values, mean, stddev)

        return {
            'skewness': skewness,
            'kurtosis': kurtosis,
            'mean': mean,
            'stddev': stddev,
        }
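
    # Illustrative sketch (hypothetical ratings): a set of 1-5 scores with a
    # tail of low ratings yields negative skewness and heavy-tailed kurtosis.
    #
    #     values = [5, 5, 5, 4, 5, 2, 5, 4]
    #     StatisticalAnalyzer.calculate_skewness(values)   # < 0: low-score tail
    #     StatisticalAnalyzer.calculate_kurtosis(values)   # > 0: heavier than normal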

    def _calculate_question_correlation(self, question, instances):
        """Calculate the correlation between a question's score and the overall survey score."""
        # Get responses for this question along with their survey scores
        question_responses = SurveyResponse.objects.filter(
            survey_instance__in=instances,
            question=question,
            numeric_value__isnull=False,
        ).select_related('survey_instance')

        if question_responses.count() < 2:
            return 0.0

        # Extract paired values
        question_scores = []
        survey_scores = []
        for response in question_responses:
            question_scores.append(float(response.numeric_value))
            survey_scores.append(
                float(response.survey_instance.total_score)
                if response.survey_instance.total_score else 0.0
            )

        return StatisticalAnalyzer.calculate_correlation(question_scores, survey_scores)

    def _get_question_channel_performance(self, question, instances):
        """Get question performance by delivery channel."""
        channels = ['sms', 'whatsapp', 'email']
        performance = {}
        for channel in channels:
            channel_instances = instances.filter(delivery_channel=channel)
            channel_responses = SurveyResponse.objects.filter(
                survey_instance__in=channel_instances,
                question=question,
                numeric_value__isnull=False,
            )
            count = channel_responses.count()
            if count > 0:
                avg = channel_responses.aggregate(avg=Avg('numeric_value'))['avg'] or 0
                performance[channel] = {
                    'response_count': count,
                    'avg_score': round(float(avg), 2),
                }
            else:
                performance[channel] = {
                    'response_count': 0,
                    'avg_score': 0.0,
                }
        return performance

    def _generate_question_rankings(self, template_data):
        """Generate question rankings based on various metrics."""
        # Only questions that actually carry an average score can be ranked by it
        numeric_questions = [
            q for q in template_data['questions'] if 'avg_score' in q
        ]

        # Sort by average score, descending
        sorted_by_score = sorted(
            numeric_questions,
            key=lambda x: x.get('avg_score', 0),
            reverse=True,
        )

        # Sort by correlation with the overall score
        sorted_by_correlation = sorted(
            numeric_questions,
            key=lambda x: x.get('correlation_with_overall', 0),
            reverse=True,
        )

        # Sort by response rate, ascending (most skipped first)
        sorted_by_response_rate = sorted(
            template_data['questions'],
            key=lambda x: x.get('response_rate', 0),
        )

        return {
            'top_5_by_score': [
                {'question': q['question_text'], 'order': q['order'], 'avg_score': q['avg_score']}
                for q in sorted_by_score[:5]
            ],
            # Reversed so the lowest-scoring question comes first
            'bottom_5_by_score': [
                {'question': q['question_text'], 'order': q['order'], 'avg_score': q['avg_score']}
                for q in sorted_by_score[-5:][::-1]
            ],
            'top_5_by_correlation': [
                {'question': q['question_text'], 'order': q['order'],
                 'correlation': q.get('correlation_with_overall', 0)}
                for q in sorted_by_correlation[:5]
            ],
            'most_skipped_5': [
                {'question': q['question_text'], 'order': q['order'], 'response_rate': q['response_rate']}
                for q in sorted_by_response_rate[:5]
            ],
        }
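
    # Illustrative slice of the rankings dict (question texts and numbers are
    # hypothetical):
    #
    #     {
    #         'top_5_by_score': [
    #             {'question': 'Staff courtesy', 'order': 2, 'avg_score': 4.7},
    #             ...
    #         ],
    #         'most_skipped_5': [
    #             {'question': 'Additional comments', 'order': 9, 'response_rate': 41.3},
    #             ...
    #         ],
    #     }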

    def _generate_insights(self, template_data):
        """Generate actionable insights based on template analytics."""
        insights = []
        metrics = template_data['metrics']
        score_dist = template_data['score_distribution']
        channel_perf = template_data['channel_performance']
        rankings = template_data['rankings']

        # Completion rate insights
        if metrics['completion_rate'] < 50:
            insights.append({
                'category': 'Engagement',
                'severity': 'high',
                'message': f'Low completion rate ({metrics["completion_rate"]}%). '
                           f'Consider improving survey timing and delivery channels.',
            })
        elif metrics['completion_rate'] > 80:
            insights.append({
                'category': 'Engagement',
                'severity': 'positive',
                'message': f'Excellent completion rate ({metrics["completion_rate"]}%), '
                           f'showing strong patient engagement.',
            })

        # Average score insights
        if metrics['avg_score'] < 3.0:
            insights.append({
                'category': 'Performance',
                'severity': 'high',
                'message': f'Below-average performance ({metrics["avg_score"]}/5.0). '
                           f'Review the worst performing questions for improvement opportunities.',
            })
        elif metrics['avg_score'] >= 4.5:
            insights.append({
                'category': 'Performance',
                'severity': 'positive',
                'message': f'Outstanding performance ({metrics["avg_score"]}/5.0). '
                           f'Maintain current service levels.',
            })

        # Negative rate insights
        if metrics['negative_rate'] > 20:
            insights.append({
                'category': 'Quality',
                'severity': 'high',
                'message': f'High negative survey rate ({metrics["negative_rate"]}%). '
                           f'Immediate action is required to address patient concerns.',
            })
        elif metrics['negative_rate'] < 5:
            insights.append({
                'category': 'Quality',
                'severity': 'positive',
                'message': f'Low negative survey rate ({metrics["negative_rate"]}%). '
                           f'Excellent patient satisfaction.',
            })

        # Score distribution insights
        poor_percent = score_dist['poor_percent']
        if poor_percent > 15:
            insights.append({
                'category': 'Distribution',
                'severity': 'medium',
                'message': f'High percentage of poor scores ({poor_percent}%). '
                           f'Investigate the root causes of dissatisfaction.',
            })

        excellent_percent = score_dist['excellent_percent']
        if excellent_percent > 60:
            insights.append({
                'category': 'Distribution',
                'severity': 'positive',
                'message': f'The majority of responses are excellent ({excellent_percent}%). '
                           f'Outstanding service delivery.',
            })

        # Channel performance insights
        best_channel = max(channel_perf.items(), key=lambda x: x[1]['completion_rate'])[0]
        worst_channel = min(channel_perf.items(), key=lambda x: x[1]['completion_rate'])[0]
        if channel_perf[best_channel]['completion_rate'] - channel_perf[worst_channel]['completion_rate'] > 30:
            insights.append({
                'category': 'Channels',
                'severity': 'medium',
                'message': f'Significant channel performance gap: {best_channel.capitalize()} performs '
                           f'much better ({channel_perf[best_channel]["completion_rate"]}%) than '
                           f'{worst_channel.capitalize()} ({channel_perf[worst_channel]["completion_rate"]}%).',
            })

        # Question-specific insights
        if rankings['bottom_5_by_score']:
            worst_question = rankings['bottom_5_by_score'][0]
            if worst_question['avg_score'] < 3.0:
                insights.append({
                    'category': 'Questions',
                    'severity': 'medium',
                    'message': f'Question {worst_question["order"]}: "{worst_question["question"]}" has the '
                               f'lowest average score ({worst_question["avg_score"]}/5.0). '
                               f'Consider reviewing this service area.',
                })

        if rankings['most_skipped_5']:
            most_skipped = rankings['most_skipped_5'][0]
            if most_skipped['response_rate'] < 70:
                insights.append({
                    'category': 'Questions',
                    'severity': 'low',
                    'message': f'Question {most_skipped["order"]}: "{most_skipped["question"]}" has a low '
                               f'response rate ({most_skipped["response_rate"]}%). '
                               f'Consider making it optional or improving its clarity.',
                })

        # Follow-up on negative surveys
        patient_contact = template_data.get('patient_contact_metrics', {})
        if patient_contact.get('contacted_rate', 0) < 50 and patient_contact.get('total_negative', 0) > 0:
            insights.append({
                'category': 'Follow-up',
                'severity': 'medium',
                'message': f'Only {patient_contact["contacted_rate"]}% of negative-survey patients have '
                           f'been contacted. Improve follow-up processes.',
            })

        return insights
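
    # Each insight is a dict with a 'category', a 'severity' drawn from
    # {'high', 'medium', 'low', 'positive'}, and a human-readable 'message',
    # e.g. (hypothetical numbers):
    #
    #     {'category': 'Quality', 'severity': 'high',
    #      'message': 'High negative survey rate (23.4%). ...'}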

    def _generate_summary(self, templates_data):
        """Generate an overall summary across all templates."""
        if not templates_data:
            return {}

        total_sent = sum(t['metrics']['total_sent'] for t in templates_data)
        total_completed = sum(t['metrics']['total_completed'] for t in templates_data)
        total_negative = sum(t['metrics']['negative_count'] for t in templates_data)

        # Average score weighted by completions per template
        weighted_score_sum = sum(
            t['metrics']['avg_score'] * t['metrics']['total_completed']
            for t in templates_data
        )
        weighted_avg_score = weighted_score_sum / total_completed if total_completed > 0 else 0

        # Best and worst performing templates
        sorted_by_score = sorted(templates_data, key=lambda x: x['metrics']['avg_score'], reverse=True)

        return {
            'total_templates': len(templates_data),
            'total_surveys_sent': total_sent,
            'total_surveys_completed': total_completed,
            'overall_completion_rate': round((total_completed / total_sent * 100) if total_sent > 0 else 0, 2),
            'overall_avg_score': round(weighted_avg_score, 2),
            'total_negative_surveys': total_negative,
            'overall_negative_rate': round((total_negative / total_completed * 100) if total_completed > 0 else 0, 2),
            'best_performing_template': {
                'name': sorted_by_score[0]['template_name'],
                'avg_score': sorted_by_score[0]['metrics']['avg_score'],
            },
            'worst_performing_template': {
                'name': sorted_by_score[-1]['template_name'],
                'avg_score': sorted_by_score[-1]['metrics']['avg_score'],
            },
        }


class ReportGenerator:
    """Report generation helper class."""

    def generate_markdown_to_file(self, data, output_dir):
        """Generate a Markdown report and write it to a file."""
        output_path = Path(output_dir) / 'survey_analytics_report.md'
        markdown_content = self.generate_markdown(data)
        with open(output_path, 'w') as f:
            f.write(markdown_content)
        return output_path
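
    # Illustrative usage sketch (the output directory is hypothetical):
    #
    #     data = AnalyticsGenerator().calculate_analytics()
    #     path = ReportGenerator().generate_markdown_to_file(data, 'reports')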

    def generate_markdown(self, data):
        """Generate the Markdown report content."""
        lines = []
        lines.append('# Survey Analytics Report\n\n')
        lines.append(f'**Generated:** {data["report_generated_at"]}\n\n')
        lines.append(f'**Date Range:** {data["date_range"]["start"][:10]} to {data["date_range"]["end"][:10]}\n\n')

        # Executive summary
        lines.append('## Executive Summary\n\n')
        summary = data['summary']
        lines.append(f'- **Total Survey Templates:** {summary["total_templates"]}\n')
        lines.append(f'- **Total Surveys Sent:** {summary["total_surveys_sent"]:,}\n')
        lines.append(f'- **Total Surveys Completed:** {summary["total_surveys_completed"]:,}\n')
        lines.append(f'- **Overall Completion Rate:** {summary["overall_completion_rate"]}%\n')
        lines.append(f'- **Overall Average Score:** {summary["overall_avg_score"]}/5.0\n')
        lines.append(f'- **Total Negative Surveys:** {summary["total_negative_surveys"]:,} ({summary["overall_negative_rate"]}%)\n\n')

        lines.append('### Best Performing Template\n')
        lines.append(f'**{summary["best_performing_template"]["name"]}**\n')
        lines.append(f'Average Score: {summary["best_performing_template"]["avg_score"]}/5.0\n\n')

        lines.append('### Worst Performing Template\n')
        lines.append(f'**{summary["worst_performing_template"]["name"]}**\n')
        lines.append(f'Average Score: {summary["worst_performing_template"]["avg_score"]}/5.0\n\n')

        # Template details
        for template_data in data['templates']:
            lines.append(f'## {template_data["template_name"]}\n\n')

            # Overview metrics
            metrics = template_data['metrics']
            lines.append('### Overview\n\n')
            lines.append(f'- **Survey Type:** {template_data["survey_type"]}\n')
            lines.append(f'- **Scoring Method:** {template_data["scoring_method"]}\n')
            lines.append(f'- **Questions:** {template_data["question_count"]}\n')
            lines.append(f'- **Total Sent:** {metrics["total_sent"]:,}\n')
            lines.append(f'- **Completed:** {metrics["total_completed"]:,}\n')
            lines.append(f'- **Completion Rate:** {metrics["completion_rate"]}%\n')
            lines.append(f'- **Average Score:** {metrics["avg_score"]}/5.0 (±{metrics["std_deviation"]})\n')
            lines.append(f'- **Negative Surveys:** {metrics["negative_count"]:,} ({metrics["negative_rate"]}%)\n\n')

            # Score distribution
            lines.append('### Score Distribution\n\n')
            lines.append('| Category | Count | Percentage |\n')
            lines.append('|----------|-------|------------|\n')
            for cat in ['excellent', 'good', 'average', 'poor']:
                count = template_data['score_distribution'][cat]
                percent = template_data['score_distribution'][f'{cat}_percent']
                lines.append(f'| {cat.capitalize()} | {count} | {percent}% |\n')
            lines.append('\n')

            # Channel performance
            lines.append('### Channel Performance\n\n')
            lines.append('| Channel | Sent | Completed | Rate | Avg Score |\n')
            lines.append('|---------|------|-----------|------|-----------|\n')
            for channel, perf in template_data['channel_performance'].items():
                lines.append(
                    f'| {channel.capitalize()} | {perf["total_sent"]} | {perf["completed"]} '
                    f'| {perf["completion_rate"]}% | {perf["avg_score"]} |\n'
                )
            lines.append('\n')

            # Question analysis
            lines.append('### Question Analysis\n\n')
            for question in template_data['questions']:
                lines.append(f'#### Q{question["order"]}: {question["question_text"]}\n\n')
                lines.append(f'- **Type:** {question["question_type"]}\n')
                lines.append(f'- **Required:** {"Yes" if question["is_required"] else "No"}\n')
                lines.append(f'- **Response Rate:** {question["response_rate"]}% ({question["response_count"]} responses)\n')
                if 'avg_score' in question:
                    lines.append(f'- **Average Score:** {question["avg_score"]}/5.0 (±{question["std_deviation"]})\n')
                if 'score_distribution' in question:
                    lines.append('\n**Score Distribution:**\n\n')
                    lines.append('| Score | Count | % |\n')
                    lines.append('|-------|-------|---|\n')
                    for i in range(1, 6):
                        count = question['score_distribution'][f'score_{i}']
                        percent = question['score_distribution'][f'score_{i}_percent']
                        lines.append(f'| {i} | {count} | {percent}% |\n')
                lines.append('\n')

            lines.append('---\n\n')

        return ''.join(lines)

    def generate_html_to_file(self, data, output_dir):
        """Generate an HTML report and write it to a file."""
        output_path = Path(output_dir) / 'survey_analytics_report.html'
        html_content = self.generate_html(data)
        with open(output_path, 'w') as f:
            f.write(html_content)
        return output_path

    def generate_html(self, data):
        """Generate the HTML report content (a plain, unstyled document)."""
        generated_at = data['report_generated_at']
        date_range = data['date_range']
        summary = data['summary']

        template_sections = ''.join(
            self._build_template_section(t) for t in data['templates']
        )

        return f"""<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>Survey Analytics Report</title></head>
<body>
<h1>Survey Analytics Report</h1>
<p>Generated: {generated_at[:19]}</p>
<p>Date Range: {date_range['start'][:10]} to {date_range['end'][:10]}</p>
<h2>Executive Summary</h2>
<table>
<tr><td>Overall Average Score:</td><td>{summary['overall_avg_score']}/5.0</td></tr>
<tr><td>Negative Survey Rate:</td><td>{summary['overall_negative_rate']}%</td></tr>
<tr><td>Best Performing:</td><td>{summary['best_performing_template']['name']} ({summary['best_performing_template']['avg_score']})</td></tr>
<tr><td>Worst Performing:</td><td>{summary['worst_performing_template']['name']} ({summary['worst_performing_template']['avg_score']})</td></tr>
</table>
{template_sections}
</body>
</html>"""

    def _build_template_section(self, template_data):
        """Build the HTML section for one template."""
        questions = template_data['questions']
        numeric_questions = [q for q in questions if 'avg_score' in q]

        rows = ''.join(
            f"<tr><td>Q{q['order']}: {q['question_text']}</td>"
            f"<td>{q['question_type']}</td>"
            f"<td>{q['response_count']}</td>"
            f"<td>{q['response_rate']}%</td>"
            f"<td>{q.get('avg_score', 'N/A')}</td></tr>"
            for q in questions
        )
        question_sections = ''.join(self._build_question_section(q) for q in questions)

        return f"""
<h2>{template_data['template_name']}</h2>
<p>Detailed analysis of {len(questions)} questions with statistical metrics,
score distributions, and performance trends.</p>
<table>
<tr><th>Question</th><th>Type</th><th>Responses</th><th>Response Rate</th><th>Avg Score</th></tr>
{rows}
</table>
<h3>Top Questions</h3>
{self._build_top_bottom_questions(numeric_questions, mode='top')}
<h3>Bottom Questions</h3>
{self._build_top_bottom_questions(numeric_questions, mode='bottom')}
{question_sections}
"""

    def _build_question_section(self, question):
        """Build the HTML detail section for a single question."""
        # Score class for color coding
        avg_score = question.get('avg_score', 0)
        score_class = self._get_score_class(avg_score)

        arabic = (
            f'<p class="arabic">{question.get("question_text_ar", "")}</p>'
            if question.get('question_text_ar') else ''
        )

        if 'avg_score' in question:
            content = (
                f'<p class="{score_class}">Average score: {question["avg_score"]}/5.0 '
                f'(±{question["std_deviation"]})</p>'
                + self._build_channel_content(question)
            )
        elif question['question_type'] in ['text', 'textarea']:
            content = '<p>Review individual survey responses to read patient comments for this question.</p>'
        elif question['question_type'] == 'multiple_choice':
            content = self._build_choice_question_content(question)
        else:
            content = '<p>Detailed analysis not available for this question type.</p>'

        return f'<h4>Q{question["order"]}: {question["question_text"]}</h4>{arabic}{content}'

    def _build_choice_question_content(self, question):
        """Build content for multiple choice questions."""
        choice_dist = question.get('choice_distribution', {})
        html = '<table><tr><th>Choice</th><th>Count</th><th>Percentage</th></tr>'
        for choice, count in choice_dist.items():
            # Skip the derived *_percent entries added alongside the raw counts
            if str(choice).endswith('_percent'):
                continue
            percent = choice_dist.get(f'{choice}_percent', 0)
            html += f'<tr><td>{choice}</td><td>{count}</td><td>{percent}%</td></tr>'
        html += '</table>'
        return html

    def _build_channel_content(self, question):
        """Build per-channel performance content for a question."""
        channel_perf = question.get('channel_performance', {})
        if not channel_perf:
            return '<p>No channel data available.</p>'
        html = '<table><tr><th>Channel</th><th>Responses</th><th>Avg Score</th></tr>'
        for channel, perf in channel_perf.items():
            html += (
                f'<tr><td>{channel.capitalize()}</td>'
                f'<td>{perf["response_count"]}</td>'
                f'<td>{perf["avg_score"]}</td></tr>'
            )
        html += '</table>'
        return html

    def _build_top_bottom_questions(self, numeric_questions, mode='top'):
        """Build an HTML list of the five best or worst scoring questions."""
        if not numeric_questions:
            return '<p>No numeric questions available.</p>'
        sorted_questions = sorted(
            numeric_questions,
            key=lambda x: x.get('avg_score', 0),
            reverse=(mode == 'top'),
        )
        selected = sorted_questions[:5]
        html = '<ol>'
        for q in selected:
            html += f'<li>Q{q["order"]}: {q["question_text"]} ({q.get("avg_score", 0)}/5.0)</li>'
        html += '</ol>'
        return html

    def _get_score_class(self, avg_score):
        """Map an average score onto a CSS class for color coding.

        The class names are illustrative; the bands mirror _get_score_distribution.
        """
        if avg_score >= 4.5:
            return 'score-excellent'
        if avg_score >= 3.5:
            return 'score-good'
        if avg_score >= 2.5:
            return 'score-average'
        return 'score-poor'

    def generate_json_to_file(self, data, output_dir):
        """Generate a JSON report (the raw analytics dict) and write it to a file."""
        output_path = Path(output_dir) / 'survey_analytics_report.json'
        with open(output_path, 'w') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        return output_path

    def generate_index_html(self, reports):
        """Build a small index page summarizing the generated report files."""
        rows = ''.join(f'<li>{report}</li>' for report in reports)
        return f"""<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>Survey Reports</title></head>
<body>
<h1>Survey Reports</h1>
<table>
<tr><td>Total Reports:</td><td>{len(reports)}</td></tr>
<tr><td>Report Directory:</td><td>reports/</td></tr>
</table>
<ul>{rows}</ul>
</body>
</html>"""
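

# End-to-end usage sketch, e.g. from a management command's handle() method
# (the template name and output directory are hypothetical):
#
#     data = AnalyticsGenerator().calculate_analytics(template_name='Inpatient')
#     reporter = ReportGenerator()
#     md_path = reporter.generate_markdown_to_file(data, 'reports')
#     html_path = reporter.generate_html_to_file(data, 'reports')
#     json_path = reporter.generate_json_to_file(data, 'reports')
#     index_html = reporter.generate_index_html([md_path, html_path, json_path])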