# kaauh_ats/load_tests/monitoring.py

"""
Performance monitoring and reporting utilities for ATS load testing.
This module provides tools for monitoring system performance during load tests,
collecting metrics, and generating comprehensive reports.
"""
import os
import json
import time
import psutil
import threading
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
import matplotlib
matplotlib.use("Agg")  # non-interactive backend so charts can be rendered without a display
import matplotlib.pyplot as plt
import pandas as pd
from locust import events
import requests


@dataclass
class SystemMetrics:
    """System performance metrics at a point in time."""
    timestamp: datetime
    cpu_percent: float
    memory_percent: float
    memory_used_gb: float
    disk_usage_percent: float
    network_io: Dict[str, int]
    active_connections: int


@dataclass
class DatabaseMetrics:
    """Database performance metrics."""
    timestamp: datetime
    active_connections: int
    query_count: int
    avg_query_time: float
    slow_queries: int
    cache_hit_ratio: float


@dataclass
class TestResults:
    """Complete test results summary."""
    test_name: str
    start_time: datetime
    end_time: datetime
    duration_seconds: float
    total_requests: int
    total_failures: int
    avg_response_time: float
    median_response_time: float
    p95_response_time: float
    p99_response_time: float
    requests_per_second: float
    peak_rps: float
    system_metrics: List[SystemMetrics]
    database_metrics: List[DatabaseMetrics]
    error_summary: Dict[str, int]


class PerformanceMonitor:
    """Monitors system performance during load tests."""

    def __init__(self, interval: float = 5.0):
        self.interval = interval
        self.monitoring = False
        self.system_metrics = []
        self.database_metrics = []
        self.monitor_thread = None
        self.start_time = None

    def start_monitoring(self):
        """Start performance monitoring."""
        self.monitoring = True
        self.start_time = datetime.now()
        self.system_metrics = []
        self.database_metrics = []
        self.monitor_thread = threading.Thread(target=self._monitor_loop)
        self.monitor_thread.daemon = True
        self.monitor_thread.start()
        print(f"Performance monitoring started (interval: {self.interval}s)")

    def stop_monitoring(self):
        """Stop performance monitoring."""
        self.monitoring = False
        if self.monitor_thread:
            self.monitor_thread.join(timeout=10)
        print("Performance monitoring stopped")

    def _monitor_loop(self):
        """Main monitoring loop."""
        while self.monitoring:
            try:
                # Collect system metrics
                system_metric = self._collect_system_metrics()
                self.system_metrics.append(system_metric)
                # Collect database metrics
                db_metric = self._collect_database_metrics()
                if db_metric:
                    self.database_metrics.append(db_metric)
                time.sleep(self.interval)
            except Exception as e:
                print(f"Error in monitoring loop: {e}")
                time.sleep(self.interval)

    def _collect_system_metrics(self) -> SystemMetrics:
        """Collect current system metrics."""
        # CPU and memory
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')
        # Network I/O
        network = psutil.net_io_counters()
        network_io = {
            'bytes_sent': network.bytes_sent,
            'bytes_recv': network.bytes_recv,
            'packets_sent': network.packets_sent,
            'packets_recv': network.packets_recv
        }
        # Network connections (may require elevated privileges on some platforms)
        try:
            connections = len(psutil.net_connections())
        except psutil.AccessDenied:
            connections = 0
        return SystemMetrics(
            timestamp=datetime.now(),
            cpu_percent=cpu_percent,
            memory_percent=memory.percent,
            memory_used_gb=memory.used / (1024**3),
            disk_usage_percent=disk.percent,
            network_io=network_io,
            active_connections=connections
        )

    def _collect_database_metrics(self) -> Optional[DatabaseMetrics]:
        """Collect database metrics (PostgreSQL specific)."""
        try:
            # This would need to be adapted to the actual database setup;
            # for now, return mock data (see the sketch after this class).
            return DatabaseMetrics(
                timestamp=datetime.now(),
                active_connections=10,
                query_count=1000,
                avg_query_time=0.05,
                slow_queries=2,
                cache_hit_ratio=0.85
            )
        except Exception as e:
            print(f"Error collecting database metrics: {e}")
            return None
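

# --- Illustrative sketch -----------------------------------------------------
# The mock data in _collect_database_metrics() would be replaced with real
# queries in a deployment. The function below is a minimal, hypothetical
# example of pulling live figures from PostgreSQL's statistics views. It
# assumes psycopg2 is installed and that the DSN below is a placeholder to be
# replaced; it is not wired into PerformanceMonitor.
def collect_postgres_metrics_example(dsn: str = "dbname=ats user=ats") -> Optional[DatabaseMetrics]:
    """Query pg_stat_activity / pg_stat_database for one DatabaseMetrics sample."""
    try:
        import psycopg2  # optional dependency, imported lazily

        conn = psycopg2.connect(dsn)
        try:
            with conn.cursor() as cur:
                # Currently active backend connections
                cur.execute("SELECT count(*) FROM pg_stat_activity WHERE state = 'active'")
                active_connections = cur.fetchone()[0]
                # Buffer cache hit ratio across all databases
                cur.execute(
                    "SELECT coalesce(sum(blks_hit)::float / nullif(sum(blks_hit) + sum(blks_read), 0), 0) "
                    "FROM pg_stat_database"
                )
                cache_hit_ratio = cur.fetchone()[0]
                # Committed transactions, as a rough proxy for query volume
                cur.execute("SELECT coalesce(sum(xact_commit), 0) FROM pg_stat_database")
                query_count = int(cur.fetchone()[0])
        finally:
            conn.close()
        return DatabaseMetrics(
            timestamp=datetime.now(),
            active_connections=active_connections,
            query_count=query_count,
            # Per-query timings need the pg_stat_statements extension; left as
            # placeholders in this sketch.
            avg_query_time=0.0,
            slow_queries=0,
            cache_hit_ratio=cache_hit_ratio,
        )
    except Exception as e:
        print(f"Error collecting PostgreSQL metrics: {e}")
        return None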


class ReportGenerator:
    """Generates comprehensive performance reports."""

    def __init__(self, output_dir: str = "load_tests/reports"):
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def generate_html_report(self, results: TestResults) -> str:
        """Generate an HTML performance report."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"performance_report_{timestamp}.html"
        filepath = os.path.join(self.output_dir, filename)
        html_content = self._create_html_template(results)
        with open(filepath, 'w') as f:
            f.write(html_content)
        print(f"HTML report generated: {filepath}")
        return filepath

    def generate_json_report(self, results: TestResults) -> str:
        """Generate a JSON performance report."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"performance_report_{timestamp}.json"
        filepath = os.path.join(self.output_dir, filename)
        # asdict() also converts the nested metric dataclasses to dicts
        results_dict = asdict(results)
        # Convert datetime objects to ISO strings so they serialise cleanly
        for key, value in results_dict.items():
            if isinstance(value, datetime):
                results_dict[key] = value.isoformat()
        for metric in results_dict.get('system_metrics', []):
            metric['timestamp'] = metric['timestamp'].isoformat()
        for metric in results_dict.get('database_metrics', []):
            metric['timestamp'] = metric['timestamp'].isoformat()
        with open(filepath, 'w') as f:
            json.dump(results_dict, f, indent=2)
        print(f"JSON report generated: {filepath}")
        return filepath

    def generate_charts(self, results: TestResults) -> List[str]:
        """Generate performance charts."""
        chart_files = []
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        if results.system_metrics:
            # System metrics chart
            chart_file = self._create_system_metrics_chart(results.system_metrics, timestamp)
            chart_files.append(chart_file)
        return chart_files

    def _create_html_template(self, results: TestResults) -> str:
        """Create the HTML template for the report."""
        # Guard against division by zero when no requests were recorded
        success_rate = (
            (results.total_requests - results.total_failures) / results.total_requests * 100
            if results.total_requests else 0.0
        )
        return f"""
<!DOCTYPE html>
<html>
<head>
    <title>ATS Load Test Report - {results.test_name}</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; }}
        .header {{ background-color: #f4f4f4; padding: 20px; border-radius: 5px; }}
        .section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }}
        .metric {{ display: inline-block; margin: 10px; padding: 10px; background-color: #e9ecef; border-radius: 3px; }}
        .success {{ color: green; }}
        .warning {{ color: orange; }}
        .error {{ color: red; }}
        table {{ border-collapse: collapse; width: 100%; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
        th {{ background-color: #f2f2f2; }}
    </style>
</head>
<body>
    <div class="header">
        <h1>ATS Load Test Report</h1>
        <h2>{results.test_name}</h2>
        <p><strong>Test Duration:</strong> {results.duration_seconds:.2f} seconds</p>
        <p><strong>Test Period:</strong> {results.start_time} to {results.end_time}</p>
    </div>
    <div class="section">
        <h3>Summary Metrics</h3>
        <div class="metric">
            <strong>Total Requests:</strong> {results.total_requests}
        </div>
        <div class="metric">
            <strong>Total Failures:</strong> {results.total_failures}
        </div>
        <div class="metric">
            <strong>Success Rate:</strong> {success_rate:.2f}%
        </div>
        <div class="metric">
            <strong>Requests/Second:</strong> {results.requests_per_second:.2f}
        </div>
        <div class="metric">
            <strong>Peak RPS:</strong> {results.peak_rps:.2f}
        </div>
    </div>
    <div class="section">
        <h3>Response Times</h3>
        <div class="metric">
            <strong>Average:</strong> {results.avg_response_time:.2f}ms
        </div>
        <div class="metric">
            <strong>Median:</strong> {results.median_response_time:.2f}ms
        </div>
        <div class="metric">
            <strong>95th Percentile:</strong> {results.p95_response_time:.2f}ms
        </div>
        <div class="metric">
            <strong>99th Percentile:</strong> {results.p99_response_time:.2f}ms
        </div>
    </div>
    <div class="section">
        <h3>System Performance</h3>
        {self._generate_system_summary(results.system_metrics)}
    </div>
    <div class="section">
        <h3>Error Summary</h3>
        {self._generate_error_summary(results.error_summary)}
    </div>
</body>
</html>
"""

    def _generate_system_summary(self, metrics: List[SystemMetrics]) -> str:
        """Generate the system performance summary."""
        if not metrics:
            return "<p>No system metrics available</p>"
        avg_cpu = sum(m.cpu_percent for m in metrics) / len(metrics)
        avg_memory = sum(m.memory_percent for m in metrics) / len(metrics)
        max_cpu = max(m.cpu_percent for m in metrics)
        max_memory = max(m.memory_percent for m in metrics)
        return f"""
        <div class="metric">
            <strong>Average CPU:</strong> {avg_cpu:.2f}%
        </div>
        <div class="metric">
            <strong>Peak CPU:</strong> {max_cpu:.2f}%
        </div>
        <div class="metric">
            <strong>Average Memory:</strong> {avg_memory:.2f}%
        </div>
        <div class="metric">
            <strong>Peak Memory:</strong> {max_memory:.2f}%
        </div>
        """

    def _generate_error_summary(self, errors: Dict[str, int]) -> str:
        """Generate the error summary table."""
        if not errors:
            return "<p>No errors recorded</p>"
        rows = ""
        for error_type, count in errors.items():
            rows += f"<tr><td>{error_type}</td><td>{count}</td></tr>"
        return f"""
        <table>
            <tr><th>Error Type</th><th>Count</th></tr>
            {rows}
        </table>
        """

    def _create_system_metrics_chart(self, metrics: List[SystemMetrics], timestamp: str) -> str:
        """Create the system metrics chart."""
        if not metrics:
            return ""
        # Prepare data
        timestamps = [m.timestamp for m in metrics]
        cpu_data = [m.cpu_percent for m in metrics]
        memory_data = [m.memory_percent for m in metrics]
        # Create chart
        plt.figure(figsize=(12, 6))
        plt.plot(timestamps, cpu_data, label='CPU %', color='red')
        plt.plot(timestamps, memory_data, label='Memory %', color='blue')
        plt.xlabel('Time')
        plt.ylabel('Percentage')
        plt.title('System Performance During Load Test')
        plt.legend()
        plt.xticks(rotation=45)
        plt.tight_layout()
        filename = f"system_metrics_{timestamp}.png"
        filepath = os.path.join(self.output_dir, filename)
        plt.savefig(filepath)
        plt.close()
        print(f"System metrics chart generated: {filepath}")
        return filepath
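

# --- Illustrative sketch -----------------------------------------------------
# How the classes above could be exercised outside Locust: sample this machine
# for a short window, wrap the samples in a TestResults, and write the reports.
# The request-level numbers are placeholders (no traffic is driven here), and
# the function is not called anywhere by default.
def run_standalone_example(duration: float = 10.0) -> TestResults:
    """Monitor the local system for `duration` seconds and generate reports."""
    example_monitor = PerformanceMonitor(interval=1.0)
    example_monitor.start_monitoring()
    time.sleep(duration)
    example_monitor.stop_monitoring()

    results = TestResults(
        test_name="Standalone monitoring example",
        start_time=example_monitor.start_time,
        end_time=datetime.now(),
        duration_seconds=duration,
        # Placeholder request figures: no load is generated in this sketch
        total_requests=0,
        total_failures=0,
        avg_response_time=0.0,
        median_response_time=0.0,
        p95_response_time=0.0,
        p99_response_time=0.0,
        requests_per_second=0.0,
        peak_rps=0.0,
        system_metrics=example_monitor.system_metrics,
        database_metrics=example_monitor.database_metrics,
        error_summary={},
    )
    generator = ReportGenerator()
    generator.generate_html_report(results)
    generator.generate_json_report(results)
    generator.generate_charts(results)
    return results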


# Global monitor instance
monitor = PerformanceMonitor()
report_generator = ReportGenerator()


# Locust event handlers
@events.test_start.add_listener
def on_test_start(environment, **kwargs):
    """Start monitoring when the test starts."""
    monitor.start_monitoring()


@events.test_stop.add_listener
def on_test_stop(environment, **kwargs):
    """Stop monitoring and generate reports when the test stops."""
    monitor.stop_monitoring()
    # Collect test results from Locust's aggregated stats
    stats = environment.stats
    end_time = datetime.now()
    # Summarise recorded failures per request/error combination
    error_summary = {
        f"{error.method} {error.name}: {error.error}": error.occurrences
        for error in stats.errors.values()
    }
    results = TestResults(
        test_name=getattr(environment.parsed_options, 'test_name', 'Load Test'),
        start_time=monitor.start_time,
        end_time=end_time,
        duration_seconds=(end_time - monitor.start_time).total_seconds(),
        total_requests=stats.total.num_requests,
        total_failures=stats.total.num_failures,
        avg_response_time=stats.total.avg_response_time,
        median_response_time=stats.total.median_response_time,
        p95_response_time=stats.total.get_response_time_percentile(0.95),
        p99_response_time=stats.total.get_response_time_percentile(0.99),
        # total_rps covers the whole run; current_rps would be near zero at stop time
        requests_per_second=stats.total.total_rps,
        # stats.history entries are snapshot dicts; the list may be empty if
        # history collection was not running
        peak_rps=max((entry.get("current_rps", 0) for entry in stats.history), default=0),
        system_metrics=monitor.system_metrics,
        database_metrics=monitor.database_metrics,
        error_summary=error_summary
    )
    # Generate reports
    report_generator.generate_html_report(results)
    report_generator.generate_json_report(results)
    report_generator.generate_charts(results)


@events.request.add_listener
def on_request(request_type, name, response_time, response_length, response, **kwargs):
    """Track requests for error analysis."""
    # This could be enhanced to track specific error patterns; a sketch follows below.
    pass
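

# --- Illustrative sketch -----------------------------------------------------
# One way to implement the enhancement noted in on_request(): count failures
# per (request name, exception type) in a module-level dict. The function is
# deliberately not registered as a listener; swap it in for on_request if
# per-pattern tracking is wanted, and feed request_error_counts into
# TestResults.error_summary in on_test_stop.
request_error_counts: Dict[str, int] = {}


def track_request_errors(request_type, name, response_time, response_length,
                         response, exception=None, **kwargs):
    """Record one counter per request name and exception class."""
    if exception is not None:
        key = f"{request_type} {name}: {type(exception).__name__}"
        request_error_counts[key] = request_error_counts.get(key, 0) + 1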


def check_performance_thresholds(results: TestResults, thresholds: Dict[str, float]) -> Dict[str, bool]:
    """Check whether the results meet the defined performance thresholds."""
    # Avoid division by zero when no requests were recorded
    error_rate = results.total_failures / results.total_requests if results.total_requests else 0.0
    checks = {
        'response_time_p95': results.p95_response_time <= thresholds.get('response_time_p95', 2000),
        'response_time_avg': results.avg_response_time <= thresholds.get('response_time_avg', 1000),
        'error_rate': error_rate <= thresholds.get('error_rate', 0.05),
        'rps_minimum': results.requests_per_second >= thresholds.get('rps_minimum', 10)
    }
    return checks


if __name__ == "__main__":
    # Example usage
    print("Performance monitoring utilities for ATS load testing")
    print("Use with Locust for automatic monitoring and reporting")