# kaauh_ats/load_tests/monitoring.py

"""
Performance monitoring and reporting utilities for ATS load testing.
This module provides tools for monitoring system performance during load tests,
collecting metrics, and generating comprehensive reports.
"""
import os
import json
import time
import psutil
import threading
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
import matplotlib
matplotlib.use("Agg")  # non-interactive backend so charts can be rendered without a display
import matplotlib.pyplot as plt
import pandas as pd
from locust import events
import requests


@dataclass
class SystemMetrics:
    """System performance metrics at a point in time."""
    timestamp: datetime
    cpu_percent: float
    memory_percent: float
    memory_used_gb: float
    disk_usage_percent: float
    network_io: Dict[str, int]
    active_connections: int


@dataclass
class DatabaseMetrics:
    """Database performance metrics."""
    timestamp: datetime
    active_connections: int
    query_count: int
    avg_query_time: float
    slow_queries: int
    cache_hit_ratio: float


@dataclass
class TestResults:
    """Complete test results summary."""
    test_name: str
    start_time: datetime
    end_time: datetime
    duration_seconds: float
    total_requests: int
    total_failures: int
    avg_response_time: float
    median_response_time: float
    p95_response_time: float
    p99_response_time: float
    requests_per_second: float
    peak_rps: float
    system_metrics: List[SystemMetrics]
    database_metrics: List[DatabaseMetrics]
    error_summary: Dict[str, int]


class PerformanceMonitor:
    """Monitors system performance during load tests."""

    def __init__(self, interval: float = 5.0):
        self.interval = interval
        self.monitoring = False
        self.system_metrics = []
        self.database_metrics = []
        self.monitor_thread = None
        self.start_time = None

    def start_monitoring(self):
        """Start performance monitoring."""
        self.monitoring = True
        self.start_time = datetime.now()
        self.system_metrics = []
        self.database_metrics = []
        self.monitor_thread = threading.Thread(target=self._monitor_loop)
        self.monitor_thread.daemon = True
        self.monitor_thread.start()
        print(f"Performance monitoring started (interval: {self.interval}s)")

    def stop_monitoring(self):
        """Stop performance monitoring."""
        self.monitoring = False
        if self.monitor_thread:
            self.monitor_thread.join(timeout=10)
        print("Performance monitoring stopped")

    def _monitor_loop(self):
        """Main monitoring loop."""
        while self.monitoring:
            try:
                # Collect system metrics
                system_metric = self._collect_system_metrics()
                self.system_metrics.append(system_metric)
                # Collect database metrics
                db_metric = self._collect_database_metrics()
                if db_metric:
                    self.database_metrics.append(db_metric)
                time.sleep(self.interval)
            except Exception as e:
                print(f"Error in monitoring loop: {e}")
                time.sleep(self.interval)

    def _collect_system_metrics(self) -> SystemMetrics:
        """Collect current system metrics."""
        # CPU and memory
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')
        # Network I/O
        network = psutil.net_io_counters()
        network_io = {
            'bytes_sent': network.bytes_sent,
            'bytes_recv': network.bytes_recv,
            'packets_sent': network.packets_sent,
            'packets_recv': network.packets_recv
        }
        # Network connections (may require elevated privileges on some platforms)
        try:
            connections = len(psutil.net_connections())
        except psutil.AccessDenied:
            connections = 0
        return SystemMetrics(
            timestamp=datetime.now(),
            cpu_percent=cpu_percent,
            memory_percent=memory.percent,
            memory_used_gb=memory.used / (1024**3),
            disk_usage_percent=disk.percent,
            network_io=network_io,
            active_connections=connections
        )

    def _collect_database_metrics(self) -> Optional[DatabaseMetrics]:
        """Collect database metrics (PostgreSQL specific)."""
        try:
            # This would need to be adapted to the actual database setup;
            # for now, return mock data (see the sketch after this class).
            return DatabaseMetrics(
                timestamp=datetime.now(),
                active_connections=10,
                query_count=1000,
                avg_query_time=0.05,
                slow_queries=2,
                cache_hit_ratio=0.85
            )
        except Exception as e:
            print(f"Error collecting database metrics: {e}")
            return None
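

# --- Illustrative sketch -----------------------------------------------------
# The mock data in _collect_database_metrics() would be replaced with real
# queries in a deployment. The function below is a minimal, hypothetical
# example of pulling live figures from PostgreSQL's statistics views. It
# assumes psycopg2 is installed and that the DSN below is a placeholder to be
# replaced; it is not wired into PerformanceMonitor.
def collect_postgres_metrics_example(dsn: str = "dbname=ats user=ats") -> Optional[DatabaseMetrics]:
    """Query pg_stat_activity / pg_stat_database for one DatabaseMetrics sample."""
    try:
        import psycopg2  # optional dependency, imported lazily

        conn = psycopg2.connect(dsn)
        try:
            with conn.cursor() as cur:
                # Currently active backend connections
                cur.execute("SELECT count(*) FROM pg_stat_activity WHERE state = 'active'")
                active_connections = cur.fetchone()[0]
                # Buffer cache hit ratio across all databases
                cur.execute(
                    "SELECT coalesce(sum(blks_hit)::float / nullif(sum(blks_hit) + sum(blks_read), 0), 0) "
                    "FROM pg_stat_database"
                )
                cache_hit_ratio = cur.fetchone()[0]
                # Committed transactions, as a rough proxy for query volume
                cur.execute("SELECT coalesce(sum(xact_commit), 0) FROM pg_stat_database")
                query_count = int(cur.fetchone()[0])
        finally:
            conn.close()
        return DatabaseMetrics(
            timestamp=datetime.now(),
            active_connections=active_connections,
            query_count=query_count,
            # Per-query timings need the pg_stat_statements extension; left as
            # placeholders in this sketch.
            avg_query_time=0.0,
            slow_queries=0,
            cache_hit_ratio=cache_hit_ratio,
        )
    except Exception as e:
        print(f"Error collecting PostgreSQL metrics: {e}")
        return None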


class ReportGenerator:
    """Generates comprehensive performance reports."""

    def __init__(self, output_dir: str = "load_tests/reports"):
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def generate_html_report(self, results: TestResults) -> str:
        """Generate an HTML performance report."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"performance_report_{timestamp}.html"
        filepath = os.path.join(self.output_dir, filename)
        html_content = self._create_html_template(results)
        with open(filepath, 'w') as f:
            f.write(html_content)
        print(f"HTML report generated: {filepath}")
        return filepath

    def generate_json_report(self, results: TestResults) -> str:
        """Generate a JSON performance report."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"performance_report_{timestamp}.json"
        filepath = os.path.join(self.output_dir, filename)
        # asdict() also converts the nested metric dataclasses to dicts
        results_dict = asdict(results)
        # Convert datetime objects to ISO strings so they serialise cleanly
        for key, value in results_dict.items():
            if isinstance(value, datetime):
                results_dict[key] = value.isoformat()
        for metric in results_dict.get('system_metrics', []):
            metric['timestamp'] = metric['timestamp'].isoformat()
        for metric in results_dict.get('database_metrics', []):
            metric['timestamp'] = metric['timestamp'].isoformat()
        with open(filepath, 'w') as f:
            json.dump(results_dict, f, indent=2)
        print(f"JSON report generated: {filepath}")
        return filepath

    def generate_charts(self, results: TestResults) -> List[str]:
        """Generate performance charts."""
        chart_files = []
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        if results.system_metrics:
            # System metrics chart
            chart_file = self._create_system_metrics_chart(results.system_metrics, timestamp)
            chart_files.append(chart_file)
        return chart_files

    def _create_html_template(self, results: TestResults) -> str:
        """Create the HTML template for the report."""
        # Guard against division by zero when no requests were recorded
        success_rate = (
            (results.total_requests - results.total_failures) / results.total_requests * 100
            if results.total_requests else 0.0
        )
        return f"""
<!DOCTYPE html>
<html>
<head>
    <title>ATS Load Test Report - {results.test_name}</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; }}
        .header {{ background-color: #f4f4f4; padding: 20px; border-radius: 5px; }}
        .section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }}
        .metric {{ display: inline-block; margin: 10px; padding: 10px; background-color: #e9ecef; border-radius: 3px; }}
        .success {{ color: green; }}
        .warning {{ color: orange; }}
        .error {{ color: red; }}
        table {{ border-collapse: collapse; width: 100%; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
        th {{ background-color: #f2f2f2; }}
    </style>
</head>
<body>
    <div class="header">
        <h1>ATS Load Test Report</h1>
        <h2>{results.test_name}</h2>
        <p><strong>Test Duration:</strong> {results.duration_seconds:.2f} seconds</p>
        <p><strong>Test Period:</strong> {results.start_time} to {results.end_time}</p>
    </div>
    <div class="section">
        <h3>Summary Metrics</h3>
        <div class="metric">
            <strong>Total Requests:</strong> {results.total_requests}
        </div>
        <div class="metric">
            <strong>Total Failures:</strong> {results.total_failures}
        </div>
        <div class="metric">
            <strong>Success Rate:</strong> {success_rate:.2f}%
        </div>
        <div class="metric">
            <strong>Requests/Second:</strong> {results.requests_per_second:.2f}
        </div>
        <div class="metric">
            <strong>Peak RPS:</strong> {results.peak_rps:.2f}
        </div>
    </div>
    <div class="section">
        <h3>Response Times</h3>
        <div class="metric">
            <strong>Average:</strong> {results.avg_response_time:.2f}ms
        </div>
        <div class="metric">
            <strong>Median:</strong> {results.median_response_time:.2f}ms
        </div>
        <div class="metric">
            <strong>95th Percentile:</strong> {results.p95_response_time:.2f}ms
        </div>
        <div class="metric">
            <strong>99th Percentile:</strong> {results.p99_response_time:.2f}ms
        </div>
    </div>
    <div class="section">
        <h3>System Performance</h3>
        {self._generate_system_summary(results.system_metrics)}
    </div>
    <div class="section">
        <h3>Error Summary</h3>
        {self._generate_error_summary(results.error_summary)}
    </div>
</body>
</html>
"""

    def _generate_system_summary(self, metrics: List[SystemMetrics]) -> str:
        """Generate the system performance summary."""
        if not metrics:
            return "<p>No system metrics available</p>"
        avg_cpu = sum(m.cpu_percent for m in metrics) / len(metrics)
        avg_memory = sum(m.memory_percent for m in metrics) / len(metrics)
        max_cpu = max(m.cpu_percent for m in metrics)
        max_memory = max(m.memory_percent for m in metrics)
        return f"""
        <div class="metric">
            <strong>Average CPU:</strong> {avg_cpu:.2f}%
        </div>
        <div class="metric">
            <strong>Peak CPU:</strong> {max_cpu:.2f}%
        </div>
        <div class="metric">
            <strong>Average Memory:</strong> {avg_memory:.2f}%
        </div>
        <div class="metric">
            <strong>Peak Memory:</strong> {max_memory:.2f}%
        </div>
        """

    def _generate_error_summary(self, errors: Dict[str, int]) -> str:
        """Generate the error summary table."""
        if not errors:
            return "<p>No errors recorded</p>"
        rows = ""
        for error_type, count in errors.items():
            rows += f"<tr><td>{error_type}</td><td>{count}</td></tr>"
        return f"""
        <table>
            <tr><th>Error Type</th><th>Count</th></tr>
            {rows}
        </table>
        """

    def _create_system_metrics_chart(self, metrics: List[SystemMetrics], timestamp: str) -> str:
        """Create the system metrics chart."""
        if not metrics:
            return ""
        # Prepare data
        timestamps = [m.timestamp for m in metrics]
        cpu_data = [m.cpu_percent for m in metrics]
        memory_data = [m.memory_percent for m in metrics]
        # Create chart
        plt.figure(figsize=(12, 6))
        plt.plot(timestamps, cpu_data, label='CPU %', color='red')
        plt.plot(timestamps, memory_data, label='Memory %', color='blue')
        plt.xlabel('Time')
        plt.ylabel('Percentage')
        plt.title('System Performance During Load Test')
        plt.legend()
        plt.xticks(rotation=45)
        plt.tight_layout()
        filename = f"system_metrics_{timestamp}.png"
        filepath = os.path.join(self.output_dir, filename)
        plt.savefig(filepath)
        plt.close()
        print(f"System metrics chart generated: {filepath}")
        return filepath
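

# --- Illustrative sketch -----------------------------------------------------
# How the classes above could be exercised outside Locust: sample this machine
# for a short window, wrap the samples in a TestResults, and write the reports.
# The request-level numbers are placeholders (no traffic is driven here), and
# the function is not called anywhere by default.
def run_standalone_example(duration: float = 10.0) -> TestResults:
    """Monitor the local system for `duration` seconds and generate reports."""
    example_monitor = PerformanceMonitor(interval=1.0)
    example_monitor.start_monitoring()
    time.sleep(duration)
    example_monitor.stop_monitoring()

    results = TestResults(
        test_name="Standalone monitoring example",
        start_time=example_monitor.start_time,
        end_time=datetime.now(),
        duration_seconds=duration,
        # Placeholder request figures: no load is generated in this sketch
        total_requests=0,
        total_failures=0,
        avg_response_time=0.0,
        median_response_time=0.0,
        p95_response_time=0.0,
        p99_response_time=0.0,
        requests_per_second=0.0,
        peak_rps=0.0,
        system_metrics=example_monitor.system_metrics,
        database_metrics=example_monitor.database_metrics,
        error_summary={},
    )
    generator = ReportGenerator()
    generator.generate_html_report(results)
    generator.generate_json_report(results)
    generator.generate_charts(results)
    return results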


# Global monitor instance
monitor = PerformanceMonitor()
report_generator = ReportGenerator()


# Locust event handlers
@events.test_start.add_listener
def on_test_start(environment, **kwargs):
    """Start monitoring when the test starts."""
    monitor.start_monitoring()


@events.test_stop.add_listener
def on_test_stop(environment, **kwargs):
    """Stop monitoring and generate reports when the test stops."""
    monitor.stop_monitoring()
    # Collect test results from Locust's aggregated stats
    stats = environment.stats
    end_time = datetime.now()
    # Summarise recorded failures per request/error combination
    error_summary = {
        f"{error.method} {error.name}: {error.error}": error.occurrences
        for error in stats.errors.values()
    }
    results = TestResults(
        test_name=getattr(environment.parsed_options, 'test_name', 'Load Test'),
        start_time=monitor.start_time,
        end_time=end_time,
        duration_seconds=(end_time - monitor.start_time).total_seconds(),
        total_requests=stats.total.num_requests,
        total_failures=stats.total.num_failures,
        avg_response_time=stats.total.avg_response_time,
        median_response_time=stats.total.median_response_time,
        p95_response_time=stats.total.get_response_time_percentile(0.95),
        p99_response_time=stats.total.get_response_time_percentile(0.99),
        # total_rps covers the whole run; current_rps would be near zero at stop time
        requests_per_second=stats.total.total_rps,
        # stats.history entries are snapshot dicts; the list may be empty if
        # history collection was not running
        peak_rps=max((entry.get("current_rps", 0) for entry in stats.history), default=0),
        system_metrics=monitor.system_metrics,
        database_metrics=monitor.database_metrics,
        error_summary=error_summary
    )
    # Generate reports
    report_generator.generate_html_report(results)
    report_generator.generate_json_report(results)
    report_generator.generate_charts(results)


@events.request.add_listener
def on_request(request_type, name, response_time, response_length, response, **kwargs):
    """Track requests for error analysis."""
    # This could be enhanced to track specific error patterns; a sketch follows below.
    pass
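

# --- Illustrative sketch -----------------------------------------------------
# One way to implement the enhancement noted in on_request(): count failures
# per (request name, exception type) in a module-level dict. The function is
# deliberately not registered as a listener; swap it in for on_request if
# per-pattern tracking is wanted, and feed request_error_counts into
# TestResults.error_summary in on_test_stop.
request_error_counts: Dict[str, int] = {}


def track_request_errors(request_type, name, response_time, response_length,
                         response, exception=None, **kwargs):
    """Record one counter per request name and exception class."""
    if exception is not None:
        key = f"{request_type} {name}: {type(exception).__name__}"
        request_error_counts[key] = request_error_counts.get(key, 0) + 1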


def check_performance_thresholds(results: TestResults, thresholds: Dict[str, float]) -> Dict[str, bool]:
    """Check whether the results meet the defined performance thresholds."""
    # Avoid division by zero when no requests were recorded
    error_rate = results.total_failures / results.total_requests if results.total_requests else 0.0
    checks = {
        'response_time_p95': results.p95_response_time <= thresholds.get('response_time_p95', 2000),
        'response_time_avg': results.avg_response_time <= thresholds.get('response_time_avg', 1000),
        'error_rate': error_rate <= thresholds.get('error_rate', 0.05),
        'rps_minimum': results.requests_per_second >= thresholds.get('rps_minimum', 10)
    }
    return checks


if __name__ == "__main__":
    # Example usage
    print("Performance monitoring utilities for ATS load testing")
    print("Use with Locust for automatic monitoring and reporting")