#!/usr/bin/env python3
"""Analyze complaint source values from the 'جهة الشكوى' column across all years.

For every configured year the script opens ``data/Complaints Report - <year>.xlsx``,
walks each sheet, counts the distinct values found in the complaint-source
column, and prints a per-year breakdown followed by a consolidated table.
"""

import os
from collections import defaultdict

import pandas as pd

# Header of the column that holds the complaint source in every sheet.
SOURCE_COLUMN = "جهة الشكوى"
# Label under which missing (NaN/None) cells are reported.
NULL_LABEL = "(NULL/Empty)"
# Width of the horizontal rules in the printed report.
RULE_WIDTH = 100

years = [2022, 2023, 2024, 2025]

# per-year data: year -> {source label -> count}
year_data = {year: defaultdict(int) for year in years}

# overall data: source label -> running total plus where the label was seen
all_data = defaultdict(lambda: {"total_count": 0, "years": set(), "sheets": set()})


def tally_sheet(df, year, sheet_name, year_counts, overall):
    """Add one sheet's complaint-source counts to the running tallies.

    Args:
        df: DataFrame holding the sheet contents.
        year: Year the sheet belongs to; recorded in ``overall``.
        sheet_name: Sheet name; recorded in ``overall`` as ``"<year>/<sheet>"``.
        year_counts: Mapping ``label -> count`` for this year. Must supply a
            default of 0 for missing keys (e.g. ``defaultdict(int)``).
        overall: Mapping ``label -> {"total_count", "years", "sheets"}`` that
            creates its entries on first access (see ``all_data``).

    Returns:
        ``True`` if the sheet has the complaint-source column and was tallied,
        ``False`` if the column is absent (nothing is modified in that case).
    """
    if SOURCE_COLUMN not in df.columns:
        return False

    # dropna=False keeps empty cells so they are reported under NULL_LABEL.
    counts = df[SOURCE_COLUMN].value_counts(dropna=False)
    for value, count in counts.items():
        label = str(value) if pd.notna(value) else NULL_LABEL
        # value_counts yields NumPy integers; keep the tallies as plain ints.
        count = int(count)
        year_counts[label] += count
        entry = overall[label]
        entry["total_count"] += count
        entry["years"].add(year)
        entry["sheets"].add(f"{year}/{sheet_name}")
    return True


def process_year(year, year_counts, overall):
    """Read one year's workbook and tally every sheet into the given mappings.

    A missing workbook or a failure while reading is reported on stdout and
    skipped so that the remaining years are still processed.

    Args:
        year: Year whose workbook should be read.
        year_counts: Per-year tally passed through to ``tally_sheet``.
        overall: Cross-year tally passed through to ``tally_sheet``.
    """
    file_path = f"data/Complaints Report - {year}.xlsx"

    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return

    print(f"📊 Processing {year}: {file_path}")

    try:
        # Open the workbook once and close it deterministically; every sheet
        # is parsed from this handle instead of reopening the file per sheet.
        with pd.ExcelFile(file_path) as xls:
            print(f" Sheets: {xls.sheet_names}")

            for sheet_name in xls.sheet_names:
                # Best effort per sheet: one unreadable sheet must not stop
                # the rest of the workbook from being analyzed.
                try:
                    df = xls.parse(sheet_name)
                    print(f" Sheet '{sheet_name}': {len(df)} rows, columns: {list(df.columns)}")

                    if not tally_sheet(df, year, sheet_name, year_counts, overall):
                        print(f" ⚠️ No '{SOURCE_COLUMN}' column")
                except Exception as e:
                    print(f" ❌ Error: {e}")
    except Exception as e:
        print(f"❌ Error: {e}")

    print()


def print_report(report_years, counts_by_year, overall):
    """Print the per-year breakdown followed by the consolidated table.

    Args:
        report_years: Years to list, in display order.
        counts_by_year: Mapping ``year -> {label -> count}``.
        overall: Mapping ``label -> {"total_count", "years", "sheets"}``.
    """
    rule = "=" * RULE_WIDTH

    print(rule)
    print("PER-YEAR BREAKDOWN")
    print(rule)

    for year in report_years:
        counts = counts_by_year[year]
        print(
            f"\n--- Year {year} ({len(counts)} unique sources, {sum(counts.values())} total complaints) ---"
        )
        # Most frequent first; the stable sort keeps first-seen order on ties.
        for value, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print(f" {count:>6} | {value}")

    print("\n")
    print(rule)
    print("CONSOLIDATED - ALL UNIQUE VALUES ACROSS ALL YEARS")
    print(rule)
    print(f"{'Total':<8} {'Value':<50} {'Appears In':<30}")
    print("-" * RULE_WIDTH)

    sorted_all = sorted(overall.items(), key=lambda x: x[1]["total_count"], reverse=True)
    for value, data in sorted_all:
        years_str = ", ".join(sorted(str(y) for y in data["years"]))
        print(f"{data['total_count']:<8} {value:<50} {years_str:<30}")

    print(f"\n{rule}")
    print(f"SUMMARY: {len(sorted_all)} unique values across all years")
    print(f"{rule}")


# The analysis intentionally still runs at module level, exactly as the
# original flat script did, so `year_data` and `all_data` are populated as
# soon as the file is executed.
for year in years:
    process_year(year, year_data[year], all_data)

print_report(years, year_data, all_data)