# Listing metadata: 80 lines, 2.6 KiB, Python
#!/usr/bin/env python3
"""
Analyze complaint source values from 'جهة الشكوى' column across all years.

For every year in ``years`` the script looks for
``data/Complaints Report - <year>.xlsx``, walks every sheet of the workbook,
and counts how often each distinct value of the complaint-source column
occurs.  It then prints a per-year breakdown followed by a consolidated
table of every distinct value seen across all years.

The script is best-effort: a missing workbook, an unreadable sheet, or a
sheet without the column is reported on stdout and skipped.
"""

import os
from collections import defaultdict

import pandas as pd

# Name of the spreadsheet column holding the complaint source.
SOURCE_COLUMN = "جهة الشكوى"
# Label under which missing cells (NaN / None / NaT) are reported.
NULL_LABEL = "(NULL/Empty)"
# Horizontal rule used by the report sections.
RULE = "=" * 100

years = [2022, 2023, 2024, 2025]

# per-year data: year -> {source label -> number of rows}
year_data = {year: defaultdict(int) for year in years}
# overall data: source label -> running total plus where the label was seen
all_data = defaultdict(lambda: {"total_count": 0, "years": set(), "sheets": set()})


def source_label(value):
    """Return the report label for one raw cell value.

    Missing values (as judged by ``pd.notna``) map to ``NULL_LABEL``; every
    other value is rendered with ``str``.
    """
    return str(value) if pd.notna(value) else NULL_LABEL


def tally_sheet(df, year, sheet_name):
    """Fold one sheet's complaint-source counts into the accumulators.

    Args:
        df: DataFrame holding the sheet's contents.
        year: Key into ``year_data`` that the counts are attributed to.
        sheet_name: Sheet name, recorded as ``"<year>/<sheet_name>"``.

    Returns:
        True when the sheet has the complaint-source column and was counted,
        False when the column is absent (nothing is modified in that case).
    """
    if SOURCE_COLUMN not in df.columns:
        return False

    # dropna=False keeps missing cells so they are reported as NULL_LABEL.
    for value, count in df[SOURCE_COLUMN].value_counts(dropna=False).items():
        label = source_label(value)
        count = int(count)  # store plain ints rather than numpy integers
        year_data[year][label] += count
        entry = all_data[label]
        entry["total_count"] += count
        entry["years"].add(year)
        entry["sheets"].add(f"{year}/{sheet_name}")
    return True


def process_year(year):
    """Read every sheet of one year's workbook and tally its sources.

    Problems are printed rather than raised so that one bad workbook or
    sheet does not stop the remaining years from being analyzed.
    """
    file_path = f"data/Complaints Report - {year}.xlsx"

    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return

    print(f"📊 Processing {year}: {file_path}")

    try:
        # Open the workbook once, reuse it for every sheet, and make sure
        # the underlying file handle is closed when we are done.
        with pd.ExcelFile(file_path) as xls:
            print(f" Sheets: {xls.sheet_names}")

            for sheet_name in xls.sheet_names:
                try:
                    df = pd.read_excel(xls, sheet_name=sheet_name)
                    print(f" Sheet '{sheet_name}': {len(df)} rows, columns: {list(df.columns)}")

                    if not tally_sheet(df, year, sheet_name):
                        print(f" ⚠️ No '{SOURCE_COLUMN}' column")

                except Exception as e:
                    # Deliberately broad: report the sheet and carry on.
                    print(f" ❌ Error: {e}")

    except Exception as e:
        # Deliberately broad: report the workbook and carry on.
        print(f"❌ Error: {e}")
    print()


def print_per_year_breakdown():
    """Print, for each year, every source label ordered by descending count."""
    print(RULE)
    print("PER-YEAR BREAKDOWN")
    print(RULE)
    for year in years:
        counts = year_data[year]
        print(
            f"\n--- Year {year} ({len(counts)} unique sources, {sum(counts.values())} total complaints) ---"
        )
        for value, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print(f" {count:>6} | {value}")


def print_consolidated_summary():
    """Print every distinct source label across all years with its total."""
    print("\n")
    print(RULE)
    print("CONSOLIDATED - ALL UNIQUE VALUES ACROSS ALL YEARS")
    print(RULE)
    print(f"{'Total':<8} {'Value':<50} {'Appears In':<30}")
    print("-" * 100)

    ranked = sorted(all_data.items(), key=lambda item: item[1]["total_count"], reverse=True)
    for value, data in ranked:
        years_str = ", ".join(sorted(str(y) for y in data["years"]))
        print(f"{data['total_count']:<8} {value:<50} {years_str:<30}")

    print(f"\n{RULE}")
    print(f"SUMMARY: {len(ranked)} unique values across all years")
    print(f"{RULE}")


for year in years:
    process_year(year)

print_per_year_breakdown()
print_consolidated_summary()