kaauh_ats/find_empty_translations.py
2025-11-03 13:00:12 +03:00

174 lines
6.5 KiB
Python

#!/usr/bin/env python3
"""
Script to find empty msgstr entries in django.po file and organize them into batches
for systematic Arabic translation work.
"""
import re
import os
from typing import List, Tuple
def _po_string_fragment(token: str) -> str:
    """Return the raw text between the outer double quotes of a PO string token.

    Escape sequences are left untouched. A token that is not wrapped in
    double quotes is returned stripped but otherwise unchanged.
    """
    token = token.strip()
    if len(token) >= 2 and token[0] == '"' and token[-1] == '"':
        return token[1:-1]
    return token


def _read_po_string(lines: List[str], start: int, keyword: str) -> Tuple[str, int]:
    """Read the PO string that begins on ``lines[start]`` right after *keyword*.

    Continuation lines (lines that start with a double quote) directly below
    are concatenated to the first fragment.

    Returns:
        (value, next_index) where next_index is the index of the first line
        that is not part of the string.
    """
    first_line = lines[start].strip()
    fragments = [_po_string_fragment(first_line[len(keyword):])]
    index = start + 1
    while index < len(lines):
        continuation = lines[index].strip()
        if not continuation.startswith('"'):
            break
        fragments.append(_po_string_fragment(continuation))
        index += 1
    return ''.join(fragments), index


def find_empty_translations(po_file_path: str) -> List[Tuple[int, str, str]]:
    """
    Find all entries with an empty msgstr in the django.po file.

    Multi-line strings are handled: a ``msgid ""`` / ``msgstr ""`` line followed
    by quoted continuation lines is joined before it is inspected, so a
    translation that merely starts with ``msgstr ""`` is not reported as empty,
    and a multi-line msgid is reported with its full text. The entry with an
    empty msgid (the PO header) is never reported. Plural entries
    (``msgid_plural`` / ``msgstr[n]``) are not inspected.

    Args:
        po_file_path: Path to the .po file, read as UTF-8.

    Returns:
        List of tuples: (line_number, msgid, context_before), where
        line_number is the 1-based line of the ``msgid`` keyword, msgid is the
        raw (still escaped) source text and context_before is the text of up
        to three lines preceding the msgid line.
    """
    with open(po_file_path, 'r', encoding='utf-8') as po_file:
        lines = po_file.readlines()

    empty_translations = []
    total_lines = len(lines)
    i = 0
    while i < total_lines:
        if not lines[i].strip().startswith('msgid '):
            i += 1
            continue

        msgid, j = _read_po_string(lines, i, 'msgid ')

        # Scan forward for the msgstr that belongs to this msgid; a new msgid
        # or a comment means the current entry has no singular msgstr.
        msgstr = None
        while j < total_lines:
            candidate = lines[j].strip()
            if candidate.startswith('msgstr '):
                msgstr, j = _read_po_string(lines, j, 'msgstr ')
                break
            if candidate.startswith(('msgid ', '#')):
                break
            j += 1

        # An empty msgid is the header entry: there is nothing to translate.
        if msgid and msgstr == '':
            context = ''.join(lines[max(0, i - 3):i])
            empty_translations.append((i + 1, msgid, context))

        # j is always > i, so the scan is guaranteed to make progress.
        i = j

    return empty_translations
def create_batch_files(
    empty_translations: List[Tuple[int, str, str]],
    batch_size: int = 25,
    output_dir: str = ".",
) -> None:
    """
    Create batch files with empty translations for systematic work.

    Each batch is written to ``translation_batch_NN.txt`` (NN is the 1-based
    batch number, zero-padded to two digits) and lists, per entry, the source
    line number, the msgid and an empty slot for the Arabic translation.
    Progress is printed to stdout.

    Args:
        empty_translations: (line_number, msgid, context) tuples; the context
            element is not written to the batch files.
        batch_size: Maximum number of entries per batch file.
        output_dir: Directory that receives the batch files. It is created if
            it does not exist. Defaults to the current working directory.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        # Guard against ZeroDivisionError (0) and a silent no-op (negative).
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    total = len(empty_translations)
    total_batches = (total + batch_size - 1) // batch_size  # ceiling division

    print(f"Found {total} empty translations")
    print(f"Creating {total_batches} batches of ~{batch_size} translations each")

    if total_batches:
        os.makedirs(output_dir, exist_ok=True)

    for batch_num in range(total_batches):
        start_idx = batch_num * batch_size
        batch_translations = empty_translations[start_idx:start_idx + batch_size]
        end_idx = start_idx + len(batch_translations)

        # normpath drops the leading "./" so the default output_dir reports
        # the bare file name.
        batch_filename = os.path.normpath(
            os.path.join(output_dir, f"translation_batch_{batch_num + 1:02d}.txt")
        )

        with open(batch_filename, 'w', encoding='utf-8') as batch_file:
            batch_file.write(f"=== TRANSLATION BATCH {batch_num + 1:02d} ===\n")
            batch_file.write(f"Translations {start_idx + 1}-{end_idx} of {total}\n")
            batch_file.write("=" * 60 + "\n\n")

            for line_num, msgid, _context in batch_translations:
                batch_file.write(f"Line {line_num}:\n")
                batch_file.write(f"msgid: \"{msgid}\"\n")
                batch_file.write("msgstr: \"\"\n")
                batch_file.write("\nArabic Translation: \n")
                batch_file.write("msgstr: \"\"\n")
                batch_file.write("-" * 40 + "\n\n")

        print(f"Created {batch_filename} with {len(batch_translations)} translations")
# (label used in the counts section, label used in the samples section,
#  lower-case substrings that select the category). Order matters: the first
# category with a matching substring wins.
_SUMMARY_CATEGORIES = (
    ("UI Elements (Buttons, Links)", "UI Elements", ('button', 'btn', 'click', 'select')),
    ("Form Fields & Inputs", "Form Fields", ('field', 'form', 'input', 'enter')),
    ("Messages (Error/Success/Warning)", "Messages", ('error', 'success', 'warning', 'message')),
    ("Navigation & Pages", "Navigation", ('menu', 'nav', 'page', 'dashboard')),
    ("Other", "Other", ()),
)

# Number of example entries listed per category in the report.
_SUMMARY_SAMPLE_COUNT = 5


def _summary_category_index(msgid: str) -> int:
    """Return the index in _SUMMARY_CATEGORIES of the category for *msgid*.

    Matching is a case-insensitive substring test; a msgid that matches no
    keyword falls into the last ("Other") category.
    """
    msgid_lower = msgid.lower()
    for index, (_count_label, _sample_label, keywords) in enumerate(_SUMMARY_CATEGORIES):
        if any(word in msgid_lower for word in keywords):
            return index
    return len(_SUMMARY_CATEGORIES) - 1


def create_summary_report(
    empty_translations: List[Tuple[int, str, str]],
    report_path: str = "empty_translations_summary.txt",
) -> None:
    """
    Create a summary report of all empty translations.

    The report contains the total number of entries, a per-category count
    (categories are chosen by keyword substrings found in the msgid) and up
    to five sample entries for every non-empty category.

    Args:
        empty_translations: (line_number, msgid, context) tuples; the context
            element is not used.
        report_path: File the report is written to (UTF-8, overwritten if it
            exists). Defaults to ``empty_translations_summary.txt`` in the
            current working directory.
    """
    # Group by type/pattern for better organization.
    grouped = [[] for _ in _SUMMARY_CATEGORIES]
    for line_num, msgid, _context in empty_translations:
        grouped[_summary_category_index(msgid)].append((line_num, msgid))

    with open(report_path, 'w', encoding='utf-8') as report:
        report.write("EMPTY TRANSLATIONS SUMMARY REPORT\n")
        report.write("=" * 50 + "\n\n")
        report.write(f"Total empty translations: {len(empty_translations)}\n\n")

        for (count_label, _sample_label, _keywords), entries in zip(_SUMMARY_CATEGORIES, grouped):
            report.write(f"{count_label}: {len(entries)}\n")
        report.write("\n")

        report.write("SAMPLE ENTRIES:\n")
        report.write("-" * 30 + "\n")

        for (_count_label, sample_label, _keywords), entries in zip(_SUMMARY_CATEGORIES, grouped):
            if not entries:
                continue
            shown = min(len(entries), _SUMMARY_SAMPLE_COUNT)
            report.write(f"\n{sample_label} (showing first {shown}):\n")
            for line_num, msgid in entries[:_SUMMARY_SAMPLE_COUNT]:
                report.write(f" Line {line_num}: \"{msgid}\"\n")
def main(po_file_path: str = "locale/ar/LC_MESSAGES/django.po") -> None:
    """Main function to process the django.po file.

    Scans the catalog for empty translations, writes the batch files and the
    summary report into the current working directory and prints the next
    steps. Prints a message and returns early when the catalog is missing or
    has no empty translations.

    Args:
        po_file_path: Path to the .po catalog to scan. Defaults to the Arabic
            catalog, relative to the current working directory.
    """
    # isfile (rather than exists) so that a directory at this path is reported
    # here instead of failing later when the file is opened.
    if not os.path.isfile(po_file_path):
        print(f"Error: {po_file_path} not found!")
        return

    print("Scanning for empty translations...")
    empty_translations = find_empty_translations(po_file_path)

    if not empty_translations:
        print("No empty translations found! All msgstr entries have translations.")
        return

    # Create batch files
    create_batch_files(empty_translations, batch_size=25)

    # Create summary report
    create_summary_report(empty_translations)

    print("\nProcess completed!")
    print("Check the generated batch files: translation_batch_*.txt")
    print("Summary report: empty_translations_summary.txt")
    print("\nNext steps:")
    print("1. Work through each batch file systematically")
    print("2. Add Arabic translations to each empty msgstr")
    print("3. Update the main django.po file with completed translations")


if __name__ == "__main__":
    main()