kaauh_ats/comprehensive_translation_merger.py
2025-11-03 13:00:12 +03:00

213 lines
7.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Comprehensive Translation Merger
Merges all 35 translation batch files into the main django.po file
"""
import os
import re
import glob
def parse_batch_file(filename):
    """Parse a batch file and extract English-Arabic translation pairs.

    Each entry in a completed batch file is expected to look like::

        msgid: "English text"
        msgstr: ""
        Arabic Translation:
        msgstr: "Arabic text"

    Args:
        filename: Path of the UTF-8 encoded batch file to read.

    Returns:
        A dict mapping the stripped English text to the stripped Arabic
        text. Both are kept exactly as written between the quotes, i.e.
        backslash escapes are not decoded. Pairs where either side has fewer
        than two characters after stripping are skipped. If the file cannot
        be read or decoded, the error is printed and an empty dict is
        returned.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: one unreadable batch must not abort the whole merge.
        print(f"Error parsing {filename}: {e}")
        return {}

    # Body of a quoted string: plain characters or backslash escapes, so an
    # escaped quote (\") inside the text does not terminate the string.
    quoted = r'"((?:[^"\\]|\\.)*)"'
    pattern = ''.join([
        r'msgid:\s*', quoted, r'\s*\n',
        r'msgstr:\s*""\s*\n',
        r'Arabic Translation:\s*\n',
        r'msgstr:\s*', quoted,
    ])

    translations = {}
    # DOTALL lets an escape sequence span a line break, matching what the
    # character class already allows for ordinary characters.
    for english, arabic in re.findall(pattern, content, re.DOTALL):
        english = english.strip()
        arabic = arabic.strip()
        # Skip empty or single-character entries on either side.
        if len(english) > 1 and len(arabic) > 1:
            translations[english] = arabic
    return translations
def parse_current_django_po(po_file='locale/ar/LC_MESSAGES/django.po'):
    """Parse a django.po file and extract its existing translations.

    Only entries whose ``msgid`` and ``msgstr`` each sit on a single line,
    with the ``msgstr`` line directly following the ``msgid`` line, are
    recognised. For an entry continued over several quoted lines only the
    text on the keyword lines themselves is captured.

    Args:
        po_file: Path of the PO file to read. Defaults to the Arabic
            catalogue used by the rest of this script.

    Returns:
        A tuple ``(existing_translations, parts)``:

        * ``existing_translations`` maps each msgid to its msgstr, both
          still in escaped form as written in the file. When a msgid occurs
          more than once the last occurrence wins.
        * ``parts`` is the file content split around every recognised
          entry: text between entries alternates with the entry text
          itself, so ``''.join(parts)`` reproduces the file.

        If ``po_file`` does not exist, ``({}, [])`` is returned.
    """
    if not os.path.exists(po_file):
        return {}, []

    with open(po_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Body of a quoted PO string: plain characters or backslash escapes, so
    # an escaped quote (\") does not end the string prematurely.
    body = r'(?:[^"\\]|\\.)*'
    capturing = r'msgid\s+"(' + body + r')"\s*\nmsgstr\s+"(' + body + r')"'
    whole_entry = r'(msgid\s+"' + body + r'"\s*\nmsgstr\s+"' + body + r'")'

    # Extract msgid/msgstr pairs; later duplicates overwrite earlier ones.
    existing_translations = dict(re.findall(capturing, content, re.DOTALL))

    # Split out the text surrounding each entry (header, comments, footer).
    parts = re.split(whole_entry, content, flags=re.DOTALL)
    return existing_translations, parts
def create_comprehensive_translation_dict():
    """Create a comprehensive translation dictionary from all batch files.

    Every ``translation_batch_*.txt`` file in the current working directory
    is parsed with ``parse_batch_file``. Files are processed in batch-number
    order, and when the same English text appears in several batches the
    first translation encountered is kept; later ones are only reported.

    Returns:
        A dict mapping English text to its Arabic translation.
    """
    def batch_order(path):
        # Order by the first number in the file name so that batch 2 comes
        # before batch 10; a plain string sort would put "10" first and
        # change which duplicate wins. Names without a number go last.
        number = re.search(r'\d+', os.path.basename(path))
        return (int(number.group()) if number else float('inf'), path)

    batch_files = sorted(glob.glob('translation_batch_*.txt'), key=batch_order)
    print(f"Found {len(batch_files)} batch files")

    all_translations = {}
    for batch_file in batch_files:
        print(f"Processing {batch_file}...")
        batch_translations = parse_batch_file(batch_file)
        for english, arabic in batch_translations.items():
            if english not in all_translations:
                all_translations[english] = arabic
            else:
                # Keep the first translation found, but note duplicates.
                print(f" Duplicate found: '{english}' -> '{arabic}' (existing: '{all_translations[english]}')")

    print(f"Total unique translations: {len(all_translations)}")
    return all_translations
def update_django_po(translations, po_file='locale/ar/LC_MESSAGES/django.po'):
    """Fill in empty translations in a django.po file.

    An entry is updated only when all of the following hold:

    * its ``msgid`` sits on one line, is non-empty and is a key of
      ``translations``;
    * the next line is a single-line ``msgstr ""`` (empty);
    * that ``msgstr`` is not continued on following quoted lines, which
      would mean the entry already has a multi-line translation.

    Everything else, including the header entry, is copied unchanged. The
    original content is saved to ``po_file + '.backup'`` before the file is
    rewritten; both happen even when nothing was updated.

    Args:
        translations: Mapping of msgid text to translated text. Keys are
            compared against the escaped text as written in the PO file and
            values are written verbatim between quotes, so both must
            already be in PO-escaped form.
        po_file: Path of the PO file to update. Defaults to the Arabic
            catalogue used by the rest of this script.

    Returns:
        The number of entries whose msgstr was filled in.

    Raises:
        FileNotFoundError: If ``po_file`` does not exist.
    """
    with open(po_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # A keyword followed by one complete quoted string. Escapes are honoured
    # so a msgid containing \" is read in full rather than cut at the first
    # quote character.
    quoted = r'"((?:[^"\\]|\\.)*)"\s*$'
    msgid_re = re.compile(r'msgid\s+' + quoted)
    msgstr_re = re.compile(r'msgstr\s+' + quoted)

    lines = content.split('\n')
    new_lines = list(lines)
    updated_count = 0

    for i, (line, next_line) in enumerate(zip(lines, lines[1:])):
        msgid_match = msgid_re.match(line)
        # An empty msgid is the catalogue header; never touch it.
        if not msgid_match or not msgid_match.group(1):
            continue
        msgid = msgid_match.group(1)
        if msgid not in translations:
            continue

        msgstr_match = msgstr_re.match(next_line)
        # Leave entries that already have text, and lines we cannot parse.
        if not msgstr_match or msgstr_match.group(1):
            continue

        # 'msgstr ""' followed by quoted lines is a multi-line translation,
        # not an empty one; replacing the first line would glue the new text
        # onto the old continuation lines.
        if i + 2 < len(lines) and lines[i + 2].lstrip().startswith('"'):
            continue

        new_translation = translations[msgid]
        new_lines[i + 1] = f'msgstr "{new_translation}"'
        updated_count += 1
        print(f" Updated: '{msgid}' -> '{new_translation}'")

    new_content = '\n'.join(new_lines)

    # Create backup of the content as it was before this run.
    backup_file = po_file + '.backup'
    with open(backup_file, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Created backup: {backup_file}")

    # Write updated file.
    with open(po_file, 'w', encoding='utf-8') as f:
        f.write(new_content)
    print(f"Updated {updated_count} translations in {po_file}")
    return updated_count
def _collect_po_msgids(content):
    """Return the set of msgid values (still escaped) defined in PO text."""
    msgid_line = re.compile(r'msgid\s+"((?:[^"\\]|\\.)*)"\s*$')
    continuation_line = re.compile(r'\s*"((?:[^"\\]|\\.)*)"\s*$')

    msgids = set()
    current = None  # msgid being assembled, or None when outside a msgid
    for line in content.split('\n'):
        started = msgid_line.match(line)
        if started:
            if current is not None:
                msgids.add(current)
            current = started.group(1)
            continue
        if current is None:
            continue
        continued = continuation_line.match(line)
        if continued:
            # A msgid split over several quoted lines is their concatenation.
            current += continued.group(1)
        else:
            msgids.add(current)
            current = None
    if current is not None:
        msgids.add(current)
    return msgids


def add_missing_translations(translations, po_file='locale/ar/LC_MESSAGES/django.po'):
    """Append translations whose msgid is absent from a django.po file.

    The file is scanned for every ``msgid`` it defines, including msgids
    continued over several quoted lines and msgids of plural entries, so
    that an entry which already exists is never appended a second time.
    Missing entries are appended after an
    ``# Auto-added missing translations`` comment.

    Args:
        translations: Mapping of msgid text to translated text. Both are
            written verbatim between quotes, so they must already be in
            PO-escaped form.
        po_file: Path of the PO file to extend. Defaults to the Arabic
            catalogue used by the rest of this script.

    Returns:
        The number of entries appended.

    Raises:
        FileNotFoundError: If ``po_file`` does not exist.
    """
    with open(po_file, 'r', encoding='utf-8') as f:
        content = f.read()

    existing_msgids = _collect_po_msgids(content)

    # Find translations that don't exist in the .po file at all.
    missing_translations = {
        english: arabic
        for english, arabic in translations.items()
        if english not in existing_msgids
    }

    if not missing_translations:
        print("No missing translations found")
        return 0

    print(f"Found {len(missing_translations)} completely missing translations")

    # Add missing translations to the end of the file in a single write.
    chunks = ['\n\n# Auto-added missing translations\n']
    for english, arabic in missing_translations.items():
        chunks.append(f'\nmsgid "{english}"\n')
        chunks.append(f'msgstr "{arabic}"\n')
    with open(po_file, 'a', encoding='utf-8') as f:
        f.write(''.join(chunks))

    print(f"Added {len(missing_translations)} missing translations")
    return len(missing_translations)
def main():
    """Run the full merge: collect batch translations, then update django.po.

    The steps run in a fixed order: build the combined dictionary from the
    batch files, fill in entries that are present but untranslated, append
    entries that are absent, and finally print a summary with follow-up
    instructions.
    """
    print("🚀 Starting Comprehensive Translation Merger")
    print("=" * 50)

    # Step 1: gather every translation from the batch files.
    print("\n📚 Step 1: Building comprehensive translation dictionary...")
    merged = create_comprehensive_translation_dict()

    # Step 2: fill in entries that already exist in django.po.
    print("\n🔄 Step 2: Updating existing translations in django.po...")
    n_updated = update_django_po(merged)

    # Step 3: append entries django.po does not contain yet.
    print("\n Step 3: Adding missing translations...")
    n_added = add_missing_translations(merged)

    # Step 4: report what happened and what to do next.
    report = (
        "\n📊 Summary:",
        f" Total translations available: {len(merged)}",
        f" Updated existing translations: {n_updated}",
        f" Added missing translations: {n_added}",
        f" Total translations processed: {n_updated + n_added}",
        "\n✅ Translation merge completed!",
        "\n📝 Next steps:",
        " 1. Run: python manage.py compilemessages",
        " 2. Test Arabic translations in the browser",
        " 3. Verify language switching functionality",
    )
    for message in report:
        print(message)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()