#!/usr/bin/env python3
"""
Comprehensive Translation Merger

Merges the translation batch files (``translation_batch_*.txt``) into the
main django.po file.

All msgid/msgstr text is handled in its raw, PO-escaped form (exactly as it
appears between the double quotes in the files), so strings are compared and
written back without any unescaping or re-escaping.
"""

import glob
import os
import re
import sys

# Default locations, relative to the current working directory.
PO_FILE = 'locale/ar/LC_MESSAGES/django.po'
BATCH_GLOB = 'translation_batch_*.txt'

# Body of a double-quoted string: plain characters or backslash escapes, so
# an escaped quote (\") does not terminate the string early.
_STR_BODY = r'(?:[^"\\]|\\.)*'

# Entry format used in the completed batch files:
#   msgid: "English text"
#   msgstr: ""
#   Arabic Translation:
#   msgstr: "Arabic text"
_BATCH_ENTRY_RE = re.compile(
    r'msgid:\s*"(' + _STR_BODY + r')"\s*\nmsgstr:\s*""\s*\n'
    r'Arabic Translation:\s*\nmsgstr:\s*"(' + _STR_BODY + r')"'
)

# Single-line msgid/msgstr pair in a .po file.
_PO_PAIR_RE = re.compile(
    r'msgid\s+"(' + _STR_BODY + r')"\s*\nmsgstr\s+"(' + _STR_BODY + r')"'
)
# Same pair wrapped in ONE capturing group so re.split keeps the separators.
_PO_PAIR_SPLIT_RE = re.compile(
    r'(msgid\s+"' + _STR_BODY + r'"\s*\nmsgstr\s+"' + _STR_BODY + r'")'
)

_MSGID_LINE_RE = re.compile(r'msgid\s+"(' + _STR_BODY + r')"')
_MSGSTR_LINE_RE = re.compile(r'msgstr\s+"(' + _STR_BODY + r')"')


def _natural_key(path):
    """Return a sort key that orders embedded numbers numerically."""
    # re.split with a capturing group alternates text, digits, text, ...,
    # so every odd index holds a run of digits.
    tokens = re.split(r'(\d+)', path)
    return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]


def parse_batch_file(filename):
    """Parse a batch file and extract English-Arabic translation pairs.

    Args:
        filename: Path of the batch file to read (UTF-8).

    Returns:
        Dict mapping the stripped English text to the stripped Arabic text.
        Entries where either side is shorter than two characters are
        skipped. If the file cannot be read or decoded, the error is
        printed and an empty dict is returned.
    """
    translations = {}

    try:
        with open(filename, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: one unreadable batch must not abort the whole merge.
        print(f"Error parsing {filename}: {e}")
        return translations

    for english, arabic in _BATCH_ENTRY_RE.findall(content):
        english = english.strip()
        arabic = arabic.strip()

        # Skip empty or invalid entries
        if len(english) > 1 and len(arabic) > 1:
            translations[english] = arabic

    return translations


def parse_current_django_po(po_file=PO_FILE):
    """Parse the django.po file and extract its existing translations.

    Only single-line ``msgid "..."`` / ``msgstr "..."`` pairs are
    recognised.

    Args:
        po_file: Path of the .po file; defaults to ``PO_FILE``.

    Returns:
        A tuple ``(existing_translations, parts)``:
        ``existing_translations`` maps each msgid to its msgstr, and
        ``parts`` is the file content split around every matched pair
        (the pairs themselves are included). ``({}, [])`` is returned when
        the file does not exist.
    """
    if not os.path.exists(po_file):
        return {}, []

    with open(po_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Later duplicates of a msgid overwrite earlier ones.
    existing_translations = dict(_PO_PAIR_RE.findall(content))

    # Text between the pairs, with the pairs kept as their own elements.
    parts = _PO_PAIR_SPLIT_RE.split(content)

    return existing_translations, parts


def create_comprehensive_translation_dict(batch_glob=BATCH_GLOB):
    """Create a comprehensive translation dictionary from all batch files.

    Args:
        batch_glob: Glob pattern selecting the batch files; defaults to
            ``BATCH_GLOB``.

    Returns:
        Dict mapping English text to Arabic text. When the same English
        text appears more than once, the first translation encountered is
        kept and the duplicate is reported on stdout.
    """
    all_translations = {}

    # Natural sort so that batch 2 is processed before batch 10; the order
    # matters because the first translation found for a msgid wins.
    batch_files = sorted(glob.glob(batch_glob), key=_natural_key)

    print(f"Found {len(batch_files)} batch files")

    for batch_file in batch_files:
        print(f"Processing {batch_file}...")

        for english, arabic in parse_batch_file(batch_file).items():
            if english not in all_translations:
                all_translations[english] = arabic
            else:
                # Keep the first translation found, but note duplicates
                print(f" Duplicate found: '{english}' -> '{arabic}' (existing: '{all_translations[english]}')")

    print(f"Total unique translations: {len(all_translations)}")
    return all_translations


def update_django_po(translations, po_file=PO_FILE):
    """Fill in empty msgstr entries of the django.po file.

    Existing non-empty translations are never overwritten. The original
    content is saved to ``<po_file>.backup`` before the file is rewritten.

    Args:
        translations: Dict mapping msgid text to the msgstr text to insert
            (both in raw PO-escaped form).
        po_file: Path of the .po file; defaults to ``PO_FILE``.

    Returns:
        The number of msgstr entries that were filled in.

    Raises:
        FileNotFoundError: If ``po_file`` does not exist.
    """
    # Read current file
    with open(po_file, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')
    new_lines = []
    updated_count = 0
    i = 0

    while i < len(lines):
        line = lines[i]
        msgid_match = _MSGID_LINE_RE.match(line) if line.startswith('msgid ') else None

        if msgid_match and i + 1 < len(lines) and lines[i + 1].startswith('msgstr '):
            msgid = msgid_match.group(1)
            msgstr_match = _MSGSTR_LINE_RE.match(lines[i + 1])
            current_msgstr = msgstr_match.group(1) if msgstr_match else ""

            # 'msgstr ""' followed by quoted continuation lines is a
            # multi-line translation, not an empty one. Replacing only its
            # first line would splice the new text onto the old one.
            has_continuation = (
                i + 2 < len(lines) and lines[i + 2].lstrip().startswith('"')
            )

            new_lines.append(line)  # Keep msgid line
            if msgid in translations and not current_msgstr and not has_continuation:
                new_translation = translations[msgid]
                new_lines.append(f'msgstr "{new_translation}"')  # Update msgstr
                updated_count += 1
                print(f" Updated: '{msgid}' -> '{new_translation}'")
            else:
                # Keep existing translation
                new_lines.append(lines[i + 1])

            i += 2  # Skip both msgid and msgstr lines
            continue

        new_lines.append(line)
        i += 1

    new_content = '\n'.join(new_lines)

    # Create backup of the original content before touching the file
    backup_file = po_file + '.backup'
    with open(backup_file, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Created backup: {backup_file}")

    # Write updated file
    with open(po_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print(f"Updated {updated_count} translations in {po_file}")
    return updated_count


def add_missing_translations(translations, po_file=PO_FILE):
    """Append translations whose msgid is absent from the django.po file.

    Args:
        translations: Dict mapping msgid text to msgstr text (both in raw
            PO-escaped form).
        po_file: Path of the .po file; defaults to ``PO_FILE``.

    Returns:
        The number of entries appended to the file.

    Raises:
        FileNotFoundError: If ``po_file`` does not exist. Appending would
            otherwise create a .po file without a header.
    """
    if not os.path.exists(po_file):
        raise FileNotFoundError(f"PO file not found: {po_file}")

    existing_translations, _ = parse_current_django_po(po_file)

    # Find translations that don't exist in the .po file at all
    missing_translations = {
        english: arabic
        for english, arabic in translations.items()
        if english not in existing_translations
    }

    if missing_translations:
        print(f"Found {len(missing_translations)} completely missing translations")

        # Add missing translations to the end of the file
        with open(po_file, 'a', encoding='utf-8') as f:
            f.write('\n\n# Auto-added missing translations\n')
            f.writelines(
                f'\nmsgid "{english}"\nmsgstr "{arabic}"\n'
                for english, arabic in missing_translations.items()
            )

        print(f"Added {len(missing_translations)} missing translations")
    else:
        print("No missing translations found")

    return len(missing_translations)


def main():
    """Main function to merge all translations.

    Exits with an error message if the default .po file does not exist.
    """
    print("🚀 Starting Comprehensive Translation Merger")
    print("=" * 50)

    # Fail early with a clear message instead of a traceback in step 2.
    if not os.path.exists(PO_FILE):
        sys.exit(f"Error: {PO_FILE} not found (path is relative to the current directory)")

    # Step 1: Create comprehensive translation dictionary
    print("\n📚 Step 1: Building comprehensive translation dictionary...")
    translations = create_comprehensive_translation_dict()

    # Step 2: Update existing translations in django.po
    print("\n🔄 Step 2: Updating existing translations in django.po...")
    updated_count = update_django_po(translations)

    # Step 3: Add completely missing translations
    print("\n➕ Step 3: Adding missing translations...")
    added_count = add_missing_translations(translations)

    # Step 4: Summary
    print("\n📊 Summary:")
    print(f" Total translations available: {len(translations)}")
    print(f" Updated existing translations: {updated_count}")
    print(f" Added missing translations: {added_count}")
    print(f" Total translations processed: {updated_count + added_count}")

    print("\n✅ Translation merge completed!")
    print("\n📝 Next steps:")
    print(" 1. Run: python manage.py compilemessages")
    print(" 2. Test Arabic translations in the browser")
    print(" 3. Verify language switching functionality")


if __name__ == "__main__":
    main()