#!/usr/bin/env python3
"""
Comprehensive Translation Merger
Merges all 35 translation batch files into the main django.po file
"""

import os
import re
import glob

def parse_batch_file(filename):
    """Parse a batch file and extract English-Arabic translation pairs.

    Each entry in a completed batch file is expected to look like::

        msgid: "English text"
        msgstr: ""
        Arabic Translation:
        msgstr: "Arabic text"

    Args:
        filename: Path of the batch file to read (decoded as UTF-8).

    Returns:
        dict mapping the stripped English text to the stripped Arabic text.
        Entries where either side is at most one character long after
        stripping are skipped. When the same English text occurs more than
        once in the file, the last occurrence wins. If the file cannot be
        read or decoded, an error is printed and an empty dict is returned.
    """
    translations = {}

    # The Arabic capture accepts backslash escapes, so a translation that
    # contains \" is captured whole instead of being cut off at the first
    # escaped quote (which would leave a value ending in a lone backslash).
    # The msgid capture intentionally keeps the original quote-free form:
    # the keys produced here stay exactly what they were before.
    pattern = (
        r'msgid:\s*"([^"]*?)"\s*\n'
        r'msgstr:\s*""\s*\n'
        r'Arabic Translation:\s*\n'
        r'msgstr:\s*"((?:[^"\\]|\\.)*)"'
    )

    try:
        with open(filename, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file; ValueError covers decode errors.
        print(f"Error parsing {filename}: {e}")
        return translations

    for english, arabic in re.findall(pattern, content):
        english = english.strip()
        arabic = arabic.strip()

        # Skip empty or single-character entries
        if len(english) > 1 and len(arabic) > 1:
            translations[english] = arabic

    return translations

def parse_current_django_po():
    """Parse the current django.po file and extract existing translations.

    Only single-line ``msgid "..."`` / ``msgstr "..."`` pairs on adjacent
    lines are recognised. Captured text is returned exactly as written in
    the file (PO escapes such as ``\\"`` are not decoded).

    Returns:
        A tuple ``(existing_translations, parts)``:

        * ``existing_translations`` maps each msgid to its msgstr; when a
          msgid occurs more than once the last pair wins.
        * ``parts`` is the file content split around every recognised pair
          (``re.split`` with the pair as a capturing group), so the text
          between pairs alternates with the pair text and
          ``''.join(parts)`` reproduces the file content.

        ``({}, [])`` is returned when the file does not exist.
    """
    po_file = 'locale/ar/LC_MESSAGES/django.po'

    if not os.path.exists(po_file):
        return {}, []

    with open(po_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Body of a quoted PO string: ordinary characters or backslash escapes.
    # Treating \" as part of the string keeps entries that contain escaped
    # quotes from being silently skipped.
    po_body = r'(?:[^"\\]|\\.)*'

    # Extract msgid/msgstr pairs
    pair_pattern = rf'msgid\s+"({po_body})"\s*\nmsgstr\s+"({po_body})"'
    existing_translations = dict(re.findall(pair_pattern, content))

    # Split the content around each pair; a single outer capturing group
    # keeps the pair text itself in the resulting list.
    split_pattern = rf'(msgid\s+"{po_body}"\s*\nmsgstr\s+"{po_body}")'
    parts = re.split(split_pattern, content)

    return existing_translations, parts

def create_comprehensive_translation_dict():
    """Create a comprehensive translation dictionary from all batch files.

    Every file in the current working directory matching
    ``translation_batch_*.txt`` is parsed with ``parse_batch_file``. Files
    are processed in natural (numeric-aware) order, so ``..._2.txt`` comes
    before ``..._10.txt``. When several files translate the same English
    text, the translation from the earliest file is kept and each later
    duplicate is reported on stdout.

    Returns:
        dict mapping English text to Arabic text.
    """
    def natural_key(name):
        # re.split with a capturing group puts the digit runs at the odd
        # indices, so ints are only ever compared with ints and strings
        # with strings. The raw name breaks ties such as "01" vs "1".
        chunks = re.split(r'(\d+)', name)
        return [int(c) if idx % 2 else c for idx, c in enumerate(chunks)], name

    all_translations = {}

    # Get all batch files; a plain lexicographic sort would process
    # batch 10 before batch 2 and change which duplicate wins.
    batch_files = sorted(glob.glob('translation_batch_*.txt'), key=natural_key)

    print(f"Found {len(batch_files)} batch files")

    for batch_file in batch_files:
        print(f"Processing {batch_file}...")
        batch_translations = parse_batch_file(batch_file)

        for english, arabic in batch_translations.items():
            if english not in all_translations:
                all_translations[english] = arabic
            else:
                # Keep the first translation found, but note duplicates
                print(f"  Duplicate found: '{english}' -> '{arabic}' (existing: '{all_translations[english]}')")

    print(f"Total unique translations: {len(all_translations)}")
    return all_translations

def update_django_po(translations):
    """Fill in untranslated entries of django.po from *translations*.

    The file is scanned line by line for a single-line ``msgid "..."``
    immediately followed by a ``msgstr`` line. The msgstr is replaced only
    when all of the following hold:

    * the msgstr is exactly ``msgstr ""`` and is not followed by ``"..."``
      continuation lines (a multi-line msgstr is an existing translation);
    * the msgid, exactly as written in the file, is a key of *translations*;
    * the new value is a valid single-line PO string body (no unescaped
      quote, no dangling backslash, no raw newline), so writing it cannot
      corrupt the file.

    All other lines are copied through unchanged. When at least one entry
    was updated, the previous content is saved to ``<po_file>.backup``
    before the file is rewritten; when nothing changed neither file is
    touched, so an earlier backup is not overwritten.

    Args:
        translations: dict mapping msgid text to the msgstr text to insert.

    Returns:
        Number of entries that were updated.

    Raises:
        FileNotFoundError: if the .po file does not exist.
    """
    po_file = 'locale/ar/LC_MESSAGES/django.po'

    # Read current file
    with open(po_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Body of a single-line PO string: ordinary characters or backslash
    # escapes. The line patterns are used with fullmatch so that an escaped
    # quote inside the string does not truncate the captured text.
    po_body = r'(?:[^"\\\n]|\\.)*'
    body_re = re.compile(po_body)
    msgid_re = re.compile(rf'msgid\s+"({po_body})"\s*')
    msgstr_re = re.compile(rf'msgstr\s+"({po_body})"\s*')

    lines = content.split('\n')
    new_lines = []
    i = 0
    updated_count = 0

    while i < len(lines):
        line = lines[i]
        msgid_match = msgid_re.fullmatch(line)
        next_line = lines[i + 1] if i + 1 < len(lines) else None

        if msgid_match and next_line is not None and next_line.startswith('msgstr '):
            msgid = msgid_match.group(1)
            msgstr_match = msgstr_re.fullmatch(next_line)

            # 'msgstr ""' followed by quoted lines is a multi-line
            # translation, not an empty one.
            continued = i + 2 < len(lines) and lines[i + 2].lstrip().startswith('"')
            untranslated = (
                msgstr_match is not None
                and msgstr_match.group(1) == ""
                and not continued
            )

            new_lines.append(line)  # Keep msgid line
            if untranslated and msgid in translations:
                new_translation = translations[msgid]
                if isinstance(new_translation, str) and body_re.fullmatch(new_translation):
                    new_lines.append(f'msgstr "{new_translation}"')  # Update msgstr
                    updated_count += 1
                    print(f"  Updated: '{msgid}' -> '{new_translation}'")
                else:
                    # Writing this value would produce an invalid entry.
                    new_lines.append(next_line)
                    print(f"  Skipped (not a valid PO string): '{msgid}'")
            else:
                # Keep existing translation
                new_lines.append(next_line)

            i += 2  # Skip both msgid and msgstr lines
            continue

        new_lines.append(line)
        i += 1

    if updated_count:
        # Create backup of the content as it was before this run
        backup_file = po_file + '.backup'
        with open(backup_file, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Created backup: {backup_file}")

        # Write updated file
        with open(po_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(new_lines))

    print(f"Updated {updated_count} translations in {po_file}")
    return updated_count

def add_missing_translations(translations):
    """Append translations whose msgid is absent from django.po.

    The set of msgids already present is whatever
    ``parse_current_django_po`` reports. Every pair of *translations* whose
    key is not in that set is appended to the end of the file under an
    ``# Auto-added missing translations`` comment. Pairs whose key or value
    is not a valid single-line PO string body (unescaped quote, dangling
    backslash, raw newline, or not a string) are reported and skipped, since
    writing them would corrupt the file.

    Args:
        translations: dict mapping msgid text to msgstr text.

    Returns:
        Number of entries appended to the file.

    Raises:
        FileNotFoundError: if the .po file does not exist.
    """
    po_file = 'locale/ar/LC_MESSAGES/django.po'

    # Fail up front when there is no file to extend, rather than letting
    # the append below create a new, header-less .po file.
    if not os.path.exists(po_file):
        raise FileNotFoundError(f"No such file: '{po_file}'")

    existing_translations, _ = parse_current_django_po()

    # Body of a single-line PO string: ordinary characters or backslash escapes.
    po_body_re = re.compile(r'(?:[^"\\\n]|\\.)*')

    def is_po_body(text):
        return isinstance(text, str) and po_body_re.fullmatch(text) is not None

    # Find translations that don't exist in the .po file at all
    missing_translations = {}
    for english, arabic in translations.items():
        if english in existing_translations:
            continue
        if not (is_po_body(english) and is_po_body(arabic)):
            print(f"  Skipped (not a valid PO string): '{english}'")
            continue
        missing_translations[english] = arabic

    if missing_translations:
        print(f"Found {len(missing_translations)} completely missing translations")

        # Add missing translations to the end of the file in one write
        chunks = ['\n\n# Auto-added missing translations\n']
        for english, arabic in missing_translations.items():
            chunks.append(f'\nmsgid "{english}"\n')
            chunks.append(f'msgstr "{arabic}"\n')
        with open(po_file, 'a', encoding='utf-8') as f:
            f.writelines(chunks)

        print(f"Added {len(missing_translations)} missing translations")
    else:
        print("No missing translations found")

    return len(missing_translations)

def main():
    """Run the whole merge and report the outcome on stdout.

    Builds the translation dictionary from the batch files, passes it to
    update_django_po and then to add_missing_translations, and finally
    prints a summary followed by the suggested next steps.
    """
    print("🚀 Starting Comprehensive Translation Merger")
    print("=" * 50)

    # Step 1: collect every translation offered by the batch files
    print("\n📚 Step 1: Building comprehensive translation dictionary...")
    merged = create_comprehensive_translation_dict()

    # Step 2: apply them to entries already present in django.po
    print("\n🔄 Step 2: Updating existing translations in django.po...")
    n_updated = update_django_po(merged)

    # Step 3: append the ones django.po does not contain
    print("\n➕ Step 3: Adding missing translations...")
    n_added = add_missing_translations(merged)

    # Step 4: summary and follow-up instructions, printed line by line
    report = (
        "\n📊 Summary:",
        f"  Total translations available: {len(merged)}",
        f"  Updated existing translations: {n_updated}",
        f"  Added missing translations: {n_added}",
        f"  Total translations processed: {n_updated + n_added}",
        "\n✅ Translation merge completed!",
        "\n📝 Next steps:",
        "  1. Run: python manage.py compilemessages",
        "  2. Test Arabic translations in the browser",
        "  3. Verify language switching functionality",
    )
    for entry in report:
        print(entry)

# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()