#!/usr/bin/env python3
"""
AI-Powered PO File Translator using OpenRouter API

This command translates .po files using the OpenRouter API directly,
matching the project's AI service architecture.
Optimized for Arabic translation with healthcare/medical context.

Usage:
    python manage.py translate_po locale/ar/LC_MESSAGES/django.po --lang Arabic
    python manage.py translate_po locale/ar/LC_MESSAGES/django.po --lang Arabic --model google/gemini-2.5-flash-lite
    python manage.py translate_po locale/ar/LC_MESSAGES/django.po --lang Arabic --dry-run
"""

import json
import time
import re
import httpx
import polib
from typing import List, Dict, Any, Iterator, Optional, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed

from django.core.management.base import BaseCommand, CommandError
from django.conf import settings

from apps.core.ai_service import AIService


class Command(BaseCommand):
    """Translate the untranslated (and optionally fuzzy) entries of a .po file.

    Entries are grouped into batches, each batch is sent to the model through
    ``AIService._openrouter_completion`` from a thread pool, and the returned
    strings are written back onto the polib entries.
    """

    help = "Translate .po file entries using OpenRouter API (optimized for Arabic)"

    # Number of attempts per batch before the batch is reported as failed.
    MAX_RETRIES = 3
    # Completion budget and request timeout passed to the AI service.
    MAX_TOKENS = 2000
    REQUEST_TIMEOUT = 60
    # Progress is written to disk after this many successful batches.
    AUTOSAVE_EVERY = 3

    # Compiled once: validate_arabic runs for every translated string.
    ARABIC_PATTERN = re.compile(r"[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]")
    # Any whitespace except line breaks, so newlines in translations survive.
    HORIZONTAL_WHITESPACE = re.compile(r"[^\S\r\n]+")

    def add_arguments(self, parser):
        """Register the command-line options of the command."""
        parser.add_argument("po_file_path", type=str, help="Path to the .po file")
        parser.add_argument("--lang", type=str, default="Arabic", help="Target language (default: Arabic)")
        parser.add_argument("--batch-size", type=int, default=5, help="Entries per API call (default: 5)")
        parser.add_argument("--workers", type=int, default=2, help="Concurrent threads (default: 2)")
        parser.add_argument(
            "--model",
            type=str,
            default=None,
            help="Model to use (default: from settings.AI_MODEL or AIService.DEFAULT_MODEL)",
        )
        parser.add_argument("--temperature", type=float, default=0.2, help="Temperature for translation (default: 0.2)")
        parser.add_argument("--fix-fuzzy", action="store_true", help="Include entries marked as fuzzy")
        parser.add_argument("--dry-run", action="store_true", help="Preview translations without saving")
        parser.add_argument("--skip-validation", action="store_true", help="Skip Arabic text validation")
        parser.add_argument(
            "--context", type=str, default="healthcare", help="Context for translation (default: healthcare)"
        )

    def handle(self, *args, **options):
        """Run the command: load the file, translate pending entries, save."""
        # Setup configuration
        self.setup_config(options)

        # Load PO file
        po = self.load_po_file(options["po_file_path"])
        # load_po_file signals failure with None. The PO object is used like a
        # list below (len / iteration), so a plain truthiness test would also
        # treat a valid file without entries as a failure.
        if po is None:
            return

        # Filter entries to translate
        entries_to_process = self.filter_entries(po, options["fix_fuzzy"])
        total = len(entries_to_process)

        if total == 0:
            self.stdout.write(self.style.SUCCESS("No entries to translate."))
            return

        self.stdout.write(self.style.SUCCESS(f"Found {total} entries to translate to {self.target_lang}"))
        self.stdout.write(f"Using model: {self.model}")
        self.stdout.write(f"Batch size: {self.batch_size}, Workers: {self.workers}")

        if options["dry_run"]:
            self.stdout.write(self.style.WARNING("DRY RUN MODE - No changes will be saved"))

        # Process batches
        success_count = self.process_batches(entries_to_process, po, options)

        # Save if not dry run
        if not options["dry_run"]:
            po.save()
            self.stdout.write(self.style.SUCCESS(f"\n✓ Saved {options['po_file_path']}"))

        self.stdout.write(
            self.style.SUCCESS(f"\nComplete! Translated {success_count}/{total} entries successfully.")
        )

    def setup_config(self, options: Dict[str, Any]) -> None:
        """Copy the parsed options onto the instance and resolve the model name.

        Raises:
            CommandError: if ``--batch-size`` or ``--workers`` is below 1.
        """
        # Reject values that would otherwise fail later with an unhelpful
        # error (zero step in chunked(), invalid thread pool size).
        if options["batch_size"] < 1:
            raise CommandError("--batch-size must be at least 1")
        if options["workers"] < 1:
            raise CommandError("--workers must be at least 1")

        self.api_key = AIService._get_api_key()
        # Precedence: explicit --model, then settings.AI_MODEL, then the service default.
        self.model = options["model"] or getattr(settings, "AI_MODEL", None) or AIService.DEFAULT_MODEL
        self.model = AIService._strip_model_prefix(self.model)
        self.temperature = options["temperature"]
        self.batch_size = options["batch_size"]
        self.workers = options["workers"]
        self.target_lang = options["lang"]
        self.context = options["context"]
        self.skip_validation = options["skip_validation"]
        self.dry_run = options["dry_run"]

    def load_po_file(self, file_path: str) -> Optional[polib.POFile]:
        """Load the PO file and print a short summary.

        Returns:
            The parsed file, or ``None`` if loading failed (the error is
            written to stderr).
        """
        self.stdout.write(f"Loading {file_path}...")
        try:
            po = polib.pofile(file_path)
        except Exception as e:
            self.stderr.write(self.style.ERROR(f"Could not load file: {e}"))
            return None

        total_entries = len(po)
        translated = sum(1 for e in po if self._has_translation(e) and "fuzzy" not in e.flags)
        self.stdout.write(f" Total entries: {total_entries}")
        self.stdout.write(f" Already translated: {translated}")
        return po

    @staticmethod
    def _has_translation(entry: polib.POEntry) -> bool:
        """Return True if the entry already carries a non-blank translation.

        Plural entries are judged by ``msgstr_plural`` (every form must be
        non-blank); all other entries by ``msgstr``.
        """
        if entry.msgid_plural:
            forms = entry.msgstr_plural
            return bool(forms) and all(form.strip() for form in forms.values())
        return bool(entry.msgstr.strip())

    def filter_entries(self, po: polib.POFile, fix_fuzzy: bool) -> List[polib.POEntry]:
        """Return the non-obsolete entries that still need a translation.

        An entry qualifies when it has no translation yet, or when
        ``fix_fuzzy`` is set and the entry is flagged fuzzy.
        """
        entries = []
        for entry in po:
            if entry.obsolete:
                continue
            if not self._has_translation(entry):
                entries.append(entry)
            elif fix_fuzzy and "fuzzy" in entry.flags:
                entries.append(entry)
        return entries

    def process_batches(self, entries: List[polib.POEntry], po: polib.POFile, options: Dict[str, Any]) -> int:
        """Translate all entries in batches on a thread pool.

        Args:
            entries: Entries to translate.
            po: The file the entries belong to; saved periodically unless in
                dry-run mode.
            options: Parsed command options (kept for interface
                compatibility; not read here).

        Returns:
            The number of entries that belong to successfully processed batches.
        """
        batches = list(self.chunked(entries, self.batch_size))
        total_batches = len(batches)
        success_count = 0
        successful_batches = 0

        with ThreadPoolExecutor(max_workers=self.workers) as executor:
            future_to_batch = {
                executor.submit(self.process_batch, batch, i + 1, total_batches): batch
                for i, batch in enumerate(batches)
            }

            for future in as_completed(future_to_batch):
                batch = future_to_batch[future]
                try:
                    success, msg = future.result()
                except Exception as e:
                    self.stderr.write(self.style.ERROR(f"Batch error: {e}"))
                    continue

                if not success:
                    self.stderr.write(self.style.WARNING(f"Batch failed: {msg}"))
                    continue

                success_count += len(batch)
                successful_batches += 1

                # Auto-save after every AUTOSAVE_EVERY successful batches (if
                # not dry run). Counting batches rather than entries keeps the
                # cadence correct for a short final batch and avoids saving
                # after failures.
                if not self.dry_run and successful_batches % self.AUTOSAVE_EVERY == 0:
                    po.save()
                    self.stdout.write(" Auto-saved progress...")

        return success_count

    def process_batch(self, batch_entries: List[polib.POEntry], batch_num: int, total_batches: int) -> Tuple[bool, str]:
        """Translate one batch and apply the result to its entries.

        HTTP and transport errors are retried with exponential back-off
        (2s, 4s, ...) up to ``MAX_RETRIES`` attempts. A response that cannot
        be parsed fails the batch immediately.

        Returns:
            ``(True, "Success")`` on success, otherwise ``(False, reason)``.
        """
        texts = [self._entry_payload(entry) for entry in batch_entries]
        messages = [
            {"role": "system", "content": self.build_system_prompt()},
            {"role": "user", "content": self.build_user_prompt(texts)},
        ]

        for attempt in range(self.MAX_RETRIES):
            try:
                content = AIService._openrouter_completion(
                    model=self.model,
                    messages=messages,
                    temperature=self.temperature,
                    max_tokens=self.MAX_TOKENS,
                    timeout=self.REQUEST_TIMEOUT,
                )

                translations = self.parse_response(content, len(batch_entries))
                if not translations:
                    return False, "Failed to parse translations"

                for entry, trans in zip(batch_entries, translations):
                    if self.dry_run:
                        self.stdout.write(f" [DRY-RUN] {entry.msgid[:50]}... -> {trans[:50]}...")
                    else:
                        self._apply_translation(entry, trans)

                self.stdout.write(f" Batch {batch_num}/{total_batches} ✓")
                return True, "Success"

            except (httpx.HTTPStatusError, httpx.RequestError, httpx.TimeoutException) as e:
                # Nothing follows the last attempt, so do not sleep before giving up.
                if attempt == self.MAX_RETRIES - 1:
                    return False, f"API error after retries: {e}"
                wait_time = 2 ** (attempt + 1)
                self.stderr.write(
                    self.style.WARNING(f" Retry {attempt + 1}/{self.MAX_RETRIES} after {wait_time}s: {e}")
                )
                time.sleep(wait_time)
            except Exception as e:
                return False, f"Unexpected error: {e}"

        return False, "Max retries exceeded"

    @staticmethod
    def _entry_payload(entry: polib.POEntry) -> Dict[str, str]:
        """Build the JSON-serialisable description of one entry for the prompt."""
        # Handle plural forms
        if entry.msgid_plural:
            return {"singular": entry.msgid, "plural": entry.msgid_plural, "context": entry.msgctxt or ""}
        return {"text": entry.msgid, "context": entry.msgctxt or ""}

    @staticmethod
    def _apply_translation(entry: polib.POEntry, translation: str) -> None:
        """Store ``translation`` on ``entry`` and clear its fuzzy flag.

        The model returns a single string per entry, so for plural entries the
        same string is written to every plural slot the entry already has
        (or slot 0 if it has none).
        NOTE(review): languages with several distinct plural forms will need a
        manual pass over these entries.
        """
        if entry.msgid_plural:
            indexes = list(entry.msgstr_plural) or [0]
            entry.msgstr_plural = {index: translation for index in indexes}
        else:
            entry.msgstr = translation

        if "fuzzy" in entry.flags:
            entry.flags.remove("fuzzy")

    def _targets_arabic(self) -> bool:
        """Return True when the configured target language is Arabic."""
        return self.target_lang.lower() in ["arabic", "ar"]

    def build_system_prompt(self) -> str:
        """Build system prompt optimized for Arabic healthcare translation"""
        base_prompt = f"""You are a professional translator specializing in {self.context} software localization.

CRITICAL RULES:
1. Translate from English to {self.target_lang}
2. Return ONLY a JSON array of translated strings
3. Preserve ALL variables exactly: %(name)s, {{variable}}, %s, etc.
4. Preserve HTML tags (<b>, <br>, <a>, etc.) - do not translate them
5. Preserve newlines (\\n) and formatting
6. Maintain the same array length and order as input
"""

        # Add Arabic-specific instructions
        if self._targets_arabic():
            base_prompt += """
ARABIC TRANSLATION REQUIREMENTS:
- Use Modern Standard Arabic (الفصحى) - formal, professional
- Use healthcare/medical terminology appropriate for Saudi Arabian hospitals
- Maintain professional, respectful tone suitable for patient experience management
- Ensure grammatical correctness and natural flow
- Do not use colloquial Arabic (العامية)
- Numbers and dates should follow Arabic conventions where appropriate
- Keep English technical terms if no standard Arabic equivalent exists (e.g., API, URL)
"""

        return base_prompt

    def build_user_prompt(self, texts: List[Dict]) -> str:
        """Build user prompt with texts to translate"""
        return f"""Translate these texts to {self.target_lang}.

Input format: JSON array with text and optional context
Output format: JSON array of translated strings only

Texts to translate:
{json.dumps(texts, ensure_ascii=False, indent=2)}

Return ONLY the JSON array of translations."""

    def parse_response(self, content: str, expected_count: int) -> Optional[List[str]]:
        """Parse and validate the model response.

        Args:
            content: Raw completion text, optionally wrapped in a Markdown
                code fence.
            expected_count: Number of translations the batch requires.

        Returns:
            The list of translated strings, or ``None`` if the response is
            empty, is not valid JSON, is not an array of strings, or has the
            wrong length (the reason is written to stderr).
        """
        if not content or not content.strip():
            self.stderr.write(self.style.ERROR("Empty response from model"))
            return None

        # Clean markdown code blocks
        content = content.strip()
        if content.startswith("```json"):
            content = content[7:]
        elif content.startswith("```"):
            content = content[3:]
        if content.endswith("```"):
            content = content[:-3]
        content = content.strip()

        try:
            translations = json.loads(content)
        except json.JSONDecodeError as e:
            self.stderr.write(self.style.ERROR(f"JSON parse error: {e}"))
            return None

        # Validate
        if not isinstance(translations, list):
            self.stderr.write(self.style.ERROR("Response is not a JSON array"))
            return None

        if len(translations) != expected_count:
            self.stderr.write(
                self.style.ERROR(f"Count mismatch: expected {expected_count}, got {len(translations)}")
            )
            return None

        # Later steps slice and regex-clean each item, so anything but a
        # string would fail there with an opaque error.
        if not all(isinstance(item, str) for item in translations):
            self.stderr.write(self.style.ERROR("Response contains non-string translations"))
            return None

        # Validate Arabic text if needed
        if not self.skip_validation and self._targets_arabic():
            translations = [self.validate_arabic(t) for t in translations]

        return translations

    def validate_arabic(self, text: str) -> str:
        """Warn about translations without Arabic characters and tidy whitespace.

        Runs of horizontal whitespace are collapsed to a single space and
        leading/trailing spaces are removed. Line breaks are kept, because
        the prompt requires newlines to be preserved.
        """
        if not text:
            return text

        # Check for Arabic characters
        if not self.ARABIC_PATTERN.search(text):
            # No Arabic characters found - mark for review
            self.stderr.write(self.style.WARNING(f"Translation may not be in Arabic: {text[:50]}..."))

        # Clean excessive whitespace (every horizontal run is a single space
        # after the substitution, so stripping spaces is sufficient)
        return self.HORIZONTAL_WHITESPACE.sub(" ", text).strip(" ")

    @staticmethod
    def chunked(iterable: List, n: int) -> Iterator[List]:
        """Split iterable into chunks of size n

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("chunk size must be at least 1")
        for i in range(0, len(iterable), n):
            yield iterable[i : i + n]