#!/usr/bin/env python3
"""
AI-Powered PO File Translator using LiteLLM with OpenRouter

This command translates .po files using LiteLLM with OpenRouter as the provider,
matching the project's AI service architecture. Optimized for Arabic translation
with healthcare/medical context.

Usage:
    python manage.py translate_po locale/ar/LC_MESSAGES/django.po --lang Arabic
    python manage.py translate_po locale/ar/LC_MESSAGES/django.po --lang Arabic --model openrouter/nvidia/nemotron-3-super-120b-a12b:free
    python manage.py translate_po locale/ar/LC_MESSAGES/django.po --lang Arabic --dry-run
"""

import json
import os
import re
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Any, Optional, Tuple

import polib
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from litellm import completion
from litellm.exceptions import RateLimitError, APIConnectionError, Timeout

# Import project AIService for configuration consistency
from apps.core.ai_service import AIService


class Command(BaseCommand):
    """Translate the untranslated entries of a .po file through LiteLLM.

    Entries are grouped into batches, each batch is sent to the model from a
    worker thread, and the returned strings are written back into the
    in-memory PO file, which is saved periodically and once more at the end.
    """

    help = "Translate .po file entries using LiteLLM with OpenRouter (optimized for Arabic)"

    # A progress save is made after every this-many successful batches.
    AUTOSAVE_EVERY = 3

    # Attempts per batch for transient API failures (rate limit, connection, timeout).
    MAX_RETRIES = 3

    # Matches any character from the Arabic Unicode blocks; compiled once.
    ARABIC_CHARS = re.compile(r"[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]")

    # Worker threads mutate entries while the main thread may be saving the
    # file; this lock keeps a save from observing a half-written batch.
    _po_lock = threading.Lock()

    def add_arguments(self, parser):
        """Register the command-line arguments of the command."""
        parser.add_argument("po_file_path", type=str, help="Path to the .po file")
        parser.add_argument("--lang", type=str, default="Arabic", help="Target language (default: Arabic)")
        parser.add_argument("--batch-size", type=int, default=5, help="Entries per API call (default: 5)")
        parser.add_argument("--workers", type=int, default=2, help="Concurrent threads (default: 2)")
        parser.add_argument(
            "--model",
            type=str,
            default=None,
            help="Model to use (default: from settings.AI_MODEL or AIService.DEFAULT_MODEL)",
        )
        parser.add_argument("--temperature", type=float, default=0.2, help="Temperature for translation (default: 0.2)")
        parser.add_argument("--fix-fuzzy", action="store_true", help="Include entries marked as fuzzy")
        parser.add_argument("--dry-run", action="store_true", help="Preview translations without saving")
        parser.add_argument("--skip-validation", action="store_true", help="Skip Arabic text validation")
        parser.add_argument(
            "--context", type=str, default="healthcare", help="Context for translation (default: healthcare)"
        )

    def handle(self, *args, **options):
        """Run the command: configure, load, translate, then save.

        Raises:
            CommandError: if the options or the API configuration are invalid
                (raised by ``setup_config``).
        """
        # Setup configuration
        self.setup_config(options)

        # Load PO file
        po = self.load_po_file(options["po_file_path"])
        if not po:
            return

        # Filter entries to translate
        entries_to_process = self.filter_entries(po, options["fix_fuzzy"])
        total = len(entries_to_process)

        if total == 0:
            self.stdout.write(self.style.SUCCESS("No entries to translate."))
            return

        self.stdout.write(self.style.SUCCESS(f"Found {total} entries to translate to {self.target_lang}"))
        self.stdout.write(f"Using model: {self.model}")
        self.stdout.write(f"Batch size: {self.batch_size}, Workers: {self.workers}")

        if options["dry_run"]:
            self.stdout.write(self.style.WARNING("DRY RUN MODE - No changes will be saved"))

        # Process batches
        success_count = self.process_batches(entries_to_process, po, options)

        # Save if not dry run
        if not options["dry_run"]:
            po.save()
            self.stdout.write(self.style.SUCCESS(f"\n✓ Saved {options['po_file_path']}"))

        self.stdout.write(self.style.SUCCESS(f"\nComplete! Translated {success_count}/{total} entries successfully."))

    def setup_config(self, options: Dict[str, Any]) -> None:
        """Copy the options and the AIService settings onto the command.

        Raises:
            CommandError: if ``--batch-size`` or ``--workers`` is below 1, or
                if no OpenRouter API key is configured.
        """
        # Reject values that would otherwise surface later as an opaque
        # ValueError from range() or ThreadPoolExecutor.
        if options["batch_size"] < 1:
            raise CommandError("--batch-size must be at least 1")
        if options["workers"] < 1:
            raise CommandError("--workers must be at least 1")

        # Get API configuration from AIService (consistent with project)
        self.api_key = AIService.OPENROUTER_API_KEY
        self.base_url = AIService.OPENROUTER_BASE_URL
        if not self.api_key:
            raise CommandError("OpenRouter API key is not configured (AIService.OPENROUTER_API_KEY is empty)")

        # Set environment variables for LiteLLM (os.environ only accepts str,
        # so an unset base URL is skipped rather than assigned).
        os.environ["OPENROUTER_API_KEY"] = self.api_key
        if self.base_url:
            os.environ["OPENROUTER_API_BASE"] = self.base_url

        # Model configuration
        self.model = options["model"] or getattr(settings, "AI_MODEL", None) or AIService.DEFAULT_MODEL

        # A bare model name (no provider segment at all) is routed to
        # OpenRouter; a name that already contains "/" is left untouched.
        if "/" not in self.model:
            self.model = f"openrouter/{self.model}"

        self.temperature = options["temperature"]
        self.batch_size = options["batch_size"]
        self.workers = options["workers"]
        self.target_lang = options["lang"]
        self.context = options["context"]
        self.skip_validation = options["skip_validation"]
        self.dry_run = options["dry_run"]

    @staticmethod
    def _has_translation(entry: polib.POEntry) -> bool:
        """Return True when *entry* already carries a non-blank translation.

        Plural entries keep their translations in ``msgstr_plural`` and leave
        ``msgstr`` empty, so they count as translated only when every plural
        form is filled in.
        """
        if entry.msgid_plural:
            forms = entry.msgstr_plural
            return bool(forms) and all(form.strip() for form in forms.values())
        return bool(entry.msgstr.strip())

    def _targets_arabic(self) -> bool:
        """Return True when the target language was given as Arabic."""
        return self.target_lang.lower() in ["arabic", "ar"]

    def load_po_file(self, file_path: str) -> Optional[polib.POFile]:
        """Load the PO file at *file_path* and report its entry counts.

        Returns:
            The parsed file, or ``None`` (after writing the error to stderr)
            when it cannot be loaded.
        """
        self.stdout.write(f"Loading {file_path}...")
        try:
            po = polib.pofile(file_path)
        except Exception as e:  # command boundary: report and let handle() stop
            self.stderr.write(self.style.ERROR(f"Could not load file: {e}"))
            return None

        total_entries = len(po)
        translated = sum(1 for e in po if self._has_translation(e) and "fuzzy" not in e.flags)
        self.stdout.write(f" Total entries: {total_entries}")
        self.stdout.write(f" Already translated: {translated}")
        return po

    def filter_entries(self, po: polib.POFile, fix_fuzzy: bool) -> List[polib.POEntry]:
        """Return the non-obsolete entries that need a translation.

        An entry qualifies when it has no translation yet, or when
        *fix_fuzzy* is set and the entry is flagged fuzzy.
        """
        return [
            entry
            for entry in po
            if not entry.obsolete
            and (not self._has_translation(entry) or (fix_fuzzy and "fuzzy" in entry.flags))
        ]

    def process_batches(self, entries: List[polib.POEntry], po: polib.POFile, options: Dict[str, Any]) -> int:
        """Translate *entries* in batches on a thread pool.

        Args:
            entries: entries to translate.
            po: the file the entries belong to; saved periodically as batches
                complete unless this is a dry run.
            options: the command options (not read here; kept for callers).

        Returns:
            The number of entries whose batch completed successfully.
        """
        batches = list(self.chunked(entries, self.batch_size))
        total_batches = len(batches)
        success_count = 0
        successful_batches = 0

        with ThreadPoolExecutor(max_workers=self.workers) as executor:
            future_to_batch = {
                executor.submit(self.process_batch, batch, i + 1, total_batches): batch
                for i, batch in enumerate(batches)
            }

            for future in as_completed(future_to_batch):
                batch = future_to_batch[future]
                try:
                    success, msg = future.result()
                except Exception as e:
                    self.stderr.write(self.style.ERROR(f"Batch error: {e}"))
                    continue

                if not success:
                    self.stderr.write(self.style.WARNING(f"Batch failed: {msg}"))
                    continue

                success_count += len(batch)
                successful_batches += 1

                # Auto-save on every AUTOSAVE_EVERY-th successful batch. Counting
                # batches (not entries) keeps failures and a short final batch
                # from triggering spurious saves.
                if not self.dry_run and successful_batches % self.AUTOSAVE_EVERY == 0:
                    with self._po_lock:
                        po.save()
                    self.stdout.write(f" Auto-saved progress...")

        return success_count

    def process_batch(self, batch_entries: List[polib.POEntry], batch_num: int, total_batches: int) -> Tuple[bool, str]:
        """Translate one batch and write the results into its entries.

        Transient API failures are retried with exponential back-off; any
        other exception ends the batch immediately.

        Returns:
            ``(True, "Success")`` on success, otherwise ``(False, reason)``.
        """
        texts = []
        for entry in batch_entries:
            # Handle plural forms
            if entry.msgid_plural:
                texts.append({"singular": entry.msgid, "plural": entry.msgid_plural, "context": entry.msgctxt or ""})
            else:
                texts.append({"text": entry.msgid, "context": entry.msgctxt or ""})

        system_prompt = self.build_system_prompt()
        user_prompt = self.build_user_prompt(texts)

        max_retries = self.MAX_RETRIES
        for attempt in range(1, max_retries + 1):
            try:
                response = completion(
                    model=self.model,
                    messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                    temperature=self.temperature,
                    max_tokens=2000,
                    api_base=self.base_url,
                    api_key=self.api_key,
                    timeout=60,
                )

                content = response.choices[0].message.content
                translations = self.parse_response(content, len(batch_entries))

                if not translations:
                    return False, "Failed to parse translations"

                self._apply_translations(batch_entries, translations)
                self.stdout.write(f" Batch {batch_num}/{total_batches} ✓")
                return True, "Success"

            except (RateLimitError, APIConnectionError, Timeout) as e:
                # Give up before sleeping: waiting after the last attempt
                # would only delay the failure report.
                if attempt == max_retries:
                    return False, f"API error after retries: {e}"
                wait_time = 2**attempt
                self.stderr.write(self.style.WARNING(f" Retry {attempt}/{max_retries} after {wait_time}s: {e}"))
                time.sleep(wait_time)

            except Exception as e:
                return False, f"Unexpected error: {e}"

        return False, "Max retries exceeded"

    def _apply_translations(self, entries: List[polib.POEntry], translations: List[str]) -> None:
        """Store *translations* on *entries*, or just print them in dry-run mode."""
        if self.dry_run:
            for entry, trans in zip(entries, translations):
                self.stdout.write(f" [DRY-RUN] {entry.msgid[:50]}... -> {trans[:50]}...")
            return

        # The whole batch is written under the lock so a concurrent auto-save
        # never serialises a partially updated batch.
        with self._po_lock:
            for entry, trans in zip(entries, translations):
                if entry.msgid_plural:
                    # polib serialises msgstr_plural (not msgstr) for plural
                    # entries. The model returns a single string, so every
                    # plural slot receives it.
                    # NOTE(review): languages with several plural forms (Arabic
                    # has six) still need a human pass over these entries.
                    for index in list(entry.msgstr_plural) or [0]:
                        entry.msgstr_plural[index] = trans
                else:
                    entry.msgstr = trans
                if "fuzzy" in entry.flags:
                    entry.flags.remove("fuzzy")

    def build_system_prompt(self) -> str:
        """Build the system prompt, adding Arabic-specific rules when relevant."""
        base_prompt = f"""You are a professional translator specializing in {self.context} software localization.

CRITICAL RULES:
1. Translate from English to {self.target_lang}
2. Return ONLY a JSON array of translated strings
3. Preserve ALL variables exactly: %(name)s, {{variable}}, %s, etc.
4. Preserve HTML tags (<b>, <br>, etc.) - do not translate them
5. Preserve newlines (\\n) and formatting
6. Maintain the same array length and order as input
7. For items that have "singular" and "plural" keys, return ONE string: the translation of the plural text
"""

        # Add Arabic-specific instructions
        if self._targets_arabic():
            base_prompt += """
ARABIC TRANSLATION REQUIREMENTS:
- Use Modern Standard Arabic (الفصحى) - formal, professional
- Use healthcare/medical terminology appropriate for Saudi Arabian hospitals
- Maintain professional, respectful tone suitable for patient experience management
- Ensure grammatical correctness and natural flow
- Do not use colloquial Arabic (العامية)
- Numbers and dates should follow Arabic conventions where appropriate
- Keep English technical terms if no standard Arabic equivalent exists (e.g., API, URL)
"""

        return base_prompt

    def build_user_prompt(self, texts: List[Dict]) -> str:
        """Build the user prompt embedding *texts* as a JSON array."""
        return f"""Translate these texts to {self.target_lang}.

Input format: JSON array with text and optional context
Output format: JSON array of translated strings only

Texts to translate:
{json.dumps(texts, ensure_ascii=False, indent=2)}

Return ONLY the JSON array of translations."""

    def parse_response(self, content: str, expected_count: int) -> Optional[List[str]]:
        """Parse the model reply into a list of translated strings.

        The reply must be a JSON array (optionally wrapped in a Markdown code
        fence) holding exactly *expected_count* strings.

        Returns:
            The translations, or ``None`` (after writing the reason to stderr)
            when the reply is empty, malformed or of the wrong shape.
        """
        if not content or not content.strip():
            self.stderr.write(self.style.ERROR("Empty response from model"))
            return None

        # Clean markdown code blocks
        cleaned = content.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        elif cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()

        try:
            translations = json.loads(cleaned)
        except json.JSONDecodeError as e:
            self.stderr.write(self.style.ERROR(f"JSON parse error: {e}"))
            return None

        # Validate
        if not isinstance(translations, list):
            self.stderr.write(self.style.ERROR("Response is not a JSON array"))
            return None

        if len(translations) != expected_count:
            self.stderr.write(
                self.style.ERROR(f"Count mismatch: expected {expected_count}, got {len(translations)}")
            )
            return None

        # Anything but a string cannot be stored as a msgstr.
        if not all(isinstance(item, str) for item in translations):
            self.stderr.write(self.style.ERROR("Response contains non-string items"))
            return None

        # Validate Arabic text if needed
        if not self.skip_validation and self._targets_arabic():
            translations = [self.validate_arabic(t) for t in translations]

        return translations

    def validate_arabic(self, text: str) -> str:
        """Warn when *text* has no Arabic characters and tidy its spacing.

        Only runs of spaces and tabs are collapsed and trimmed; newlines are
        left alone so the translation keeps the line structure of its msgid.
        """
        if not text:
            return text

        if not self.ARABIC_CHARS.search(text):
            # No Arabic characters found - mark for review
            self.stderr.write(self.style.WARNING(f"Translation may not be in Arabic: {text[:50]}..."))

        # Clean excessive horizontal whitespace
        return re.sub(r"[ \t]+", " ", text).strip(" \t")

    @staticmethod
    def chunked(iterable: List, n: int):
        """Yield consecutive slices of *iterable* holding at most *n* items.

        Raises:
            ValueError: if *n* is smaller than 1.
        """
        if n < 1:
            raise ValueError("chunk size must be at least 1")
        for i in range(0, len(iterable), n):
            yield iterable[i : i + n]