import os
import json
import time
import argparse
import polib
from concurrent.futures import ThreadPoolExecutor, as_completed
from openai import OpenAI, APIConnectionError, RateLimitError


# --- Terminal Colors ---
class Colors:
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'


def print_success(msg):
    print(f"{Colors.GREEN}{msg}{Colors.ENDC}")


def print_warning(msg):
    print(f"{Colors.WARNING}{msg}{Colors.ENDC}")


def print_error(msg):
    print(f"{Colors.FAIL}{msg}{Colors.ENDC}")


def print_info(msg):
    print(f"{Colors.BLUE}{msg}{Colors.ENDC}")


# --- Provider Configurations ---
class ProviderFactory:
    @staticmethod
    def get_client(provider_name, api_key=None, base_url=None):
        """
        Returns a configured OpenAI client and default model based on the provider.
        """
        provider_name = provider_name.lower()
        if provider_name == 'glm':
            return OpenAI(
                api_key=api_key or os.environ.get('ZAI_API_KEY'),
                base_url="https://api.z.ai/api/coding/paas/v4/"
            ), "glm-4.6"
        elif provider_name == 'ollama':
            # Ollama exposes an OpenAI-compatible API locally
            return OpenAI(
                api_key='ollama',  # Required by the client but ignored by Ollama
                base_url=base_url or "http://localhost:11434/v1"
            ), "llama3"  # Default model, user can override with --model
        elif provider_name == 'openai':
            return OpenAI(
                api_key=api_key or os.environ.get('OPENAI_API_KEY')
            ), "gpt-4o-mini"
        else:
            raise ValueError(f"Unknown provider: {provider_name}")


# --- Main Logic ---
def translate_po_file(args):
    # 1. Setup Provider
    try:
        client, default_model = ProviderFactory.get_client(args.provider, args.api_key, args.api_base)
        model_name = args.model or default_model
    except Exception as e:
        print_error(f"Configuration Error: {e}")
        return

    # 2. Load PO File
    print_info(f"Loading file: {args.path}")
    try:
        po = polib.pofile(args.path)
    except Exception as e:
        print_error(f"Could not load file: {e}")
        return

    # 3. Filter Entries
    entries_to_process = []
    for entry in po:
        if entry.obsolete:
            continue
        is_empty = not entry.msgstr.strip()
        is_fuzzy = 'fuzzy' in entry.flags
        if is_empty or (args.fix_fuzzy and is_fuzzy):
            entries_to_process.append(entry)

    total_entries = len(entries_to_process)
    print_success(f"Target: {args.lang} | Provider: {args.provider} | Model: {model_name}")
    print_success(f"Found {total_entries} entries to translate.")

    if total_entries == 0:
        print_success("Nothing to do.")
        return

    # 4. Special Handling for Local Ollama
    # Local models struggle with concurrent requests, so force workers=1 for stability.
    if args.provider == 'ollama' and args.workers > 1:
        print_warning("⚠️ Warning: Using multiple workers with Ollama can crash local GPUs.")
        print_warning("   Forcing workers=1 for stability.")
        args.workers = 1

    # 5. Batch Processing Helper
    def chunked(iterable, n):
        for i in range(0, len(iterable), n):
            yield iterable[i:i + n]

    batches = list(chunked(entries_to_process, args.batch_size))

    # 6. Worker Function
    def process_batch(batch_entries):
        texts = [e.msgid for e in batch_entries]

        # System prompt: Critical for JSON enforcement
        system_prompt = (
            "You are a professional technical translator. "
            "You will receive a JSON list of English strings. "
            "Translate them accurately. "
            "IMPORTANT Rules:\n"
            "1. Return ONLY a valid JSON list of strings.\n"
            "2. Preserve python formatting (%(count)s, {name}, %s) exactly.\n"
            "3. Do not translate HTML tags.\n"
            "4. Do NOT output markdown (like ```json), just raw JSON."
        )
        user_prompt = (
            f"Translate these texts into {args.lang}:\n"
            f"{json.dumps(texts, ensure_ascii=False)}"
        )

        attempts = 0
        max_retries = 3
        while attempts < max_retries:
            try:
                # Build request kwargs; only OpenAI reliably supports response_format,
                # so the parameter is omitted entirely for other providers.
                request_kwargs = {
                    "model": model_name,
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    "temperature": 0.1,  # Low temp for deterministic results
                }
                if args.provider == "openai":
                    request_kwargs["response_format"] = {"type": "json_object"}

                completion = client.chat.completions.create(**request_kwargs)
                content = completion.choices[0].message.content

                # Cleanup: Some local models love adding markdown blocks despite instructions
                content = content.replace('```json', '').replace('```', '').strip()

                # Flexible JSON parsing
                try:
                    data = json.loads(content)
                    # Handle cases where the model returns {"translations": [...]} instead of [...]
                    if isinstance(data, dict):
                        # Look for the first list value
                        found_list = False
                        for v in data.values():
                            if isinstance(v, list):
                                translations = v
                                found_list = True
                                break
                        if not found_list:
                            return False, f"Could not find list in JSON object: {data}"
                    else:
                        translations = data
                except json.JSONDecodeError:
                    return False, f"Invalid JSON received: {content[:50]}..."

                if not isinstance(translations, list) or len(translations) != len(batch_entries):
                    return False, f"Count mismatch: sent {len(batch_entries)}, got {len(translations) if isinstance(translations, list) else 'invalid'}"

                # Apply translations with Type Checking
                for entry, translation in zip(batch_entries, translations):
                    # --- VITAL FIX FOR AttributeError: 'dict' object has no attribute 'splitlines' ---
                    if isinstance(translation, dict):
                        # Grab the first string value found in the dict (e.g., {"text": "Hello"})
                        extracted = next((str(v) for v in translation.values() if isinstance(v, str)), None)
                        translation = extracted if extracted else str(translation)
                    elif not isinstance(translation, str):
                        # Ensure all other types (like boolean or int) are converted to string
                        translation = str(translation)
                    # ---------------------------------------------------------------------------------
                    entry.msgstr = translation
                    if 'fuzzy' in entry.flags:
                        entry.flags.remove('fuzzy')

                return True, "Success"

            except (RateLimitError, APIConnectionError) as e:
                attempts += 1
                time.sleep(2 ** attempts)  # Exponential backoff
                if attempts == max_retries:
                    return False, f"API Error: {e}"
            except Exception as e:
                return False, f"Unexpected: {e}"

    # 7. Execution Loop
    success_count = 0
    print_info(f"Starting processing {len(batches)} batches...")

    with ThreadPoolExecutor(max_workers=args.workers) as executor:
        future_to_batch = {executor.submit(process_batch, batch): batch for batch in batches}

        for i, future in enumerate(as_completed(future_to_batch)):
            batch = future_to_batch[future]
            success, msg = future.result()

            if success:
                success_count += len(batch)
                print_success(f"[{i+1}/{len(batches)}] Batch done.")
            else:
                print_warning(f"[{i+1}/{len(batches)}] Batch failed: {msg}")

            # Auto-save every 5 batches
            if (i + 1) % 5 == 0:
                po.save()

    # 8. Final Save
    po.save()
    print_success("\n------------------------------------------------")
    print_success(f"Finished! Translated {success_count}/{total_entries} entries.")
    print_success(f"File saved: {args.path}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Translate .po files using AI Providers (Z.ai, Ollama, OpenAI)")
    parser.add_argument('path', type=str, help='Path to the .po file')
    parser.add_argument('--lang', type=str, required=True, help='Target language (e.g., "French", "zh-CN")')

    # Provider Settings
    parser.add_argument('--provider', type=str, default='ollama', choices=['glm', 'ollama', 'openai'], help='AI Provider to use')
    parser.add_argument('--model', type=str, help='Model name (e.g., glm-4, llama3, gpt-4). Defaults vary by provider.')
    parser.add_argument('--api-key', type=str, help='API Key (optional if env var is set)')
    parser.add_argument('--api-base', type=str, help='Custom API Base URL (useful for custom Ollama ports)')

    # Performance Settings
    parser.add_argument('--batch-size', type=int, default=10, help='Lines per request. Keep low (5-10) for local models.')
    parser.add_argument('--workers', type=int, default=3, help='Parallel threads. Note: Ollama is forced to 1.')

    # Logic Settings
    parser.add_argument('--fix-fuzzy', action='store_true', help='Re-translate entries marked as fuzzy')

    args = parser.parse_args()
    translate_po_file(args)
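
# Example invocations (illustrative only; the script filename "translate_po.py" and the
# .po paths below are hypothetical, and each provider assumes the matching prerequisite:
# a running Ollama server, ZAI_API_KEY, or OPENAI_API_KEY):
#
#   python translate_po.py locale/fr/LC_MESSAGES/django.po --lang French --provider ollama --model llama3
#   python translate_po.py locale/ar/LC_MESSAGES/django.po --lang Arabic --provider glm --batch-size 5
#   python translate_po.py messages.po --lang "zh-CN" --provider openai --workers 3 --fix-fuzzy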