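"""Translate gettext .po files with an OpenAI-compatible LLM provider.

Example invocations (the script file name is illustrative):

    python translate_po.py locale/django.po --lang French --provider openai
    python translate_po.py locale/django.po --lang zh-CN --provider ollama --model llama3 --fix-fuzzy
"""
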
import argparse
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import polib
from openai import OpenAI, APIConnectionError, RateLimitError


# --- Terminal Colors ---

class Colors:
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'


def print_success(msg): print(f"{Colors.GREEN}{msg}{Colors.ENDC}")
def print_warning(msg): print(f"{Colors.WARNING}{msg}{Colors.ENDC}")
def print_error(msg): print(f"{Colors.FAIL}{msg}{Colors.ENDC}")
def print_info(msg): print(f"{Colors.BLUE}{msg}{Colors.ENDC}")


# --- Provider Configurations ---

class ProviderFactory:
    @staticmethod
    def get_client(provider_name, api_key=None, base_url=None):
        """
        Returns a configured OpenAI client and default model based on the provider.
        """
        provider_name = provider_name.lower()

        if provider_name == 'glm':
            return OpenAI(
                api_key=api_key or os.environ.get('ZAI_API_KEY'),
                base_url="https://api.z.ai/api/coding/paas/v4/"
            ), "glm-4.6"

        elif provider_name == 'ollama':
            # Ollama acts like OpenAI but locally
            return OpenAI(
                api_key='ollama',  # Required but ignored by Ollama
                base_url=base_url or "http://localhost:11434/v1"
            ), "llama3"  # Default model, user can override with --model

        elif provider_name == 'openai':
            return OpenAI(
                api_key=api_key or os.environ.get('OPENAI_API_KEY')
            ), "gpt-4o-mini"

        else:
            raise ValueError(f"Unknown provider: {provider_name}")


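# Illustrative use of the factory (a sketch, not executed by this script):
#
#   client, default_model = ProviderFactory.get_client('ollama')
#   # -> client pointed at http://localhost:11434/v1, default_model == "llama3"

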
# --- Main Logic ---

def translate_po_file(args):

    # 1. Setup Provider
    try:
        client, default_model = ProviderFactory.get_client(args.provider, args.api_key, args.api_base)
        model_name = args.model or default_model
    except Exception as e:
        print_error(f"Configuration Error: {e}")
        return

    # 2. Load PO File
    print_info(f"Loading file: {args.path}")
    try:
        po = polib.pofile(args.path)
    except Exception as e:
        print_error(f"Could not load file: {e}")
        return

    # 3. Filter Entries
    entries_to_process = []
    for entry in po:
        if entry.obsolete:
            continue

        is_empty = not entry.msgstr.strip()
        is_fuzzy = 'fuzzy' in entry.flags

        if is_empty or (args.fix_fuzzy and is_fuzzy):
            entries_to_process.append(entry)

    total_entries = len(entries_to_process)
    print_success(f"Target: {args.lang} | Provider: {args.provider} | Model: {model_name}")
    print_success(f"Found {total_entries} entries to translate.")

    if total_entries == 0:
        print_success("Nothing to do.")
        return

    # 4. Special Handling for Local Ollama
    # Local models struggle with concurrent requests, so force workers=1.
    if args.provider == 'ollama' and args.workers > 1:
        print_warning("⚠️ Warning: Using multiple workers with Ollama can crash local GPUs.")
        print_warning("   Forcing workers=1 for stability.")
        args.workers = 1

    # 5. Batch Processing Helper
    def chunked(iterable, n):
        for i in range(0, len(iterable), n):
            yield iterable[i:i + n]

    batches = list(chunked(entries_to_process, args.batch_size))
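    # For example, 25 entries with --batch-size 10 yield three batches of
    # sizes 10, 10 and 5: list(chunked(list(range(25)), 10)).
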
    # 6. Worker Function
    def process_batch(batch_entries):
        texts = [e.msgid for e in batch_entries]

        # System prompt: critical for JSON enforcement
        system_prompt = (
            "You are a professional technical translator. "
            "You will receive a JSON list of English strings. "
            "Translate them accurately. "
            "IMPORTANT Rules:\n"
            "1. Return ONLY a valid JSON list of strings.\n"
            "2. Preserve Python format placeholders (%(count)s, {name}, %s) exactly.\n"
            "3. Do not translate HTML tags.\n"
            "4. Do NOT output markdown (like ```json), just raw JSON."
        )

        user_prompt = (
            f"Translate these texts into {args.lang}:\n"
            f"{json.dumps(texts, ensure_ascii=False)}"
        )

        attempts = 0
        max_retries = 3

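        # Illustrative request payload for a two-entry batch (hypothetical content):
        #   Translate these texts into French:
        #   ["Hello", "%(count)s items"]
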
        while attempts < max_retries:
            try:
                # response_format is only passed for OpenAI; sending it as None
                # to other backends may be rejected, so build kwargs conditionally.
                request_kwargs = {
                    "model": model_name,
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    "temperature": 0.1,  # Low temperature for near-deterministic output
                }
                if args.provider == "openai":
                    request_kwargs["response_format"] = {"type": "json_object"}

                completion = client.chat.completions.create(**request_kwargs)

                content = completion.choices[0].message.content or ""

                # Cleanup: some local models love adding markdown fences despite instructions
                content = content.replace('```json', '').replace('```', '').strip()

                # Flexible JSON parsing
                try:
                    data = json.loads(content)
                    # Handle cases where the model returns {"translations": [...]} instead of [...]
                    if isinstance(data, dict):
                        # Use the first list value found in the object
                        translations = next((v for v in data.values() if isinstance(v, list)), None)
                        if translations is None:
                            return False, f"Could not find list in JSON object: {data}"
                    else:
                        translations = data
                except json.JSONDecodeError:
                    return False, f"Invalid JSON received: {content[:50]}..."

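                # Either shape is accepted above: '["Bonjour"]' and
                # '{"translations": ["Bonjour"]}' both yield translations == ["Bonjour"].
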
                if not isinstance(translations, list) or len(translations) != len(batch_entries):
                    got = len(translations) if isinstance(translations, list) else 'invalid'
                    return False, f"Count mismatch: sent {len(batch_entries)}, got {got}"

                # Apply translations with type checking
                for entry, translation in zip(batch_entries, translations):

                    # Guard against AttributeError ('dict' object has no attribute
                    # 'splitlines') inside polib when a model returns non-string items.
                    if isinstance(translation, dict):
                        # Grab the first string value in the dict (e.g., {"text": "Hello"})
                        extracted = next((str(v) for v in translation.values() if isinstance(v, str)), None)
                        translation = extracted if extracted else str(translation)
                    elif not isinstance(translation, str):
                        # Convert any other type (e.g., bool or int) to string
                        translation = str(translation)

                    entry.msgstr = translation
                    if 'fuzzy' in entry.flags:
                        entry.flags.remove('fuzzy')

                return True, "Success"

            except (RateLimitError, APIConnectionError) as e:
                attempts += 1
                time.sleep(2 ** attempts)  # Exponential backoff: 2s, 4s, 8s
                if attempts == max_retries:
                    return False, f"API Error: {e}"
            except Exception as e:
                return False, f"Unexpected: {e}"

    # 7. Execution Loop
    success_count = 0
    print_info(f"Processing {len(batches)} batches...")

    with ThreadPoolExecutor(max_workers=args.workers) as executor:
        future_to_batch = {executor.submit(process_batch, batch): batch for batch in batches}

        for i, future in enumerate(as_completed(future_to_batch)):
            batch = future_to_batch[future]
            success, msg = future.result()

            if success:
                success_count += len(batch)
                print_success(f"[{i+1}/{len(batches)}] Batch done.")
            else:
                print_warning(f"[{i+1}/{len(batches)}] Batch failed: {msg}")

            # Auto-save every 5 completed batches so progress survives interruptions
            if (i + 1) % 5 == 0:
                po.save()

    # 8. Final Save
    po.save()
    print_success("\n------------------------------------------------")
    print_success(f"Finished! Translated {success_count}/{total_entries} entries.")
    print_success(f"File saved: {args.path}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Translate .po files using AI providers (Z.ai, Ollama, OpenAI)")

    parser.add_argument('path', type=str, help='Path to the .po file')
    parser.add_argument('--lang', type=str, required=True, help='Target language (e.g., "French", "zh-CN")')

    # Provider Settings
    parser.add_argument('--provider', type=str, default='ollama', choices=['glm', 'ollama', 'openai'], help='AI provider to use')
    parser.add_argument('--model', type=str, help='Model name (e.g., glm-4.6, llama3, gpt-4o-mini). Defaults vary by provider.')
    parser.add_argument('--api-key', type=str, help='API key (optional if the matching env var is set)')
    parser.add_argument('--api-base', type=str, help='Custom API base URL (useful for custom Ollama ports)')

    # Performance Settings
    parser.add_argument('--batch-size', type=int, default=10, help='Entries per request. Keep low (5-10) for local models.')
    parser.add_argument('--workers', type=int, default=3, help='Parallel threads. Forced to 1 when using Ollama.')

    # Logic Settings
    parser.add_argument('--fix-fuzzy', action='store_true', help='Re-translate entries marked as fuzzy')

    args = parser.parse_args()
    translate_po_file(args)