Files
Horizon-OC/Source/sys-clk/translate.py
2026-03-22 19:14:52 -04:00

147 lines
4.5 KiB
Python

#!/usr/bin/env python3
"""
Usage:
python translate.py             # translate to all languages
python translate.py fr de ja    # translate to specific languages only
"""
import json
import os
import re
import ssl
import sys
import time
import urllib.parse
import urllib.request
# English source catalogue that every translation is generated from.
INPUT_FILE = os.path.join("overlay", "lang", "en.json")

# Number of strings sent per request, and the base pause (in seconds)
# inserted between requests and scaled up for retries.
BATCH_SIZE = 75
DELAY = 0.3

# Language codes translated when none are named on the command line.
ALL_LANGUAGES = [
    "es",
    "fr",
    "de",
    "ja",
    "ko",
    "it",
    "nl",
    "pt",
    "ru",
    "uk",
    "pl",
    "zh-cn",
    "zh-tw",
]

# Placed between the strings of a batch so one request can carry many strings.
SEPARATOR = "\n\n###\n\n"

# NOTE(review): hostname checking and certificate verification are both
# switched off on this context, so HTTPS responses are not authenticated.
# Presumably a workaround for machines without a usable CA store -- confirm
# before relying on the output when running on an untrusted network.
SSL_CTX = ssl.create_default_context()
SSL_CTX.check_hostname = False
SSL_CTX.verify_mode = ssl.CERT_NONE
def load_json(path: str) -> dict:
    """Read a JSON file, tolerating trailing commas.

    A comma that directly precedes a closing ``}`` or ``]`` is dropped before
    parsing. Text inside string literals is never modified, so a value such
    as ``"a, }"`` is loaded unchanged.

    Args:
        path: Location of a UTF-8 encoded JSON document.

    Returns:
        The parsed document.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the text is still not valid JSON once the
            trailing commas have been removed.
    """
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    # Match whole string literals first so a ",}" sequence inside a string is
    # copied through untouched; only a comma sitting outside a string and
    # directly before a closing brace/bracket is removed.
    token = re.compile(r'("(?:\\.|[^"\\])*")|,(\s*[}\]])')
    content = token.sub(
        lambda m: m.group(1) if m.group(1) is not None else m.group(2),
        content,
    )
    return json.loads(content)
def save_json(data: dict, path: str):
    """Write *data* to *path* as a JSON object with one entry per line.

    The parent directory is created when missing. Keys and values are
    serialised individually with ``ensure_ascii=False`` so non-ASCII text is
    stored as-is, and entries keep the iteration order of *data*.

    Args:
        data: Mapping to serialise.
        path: Destination file; it is overwritten if it already exists.
    """
    parent = os.path.dirname(path)
    os.makedirs(parent if parent else ".", exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        entries = [
            f" {json.dumps(name, ensure_ascii=False)}: {json.dumps(text, ensure_ascii=False)}"
            for name, text in data.items()
        ]
        out.write("{\n")
        if entries:
            # Commas go between entries only, so the last line has none.
            out.write(",\n".join(entries) + "\n")
        out.write("}\n")
def _fetch_translation(text: str, dest: str, timeout: float) -> str:
    """Send one English string to the translate endpoint and return the reply text."""
    encoded = urllib.parse.quote(text)
    # Quote the language code too so an odd command-line argument cannot
    # inject extra query parameters; plain codes such as "zh-cn" are unchanged.
    target = urllib.parse.quote(dest, safe="")
    url = (
        "https://translate.googleapis.com/translate_a/single"
        f"?client=gtx&sl=en&tl={target}&dt=t&q={encoded}"
    )
    req = urllib.request.Request(url)
    req.add_header("User-Agent", "Mozilla/5.0")
    with urllib.request.urlopen(req, timeout=timeout, context=SSL_CTX) as resp:
        data = json.loads(resp.read().decode("utf-8"))
    # The first element of the reply is iterated as a list of segments whose
    # first item is the translated text; empty segments are skipped.
    return "".join(part[0] for part in data[0] if part[0])


def translate_batch(texts: list[str], dest: str) -> list[str]:
    """Translate *texts* from English into *dest*, preserving order.

    All strings are first sent in a single request, joined with ``SEPARATOR``.
    If the reply does not split back into the same number of pieces, each
    string is translated with its own request instead; a string whose
    individual request fails is returned untranslated.

    Args:
        texts: English source strings.
        dest: Target language code.

    Returns:
        A list with exactly one entry per input string.

    Raises:
        Exception: Whatever the combined request raises (network, HTTP or
            decoding errors) is propagated so the caller can retry.
    """
    if not texts:
        # Nothing to translate; skip the pointless request with an empty query.
        return []

    full = _fetch_translation(SEPARATOR.join(texts), dest, timeout=30)
    parts = re.split(r"\s*###\s*", full)
    if len(parts) == len(texts):
        return [p.strip() for p in parts]

    print(f"\n Split mismatch ({len(parts)} vs {len(texts)}), retrying individually...", end=" ")
    results = []
    for text in texts:
        try:
            results.append(_fetch_translation(text, dest, timeout=10))
        except Exception:
            # Deliberate best effort: keep the English text for this entry so
            # the result stays aligned with the input.
            results.append(text)
        else:
            # Pause only after a successful request, using the shared delay.
            time.sleep(DELAY)
    return results
def translate_language(keys: list[str], values: list[str], lang: str):
    """Translate every value into *lang* and save the result as JSON.

    Values are sent in chunks of ``BATCH_SIZE``. Each chunk is attempted up
    to three times with a growing pause; if every attempt fails, the original
    strings of that chunk are kept. The translated values are paired with
    *keys* in order and written to ``lang/<lang>.json``.

    Args:
        keys: Catalogue keys, in the same order as *values*.
        values: English strings to translate.
        lang: Target language code, also used as the output file name.
    """
    total = len(values)
    chunks = [values[start:start + BATCH_SIZE] for start in range(0, total, BATCH_SIZE)]
    total_batches = len(chunks)
    max_attempts = 3
    translated_values = []

    for batch_num, batch in enumerate(chunks, start=1):
        print(f" Batch {batch_num}/{total_batches} ({len(batch)} strings)...",
              end=" ", flush=True)
        for attempt in range(1, max_attempts + 1):
            try:
                translated_values.extend(translate_batch(batch, lang))
                print("OK")
                break
            except Exception as e:
                if attempt == max_attempts:
                    # Out of attempts: fall back to the untranslated strings
                    # so keys and values stay aligned.
                    print(f"FAILED ({e}), keeping original")
                    translated_values.extend(batch)
                else:
                    wait = DELAY * (attempt + 1)
                    print(f"retry in {wait}s ({e})")
                    time.sleep(wait)
        # Pause between chunks, but not after the final one.
        if batch_num < total_batches:
            time.sleep(DELAY)

    # NOTE(review): output goes to "lang/" relative to the working directory,
    # while the input is read from "overlay/lang/" -- confirm this is intended.
    output_file = os.path.join("lang", f"{lang}.json")
    save_json(dict(zip(keys, translated_values)), output_file)
    print(f" -> {output_file}\n")
def main():
    """Translate the English catalogue into each requested language.

    Language codes are taken from the command-line arguments; with no
    arguments every entry of ``ALL_LANGUAGES`` is used. Exits with status 1
    when the input catalogue does not exist.
    """
    if not os.path.isfile(INPUT_FILE):
        print(f"Error: {INPUT_FILE} not found. Run extract_translations.py first.")
        sys.exit(1)

    # An empty argument list is falsy, which selects the full default set.
    languages = sys.argv[1:] or ALL_LANGUAGES

    data = load_json(INPUT_FILE)
    keys, values = list(data), list(data.values())
    print(f"Loaded {len(values)} strings from {INPUT_FILE}")
    print(f"Translating to {len(languages)} languages: {', '.join(languages)}\n")

    count = len(languages)
    for position, lang in enumerate(languages, start=1):
        print(f"[{position}/{count}] Translating to '{lang}'...")
        translate_language(keys, values, lang)
    print("All done!")


if __name__ == "__main__":
    main()