#!/usr/bin/env python3
"""Translate locale keys whose value still equals the en-US merge (an "EN leak").

For every target locale the script layers ``<code>.json`` over ``de.json``,
looks for keys whose merged value is identical to the English text (en-US
layered over de) while differing from the German text, machine-translates the
English text, and rewrites the locale file as the set of overrides relative
to ``de.json``.
"""
from __future__ import annotations

import argparse
import json
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
from pathlib import Path
from typing import Any

try:
    from deep_translator import GoogleTranslator
except ImportError:
    # Only real translation needs the third-party package.  Keeping the module
    # importable without it lets ``--dry-run`` and the pure helpers work;
    # _make_translator() raises a clear error when a translator is required.
    GoogleTranslator = None

ROOT = Path(__file__).resolve().parents[1]
LOCALES_DIR = ROOT / "frontend" / "src" / "i18n" / "locales"
CACHE_FILE = Path(__file__).resolve().parent / ".i18n-translate-cache.json"

# Locale file code -> language code passed to the translator as ``target``.
TARGETS = {
    "fr": "fr",
    "es": "es",
    "it": "it",
    "pl": "pl",
    "ja": "ja",
    "zh": "zh-CN",
    "th": "th",
    "tl": "tl",
    "fil": "tl",
}

# ``{name}``-style interpolation placeholders that must survive translation.
PLACEHOLDER_RE = re.compile(r"\{[^}]+\}")


def deep_merge(base: Any, override: Any) -> Any:
    """Return *base* with *override* layered on top.

    Where both sides hold a dict for the same key the merge recurses;
    otherwise the override value replaces the base value.  If *base* is not a
    dict, *override* is returned unless it is ``None``.  Neither input is
    mutated; sub-objects that are not merged are shared, not copied.
    """
    if not isinstance(base, dict):
        return override if override is not None else base
    result = dict(base)
    for key, value in (override or {}).items():
        if isinstance(value, dict) and isinstance(result.get(key), dict):
            result[key] = deep_merge(result[key], value)
        else:
            result[key] = value
    return result


def flatten(obj: dict[str, Any] | None, prefix: str = "") -> dict[str, str]:
    """Flatten nested dicts into ``{"a.b.c": "text"}``.

    Only string leaves are kept; any other leaf type (numbers, lists,
    ``None``, ...) is dropped.
    """
    out: dict[str, str] = {}
    for key, value in (obj or {}).items():
        next_key = f"{prefix}.{key}" if prefix else key
        if isinstance(value, dict):
            out.update(flatten(value, next_key))
        elif isinstance(value, str):
            out[next_key] = value
    return out


def set_by_path(obj: dict[str, Any], dot_path: str, value: Any) -> None:
    """Set ``obj[a][b][c] = value`` for ``dot_path == "a.b.c"`` in place.

    Missing intermediate levels are created; an intermediate value that is
    not a dict is replaced by an empty dict.
    """
    *parents, leaf = dot_path.split(".")
    cur = obj
    for part in parents:
        if not isinstance(cur.get(part), dict):
            cur[part] = {}
        cur = cur[part]
    cur[leaf] = value


def build_overrides(de_flat: dict[str, str], target_flat: dict[str, str]) -> dict[str, Any]:
    """Return the nested dict of *target_flat* entries that differ from *de_flat*."""
    out: dict[str, Any] = {}
    for key, value in target_flat.items():
        if value != de_flat.get(key):
            set_by_path(out, key, value)
    return out


def protect_placeholders(text: str) -> tuple[str, list[str]]:
    """Replace each placeholder with an opaque ``__PHn__`` marker.

    Returns the protected text and the original placeholders in marker order
    (``tokens[n]`` belongs to ``__PHn__``).
    """
    tokens: list[str] = []

    def repl(match: re.Match[str]) -> str:
        marker = f"__PH{len(tokens)}__"
        tokens.append(match.group(0))
        return marker

    return PLACEHOLDER_RE.sub(repl, text), tokens


def restore_placeholders(text: str, tokens: list[str]) -> str:
    """Inverse of :func:`protect_placeholders`: put the placeholders back."""
    out = text
    for i, token in enumerate(tokens):
        out = out.replace(f"__PH{i}__", token)
    return out


def find_en_leaks(
    de_flat: dict[str, str],
    en_flat: dict[str, str],
    merged: dict[str, str],
) -> list[str]:
    """Return the keys of *de_flat* whose merged locale value is an EN leak.

    A key leaks when an English text exists for it, the merged locale value
    equals that English text, and the English text differs from the German
    one.  Keys without an English text are never reported: there is nothing
    to translate from, and treating "missing on both sides" as equal would
    make the caller fail when it looks the English text up.
    """
    return [
        key
        for key, de_text in de_flat.items()
        if key in en_flat and merged.get(key) == en_flat[key] and en_flat[key] != de_text
    ]


def load_cache() -> dict[str, str]:
    """Load the translation cache from ``CACHE_FILE`` (empty if it does not exist)."""
    if CACHE_FILE.exists():
        return json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    return {}


def save_cache(cache: dict[str, str]) -> None:
    """Write the translation cache to ``CACHE_FILE`` as pretty-printed JSON."""
    CACHE_FILE.write_text(
        json.dumps(cache, ensure_ascii=False, indent=2) + "\n", encoding="utf-8"
    )


def _translate_with_timeout(translator: Any, text: str, timeout: float) -> Any:
    """Run ``translator.translate(text)`` in a worker thread, waiting at most *timeout* s."""
    pool = ThreadPoolExecutor(max_workers=1)
    future = pool.submit(translator.translate, text)
    try:
        return future.result(timeout=timeout)
    except FuturesTimeout as exc:
        if future.done():
            # The call itself raised a timeout error; keep its own message.
            raise
        raise FuturesTimeout(f"translation timed out after {timeout}s") from exc
    finally:
        # wait=False is essential: leaving a ``with ThreadPoolExecutor`` block
        # waits for the worker, so a hung request would block despite the
        # timeout.  The abandoned worker keeps running in the background until
        # its call returns.
        pool.shutdown(wait=False)


def translate_en(
    translator: Any,
    cache: dict[str, str],
    text: str,
    target: str,
    delay: float,
    timeout: float = 25,
    *,
    retries: int = 3,
    backoff: float = 2.0,
) -> str:
    """Translate English *text*, using and updating *cache*.

    Args:
        translator: Object with a ``translate(str) -> str`` method.
        cache: Mutable mapping keyed by ``"en|<target>|<text>"``.
        text: English source text; ``{...}`` placeholders are preserved.
        target: Target language code (only used for the cache key).
        delay: Seconds to sleep after a successful, uncached translation.
        timeout: Maximum seconds to wait for one translation attempt.
        retries: Number of attempts (values below 1 are treated as 1).
        backoff: Attempt *n* (1-based) is followed by ``backoff * n`` seconds
            of sleep; the defaults give 2 s and 4 s.

    Returns:
        The translated text with the original placeholders restored.

    Raises:
        ValueError: The translator returned a non-string, or a placeholder
            did not survive translation.  Nothing is cached in that case.
        Exception: The error of the last attempt once all attempts failed.
    """
    cache_key = f"en|{target}|{text}"
    if cache_key in cache:
        return cache[cache_key]

    safe, tokens = protect_placeholders(text)
    attempts = max(1, retries)
    last_err: Exception | None = None
    for attempt in range(attempts):
        try:
            raw = _translate_with_timeout(translator, safe, timeout)
        except Exception as exc:  # any translator/network failure is retried
            last_err = exc
            if attempt + 1 < attempts:  # no pointless sleep before giving up
                time.sleep(backoff * (attempt + 1))
            continue
        break
    else:
        raise last_err

    # Defensive: a non-string result would otherwise be cached and could end
    # up in the locale file.
    if not isinstance(raw, str):
        raise ValueError(f"translator returned {raw!r} instead of text")

    out = restore_placeholders(raw, tokens)
    # If the translator altered a marker it is not restored; such a
    # translation would break interpolation, so reject it instead of caching.
    missing = [token for token in tokens if token not in out]
    if missing:
        raise ValueError(f"placeholders lost in translation: {missing}")

    cache[cache_key] = out
    time.sleep(delay)
    return out


def _make_translator(target: str) -> Any:
    """Create the en -> *target* translator, failing clearly if the package is missing."""
    if GoogleTranslator is None:
        raise RuntimeError(
            "deep_translator is not installed; install 'deep-translator' or use --dry-run"
        )
    return GoogleTranslator(source="en", target=target)


def fix_locale(
    code: str,
    de_flat: dict[str, str],
    en_flat: dict[str, str],
    cache: dict[str, str],
    delay: float,
    dry_run: bool,
) -> None:
    """Translate the EN leaks of locale *code* and rewrite its JSON file.

    Args:
        code: Locale file code; must be a key of ``TARGETS``.
        de_flat: Flattened ``de.json``.
        en_flat: Flattened en-US texts (en-US layered over de).
        cache: Translation cache shared across locales; updated in place.
        delay: Passed through to :func:`translate_en`.
        dry_run: Use a ``"[<target>] <text prefix>"`` stand-in instead of
            translating, and do not write the locale file.

    NOTE(review): the locale file is rewritten from flattened *string* values
    only, so any non-string leaf it currently contains is not written back.
    """
    target = TARGETS[code]
    de = json.loads((LOCALES_DIR / "de.json").read_text(encoding="utf-8"))
    locale_path = LOCALES_DIR / f"{code}.json"
    locale_json = json.loads(locale_path.read_text(encoding="utf-8"))
    # deep_merge never mutates its inputs, so no defensive copy of ``de`` is needed.
    merged = flatten(deep_merge(de, locale_json))

    leaks = find_en_leaks(de_flat, en_flat, merged)
    # Translate each distinct English text once, then fan out to all its keys.
    unique_texts: dict[str, list[str]] = {}
    for key in leaks:
        unique_texts.setdefault(en_flat[key], []).append(key)
    print(f"[{code}] {len(leaks)} EN-leaks, {len(unique_texts)} unique → {target}", flush=True)
    if not unique_texts:
        return

    translator = None if dry_run else _make_translator(target)
    total = len(unique_texts)
    done = 0
    for text, keys in unique_texts.items():
        try:
            if dry_run:
                translated = f"[{target}] {text[:25]}"
            else:
                translated = translate_en(translator, cache, text, target, delay)
        except Exception as exc:
            # Best effort: leave the English text in place and carry on.
            print(f"[{code}] skip: {text[:40]}… ({exc})", file=sys.stderr, flush=True)
            continue
        for key in keys:
            merged[key] = translated
        done += 1
        if done % 50 == 0:
            print(f"[{code}] {done}/{total}", flush=True)
            save_cache(cache)  # checkpoint so an interrupted run keeps its work
    save_cache(cache)

    overrides = build_overrides(de_flat, merged)
    if not dry_run:
        locale_path.write_text(
            json.dumps(overrides, ensure_ascii=False, indent=2) + "\n", encoding="utf-8"
        )
    en_leaks_left = len(find_en_leaks(de_flat, en_flat, merged))
    print(
        f"[{code}] overrides={len(flatten(overrides))}, enLeaksLeft={en_leaks_left}",
        flush=True,
    )


def main() -> None:
    """Command-line entry point: fix the given locales (default: all of ``TARGETS``)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("locales", nargs="*", default=list(TARGETS))
    parser.add_argument("--delay", type=float, default=0.2)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    de = json.loads((LOCALES_DIR / "de.json").read_text(encoding="utf-8"))
    en_us = json.loads((LOCALES_DIR / "en-US.json").read_text(encoding="utf-8"))
    de_flat = flatten(de)
    en_flat = flatten(deep_merge(de, en_us))
    cache = load_cache()

    for code in args.locales:
        if code not in TARGETS:
            print(f"skip {code}", file=sys.stderr)
            continue
        fix_locale(code, de_flat, en_flat, cache, args.delay, args.dry_run)
        time.sleep(3)

    save_cache(cache)
    print("Done.", flush=True)


if __name__ == "__main__":
    main()