Files
trainingstagebuch/scripts/fix-en-leaks.py
Torsten Schulz (local) eb54b4f7cf
All checks were successful
Deploy tt-tagebuch / deploy (push) Successful in 45s
feat(i18n): add scripts for locale translation and patching
- Implemented `fill-de-extended-gaps.js` to fill missing billing/orders keys in de-extended from de.
- Created `fill-i18n-deep.py` for deep translation of locale JSONs using deep-translator with fallback options.
- Added `fill-i18n-locales.js` to translate locale JSONs and write overrides for untranslated keys.
- Introduced `fix-en-leaks.py` to translate keys that still match the en-US merge, addressing English leaks.
- Developed `patch-de-ch-swiss.js` to replace 'ß' with 'ss' in de-CH.json without deleting existing entries.
- Created `patch-en-gb-au.js` to apply UK/AU spelling corrections in en-GB and en-AU locales.
- Added shell scripts `run-fix-en-leaks.sh` and `run-i18n-deep-fill.sh` for sequential execution of translation tasks.
- Implemented `update-i18n-todo-stats.js` to update statistics in the I18N_TODO.md file based on translation completeness.
2026-05-15 15:52:54 +02:00

197 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""Translate keys whose locale value still equals the en-US merge (EN leak)."""
from __future__ import annotations
import argparse
import json
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
from pathlib import Path
from deep_translator import GoogleTranslator
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Folder holding de.json, en-US.json and the per-locale JSON files.
LOCALES_DIR = ROOT / "frontend" / "src" / "i18n" / "locales"

# Translation cache, stored next to this script.
CACHE_FILE = Path(__file__).resolve().parent / ".i18n-translate-cache.json"

# Locale code (JSON file stem) -> target language code handed to the translator.
TARGETS = {
    "fr": "fr",
    "es": "es",
    "it": "it",
    "pl": "pl",
    "ja": "ja",
    "zh": "zh-CN",
    "th": "th",
    "tl": "tl",
    "fil": "tl",
}

# Matches a single-brace interpolation placeholder such as "{name}".
PLACEHOLDER_RE = re.compile(r"\{[^}]+\}")
def deep_merge(base, override):
    """Return ``base`` with ``override`` merged on top of it.

    When ``base`` is not a dict, ``override`` wins unless it is ``None``.
    Otherwise a shallow copy of ``base`` is updated key by key: values that
    are dicts on both sides are merged recursively, every other override
    value replaces the existing one. A falsy ``override`` leaves the copy
    unchanged. Neither argument is mutated.
    """
    if not isinstance(base, dict):
        return base if override is None else override

    merged = dict(base)
    if not override:
        return merged

    for name, incoming in override.items():
        current = merged.get(name)
        both_dicts = isinstance(incoming, dict) and isinstance(current, dict)
        merged[name] = deep_merge(current, incoming) if both_dicts else incoming
    return merged
def flatten(obj, prefix=""):
    """Flatten a nested dict into ``{"dotted.path": "text"}``.

    Nested dicts are walked recursively and their keys joined with ``.``.
    Only string leaves are kept; values of any other type are dropped.
    A falsy ``obj`` yields an empty dict.
    """
    flat = {}
    if not obj:
        return flat

    for name, node in obj.items():
        path = name if not prefix else f"{prefix}.{name}"
        if isinstance(node, dict):
            flat.update(flatten(node, path))
            continue
        if isinstance(node, str):
            flat[path] = node
    return flat
def set_by_path(obj, dot_path, value):
    """Store ``value`` in the nested dict ``obj`` at the dotted ``dot_path``.

    Intermediate levels are created on demand; an intermediate entry that
    exists but is not a dict is replaced by a fresh dict. ``obj`` is mutated
    in place and nothing is returned.
    """
    *parents, leaf = dot_path.split(".")
    node = obj
    for name in parents:
        child = node.get(name)
        if not isinstance(child, dict):
            child = {}
            node[name] = child
        node = child
    node[leaf] = value
def build_overrides(de_flat, target_flat):
    """Build the nested override dict for a locale.

    Every flat entry of ``target_flat`` whose text differs from the entry
    under the same key in ``de_flat`` (or that has no such entry) is written
    into a nested dict at its dotted path. Entries identical to the German
    text are left out.
    """
    overrides = {}
    differing = (
        (path, text)
        for path, text in target_flat.items()
        if de_flat.get(path) != text
    )
    for path, text in differing:
        set_by_path(overrides, path, text)
    return overrides
def protect_placeholders(text):
    """Swap every ``{...}`` placeholder in ``text`` for a numbered token.

    The n-th match of ``PLACEHOLDER_RE`` (counting from 0) becomes
    ``__PHn__``. Returns a tuple ``(protected_text, originals)`` where
    ``originals[n]`` is the placeholder text that token ``n`` stands for.
    """
    originals = []
    pieces = []
    cursor = 0
    for index, match in enumerate(PLACEHOLDER_RE.finditer(text)):
        # Copy the literal text before the placeholder, then the stand-in token.
        pieces.append(text[cursor:match.start()])
        pieces.append(f"__PH{index}__")
        originals.append(match.group(0))
        cursor = match.end()
    pieces.append(text[cursor:])
    return "".join(pieces), originals
def restore_placeholders(text, tokens):
    """Put the original placeholders back into ``text``.

    Each ``__PHn__`` token is replaced by ``tokens[n]``, in index order.
    Tokens that do not occur in ``text`` are simply skipped.
    """
    restored = text
    for index, original in enumerate(tokens):
        marker = f"__PH{index}__"
        if marker in restored:
            restored = restored.replace(marker, original)
    return restored
def load_cache():
    """Load the translation cache from ``CACHE_FILE``.

    Returns the cached mapping, or an empty dict when the file does not
    exist. The cache is only an optimisation, so a file that cannot be
    decoded (for example one truncated by an interrupted write) or that does
    not contain a JSON object is reported on stderr and treated as empty
    instead of aborting the whole run.
    """
    if not CACHE_FILE.exists():
        return {}
    try:
        cache = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        print(f"ignoring unreadable cache {CACHE_FILE}: {e}", file=sys.stderr, flush=True)
        return {}
    if not isinstance(cache, dict):
        # Callers look up and assign by string key, which needs a mapping.
        print(f"ignoring cache {CACHE_FILE}: not a JSON object", file=sys.stderr, flush=True)
        return {}
    return cache
def save_cache(cache):
    """Write ``cache`` to ``CACHE_FILE`` as pretty-printed UTF-8 JSON.

    The data is first written to a sibling ``.tmp`` file and then renamed
    over the real cache, so an interruption in the middle of a write cannot
    leave a half-written, unparseable cache file behind.
    """
    payload = json.dumps(cache, ensure_ascii=False, indent=2) + "\n"
    tmp_file = CACHE_FILE.with_name(CACHE_FILE.name + ".tmp")
    tmp_file.write_text(payload, encoding="utf-8")
    tmp_file.replace(CACHE_FILE)
def translate_en(translator, cache, text, target, delay, timeout=25):
    """Translate the English ``text`` and memoise the result in ``cache``.

    Args:
        translator: object whose ``translate(str)`` method returns the
            translated string.
        cache: dict used as memo; entries are keyed ``"en|<target>|<text>"``.
        text: English source text.
        target: target language code (only used for the cache key).
        delay: seconds to sleep after a successful translation.
        timeout: seconds to wait for a single translation attempt.

    Returns:
        The translated text with the original ``{...}`` placeholders restored.

    Raises:
        The exception of the last attempt when all three attempts fail.
    """
    cache_key = f"en|{target}|{text}"
    if cache_key in cache:
        return cache[cache_key]

    safe, tokens = protect_placeholders(text)
    attempts = 3
    last_err = None
    for attempt in range(attempts):
        # The pool is deliberately not used as a context manager: leaving a
        # ``with`` block calls shutdown(wait=True), which would block on a hung
        # request and make the timeout below ineffective.
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            raw = pool.submit(translator.translate, safe).result(timeout=timeout)
            out = restore_placeholders(raw, tokens)
        except Exception as e:  # includes the futures timeout
            last_err = e
            # Back off before the next try; no point in waiting after the last one.
            if attempt < attempts - 1:
                time.sleep(2 + attempt * 2)
        else:
            cache[cache_key] = out
            time.sleep(delay)
            return out
        finally:
            # Do not wait for the worker; a timed-out call keeps running in
            # the background and its result is discarded.
            pool.shutdown(wait=False)
    raise last_err
def _find_en_leaks(de_flat, en_flat, merged):
    """Return the German keys whose merged locale text is still the English text.

    A key is reported when en-US provides a string for it, the merged locale
    value equals that string, and the string differs from the German one.
    Keys without an English string are never reported, so callers can index
    ``en_flat`` with every returned key.
    """
    return [
        key
        for key, de_text in de_flat.items()
        if key in en_flat
        and merged.get(key) == en_flat[key]
        and en_flat[key] != de_text
    ]


def fix_locale(code, de_flat, en_flat, cache, delay, dry_run):
    """Translate the English leftovers of one locale and rewrite its JSON file.

    Args:
        code: locale code; must be a key of ``TARGETS`` and the stem of the
            locale file in ``LOCALES_DIR``.
        de_flat: flattened German reference strings.
        en_flat: flattened en-US strings merged on top of German.
        cache: translation memo passed through to ``translate_en``.
        delay: pause in seconds after each successful translation.
        dry_run: when true, no translation service is called (a marker string
            is used instead) and the locale file is not written.

    The locale file is rewritten with only the entries that differ from
    German. Texts whose translation fails are reported on stderr and skipped.
    """
    target = TARGETS[code]
    de = json.loads((LOCALES_DIR / "de.json").read_text(encoding="utf-8"))
    locale_path = LOCALES_DIR / f"{code}.json"
    locale_json = json.loads(locale_path.read_text(encoding="utf-8"))
    # Effective strings of the locale: German base with the locale's overrides.
    merged = flatten(deep_merge(de, locale_json))

    leaks = _find_en_leaks(de_flat, en_flat, merged)
    # Group keys by English text so that every distinct text is translated once.
    unique_texts = {}
    for k in leaks:
        unique_texts.setdefault(en_flat[k], []).append(k)
    print(f"[{code}] {len(leaks)} EN-leaks, {len(unique_texts)} unique → {target}", flush=True)
    if not unique_texts:
        return

    translator = GoogleTranslator(source="en", target=target)
    done = 0
    for text, keys in unique_texts.items():
        try:
            if dry_run:
                translated = f"[{target}] {text[:25]}"
            else:
                translated = translate_en(translator, cache, text, target, delay)
            for k in keys:
                merged[k] = translated
            done += 1
            # Report progress and persist the cache every 50 translated texts.
            if done % 50 == 0:
                print(f"[{code}] {done}/{len(unique_texts)}", flush=True)
                save_cache(cache)
        except Exception as e:
            # Best effort: one failing text must not abort the whole locale.
            print(f"[{code}] skip: {text[:40]}… ({e})", file=sys.stderr, flush=True)
    save_cache(cache)

    overrides = build_overrides(de_flat, merged)
    if not dry_run:
        locale_path.write_text(json.dumps(overrides, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    en_leaks_left = len(_find_en_leaks(de_flat, en_flat, merged))
    print(f"[{code}] overrides={len(flatten(overrides))}, enLeaksLeft={en_leaks_left}", flush=True)
def main():
    """Command-line entry point.

    Positional arguments are locale codes (default: every key of
    ``TARGETS``); codes not in ``TARGETS`` are reported on stderr and
    skipped. ``--delay`` sets the pause after each translation and
    ``--dry-run`` disables translation calls and file writes in
    ``fix_locale``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("locales", nargs="*", default=list(TARGETS))
    cli.add_argument("--delay", type=float, default=0.2)
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args()

    de_path = LOCALES_DIR / "de.json"
    en_us_path = LOCALES_DIR / "en-US.json"
    de = json.loads(de_path.read_text(encoding="utf-8"))
    en_us = json.loads(en_us_path.read_text(encoding="utf-8"))

    de_flat = flatten(de)
    # en-US layered over an independent copy of the German tree.
    de_copy = json.loads(json.dumps(de))
    en_flat = flatten(deep_merge(de_copy, en_us))

    cache = load_cache()
    for code in options.locales:
        if code in TARGETS:
            fix_locale(code, de_flat, en_flat, cache, options.delay, options.dry_run)
            # Pause between locales.
            time.sleep(3)
        else:
            print(f"skip {code}", file=sys.stderr)
    save_cache(cache)
    print("Done.", flush=True)


if __name__ == "__main__":
    main()