Files
trainingstagebuch/scripts/fill-i18n-deep.py
Torsten Schulz (local) eb54b4f7cf
All checks were successful
Deploy tt-tagebuch / deploy (push) Successful in 45s
feat(i18n): add scripts for locale translation and patching
- Implemented `fill-de-extended-gaps.js` to fill missing billing/orders keys in de-extended from de.
- Created `fill-i18n-deep.py` for deep translation of locale JSONs using deep-translator with fallback options.
- Added `fill-i18n-locales.js` to translate locale JSONs and write overrides for untranslated keys.
- Introduced `fix-en-leaks.py` to translate keys that still match the en-US merge, addressing English leaks.
- Developed `patch-de-ch-swiss.js` to replace 'ß' with 'ss' in de-CH.json without deleting existing entries.
- Created `patch-en-gb-au.js` to apply UK/AU spelling corrections in en-GB and en-AU locales.
- Added shell scripts `run-fix-en-leaks.sh` and `run-i18n-deep-fill.sh` for sequential execution of translation tasks.
- Implemented `update-i18n-todo-stats.js` to update statistics in the I18N_TODO.md file based on translation completeness.
2026-05-15 15:52:54 +02:00

238 lines
7.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""Fill locale JSON files via deep-translator (fallback when MyMemory/Google are rate-limited)."""
from __future__ import annotations
import argparse
import json
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
from pathlib import Path
from deep_translator import GoogleTranslator
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Folder holding one JSON file per locale (de.json, en-US.json, fr.json, ...).
LOCALES_DIR = ROOT / "frontend" / "src" / "i18n" / "locales"
# Translation cache, stored next to this script.
CACHE_FILE = Path(__file__).resolve().parent / ".i18n-translate-cache.json"
# Locale file code -> language code passed to GoogleTranslator as ``target``.
# Note that "zh" is sent as "zh-CN" and "fil" shares the "tl" target.
LOCALE_TARGETS = {
"fr": "fr",
"es": "es",
"it": "it",
"pl": "pl",
"ja": "ja",
"zh": "zh-CN",
"th": "th",
"tl": "tl",
"fil": "tl",
}
# Locale codes that main() never processes.
SKIP = {"de", "de-extended", "de-CH", "en-US", "en-GB", "en-AU"}
# Processing order for --all; --from-locale resumes from a position in this list.
FILL_ORDER = ["fr", "es", "it", "pl", "ja", "zh", "th", "tl", "fil"]
# Matches "{...}" segments, which are masked before translation.
PLACEHOLDER_RE = re.compile(r"\{[^}]+\}")
def deep_merge(base, override):
    """Return ``base`` with ``override`` merged on top, recursing into dicts.

    Neither argument is mutated. The copy is shallow per level: nested dicts
    that ``override`` does not touch are shared with ``base``.

    Args:
        base: The fallback value, normally a dict.
        override: Values that take precedence, normally a dict or ``None``.

    Returns:
        * ``base`` is not a dict: ``override`` unless it is ``None``, else
          ``base``.
        * ``override`` is falsy (``None``, ``{}``, ``""`` ...): a shallow copy
          of ``base``.
        * ``override`` is a truthy non-dict: ``override`` itself, mirroring
          how a non-dict value replaces a dict one level down.
        * otherwise: a new dict in which nested dicts present on both sides
          are merged recursively and every other key is taken from
          ``override``.
    """
    if not isinstance(base, dict):
        return override if override is not None else base
    if not override:
        return dict(base)
    if not isinstance(override, dict):
        # Previously this crashed with AttributeError on ``.items()``.
        return override

    merged = dict(base)
    for key, value in override.items():
        current = merged.get(key)
        if isinstance(value, dict) and isinstance(current, dict):
            merged[key] = deep_merge(current, value)
        else:
            merged[key] = value
    return merged
def flatten(obj, prefix=""):
    """Collapse a nested dict into a flat ``{"a.b.c": "text"}`` mapping.

    Only string leaves are kept; any other leaf type (numbers, lists,
    ``None`` ...) is dropped. Keys appear in depth-first insertion order.

    Args:
        obj: Nested dict to flatten; a falsy value yields an empty result.
        prefix: Dotted path prepended to every key (used by the recursion).

    Returns:
        A new flat dict mapping dotted paths to strings.
    """
    flat = {}
    for name, child in (obj or {}).items():
        dotted = name if not prefix else f"{prefix}.{name}"
        if isinstance(child, str):
            flat[dotted] = child
        elif isinstance(child, dict):
            flat.update(flatten(child, dotted))
    return flat
def set_by_path(obj, dot_path, value):
    """Store ``value`` in nested dict ``obj`` at the dotted path ``dot_path``.

    Missing intermediate levels are created. An intermediate entry that exists
    but is not a dict is replaced by a fresh dict. ``obj`` is modified in
    place and nothing is returned.

    Args:
        obj: The dict to modify.
        dot_path: Path such as ``"a.b.c"``; each segment is one nesting level.
        value: The value to store at the final segment.
    """
    *parents, leaf = dot_path.split(".")
    node = obj
    for name in parents:
        if not (name in node and isinstance(node[name], dict)):
            node[name] = {}
        node = node[name]
    node[leaf] = value
def build_overrides(de_flat, target_flat):
    """Build the nested override dict for a locale.

    Every entry of ``target_flat`` whose value differs from the value stored
    under the same key in ``de_flat`` (or whose key is absent there) is
    written into a nested dict at its dotted path.

    Args:
        de_flat: Flat German reference mapping (dotted key -> text).
        target_flat: Flat mapping for the locale being written.

    Returns:
        A new nested dict containing only the differing entries.
    """
    overrides = {}
    differing = (
        (path, text)
        for path, text in target_flat.items()
        if text != de_flat.get(path)
    )
    for path, text in differing:
        set_by_path(overrides, path, text)
    return overrides
def protect_placeholders(text):
    """Mask every ``{...}`` segment of ``text`` with a numbered marker.

    Each match of ``PLACEHOLDER_RE`` is replaced, left to right, by
    ``__PH0__``, ``__PH1__`` ... and the original segments are collected in
    the same order.

    Args:
        text: The string that is about to be translated.

    Returns:
        A ``(masked_text, originals)`` tuple, where ``originals[i]`` is the
        text that ``__PH{i}__`` stands for.
    """
    originals = []

    def _mask(match):
        originals.append(match.group(0))
        return f"__PH{len(originals) - 1}__"

    masked = PLACEHOLDER_RE.sub(_mask, text)
    return masked, originals
def restore_placeholders(text, tokens):
    """Put the original ``{...}`` segments back in place of their markers.

    For each index ``i``, every exact occurrence of ``__PH{i}__`` in ``text``
    is replaced by ``tokens[i]``. Markers that do not match exactly (for
    example with changed case) are left as they are.

    Args:
        text: Text containing ``__PH{i}__`` markers.
        tokens: Original segments, indexed by marker number.

    Returns:
        The text with all exactly matching markers substituted.
    """
    restored = text
    for index, original in enumerate(tokens):
        marker = "__PH" + str(index) + "__"
        restored = restored.replace(marker, original)
    return restored
def load_cache():
    """Read the translation cache from ``CACHE_FILE``.

    Returns:
        The parsed JSON content of the cache file, or an empty dict when the
        file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if not CACHE_FILE.exists():
        return {}
    with CACHE_FILE.open(encoding="utf-8") as handle:
        return json.load(handle)
def save_cache(cache):
    """Write the translation cache to ``CACHE_FILE`` as pretty-printed JSON.

    The data is written to a sibling ``*.tmp`` file first and then renamed
    over the real cache. Writing in place could leave a truncated file if the
    script is interrupted mid-write, and the next ``json.loads`` of that file
    would fail.

    Args:
        cache: JSON-serialisable mapping of cache keys to translated text.
    """
    payload = json.dumps(cache, ensure_ascii=False, indent=2) + "\n"
    tmp_path = CACHE_FILE.with_name(CACHE_FILE.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    # Path.replace overwrites the destination; on POSIX the rename is atomic.
    tmp_path.replace(CACHE_FILE)
def fill_de_ch(de_flat, merged_flat):
    """Apply the Swiss spelling to entries that still equal the German text.

    For every key of ``de_flat`` whose value in ``merged_flat`` is identical
    to the German value, the entry in ``merged_flat`` is replaced by the
    German text with each "ß" turned into "ss". Other entries are untouched.
    ``merged_flat`` is modified in place and nothing is returned.

    Args:
        de_flat: Flat German reference mapping (dotted key -> text).
        merged_flat: Flat mapping of the locale being filled.
    """
    for key, german in de_flat.items():
        if merged_flat.get(key) != german:
            continue
        merged_flat[key] = german.replace("ß", "ss")
def translate_text(translator, cache, text, from_lang, to_lang, delay, timeout=20):
    """Translate ``text`` once, using ``cache`` and retrying on failure.

    ``{...}`` placeholders are masked before the request and restored in the
    result. A successful translation is stored in ``cache`` under
    ``"{from_lang}|{to_lang}|{text}"``.

    Args:
        translator: Object whose ``translate(str)`` method performs the request.
        cache: Mutable mapping of cache keys to translated text.
        text: Source text to translate.
        from_lang: Source language code (only used for the cache key).
        to_lang: Target language code (only used for the cache key).
        delay: Seconds to sleep after a successful, non-cached request.
        timeout: Seconds to wait for a single request before giving up on it.

    Returns:
        The translated text with placeholders restored. Cache hits return
        immediately without sleeping.

    Raises:
        Exception: The error of the last of three failed attempts (a
            ``concurrent.futures`` timeout if the request did not finish).
    """
    cache_key = f"{from_lang}|{to_lang}|{text}"
    if cache_key in cache:
        return cache[cache_key]

    safe, tokens = protect_placeholders(text)
    max_attempts = 3
    last_err = None
    for attempt in range(max_attempts):
        if attempt:
            # Back off before a retry: 2 s, then 4 s. No sleep after the
            # final failure, where it would only delay the raise.
            time.sleep(2 * attempt)
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            raw = pool.submit(translator.translate, safe).result(timeout=timeout)
        except Exception as err:  # includes the futures timeout
            last_err = err
            continue
        finally:
            # Do not use ``with`` here: leaving the block calls
            # shutdown(wait=True), which would block on a hung request and
            # defeat ``timeout``. With wait=False the stuck worker is left
            # to finish in the background.
            pool.shutdown(wait=False)

        out = restore_placeholders(raw, tokens)
        cache[cache_key] = out
        time.sleep(delay)
        return out
    raise last_err
def fill_locale(code, en_flat, cache, delay, dry_run):
    """Translate the strings of one locale that still equal the German text.

    Steps:
      1. Load ``de.json`` and ``<code>.json`` and merge the locale over a copy
         of the German data.
      2. Collect every key whose merged value is still identical to German.
      3. Translate each distinct source text once. English is the source when
         ``en_flat`` has a non-empty value that differs from German;
         otherwise German is.
      4. Write only the entries that differ from German back to
         ``<code>.json`` (skipped when ``dry_run`` is set).

    For ``de-CH`` no translator is used; "ß" is replaced by "ss" instead.

    Args:
        code: Locale code; must be ``"de-CH"`` or a key of ``LOCALE_TARGETS``.
        en_flat: Flat English mapping (dotted key -> text).
        cache: Translation cache, updated in place and saved to disk.
        delay: Seconds to pause after each real translation request.
        dry_run: If true, use ``"[<target>] <text>"`` stand-ins instead of
            translating and do not write the locale file.

    Raises:
        KeyError: If ``code`` is neither ``"de-CH"`` nor in ``LOCALE_TARGETS``.
    """
    is_swiss = code == "de-CH"
    # Resolve the target before touching any file so an unknown code still
    # fails fast. de-CH has no translator target; looking it up
    # unconditionally made the de-CH branch below unreachable.
    target = None if is_swiss else LOCALE_TARGETS[code]

    de = json.loads((LOCALES_DIR / "de.json").read_text(encoding="utf-8"))
    de_flat = flatten(de)
    locale_path = LOCALES_DIR / f"{code}.json"
    locale_json = json.loads(locale_path.read_text(encoding="utf-8"))
    # The JSON round trip deep-copies ``de`` so the merge cannot alias it.
    merged_flat = flatten(deep_merge(json.loads(json.dumps(de)), locale_json))

    if is_swiss:
        fill_de_ch(de_flat, merged_flat)
    else:
        keys_to_fix = [k for k in de_flat if merged_flat.get(k) == de_flat[k]]

        # Group keys by (source language, source text) so identical strings
        # are translated only once.
        unique = {}
        for k in keys_to_fix:
            english = en_flat.get(k)
            from_lang = "en" if english and english != de_flat[k] else "de"
            text = english if from_lang == "en" else de_flat[k]
            unique.setdefault((from_lang, text), []).append(k)
        print(f"[{code}] {len(keys_to_fix)} keys, {len(unique)} unique → {target}", flush=True)

        by_source = {"en": [], "de": []}
        for (from_lang, text), keys in unique.items():
            by_source[from_lang].append((text, keys))

        done = 0
        try:
            for from_lang in ("en", "de"):
                items = by_source[from_lang]
                if not items:
                    continue
                translator = GoogleTranslator(source=from_lang, target=target)
                for text, keys in items:
                    # Only the translation itself is guarded, so a failing
                    # cache write is not mistaken for a failed translation.
                    try:
                        if dry_run:
                            translated = f"[{target}] {text[:30]}"
                        else:
                            # translate_text checks the cache first and
                            # returns hits without sleeping.
                            translated = translate_text(
                                translator, cache, text, from_lang, target, delay
                            )
                    except Exception as e:
                        print(f"[{code}] skip: {text[:40]}… ({e})", file=sys.stderr, flush=True)
                        # Fall back to the English text rather than leaving
                        # German in place.
                        if from_lang == "en" and en_flat:
                            for k in keys:
                                merged_flat[k] = text
                        continue

                    for k in keys:
                        merged_flat[k] = translated
                    done += 1
                    if done % 50 == 0:
                        print(f"[{code}] {done}/{len(unique)}", flush=True)
                        save_cache(cache)
        finally:
            # Persist what was translated even on Ctrl-C or an unexpected
            # error; otherwise up to 49 translations since the last
            # checkpoint would be lost.
            save_cache(cache)

    overrides = build_overrides(de_flat, merged_flat)
    if not dry_run:
        locale_path.write_text(
            json.dumps(overrides, ensure_ascii=False, indent=2) + "\n", encoding="utf-8"
        )
    still_de = sum(1 for k in de_flat if merged_flat.get(k) == de_flat[k])
    print(f"[{code}] overrides={len(flatten(overrides))}, stillDe={still_de}", flush=True)
def main():
    """Command-line entry point: fill one locale or all of ``FILL_ORDER``.

    Options:
        --locale CODE       Process a single locale.
        --all               Process every locale in ``FILL_ORDER``.
        --from-locale CODE  Start at CODE within ``FILL_ORDER``.
        --delay SECONDS     Pause after each translation request (default 0.35).
        --dry-run           Do not translate and do not write locale files.

    NOTE(review): ``--from-locale`` also takes effect together with
    ``--locale`` and then replaces the single locale by the tail of
    ``FILL_ORDER``; the help text only mentions ``--all``. Behaviour is kept
    as it was; confirm whether that combination should be rejected.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--locale")
    parser.add_argument("--all", action="store_true")
    parser.add_argument("--from-locale", dest="from_locale", help="Bei --all: ab dieser Locale fortsetzen")
    parser.add_argument("--delay", type=float, default=0.35)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    codes = FILL_ORDER if args.all else [args.locale] if args.locale else None
    if codes and args.from_locale:
        if args.from_locale not in FILL_ORDER:
            parser.error(f"Unknown locale: {args.from_locale}")
        codes = FILL_ORDER[FILL_ORDER.index(args.from_locale):]
    if not codes:
        parser.error("Usage: --locale <code> | --all")

    # Report locales that will not be processed instead of dropping them
    # silently; a mistyped --locale used to print just "Done.".
    runnable = []
    for code in codes:
        if code in SKIP or code not in LOCALE_TARGETS:
            print(f"[{code}] skipped: locale is excluded or has no translation target",
                  file=sys.stderr, flush=True)
        else:
            runnable.append(code)

    de = json.loads((LOCALES_DIR / "de.json").read_text(encoding="utf-8"))
    en_us = json.loads((LOCALES_DIR / "en-US.json").read_text(encoding="utf-8"))
    # English falls back to German for keys en-US.json does not define.
    en_flat = flatten(deep_merge(de, en_us))
    cache = load_cache()
    try:
        for index, code in enumerate(runnable):
            if index:
                # Pause between locales only, not after the last one.
                time.sleep(5)
            fill_locale(code, en_flat, cache, args.delay, args.dry_run)
    finally:
        # Keep the cache even when a locale run fails or is interrupted.
        save_cache(cache)
    print("Done.")


if __name__ == "__main__":
    main()