All checks were successful
Deploy tt-tagebuch / deploy (push) Successful in 45s
- Implemented `fill-de-extended-gaps.js` to fill missing billing/orders keys in de-extended from de. - Created `fill-i18n-deep.py` for deep translation of locale JSONs using deep-translator with fallback options. - Added `fill-i18n-locales.js` to translate locale JSONs and write overrides for untranslated keys. - Introduced `fix-en-leaks.py` to translate keys that still match the en-US merge, addressing English leaks. - Developed `patch-de-ch-swiss.js` to replace 'ß' with 'ss' in de-CH.json without deleting existing entries. - Created `patch-en-gb-au.js` to apply UK/AU spelling corrections in en-GB and en-AU locales. - Added shell scripts `run-fix-en-leaks.sh` and `run-i18n-deep-fill.sh` for sequential execution of translation tasks. - Implemented `update-i18n-todo-stats.js` to update statistics in the I18N_TODO.md file based on translation completeness.
197 lines
6.0 KiB
Python
197 lines
6.0 KiB
Python
#!/usr/bin/env python3
"""Translate keys whose locale value still equals the en-US merge (an "EN leak")."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
import time
|
|
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
|
|
from pathlib import Path
|
|
|
|
from deep_translator import GoogleTranslator
|
|
|
|
# Absolute location of this script; the other paths below are derived from it.
_SCRIPT_PATH = Path(__file__).resolve()

# Two levels above this script file.
ROOT = _SCRIPT_PATH.parents[1]

# Directory holding the per-locale JSON files (de.json, en-US.json, fr.json, ...).
LOCALES_DIR = ROOT.joinpath("frontend", "src", "i18n", "locales")

# Translation cache, stored next to this script.
CACHE_FILE = _SCRIPT_PATH.with_name(".i18n-translate-cache.json")

# Locale file code -> language code handed to the translator.
# Insertion order is the default processing order when no locales are given.
TARGETS = {
    "fr": "fr",
    "es": "es",
    "it": "it",
    "pl": "pl",
    "ja": "ja",
    "zh": "zh-CN",
    "th": "th",
    "tl": "tl",
    "fil": "tl",
}

# Matches one `{...}` interpolation placeholder (non-empty, no nested braces).
PLACEHOLDER_RE = re.compile(r"\{[^}]+\}")
|
|
|
|
|
|
def deep_merge(base, override):
    """Recursively merge ``override`` onto ``base`` and return the result.

    Args:
        base: The underlying value, normally a dict.
        override: Values that take precedence over ``base``.

    Returns:
        If ``base`` is not a dict: ``override`` unless it is ``None``, else
        ``base``. Otherwise a new dict (shallow copy of ``base``) in which
        every key of ``override`` replaces the base entry, except where both
        sides hold a dict, in which case those are merged recursively.
        ``base`` itself is not modified.
    """
    if not isinstance(base, dict):
        return base if override is None else override

    merged = dict(base)
    for name, incoming in (override or {}).items():
        existing = merged.get(name)
        if isinstance(incoming, dict) and isinstance(existing, dict):
            # Both sides are mappings: descend instead of replacing wholesale.
            merged[name] = deep_merge(existing, incoming)
        else:
            merged[name] = incoming
    return merged
|
|
|
|
|
|
def flatten(obj, prefix=""):
    """Flatten a nested dict into ``{"dotted.path": "string value"}``.

    Only string leaves are kept; nested dicts are descended into and every
    other value type (lists, numbers, booleans, ``None``) is ignored.

    Args:
        obj: Nested mapping to flatten; a falsy value yields an empty result.
        prefix: Dotted path prepended to every produced key.

    Returns:
        A new flat dict in traversal order.
    """

    def walk(node, base):
        # Depth-first traversal yielding (dotted_path, text) pairs.
        for name, child in (node or {}).items():
            path = f"{base}.{name}" if base else name
            if isinstance(child, dict):
                yield from walk(child, path)
            elif isinstance(child, str):
                yield path, child

    return dict(walk(obj, prefix))
|
|
|
|
|
|
def set_by_path(obj, dot_path, value):
    """Assign ``value`` inside nested dict ``obj`` at a dotted path, in place.

    Intermediate dicts are created as needed; an intermediate entry that
    exists but is not a dict is replaced by a fresh dict.

    Args:
        obj: Dict to modify.
        dot_path: Key path with segments separated by ``"."``.
        value: Value stored at the final segment.
    """
    *parents, leaf = dot_path.split(".")
    node = obj
    for name in parents:
        child = node.get(name)
        if not isinstance(child, dict):
            # Missing or non-dict entry: (re)create the intermediate level.
            child = node[name] = {}
        node = child
    node[leaf] = value
|
|
|
|
|
|
def build_overrides(de_flat, target_flat):
    """Build the nested override dict for a locale.

    Args:
        de_flat: Flattened base (de) strings, keyed by dotted path.
        target_flat: Flattened locale strings, keyed by dotted path.

    Returns:
        A nested dict containing only the entries of ``target_flat`` whose
        value differs from the value under the same key in ``de_flat``.
    """
    overrides = {}
    differing = (
        (path, text)
        for path, text in target_flat.items()
        if de_flat.get(path) != text
    )
    for path, text in differing:
        set_by_path(overrides, path, text)
    return overrides
|
|
|
|
|
|
def protect_placeholders(text):
    """Swap every ``{...}`` placeholder for an indexed ``__PHn__`` token.

    Args:
        text: Source string that may contain placeholders matched by
            ``PLACEHOLDER_RE``.

    Returns:
        A ``(masked_text, tokens)`` tuple where ``tokens[n]`` is the original
        placeholder that ``__PHn__`` stands for.
    """
    tokens = []
    pieces = []
    cursor = 0
    for index, match in enumerate(PLACEHOLDER_RE.finditer(text)):
        # Copy the text before the placeholder, then the stand-in token.
        pieces.append(text[cursor:match.start()])
        pieces.append(f"__PH{index}__")
        tokens.append(match.group(0))
        cursor = match.end()
    pieces.append(text[cursor:])
    return "".join(pieces), tokens
|
|
|
|
|
|
def restore_placeholders(text, tokens):
    """Put the original placeholders back into a translated string.

    All ``__PHn__`` tokens are replaced in a single pass, so text that has
    just been restored is never scanned again. The match tolerates the
    whitespace and letter-case changes a translation backend may introduce
    (``__ PH0 __``, ``__ph0__``).

    Args:
        text: Translated string containing ``__PHn__`` tokens.
        tokens: Original placeholders; ``tokens[n]`` replaces ``__PHn__``.

    Returns:
        ``text`` with every known token replaced. Tokens whose index is not
        in ``tokens`` are left as they are. When ``tokens`` is empty, ``text``
        is returned unchanged.
    """
    if not tokens:
        return text

    def _swap(match):
        index = int(match.group(1))
        # Unknown index: keep the token rather than guess a replacement.
        return tokens[index] if index < len(tokens) else match.group(0)

    # A function replacement avoids backslash/group escaping in the tokens.
    return re.sub(r"__\s*PH\s*([0-9]+)\s*__", _swap, text, flags=re.IGNORECASE)
|
|
|
|
|
|
def load_cache():
    """Load the persisted translation cache from ``CACHE_FILE``.

    The cache only saves repeated translation requests, so an unusable file
    is reported on stderr and ignored instead of aborting the run.

    Returns:
        The cached mapping, or an empty dict when the file does not exist,
        is not valid JSON, or does not contain a JSON object.
    """
    try:
        raw = CACHE_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {}

    try:
        cache = json.loads(raw)
    except json.JSONDecodeError as err:
        # E.g. a write that was interrupted half-way through.
        print(f"ignoring unreadable cache {CACHE_FILE}: {err}", file=sys.stderr, flush=True)
        return {}

    if not isinstance(cache, dict):
        print(f"ignoring cache {CACHE_FILE}: not a JSON object", file=sys.stderr, flush=True)
        return {}
    return cache
|
|
|
|
|
|
def save_cache(cache):
    """Persist the translation cache to ``CACHE_FILE`` as pretty-printed JSON.

    The data is written to a sibling temporary file first and then moved over
    the real file, so an interrupted write cannot leave a truncated cache.

    Args:
        cache: JSON-serialisable mapping to store.
    """
    payload = json.dumps(cache, ensure_ascii=False, indent=2) + "\n"
    tmp_path = CACHE_FILE.with_name(CACHE_FILE.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    # Same-directory rename: replaces the previous cache in one step.
    tmp_path.replace(CACHE_FILE)
|
|
|
|
|
|
def translate_en(translator, cache, text, target, delay, timeout=25):
    """Translate one English string, using and updating the cache.

    Placeholders are masked before the request and restored afterwards. Up to
    three attempts are made, with a growing pause between failed attempts.

    Args:
        translator: Object exposing ``translate(text)``.
        cache: Dict keyed by ``"en|<target>|<text>"``; updated on success.
        text: English source string.
        target: Target language code (used for the cache key only).
        delay: Seconds to sleep after a successful request.
        timeout: Seconds to wait for a single request.

    Returns:
        The translated string with placeholders restored.

    Raises:
        Exception: The error of the last failed attempt (timeout, translator
            error, or ``ValueError`` when the translator returned a
            non-string).
    """
    cache_key = f"en|{target}|{text}"
    if cache_key in cache:
        return cache[cache_key]

    safe, tokens = protect_placeholders(text)
    max_attempts = 3
    last_err = None

    for attempt in range(max_attempts):
        # No `with` block here: leaving it would call shutdown(wait=True) and
        # block on a hung request, which defeats the timeout.
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            raw = pool.submit(translator.translate, safe).result(timeout=timeout)
        except Exception as err:  # includes the futures timeout
            last_err = err
        else:
            if isinstance(raw, str):
                out = restore_placeholders(raw, tokens)
                cache[cache_key] = out
                time.sleep(delay)
                return out
            # Never cache or return a non-string result.
            last_err = ValueError(f"translator returned {raw!r}")
        finally:
            # Do not wait for a possibly stuck worker; it ends on its own
            # once the underlying call returns.
            pool.shutdown(wait=False)

        if attempt < max_attempts - 1:
            time.sleep(2 + attempt * 2)

    raise last_err
|
|
|
|
|
|
def _en_leak_keys(de_flat, en_flat, merged):
    """Return the de keys whose merged locale value is still the en-US text."""
    return [
        key
        for key, de_text in de_flat.items()
        # Keys without an English string cannot be an English leak.
        if key in en_flat and merged.get(key) == en_flat[key] and en_flat[key] != de_text
    ]


def fix_locale(code, de_flat, en_flat, cache, delay, dry_run):
    """Translate the English leaks of one locale and rewrite its override file.

    A key counts as a leak when the locale value (after merging the locale
    file onto de.json) equals the en-US value while en-US differs from de.
    Each distinct English text is translated once and applied to all of its
    keys. Failed translations are reported on stderr and the key is left
    as it is.

    NOTE(review): the rewritten file is built from a flattened, string-only
    view, so non-string leaves in the locale file (lists, numbers, booleans)
    are not carried over. Confirm locale files contain only strings.

    Args:
        code: Locale file code; must be a key of ``TARGETS``.
        de_flat: Flattened de.json strings.
        en_flat: Flattened strings of en-US merged onto de.
        cache: Translation cache, updated in place and saved periodically.
        delay: Seconds to sleep after each translation request.
        dry_run: When true, use dummy translations and do not write the
            locale file.
    """
    target = TARGETS[code]
    de = json.loads((LOCALES_DIR / "de.json").read_text(encoding="utf-8"))
    locale_path = LOCALES_DIR / f"{code}.json"
    locale_json = json.loads(locale_path.read_text(encoding="utf-8"))
    # deep_merge copies what it changes and `de` is local, so no defensive copy.
    merged = flatten(deep_merge(de, locale_json))

    leaks = _en_leak_keys(de_flat, en_flat, merged)

    # Group keys by English text so every distinct string is translated once.
    unique_texts = {}
    for k in leaks:
        unique_texts.setdefault(en_flat[k], []).append(k)

    print(f"[{code}] {len(leaks)} EN-leaks, {len(unique_texts)} unique → {target}", flush=True)
    if not unique_texts:
        return

    translator = GoogleTranslator(source="en", target=target)
    done = 0
    for text, keys in unique_texts.items():
        if dry_run:
            translated = f"[{target}] {text[:25]}"
        else:
            try:
                translated = translate_en(translator, cache, text, target, delay)
            except Exception as e:
                # Best effort: keep going, the key simply stays a leak.
                print(f"[{code}] skip: {text[:40]}… ({e})", file=sys.stderr, flush=True)
                continue

        for k in keys:
            merged[k] = translated
        done += 1
        if done % 50 == 0:
            print(f"[{code}] {done}/{len(unique_texts)}", flush=True)
            # Checkpoint so an aborted run keeps the work done so far.
            save_cache(cache)

    save_cache(cache)
    overrides = build_overrides(de_flat, merged)
    if not dry_run:
        locale_path.write_text(json.dumps(overrides, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")

    en_leaks_left = len(_en_leak_keys(de_flat, en_flat, merged))
    print(f"[{code}] overrides={len(flatten(overrides))}, enLeaksLeft={en_leaks_left}", flush=True)
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Positional arguments are locale codes (default: every key of
    ``TARGETS``); ``--delay`` sets the pause after each translation request
    and ``--dry-run`` suppresses writing locale files. Codes that are not in
    ``TARGETS`` are reported on stderr and skipped.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("locales", nargs="*", default=list(TARGETS))
    cli.add_argument("--delay", type=float, default=0.2)
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args()

    de_path = LOCALES_DIR / "de.json"
    en_us_path = LOCALES_DIR / "en-US.json"
    de = json.loads(de_path.read_text(encoding="utf-8"))
    en_us = json.loads(en_us_path.read_text(encoding="utf-8"))

    de_flat = flatten(de)
    # en-US is layered over a copy of de.
    de_copy = json.loads(json.dumps(de))
    en_flat = flatten(deep_merge(de_copy, en_us))
    cache = load_cache()

    for code in options.locales:
        if code in TARGETS:
            fix_locale(code, de_flat, en_flat, cache, options.delay, options.dry_run)
            # Pause between locales.
            time.sleep(3)
        else:
            print(f"skip {code}", file=sys.stderr)

    save_cache(cache)
    print("Done.", flush=True)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|