Files
trainingstagebuch/scripts/fill-i18n-locales.js
Torsten Schulz (local) eb54b4f7cf
All checks were successful
Deploy tt-tagebuch / deploy (push) Successful in 45s
feat(i18n): add scripts for locale translation and patching
- Implemented `fill-de-extended-gaps.js` to fill missing billing/orders keys in de-extended from de.
- Created `fill-i18n-deep.py` for deep translation of locale JSONs using deep-translator with fallback options.
- Added `fill-i18n-locales.js` to translate locale JSONs and write overrides for untranslated keys.
- Introduced `fix-en-leaks.py` to translate keys that still match the en-US merge, addressing English leaks.
- Developed `patch-de-ch-swiss.js` to replace 'ß' with 'ss' in de-CH.json without deleting existing entries.
- Created `patch-en-gb-au.js` to apply UK/AU spelling corrections in en-GB and en-AU locales.
- Added shell scripts `run-fix-en-leaks.sh` and `run-i18n-deep-fill.sh` for sequential execution of translation tasks.
- Implemented `update-i18n-todo-stats.js` to update statistics in the I18N_TODO.md file based on translation completeness.
2026-05-15 15:52:54 +02:00

386 lines
11 KiB
JavaScript

#!/usr/bin/env node
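/**
* Fills locale JSON files: leaf strings that are still German after
* deepMerge(de, locale) are translated (MyMemory first, Google as fallback)
* and written back to the locale file as overrides.
*
* Cache: scripts/.i18n-translate-cache.json
*
* Usage:
* node scripts/fill-i18n-locales.js --locale en-US
* node scripts/fill-i18n-locales.js --all --delay 450
*
* Further flags:
* --dry-run      do not call the translators or write locale files
* --no-google    use MyMemory only
* --en-fallback  keep the English source text when a retry from English fails
* --skip-en      leave out en-US, en-GB and en-AU
*/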
const fs = require('fs');
const path = require('path');
// Project root: the parent directory of the directory containing this script.
const ROOT = path.resolve(__dirname, '..');
// Directory holding de.json and one <code>.json file per locale.
const LOCALES_DIR = path.join(ROOT, 'frontend', 'src', 'i18n', 'locales');
// Translation cache, stored next to this script.
const CACHE_FILE = path.join(__dirname, '.i18n-translate-cache.json');
// Location of the Google translate package; main() only adds the Google
// provider when this path exists.
const TRANSLATE_PKG = path.join(ROOT, 'frontend', 'node_modules', '@vitalets/google-translate-api');
// Matches `{...}` interpolation placeholders that must survive translation.
const PLACEHOLDER_RE = /\{[^}]+\}/g;
// Maps a locale file code to the language code handed to the translation providers.
const LOCALE_TARGETS = {
'en-US': 'en',
'en-GB': 'en',
'en-AU': 'en',
es: 'es',
fr: 'fr',
it: 'it',
pl: 'pl',
ja: 'ja',
zh: 'zh-CN',
th: 'th',
tl: 'tl',
fil: 'tl',
'de-CH': 'de',
};
// Locales that main() never processes.
const SKIP_LOCALES = new Set(['de', 'de-extended']);
// Processing order for --all. en-US comes first; main() reloads the English
// reference strings after en-US has been filled.
const ALL_FILL_ORDER = [
'en-US',
'en-GB',
'en-AU',
'fr',
'es',
'it',
'pl',
'ja',
'zh',
'th',
'tl',
'fil',
'de-CH',
];
/**
 * Recursively layers `override` on top of `base` and returns the result.
 *
 * Plain objects (non-null, non-array) that exist on both sides are merged key
 * by key; any other override value replaces the base value wholesale. Neither
 * argument is mutated.
 *
 * @param {*} base - Base message tree.
 * @param {*} override - Values that take precedence over `base`.
 * @returns {*} A new merged object, or `override ?? base` when `base` is not
 *   a plain object.
 */
function deepMergeMessages(base, override) {
  const isPlainObject = (candidate) =>
    Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate);

  if (!isPlainObject(base)) {
    return override ?? base;
  }

  const merged = { ...base };
  Object.entries(override || {}).forEach(([name, incoming]) => {
    const existing = merged[name];
    merged[name] =
      isPlainObject(incoming) && isPlainObject(existing)
        ? deepMergeMessages(existing, incoming)
        : incoming;
  });
  return merged;
}
/**
 * Flattens a nested message tree into a map of dot-separated paths.
 *
 * Only string leaves are kept; arrays, numbers, booleans and null are ignored.
 *
 * @param {object} obj - Nested message tree (a nullish value is treated as empty).
 * @param {string} [prefix=''] - Path of `obj` within the full tree.
 * @param {object} [out={}] - Accumulator that receives the flattened entries.
 * @returns {object} `out`, mapping `a.b.c` style keys to strings.
 */
function flatten(obj, prefix = '', out = {}) {
  Object.entries(obj || {}).forEach(([name, leaf]) => {
    const dotted = prefix ? `${prefix}.${name}` : name;
    const isNested = Boolean(leaf) && typeof leaf === 'object' && !Array.isArray(leaf);
    if (isNested) {
      flatten(leaf, dotted, out);
      return;
    }
    if (typeof leaf === 'string') {
      out[dotted] = leaf;
    }
  });
  return out;
}
/**
 * Writes `value` into `obj` at the location named by a dot-separated path,
 * creating intermediate objects as needed.
 *
 * An intermediate entry that is missing, falsy or not an object is replaced
 * by a fresh `{}` before descending.
 *
 * @param {object} obj - Object to modify in place.
 * @param {string} dotPath - Path such as `a.b.c`.
 * @param {*} value - Value stored at the final path segment.
 */
function setByPath(obj, dotPath, value) {
  const segments = dotPath.split('.');
  const leafName = segments.pop();
  const parent = segments.reduce((node, segment) => {
    const child = node[segment];
    if (!child || typeof child !== 'object') {
      node[segment] = {};
    }
    return node[segment];
  }, obj);
  parent[leafName] = value;
}
/**
 * Builds the nested override tree for a locale: every flattened target entry
 * whose value differs from the German entry under the same key.
 *
 * @param {object} deFlat - Flattened German messages.
 * @param {object} targetFlat - Flattened messages of the target locale.
 * @returns {object} Nested object containing only the differing entries.
 */
function buildOverrides(deFlat, targetFlat) {
  const overrides = {};
  Object.keys(targetFlat)
    .filter((name) => targetFlat[name] !== deFlat[name])
    .forEach((name) => setByPath(overrides, name, targetFlat[name]));
  return overrides;
}
/**
 * Replaces every `{...}` placeholder in `text` with a numbered `__PHn__`
 * marker before the text is handed to a translator.
 *
 * @param {string} text - Source message.
 * @returns {{safe: string, tokens: string[]}} The text with markers, and the
 *   original placeholders indexed by marker number.
 */
function protectPlaceholders(text) {
  const tokens = [];
  const swapForMarker = (placeholder) => {
    const marker = `__PH${tokens.length}__`;
    tokens.push(placeholder);
    return marker;
  };
  return { safe: text.replace(PLACEHOLDER_RE, swapForMarker), tokens };
}
/**
 * Puts the original placeholders back in place of their `__PHn__` markers.
 *
 * Markers are matched case-insensitively and with optional whitespace around
 * `PH` and the index (e.g. `__ ph0 __`).
 *
 * @param {string} text - Translated text containing markers.
 * @param {string[]} tokens - Original placeholders, indexed by marker number.
 * @returns {string} Text with every marker replaced by its placeholder.
 */
function restorePlaceholders(text, tokens) {
  return tokens.reduce((restored, token, index) => {
    const marker = new RegExp(`__\\s*PH\\s*${index}\\s*__`, 'gi');
    // A replacer function inserts the token verbatim; passing the token as a
    // replacement string would expand `$&`, `$$`, `$'` etc. inside it.
    return restored.replace(marker, () => token);
  }, text);
}
/**
 * Loads the translation cache from CACHE_FILE.
 *
 * A missing file yields an empty cache silently. An unreadable file, invalid
 * JSON, or JSON that is not a plain object also yields an empty cache, but
 * with a warning, so a damaged cache does not go unnoticed.
 *
 * @returns {object} Map of `from|to|text` keys to translated strings.
 */
function loadCache() {
  let raw;
  try {
    raw = fs.readFileSync(CACHE_FILE, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.warn(`Could not read translation cache ${CACHE_FILE}: ${err.message}`);
    }
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    // Callers index the cache by key, so anything but a plain object is unusable.
    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    console.warn(`Ignoring translation cache ${CACHE_FILE}: content is not a JSON object`);
  } catch (err) {
    console.warn(`Ignoring translation cache ${CACHE_FILE}: ${err.message}`);
  }
  return {};
}
/**
 * Persists the translation cache to CACHE_FILE as pretty-printed JSON.
 *
 * The data is written to a sibling temp file and then renamed over the real
 * file, so an interrupted run cannot leave a half-written cache behind.
 *
 * @param {object} cache - Map of `from|to|text` keys to translated strings.
 */
function saveCache(cache) {
  const tmpFile = `${CACHE_FILE}.tmp`;
  fs.writeFileSync(tmpFile, JSON.stringify(cache, null, 2), 'utf8');
  fs.renameSync(tmpFile, CACHE_FILE);
}
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Parses the command-line flags from `process.argv`.
 *
 * Recognised flags: `--locale <code>`, `--all`, `--delay <ms>`, `--dry-run`,
 * `--no-google`, `--en-fallback`, `--skip-en`. Unknown arguments are ignored.
 * A `--delay` value that is not a number falls back to 450; an explicit `0`
 * is honoured.
 *
 * @returns {{locale: (string|null), all: boolean, delay: number,
 *   dryRun: boolean, noGoogle: boolean, enFallback: boolean, skipEn: boolean}}
 */
function parseArgs() {
  const DEFAULT_DELAY_MS = 450;
  const args = process.argv.slice(2);
  let locale = null;
  let all = false;
  let delay = DEFAULT_DELAY_MS;
  let dryRun = false;
  let noGoogle = false;
  let enFallback = false;
  let skipEn = false;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const hasValue = Boolean(args[i + 1]);
    if (arg === '--locale' && hasValue) {
      locale = args[++i];
    } else if (arg === '--all') {
      all = true;
    } else if (arg === '--delay' && hasValue) {
      const parsed = Number.parseInt(args[++i], 10);
      // Only an unparsable value falls back; `|| 450` would also discard 0.
      delay = Number.isNaN(parsed) ? DEFAULT_DELAY_MS : parsed;
    } else if (arg === '--dry-run') {
      dryRun = true;
    } else if (arg === '--no-google') {
      noGoogle = true;
    } else if (arg === '--en-fallback') {
      enFallback = true;
    } else if (arg === '--skip-en') {
      skipEn = true;
    }
  }

  return { locale, all, delay, dryRun, noGoogle, enFallback, skipEn };
}
/**
 * Translates `text` through the MyMemory HTTP API.
 *
 * Text longer than 450 characters is rejected rather than truncated: a cut-off
 * query would return a partial translation (possibly with a marker sliced in
 * half) that callers would store as if it were complete. Rejecting lets the
 * caller move on to the next provider.
 *
 * @param {string} text - Text to translate.
 * @param {string} from - Source language code.
 * @param {string} to - Target language code.
 * @returns {Promise<string>} The translated text.
 * @throws {Error} If the text is too long, the quota is exhausted, or the API
 *   reports a status other than 200.
 */
async function mymemoryTranslate(text, from, to) {
  const MAX_QUERY_CHARS = 450;
  if (text.length > MAX_QUERY_CHARS) {
    throw new Error(`MyMemory query too long (${text.length} > ${MAX_QUERY_CHARS} chars)`);
  }

  const url = new URL('https://api.mymemory.translated.net/get');
  url.searchParams.set('q', text);
  url.searchParams.set('langpair', `${from}|${to}`);

  const res = await fetch(url);
  const data = await res.json();
  if (data.quotaFinished) {
    throw new Error('MyMemory quota finished');
  }
  // Number() tolerates the status arriving as a numeric string.
  if (Number(data.responseStatus) !== 200) {
    throw new Error(data.responseDetails || `MyMemory status ${data.responseStatus}`);
  }
  return data.responseData.translatedText;
}
/**
 * Translates `text` with the supplied Google translate function.
 *
 * @param {Function} translateFn - Called as `translateFn(text, { from, to })`;
 *   must resolve to an object with a `text` property.
 * @param {string} text - Text to translate.
 * @param {string} from - Source language code.
 * @param {string} to - Target language code.
 * @returns {Promise<string>} The `text` property of the result.
 */
async function googleTranslate(translateFn, text, from, to) {
  const { text: translated } = await translateFn(text, { from, to });
  return translated;
}
/**
 * Translates one string, trying each provider in order until one succeeds.
 *
 * `{...}` placeholders are swapped for markers before translation and
 * restored afterwards. A provider result is rejected, and the next provider
 * tried, when it is empty or when a placeholder did not survive. A successful
 * result is stored in `cache` under `from|to|text`.
 *
 * @param {Function[]} providers - Functions `(text, from, to) => Promise<string>`.
 * @param {string} text - Source text.
 * @param {string} from - Source language code.
 * @param {string} to - Target language code.
 * @param {object} cache - Translation cache, updated in place.
 * @returns {Promise<string>} The translated text.
 * @throws {Error} The last provider error, or a generic error when
 *   `providers` is empty.
 */
async function translateText(providers, text, from, to, cache) {
  const cacheKey = `${from}|${to}|${text}`;
  if (cache[cacheKey]) return cache[cacheKey];

  // `&amp;` is decoded last so that `&amp;lt;` becomes `&lt;`, not `<`.
  const decodeEntities = (value) =>
    value
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#0*39;|&#x27;|&apos;/gi, "'")
      .replace(/&amp;/g, '&');

  const { safe, tokens } = protectPlaceholders(text);
  // Guarantees that a real Error is thrown even when `providers` is empty.
  let lastErr = new Error('No translation provider available');

  for (const provider of providers) {
    try {
      const raw = await provider(safe, from, to);
      if (typeof raw !== 'string' || raw.trim() === '') {
        throw new Error('Provider returned an empty translation');
      }
      // Decode before restoring so the placeholders are re-inserted verbatim.
      const out = restorePlaceholders(decodeEntities(raw), tokens);
      const missing = tokens.filter((token) => !out.includes(token));
      if (missing.length) {
        throw new Error(`Translation lost placeholder(s): ${missing.join(', ')}`);
      }
      cache[cacheKey] = out;
      return out;
    } catch (e) {
      lastErr = e;
      // Back off before trying the next provider (or returning to the caller).
      await sleep(2000);
    }
  }
  throw lastErr;
}
/**
 * Copies existing en-GB translations into `mergedFlat` for every key that is
 * still German there.
 *
 * en-GB.json is treated as optional: when it does not exist nothing is copied,
 * in line with how main() handles a missing en-US.json.
 *
 * @param {object} deFlat - Flattened German messages.
 * @param {object} mergedFlat - Flattened messages of the locale being filled;
 *   modified in place.
 * @returns {number} Number of strings copied.
 */
function bootstrapEnFromGb(deFlat, mergedFlat) {
  const gbPath = path.join(LOCALES_DIR, 'en-GB.json');
  if (!fs.existsSync(gbPath)) return 0;

  const gbJson = JSON.parse(fs.readFileSync(gbPath, 'utf8'));
  const de = JSON.parse(fs.readFileSync(path.join(LOCALES_DIR, 'de.json'), 'utf8'));
  const gbFlat = flatten(deepMergeMessages(de, gbJson));

  let copied = 0;
  for (const key of Object.keys(deFlat)) {
    const german = deFlat[key];
    // Only fill keys that are still German and for which en-GB really differs.
    if (mergedFlat[key] === german && gbFlat[key] && gbFlat[key] !== german) {
      mergedFlat[key] = gbFlat[key];
      copied++;
    }
  }
  return copied;
}
/**
 * Copies English strings from `enFlat` into `mergedFlat` for every key that is
 * still German there and for which `enFlat` holds a non-empty, different value.
 *
 * @param {object} deFlat - Flattened German messages.
 * @param {object} mergedFlat - Flattened messages of the locale being filled;
 *   modified in place.
 * @param {?object} enFlat - Flattened English reference messages, if any.
 * @returns {number} Number of strings copied (0 when `enFlat` is missing).
 */
function bootstrapEnFromUs(deFlat, mergedFlat, enFlat) {
  if (!enFlat) return 0;

  const fillable = Object.keys(deFlat).filter((key) => {
    const german = deFlat[key];
    const english = enFlat[key];
    return mergedFlat[key] === german && english && english !== german;
  });
  fillable.forEach((key) => {
    mergedFlat[key] = enFlat[key];
  });
  return fillable.length;
}
/**
* Fills one locale file: every key whose merged value is still identical to
* the German text is translated (or copied), and the locale file is rewritten
* with only those entries that differ from German.
*
* @param {string} code - Locale code; selects `<code>.json` and the target
* language via LOCALE_TARGETS.
* @param {object} opts
* @param {Function[]} opts.providers - Translation providers, tried in order.
* @param {object} opts.cache - Translation cache, updated in place.
* @param {number} opts.delay - Pause in ms after each fresh translation.
* @param {boolean} opts.dryRun - When true, nothing is translated and the
* locale file is not written.
* @param {?object} opts.enFlat - Flattened English reference strings, if any.
* @param {boolean} opts.enFallback - When true, keep the English source text
* if a retry from English fails.
*/
async function fillLocale(code, opts) {
const { providers, cache, delay, dryRun, enFlat, enFallback } = opts;
const targetLang = LOCALE_TARGETS[code];
const de = JSON.parse(fs.readFileSync(path.join(LOCALES_DIR, 'de.json'), 'utf8'));
const deFlat = flatten(de);
const localePath = path.join(LOCALES_DIR, `${code}.json`);
const localeJson = JSON.parse(fs.readFileSync(localePath, 'utf8'));
// Effective messages of the locale: German with the locale's overrides on top.
const mergedFlat = flatten(deepMergeMessages(JSON.parse(JSON.stringify(de)), localeJson));
// English variants first borrow from each other before anything is translated.
if (code === 'en-US' || code === 'en-AU') {
const copied = bootstrapEnFromGb(deFlat, mergedFlat);
if (copied) console.log(`[${code}] bootstrapped ${copied} strings from en-GB`);
}
if ((code === 'en-GB' || code === 'en-AU') && enFlat) {
const copied = bootstrapEnFromUs(deFlat, mergedFlat, enFlat);
if (copied) console.log(`[${code}] bootstrapped ${copied} strings from en-US`);
}
// Keys whose effective value is still exactly the German text.
const keysToFix = Object.keys(deFlat).filter((k) => mergedFlat[k] === deFlat[k]);
if (!keysToFix.length) {
console.log(`[${code}] nothing to fill`);
// The file is still rewritten so that bootstrapped strings are persisted.
if (!dryRun) {
fs.writeFileSync(localePath, `${JSON.stringify(buildOverrides(deFlat, mergedFlat), null, 2)}\n`, 'utf8');
}
return;
}
// Group keys by (source language, source text) so each distinct text is
// translated only once. English is used as the source when enFlat has a
// value for the key that differs from German; otherwise German is used.
const uniqueTexts = new Map();
for (const k of keysToFix) {
const from = enFlat && enFlat[k] && enFlat[k] !== deFlat[k] ? 'en' : 'de';
const text = from === 'en' ? enFlat[k] : deFlat[k];
const mapKey = `${from}\0${text}`;
if (!uniqueTexts.has(mapKey)) uniqueTexts.set(mapKey, { from, text, keys: [] });
uniqueTexts.get(mapKey).keys.push(k);
}
console.log(`[${code}] ${keysToFix.length} keys, ${uniqueTexts.size} unique, target=${targetLang}`);
if (code === 'de-CH') {
// de-CH is not translated: the German text is reused with 'ß' replaced by 'ss'.
for (const k of keysToFix) {
const base = deFlat[k];
mergedFlat[k] = base.replace(/ß/g, 'ss');
}
} else {
let done = 0;
const failed = [];
for (const entry of uniqueTexts.values()) {
const { from, text, keys } = entry;
const cacheKey = `${from}|${targetLang}|${text}`;
let translated;
if (from === 'en' && targetLang === 'en') {
// English source for an English target: copy the text unchanged.
translated = text;
} else if (cache[cacheKey]) {
translated = cache[cacheKey];
} else if (dryRun) {
// Dry run: use a marker string instead of calling a provider.
translated = `[${targetLang}] ${text.slice(0, 30)}`;
} else {
try {
translated = await translateText(providers, text, from, targetLang, cache);
await sleep(delay);
} catch (e) {
// Remember the entry for the retry pass below and move on.
console.error(`[${code}] skip: ${text.slice(0, 50)}… (${e.message})`);
failed.push(entry);
continue;
}
}
for (const k of keys) mergedFlat[k] = translated;
done++;
// Log progress and persist the cache after every 50 completed texts.
if (done % 50 === 0) {
console.log(`[${code}] ${done}/${uniqueTexts.size} (failed ${failed.length})`);
saveCache(cache);
}
}
saveCache(cache);
// Second attempt for everything that failed, after a 5 s pause.
if (failed.length && !dryRun) {
console.log(`[${code}] retrying ${failed.length} failed strings…`);
await sleep(5000);
for (const { from, text, keys } of failed) {
const cacheKey = `${from}|${targetLang}|${text}`;
try {
const translated = cache[cacheKey] || (await translateText(providers, text, from, targetLang, cache));
for (const k of keys) mergedFlat[k] = translated;
await sleep(delay);
} catch (e) {
if (enFallback && from === 'en') {
// With --en-fallback the English source text is kept instead of German.
for (const k of keys) mergedFlat[k] = text;
} else {
console.error(`[${code}] final skip: ${text.slice(0, 40)}`);
}
}
}
saveCache(cache);
}
}
// Only entries that differ from German are written to the locale file.
const overrides = buildOverrides(deFlat, mergedFlat);
if (!dryRun) {
fs.writeFileSync(localePath, `${JSON.stringify(overrides, null, 2)}\n`, 'utf8');
}
// stillDe counts the keys that remain identical to German after this run.
const stillDe = Object.keys(deFlat).filter((k) => mergedFlat[k] === deFlat[k]).length;
console.log(`[${code}] overrides=${Object.keys(flatten(overrides)).length}, stillDe=${stillDe}`);
}
/**
 * CLI entry point: parses the flags, sets up the translation providers and
 * fills each requested locale in order.
 *
 * MyMemory is always used; the Google provider is added when the package at
 * TRANSLATE_PKG exists and `--no-google` was not given. Locale codes that are
 * in SKIP_LOCALES or missing from LOCALE_TARGETS are reported and skipped.
 * With `--all`, an 8 s pause separates consecutive locales.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { locale, all, delay, dryRun, noGoogle, enFallback, skipEn } = parseArgs();

  const providers = [(text, from, to) => mymemoryTranslate(text, from, to)];
  if (!noGoogle && fs.existsSync(TRANSLATE_PKG)) {
    const { translate: translateFn } = require(TRANSLATE_PKG);
    providers.push((text, from, to) => googleTranslate(translateFn, text, from, to));
  }

  const cache = loadCache();
  let codes = all ? ALL_FILL_ORDER : locale ? [locale] : null;
  if (skipEn && codes) {
    codes = codes.filter((c) => !['en-US', 'en-GB', 'en-AU'].includes(c));
  }
  if (!codes) {
    console.error('Usage: --locale <code> | --all [--delay ms]');
    process.exit(1);
  }

  // Report codes that cannot be processed instead of dropping them silently,
  // so a typo in --locale does not end in a bare "Done.".
  const runnable = codes.filter((code) => {
    if (SKIP_LOCALES.has(code)) {
      console.warn(`[${code}] skipped: this locale is never filled`);
      return false;
    }
    if (!(code in LOCALE_TARGETS)) {
      console.warn(`[${code}] skipped: unsupported locale code`);
      return false;
    }
    return true;
  });

  let enFlat = null;
  const de = JSON.parse(fs.readFileSync(path.join(LOCALES_DIR, 'de.json'), 'utf8'));
  const enUsPath = path.join(LOCALES_DIR, 'en-US.json');
  const loadEnFlat = () => flatten(deepMergeMessages(de, JSON.parse(fs.readFileSync(enUsPath, 'utf8'))));
  if (fs.existsSync(enUsPath)) {
    enFlat = loadEnFlat();
  }

  for (let i = 0; i < runnable.length; i++) {
    const code = runnable[i];
    await fillLocale(code, { providers, cache, delay, dryRun, enFlat, enFallback });
    // en-US was just rewritten; reload it so later locales see the new strings.
    if (code === 'en-US' && !dryRun) {
      enFlat = loadEnFlat();
    }
    // Pause between locales only; no need to wait after the last one.
    const isLast = i === runnable.length - 1;
    if (all && !dryRun && !isLast) await sleep(8000);
  }

  saveCache(cache);
  console.log('Done.');
}
// Run the CLI; any unhandled error is printed and the process exits with status 1.
main().catch((e) => {
console.error(e);
process.exit(1);
});