Files
yourpart3/backend/scripts/repair-bisaya-number-answer-pairs.js
Torsten Schulz (local) 01c2f230a9
All checks were successful
Deploy to production / deploy (push) Successful in 32s
bisaya korrigiert
2026-06-06 13:31:44 +02:00

302 lines
7.8 KiB
JavaScript

#!/usr/bin/env node
/**
* Repairs stale Bisaya number pairs where the Bisaya side is correct, but the
* German answer side contains a currency phrase such as "20 peso".
*
* Default: dry-run. Use --apply to write changes.
*/
import crypto from 'crypto';
import { Op } from 'sequelize';
import { sequelize } from '../utils/sequelize.js';
import VocabCourseLesson from '../models/community/vocab_course_lesson.js';
import VocabSrsItem from '../models/community/vocab_srs_item.js';
/**
 * Lookup from a Bisaya number word (lowercase, no spaces) to its value written
 * as a digit string. The words are listed in counting order, so the value of
 * each word is simply its 1-based position in the list ('usa' -> '1', ...,
 * 'kawhaan' -> '20').
 */
const BISAYA_TO_GERMAN_DIGIT = new Map(
  [
    'usa',
    'duha',
    'tulo',
    'upat',
    'lima',
    'unom',
    'pito',
    'walo',
    'siyam',
    'napulo',
    'onse',
    'dose',
    'trese',
    'katorse',
    'kinse',
    'disisais',
    'disisiete',
    'disiotso',
    'disinuybe',
    'kawhaan',
  ].map((word, position) => [word, String(position + 1)])
);
/**
 * Currency words that mark a phrase such as "20 peso" as a currency amount.
 * Each singular spelling is accepted together with its "-s" plural.
 */
const CURRENCY_WORDS = new Set(
  ['peso', 'piso'].flatMap((singular) => [singular, `${singular}s`])
);
/**
 * Lookup from a German number word (lowercase) to its value written as a digit
 * string. Words containing an umlaut are listed in three spellings: the "ue"/"oe"
 * transliteration, the umlaut dropped, and the real umlaut.
 */
const GERMAN_NUMBER_TO_DIGIT = new Map(
  [
    ['1', ['ein', 'eins']],
    ['2', ['zwei']],
    ['3', ['drei']],
    ['4', ['vier']],
    ['5', ['fuenf', 'funf', 'fünf']],
    ['6', ['sechs']],
    ['7', ['sieben']],
    ['8', ['acht']],
    ['9', ['neun']],
    ['10', ['zehn']],
    ['11', ['elf']],
    ['12', ['zwoelf', 'zwolf', 'zwölf']],
    ['13', ['dreizehn']],
    ['14', ['vierzehn']],
    ['15', ['fuenfzehn', 'funfzehn', 'fünfzehn']],
    ['16', ['sechzehn']],
    ['17', ['siebzehn']],
    ['18', ['achtzehn']],
    ['19', ['neunzehn']],
    ['20', ['zwanzig']],
  ].flatMap(([digit, spellings]) => spellings.map((spelling) => [spelling, digit]))
);
/**
 * Reads the command-line flags this script understands.
 *
 * @param {string[]} argv - Command-line arguments to inspect.
 * @returns {{ apply: boolean }} `apply` is true only when `--apply` is present.
 */
function parseArgs(argv) {
  const apply = argv.includes('--apply');
  return { apply };
}
/**
 * Produces a canonical comparison form of a piece of text.
 *
 * Steps, in order: stringify (falsy input becomes ''), trim, lowercase, apply
 * Unicode NFKC normalization, turn every run of punctuation/symbol characters
 * into one space, collapse whitespace runs to one space, and trim again.
 *
 * @param {*} value - Text to normalize; any falsy value is treated as ''.
 * @returns {string} The normalized text.
 */
function normalizeText(value) {
  const lowered = String(value || '').trim().toLowerCase();
  const withoutPunctuation = lowered.normalize('NFKC').replace(/[\p{P}\p{S}]+/gu, ' ');
  const singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Normalizes text with `normalizeText` and then removes all remaining
 * whitespace, so multi-word input becomes a single token.
 *
 * @param {*} value - Text to compact.
 * @returns {string} The normalized text without any whitespace.
 */
function compact(value) {
  const normalized = normalizeText(value);
  return normalized.split(/\s+/).join('');
}
/**
 * Looks up the digit string for a Bisaya number word.
 *
 * @param {*} value - Candidate Bisaya number word; compacted before lookup.
 * @returns {string} The digit string from BISAYA_TO_GERMAN_DIGIT, or '' when
 *   the compacted value is not a known word.
 */
function getBisayaDigit(value) {
  const word = compact(value);
  const digit = BISAYA_TO_GERMAN_DIGIT.get(word);
  return digit || '';
}
/**
 * Extracts the number from a currency phrase such as "20 peso" or
 * "zwanzig pesos".
 *
 * The phrase must consist of at least two words and end with one of
 * CURRENCY_WORDS. Everything before the currency word is joined without
 * spaces and interpreted either as plain digits or as a German number word.
 *
 * @param {*} value - Candidate currency phrase.
 * @returns {string} The amount as a digit string (leading zeros removed), or
 *   '' when the value is not a recognizable currency phrase.
 */
function getCurrencyNumber(value) {
  const normalized = normalizeText(value);
  if (normalized === '') return '';

  const words = normalized.split(/\s+/).filter(Boolean);
  if (words.length < 2) return '';

  // Take the trailing word off; what remains in `words` is the amount part.
  const currencyWord = words.pop();
  if (!CURRENCY_WORDS.has(currencyWord)) return '';

  const amount = words.join('');
  return /^\d+$/.test(amount)
    ? String(Number(amount))
    : GERMAN_NUMBER_TO_DIGIT.get(amount) || '';
}
/**
 * Repairs one vocabulary pair in which one side is a Bisaya number word and
 * the other side is a currency phrase for the same number. The currency phrase
 * is replaced by the bare digit string; the Bisaya side is left untouched.
 *
 * @param {*} left - First side of the pair.
 * @param {*} right - Second side of the pair.
 * @returns {{ left: *, right: *, changed: boolean }} The (possibly corrected)
 *   pair; `changed` tells whether the replaced side differs from its input.
 */
function fixPair(left, right) {
  // Case 1: Bisaya word on the left, matching currency phrase on the right.
  const bisayaOnLeft = getBisayaDigit(left);
  if (bisayaOnLeft !== '' && getCurrencyNumber(right) === bisayaOnLeft) {
    return { left, right: bisayaOnLeft, changed: right !== bisayaOnLeft };
  }

  // Case 2: the mirrored layout.
  const bisayaOnRight = getBisayaDigit(right);
  if (bisayaOnRight !== '' && getCurrencyNumber(left) === bisayaOnRight) {
    return { left: bisayaOnRight, right, changed: left !== bisayaOnRight };
  }

  return { left, right, changed: false };
}
/**
 * Applies `fixPair` to the known left/right key pairs of a single object.
 *
 * Non-objects and arrays are returned as-is. For objects a shallow copy is
 * made, and every key pair that is fully present on it is checked and, if
 * needed, corrected on the copy.
 *
 * @param {*} entry - Candidate pattern object.
 * @returns {{ value: *, changed: boolean }} The input (or its patched shallow
 *   copy) and whether any pair was corrected.
 */
function patchPattern(entry) {
  const isRecord = entry !== null && typeof entry === 'object' && !Array.isArray(entry);
  if (!isRecord) {
    return { value: entry, changed: false };
  }

  const patched = { ...entry };
  let changed = false;

  for (const [leftKey, rightKey] of [
    ['target', 'gloss'],
    ['learning', 'reference'],
    ['bisaya', 'native'],
  ]) {
    if (!(leftKey in patched && rightKey in patched)) continue;

    const fixed = fixPair(patched[leftKey], patched[rightKey]);
    if (fixed.changed) {
      patched[leftKey] = fixed.left;
      patched[rightKey] = fixed.right;
      changed = true;
    }
  }

  return { value: patched, changed };
}
/**
 * Recursively walks a JSON-like value and corrects every number pair found by
 * `patchPattern`, at any nesting depth.
 *
 * The input is not mutated: arrays are mapped to new arrays and objects are
 * shallow-copied before being patched.
 *
 * @param {*} value - Array, object or primitive to walk.
 * @returns {{ value: *, changed: boolean }} The patched structure and whether
 *   anything inside it was corrected.
 */
function patchJson(value) {
  if (Array.isArray(value)) {
    const results = value.map((entry) => patchJson(entry));
    return {
      value: results.map((result) => result.value),
      changed: results.some((result) => result.changed),
    };
  }

  // Primitives (and null) have nothing to patch.
  if (!value || typeof value !== 'object') {
    return { value, changed: false };
  }

  // Patch the object itself first, then descend into its object-valued children.
  const direct = patchPattern(value);
  let current = direct.value;
  let changed = direct.changed;

  for (const [key, child] of Object.entries(direct.value)) {
    if (!child || typeof child !== 'object') continue;

    const nested = patchJson(child);
    if (nested.changed) {
      current = { ...current, [key]: nested.value };
      changed = true;
    }
  }

  return { value: current, changed };
}
/**
 * Normalizes a text value for use in an SRS item key. This is a thin wrapper
 * that delegates to `normalizeText` without any additional processing.
 *
 * @param {*} value - Text to normalize.
 * @returns {string} The result of `normalizeText(value)`.
 */
function normalizeSrsText(value) {
return normalizeText(value);
}
/**
 * Computes the key of an SRS item as the SHA-1 hex digest of its identifying
 * parts joined with '|': course id, lesson id (or the literal 'course' when no
 * lesson is given), upper-cased direction, and the normalized learning and
 * reference texts.
 *
 * @param {object} params
 * @param {*} params.courseId - Course id; non-numeric values count as 0.
 * @param {*} [params.lessonId=null] - Lesson id; null/undefined yields 'course'.
 * @param {*} params.learning - Learning-side text.
 * @param {*} params.reference - Reference-side text.
 * @param {string} [params.direction='BOTH'] - Direction; falsy values count as 'BOTH'.
 * @returns {string} 40-character hexadecimal SHA-1 digest.
 */
function buildSrsItemKey({ courseId, lessonId = null, learning, reference, direction = 'BOTH' }) {
  const coursePart = Number(courseId) || 0;
  const lessonPart = lessonId == null ? 'course' : Number(lessonId) || 0;
  const directionPart = String(direction || 'BOTH').toUpperCase();
  const learningPart = normalizeSrsText(learning);
  const referencePart = normalizeSrsText(reference);

  const raw = `${coursePart}|${lessonPart}|${directionPart}|${learningPart}|${referencePart}`;

  const hash = crypto.createHash('sha1');
  hash.update(raw);
  return hash.digest('hex');
}
/**
 * Scans all lessons that have non-null `corePatterns`, corrects the number
 * pairs inside them via `patchJson`, and logs every affected lesson.
 *
 * @param {object} options
 * @param {boolean} options.apply - When true the corrected `corePatterns` are
 *   written back; otherwise the lessons are only reported.
 * @returns {Promise<number>} Number of lessons whose `corePatterns` need (or
 *   received) a correction.
 */
async function repairLessons({ apply }) {
  const query = {
    where: { corePatterns: { [Op.ne]: null } },
    attributes: ['id', 'courseId', 'lessonNumber', 'title', 'corePatterns'],
    order: [
      ['courseId', 'ASC'],
      ['lessonNumber', 'ASC'],
    ],
  };
  const lessons = await VocabCourseLesson.findAll(query);

  let changedLessons = 0;
  for (const lesson of lessons) {
    const { value: nextPatterns, changed } = patchJson(lesson.corePatterns);
    if (changed) {
      changedLessons += 1;
      console.log(
        `Lesson ${lesson.id} (course ${lesson.courseId}, #${lesson.lessonNumber}, ${lesson.title}): corePatterns korrigiert`
      );
      if (apply) {
        await lesson.update({ corePatterns: nextPatterns });
      }
    }
  }
  return changedLessons;
}
/**
 * Scans SRS items whose learning or reference text contains "peso"/"piso",
 * corrects the number pairs via `fixPair`, and logs every affected item.
 *
 * When applying, each corrected item also gets a freshly computed `itemKey`.
 * If that write is rejected by a unique constraint, the texts are written
 * while the item's old key is kept, and the item is counted as a key conflict.
 * Any other error is rethrown, so an unrelated failure is never misreported
 * (and retried) as a key conflict.
 *
 * @param {object} options
 * @param {boolean} options.apply - When true the corrections are written;
 *   otherwise the items are only reported.
 * @returns {Promise<{ changedItems: number, keyConflicts: number }>} Number of
 *   items needing a correction and how many of them kept their old key.
 * @throws Rethrows every update error that is not a Sequelize unique
 *   constraint error, as well as errors from the fallback update.
 */
async function repairSrsItems({ apply }) {
  const items = await VocabSrsItem.findAll({
    where: {
      [Op.or]: [
        { learning: { [Op.iLike]: '%peso%' } },
        { reference: { [Op.iLike]: '%peso%' } },
        { learning: { [Op.iLike]: '%piso%' } },
        { reference: { [Op.iLike]: '%piso%' } },
      ],
    },
    order: [['id', 'ASC']],
  });
  let changedItems = 0;
  let keyConflicts = 0;
  for (const item of items) {
    const pair = fixPair(item.learning, item.reference);
    if (!pair.changed) continue;
    changedItems += 1;
    console.log(
      `SRS ${item.id}: "${item.learning}" | "${item.reference}" -> "${pair.left}" | "${pair.right}"`
    );
    if (!apply) continue;

    // The new key is only needed when writing, so it is computed after the
    // dry-run check.
    const nextKey = buildSrsItemKey({
      courseId: item.courseId,
      lessonId: item.lessonId,
      learning: pair.left,
      reference: pair.right,
      direction: item.direction,
    });
    try {
      await item.update({
        learning: pair.left,
        reference: pair.right,
        itemKey: nextKey,
      });
    } catch (error) {
      // Only a unique-constraint violation means "another row already owns
      // this key". Everything else (connection loss, validation, ...) must
      // surface instead of being logged and counted as a key conflict.
      const isUniqueConflict = Boolean(error) && error.name === 'SequelizeUniqueConstraintError';
      if (!isUniqueConflict) throw error;

      keyConflicts += 1;
      console.warn(`  item_key-Konflikt bei SRS ${item.id}; aktualisiere Textwerte mit altem Key.`);
      // NOTE(review): the instance still carries the rejected itemKey in
      // memory; this relies on update() persisting only the attributes passed
      // here — confirm for the Sequelize version in use.
      await item.update({
        learning: pair.left,
        reference: pair.right,
      });
    }
  }
  return { changedItems, keyConflicts };
}
/**
 * Script entry point: parses the command-line flags, announces the mode,
 * verifies the database connection, repairs lessons and then SRS items, and
 * prints a summary. In dry-run mode it finishes with a hint on how to apply.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { apply } = parseArgs(process.argv.slice(2));
  console.log(`Modus: ${apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  await sequelize.authenticate();

  // Lessons are repaired before SRS items.
  const changedLessons = await repairLessons({ apply });
  const srsResult = await repairSrsItems({ apply });

  console.log('');
  console.log(`Betroffene Lektionen: ${changedLessons}`);
  console.log(`Betroffene SRS-Items: ${srsResult.changedItems}`);
  if (srsResult.keyConflicts > 0) {
    console.log(`SRS-Items mit behaltenem altem item_key wegen Unique-Konflikt: ${srsResult.keyConflicts}`);
  }
  if (apply) return;
  console.log('Zum Anwenden erneut mit --apply ausführen.');
}
// Run the script. On success the database connection is closed. On any
// failure (including a failure while closing) the error is logged, the
// connection is closed and the process exits with status 1.
(async () => {
  try {
    await main();
    await sequelize.close();
  } catch (error) {
    console.error('Fehler beim Reparieren der Bisaya-Zahlenpaare:', error);
    await sequelize.close();
    process.exit(1);
  }
})();