From 15ab450ca9cb57192350c9e1fd5200accccf5125 Mon Sep 17 00:00:00 2001
From: "Torsten Schulz (local)"
Date: Fri, 19 Jun 2026 15:04:12 +0200
Subject: [PATCH] bisaja fixes

---
 backend/services/vocabService.js           |  89 ++++++++++++++++++
 frontend/src/utils/numberAnswerVariants.js | 104 +++++++++++++++++++++
 2 files changed, 193 insertions(+)

diff --git a/backend/services/vocabService.js b/backend/services/vocabService.js
index 507968f..7b52020 100644
--- a/backend/services/vocabService.js
+++ b/backend/services/vocabService.js
@@ -16,6 +16,87 @@ import { Op } from 'sequelize';
 import { BISAYA_PHASE1_DIDACTICS, BISAYA_DIDACTICS_FRAGMENTS } from '../scripts/bisaya-course-phase1.js';
 
 export default class VocabService {
+  _stripGermanNumberSeparators(value) {
+    return String(value || '').replace(/[\s.-]+/g, '');
+  }
+
+  _germanNumberParts() {
+    return new Map([
+      ['null', 0],
+      ['ein', 1], ['eins', 1], ['eine', 1], ['einen', 1], ['einem', 1], ['einer', 1],
+      ['zwei', 2], ['drei', 3], ['vier', 4],
+      ['fuenf', 5], ['funf', 5], ['fünf', 5],
+      ['sechs', 6], ['sieben', 7], ['acht', 8], ['neun', 9],
+      ['zehn', 10], ['elf', 11],
+      ['zwoelf', 12], ['zwolf', 12], ['zwölf', 12],
+      ['dreizehn', 13], ['vierzehn', 14],
+      ['fuenfzehn', 15], ['funfzehn', 15], ['fünfzehn', 15],
+      ['sechzehn', 16], ['siebzehn', 17], ['achtzehn', 18], ['neunzehn', 19],
+      ['zwanzig', 20], ['dreissig', 30], ['dreißig', 30], ['vierzig', 40],
+      ['fuenfzig', 50], ['funfzig', 50], ['fünfzig', 50],
+      ['sechzig', 60], ['siebzig', 70], ['achtzig', 80], ['neunzig', 90]
+    ]);
+  }
+
+  _parseGermanUnderHundred(compact) {
+    const parts = this._germanNumberParts();
+    if (!compact) return null;
+    if (parts.has(compact)) {
+      return parts.get(compact);
+    }
+
+    const undIndex = compact.indexOf('und');
+    if (undIndex > 0) {
+      const onesPart = compact.slice(0, undIndex);
+      const tensPart = compact.slice(undIndex + 3);
+      const ones = parts.get(onesPart);
+      const tens = parts.get(tensPart);
+      if (ones >= 1 && ones <= 9 && tens >= 20 && tens % 10 === 0) {
+        return tens + ones;
+      }
+    }
+
+    return null;
+  }
+
+  _canonicalizeGermanNumberAnswer(value) {
+    const compact = this._stripGermanNumberSeparators(String(value || '').trim().toLowerCase());
+    if (!compact) return '';
+    if (/^\d+$/.test(compact)) {
+      return String(Number(compact));
+    }
+
+    const parts = this._germanNumberParts();
+    if (parts.has(compact)) {
+      return String(parts.get(compact));
+    }
+
+    const tausendIndex = compact.indexOf('tausend');
+    if (tausendIndex !== -1) {
+      const thousandsPart = compact.slice(0, tausendIndex);
+      const remainderPart = compact.slice(tausendIndex + 'tausend'.length);
+      const thousands = thousandsPart ? this._canonicalizeGermanNumberAnswer(thousandsPart) : '1';
+      const remainder = remainderPart ? this._canonicalizeGermanNumberAnswer(remainderPart) : '0';
+      if (thousands && remainder) {
+        return String((Number(thousands) * 1000) + Number(remainder));
+      }
+    }
+
+    const hundertIndex = compact.indexOf('hundert');
+    if (hundertIndex !== -1) {
+      const hundredsPart = compact.slice(0, hundertIndex);
+      const remainderPart = compact.slice(hundertIndex + 'hundert'.length);
+      const hundreds = hundredsPart ? this._canonicalizeGermanNumberAnswer(hundredsPart) : '1';
+      const remainder = remainderPart ? this._parseGermanUnderHundred(remainderPart) : 0;
+      if (hundreds && remainder !== null) {
+        return String((Number(hundreds) * 100) + Number(remainder));
+      }
+    }
+
+    const underHundred = this._parseGermanUnderHundred(compact);
+    return underHundred === null ? '' : String(underHundred);
+  }
+
   _normalizeSrsText(value) {
     return String(value || '')
       .trim()
@@ -727,6 +808,10 @@ export default class VocabService {
   }
 
   _normalizeTextAnswer(text) {
+    const canonicalNumber = this._canonicalizeGermanNumberAnswer(text);
+    if (canonicalNumber) {
+      return canonicalNumber;
+    }
     const normalized = String(text || '')
       .trim()
       .toLowerCase()
@@ -734,6 +819,10 @@ export default class VocabService {
       .replace(/[\p{P}\p{S}]+/gu, ' ')
       .replace(/\s+/g, ' ')
       .trim();
+    const normalizedCanonicalNumber = this._canonicalizeGermanNumberAnswer(normalized);
+    if (normalizedCanonicalNumber) {
+      return normalizedCanonicalNumber;
+    }
     return normalized.replace(/\s+/g, '');
   }
 
diff --git a/frontend/src/utils/numberAnswerVariants.js b/frontend/src/utils/numberAnswerVariants.js
index f7750c3..55a8311 100644
--- a/frontend/src/utils/numberAnswerVariants.js
+++ b/frontend/src/utils/numberAnswerVariants.js
@@ -48,6 +48,51 @@ const GERMAN_NUMBER_WORDS = new Map([
   ['zweitausend', '2000']
 ]);
 
+const GERMAN_NUMBER_PARTS = new Map([
+  ['null', 0],
+  ['ein', 1],
+  ['eins', 1],
+  ['eine', 1],
+  ['einen', 1],
+  ['einem', 1],
+  ['einer', 1],
+  ['zwei', 2],
+  ['drei', 3],
+  ['vier', 4],
+  ['fuenf', 5],
+  ['funf', 5],
+  ['fünf', 5],
+  ['sechs', 6],
+  ['sieben', 7],
+  ['acht', 8],
+  ['neun', 9],
+  ['zehn', 10],
+  ['elf', 11],
+  ['zwoelf', 12],
+  ['zwolf', 12],
+  ['zwölf', 12],
+  ['dreizehn', 13],
+  ['vierzehn', 14],
+  ['fuenfzehn', 15],
+  ['funfzehn', 15],
+  ['fünfzehn', 15],
+  ['sechzehn', 16],
+  ['siebzehn', 17],
+  ['achtzehn', 18],
+  ['neunzehn', 19],
+  ['zwanzig', 20],
+  ['dreissig', 30],
+  ['dreißig', 30],
+  ['vierzig', 40],
+  ['fuenfzig', 50],
+  ['funfzig', 50],
+  ['fünfzig', 50],
+  ['sechzig', 60],
+  ['siebzig', 70],
+  ['achtzig', 80],
+  ['neunzig', 90]
+]);
+
 const GERMAN_ONES = [
   ['', ''],
   ['ein', 'eins'],
@@ -88,6 +133,60 @@ function stripGermanNumberSeparators(value) {
     .replace(/[\s.-]+/g, '');
 }
 
+function parseGermanUnderHundred(compact) {
+  if (!compact) return null;
+  if (GERMAN_NUMBER_PARTS.has(compact)) {
+    return GERMAN_NUMBER_PARTS.get(compact);
+  }
+
+  const undIndex = compact.indexOf('und');
+  if (undIndex > 0) {
+    const onesPart = compact.slice(0, undIndex);
+    const tensPart = compact.slice(undIndex + 3);
+    const ones = GERMAN_NUMBER_PARTS.get(onesPart);
+    const tens = GERMAN_NUMBER_PARTS.get(tensPart);
+    if (ones >= 1 && ones <= 9 && tens >= 20 && tens % 10 === 0) {
+      return tens + ones;
+    }
+  }
+
+  return null;
+}
+
+function parseGermanNumberWord(compact) {
+  if (!compact) return null;
+  if (/^\d+$/.test(compact)) {
+    return Number(compact);
+  }
+  if (GERMAN_NUMBER_PARTS.has(compact)) {
+    return GERMAN_NUMBER_PARTS.get(compact);
+  }
+
+  const tausendIndex = compact.indexOf('tausend');
+  if (tausendIndex !== -1) {
+    const thousandsPart = compact.slice(0, tausendIndex);
+    const remainderPart = compact.slice(tausendIndex + 'tausend'.length);
+    const thousands = thousandsPart ? parseGermanNumberWord(thousandsPart) : 1;
+    const remainder = remainderPart ? parseGermanNumberWord(remainderPart) : 0;
+    if (thousands !== null && remainder !== null) {
+      return thousands * 1000 + remainder;
+    }
+  }
+
+  const hundertIndex = compact.indexOf('hundert');
+  if (hundertIndex !== -1) {
+    const hundredsPart = compact.slice(0, hundertIndex);
+    const remainderPart = compact.slice(hundertIndex + 'hundert'.length);
+    const hundreds = hundredsPart ? parseGermanNumberWord(hundredsPart) : 1;
+    const remainder = remainderPart ? parseGermanUnderHundred(remainderPart) : 0;
+    if (hundreds !== null && remainder !== null) {
+      return hundreds * 100 + remainder;
+    }
+  }
+
+  return parseGermanUnderHundred(compact);
+}
+
 export function canonicalizeNumberAnswer(value) {
   const raw = String(value || '').trim().toLowerCase();
   if (!raw) return '';
@@ -97,6 +196,11 @@ export function canonicalizeNumberAnswer(value) {
     return String(Number(compact));
   }
 
+  const parsed = parseGermanNumberWord(compact);
+  if (parsed !== null) {
+    return String(parsed);
+  }
+
   return GERMAN_NUMBER_WORDS.get(compact) || '';
 }
 