From 5d16021557738c71d79501a18e82e43410455de2 Mon Sep 17 00:00:00 2001
From: "Torsten Schulz (local)"
Date: Wed, 15 Apr 2026 11:12:18 +0200
Subject: [PATCH] feat(bisaya-course): implement core pattern extraction and
 merging for exercises

- Added functions to derive core patterns from various exercise types,
  including gap-fill, transformation, and multiple-choice.
- Implemented logic to merge derived core patterns with existing lesson
  patterns. The merge itself does not enforce a minimum count; the caller
  only updates a lesson when at least 8 phrase-like patterns are available.
- Enhanced the `createBisayaCourseContent` function to update lessons with
  merged core patterns and provide detailed logging based on the
  VOCAB_STRICT_AUDIT environment variable.
---
 .../scripts/create-bisaya-course-content.js   | 219 ++++++++++++++++++
 1 file changed, 219 insertions(+)

diff --git a/backend/scripts/create-bisaya-course-content.js b/backend/scripts/create-bisaya-course-content.js
index 4dd4bc5..0b11724 100644
--- a/backend/scripts/create-bisaya-course-content.js
+++ b/backend/scripts/create-bisaya-course-content.js
@@ -85,6 +85,213 @@ function buildCorePatternGlossLookup(didactics) {
   return map;
 }
 
+/**
+ * Counts the words of a value by delegating to `countWords`.
+ *
+ * @param {*} value - Text to measure.
+ * @returns {number} Result of `countWords(value)`.
+ */
+function phraseWordCount(value) {
+  return countWords(value);
+}
+
+/**
+ * Tells whether a value has at least two words.
+ *
+ * @param {*} value - Text to check.
+ * @returns {boolean} True when `phraseWordCount(value)` is 2 or more.
+ */
+function isPhraseLike(value) {
+  return phraseWordCount(value) >= 2;
+}
+
+/**
+ * Strips every „, “, " and ' character from a prompt, then normalizes it.
+ * Apostrophes inside words are removed as well.
+ *
+ * @param {*} value - Prompt text; falsy values are treated as ''.
+ * @returns {string} Result of `normalizeText` on the quote-free text.
+ */
+function normalizeQuotedPrompt(value) {
+  return normalizeText(String(value || '').replace(/[„“"']/g, ''));
+}
+
+/**
+ * Returns the first quoted span of a text. Quote styles are tried in a fixed
+ * order ("...", then „...“, then '...'), regardless of where they occur.
+ *
+ * @param {*} value - Text to search; falsy values are treated as ''.
+ * @returns {string} Normalized quoted content, or '' when nothing is quoted.
+ */
+function firstQuotedContent(value) {
+  const text = String(value || '');
+  const quotePatterns = [/"([^"]+)"/, /„([^“]+)“/, /'([^']+)'/];
+  for (const pattern of quotePatterns) {
+    const content = text.match(pattern)?.[1];
+    if (content) return normalizeText(content);
+  }
+  return '';
+}
+
+/**
+ * Appends a { target, gloss } pair to `out` unless either side is empty, the
+ * target has fewer than two words, or the pair was already added.
+ *
+ * @param {Array<{target: string, gloss: string}>} out - Result list (mutated).
+ * @param {Set<string>} seen - Keys of pairs already added (mutated).
+ * @param {*} target - Phrase stored as `target`.
+ * @param {*} gloss - Text stored as `gloss`.
+ * @returns {void}
+ */
+function addUniquePattern(out, seen, target, gloss) {
+  const t = normalizeText(target);
+  const g = normalizeText(gloss);
+  if (!t || !g) return;
+  if (!isPhraseLike(t)) return;
+  // Lower-cased key: pairs that differ only in casing are stored once.
+  const key = `${t.toLowerCase()}|${g.toLowerCase()}`;
+  if (seen.has(key)) return;
+  seen.add(key);
+  out.push({ target: t, gloss: g });
+}
+
+/**
+ * Pairs the answers of a gap-fill exercise with the parenthesized hints in
+ * its text by position: the i-th answer goes with the i-th "(...)" hint.
+ *
+ * @param {object} questionData - Reads `text`.
+ * @param {object} answerData - Reads `answers`; ignored unless it is an array.
+ * @param {Array<{target: string, gloss: string}>} out - Result list (mutated).
+ * @param {Set<string>} seen - Keys of pairs already added (mutated).
+ * @returns {void}
+ */
+function extractPairsFromGapFill(questionData, answerData, out, seen) {
+  const text = String(questionData?.text || '');
+  const answers = Array.isArray(answerData?.answers) ? answerData.answers : [];
+  if (!text || answers.length === 0) return;
+
+  const hintMatches = text.matchAll(/\(([^)]+)\)/g);
+  const hints = Array.from(hintMatches, (match) => normalizeText(match[1]));
+  const pairCount = Math.min(answers.length, hints.length);
+  for (let i = 0; i < pairCount; i += 1) {
+    addUniquePattern(out, seen, answers[i], hints[i]);
+  }
+}
+
+/**
+ * Derives one pair from a transformation exercise. The answer becomes the
+ * `target` when the source language is 'deutsch' or the target language is
+ * 'bisaya'; otherwise the question text is the `target`.
+ *
+ * @param {object} questionData - Reads `text`, `sourceLanguage`, `targetLanguage`.
+ * @param {object} answerData - Reads `correct`.
+ * @param {Array<{target: string, gloss: string}>} out - Result list (mutated).
+ * @param {Set<string>} seen - Keys of pairs already added (mutated).
+ * @returns {void}
+ */
+function extractPairsFromTransformation(questionData, answerData, out, seen) {
+  const src = normalizeText(questionData?.text || '');
+  const trg = normalizeText(answerData?.correct || '');
+  if (!src || !trg) return;
+  const sourceLanguage = normalizeText(questionData?.sourceLanguage || '').toLowerCase();
+  const targetLanguage = normalizeText(questionData?.targetLanguage || '').toLowerCase();
+  if (sourceLanguage === 'deutsch' || targetLanguage === 'bisaya') {
+    addUniquePattern(out, seen, trg, src);
+    return;
+  }
+  addUniquePattern(out, seen, src, trg);
+}
+
+/**
+ * Derives one pair from a multiple-choice exercise whose question quotes a
+ * phrase. "Was bedeutet ..." questions store the quoted phrase as `target`;
+ * "Wie sagt man ..." questions store the correct option as `target`.
+ *
+ * @param {object} questionData - Reads `question` and `options`.
+ * @param {object} answerData - Reads `correctAnswer`, an index into `options`.
+ * @param {Array<{target: string, gloss: string}>} out - Result list (mutated).
+ * @param {Set<string>} seen - Keys of pairs already added (mutated).
+ * @returns {void}
+ */
+function extractPairsFromMultipleChoice(questionData, answerData, out, seen) {
+  const question = normalizeText(questionData?.question || '');
+  const options = Array.isArray(questionData?.options) ? questionData.options : [];
+  const rawIndex = answerData?.correctAnswer;
+  // Number(null), Number('') and Number(false) are all 0, which would silently
+  // pick the first option when no answer is recorded; accept only real indices.
+  const isNumericString = typeof rawIndex === 'string' && rawIndex.trim() !== '';
+  if (typeof rawIndex !== 'number' && !isNumericString) return;
+  const correct = normalizeText(options[Number(rawIndex)] || '');
+  if (!question || !correct) return;
+
+  const quoted = firstQuotedContent(question);
+  const lower = question.toLowerCase();
+  if (!quoted) return;
+
+  if (lower.startsWith('was bedeutet')) {
+    addUniquePattern(out, seen, quoted, correct);
+    return;
+  }
+  if (lower.startsWith('wie sagt man')) {
+    addUniquePattern(out, seen, correct, normalizeQuotedPrompt(quoted));
+  }
+}
+
+/**
+ * Collects unique { target, gloss } pairs from a lesson's exercises.
+ * Only `gap_fill`, `transformation` and `multiple_choice` exercises
+ * contribute; other types are skipped.
+ *
+ * @param {Array<object>} exercises - Exercises; a non-array yields [].
+ * @returns {Array<{target: string, gloss: string}>} Pairs in discovery order.
+ */
+function deriveLessonCorePatternsFromExercises(exercises) {
+  const out = [];
+  const seen = new Set();
+  (Array.isArray(exercises) ? exercises : []).forEach((exercise) => {
+    const questionData = exercise?.questionData || {};
+    const answerData = exercise?.answerData || {};
+    const type = String(questionData?.type || '');
+    if (type === 'gap_fill') {
+      extractPairsFromGapFill(questionData, answerData, out, seen);
+    } else if (type === 'transformation') {
+      extractPairsFromTransformation(questionData, answerData, out, seen);
+    } else if (type === 'multiple_choice') {
+      extractPairsFromMultipleChoice(questionData, answerData, out, seen);
+    }
+  });
+  return out;
+}
+
+/**
+ * Merges a lesson's own core patterns with exercise-derived ones. Lesson
+ * patterns come first; duplicates and single-word targets are dropped.
+ *
+ * @param {object} didactics - Reads `corePatterns` (ignored unless an array).
+ * @param {Array<{target: string, gloss: string}>} exerciseDerived - Derived
+ *   pairs; a non-array is treated as empty.
+ * @param {number} [minCount=8] - Desired minimum. It does not change the
+ *   result: callers compare the returned length against it themselves.
+ * @returns {Array<{target: string, gloss: string}>} Merged, de-duplicated pairs.
+ */
+function mergeCorePatternsForLesson(didactics, exerciseDerived, minCount = 8) {
+  const out = [];
+  const seen = new Set();
+  const basePatterns = Array.isArray(didactics?.corePatterns) ? didactics.corePatterns : [];
+  const derivedPatterns = Array.isArray(exerciseDerived) ? exerciseDerived : [];
+
+  basePatterns.forEach((entry) => {
+    const normalized = normalizeCorePatternEntry(entry);
+    if (!normalized?.target || !normalized?.gloss) return;
+    addUniquePattern(out, seen, normalized.target, normalized.gloss);
+  });
+  derivedPatterns.forEach((entry) => {
+    addUniquePattern(out, seen, entry?.target, entry?.gloss);
+  });
+
+  return out;
+}
+
 function sanitizeGapFillHintText(lessonTitle, text, answers, glossLookup) {
   const source = String(text || '');
   const normalizedAnswers = Array.isArray(answers)
@@ -4454,6 +4661,18 @@ async function createBisayaCourseContent() {
 
       // Erstelle Übungen
       const lessonDidactics = getLessonDidactics(lesson);
+      const derivedCorePatterns = deriveLessonCorePatternsFromExercises(exercises);
+      const mergedCorePatterns = mergeCorePatternsForLesson(lessonDidactics, derivedCorePatterns, 8);
+      if (mergedCorePatterns.length >= 8) {
+        await lesson.update({ corePatterns: mergedCorePatterns });
+        if (process.env.VOCAB_STRICT_AUDIT === '1') {
+          console.log(` ✅ [${lesson.title}] corePatterns auf ${mergedCorePatterns.length} Satzphrasen aktualisiert`);
+        }
+      } else if (process.env.VOCAB_STRICT_AUDIT === '1') {
+        console.warn(
+          ` ⚠️ [${lesson.title}] Nur ${mergedCorePatterns.length} Satzphrasen mit sicherer Gloss gefunden (Ziel: 8)`
+        );
+      }
       let exerciseNumber = 1;
       for (const exerciseData of exercises) {
         const { exercise, fixes, warnings } = sanitizeExerciseForConsistency(