feat(VocabPracticeDialog): improve vocabulary item normalization and expansion
All checks were successful
Deploy to production / deploy (push) Successful in 1m48s

- Enhanced the normalization process for vocabulary items to better accommodate multiple valid translations, improving the overall accuracy of vocabulary practice.
- Updated methods for expanding vocabulary item alternatives, ensuring a more flexible and effective learning experience for users.
This commit is contained in:
Torsten Schulz (local)
2026-04-23 13:47:21 +02:00
parent 79fe05c630
commit 0e39ca9a0f
2 changed files with 520 additions and 0 deletions

View File

@@ -0,0 +1,204 @@
#!/usr/bin/env node
/**
* Splits slash-separated alternatives in lesson corePatterns into separate entries.
*
* Default mode is dry-run (no writes).
* Use --apply to persist changes.
*
* Optional filters:
* --course-id=<id>
* --lesson-id=<id>
*/
import { Op } from 'sequelize';
import { sequelize } from '../utils/sequelize.js';
import VocabCourseLesson from '../models/community/vocab_course_lesson.js';
/**
 * Parses the command-line arguments of this script.
 *
 * Recognised arguments:
 *   --apply            persist changes (default is dry-run)
 *   --course-id=<id>   restrict processing to one course
 *   --lesson-id=<id>   restrict processing to one lesson
 * Any other argument is ignored.
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{apply: boolean, courseId: (number|null), lessonId: (number|null)}}
 * @throws {Error} If an id filter is present but is not a positive integer.
 */
function parseArgs(argv) {
  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };
  const idOptions = {
    '--course-id': 'courseId',
    '--lesson-id': 'lessonId',
  };

  for (const arg of argv) {
    if (arg === '--apply') {
      args.apply = true;
      continue;
    }
    for (const [flag, key] of Object.entries(idOptions)) {
      const prefix = `${flag}=`;
      if (!arg.startsWith(prefix)) continue;
      const raw = arg.slice(prefix.length);
      const id = Number(raw);
      // A filter that cannot be parsed must abort the run. Silently dropping
      // it would widen the scope to every lesson, which is dangerous together
      // with --apply.
      if (!/^\d+$/.test(raw) || !Number.isSafeInteger(id) || id <= 0) {
        throw new Error(`Ungültiger Wert für ${flag}: "${raw}" (erwartet wird eine positive Ganzzahl)`);
      }
      args[key] = id;
    }
  }
  return args;
}
/**
 * Converts a value to a trimmed string with every whitespace run collapsed
 * to a single space. Falsy values (null, undefined, '', 0, ...) yield ''.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeText(value) {
  const raw = value ? String(value) : '';
  return raw.replace(/\s+/g, ' ').trim();
}
/**
 * Splits a text on slashes that are surrounded by whitespace ("a / b").
 * Slashes without surrounding whitespace ("km/h") are not separators.
 *
 * @param {*} value - Raw text; normalised via normalizeText first.
 * @returns {string[]} [] for empty input, the individual alternatives when
 *   there are at least two, otherwise a one-element array with the
 *   normalised text.
 */
function splitAlternatives(value) {
  const cleaned = normalizeText(value);
  if (cleaned === '') return [];

  const pieces = [];
  for (const chunk of cleaned.split(/\s+\/\s+/)) {
    const piece = normalizeText(chunk);
    if (piece) pieces.push(piece);
  }
  return pieces.length > 1 ? pieces : [cleaned];
}
/**
 * Reduces a core-pattern entry to its normalised target/gloss pair.
 *
 * @param {*} entry - Candidate entry; expected to be an object with
 *   `target` and `gloss`.
 * @returns {{target: string, gloss: string}|null} The normalised pair, or
 *   null when the entry is not an object or either text is empty after
 *   normalisation. Only `target` and `gloss` are carried over.
 */
function normalizePatternEntry(entry) {
  if (entry === null || typeof entry !== 'object') return null;

  const [target, gloss] = [entry.target, entry.gloss].map((field) => normalizeText(field || ''));
  return target && gloss ? { target, gloss } : null;
}
/**
 * Expands slash-separated alternatives of one core pattern into separate
 * target/gloss entries.
 *
 * - Alternatives on one side only: each alternative is paired with the
 *   unsplit other side.
 * - Alternatives on both sides with equal counts: paired by position.
 * - Alternatives on both sides with different counts: left untouched and
 *   reported as skipped.
 *
 * @param {*} entry - Raw pattern entry.
 * @returns {{expanded: Array<{target: string, gloss: string}>, changed: boolean, skipped: boolean}}
 *   `expanded` is empty when the entry is invalid.
 */
function expandPatternAlternatives(entry) {
  const pattern = normalizePatternEntry(entry);
  if (!pattern) {
    return { expanded: [], changed: false, skipped: false };
  }

  const targetOptions = splitAlternatives(pattern.target);
  const glossOptions = splitAlternatives(pattern.gloss);
  const multipleTargets = targetOptions.length > 1;
  const multipleGlosses = glossOptions.length > 1;

  let expanded;
  if (multipleTargets && multipleGlosses) {
    if (targetOptions.length !== glossOptions.length) {
      // The pairing between the two sides is ambiguous; keep the entry as is.
      return { expanded: [pattern], changed: false, skipped: true };
    }
    expanded = targetOptions.map((target, position) => ({ target, gloss: glossOptions[position] }));
  } else if (multipleTargets) {
    expanded = targetOptions.map((target) => ({ target, gloss: pattern.gloss }));
  } else if (multipleGlosses) {
    expanded = glossOptions.map((gloss) => ({ target: pattern.target, gloss }));
  } else {
    return { expanded: [pattern], changed: false, skipped: false };
  }

  return { expanded, changed: true, skipped: false };
}
/**
 * Normalises a list of pattern entries and removes duplicates.
 *
 * Two entries are duplicates when both target and gloss match
 * case-insensitively after normalisation; the first occurrence wins and
 * the original order is kept. Invalid entries are dropped.
 *
 * @param {Array<*>} patterns - Raw pattern entries.
 * @returns {Array<{target: string, gloss: string}>}
 */
function dedupePatterns(patterns) {
  const uniqueByKey = new Map();
  for (const candidate of patterns) {
    const pattern = normalizePatternEntry(candidate);
    if (!pattern) continue;
    // Serialise the pair as JSON instead of joining with '|': a plain
    // separator is ambiguous when a text itself contains it
    // ("a|b" + "c" would collide with "a" + "b|c").
    const key = JSON.stringify([pattern.target.toLowerCase(), pattern.gloss.toLowerCase()]);
    if (!uniqueByKey.has(key)) uniqueByKey.set(key, pattern);
  }
  return [...uniqueByKey.values()];
}
/**
 * Loads the selected lessons, expands slash-separated alternatives in their
 * corePatterns and reports the result. Changes are written only when the
 * script was started with --apply.
 *
 * NOTE(review): rewritten lessons store only `target` and `gloss` per
 * pattern, because normalizePatternEntry returns just those two fields.
 * Confirm that stored patterns carry no other fields that must be kept.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  const where = {};
  if (args.courseId) where.courseId = args.courseId;
  if (args.lessonId) where.id = args.lessonId;

  const lessons = await VocabCourseLesson.findAll({
    where: Object.keys(where).length ? where : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title', 'corePatterns'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });

  let affectedLessons = 0;
  let changedPatterns = 0;
  let skippedComplexPatterns = 0;

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Modus: ${args.apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const current = Array.isArray(lesson.corePatterns) ? lesson.corePatterns : [];
    if (current.length === 0) continue;

    let lessonChangedCount = 0;
    let lessonSkippedCount = 0;
    const expanded = [];
    for (const entry of current) {
      const { expanded: splitEntries, changed, skipped } = expandPatternAlternatives(entry);
      if (changed) lessonChangedCount += 1;
      if (skipped) lessonSkippedCount += 1;
      expanded.push(...splitEntries);
    }

    // Ambiguous entries are counted for every lesson, whether or not the
    // lesson itself ends up being rewritten.
    skippedComplexPatterns += lessonSkippedCount;

    // Compare against the normalised/deduplicated current state so that a
    // lesson is only touched when expansion really produced a different list.
    const deduped = dedupePatterns(expanded);
    const oldSerialized = JSON.stringify(dedupePatterns(current));
    const newSerialized = JSON.stringify(deduped);
    if (oldSerialized === newSerialized) continue;

    affectedLessons += 1;
    changedPatterns += lessonChangedCount;

    console.log(
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `
      + `${current.length} -> ${deduped.length} Core-Patterns`
      + (lessonSkippedCount > 0 ? `, übersprungen (uneindeutig): ${lessonSkippedCount}` : '')
    );

    if (args.apply) {
      await lesson.update({ corePatterns: deduped });
    }
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${affectedLessons}`);
  console.log(`Geänderte Pattern-Einträge (vor Dedupe): ${changedPatterns}`);
  console.log(`Uneindeutige Slash-Fälle (nicht automatisch geändert): ${skippedComplexPatterns}`);
}
// Script entry point: run main(), always close the database connection, and
// exit with status 1 when anything failed.
(async () => {
  try {
    await main();
    await sequelize.close();
  } catch (error) {
    console.error('Fehler beim Normalisieren der Core-Patterns:', error);
    await sequelize.close();
    process.exit(1);
  }
})();

View File

@@ -0,0 +1,316 @@
#!/usr/bin/env node
/**
* Splits slash-separated alternatives in grammar exercise answers into separate exercise variants.
*
* Default: dry-run (no writes)
* Use --apply to persist.
*
* Optional filters:
* --course-id=<id>
* --lesson-id=<id>
*/
import { Op } from 'sequelize';
import { sequelize } from '../utils/sequelize.js';
import VocabCourseLesson from '../models/community/vocab_course_lesson.js';
import VocabGrammarExercise from '../models/community/vocab_grammar_exercise.js';
// Upper bound on the number of variants generated from a single gap_fill
// exercise. When the alternatives of all gaps multiply to more than this,
// the exercise is left unchanged and reported as skipped.
const GAP_FILL_MAX_VARIANTS = 8;
/**
 * Parses the command-line arguments of this script.
 *
 * Recognised arguments:
 *   --apply            persist changes (default is dry-run)
 *   --course-id=<id>   restrict processing to one course
 *   --lesson-id=<id>   restrict processing to one lesson
 * Any other argument is ignored.
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{apply: boolean, courseId: (number|null), lessonId: (number|null)}}
 * @throws {Error} If an id filter is present but is not a positive integer.
 */
function parseArgs(argv) {
  // A filter that cannot be parsed must abort the run. Silently ignoring it
  // would make the script process every lesson, which is destructive
  // together with --apply.
  const parsePositiveId = (flag, raw) => {
    const id = Number(raw);
    if (!/^\d+$/.test(raw) || !Number.isSafeInteger(id) || id <= 0) {
      throw new Error(`Ungültiger Wert für ${flag}: "${raw}" (erwartet wird eine positive Ganzzahl)`);
    }
    return id;
  };

  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };

  argv.forEach((arg) => {
    if (arg === '--apply') {
      args.apply = true;
      return;
    }
    if (arg.startsWith('--course-id=')) {
      args.courseId = parsePositiveId('--course-id', arg.slice('--course-id='.length));
      return;
    }
    if (arg.startsWith('--lesson-id=')) {
      args.lessonId = parsePositiveId('--lesson-id', arg.slice('--lesson-id='.length));
    }
  });
  return args;
}
/**
 * Returns the value as a string without leading/trailing whitespace and with
 * inner whitespace runs reduced to one space. Falsy input yields ''.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeText(value) {
  if (!value) return '';
  return String(value).split(/\s+/).filter(Boolean).join(' ');
}
/**
 * Splits a text into its alternatives. Only a slash with whitespace on both
 * sides counts as a separator ("a / b"), so "km/h" stays intact.
 *
 * @param {*} value - Raw text; normalised via normalizeText first.
 * @returns {string[]} [] for empty input, the alternatives when at least two
 *   were found, otherwise a one-element array with the normalised text.
 */
function splitAlternatives(value) {
  const whole = normalizeText(value);
  if (whole === '') return [];

  const segments = whole.split(/\s+\/\s+/).flatMap((segment) => {
    const cleaned = normalizeText(segment);
    return cleaned ? [cleaned] : [];
  });
  return segments.length < 2 ? [whole] : segments;
}
/**
 * Returns an object for a value that is either already an object or a JSON
 * string encoding one.
 *
 * @param {*} value - Object, JSON string, or anything else.
 * @returns {Object} The object itself, the parsed object, or {} when the
 *   value is empty, is not valid JSON, or does not represent an object
 *   (for example the JSON texts `null`, `5` or `"text"`).
 */
function parseJsonLike(value) {
  let candidate = value;
  if (typeof candidate === 'string') {
    try {
      candidate = JSON.parse(candidate);
    } catch {
      // Malformed JSON is treated like missing data.
      return {};
    }
  }
  // JSON.parse can yield null or a primitive; callers read properties and
  // spread the result, so anything that is not an object falls back to {}.
  return candidate !== null && typeof candidate === 'object' ? candidate : {};
}
/**
 * Builds every combination that takes exactly one value from each input
 * array, in input order.
 *
 * @param {Array<Array<*>>} arrays - One array of choices per position.
 * @returns {Array<Array<*>>} All combinations; [[]] when `arrays` is empty
 *   and [] as soon as one of the arrays is empty.
 */
function buildCartesianProduct(arrays) {
  let combos = [[]];
  for (const choices of arrays) {
    combos = combos.flatMap((prefix) => choices.map((choice) => [...prefix, choice]));
  }
  return combos;
}
/**
 * Expands slash-separated answer alternatives of one grammar exercise into
 * separate exercise variants.
 *
 * Handled exercise types (read from questionData.type or answerData.type):
 * - transformation:  one variant per alternative in the correct answer.
 * - multiple_choice: one variant per alternative in the correct option; the
 *   correct index stays the same.
 * - gap_fill:        one variant per combination of the gaps' alternatives,
 *   unless that would exceed GAP_FILL_MAX_VARIANTS (reported as skipped).
 * Every other type is returned unchanged.
 *
 * @param {Object} exercise - Exercise record; questionData/answerData may be
 *   objects or JSON strings.
 * @returns {{changed: boolean, skipped: boolean, variants: Object[]}}
 *   `variants` always contains at least one entry; it holds the unchanged
 *   exercise when nothing was split.
 */
function buildExerciseVariants(exercise) {
  const questionData = parseJsonLike(exercise.questionData);
  const answerData = parseJsonLike(exercise.answerData);
  const type = String(questionData?.type || answerData?.type || '').trim();
  const base = {
    lessonId: exercise.lessonId,
    exerciseTypeId: exercise.exerciseTypeId,
    title: exercise.title,
    instruction: exercise.instruction,
    explanation: exercise.explanation,
    createdByUserId: exercise.createdByUserId,
    createdAt: exercise.createdAt,
  };
  // Result for every case in which the exercise is kept as a single variant.
  const unchanged = (skipped = false) => ({
    changed: false,
    skipped,
    variants: [{ ...base, questionData, answerData }],
  });

  if (type === 'transformation') {
    const correctAnswer = normalizeText(answerData?.correctAnswer || answerData?.correct || '');
    const alternatives = splitAlternatives(correctAnswer);
    if (alternatives.length <= 1) return unchanged();
    return {
      changed: true,
      skipped: false,
      variants: alternatives.map((alt) => ({
        ...base,
        questionData,
        answerData: { ...answerData, correctAnswer: alt },
      })),
    };
  }

  if (type === 'multiple_choice') {
    const options = Array.isArray(questionData?.options) ? [...questionData.options] : [];
    const rawIndex = answerData?.correctAnswer;
    // Number(null) and Number('') are both 0, so a missing index must be
    // rejected before conversion; otherwise option 0 would be treated as the
    // correct one and split.
    const isIndexLike = typeof rawIndex === 'number'
      || (typeof rawIndex === 'string' && rawIndex.trim() !== '');
    const correctIndex = isIndexLike ? Number(rawIndex) : NaN;
    if (!Number.isInteger(correctIndex) || correctIndex < 0 || correctIndex >= options.length) {
      return unchanged();
    }
    const alternatives = splitAlternatives(normalizeText(options[correctIndex]));
    if (alternatives.length <= 1) return unchanged();
    return {
      changed: true,
      skipped: false,
      variants: alternatives.map((alt) => {
        const nextOptions = [...options];
        nextOptions[correctIndex] = alt;
        return {
          ...base,
          questionData: { ...questionData, options: nextOptions },
          answerData,
        };
      }),
    };
  }

  if (type === 'gap_fill') {
    const answers = Array.isArray(answerData?.answers) ? answerData.answers : [];
    if (!answers.length) return unchanged();

    // splitAlternatives returns [] for an empty answer. An empty factor would
    // make the cartesian product empty, producing zero variants and thereby
    // dropping the exercise. Keep such an answer as its own single choice.
    const splitAnswers = answers.map((answer) => {
      const parts = splitAlternatives(answer);
      return parts.length > 0 ? parts : [answer];
    });
    if (!splitAnswers.some((parts) => parts.length > 1)) return unchanged();

    const variantCount = splitAnswers.reduce((acc, parts) => acc * parts.length, 1);
    if (variantCount > GAP_FILL_MAX_VARIANTS) return unchanged(true);

    return {
      changed: true,
      skipped: false,
      variants: buildCartesianProduct(splitAnswers).map((combo) => ({
        ...base,
        questionData,
        answerData: { ...answerData, answers: combo },
      })),
    };
  }

  return unchanged();
}
/**
 * Loads the selected lessons with their grammar exercises, expands
 * slash-separated answer alternatives into exercise variants and reports the
 * result. Changes are written only when the script was started with --apply.
 *
 * With --apply, all exercises of an affected lesson are deleted and
 * re-created inside one transaction, renumbered from 1 in their new order.
 *
 * NOTE(review): re-creating rows presumably assigns new primary keys, and
 * only the columns listed in `attributes` are copied. Confirm that nothing
 * references exercise ids and that no other column must be preserved.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  const lessonWhere = {};
  if (args.courseId) lessonWhere.courseId = args.courseId;
  if (args.lessonId) lessonWhere.id = args.lessonId;

  const lessons = await VocabCourseLesson.findAll({
    where: Object.keys(lessonWhere).length ? lessonWhere : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });
  const lessonIds = lessons.map((lesson) => lesson.id);

  const exercises = lessonIds.length
    ? await VocabGrammarExercise.findAll({
      where: {
        lessonId: {
          [Op.in]: lessonIds
        }
      },
      attributes: [
        'id',
        'lessonId',
        'exerciseTypeId',
        'exerciseNumber',
        'title',
        'instruction',
        'questionData',
        'answerData',
        'explanation',
        'createdByUserId',
        'createdAt'
      ],
      order: [['lessonId', 'ASC'], ['exerciseNumber', 'ASC'], ['id', 'ASC']]
    })
    : [];

  // Group exercises per lesson; the query order is kept within each group.
  const grouped = new Map();
  exercises.forEach((exercise) => {
    const list = grouped.get(exercise.lessonId) || [];
    list.push(exercise);
    grouped.set(exercise.lessonId, list);
  });

  let affectedLessons = 0;
  let changedExercises = 0;
  let createdVariants = 0;
  let skippedComplex = 0;

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Gefundene Übungen: ${exercises.length}`);
  console.log(`Modus: ${args.apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const source = grouped.get(lesson.id) || [];
    if (!source.length) continue;

    let lessonChangedExercises = 0;
    let lessonSkipped = 0;
    const rebuilt = [];
    source.forEach((exercise) => {
      const variantResult = buildExerciseVariants(exercise);
      if (variantResult.changed) {
        lessonChangedExercises += 1;
        createdVariants += Math.max(0, variantResult.variants.length - 1);
      }
      if (variantResult.skipped) lessonSkipped += 1;
      rebuilt.push(...variantResult.variants);
    });

    // Skipped exercises are counted for every lesson, whether or not the
    // lesson itself is rewritten.
    skippedComplex += lessonSkipped;
    if (lessonChangedExercises === 0) continue;

    affectedLessons += 1;
    changedExercises += lessonChangedExercises;

    console.log(
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `
      + `${source.length} -> ${rebuilt.length} Übungen`
      + `, geändert: ${lessonChangedExercises}`
      + (lessonSkipped > 0 ? `, übersprungen (komplex): ${lessonSkipped}` : '')
    );

    if (args.apply) {
      // Replace the lesson's exercises atomically so a failure cannot leave
      // the lesson without exercises.
      await sequelize.transaction(async (transaction) => {
        await VocabGrammarExercise.destroy({
          where: { lessonId: lesson.id },
          transaction
        });
        if (rebuilt.length) {
          const payload = rebuilt.map((entry, index) => ({
            lessonId: lesson.id,
            exerciseTypeId: entry.exerciseTypeId,
            exerciseNumber: index + 1,
            title: entry.title,
            instruction: entry.instruction,
            questionData: entry.questionData,
            answerData: entry.answerData,
            explanation: entry.explanation,
            createdByUserId: entry.createdByUserId,
            createdAt: entry.createdAt
          }));
          await VocabGrammarExercise.bulkCreate(payload, { transaction });
        }
      });
    }
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${affectedLessons}`);
  console.log(`Geänderte Übungen: ${changedExercises}`);
  console.log(`Zusätzliche Varianten erzeugt: ${createdVariants}`);
  console.log(`Komplexe Fälle übersprungen: ${skippedComplex}`);
}
// Script entry point: run main(), always close the database connection, and
// exit with status 1 when anything failed.
(async () => {
  try {
    await main();
    await sequelize.close();
  } catch (error) {
    console.error('Fehler beim Normalisieren der Übungsantworten:', error);
    await sequelize.close();
    process.exit(1);
  }
})();