feat(VocabPracticeDialog): improve vocabulary item normalization and expansion
All checks were successful
Deploy to production / deploy (push) Successful in 1m48s
All checks were successful
Deploy to production / deploy (push) Successful in 1m48s
- Enhanced the normalization process for vocabulary items to better accommodate multiple valid translations, improving the overall accuracy of vocabulary practice.
- Updated methods for expanding vocabulary item alternatives, ensuring a more flexible and effective learning experience for users.
This commit is contained in:
204
backend/scripts/normalize-core-pattern-alternatives.js
Normal file
204
backend/scripts/normalize-core-pattern-alternatives.js
Normal file
@@ -0,0 +1,204 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Splits slash-separated alternatives in lesson corePatterns into separate entries.
|
||||
*
|
||||
* Default mode is dry-run (no writes).
|
||||
* Use --apply to persist changes.
|
||||
*
|
||||
* Optional filters:
|
||||
* --course-id=<id>
|
||||
* --lesson-id=<id>
|
||||
*/
|
||||
|
||||
import { Op } from 'sequelize';
|
||||
import { sequelize } from '../utils/sequelize.js';
|
||||
import VocabCourseLesson from '../models/community/vocab_course_lesson.js';
|
||||
|
||||
/**
 * Parses the command-line arguments of this script.
 *
 * Recognised arguments:
 *   --apply            persist changes (the default is a dry run)
 *   --course-id=<id>   restrict the run to lessons of one course
 *   --lesson-id=<id>   restrict the run to a single lesson
 *
 * Any other argument is ignored.
 *
 * @param {string[]} argv - Arguments without the `node` / script-path prefix.
 * @returns {{apply: boolean, courseId: (number|null), lessonId: (number|null)}}
 * @throws {Error} If an id filter is present but is not a positive integer.
 */
function parseArgs(argv) {
  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };

  // An unusable filter value must abort the run. Silently dropping it would
  // widen the scope to every lesson, which is dangerous together with --apply.
  const readId = (arg, prefix) => {
    const raw = arg.slice(prefix.length).trim();
    const value = Number(raw);
    if (raw === '' || !Number.isInteger(value) || value <= 0) {
      throw new Error(`Ungültiger Wert für ${prefix}<id>: "${raw}" (erwartet: positive Ganzzahl)`);
    }
    return value;
  };

  argv.forEach((arg) => {
    if (arg === '--apply') {
      args.apply = true;
      return;
    }
    if (arg.startsWith('--course-id=')) {
      args.courseId = readId(arg, '--course-id=');
      return;
    }
    if (arg.startsWith('--lesson-id=')) {
      args.lessonId = readId(arg, '--lesson-id=');
    }
  });

  return args;
}
|
||||
|
||||
/**
 * Turns a value into a trimmed string in which every run of whitespace is
 * collapsed to a single space. Falsy input (null, undefined, '', 0, false)
 * yields the empty string.
 *
 * @param {*} value - Value to normalise.
 * @returns {string} The normalised text.
 */
function normalizeText(value) {
  const raw = value ? String(value) : '';
  const trimmed = raw.trim();
  return trimmed.replace(/\s+/g, ' ');
}
|
||||
|
||||
/**
 * Splits a text at slashes that have whitespace on both sides ("a / b").
 *
 * @param {*} value - Text that may contain slash-separated alternatives.
 * @returns {string[]} An empty array for blank input; the individual
 *   alternatives when at least two non-empty parts result; otherwise a
 *   single-element array holding the normalised text.
 */
function splitAlternatives(value) {
  const text = normalizeText(value);
  if (text === '') {
    return [];
  }

  const pieces = [];
  for (const piece of text.split(/\s+\/\s+/)) {
    const cleaned = normalizeText(piece);
    if (cleaned) {
      pieces.push(cleaned);
    }
  }

  // Fewer than two usable parts means there was nothing to split.
  return pieces.length < 2 ? [text] : pieces;
}
|
||||
|
||||
/**
 * Reduces a core-pattern entry to its normalised `target` and `gloss` texts.
 * Any other property of the entry is not carried over.
 *
 * @param {*} entry - Candidate pattern entry.
 * @returns {{target: string, gloss: string}|null} The normalised pair, or null
 *   when the entry is not an object or either text is blank.
 */
function normalizePatternEntry(entry) {
  const isObject = Boolean(entry) && typeof entry === 'object';
  if (!isObject) {
    return null;
  }

  const [target, gloss] = [entry.target, entry.gloss].map((field) => normalizeText(field || ''));
  return target && gloss ? { target, gloss } : null;
}
|
||||
|
||||
/**
 * Expands one core-pattern entry into one entry per slash alternative.
 *
 * - Only one side has alternatives: each alternative is paired with the
 *   unchanged other side.
 * - Both sides have the same number of alternatives: they are paired by position.
 * - Both sides have alternatives but in different numbers: the entry is kept
 *   as is and reported as skipped.
 *
 * @param {*} entry - Raw pattern entry.
 * @returns {{expanded: Array<{target: string, gloss: string}>, changed: boolean, skipped: boolean}}
 *   `expanded` is empty when the entry is invalid.
 */
function expandPatternAlternatives(entry) {
  const pattern = normalizePatternEntry(entry);
  if (!pattern) {
    return { expanded: [], changed: false, skipped: false };
  }

  const targetParts = splitAlternatives(pattern.target);
  const glossParts = splitAlternatives(pattern.gloss);
  const targetIsSplit = targetParts.length > 1;
  const glossIsSplit = glossParts.length > 1;

  if (!(targetIsSplit || glossIsSplit)) {
    return { expanded: [pattern], changed: false, skipped: false };
  }

  // Different counts on both sides cannot be paired up unambiguously.
  if (targetIsSplit && glossIsSplit && targetParts.length !== glossParts.length) {
    return { expanded: [pattern], changed: false, skipped: true };
  }

  // The unsplit side (if any) is repeated for every alternative of the other side.
  const pairCount = Math.max(targetParts.length, glossParts.length);
  const expanded = Array.from({ length: pairCount }, (_, index) => ({
    target: targetIsSplit ? targetParts[index] : pattern.target,
    gloss: glossIsSplit ? glossParts[index] : pattern.gloss,
  }));

  return { expanded, changed: true, skipped: false };
}
|
||||
|
||||
/**
 * Normalises the given pattern entries and removes duplicates. Two entries
 * count as duplicates when target and gloss match case-insensitively; the
 * first occurrence wins and the input order is kept. Invalid entries are dropped.
 *
 * @param {Array<*>} patterns - Raw pattern entries.
 * @returns {Array<{target: string, gloss: string}>} Unique normalised entries.
 */
function dedupePatterns(patterns) {
  const unique = new Map();

  for (const candidate of patterns) {
    const pattern = normalizePatternEntry(candidate);
    if (!pattern) {
      continue;
    }
    const key = `${pattern.target.toLowerCase()}|${pattern.gloss.toLowerCase()}`;
    if (!unique.has(key)) {
      unique.set(key, pattern);
    }
  }

  return [...unique.values()];
}
|
||||
|
||||
/**
 * Script entry point: loads the (optionally filtered) lessons, expands the
 * slash alternatives in each lesson's `corePatterns`, prints a per-lesson
 * summary and writes the result back only when `--apply` was given.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { apply, courseId, lessonId } = parseArgs(process.argv.slice(2));

  const filter = {};
  if (courseId) filter.courseId = courseId;
  if (lessonId) filter.id = lessonId;
  const hasFilter = Object.keys(filter).length > 0;

  const lessons = await VocabCourseLesson.findAll({
    where: hasFilter ? filter : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title', 'corePatterns'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });

  const totals = { lessons: 0, patterns: 0, ambiguous: 0 };

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Modus: ${apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const current = Array.isArray(lesson.corePatterns) ? lesson.corePatterns : [];
    if (!current.length) continue;

    const results = current.map((entry) => expandPatternAlternatives(entry));
    const changedCount = results.filter((result) => result.changed).length;
    const skippedCount = results.filter((result) => result.skipped).length;
    const deduped = dedupePatterns(results.flatMap((result) => result.expanded));

    // Ambiguous entries are counted whether or not the lesson ends up changed.
    totals.ambiguous += skippedCount;

    // Compare against the normalised original so that whitespace or duplicate
    // differences alone do not count as a change.
    const before = JSON.stringify(dedupePatterns(current));
    const after = JSON.stringify(deduped);
    if (before === after) continue;

    totals.lessons += 1;
    totals.patterns += changedCount;

    const summary = [
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `,
      `${current.length} -> ${deduped.length} Core-Patterns`,
      skippedCount > 0 ? `, übersprungen (uneindeutig): ${skippedCount}` : '',
    ];
    console.log(summary.join(''));

    if (apply) {
      await lesson.update({ corePatterns: deduped });
    }
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${totals.lessons}`);
  console.log(`Geänderte Pattern-Einträge (vor Dedupe): ${totals.patterns}`);
  console.log(`Uneindeutige Slash-Fälle (nicht automatisch geändert): ${totals.ambiguous}`);
}
|
||||
|
||||
// Run the script, close the database connection afterwards and exit with
// status 1 when anything (including the regular close) fails.
(async () => {
  try {
    await main();
    await sequelize.close();
  } catch (error) {
    console.error('Fehler beim Normalisieren der Core-Patterns:', error);
    await sequelize.close();
    process.exit(1);
  }
})();
|
||||
|
||||
316
backend/scripts/normalize-exercise-answer-alternatives.js
Normal file
316
backend/scripts/normalize-exercise-answer-alternatives.js
Normal file
@@ -0,0 +1,316 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Splits slash-separated alternatives in grammar exercise answers into separate exercise variants.
|
||||
*
|
||||
* Default: dry-run (no writes)
|
||||
* Use --apply to persist.
|
||||
*
|
||||
* Optional filters:
|
||||
* --course-id=<id>
|
||||
* --lesson-id=<id>
|
||||
*/
|
||||
|
||||
import { Op } from 'sequelize';
|
||||
import { sequelize } from '../utils/sequelize.js';
|
||||
import VocabCourseLesson from '../models/community/vocab_course_lesson.js';
|
||||
import VocabGrammarExercise from '../models/community/vocab_grammar_exercise.js';
|
||||
|
||||
// Upper bound on the number of answer combinations a single gap-fill exercise
// may expand into; exercises whose combination count exceeds it are left
// untouched and reported as skipped.
const GAP_FILL_MAX_VARIANTS = 8;
|
||||
|
||||
/**
 * Parses the command-line arguments of this script.
 *
 * Recognised arguments:
 *   --apply            persist changes (the default is a dry run)
 *   --course-id=<id>   restrict the run to lessons of one course
 *   --lesson-id=<id>   restrict the run to a single lesson
 *
 * Any other argument is ignored.
 *
 * @param {string[]} argv - Arguments without the `node` / script-path prefix.
 * @returns {{apply: boolean, courseId: (number|null), lessonId: (number|null)}}
 * @throws {Error} If an id filter is present but is not a positive integer.
 */
function parseArgs(argv) {
  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };

  // A broken filter must stop the script. Ignoring it would make the run cover
  // every lesson, and with --apply this script deletes and recreates exercises.
  const readId = (arg, prefix) => {
    const raw = arg.slice(prefix.length).trim();
    const value = Number(raw);
    if (raw === '' || !Number.isInteger(value) || value <= 0) {
      throw new Error(`Ungültiger Wert für ${prefix}<id>: "${raw}" (erwartet: positive Ganzzahl)`);
    }
    return value;
  };

  argv.forEach((arg) => {
    if (arg === '--apply') {
      args.apply = true;
      return;
    }
    if (arg.startsWith('--course-id=')) {
      args.courseId = readId(arg, '--course-id=');
      return;
    }
    if (arg.startsWith('--lesson-id=')) {
      args.lessonId = readId(arg, '--lesson-id=');
    }
  });

  return args;
}
|
||||
|
||||
/**
 * Stringifies a value, trims it and collapses each whitespace run into one
 * space. Falsy input (null, undefined, '', 0, false) becomes ''.
 *
 * @param {*} value - Value to normalise.
 * @returns {string} The normalised text.
 */
function normalizeText(value) {
  if (!value) {
    return '';
  }
  const text = String(value).trim();
  return text.replace(/\s+/g, ' ');
}
|
||||
|
||||
/**
 * Splits a text at slashes that are surrounded by whitespace ("a / b").
 *
 * @param {*} value - Text that may contain slash-separated alternatives.
 * @returns {string[]} `[]` for blank input, the alternatives when at least two
 *   non-empty parts result, otherwise `[normalisedText]`.
 */
function splitAlternatives(value) {
  const text = normalizeText(value);
  if (text === '') {
    return [];
  }

  const candidates = text.split(/\s+\/\s+/);
  const alternatives = candidates.reduce((kept, candidate) => {
    const cleaned = normalizeText(candidate);
    if (cleaned) {
      kept.push(cleaned);
    }
    return kept;
  }, []);

  // With fewer than two usable parts the text is treated as one answer.
  if (alternatives.length < 2) {
    return [text];
  }
  return alternatives;
}
|
||||
|
||||
/**
 * Returns the object form of a value that is either already an object or a
 * JSON string describing one.
 *
 * @param {*} value - Object, JSON string, or anything else.
 * @returns {Object} The object itself, the parsed object, or `{}` when the
 *   value is falsy, is not a string/object, cannot be parsed, or parses to
 *   something that is not an object.
 */
function parseJsonLike(value) {
  if (!value) return {};
  if (typeof value === 'object') return value;
  if (typeof value !== 'string') return {};

  try {
    const parsed = JSON.parse(value);
    // JSON.parse legitimately yields null or a primitive for inputs such as
    // "null", "5" or "\"x\""; callers rely on always receiving an object.
    return parsed !== null && typeof parsed === 'object' ? parsed : {};
  } catch (_) {
    // Deliberate best effort: unparsable text is treated as "no data".
    return {};
  }
}
|
||||
|
||||
/**
 * Builds every combination that takes exactly one value from each input array,
 * keeping the input order (the last array varies fastest).
 *
 * @param {Array<Array<*>>} arrays - One array of choices per position.
 * @returns {Array<Array<*>>} All combinations; `[[]]` for no input arrays and
 *   `[]` as soon as any input array is empty.
 */
function buildCartesianProduct(arrays) {
  return arrays.reduce(
    (prefixes, choices) => prefixes.flatMap(
      (prefix) => choices.map((choice) => [...prefix, choice]),
    ),
    [[]],
  );
}
|
||||
|
||||
/**
 * Expands one grammar exercise into one variant per slash-separated answer
 * alternative.
 *
 * Handled exercise types (read from `questionData.type`, falling back to
 * `answerData.type`):
 * - `transformation`: one variant per alternative of the correct answer.
 * - `multiple_choice`: one variant per alternative of the correct option text.
 * - `gap_fill`: one variant per combination of the per-gap alternatives, unless
 *   the number of combinations exceeds GAP_FILL_MAX_VARIANTS (then skipped).
 * Every other type is returned unchanged.
 *
 * Variants carry neither `id` nor `exerciseNumber`; the caller assigns the
 * numbering when it writes them.
 *
 * @param {Object} exercise - Exercise record with `questionData` / `answerData`
 *   given as objects or JSON strings.
 * @returns {{changed: boolean, skipped: boolean, variants: Object[]}}
 *   `variants` always contains at least one entry.
 */
function buildExerciseVariants(exercise) {
  const questionData = parseJsonLike(exercise.questionData);
  const answerData = parseJsonLike(exercise.answerData);
  const type = String(questionData?.type || answerData?.type || '').trim();

  const base = {
    lessonId: exercise.lessonId,
    exerciseTypeId: exercise.exerciseTypeId,
    title: exercise.title,
    instruction: exercise.instruction,
    explanation: exercise.explanation,
    createdByUserId: exercise.createdByUserId,
    createdAt: exercise.createdAt,
  };

  // Result for "keep the exercise exactly as it is".
  const unchanged = (skipped = false) => ({
    changed: false,
    skipped,
    variants: [{ ...base, questionData, answerData }],
  });
  // Result for "replace the exercise by these variants".
  const expandedTo = (variants) => ({ changed: true, skipped: false, variants });

  if (type === 'transformation') {
    const correctAnswer = normalizeText(answerData?.correctAnswer || answerData?.correct || '');
    const alternatives = splitAlternatives(correctAnswer);
    if (alternatives.length <= 1) {
      return unchanged();
    }
    // NOTE(review): when the answer was read from `correct`, that key keeps the
    // slash-joined text in every variant; confirm consumers prefer `correctAnswer`.
    return expandedTo(alternatives.map((alt) => ({
      ...base,
      questionData,
      answerData: {
        ...answerData,
        correctAnswer: alt,
      },
    })));
  }

  if (type === 'multiple_choice') {
    const options = Array.isArray(questionData?.options) ? questionData.options.slice() : [];
    const rawIndex = answerData?.correctAnswer;
    // Number(null) and Number('') both coerce to 0, which would wrongly treat a
    // missing answer as "option 0 is correct".
    const isMissing = rawIndex == null || (typeof rawIndex === 'string' && rawIndex.trim() === '');
    const correctIndex = isMissing ? Number.NaN : Number(rawIndex);
    if (!Number.isInteger(correctIndex) || correctIndex < 0 || correctIndex >= options.length) {
      return unchanged();
    }

    const correctOption = normalizeText(options[correctIndex]);
    const alternatives = splitAlternatives(correctOption);
    if (alternatives.length <= 1) {
      return unchanged();
    }

    return expandedTo(alternatives.map((alt) => {
      const nextOptions = options.slice();
      nextOptions[correctIndex] = alt;
      return {
        ...base,
        questionData: {
          ...questionData,
          options: nextOptions,
        },
        answerData,
      };
    }));
  }

  if (type === 'gap_fill') {
    const answers = Array.isArray(answerData?.answers) ? answerData.answers : [];
    if (!answers.length) {
      return unchanged();
    }

    // A blank answer splits into zero parts. Keep it as a single choice so it
    // cannot zero out the product below, which would return no variants at
    // all and make the caller drop the exercise.
    const splitAnswers = answers.map((answer) => {
      const parts = splitAlternatives(answer);
      return parts.length ? parts : [answer];
    });

    const hasAlternatives = splitAnswers.some((parts) => parts.length > 1);
    if (!hasAlternatives) {
      return unchanged();
    }

    const variantCount = splitAnswers.reduce((acc, parts) => acc * parts.length, 1);
    if (variantCount > GAP_FILL_MAX_VARIANTS) {
      return unchanged(true);
    }

    return expandedTo(buildCartesianProduct(splitAnswers).map((combo) => ({
      ...base,
      questionData,
      answerData: {
        ...answerData,
        answers: combo,
      },
    })));
  }

  return unchanged();
}
|
||||
|
||||
/**
 * Script entry point: loads the (optionally filtered) lessons and their
 * grammar exercises, expands slash-separated answer alternatives into separate
 * exercise variants and prints a per-lesson summary. With `--apply`, every
 * affected lesson has all of its exercises deleted and recreated (renumbered
 * from 1) inside one transaction per lesson.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { apply, courseId, lessonId } = parseArgs(process.argv.slice(2));

  const lessonFilter = {};
  if (courseId) lessonFilter.courseId = courseId;
  if (lessonId) lessonFilter.id = lessonId;
  const hasLessonFilter = Object.keys(lessonFilter).length > 0;

  const lessons = await VocabCourseLesson.findAll({
    where: hasLessonFilter ? lessonFilter : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });

  const lessonIds = lessons.map(({ id }) => id);

  let exercises = [];
  if (lessonIds.length) {
    exercises = await VocabGrammarExercise.findAll({
      where: {
        lessonId: {
          [Op.in]: lessonIds,
        },
      },
      attributes: [
        'id',
        'lessonId',
        'exerciseTypeId',
        'exerciseNumber',
        'title',
        'instruction',
        'questionData',
        'answerData',
        'explanation',
        'createdByUserId',
        'createdAt',
      ],
      order: [['lessonId', 'ASC'], ['exerciseNumber', 'ASC'], ['id', 'ASC']],
    });
  }

  // Exercises per lesson id, in the order returned by the query.
  const exercisesByLesson = new Map();
  for (const exercise of exercises) {
    if (!exercisesByLesson.has(exercise.lessonId)) {
      exercisesByLesson.set(exercise.lessonId, []);
    }
    exercisesByLesson.get(exercise.lessonId).push(exercise);
  }

  const totals = { lessons: 0, exercises: 0, variants: 0, skipped: 0 };

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Gefundene Übungen: ${exercises.length}`);
  console.log(`Modus: ${apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const source = exercisesByLesson.get(lesson.id) || [];
    if (source.length === 0) continue;

    let changedInLesson = 0;
    let skippedInLesson = 0;
    const rebuilt = [];

    for (const exercise of source) {
      const { changed, skipped, variants } = buildExerciseVariants(exercise);
      if (changed) {
        changedInLesson += 1;
        // Only the variants beyond the first one are additional exercises.
        totals.variants += Math.max(0, variants.length - 1);
      }
      if (skipped) skippedInLesson += 1;
      rebuilt.push(...variants);
    }

    // Skipped exercises are counted whether or not the lesson is rewritten.
    totals.skipped += skippedInLesson;
    if (changedInLesson === 0) continue;

    totals.lessons += 1;
    totals.exercises += changedInLesson;

    const summary = [
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `,
      `${source.length} -> ${rebuilt.length} Übungen`,
      `, geändert: ${changedInLesson}`,
      skippedInLesson > 0 ? `, übersprungen (komplex): ${skippedInLesson}` : '',
    ];
    console.log(summary.join(''));

    if (!apply) continue;

    // NOTE(review): recreating the rows presumably assigns new primary keys;
    // confirm nothing else references grammar exercises by id.
    await sequelize.transaction(async (transaction) => {
      await VocabGrammarExercise.destroy({
        where: { lessonId: lesson.id },
        transaction,
      });

      if (rebuilt.length === 0) return;

      const payload = rebuilt.map((variant, position) => {
        const {
          exerciseTypeId,
          title,
          instruction,
          questionData,
          answerData,
          explanation,
          createdByUserId,
          createdAt,
        } = variant;
        return {
          lessonId: lesson.id,
          exerciseTypeId,
          exerciseNumber: position + 1,
          title,
          instruction,
          questionData,
          answerData,
          explanation,
          createdByUserId,
          createdAt,
        };
      });
      await VocabGrammarExercise.bulkCreate(payload, { transaction });
    });
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${totals.lessons}`);
  console.log(`Geänderte Übungen: ${totals.exercises}`);
  console.log(`Zusätzliche Varianten erzeugt: ${totals.variants}`);
  console.log(`Komplexe Fälle übersprungen: ${totals.skipped}`);
}
|
||||
|
||||
// Run the script, close the database connection afterwards and exit with
// status 1 when anything (including the regular close) fails.
(async () => {
  try {
    await main();
    await sequelize.close();
  } catch (error) {
    console.error('Fehler beim Normalisieren der Übungsantworten:', error);
    await sequelize.close();
    process.exit(1);
  }
})();
|
||||
|
||||
Reference in New Issue
Block a user