feat(VocabPracticeDialog): improve vocabulary item normalization and expansion
All checks were successful
Deploy to production / deploy (push) Successful in 1m48s

- Enhanced the normalization process for vocabulary items to better accommodate multiple valid translations, improving the overall accuracy of vocabulary practice.
- Updated methods for expanding vocabulary item alternatives, ensuring a more flexible and effective learning experience for users.
This commit is contained in:
Torsten Schulz (local)
2026-04-23 13:47:21 +02:00
parent 79fe05c630
commit 0e39ca9a0f
2 changed files with 520 additions and 0 deletions

View File

@@ -0,0 +1,204 @@
#!/usr/bin/env node
/**
* Splits alternatives written as "a / b" (a slash surrounded by whitespace) in lesson corePatterns into separate entries.
*
* Default mode is dry-run (no writes).
* Use --apply to persist changes.
*
* Optional filters:
* --course-id=<id>
* --lesson-id=<id>
*/
import { Op } from 'sequelize';
import { sequelize } from '../utils/sequelize.js';
import VocabCourseLesson from '../models/community/vocab_course_lesson.js';
/**
 * Parses the command-line arguments of this script.
 *
 * Recognized arguments:
 *   --apply            persist changes (the default is a dry-run)
 *   --course-id=<id>   restrict the run to a single course
 *   --lesson-id=<id>   restrict the run to a single lesson
 * Arguments that match none of these are ignored.
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {{ apply: boolean, courseId: (number|null), lessonId: (number|null) }}
 * @throws {Error} If an id filter is given but is not a positive integer.
 */
function parseArgs(argv) {
  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };

  // A filter that is present but unparsable must abort the run. Dropping it
  // silently would widen the selection to every lesson, which is destructive
  // when combined with --apply.
  const parseId = (arg, prefix) => {
    const raw = arg.slice(prefix.length);
    const id = Number(raw);
    if (raw.trim() === '' || !Number.isInteger(id) || id <= 0) {
      throw new Error(
        `Ungültiger Wert für ${prefix.slice(0, -1)}: "${raw}" (erwartet wird eine positive Ganzzahl)`
      );
    }
    return id;
  };

  for (const arg of argv) {
    if (arg === '--apply') {
      args.apply = true;
    } else if (arg.startsWith('--course-id=')) {
      args.courseId = parseId(arg, '--course-id=');
    } else if (arg.startsWith('--lesson-id=')) {
      args.lessonId = parseId(arg, '--lesson-id=');
    }
  }

  return args;
}
/**
 * Canonicalizes free text: a falsy input becomes the empty string, leading
 * and trailing whitespace is removed, and every inner run of whitespace is
 * collapsed to a single space.
 *
 * @param {*} value - Raw value; truthy non-strings are converted with String().
 * @returns {string} The cleaned text.
 */
function normalizeText(value) {
  const raw = value ? String(value) : '';
  // Splitting on whitespace runs and dropping the empty edge tokens is the
  // same as trimming first and then collapsing inner runs.
  return raw
    .split(/\s+/)
    .filter((word) => word !== '')
    .join(' ');
}
/**
 * Breaks a text into its alternatives, where alternatives are separated by
 * a slash that has whitespace on both sides ("a / b"). A slash without
 * surrounding whitespace does not separate anything.
 *
 * @param {*} value - Raw text; it is normalized before splitting.
 * @returns {string[]} An empty array for empty input, the alternatives when
 *   at least two non-empty ones are found, otherwise a single-element array
 *   holding the normalized text.
 */
function splitAlternatives(value) {
  const cleaned = normalizeText(value);
  if (!cleaned) {
    return [];
  }

  const pieces = [];
  for (const chunk of cleaned.split(/\s+\/\s+/)) {
    const piece = normalizeText(chunk);
    if (piece) {
      pieces.push(piece);
    }
  }

  if (pieces.length < 2) {
    return [cleaned];
  }
  return pieces;
}
/**
 * Reduces a pattern entry to a clean `{ target, gloss }` pair.
 *
 * @param {*} entry - Candidate entry; expected to be an object carrying
 *   `target` and `gloss`.
 * @returns {{ target: string, gloss: string } | null} The normalized pair,
 *   or null when the entry is not an object or either text is empty after
 *   normalization. Any other properties of the entry are not carried over.
 */
function normalizePatternEntry(entry) {
  const isObject = typeof entry === 'object' && entry !== null;
  if (!isObject) {
    return null;
  }

  const target = normalizeText(entry.target || '');
  const gloss = normalizeText(entry.gloss || '');
  return target && gloss ? { target, gloss } : null;
}
/**
 * Expands the alternatives of one pattern entry into separate entries.
 *
 * - Alternatives on one side only: one entry per alternative, each paired
 *   with the unsplit other side.
 * - Alternatives on both sides with equal counts: paired by position.
 * - Alternatives on both sides with different counts: the pairing is
 *   ambiguous, so the entry is kept as is and reported as skipped.
 *
 * @param {*} entry - Raw pattern entry.
 * @returns {{ expanded: Array<{ target: string, gloss: string }>, changed: boolean, skipped: boolean }}
 *   `expanded` is empty when the entry cannot be normalized.
 */
function expandPatternAlternatives(entry) {
  const base = normalizePatternEntry(entry);
  if (!base) {
    return { expanded: [], changed: false, skipped: false };
  }

  const targetOptions = splitAlternatives(base.target);
  const glossOptions = splitAlternatives(base.gloss);
  const multiTarget = targetOptions.length > 1;
  const multiGloss = glossOptions.length > 1;

  // Nothing to split on either side: pass the normalized entry through.
  if (!multiTarget && !multiGloss) {
    return { expanded: [base], changed: false, skipped: false };
  }

  let pairs = null;
  if (multiTarget && multiGloss) {
    if (targetOptions.length === glossOptions.length) {
      pairs = targetOptions.map((target, position) => ({ target, gloss: glossOptions[position] }));
    }
  } else if (multiTarget) {
    pairs = targetOptions.map((target) => ({ target, gloss: base.gloss }));
  } else {
    pairs = glossOptions.map((gloss) => ({ target: base.target, gloss }));
  }

  // Both sides have alternatives but the counts differ: leave untouched.
  if (pairs === null) {
    return { expanded: [base], changed: false, skipped: true };
  }
  return { expanded: pairs, changed: true, skipped: false };
}
/**
 * Normalizes the given pattern entries and removes duplicates, keeping the
 * first occurrence of each pair in its original position.
 *
 * Two entries are duplicates when both their target and their gloss are
 * equal ignoring case. Entries that cannot be normalized are dropped.
 *
 * @param {Array<*>} patterns - Raw pattern entries.
 * @returns {Array<{ target: string, gloss: string }>} Unique normalized entries.
 */
function dedupePatterns(patterns) {
  // A Map keeps insertion order, so the first occurrence of each key wins.
  const unique = new Map();
  for (const entry of patterns) {
    const normalized = normalizePatternEntry(entry);
    if (!normalized) continue;
    // Serialize the pair as a JSON tuple instead of joining with "|": a plain
    // join maps ("a|b", "c") and ("a", "b|c") to the same key and would drop
    // one of two genuinely different entries.
    const key = JSON.stringify([
      normalized.target.toLowerCase(),
      normalized.gloss.toLowerCase(),
    ]);
    if (!unique.has(key)) {
      unique.set(key, normalized);
    }
  }
  return [...unique.values()];
}
/**
 * Runs the normalization: loads the selected lessons, expands the
 * alternatives in each lesson's corePatterns, prints a per-lesson report and
 * a summary, and, only when --apply was passed, writes the new corePatterns
 * back through `lesson.update`.
 *
 * A lesson is reported (and written) only if its expanded, deduplicated
 * patterns differ from its current patterns after the same normalization
 * and deduplication.
 *
 * NOTE(review): the written array holds only the `{ target, gloss }` pairs
 * produced by the helpers; entries that fail normalization and any extra
 * properties on entries are not part of it. Confirm this is intended.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  const where = {};
  if (args.courseId) where.courseId = args.courseId;
  if (args.lessonId) where.id = args.lessonId;

  const lessons = await VocabCourseLesson.findAll({
    where: Object.keys(where).length ? where : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title', 'corePatterns'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });

  let affectedLessons = 0;
  let changedPatterns = 0;
  let skippedComplexPatterns = 0;

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Modus: ${args.apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const current = Array.isArray(lesson.corePatterns) ? lesson.corePatterns : [];
    if (current.length === 0) continue;

    let lessonChangedCount = 0;
    let lessonSkippedCount = 0;
    const expanded = [];
    for (const entry of current) {
      const { expanded: splitEntries, changed, skipped } = expandPatternAlternatives(entry);
      if (changed) lessonChangedCount += 1;
      if (skipped) lessonSkippedCount += 1;
      expanded.push(...splitEntries);
    }

    // Ambiguous entries count toward the summary whether or not the lesson
    // itself ends up being modified.
    skippedComplexPatterns += lessonSkippedCount;

    const deduped = dedupePatterns(expanded);
    // Compare against the normalized current state so that whitespace or
    // duplicate differences alone do not mark a lesson as affected.
    const oldSerialized = JSON.stringify(dedupePatterns(current));
    const newSerialized = JSON.stringify(deduped);
    if (oldSerialized === newSerialized) continue;

    affectedLessons += 1;
    changedPatterns += lessonChangedCount;

    console.log(
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `
      + `${current.length} -> ${deduped.length} Core-Patterns`
      + (lessonSkippedCount > 0 ? `, übersprungen (uneindeutig): ${lessonSkippedCount}` : '')
    );

    if (args.apply) {
      await lesson.update({ corePatterns: deduped });
    }
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${affectedLessons}`);
  console.log(`Geänderte Pattern-Einträge (vor Dedupe): ${changedPatterns}`);
  console.log(`Uneindeutige Slash-Fälle (nicht automatisch geändert): ${skippedComplexPatterns}`);
}
// Script entry point: run main(), then close the database connection. Any
// failure (from main() or from the close after it) is logged, the connection
// is closed, and the process exits with status 1.
try {
  await main();
  await sequelize.close();
} catch (error) {
  console.error('Fehler beim Normalisieren der Core-Patterns:', error);
  await sequelize.close();
  process.exit(1);
}