All checks were successful
Deploy to production / deploy (push) Successful in 1m48s
- Enhanced the normalization process for vocabulary items to better accommodate multiple valid translations, improving the overall accuracy of vocabulary practice. - Updated methods for expanding vocabulary item alternatives, ensuring a more flexible and effective learning experience for users.
205 lines
5.6 KiB
JavaScript
205 lines
5.6 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Splits slash-separated alternatives in lesson corePatterns into separate entries.
 * Only slashes surrounded by whitespace (e.g. "a / b") count as separators.
 *
 * Default mode is dry-run (no writes).
 * Use --apply to persist changes.
 *
 * Optional filters:
 *   --course-id=<id>
 *   --lesson-id=<id>
 */
|
|
|
|
import { Op } from 'sequelize';
|
|
import { sequelize } from '../utils/sequelize.js';
|
|
import VocabCourseLesson from '../models/community/vocab_course_lesson.js';
|
|
|
|
/**
 * Parses the script's command-line flags.
 *
 * Recognized flags:
 *   --apply            persist changes (otherwise the run is a dry-run)
 *   --course-id=<id>   restrict processing to one course
 *   --lesson-id=<id>   restrict processing to one lesson
 *
 * Arguments that match none of these flags are ignored.
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{ apply: boolean, courseId: (number|null), lessonId: (number|null) }}
 * @throws {Error} If an id flag carries a value that is not a positive integer.
 */
function parseArgs(argv) {
  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };

  // Flag prefix -> property of `args` that receives the parsed id.
  const idFlags = [
    ['--course-id=', 'courseId'],
    ['--lesson-id=', 'lessonId'],
  ];

  for (const arg of argv) {
    if (arg === '--apply') {
      args.apply = true;
      continue;
    }

    const matchedFlag = idFlags.find(([prefix]) => arg.startsWith(prefix));
    if (!matchedFlag) continue;

    const [prefix, key] = matchedFlag;
    const raw = arg.slice(prefix.length);
    const value = Number(raw);

    // A malformed filter must abort the run: silently dropping it would widen
    // the scope to every lesson, which is dangerous together with --apply.
    if (!Number.isInteger(value) || value <= 0) {
      throw new Error(
        `Ungültiger Wert für ${prefix.slice(0, -1)}: "${raw}" (erwartet: positive Ganzzahl)`,
      );
    }

    args[key] = value;
  }

  return args;
}
|
|
|
|
/**
 * Converts a value to a trimmed string whose internal whitespace runs are
 * collapsed to single spaces.
 *
 * Falsy inputs (null, undefined, '', 0, false, NaN) produce an empty string.
 *
 * @param {*} value - Value to normalize.
 * @returns {string} The normalized text.
 */
function normalizeText(value) {
  const asString = String(value || '');
  const trimmed = asString.trim();
  return trimmed.replace(/\s+/g, ' ');
}
|
|
|
|
/**
 * Breaks a text into its alternatives, using a slash surrounded by whitespace
 * as the separator.
 *
 * @param {*} value - Raw text; it is normalized via normalizeText first.
 * @returns {string[]} An empty array for empty text; the non-empty pieces when
 *   at least two exist; otherwise a one-element array holding the normalized text.
 */
function splitAlternatives(value) {
  const text = normalizeText(value);
  if (text === '') {
    return [];
  }

  const pieces = [];
  for (const piece of text.split(/\s+\/\s+/)) {
    const cleaned = normalizeText(piece);
    if (cleaned) {
      pieces.push(cleaned);
    }
  }

  // Fewer than two usable pieces means there was nothing to split.
  if (pieces.length < 2) {
    return [text];
  }
  return pieces;
}
|
|
|
|
/**
 * Reduces a pattern entry to its normalized `target` and `gloss` texts.
 *
 * @param {*} entry - Candidate entry; expected to be an object with `target`
 *   and `gloss` properties.
 * @returns {{ target: string, gloss: string } | null} The normalized pair, or
 *   null when the entry is not an object or either text is empty after
 *   normalization.
 */
function normalizePatternEntry(entry) {
  const isObject = Boolean(entry) && typeof entry === 'object';
  if (!isObject) {
    return null;
  }

  const [target, gloss] = [entry.target, entry.gloss].map((field) => normalizeText(field || ''));

  return target && gloss ? { target, gloss } : null;
}
|
|
|
|
/**
 * Expands one pattern entry into one entry per alternative.
 *
 * Rules:
 *   - invalid entry                         -> no entries, unchanged
 *   - no alternatives on either side        -> the normalized entry, unchanged
 *   - alternatives on exactly one side      -> one entry per alternative, sharing the other side
 *   - alternatives on both sides, same count -> paired by position
 *   - alternatives on both sides, different count -> the normalized entry,
 *     unchanged and flagged as skipped
 *
 * @param {*} entry - Raw pattern entry.
 * @returns {{ expanded: Array<{ target: string, gloss: string }>, changed: boolean, skipped: boolean }}
 */
function expandPatternAlternatives(entry) {
  const outcome = (expanded, changed, skipped = false) => ({ expanded, changed, skipped });

  const base = normalizePatternEntry(entry);
  if (base === null) {
    return outcome([], false);
  }

  const targetOptions = splitAlternatives(base.target);
  const glossOptions = splitAlternatives(base.gloss);
  const multipleTargets = targetOptions.length > 1;
  const multipleGlosses = glossOptions.length > 1;

  if (multipleTargets && multipleGlosses) {
    // Differing counts cannot be paired unambiguously, so the entry stays as is.
    if (targetOptions.length !== glossOptions.length) {
      return outcome([base], false, true);
    }
    const paired = targetOptions.map((target, position) => ({
      target,
      gloss: glossOptions[position],
    }));
    return outcome(paired, true);
  }

  if (multipleTargets) {
    return outcome(
      targetOptions.map((target) => ({ target, gloss: base.gloss })),
      true,
    );
  }

  if (multipleGlosses) {
    return outcome(
      glossOptions.map((gloss) => ({ target: base.target, gloss })),
      true,
    );
  }

  return outcome([base], false);
}
|
|
|
|
/**
 * Normalizes a list of pattern entries and removes duplicates.
 *
 * Two entries are duplicates when their normalized target and gloss match
 * case-insensitively; the first occurrence is kept. Entries that fail
 * normalization are dropped.
 *
 * @param {Array<*>} patterns - Raw pattern entries.
 * @returns {Array<{ target: string, gloss: string }>} Unique normalized entries
 *   in their original order.
 */
function dedupePatterns(patterns) {
  const seenKeys = new Set();
  const unique = [];

  for (const candidate of patterns) {
    const pattern = normalizePatternEntry(candidate);
    if (pattern === null) continue;

    const key = [pattern.target, pattern.gloss].map((text) => text.toLowerCase()).join('|');
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(pattern);
    }
  }

  return unique;
}
|
|
|
|
/**
 * Entry point: loads the lessons selected by the CLI filters, expands
 * slash-separated alternatives in each lesson's corePatterns, prints a report,
 * and writes the result back only when --apply was given.
 *
 * A lesson is reported (and updated in apply mode) only if its expanded,
 * de-duplicated pattern list differs from the de-duplicated current list.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  const where = {};
  if (args.courseId) where.courseId = args.courseId;
  if (args.lessonId) where.id = args.lessonId;

  const lessons = await VocabCourseLesson.findAll({
    where: Object.keys(where).length ? where : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title', 'corePatterns'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });

  let affectedLessons = 0;
  let changedPatterns = 0;
  let skippedComplexPatterns = 0;

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Modus: ${args.apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const current = Array.isArray(lesson.corePatterns) ? lesson.corePatterns : [];
    if (current.length === 0) continue;

    let lessonChangedCount = 0;
    let lessonSkippedCount = 0;
    const expanded = [];

    for (const entry of current) {
      const { expanded: splitEntries, changed, skipped } = expandPatternAlternatives(entry);
      if (changed) lessonChangedCount += 1;
      if (skipped) lessonSkippedCount += 1;
      expanded.push(...splitEntries);
    }

    const deduped = dedupePatterns(expanded);

    // Ambiguous entries count toward the total whether or not the lesson
    // itself ends up being rewritten.
    skippedComplexPatterns += lessonSkippedCount;

    // Compare against the de-duplicated current list so that normalization or
    // duplicate removal alone does not register as a change.
    const oldSerialized = JSON.stringify(dedupePatterns(current));
    const newSerialized = JSON.stringify(deduped);
    if (oldSerialized === newSerialized) continue;

    affectedLessons += 1;
    changedPatterns += lessonChangedCount;

    console.log(
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `
        + `${current.length} -> ${deduped.length} Core-Patterns`
        + (lessonSkippedCount > 0 ? `, übersprungen (uneindeutig): ${lessonSkippedCount}` : ''),
    );

    if (args.apply) {
      await lesson.update({ corePatterns: deduped });
    }
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${affectedLessons}`);
  console.log(`Geänderte Pattern-Einträge (vor Dedupe): ${changedPatterns}`);
  console.log(`Uneindeutige Slash-Fälle (nicht automatisch geändert): ${skippedComplexPatterns}`);
}
|
|
|
|
// Run the script. The database connection is closed on both paths; a failure
// is logged and ends the process with exit code 1.
(async () => {
  try {
    await main();
    await sequelize.close();
  } catch (error) {
    console.error('Fehler beim Normalisieren der Core-Patterns:', error);
    await sequelize.close();
    process.exit(1);
  }
})();
|
|
|