/**
 * Parses the command-line arguments of the core-pattern normalization script.
 *
 * Recognized arguments are `--apply`, `--course-id=<n>` and `--lesson-id=<n>`.
 * Everything else is rejected: a silently ignored filter combined with
 * `--apply` would make the script rewrite every lesson instead of the
 * intended subset.
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {{ apply: boolean, courseId: (number|null), lessonId: (number|null) }}
 * @throws {Error} On an unknown argument or a filter value that is not a positive integer.
 */
function parseArgs(argv) {
  const idOptions = new Map([
    ['--course-id', 'courseId'],
    ['--lesson-id', 'lessonId'],
  ]);
  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };

  argv.forEach((arg) => {
    if (arg === '--apply') {
      args.apply = true;
      return;
    }

    const separatorIndex = arg.indexOf('=');
    const flag = separatorIndex === -1 ? arg : arg.slice(0, separatorIndex);
    const property = idOptions.get(flag);
    if (!property) {
      throw new Error(`Unbekanntes Argument: ${arg}`);
    }

    // Everything after the first "=" is the value; a missing "=" counts as an empty value.
    const rawValue = separatorIndex === -1 ? '' : arg.slice(separatorIndex + 1);
    const value = Number(rawValue);
    if (rawValue.trim() === '' || !Number.isSafeInteger(value) || value <= 0) {
      throw new Error(`Ungültiger Wert für ${flag}: "${rawValue}" (erwartet: positive Ganzzahl)`);
    }
    args[property] = value;
  });

  return args;
}

/**
 * Converts a value to a string, trims it and collapses every whitespace run
 * into a single space. Falsy input yields the empty string.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeText(value) {
  return String(value || '').trim().replace(/\s+/g, ' ');
}

/**
 * Splits a text at slashes that have whitespace on both sides ("a / b").
 * Slashes without surrounding whitespace ("a/b") are left untouched.
 *
 * @param {*} value
 * @returns {string[]} Empty for blank input, otherwise the alternatives, or a
 *   single-element array with the normalized text when there is nothing to split.
 */
function splitAlternatives(value) {
  const text = normalizeText(value);
  if (!text) return [];
  const parts = text
    .split(/\s+\/\s+/)
    .map((part) => normalizeText(part))
    .filter(Boolean);
  return parts.length >= 2 ? parts : [text];
}

/**
 * Reduces a pattern entry to its normalized `target` and `gloss` texts.
 *
 * NOTE(review): any other property on the entry is not carried over; confirm
 * that core pattern entries consist of `target` and `gloss` only.
 *
 * @param {*} entry
 * @returns {{ target: string, gloss: string } | null} `null` when the entry is
 *   not an object or either text is blank.
 */
function normalizePatternEntry(entry) {
  if (!entry || typeof entry !== 'object') return null;
  const target = normalizeText(entry.target || '');
  const gloss = normalizeText(entry.gloss || '');
  if (!target || !gloss) return null;
  return { target, gloss };
}

/**
 * Expands slash-separated alternatives of one pattern entry into separate entries.
 *
 * - Alternatives on one side only: one entry per alternative, sharing the other side.
 * - Alternatives on both sides with equal counts: paired by position.
 * - Alternatives on both sides with different counts: left as is and reported as skipped.
 *
 * @param {*} entry
 * @returns {{ expanded: Array<{ target: string, gloss: string }>, changed: boolean, skipped: boolean }}
 *   `expanded` is empty when the entry is invalid.
 */
function expandPatternAlternatives(entry) {
  const normalized = normalizePatternEntry(entry);
  if (!normalized) return { expanded: [], changed: false, skipped: false };

  const targets = splitAlternatives(normalized.target);
  const glosses = splitAlternatives(normalized.gloss);

  const hasTargetAlternatives = targets.length > 1;
  const hasGlossAlternatives = glosses.length > 1;

  if (!hasTargetAlternatives && !hasGlossAlternatives) {
    return { expanded: [normalized], changed: false, skipped: false };
  }

  if (hasTargetAlternatives && !hasGlossAlternatives) {
    return {
      expanded: targets.map((target) => ({ target, gloss: normalized.gloss })),
      changed: true,
      skipped: false,
    };
  }

  if (!hasTargetAlternatives && hasGlossAlternatives) {
    return {
      expanded: glosses.map((gloss) => ({ target: normalized.target, gloss })),
      changed: true,
      skipped: false,
    };
  }

  if (targets.length === glosses.length) {
    return {
      expanded: targets.map((target, index) => ({ target, gloss: glosses[index] })),
      changed: true,
      skipped: false,
    };
  }

  // Different numbers of alternatives cannot be paired unambiguously.
  return { expanded: [normalized], changed: false, skipped: true };
}

/**
 * Normalizes the entries and removes invalid ones as well as duplicates.
 * Two entries are duplicates when target and gloss both match case-insensitively;
 * the first occurrence wins and the original order is kept.
 *
 * @param {Array<*>} patterns
 * @returns {Array<{ target: string, gloss: string }>}
 */
function dedupePatterns(patterns) {
  const out = [];
  const seen = new Set();
  patterns.forEach((entry) => {
    const normalized = normalizePatternEntry(entry);
    if (!normalized) return;
    // A JSON-encoded pair keeps the key unambiguous even when a text contains
    // the character a plain joined key would use as its separator.
    const key = JSON.stringify([
      normalized.target.toLowerCase(),
      normalized.gloss.toLowerCase(),
    ]);
    if (seen.has(key)) return;
    seen.add(key);
    out.push(normalized);
  });
  return out;
}
/**
 * Entry point of the core-pattern normalization script.
 *
 * Loads the lessons (optionally filtered by course id and/or lesson id),
 * expands slash-separated alternatives in each lesson's `corePatterns`,
 * de-duplicates the result and prints a summary. The expanded list is only
 * persisted when `--apply` was passed; otherwise nothing is written.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { apply, courseId, lessonId } = parseArgs(process.argv.slice(2));

  const filter = {};
  if (courseId) filter.courseId = courseId;
  if (lessonId) filter.id = lessonId;
  const hasFilter = Object.keys(filter).length > 0;

  const lessons = await VocabCourseLesson.findAll({
    where: hasFilter ? filter : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title', 'corePatterns'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });

  const totals = { lessons: 0, patterns: 0, skipped: 0 };

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Modus: ${apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const existing = Array.isArray(lesson.corePatterns) ? lesson.corePatterns : [];
    if (existing.length === 0) continue;

    let changedHere = 0;
    let skippedHere = 0;
    const collected = [];

    for (const entry of existing) {
      const result = expandPatternAlternatives(entry);
      if (result.changed) changedHere += 1;
      if (result.skipped) skippedHere += 1;
      collected.push(...result.expanded);
    }

    // Ambiguous entries are counted whether or not the lesson ends up being rewritten.
    totals.skipped += skippedHere;

    // Compare against the normalized and de-duplicated current state, so that
    // only a real difference in content marks the lesson as affected.
    const next = dedupePatterns(collected);
    const before = JSON.stringify(dedupePatterns(existing));
    const after = JSON.stringify(next);
    if (before === after) continue;

    totals.lessons += 1;
    totals.patterns += changedHere;

    const skippedNote = skippedHere > 0 ? `, übersprungen (uneindeutig): ${skippedHere}` : '';
    console.log(
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `
        + `${existing.length} -> ${next.length} Core-Patterns`
        + skippedNote
    );

    if (apply) {
      await lesson.update({ corePatterns: next });
    }
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${totals.lessons}`);
  console.log(`Geänderte Pattern-Einträge (vor Dedupe): ${totals.patterns}`);
  console.log(`Uneindeutige Slash-Fälle (nicht automatisch geändert): ${totals.skipped}`);
}

// Run the script: close the database connection on success; on any failure
// log the error, close the connection and exit with status 1.
(async () => {
  try {
    await main();
    await sequelize.close();
  } catch (error) {
    console.error('Fehler beim Normalisieren der Core-Patterns:', error);
    await sequelize.close();
    process.exit(1);
  }
})();
// Upper bound for the number of variants generated from one gap-fill exercise;
// exercises whose combinations would exceed it are reported as skipped.
const GAP_FILL_MAX_VARIANTS = 8;

/**
 * Parses the command-line arguments of the exercise normalization script.
 *
 * Recognized arguments are `--apply`, `--course-id=<n>` and `--lesson-id=<n>`.
 * Everything else is rejected: a silently ignored filter combined with
 * `--apply` would make the script rewrite the exercises of every lesson
 * instead of the intended subset.
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {{ apply: boolean, courseId: (number|null), lessonId: (number|null) }}
 * @throws {Error} On an unknown argument or a filter value that is not a positive integer.
 */
function parseArgs(argv) {
  const idOptions = new Map([
    ['--course-id', 'courseId'],
    ['--lesson-id', 'lessonId'],
  ]);
  const args = {
    apply: false,
    courseId: null,
    lessonId: null,
  };

  argv.forEach((arg) => {
    if (arg === '--apply') {
      args.apply = true;
      return;
    }

    const separatorIndex = arg.indexOf('=');
    const flag = separatorIndex === -1 ? arg : arg.slice(0, separatorIndex);
    const property = idOptions.get(flag);
    if (!property) {
      throw new Error(`Unbekanntes Argument: ${arg}`);
    }

    // Everything after the first "=" is the value; a missing "=" counts as an empty value.
    const rawValue = separatorIndex === -1 ? '' : arg.slice(separatorIndex + 1);
    const value = Number(rawValue);
    if (rawValue.trim() === '' || !Number.isSafeInteger(value) || value <= 0) {
      throw new Error(`Ungültiger Wert für ${flag}: "${rawValue}" (erwartet: positive Ganzzahl)`);
    }
    args[property] = value;
  });

  return args;
}

/**
 * Converts a value to a string, trims it and collapses every whitespace run
 * into a single space. Falsy input yields the empty string.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeText(value) {
  return String(value || '').trim().replace(/\s+/g, ' ');
}

/**
 * Splits a text at slashes that have whitespace on both sides ("a / b").
 * Slashes without surrounding whitespace ("a/b") are left untouched.
 *
 * @param {*} value
 * @returns {string[]} Empty for blank input, otherwise the alternatives, or a
 *   single-element array with the normalized text when there is nothing to split.
 */
function splitAlternatives(value) {
  const text = normalizeText(value);
  if (!text) return [];
  const parts = text
    .split(/\s+\/\s+/)
    .map((part) => normalizeText(part))
    .filter(Boolean);
  return parts.length >= 2 ? parts : [text];
}

/**
 * Returns the given value as an object, parsing it first when it is a JSON string.
 *
 * This is deliberately best-effort: falsy input, unparsable JSON and anything
 * that is not an object after parsing (e.g. a JSON number or string literal)
 * all yield an empty object, so callers can read properties without further checks.
 *
 * @param {*} value
 * @returns {object}
 */
function parseJsonLike(value) {
  if (!value) return {};
  let parsed = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      return {};
    }
  }
  return parsed !== null && typeof parsed === 'object' ? parsed : {};
}

/**
 * Builds the cartesian product of the given arrays.
 *
 * @param {Array<Array<*>>} arrays
 * @returns {Array<Array<*>>} Every combination that takes one value from each
 *   input array, in input order. An empty input array makes the product empty.
 */
function buildCartesianProduct(arrays) {
  let out = [[]];
  for (const arr of arrays) {
    const next = [];
    out.forEach((prefix) => {
      arr.forEach((value) => {
        next.push([...prefix, value]);
      });
    });
    out = next;
  }
  return out;
}

/**
 * Derives the exercise variants for one exercise by splitting slash-separated
 * alternatives in its answer.
 *
 * The exercise type is read from `questionData.type`, falling back to `answerData.type`:
 * - `transformation`: one variant per alternative of the correct answer.
 * - `multiple_choice`: one variant per alternative of the correct option text.
 * - `gap_fill`: one variant per combination of the per-gap alternatives, unless
 *   that would exceed `GAP_FILL_MAX_VARIANTS` (then reported as skipped).
 * - anything else: returned unchanged.
 *
 * @param {object} exercise - Exercise record; `questionData` / `answerData` may
 *   be objects or JSON strings.
 * @returns {{ changed: boolean, skipped: boolean, variants: object[] }} When
 *   nothing was split, `variants` holds exactly one entry for the exercise.
 */
function buildExerciseVariants(exercise) {
  const questionData = parseJsonLike(exercise.questionData);
  const answerData = parseJsonLike(exercise.answerData);
  const type = String(questionData.type || answerData.type || '').trim();

  const base = {
    lessonId: exercise.lessonId,
    exerciseTypeId: exercise.exerciseTypeId,
    title: exercise.title,
    instruction: exercise.instruction,
    explanation: exercise.explanation,
    createdByUserId: exercise.createdByUserId,
    createdAt: exercise.createdAt,
  };

  // Result for every case in which the exercise is kept as a single entry.
  const keepAsIs = (skipped = false) => ({
    changed: false,
    skipped,
    variants: [{ ...base, questionData, answerData }],
  });

  if (type === 'transformation') {
    // The answer is read from `correctAnswer`, falling back to `correct`.
    const readFromFallbackKey = !answerData.correctAnswer && Boolean(answerData.correct);
    const alternatives = splitAlternatives(answerData.correctAnswer || answerData.correct || '');
    if (alternatives.length <= 1) return keepAsIs();

    return {
      changed: true,
      skipped: false,
      variants: alternatives.map((alt) => ({
        ...base,
        questionData,
        answerData: {
          ...answerData,
          correctAnswer: alt,
          // Keep the fallback key in sync; otherwise every variant would still
          // carry the original slash-separated text under `correct`.
          ...(readFromFallbackKey ? { correct: alt } : {}),
        },
      })),
    };
  }

  if (type === 'multiple_choice') {
    const options = Array.isArray(questionData.options) ? questionData.options.slice() : [];
    const rawIndex = answerData.correctAnswer;
    // Only real numbers and non-blank numeric strings count as an index:
    // Number(null) and Number('') would both coerce to 0 and wrongly select
    // the first option.
    const hasIndex = typeof rawIndex === 'number'
      || (typeof rawIndex === 'string' && rawIndex.trim() !== '');
    const correctIndex = hasIndex ? Number(rawIndex) : Number.NaN;
    if (!Number.isInteger(correctIndex) || correctIndex < 0 || correctIndex >= options.length) {
      return keepAsIs();
    }

    const alternatives = splitAlternatives(options[correctIndex]);
    if (alternatives.length <= 1) return keepAsIs();

    return {
      changed: true,
      skipped: false,
      variants: alternatives.map((alt) => {
        const nextOptions = options.slice();
        nextOptions[correctIndex] = alt;
        return {
          ...base,
          questionData: {
            ...questionData,
            options: nextOptions,
          },
          answerData,
        };
      }),
    };
  }

  if (type === 'gap_fill') {
    const answers = Array.isArray(answerData.answers) ? answerData.answers : [];
    if (!answers.length) return keepAsIs();

    const splitAnswers = answers.map((answer) => {
      const parts = splitAlternatives(answer);
      // A blank gap must stay a single choice: an empty list here would make
      // the cartesian product empty and leave the exercise without any variant.
      return parts.length ? parts : [answer];
    });
    const hasAlternatives = splitAnswers.some((entry) => entry.length > 1);
    if (!hasAlternatives) return keepAsIs();

    const variantCount = splitAnswers.reduce((acc, parts) => acc * parts.length, 1);
    if (variantCount > GAP_FILL_MAX_VARIANTS) return keepAsIs(true);

    return {
      changed: true,
      skipped: false,
      variants: buildCartesianProduct(splitAnswers).map((combo) => ({
        ...base,
        questionData,
        answerData: {
          ...answerData,
          answers: combo,
        },
      })),
    };
  }

  return keepAsIs();
}
/**
 * Entry point of the exercise-answer normalization script.
 *
 * Loads the lessons (optionally filtered by course id and/or lesson id) and
 * their grammar exercises, builds the variants for every exercise and prints
 * a summary. With `--apply`, each lesson that has at least one changed
 * exercise gets its exercises replaced inside one transaction: all existing
 * rows of the lesson are deleted and the rebuilt list is inserted with
 * consecutive exercise numbers starting at 1.
 *
 * NOTE(review): the replacement creates new rows, and only the columns
 * selected below are carried over. Confirm that nothing else references the
 * old exercise rows and that the model has no further columns worth keeping.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  const lessonWhere = {};
  if (args.courseId) lessonWhere.courseId = args.courseId;
  if (args.lessonId) lessonWhere.id = args.lessonId;

  const lessons = await VocabCourseLesson.findAll({
    where: Object.keys(lessonWhere).length ? lessonWhere : undefined,
    attributes: ['id', 'courseId', 'lessonNumber', 'title'],
    order: [['courseId', 'ASC'], ['lessonNumber', 'ASC']],
  });

  const lessonIds = lessons.map((lesson) => lesson.id);

  const exercises = lessonIds.length
    ? await VocabGrammarExercise.findAll({
        where: {
          lessonId: {
            [Op.in]: lessonIds
          }
        },
        attributes: [
          'id',
          'lessonId',
          'exerciseTypeId',
          'exerciseNumber',
          'title',
          'instruction',
          'questionData',
          'answerData',
          'explanation',
          'createdByUserId',
          'createdAt'
        ],
        order: [['lessonId', 'ASC'], ['exerciseNumber', 'ASC'], ['id', 'ASC']]
      })
    : [];

  // Group the exercises by lesson; the query order is kept within each group.
  const grouped = new Map();
  exercises.forEach((exercise) => {
    const list = grouped.get(exercise.lessonId) || [];
    list.push(exercise);
    grouped.set(exercise.lessonId, list);
  });

  let affectedLessons = 0;
  let changedExercises = 0;
  let createdVariants = 0;
  let skippedComplex = 0;

  console.log(`Gefundene Lektionen: ${lessons.length}`);
  console.log(`Gefundene Übungen: ${exercises.length}`);
  console.log(`Modus: ${args.apply ? 'APPLY (schreibt Änderungen)' : 'DRY-RUN (keine Änderungen)'}`);

  for (const lesson of lessons) {
    const source = grouped.get(lesson.id) || [];
    if (!source.length) continue;

    let lessonChangedExercises = 0;
    let lessonSkipped = 0;
    const rebuilt = [];

    source.forEach((exercise) => {
      const variantResult = buildExerciseVariants(exercise);
      if (variantResult.changed) {
        lessonChangedExercises += 1;
        // The first variant takes the place of the original exercise.
        createdVariants += Math.max(0, variantResult.variants.length - 1);
      }
      if (variantResult.skipped) lessonSkipped += 1;
      rebuilt.push(...variantResult.variants);
    });

    // Lessons without any changed exercise are left untouched.
    if (lessonChangedExercises === 0) {
      skippedComplex += lessonSkipped;
      continue;
    }

    affectedLessons += 1;
    changedExercises += lessonChangedExercises;
    skippedComplex += lessonSkipped;

    console.log(
      `- Kurs ${lesson.courseId}, Lektion #${lesson.lessonNumber} (${lesson.id}) "${lesson.title}": `
        + `${source.length} -> ${rebuilt.length} Übungen`
        + `, geändert: ${lessonChangedExercises}`
        + (lessonSkipped > 0 ? `, übersprungen (komplex): ${lessonSkipped}` : '')
    );

    if (args.apply) {
      // Delete and re-insert in one transaction so a failure cannot leave the
      // lesson with only part of its exercises.
      await sequelize.transaction(async (transaction) => {
        await VocabGrammarExercise.destroy({
          where: { lessonId: lesson.id },
          transaction
        });

        if (rebuilt.length) {
          const payload = rebuilt.map((entry, index) => ({
            lessonId: lesson.id,
            exerciseTypeId: entry.exerciseTypeId,
            exerciseNumber: index + 1,
            title: entry.title,
            instruction: entry.instruction,
            questionData: entry.questionData,
            answerData: entry.answerData,
            explanation: entry.explanation,
            createdByUserId: entry.createdByUserId,
            createdAt: entry.createdAt
          }));
          await VocabGrammarExercise.bulkCreate(payload, { transaction });
        }
      });
    }
  }

  console.log('');
  console.log(`Betroffene Lektionen: ${affectedLessons}`);
  console.log(`Geänderte Übungen: ${changedExercises}`);
  console.log(`Zusätzliche Varianten erzeugt: ${createdVariants}`);
  console.log(`Komplexe Fälle übersprungen: ${skippedComplex}`);
}

// Run the script: close the database connection on success; on any failure
// log the error, close the connection and exit with status 1.
main()
  .then(async () => {
    await sequelize.close();
  })
  .catch(async (error) => {
    console.error('Fehler beim Normalisieren der Übungsantworten:', error);
    await sequelize.close();
    process.exit(1);
  });