187 lines
6.0 KiB
JavaScript
187 lines
6.0 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Removes stale SRS cards that should not appear in daily typing review:
 * - compact German question cues against full Bisaya questions, e.g.
 *   "wo/wohin" -> "Asa ka padulong?"
 * - German speaking/task prompts against model-answer sentences, e.g.
 *   "Kumusta ka? ..." -> "Begrüße jemanden ..."
 * - hint/answer pairs of gap-fill exercises that result from pairing every
 *   parenthesised hint with the answers purely by position, when the
 *   gap-aware pairing (first hint after each {gap}) does not produce them
 *
 * Default: dry-run. Use --apply to delete matching rows.
 */
|
|
|
|
import { Op } from 'sequelize';
|
|
import { sequelize } from '../utils/sequelize.js';
|
|
import VocabSrsItem from '../models/community/vocab_srs_item.js';
|
|
import VocabGrammarExercise from '../models/community/vocab_grammar_exercise.js';
|
|
|
|
/**
 * Reads the command-line flags this script understands.
 *
 * @param {string[]} argv - Arguments that follow the script path.
 * @returns {{ apply: boolean }} `apply` is true only when the exact flag
 *   `--apply` is present; otherwise the script stays in dry-run mode.
 */
function parseArgs(argv) {
  const apply = argv.some((flag) => flag === '--apply');
  return { apply };
}
|
|
|
|
/**
 * German question cues, stored in the form normalizeText() produces
 * (lower-case, punctuation replaced by single spaces). That is why the
 * combined cue "wo/wohin" appears here as 'wo wohin'.
 */
const QUESTION_CUES = new Set([
  // where / where to
  'wo', 'wo wohin', 'wohin',
  // what / who / when
  'was', 'wer', 'wann',
  // why / how
  'warum', 'wieso', 'wie',
]);
|
|
|
|
/**
 * Canonicalises a text so that cards can be compared regardless of case,
 * punctuation and spacing.
 *
 * Steps: coerce to string (falsy values become ''), apply Unicode NFKC
 * normalisation, lower-case, replace every run of punctuation/symbol
 * characters with a space, collapse whitespace runs and trim.
 *
 * NFKC is applied BEFORE lower-casing: some compatibility characters only
 * turn into ordinary upper-case letters through normalisation (for example
 * U+210C becomes "H"), and they would stay upper-case if the text were
 * lower-cased first.
 *
 * @param {*} value - Text to normalise; null/undefined/'' yield ''.
 * @returns {string} The normalised text.
 */
function normalizeText(value) {
  return String(value || '')
    .normalize('NFKC')
    .toLowerCase()
    // Punctuation and symbols act as word separators ("wo/wohin" -> "wo wohin").
    .replace(/[\p{P}\p{S}]+/gu, ' ')
    .replace(/\s+/g, ' ')
    // A single final trim also covers leading/trailing whitespace of the input.
    .trim();
}
|
|
|
|
/**
 * Counts the words of a text after it has been run through normalizeText().
 *
 * @param {*} value - Text to measure.
 * @returns {number} Number of non-empty whitespace-separated tokens.
 */
function wordCount(value) {
  const tokens = normalizeText(value).split(/\s+/);
  // Splitting an empty string yields [''], so empty tokens are not counted.
  return tokens.reduce((total, token) => (token ? total + 1 : total), 0);
}
|
|
|
|
/**
 * Tells whether a text is nothing but one of the known question cues.
 *
 * @param {*} value - Text of one card side.
 * @returns {boolean} True when the normalised text is a member of QUESTION_CUES.
 */
function isQuestionCue(value) {
  const candidate = normalizeText(value);
  return QUESTION_CUES.has(candidate);
}
|
|
|
|
/**
 * Tells whether a text looks like a complete question: at least two words
 * and a question mark as its last non-whitespace character.
 *
 * @param {*} value - Text of one card side.
 * @returns {boolean} True when both conditions hold.
 */
function isFullQuestion(value) {
  const trimmed = String(value || '').trim();
  if (wordCount(trimmed) < 2) {
    return false;
  }
  return /\?\s*$/.test(trimmed);
}
|
|
|
|
/**
 * Detects cards that pair a bare question cue with a full question,
 * whichever side holds which.
 *
 * @param {*} left - One side of the card.
 * @param {*} right - The other side of the card.
 * @returns {boolean} True when one side is a question cue and the opposite
 *   side is a full question.
 */
function isStaleQuestionCueCard(left, right) {
  const orientations = [
    [left, right],
    [right, left],
  ];
  return orientations.some(
    ([cueSide, questionSide]) => isQuestionCue(cueSide) && isFullQuestion(questionSide),
  );
}
|
|
|
|
/**
 * Heuristic that recognises German task/speaking instructions.
 *
 * A text counts as an instruction when it has at least three words and either
 * - starts with one of the listed imperative task verbs, or
 * - contains "und"/"," followed by a task verb AND one of the practice
 *   markers (e.g. "laut", "jeweils", "rollenspiel").
 *
 * NOTE(review): the chained-verb list does not contain "beginne", although
 * the leading-verb list does — confirm whether that is intentional.
 *
 * @param {*} value - Text of one card side.
 * @returns {boolean} True when the text looks like an instruction.
 */
function isInstructionLikeText(value) {
  const trimmed = String(value || '').trim();
  if (trimmed === '') {
    return false;
  }

  // Word count is taken on the punctuation-free form of the text.
  const tokenCount = normalizeText(trimmed).split(/\s+/).filter(Boolean).length;
  if (tokenCount < 3) {
    return false;
  }

  // Punctuation is kept here: the patterns below rely on "," and "-".
  const lowered = trimmed.toLowerCase().normalize('NFKC');

  const leadingTaskVerb = /^(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|beginne|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige|begrüße|begruesse|grüße|gruesse|drücke|druecke)\b/i;
  if (leadingTaskVerb.test(lowered)) {
    return true;
  }

  const chainedTaskVerb = /\b(und|,)\s*(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige|begrüße|begruesse|grüße|gruesse|drücke|druecke)\b/i;
  if (!chainedTaskVerb.test(lowered)) {
    return false;
  }

  const practiceMarker = /\b(laut|jeweils|zu jedem|zu jeder|umgebung|alltagsszene|rollenspiel|mini-dialog|szene)\b/i;
  return practiceMarker.test(lowered);
}
|
|
|
|
/**
 * Detects cards where at least one side is an instruction rather than
 * vocabulary.
 *
 * @param {*} left - One side of the card.
 * @param {*} right - The other side of the card.
 * @returns {boolean} True when either side is instruction-like.
 */
function isStaleInstructionCard(left, right) {
  return [left, right].some((side) => isInstructionLikeText(side));
}
|
|
|
|
/**
 * Canonicalises one side of a hint/answer pair for signature comparison.
 *
 * Steps: coerce to string (falsy values become ''), apply Unicode NFKC
 * normalisation, lower-case, replace every run of punctuation/symbol
 * characters with a space, collapse whitespace runs and trim.
 *
 * NFKC is applied BEFORE lower-casing: some compatibility characters only
 * turn into ordinary upper-case letters through normalisation (for example
 * U+210C becomes "H"), and they would stay upper-case if the text were
 * lower-cased first.
 *
 * @param {*} value - Text of one pair side; null/undefined/'' yield ''.
 * @returns {string} The normalised text.
 */
function normalizePairSide(value) {
  return String(value || '')
    .normalize('NFKC')
    .toLowerCase()
    // Symbols are removed, so the result can never contain a "|" character.
    .replace(/[\p{P}\p{S}]+/gu, ' ')
    .replace(/\s+/g, ' ')
    // A single final trim also covers leading/trailing whitespace of the input.
    .trim();
}
|
|
|
|
/**
 * Builds the comparison key for an ordered (left, right) pair.
 *
 * @param {*} left - First side of the pair.
 * @param {*} right - Second side of the pair.
 * @returns {string} Both sides normalised via normalizePairSide() and joined
 *   with "|"; the key is order-sensitive.
 */
function pairSignature(left, right) {
  return [left, right].map((side) => normalizePairSide(side)).join('|');
}
|
|
|
|
/**
 * Collects the content of every "(...)" group in a text, in order of
 * appearance and without regard to where the gaps are.
 *
 * @param {*} text - Exercise text; falsy values are treated as ''.
 * @returns {string[]} Trimmed, non-empty parenthesised contents.
 */
function extractLegacyGapFillHints(text) {
  const hints = [];
  for (const [, inner] of String(text || '').matchAll(/\(([^)]+)\)/g)) {
    const hint = String(inner || '').trim();
    // Groups holding only whitespace are skipped.
    if (hint) {
      hints.push(hint);
    }
  }
  return hints;
}
|
|
|
|
/**
 * Collects gap-aware hints: for every "{gap}" placeholder (case-insensitive,
 * inner whitespace allowed) it takes the first "(...)" group found between
 * that placeholder and the next one (or the end of the text).
 *
 * Gaps without a hint contribute nothing, so the position of a hint in the
 * result is not necessarily the position of its gap.
 *
 * @param {*} text - Exercise text; falsy values are treated as ''.
 * @param {number} [expectedCount=0] - When greater than 0, at most this many
 *   hints are returned.
 * @returns {string[]} Trimmed, non-empty hints in gap order.
 */
function extractFixedGapFillHints(text, expectedCount = 0) {
  const source = String(text || '');
  const gaps = [...source.matchAll(/\{\s*gap\s*\}/gi)];

  const hints = [];
  for (let i = 0; i < gaps.length; i += 1) {
    const segmentStart = gaps[i].index + gaps[i][0].length;
    const segmentEnd = i + 1 < gaps.length ? gaps[i + 1].index : source.length;
    const found = /\(([^)]+)\)/.exec(source.slice(segmentStart, segmentEnd));
    const hint = String(found?.[1] || '').trim();
    if (hint) {
      hints.push(hint);
    }
  }

  if (expectedCount > 0) {
    return hints.slice(0, expectedCount);
  }
  return hints;
}
|
|
|
|
/**
 * Scans all grammar exercises and gathers the signatures of hint/answer
 * pairs that only arise from position-based ("legacy") hint pairing.
 *
 * For every exercise whose questionData.type is 'gap_fill' and that has at
 * least one answer, the i-th parenthesised hint of the text is paired with
 * the i-th answer. A pair is recorded as stale when the gap-aware pairing of
 * the same exercise does not produce the same signature.
 *
 * NOTE(review): questionData / answerData are read as plain objects here —
 * presumably parsed JSON columns; confirm against the model definition.
 *
 * @returns {Promise<Set<string>>} Signatures as built by pairSignature().
 */
async function collectStaleLegacyGapFillPairs() {
  const rows = await VocabGrammarExercise.findAll({
    attributes: ['questionData', 'answerData'],
  });

  const stalePairs = new Set();

  for (const row of rows) {
    const question = row.questionData || {};
    const answerData = row.answerData || {};

    if ((question.type || '') !== 'gap_fill') {
      continue;
    }

    // Answers come from `answers` when it is an array, otherwise from
    // `correct` (either an array or a single value).
    let answers = [];
    if (Array.isArray(answerData.answers)) {
      answers = answerData.answers;
    } else if (answerData.correct) {
      answers = Array.isArray(answerData.correct) ? answerData.correct : [answerData.correct];
    }
    if (answers.length === 0) {
      continue;
    }

    const text = String(question.text || '');
    const legacyHints = extractLegacyGapFillHints(text).slice(0, answers.length);

    const fixedSignatures = new Set();
    extractFixedGapFillHints(text, answers.length).forEach((hint, position) => {
      fixedSignatures.add(pairSignature(hint, answers[position]));
    });

    for (const [position, hint] of legacyHints.entries()) {
      const signature = pairSignature(hint, answers[position]);
      if (!fixedSignatures.has(signature)) {
        stalePairs.add(signature);
      }
    }
  }

  return stalePairs;
}
|
|
|
|
/**
 * Script body: finds SRS items that match one of the stale-card rules, lists
 * them on stdout and — only when `--apply` was passed — deletes them.
 *
 * An item matches when it is a question-cue card, an instruction card, or
 * when its (learning, reference) signature is one of the stale legacy
 * gap-fill pairs.
 *
 * The final summary reports the row count returned by `destroy()` rather
 * than the number of matches, so the message reflects what was actually
 * removed from the database.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { apply } = parseArgs(process.argv.slice(2));
  const staleLegacyGapFillPairs = await collectStaleLegacyGapFillPairs();
  const items = await VocabSrsItem.findAll({
    order: [['id', 'ASC']],
  });

  const matches = items.filter((item) =>
    isStaleQuestionCueCard(item.learning, item.reference)
    || isStaleInstructionCard(item.learning, item.reference)
    || staleLegacyGapFillPairs.has(pairSignature(item.learning, item.reference))
  );

  for (const item of matches) {
    console.log(
      `SRS ${item.id} course:${item.courseId} lesson:${item.lessonId || '-'} "${item.learning}" | "${item.reference}"`
    );
  }

  if (!apply) {
    console.log(`Would delete ${matches.length} invalid SRS item(s).`);
    return;
  }

  // Skip the query entirely when there is nothing to delete.
  let deletedCount = 0;
  if (matches.length) {
    deletedCount = await VocabSrsItem.destroy({
      where: {
        id: {
          [Op.in]: matches.map((item) => item.id),
        },
      },
    });
  }

  console.log(`Deleted ${deletedCount} invalid SRS item(s).`);
}
|
|
|
|
// Script entry point: run main(), report any failure through a non-zero exit
// code, and close the database connection in every case.
const reportFailure = (error) => {
  console.error(error);
  process.exitCode = 1;
};

const closeConnection = () => sequelize.close();

main()
  .catch(reportFailure)
  .finally(closeConnection);
|