Fixed Bisaya course
All checks were successful
Deploy to production / deploy (push) Successful in 2m8s
All checks were successful
Deploy to production / deploy (push) Successful in 2m8s
This commit is contained in:
131
backend/scripts/repair-invalid-srs-fragment-sentence-pairs.js
Normal file
131
backend/scripts/repair-invalid-srs-fragment-sentence-pairs.js
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env node
|
||||
/**
 * Removes stale SRS cards that should not appear in daily typing review:
 * - compact German question cues against full Bisaya questions, e.g.
 *   "wo/wohin" -> "Asa ka padulong?"
 * - German speaking/task prompts against model-answer sentences, e.g.
 *   "Kumusta ka? ..." -> "Begrüße jemanden ..."
 *
 * Default: dry-run. Use --apply to delete matching rows.
 */
|
||||
|
||||
import { Op } from 'sequelize';
|
||||
import { sequelize } from '../utils/sequelize.js';
|
||||
import VocabSrsItem from '../models/community/vocab_srs_item.js';
|
||||
|
||||
/**
 * Parses the command-line flags understood by this script.
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{ apply: boolean }} `apply` is true only when the exact
 *   `--apply` flag is present; otherwise the script stays in dry-run mode.
 */
function parseArgs(argv) {
  return {
    apply: argv.includes('--apply'),
  };
}
|
||||
|
||||
/**
 * German question words that count as a bare "cue" rather than a sentence.
 * Entries are stored in the form produced by normalizeText(): lower-case,
 * punctuation replaced by single spaces. That is why the slash-separated
 * cue "wo/wohin" appears here as 'wo wohin'.
 */
const QUESTION_CUES = new Set([
  'wo',
  'wo wohin',
  'wohin',
  'was',
  'wer',
  'wann',
  'warum',
  'wieso',
  'wie',
]);
|
||||
|
||||
/**
 * Canonicalizes free text for comparison.
 *
 * Steps, in order: coerce to string (falsy values become ''), trim,
 * lower-case, apply Unicode NFKC normalization, replace every run of
 * punctuation or symbol characters with one space, collapse whitespace
 * runs to one space, and trim again.
 *
 * @param {*} value - Text to normalize; any falsy value yields ''.
 * @returns {string} Normalized text, e.g. "wo/wohin" -> "wo wohin".
 */
function normalizeText(value) {
  return String(value || '')
    .trim()
    .toLowerCase()
    .normalize('NFKC')
    // \p{P} = punctuation, \p{S} = symbols; requires the `u` flag.
    .replace(/[\p{P}\p{S}]+/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Counts the whitespace-separated words left after normalizeText() has
 * been applied, so punctuation-only fragments do not count as words.
 *
 * @param {*} value - Text to measure.
 * @returns {number} Number of non-empty words; 0 for empty or falsy input.
 */
function wordCount(value) {
  const words = normalizeText(value).split(/\s+/).filter(Boolean);
  return words.length;
}
|
||||
|
||||
/**
 * Tells whether the text is nothing more than a German question cue.
 *
 * @param {*} value - Card text to check.
 * @returns {boolean} True when the normalized text is a member of
 *   QUESTION_CUES (e.g. "Wo?", "wo/wohin").
 */
function isQuestionCue(value) {
  return QUESTION_CUES.has(normalizeText(value));
}
|
||||
|
||||
/**
 * Tells whether the text reads as a complete question.
 *
 * @param {*} value - Card text to check; falsy values are treated as ''.
 * @returns {boolean} True when the trimmed text has at least two words
 *   (per wordCount) and ends with an ASCII question mark.
 */
function isFullQuestion(value) {
  const text = String(value || '').trim();
  const endsWithQuestionMark = /\?\s*$/.test(text);
  return wordCount(text) >= 2 && endsWithQuestionMark;
}
|
||||
|
||||
/**
 * Detects a card that pairs a bare question cue with a full question,
 * in either direction.
 *
 * @param {*} left - One side of the card.
 * @param {*} right - The other side of the card.
 * @returns {boolean} True when one side is a question cue and the other
 *   side is a full question.
 */
function isStaleQuestionCueCard(left, right) {
  const cueThenQuestion = isQuestionCue(left) && isFullQuestion(right);
  if (cueThenQuestion) return true;
  return isQuestionCue(right) && isFullQuestion(left);
}
|
||||
|
||||
/**
 * Heuristically decides whether a text is a German task/speaking
 * instruction (e.g. "Begrüße jemanden ...") rather than vocabulary.
 *
 * A text qualifies when it has at least three words and either
 *  - starts with one of the listed imperative task verbs, or
 *  - contains a chained task verb after "und" or a comma AND also contains
 *    one of the practice markers ("laut", "rollenspiel", ...).
 *
 * @param {*} value - Card text to check; falsy values are treated as ''.
 * @returns {boolean} True when the text looks like an instruction.
 */
function isInstructionLikeText(value) {
  const text = String(value || '').trim();
  if (!text) return false;

  // Very short texts are never treated as instructions.
  const words = normalizeText(text).split(/\s+/).filter(Boolean);
  if (words.length < 3) return false;

  // Punctuation is kept here (unlike normalizeText) because the chain
  // pattern below needs to see commas.
  const normalized = text.toLowerCase().normalize('NFKC');
  const startsWithTaskVerb = /^(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|beginne|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige|begrüße|begruesse|grüße|gruesse|drücke|druecke)\b/i.test(normalized);
  if (startsWithTaskVerb) return true;

  const containsTaskChain = /\b(und|,)\s*(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige|begrüße|begruesse|grüße|gruesse|drücke|druecke)\b/i.test(normalized);
  const containsPracticeMarker = /\b(laut|jeweils|zu jedem|zu jeder|umgebung|alltagsszene|rollenspiel|mini-dialog|szene)\b/i.test(normalized);

  return containsTaskChain && containsPracticeMarker;
}
|
||||
|
||||
/**
 * Detects a card where either side is an instruction-like text.
 *
 * @param {*} left - One side of the card.
 * @param {*} right - The other side of the card.
 * @returns {boolean} True when at least one side passes
 *   isInstructionLikeText().
 */
function isStaleInstructionCard(left, right) {
  return [left, right].some((side) => isInstructionLikeText(side));
}
|
||||
|
||||
/**
 * Finds SRS items whose learning/reference pair is a stale question-cue
 * card or a stale instruction card, logs each one, and deletes them when
 * the script was started with --apply. Without --apply nothing is deleted.
 *
 * @returns {Promise<void>} Resolves after logging (and optional deletion).
 */
async function main() {
  const { apply } = parseArgs(process.argv.slice(2));

  // Coarse SQL pre-filter: only rows containing "/" or "?" or starting with
  // one of the listed verb spellings are loaded. The exact decision is made
  // in JavaScript below.
  // NOTE(review): instruction texts starting with other task verbs are only
  // loaded if they also contain "/" or "?" — confirm this is intended.
  // Op.iLike is case-insensitive LIKE (PostgreSQL operator in Sequelize).
  const items = await VocabSrsItem.findAll({
    where: {
      [Op.or]: [
        { learning: { [Op.like]: '%/%' } },
        { reference: { [Op.like]: '%/%' } },
        { learning: { [Op.like]: '%?%' } },
        { reference: { [Op.like]: '%?%' } },
        { learning: { [Op.iLike]: 'Begrüße %' } },
        { reference: { [Op.iLike]: 'Begrüße %' } },
        { learning: { [Op.iLike]: 'Begruesse %' } },
        { reference: { [Op.iLike]: 'Begruesse %' } },
        { learning: { [Op.iLike]: 'Drücke %' } },
        { reference: { [Op.iLike]: 'Drücke %' } },
        { learning: { [Op.iLike]: 'Druecke %' } },
        { reference: { [Op.iLike]: 'Druecke %' } },
      ],
    },
    order: [['id', 'ASC']],
  });

  const matches = items.filter(
    (item) =>
      isStaleQuestionCueCard(item.learning, item.reference)
      || isStaleInstructionCard(item.learning, item.reference)
  );

  // Always list every candidate so a dry-run shows what --apply would remove.
  for (const item of matches) {
    console.log(
      `SRS ${item.id} course:${item.courseId} lesson:${item.lessonId || '-'} "${item.learning}" | "${item.reference}"`
    );
  }

  if (apply && matches.length) {
    await VocabSrsItem.destroy({
      where: {
        id: {
          [Op.in]: matches.map((item) => item.id),
        },
      },
    });
  }

  console.log(`${apply ? 'Deleted' : 'Would delete'} ${matches.length} invalid SRS item(s).`);
}
|
||||
|
||||
// Script entry point: run main(), report any failure through a non-zero
// exit code, and close the Sequelize connection in every case.
main()
  .catch((error) => {
    console.error(error);
    // Set exitCode instead of calling process.exit() so the finally
    // handler below still runs.
    process.exitCode = 1;
  })
  .finally(async () => {
    await sequelize.close();
  });
|
||||
@@ -73,13 +73,13 @@ export default class VocabService {
|
||||
}
|
||||
|
||||
const normalized = text.toLowerCase().normalize('NFKC');
|
||||
const startsWithTaskVerb = /^(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|beginne|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige)\b/i.test(normalized);
|
||||
const startsWithTaskVerb = /^(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|beginne|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige|begrüße|begruesse|grüße|gruesse|drücke|druecke)\b/i.test(normalized);
|
||||
const startsWithTakeTask = /^nimm\b/i.test(normalized)
|
||||
&& (
|
||||
/\b(ein|eine|einen|zwei|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|\d+)\b/i.test(normalized)
|
||||
|| /\b(w[oö]rter|verben|gegenstände|gegenstaende|sätze|saetze|muster|beispiele)\b/i.test(normalized)
|
||||
);
|
||||
const containsTaskChain = /\b(und|,)\s*(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige)\b/i.test(normalized);
|
||||
const containsTaskChain = /\b(und|,)\s*(sage|sag|frage|frag|bitte|stelle|sprich|erzähle|erzaehle|beschreibe|bilde|wähle|waehle|ordne|übersetze|uebersetze|nenne|nenn|verwende|nutze|reagiere|kombiniere|spiele|löse|loese|beantworte|ergänze|ergaenze|formuliere|lies|entscheide|zeige|begrüße|begruesse|grüße|gruesse|drücke|druecke)\b/i.test(normalized);
|
||||
const containsPracticeMarker = /\b(laut|jeweils|zu jedem|zu jeder|umgebung|alltagsszene|rollenspiel|mini-dialog|szene)\b/i.test(normalized);
|
||||
|
||||
return startsWithTaskVerb || startsWithTakeTask || (containsTaskChain && containsPracticeMarker);
|
||||
@@ -1974,6 +1974,11 @@ export default class VocabService {
|
||||
err.status = 400;
|
||||
throw err;
|
||||
}
|
||||
if (!this._isTrainableSrsPair({ learning, reference })) {
|
||||
const err = new Error('Invalid SRS item text');
|
||||
err.status = 400;
|
||||
throw err;
|
||||
}
|
||||
|
||||
const lessonId = payload?.lessonId == null
|
||||
? null
|
||||
|
||||
Reference in New Issue
Block a user