refactor(vocab): enhance token weight handling in VocabLessonView
All checks were successful
Deploy to production / deploy (push) Successful in 2m45s
- Introduced a new mechanism for calculating token weights based on core patterns and important vocabulary, improving the accuracy of target-gloss pair orientation.
- Replaced the previous hint logic with a more robust token-based scoring system, enhancing the quality of vocabulary item representation.
- Streamlined the code for better maintainability and clarity in vocabulary preparation processes.
This commit is contained in:
@@ -1318,39 +1318,47 @@ export default {
|
||||
// damit weder leere Gloss-Zeilen noch übergroße Listen entstehen.
|
||||
const out = [];
|
||||
const seen = new Set();
|
||||
let nativeHints = new Set(
|
||||
(this.normalizedCorePatterns || [])
|
||||
.map((x) => this.normalizeLessonVocabTerm(x?.gloss))
|
||||
.filter(Boolean)
|
||||
);
|
||||
let targetHints = new Set(
|
||||
(this.normalizedCorePatterns || [])
|
||||
.map((x) => this.normalizeLessonVocabTerm(x?.target))
|
||||
.filter(Boolean)
|
||||
);
|
||||
if (nativeHints.size === 0 || targetHints.size === 0) {
|
||||
nativeHints = new Set(
|
||||
(this.importantVocab || [])
|
||||
.map((x) => this.normalizeLessonVocabTerm(x?.learning))
|
||||
.filter(Boolean)
|
||||
);
|
||||
targetHints = new Set(
|
||||
(this.importantVocab || [])
|
||||
.map((x) => this.normalizeLessonVocabTerm(x?.reference))
|
||||
.filter(Boolean)
|
||||
);
|
||||
}
|
||||
const targetTokenWeight = new Map();
|
||||
const nativeTokenWeight = new Map();
|
||||
const addTokens = (text, map, weight = 1) => {
|
||||
const tokens = String(text || '')
|
||||
.toLowerCase()
|
||||
.normalize('NFKC')
|
||||
.replace(/[\p{P}\p{S}]+/gu, ' ')
|
||||
.split(/\s+/)
|
||||
.map((t) => t.trim())
|
||||
.filter((t) => t.length >= 2);
|
||||
tokens.forEach((token) => {
|
||||
map.set(token, (map.get(token) || 0) + weight);
|
||||
});
|
||||
};
|
||||
// Core patterns gelten als qualitativ beste Quelle -> höheres Gewicht
|
||||
(this.normalizedCorePatterns || []).forEach((p) => {
|
||||
addTokens(p?.target, targetTokenWeight, 3);
|
||||
addTokens(p?.gloss, nativeTokenWeight, 3);
|
||||
});
|
||||
// Übungs-Extraktion als Zusatzsignal
|
||||
(this.importantVocab || []).forEach((v) => {
|
||||
addTokens(v?.reference, targetTokenWeight, 1);
|
||||
addTokens(v?.learning, nativeTokenWeight, 1);
|
||||
});
|
||||
const sideScore = (text, map) => {
|
||||
const tokens = String(text || '')
|
||||
.toLowerCase()
|
||||
.normalize('NFKC')
|
||||
.replace(/[\p{P}\p{S}]+/gu, ' ')
|
||||
.split(/\s+/)
|
||||
.map((t) => t.trim())
|
||||
.filter((t) => t.length >= 2);
|
||||
return tokens.reduce((sum, token) => sum + (map.get(token) || 0), 0);
|
||||
};
|
||||
const orientPair = (target, gloss) => {
|
||||
const t = String(target || '').trim();
|
||||
const g = String(gloss || '').trim();
|
||||
if (!t || !g) return { target: t, gloss: g };
|
||||
const nt = this.normalizeLessonVocabTerm(t);
|
||||
const ng = this.normalizeLessonVocabTerm(g);
|
||||
const tLooksNative = nativeHints.has(nt) && !targetHints.has(nt);
|
||||
const gLooksTarget = targetHints.has(ng) && !nativeHints.has(ng);
|
||||
const tLooksTarget = targetHints.has(nt) && !nativeHints.has(nt);
|
||||
const gLooksNative = nativeHints.has(ng) && !targetHints.has(ng);
|
||||
if (tLooksNative && gLooksTarget && !(tLooksTarget && gLooksNative)) {
|
||||
const directScore = sideScore(t, targetTokenWeight) + sideScore(g, nativeTokenWeight);
|
||||
const swappedScore = sideScore(g, targetTokenWeight) + sideScore(t, nativeTokenWeight);
|
||||
if (swappedScore > directScore) {
|
||||
return { target: g, gloss: t };
|
||||
}
|
||||
return { target: t, gloss: g };
|
||||
|
||||
Reference in New Issue
Block a user