refactor(vocab): enhance token weight handling in VocabLessonView
All checks were successful
Deploy to production / deploy (push) Successful in 2m45s

- Introduced a new mechanism for calculating token weights based on core patterns and important vocabulary, improving the accuracy of target-gloss pair orientation.
- Replaced the previous hint logic with a more robust token-based scoring system, enhancing the quality of vocabulary item representation.
- Streamlined the code for better maintainability and clarity in vocabulary preparation processes.
This commit is contained in:
Torsten Schulz (local)
2026-04-07 11:32:14 +02:00
parent ee338c0e49
commit 160c9dafb2

View File

@@ -1318,39 +1318,47 @@ export default {
// damit weder leere Gloss-Zeilen noch übergroße Listen entstehen. // damit weder leere Gloss-Zeilen noch übergroße Listen entstehen.
const out = []; const out = [];
const seen = new Set(); const seen = new Set();
let nativeHints = new Set( const targetTokenWeight = new Map();
(this.normalizedCorePatterns || []) const nativeTokenWeight = new Map();
.map((x) => this.normalizeLessonVocabTerm(x?.gloss)) const addTokens = (text, map, weight = 1) => {
.filter(Boolean) const tokens = String(text || '')
); .toLowerCase()
let targetHints = new Set( .normalize('NFKC')
(this.normalizedCorePatterns || []) .replace(/[\p{P}\p{S}]+/gu, ' ')
.map((x) => this.normalizeLessonVocabTerm(x?.target)) .split(/\s+/)
.filter(Boolean) .map((t) => t.trim())
); .filter((t) => t.length >= 2);
if (nativeHints.size === 0 || targetHints.size === 0) { tokens.forEach((token) => {
nativeHints = new Set( map.set(token, (map.get(token) || 0) + weight);
(this.importantVocab || []) });
.map((x) => this.normalizeLessonVocabTerm(x?.learning)) };
.filter(Boolean) // Core patterns gelten als qualitativ beste Quelle -> höheres Gewicht
); (this.normalizedCorePatterns || []).forEach((p) => {
targetHints = new Set( addTokens(p?.target, targetTokenWeight, 3);
(this.importantVocab || []) addTokens(p?.gloss, nativeTokenWeight, 3);
.map((x) => this.normalizeLessonVocabTerm(x?.reference)) });
.filter(Boolean) // Übungs-Extraktion als Zusatzsignal
); (this.importantVocab || []).forEach((v) => {
} addTokens(v?.reference, targetTokenWeight, 1);
addTokens(v?.learning, nativeTokenWeight, 1);
});
// Score `text` against a token-weight map: sum of the weights of all
// recognized tokens (same normalization pipeline as addTokens —
// lowercase, NFKC, punctuation/symbols stripped, min length 2).
const sideScore = (text, map) => {
  let score = 0;
  const normalized = String(text || '')
    .toLowerCase()
    .normalize('NFKC')
    .replace(/[\p{P}\p{S}]+/gu, ' ');
  for (const raw of normalized.split(/\s+/)) {
    const token = raw.trim();
    if (token.length >= 2) {
      score += map.get(token) || 0;
    }
  }
  return score;
};
const orientPair = (target, gloss) => { const orientPair = (target, gloss) => {
const t = String(target || '').trim(); const t = String(target || '').trim();
const g = String(gloss || '').trim(); const g = String(gloss || '').trim();
if (!t || !g) return { target: t, gloss: g }; if (!t || !g) return { target: t, gloss: g };
const nt = this.normalizeLessonVocabTerm(t); const directScore = sideScore(t, targetTokenWeight) + sideScore(g, nativeTokenWeight);
const ng = this.normalizeLessonVocabTerm(g); const swappedScore = sideScore(g, targetTokenWeight) + sideScore(t, nativeTokenWeight);
const tLooksNative = nativeHints.has(nt) && !targetHints.has(nt); if (swappedScore > directScore) {
const gLooksTarget = targetHints.has(ng) && !nativeHints.has(ng);
const tLooksTarget = targetHints.has(nt) && !nativeHints.has(nt);
const gLooksNative = nativeHints.has(ng) && !targetHints.has(ng);
if (tLooksNative && gLooksTarget && !(tLooksTarget && gLooksNative)) {
return { target: g, gloss: t }; return { target: g, gloss: t };
} }
return { target: t, gloss: g }; return { target: t, gloss: g };