feat(clickTtTournamentRegistrationService): enhance tournament title processing and link retrieval

- Added functions to tokenize tournament titles and generate a search profile, improving the accuracy of title matching.
- Updated the link retrieval logic to utilize the new title processing methods, enhancing the selection of tournament links based on normalized and tokenized title data.
- Improved the scoring mechanism for link selection to prioritize relevant tournament entries, ensuring a better user experience during registration.
This commit is contained in:
Torsten Schulz (local)
2026-03-11 20:55:14 +01:00
parent 2f82886ad6
commit 555e36ea39

View File

@@ -17,6 +17,29 @@ function normalizeText(value) {
.toLowerCase();
}
/**
 * German filler words that carry no signal when matching tournament titles.
 * Built once at module load instead of once per token inside the filter.
 * The two-letter entries ('im', 'am', 'an') are already excluded by the
 * minimum-length rule in tokenizeText; they are kept so the list stays
 * identical to the copies used elsewhere in this file.
 */
const TITLE_STOP_WORDS = new Set([
  'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
  'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
]);

/**
 * Splits a text into significant search tokens.
 *
 * The value is first passed through normalizeText, then split on every run
 * of characters that is not a Latin letter, digit, or German umlaut/ß.
 * Tokens shorter than three characters and stop words are discarded.
 * Order and duplicates of the remaining tokens are preserved.
 *
 * @param {*} value - Text to tokenize (anything normalizeText accepts).
 * @returns {string[]} Significant tokens in their original order.
 */
function tokenizeText(value) {
  return normalizeText(value)
    // Separators (including whitespace) are consumed by the split, so the
    // resulting tokens never need trimming.
    .split(/[^a-z0-9äöüß]+/i)
    .filter((token) => token.length >= 3 && !TITLE_STOP_WORDS.has(token));
}
/**
 * Derives the data used to look up a tournament by its title.
 *
 * @param {*} title - Tournament title as passed to normalizeText/tokenizeText.
 * @returns {{normalizedTitle: string, tokens: string[], tailTokens: string[]}}
 *   normalizedTitle: result of normalizeText(title);
 *   tokens: result of tokenizeText(title);
 *   tailTokens: the last two entries of tokens (fewer if tokens is shorter).
 */
function getTitleSearchProfile(title) {
  const profile = {
    normalizedTitle: normalizeText(title),
    tokens: tokenizeText(title)
  };
  // slice(-2) yields the whole list when it has at most two entries.
  profile.tailTokens = profile.tokens.slice(-2);
  return profile;
}
function formatGermanDate(value) {
if (!value) return '';
const date = value instanceof Date ? value : new Date(value);
@@ -221,32 +244,129 @@ class ClickTtTournamentRegistrationService {
throw new HttpError('Turnierkonkurrenz ohne Bezeichnung gefunden', 500);
}
const titleProfile = getTitleSearchProfile(tournament.title || '');
const tournamentHref = await page.locator('a').evaluateAll((anchors, criteria) => {
const normalize = (value) => String(value || '')
.normalize('NFKC')
.replace(/\s+/g, ' ')
.trim()
.toLowerCase();
const tokenize = (value) => normalize(value)
.split(/[^a-z0-9äöüß]+/i)
.map((token) => token.trim())
.filter((token) => token.length >= 3)
.filter((token) => !new Set([
'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
]).has(token));
const wantedTitle = normalize(criteria.tournamentTitle);
const wantedTokens = Array.isArray(criteria.tournamentTokens) ? criteria.tournamentTokens : [];
const wantedTailTokens = Array.isArray(criteria.tournamentTailTokens) ? criteria.tournamentTailTokens : [];
let bestHref = null;
let bestScore = -1;
for (const anchor of anchors) {
const href = anchor.getAttribute('href');
if (!href) continue;
const text = normalize(anchor.textContent || '');
const contextText = normalize(anchor.closest('tr, li, div, td')?.textContent || '');
const combinedText = `${text} ${contextText}`.trim();
const combinedTokens = new Set(tokenize(combinedText));
let score = 0;
if (wantedTitle && combinedText.includes(wantedTitle)) score += 100;
for (const token of wantedTokens) {
if (combinedTokens.has(token)) score += 3;
}
for (const token of wantedTailTokens) {
if (combinedTokens.has(token)) score += 8;
}
if (score > bestScore) {
bestScore = score;
bestHref = href;
}
}
return bestHref;
}, {
tournamentTitle: tournament.title || '',
tournamentTokens: titleProfile.tokens,
tournamentTailTokens: titleProfile.tailTokens
});
if (tournamentHref) {
clickTtPlayerRegistrationService._trace(trace, 'step', {
name: 'click',
label: tournament.title || 'Turnier',
selector: `a[href="${tournamentHref}"]`
});
await page.locator(`a[href="${tournamentHref}"]`).first().click();
await page.waitForLoadState('domcontentloaded');
}
const href = await page.locator('a').evaluateAll((anchors, criteria) => {
const normalize = (value) => String(value || '')
.normalize('NFKC')
.replace(/\s+/g, ' ')
.trim()
.toLowerCase();
const tokenize = (value) => normalize(value)
.split(/[^a-z0-9äöüß]+/i)
.map((token) => token.trim())
.filter((token) => token.length >= 3)
.filter((token) => !new Set([
'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
]).has(token));
const wantedCompetition = normalize(criteria.competitionName);
const wantedTournament = normalize(criteria.tournamentTitle);
const wantedTournamentTokens = Array.isArray(criteria.tournamentTokens) ? criteria.tournamentTokens : [];
const wantedTailTokens = Array.isArray(criteria.tournamentTailTokens) ? criteria.tournamentTailTokens : [];
let bestHref = null;
let bestScore = -1;
for (const anchor of anchors) {
const href = anchor.getAttribute('href');
if (!href) continue;
const text = normalize(anchor.textContent || '');
if (!text.includes(wantedCompetition)) continue;
const contextText = normalize(anchor.closest('tr, li, div, td')?.textContent || '');
if (wantedTournament && contextText && !contextText.includes(wantedTournament) && !text.includes(wantedTournament)) {
continue;
const combinedText = `${text} ${contextText}`.trim();
const combinedTokens = new Set(tokenize(combinedText));
let score = 0;
if (wantedTournament && combinedText.includes(wantedTournament)) score += 100;
for (const token of wantedTournamentTokens) {
if (combinedTokens.has(token)) score += 3;
}
return anchor.getAttribute('href');
for (const token of wantedTailTokens) {
if (combinedTokens.has(token)) score += 8;
}
if (score > bestScore) {
bestScore = score;
bestHref = href;
}
}
return null;
return bestHref;
}, {
competitionName,
tournamentTitle: tournament.title || ''
tournamentTitle: tournament.title || '',
tournamentTokens: titleProfile.tokens,
tournamentTailTokens: titleProfile.tailTokens
});
if (!href) {