From 555e36ea39c908e3a34cc04b1bb35793c55208fe Mon Sep 17 00:00:00 2001
From: "Torsten Schulz (local)"
Date: Wed, 11 Mar 2026 20:55:14 +0100
Subject: [PATCH] feat(clickTtTournamentRegistrationService): enhance
 tournament title processing and link retrieval

- Added functions to tokenize tournament titles and generate a search profile, improving the accuracy of title matching.
- Updated the link retrieval logic to utilize the new title processing methods, enhancing the selection of tournament links based on normalized and tokenized title data.
- Improved scoring mechanism for link selection to prioritize relevant tournament entries, ensuring better user experience during registration.
- Followed a tournament link only when it matches at least one title token, built the stop-word sets once instead of once per token, and escaped the href used in the CSS selector.
---
 .../clickTtTournamentRegistrationService.js   | 162 +++++++++++++++++-
 1 file changed, 157 insertions(+), 5 deletions(-)

diff --git a/backend/services/clickTtTournamentRegistrationService.js b/backend/services/clickTtTournamentRegistrationService.js
index e5e29abc..fcab4af3 100644
--- a/backend/services/clickTtTournamentRegistrationService.js
+++ b/backend/services/clickTtTournamentRegistrationService.js
@@ -17,6 +17,48 @@ function normalizeText(value) {
         .toLowerCase();
 }
 
+const TITLE_STOP_WORDS = new Set([
+    'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
+    'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
+]);
+
+/**
+ * Splits a text into normalized, meaningful search tokens.
+ *
+ * The text is passed through normalizeText() and split on every run of
+ * characters other than a-z, 0-9, ä, ö, ü and ß. Tokens shorter than three
+ * characters and the words in TITLE_STOP_WORDS are dropped.
+ *
+ * @param {*} value - Text to tokenize.
+ * @returns {string[]} Remaining tokens in their original order.
+ */
+function tokenizeText(value) {
+    return normalizeText(value)
+        .split(/[^a-z0-9äöüß]+/i)
+        .map((token) => token.trim())
+        .filter((token) => token.length >= 3 && !TITLE_STOP_WORDS.has(token));
+}
+
+/**
+ * Builds the search data used to recognise a tournament title on a page.
+ *
+ * @param {*} title - Tournament title.
+ * @returns {{normalizedTitle: string, tokens: string[], tailTokens: string[]}}
+ *   The normalized title, all of its tokens and its last two tokens
+ *   (`tailTokens`), which the link scoring weights more heavily.
+ */
+function getTitleSearchProfile(title) {
+    const normalizedTitle = normalizeText(title);
+    const tokens = tokenizeText(title);
+    const tailTokens = tokens.slice(-2);
+
+    return {
+        normalizedTitle,
+        tokens,
+        tailTokens
+    };
+}
+
 function formatGermanDate(value) {
     if (!value) return '';
     const date = value instanceof Date ? value : new Date(value);
@@ -221,32 +263,142 @@ class ClickTtTournamentRegistrationService {
             throw new HttpError('Turnierkonkurrenz ohne Bezeichnung gefunden', 500);
         }
 
+        const titleProfile = getTitleSearchProfile(tournament.title || '');
+
+        // Step 1: look for a link to the tournament itself and follow it. Every
+        // anchor is scored against the title; the callback runs inside the page,
+        // so it cannot use helpers from this module.
+        const tournamentHref = await page.locator('a').evaluateAll((anchors, criteria) => {
+            const normalize = (value) => String(value || '')
+                .normalize('NFKC')
+                .replace(/\s+/g, ' ')
+                .trim()
+                .toLowerCase();
+            const stopWords = new Set([
+                'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
+                'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
+            ]);
+            const tokenize = (value) => normalize(value)
+                .split(/[^a-z0-9äöüß]+/i)
+                .map((token) => token.trim())
+                .filter((token) => token.length >= 3 && !stopWords.has(token));
+
+            const wantedTitle = normalize(criteria.tournamentTitle);
+            const wantedTokens = Array.isArray(criteria.tournamentTokens) ? criteria.tournamentTokens : [];
+            const wantedTailTokens = Array.isArray(criteria.tournamentTailTokens) ? criteria.tournamentTailTokens : [];
+
+            let bestHref = null;
+            // Start at 0 so that only anchors matching at least one title
+            // token qualify; otherwise the first link of the page would win
+            // with a score of 0 and be clicked.
+            let bestScore = 0;
+
+            for (const anchor of anchors) {
+                const href = anchor.getAttribute('href');
+                if (!href) continue;
+
+                const text = normalize(anchor.textContent || '');
+                const contextText = normalize(anchor.closest('tr, li, div, td')?.textContent || '');
+                const combinedText = `${text} ${contextText}`.trim();
+                const combinedTokens = new Set(tokenize(combinedText));
+
+                let score = 0;
+                if (wantedTitle && combinedText.includes(wantedTitle)) score += 100;
+
+                for (const token of wantedTokens) {
+                    if (combinedTokens.has(token)) score += 3;
+                }
+
+                for (const token of wantedTailTokens) {
+                    if (combinedTokens.has(token)) score += 8;
+                }
+
+                if (score > bestScore) {
+                    bestScore = score;
+                    bestHref = href;
+                }
+            }
+
+            return bestHref;
+        }, {
+            tournamentTitle: tournament.title || '',
+            tournamentTokens: titleProfile.tokens,
+            tournamentTailTokens: titleProfile.tailTokens
+        });
+
+        if (tournamentHref) {
+            // Escape quotes and backslashes so the href cannot break out of
+            // the quoted attribute value in the CSS selector.
+            const tournamentSelector = `a[href="${tournamentHref.replace(/["\\]/g, '\\$&')}"]`;
+            clickTtPlayerRegistrationService._trace(trace, 'step', {
+                name: 'click',
+                label: tournament.title || 'Turnier',
+                selector: tournamentSelector
+            });
+            await page.locator(tournamentSelector).first().click();
+            await page.waitForLoadState('domcontentloaded');
+        }
+
+        // Step 2: pick the link of the competition; the anchor text must contain
+        // the competition name, the tournament title only ranks the candidates.
         const href = await page.locator('a').evaluateAll((anchors, criteria) => {
             const normalize = (value) => String(value || '')
                 .normalize('NFKC')
                 .replace(/\s+/g, ' ')
                 .trim()
                 .toLowerCase();
+            const stopWords = new Set([
+                'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
+                'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
+            ]);
+            const tokenize = (value) => normalize(value)
+                .split(/[^a-z0-9äöüß]+/i)
+                .map((token) => token.trim())
+                .filter((token) => token.length >= 3 && !stopWords.has(token));
 
             const wantedCompetition = normalize(criteria.competitionName);
             const wantedTournament = normalize(criteria.tournamentTitle);
+            const wantedTournamentTokens = Array.isArray(criteria.tournamentTokens) ? criteria.tournamentTokens : [];
+            const wantedTailTokens = Array.isArray(criteria.tournamentTailTokens) ? criteria.tournamentTailTokens : [];
+
+            let bestHref = null;
+            // -1 keeps an anchor that matches the competition name eligible
+            // even when no title token matches (score 0).
+            let bestScore = -1;
 
             for (const anchor of anchors) {
+                const href = anchor.getAttribute('href');
+                if (!href) continue;
                 const text = normalize(anchor.textContent || '');
                 if (!text.includes(wantedCompetition)) continue;
 
                 const contextText = normalize(anchor.closest('tr, li, div, td')?.textContent || '');
-                if (wantedTournament && contextText && !contextText.includes(wantedTournament) && !text.includes(wantedTournament)) {
-                    continue;
+                const combinedText = `${text} ${contextText}`.trim();
+                const combinedTokens = new Set(tokenize(combinedText));
+
+                let score = 0;
+                if (wantedTournament && combinedText.includes(wantedTournament)) score += 100;
+
+                for (const token of wantedTournamentTokens) {
+                    if (combinedTokens.has(token)) score += 3;
                 }
 
-                return anchor.getAttribute('href');
+                for (const token of wantedTailTokens) {
+                    if (combinedTokens.has(token)) score += 8;
+                }
+
+                if (score > bestScore) {
+                    bestScore = score;
+                    bestHref = href;
+                }
             }
 
-            return null;
+            return bestHref;
         }, {
             competitionName,
-            tournamentTitle: tournament.title || ''
+            tournamentTitle: tournament.title || '',
+            tournamentTokens: titleProfile.tokens,
+            tournamentTailTokens: titleProfile.tailTokens
         });
 
         if (!href) {