feat(clickTtTournamentRegistrationService): enhance tournament title processing and link retrieval
- Added functions to tokenize tournament titles and generate a search profile, improving the accuracy of title matching.
- Updated the link retrieval logic to use the new title processing methods, so tournament links are selected based on normalized and tokenized title data.
- Improved the scoring mechanism for link selection to prioritize relevant tournament entries, giving users a more reliable registration flow.
This commit is contained in:
@@ -17,6 +17,29 @@ function normalizeText(value) {
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
/**
 * Splits a text into search tokens.
 *
 * The input is first passed through normalizeText (defined earlier in this
 * file), then split on every run of characters that is not a letter a-z, a
 * digit, or one of the German characters ä, ö, ü, ß. Tokens shorter than
 * three characters and tokens found in the stop-word list are dropped.
 *
 * @param {*} value - Text to tokenize; handed unchanged to normalizeText.
 * @returns {string[]} Remaining tokens in their original order (duplicates kept).
 */
function tokenizeText(value) {
  // Built once per call. Previously this Set was re-created inside the filter
  // callback, i.e. once for every single token.
  // Note: 'im', 'am' and 'an' can never match because tokens shorter than
  // three characters are rejected first; they are kept so the list stays
  // identical to the copies used inside the page.evaluateAll callbacks.
  const stopWords = new Set([
    'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
    'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
  ]);

  return normalizeText(value)
    .split(/[^a-z0-9äöüß]+/i)
    .map((token) => token.trim())
    .filter((token) => token.length >= 3 && !stopWords.has(token));
}
|
||||
|
||||
/**
 * Builds the data used to match a tournament title against link text.
 *
 * @param {*} title - Raw tournament title; passed unchanged to normalizeText
 *   and tokenizeText.
 * @returns {{normalizedTitle: *, tokens: *, tailTokens: *}} The result of
 *   normalizeText(title), the result of tokenizeText(title), and the last
 *   two entries of that token list (fewer if the list is shorter).
 */
function getTitleSearchProfile(title) {
  const profile = {
    normalizedTitle: normalizeText(title),
    tokens: tokenizeText(title)
  };

  // The trailing tokens are exposed separately so callers can weight them
  // differently from the full token list.
  profile.tailTokens = profile.tokens.slice(-2);

  return profile;
}
|
||||
|
||||
function formatGermanDate(value) {
|
||||
if (!value) return '';
|
||||
const date = value instanceof Date ? value : new Date(value);
|
||||
@@ -221,32 +244,129 @@ class ClickTtTournamentRegistrationService {
|
||||
throw new HttpError('Turnierkonkurrenz ohne Bezeichnung gefunden', 500);
|
||||
}
|
||||
|
||||
const titleProfile = getTitleSearchProfile(tournament.title || '');
|
||||
|
||||
const tournamentHref = await page.locator('a').evaluateAll((anchors, criteria) => {
|
||||
const normalize = (value) => String(value || '')
|
||||
.normalize('NFKC')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
const tokenize = (value) => normalize(value)
|
||||
.split(/[^a-z0-9äöüß]+/i)
|
||||
.map((token) => token.trim())
|
||||
.filter((token) => token.length >= 3)
|
||||
.filter((token) => !new Set([
|
||||
'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
|
||||
'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
|
||||
]).has(token));
|
||||
|
||||
const wantedTitle = normalize(criteria.tournamentTitle);
|
||||
const wantedTokens = Array.isArray(criteria.tournamentTokens) ? criteria.tournamentTokens : [];
|
||||
const wantedTailTokens = Array.isArray(criteria.tournamentTailTokens) ? criteria.tournamentTailTokens : [];
|
||||
|
||||
let bestHref = null;
|
||||
let bestScore = -1;
|
||||
|
||||
for (const anchor of anchors) {
|
||||
const href = anchor.getAttribute('href');
|
||||
if (!href) continue;
|
||||
|
||||
const text = normalize(anchor.textContent || '');
|
||||
const contextText = normalize(anchor.closest('tr, li, div, td')?.textContent || '');
|
||||
const combinedText = `${text} ${contextText}`.trim();
|
||||
const combinedTokens = new Set(tokenize(combinedText));
|
||||
|
||||
let score = 0;
|
||||
if (wantedTitle && combinedText.includes(wantedTitle)) score += 100;
|
||||
|
||||
for (const token of wantedTokens) {
|
||||
if (combinedTokens.has(token)) score += 3;
|
||||
}
|
||||
|
||||
for (const token of wantedTailTokens) {
|
||||
if (combinedTokens.has(token)) score += 8;
|
||||
}
|
||||
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
bestHref = href;
|
||||
}
|
||||
}
|
||||
|
||||
return bestHref;
|
||||
}, {
|
||||
tournamentTitle: tournament.title || '',
|
||||
tournamentTokens: titleProfile.tokens,
|
||||
tournamentTailTokens: titleProfile.tailTokens
|
||||
});
|
||||
|
||||
if (tournamentHref) {
|
||||
clickTtPlayerRegistrationService._trace(trace, 'step', {
|
||||
name: 'click',
|
||||
label: tournament.title || 'Turnier',
|
||||
selector: `a[href="${tournamentHref}"]`
|
||||
});
|
||||
await page.locator(`a[href="${tournamentHref}"]`).first().click();
|
||||
await page.waitForLoadState('domcontentloaded');
|
||||
}
|
||||
|
||||
const href = await page.locator('a').evaluateAll((anchors, criteria) => {
|
||||
const normalize = (value) => String(value || '')
|
||||
.normalize('NFKC')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
const tokenize = (value) => normalize(value)
|
||||
.split(/[^a-z0-9äöüß]+/i)
|
||||
.map((token) => token.trim())
|
||||
.filter((token) => token.length >= 3)
|
||||
.filter((token) => !new Set([
|
||||
'und', 'der', 'die', 'das', 'des', 'den', 'dem', 'von', 'für', 'mit',
|
||||
'ein', 'eine', 'einer', 'eines', 'zum', 'zur', 'im', 'am', 'an'
|
||||
]).has(token));
|
||||
|
||||
const wantedCompetition = normalize(criteria.competitionName);
|
||||
const wantedTournament = normalize(criteria.tournamentTitle);
|
||||
const wantedTournamentTokens = Array.isArray(criteria.tournamentTokens) ? criteria.tournamentTokens : [];
|
||||
const wantedTailTokens = Array.isArray(criteria.tournamentTailTokens) ? criteria.tournamentTailTokens : [];
|
||||
|
||||
let bestHref = null;
|
||||
let bestScore = -1;
|
||||
|
||||
for (const anchor of anchors) {
|
||||
const href = anchor.getAttribute('href');
|
||||
if (!href) continue;
|
||||
const text = normalize(anchor.textContent || '');
|
||||
if (!text.includes(wantedCompetition)) continue;
|
||||
|
||||
const contextText = normalize(anchor.closest('tr, li, div, td')?.textContent || '');
|
||||
if (wantedTournament && contextText && !contextText.includes(wantedTournament) && !text.includes(wantedTournament)) {
|
||||
continue;
|
||||
const combinedText = `${text} ${contextText}`.trim();
|
||||
const combinedTokens = new Set(tokenize(combinedText));
|
||||
|
||||
let score = 0;
|
||||
if (wantedTournament && combinedText.includes(wantedTournament)) score += 100;
|
||||
|
||||
for (const token of wantedTournamentTokens) {
|
||||
if (combinedTokens.has(token)) score += 3;
|
||||
}
|
||||
|
||||
return anchor.getAttribute('href');
|
||||
for (const token of wantedTailTokens) {
|
||||
if (combinedTokens.has(token)) score += 8;
|
||||
}
|
||||
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
bestHref = href;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return bestHref;
|
||||
}, {
|
||||
competitionName,
|
||||
tournamentTitle: tournament.title || ''
|
||||
tournamentTitle: tournament.title || '',
|
||||
tournamentTokens: titleProfile.tokens,
|
||||
tournamentTailTokens: titleProfile.tailTokens
|
||||
});
|
||||
|
||||
if (!href) {
|
||||
|
||||
Reference in New Issue
Block a user