Add script for importing match schedule and logging
Some checks failed
Code Analysis and Production Deploy / analyze (push) Has been skipped
Code Analysis and Production Deploy / deploy-production (push) Has been skipped
Code Analysis and Production Deploy / deploy-test (push) Successful in 2m2s
Code Analysis and Production Deploy / analyze (pull_request) Failing after 33s
Code Analysis and Production Deploy / deploy-production (pull_request) Has been skipped
Code Analysis and Production Deploy / deploy-test (pull_request) Has been skipped
Require Package Version Change / check (pull_request) Failing after 10s
Some checks failed
Code Analysis and Production Deploy / analyze (push) Has been skipped
Code Analysis and Production Deploy / deploy-production (push) Has been skipped
Code Analysis and Production Deploy / deploy-test (push) Successful in 2m2s
Code Analysis and Production Deploy / analyze (pull_request) Failing after 33s
Code Analysis and Production Deploy / deploy-production (pull_request) Has been skipped
Code Analysis and Production Deploy / deploy-test (pull_request) Has been skipped
Require Package Version Change / check (pull_request) Failing after 10s
- Created `import-spielplan.js` to fetch and parse the match schedule from the specified URL, saving the output as JSON. - Added `run-spielplan-import.sh` to automate the execution of the import script and log output. - Introduced `spielplan.html` file to store the downloaded HTML content for further processing.
This commit is contained in:
3
temp/webpage-downloads/crontab-spielplan.example
Normal file
3
temp/webpage-downloads/crontab-spielplan.example
Normal file
@@ -0,0 +1,3 @@
|
||||
# Daily match schedule import at 07:00.
|
||||
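# Install with: crontab crontab-spielplan.example
# WARNING: `crontab <file>` replaces your entire existing crontab. To keep other
# entries, add the line below via `crontab -e` instead.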
|
||||
0 7 * * * /home/torsten/Programs/harheimertc/temp/webpage-downloads/run-spielplan-import.sh
|
||||
25
temp/webpage-downloads/data/harheimer_tc_spielplan.html
Normal file
25
temp/webpage-downloads/data/harheimer_tc_spielplan.html
Normal file
File diff suppressed because one or more lines are too long
9719
temp/webpage-downloads/data/harheimer_tc_spielplan.json
Normal file
9719
temp/webpage-downloads/data/harheimer_tc_spielplan.json
Normal file
File diff suppressed because it is too large
Load Diff
236
temp/webpage-downloads/import-spielplan.js
Executable file
236
temp/webpage-downloads/import-spielplan.js
Executable file
@@ -0,0 +1,236 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
// Directory containing this script; ES modules have no built-in __dirname.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Static settings for the schedule import.
 *
 * Frozen so that no function can alter the shared settings by accident
 * (in an ES module an attempted write throws a TypeError).
 */
const CONFIG = Object.freeze({
  // Path segments of the schedule URL assembled in buildUrl().
  association: "HeTTV",
  clubId: "43030",
  clubName: "Harheimer_TC",
  // Directory ("data" next to this script) that receives the files below.
  outputDir: path.join(__dirname, "data"),
  // File name of the parsed schedule written by main().
  outputFile: "harheimer_tc_spielplan.json",
  // File name of the raw downloaded page written by readHtml().
  htmlFile: "harheimer_tc_spielplan.html",
});
|
||||
|
||||
/**
 * Turns `--key value` and `--flag` command-line tokens into an options object.
 *
 * Scanning starts at index 2. A `--key` followed by a non-empty token that
 * does not itself start with `--` takes that token as its value; otherwise
 * the key is stored as `true`. Tokens that do not start with `--` and were
 * not consumed as a value are ignored.
 *
 * @param {string[]} argv - Argument vector in the shape of `process.argv`.
 * @returns {Object<string, string|true>} Parsed options keyed by name without the `--` prefix.
 */
function parseArgs(argv) {
  const options = {};
  let index = 2;

  while (index < argv.length) {
    const token = argv[index];
    index += 1;

    if (!token.startsWith("--")) {
      continue;
    }

    const name = token.slice(2);
    const candidate = argv[index];
    const hasValue = Boolean(candidate) && !candidate.startsWith("--");

    if (hasValue) {
      options[name] = candidate;
      // The value token has been consumed; skip over it.
      index += 1;
    } else {
      options[name] = true;
    }
  }

  return options;
}
|
||||
|
||||
/**
 * Converts a value to a string and left-pads it with "0" to at least two characters.
 *
 * @param {*} value - Value to format.
 * @returns {string} The padded string; longer inputs are returned unshortened.
 */
function pad2(value) {
  const text = String(value);
  return text.padStart(2, "0");
}
|
||||
|
||||
/**
 * Computes the season (1 July to 30 June) that contains the given date.
 *
 * Year and month are read in local time. Dates from July onwards belong to
 * the season starting that year; earlier months belong to the season that
 * started the previous year.
 *
 * @param {Date} date - Reference date.
 * @returns {{startYear: number, endYear: number, seasonSlug: string, dateStart: string, dateEnd: string}}
 *   Season bounds; `seasonSlug` is the two-digit start and end years joined by `--`.
 */
function seasonForDate(date) {
  const calendarYear = date.getFullYear();
  // getMonth() is zero-based, so 6 is July.
  const beforeJuly = date.getMonth() < 6;
  const startYear = beforeJuly ? calendarYear - 1 : calendarYear;
  const endYear = startYear + 1;
  const lastTwoDigits = (year) => String(year).slice(-2);

  return {
    startYear,
    endYear,
    seasonSlug: `${lastTwoDigits(startYear)}--${lastTwoDigits(endYear)}`,
    dateStart: `${startYear}-07-01`,
    dateEnd: `${endYear}-06-30`,
  };
}
|
||||
|
||||
/**
 * Builds the schedule page URL for one season.
 *
 * The path is assembled from the CONFIG association, club id and club name
 * plus the season slug; the season's start and end dates are appended as the
 * `date_start` / `date_end` query parameters.
 *
 * @param {{seasonSlug: string, dateStart: string, dateEnd: string}} season - Season as returned by seasonForDate().
 * @returns {string} Absolute URL of the schedule page.
 */
function buildUrl(season) {
  const { seasonSlug, dateStart, dateEnd } = season;
  const page = `https://www.mytischtennis.de/click-tt/${CONFIG.association}/${seasonSlug}/verein/${CONFIG.clubId}/${CONFIG.clubName}/spielplan`;
  const query = `?date_start=${dateStart}&date_end=${dateEnd}`;
  return `${page}${query}`;
}
|
||||
|
||||
/**
 * Returns the schedule page HTML, either from a local file or by download.
 *
 * With `args.input` set, that file is read and nothing is downloaded or
 * written. Otherwise the page is fetched with curl and a copy is stored as
 * CONFIG.htmlFile inside CONFIG.outputDir.
 *
 * @param {Object} args - Parsed command-line options; `input` is an optional file path.
 * @param {string} url - URL to download when no input file is given.
 * @returns {string} The HTML document.
 * @throws {TypeError} If `--input` was given without a path.
 * @throws {Error} If the file cannot be read or curl fails or times out.
 */
function readHtml(args, url) {
  if (args.input) {
    // parseArgs() stores `true` for a flag without a value; report that
    // clearly instead of failing inside path.resolve().
    if (typeof args.input !== "string") {
      throw new TypeError("--input erwartet einen Dateipfad");
    }
    return fs.readFileSync(path.resolve(args.input), "utf8");
  }

  // --max-time bounds the whole transfer so an unattended (cron) run cannot
  // hang forever on a stalled connection.
  const html = execFileSync(
    "curl",
    ["-fsSL", "--compressed", "--max-time", "60", url],
    {
      encoding: "utf8",
      maxBuffer: 20 * 1024 * 1024,
    },
  );

  // Keep the raw page next to the JSON output.
  fs.mkdirSync(CONFIG.outputDir, { recursive: true });
  fs.writeFileSync(path.join(CONFIG.outputDir, CONFIG.htmlFile), html);
  return html;
}
|
||||
|
||||
/**
 * Extracts and parses the object literal assigned to `window.__remixContext`.
 *
 * Starting right after the assignment marker, the text is scanned character
 * by character while tracking brace depth. Braces inside double-quoted
 * strings are ignored, and backslash escapes inside strings are honoured.
 * The span up to the brace that brings the depth back to zero is handed to
 * JSON.parse.
 *
 * @param {string} html - Full HTML document.
 * @returns {*} The parsed context value.
 * @throws {Error} If the marker is missing or the closing brace is never found.
 * @throws {SyntaxError} If the extracted span is not valid JSON.
 */
function extractRemixContext(html) {
  const marker = "window.__remixContext = ";
  const markerIndex = html.indexOf(marker);
  if (markerIndex === -1) {
    throw new Error("window.__remixContext nicht gefunden");
  }

  const begin = markerIndex + marker.length;
  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  let pos = begin;

  while (pos < html.length) {
    const ch = html[pos];
    pos += 1;

    if (insideString) {
      if (skipNext) {
        // Character after a backslash: part of the string, whatever it is.
        skipNext = false;
      } else if (ch === "\\") {
        skipNext = true;
      } else if (ch === "\"") {
        insideString = false;
      }
      continue;
    }

    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
        nesting += 1;
        break;
      case "}":
        nesting -= 1;
        if (nesting === 0) {
          // `pos` already points one past the closing brace.
          return JSON.parse(html.slice(begin, pos));
        }
        break;
      default:
        break;
    }
  }

  throw new Error("Ende von window.__remixContext nicht gefunden");
}
|
||||
|
||||
/**
 * Checks whether a value looks like a schedule keyed by date.
 *
 * True when the value is a non-array object with at least one entry whose
 * key has the form `YYYY-MM-DD` and whose value is an array containing at
 * least one item with truthy `team_home`, `team_away` and `meeting_id`.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} Whether the value matches the schedule shape.
 */
function looksLikeScheduleByDate(value) {
  const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
  if (!isPlainObject) {
    return false;
  }

  const isDateKey = (key) => /^\d{4}-\d{2}-\d{2}$/.test(key);
  const isMatchLike = (item) => Boolean(item && item.team_home && item.team_away && item.meeting_id);

  for (const [key, list] of Object.entries(value)) {
    if (isDateKey(key) && Array.isArray(list) && list.some(isMatchLike)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Searches nested objects depth-first for the first schedule-shaped value.
 *
 * A value counts as a schedule when looksLikeScheduleByDate() accepts it.
 * Only non-array objects are descended into; arrays and primitives end the
 * search on that branch.
 *
 * @param {*} value - Root value to search.
 * @param {string[]} [trail=[]] - Property names leading from the root to `value`.
 * @returns {{schedule: Object, path: string[]}|null} The schedule and the
 *   property path to it, or null when none is found.
 */
function findSchedule(value, trail = []) {
  if (looksLikeScheduleByDate(value)) {
    return { schedule: value, path: trail };
  }

  const canDescend = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!canDescend) {
    return null;
  }

  for (const [key, child] of Object.entries(value)) {
    const found = findSchedule(child, [...trail, key]);
    if (found) {
      return found;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Maps one raw match record to the flat camelCase shape used in the output.
 *
 * Every source field that is missing or null becomes null. `result` is
 * "<matches_won>:<matches_lost>" when both counts are present, otherwise null.
 *
 * @param {string} day - Key of the day the match is listed under.
 * @param {Object} match - Raw match record.
 * @returns {Object} Normalized match.
 */
function normalizeMatch(day, match) {
  // A destructuring default only applies to undefined; a null source value
  // stays null, so both cases end up as null.
  const {
    date = null,
    formattedDay = null,
    formattedTime = null,
    state = null,
    meeting_id: meetingId = null,
    meeting_number: meetingNumber = null,
    league_id: leagueId = null,
    league_name: leagueName = null,
    league_short_name: leagueShortName = null,
    league_org_short_name: leagueOrgShortName = null,
    round_name: roundName = null,
    team_home: teamHome = null,
    team_home_id: teamHomeId = null,
    team_home_club_id: teamHomeClubId = null,
    team_away: teamAway = null,
    team_away_id: teamAwayId = null,
    team_away_club_id: teamAwayClubId = null,
    matches_won: won,
    matches_lost: lost,
    is_confirmed: isConfirmed = null,
    is_meeting_complete: isComplete = null,
    original_date: originalDate = null,
    location = null,
    pdf_url: pdfUrl = null,
  } = match;

  const hasScore = won != null && lost != null;
  const result = hasScore ? `${won}:${lost}` : null;

  return {
    day,
    date,
    formattedDay,
    formattedTime,
    state,
    meetingId,
    meetingNumber,
    leagueId,
    leagueName,
    leagueShortName,
    leagueOrgShortName,
    roundName,
    teamHome,
    teamHomeId,
    teamHomeClubId,
    teamAway,
    teamAwayId,
    teamAwayClubId,
    result,
    isConfirmed,
    isComplete,
    originalDate,
    location,
    pdfUrl,
  };
}
|
||||
|
||||
/**
 * Parses the schedule out of a downloaded page.
 *
 * The Remix context is extracted from the HTML, its `state.loaderData` is
 * searched for a date-keyed schedule, and the matches of all days are
 * flattened into one list ordered by day key.
 *
 * @param {string} html - Full HTML document of the schedule page.
 * @param {Object} meta - Source description copied verbatim into the result.
 * @returns {{importedAt: string, source: Object, loaderDataPath: string,
 *   matchCount: number, matchesByDay: Object, matches: Object[]}} Import result.
 * @throws {Error} If no schedule is found in the loader data.
 */
function parseSchedule(html, meta) {
  const context = extractRemixContext(html);
  const result = findSchedule(context.state?.loaderData);
  if (!result) {
    throw new Error("Keinen Spielplan im Remix loaderData gefunden");
  }

  const matchesByDay = result.schedule;
  const matches = Object.keys(matchesByDay)
    .sort()
    .flatMap((day) => {
      const dayMatches = matchesByDay[day];
      // The schedule detector only requires SOME entries to be match lists
      // and tolerates empty list items, so skip anything else here instead
      // of crashing on `.map` or on a null match.
      if (!Array.isArray(dayMatches)) {
        return [];
      }
      return dayMatches
        .filter((match) => match != null)
        .map((match) => normalizeMatch(day, match));
    });

  return {
    importedAt: new Date().toISOString(),
    source: meta,
    loaderDataPath: result.path.join("."),
    matchCount: matches.length,
    matchesByDay,
    matches,
  };
}
|
||||
|
||||
/**
 * Runs one import: determine the season, obtain the page, parse it and write
 * the result as pretty-printed JSON to CONFIG.outputFile in CONFIG.outputDir.
 *
 * Options read from the command line:
 *   --today YYYY-MM-DD  date used to select the season (default: now)
 *   --input FILE        read the HTML from FILE instead of downloading it
 *
 * @throws {Error} If --today is not a parseable date, or any step fails.
 */
function main() {
  const args = parseArgs(process.argv);

  let today;
  if (args.today) {
    // A date-time string without a zone offset is parsed as local time, so
    // this is local noon of the given day.
    today = new Date(`${args.today}T12:00:00`);
  } else {
    today = new Date();
  }
  if (Number.isNaN(today.getTime())) {
    throw new Error(`Ungueltiges Datum fuer --today: ${args.today}`);
  }

  const season = seasonForDate(today);
  const url = buildUrl(season);
  const html = readHtml(args, url);

  const { clubId, clubName, association, outputDir, outputFile } = CONFIG;
  const parsed = parseSchedule(html, { url, clubId, clubName, association, season });

  fs.mkdirSync(outputDir, { recursive: true });
  const outputPath = path.join(outputDir, outputFile);
  const serialized = JSON.stringify(parsed, null, 2);
  fs.writeFileSync(outputPath, `${serialized}\n`);

  const summary = [
    `Spielplan gespeichert: ${outputPath}`,
    `Spiele: ${parsed.matchCount}`,
    `Zeitraum: ${season.dateStart} bis ${season.dateEnd}`,
  ];
  for (const line of summary) {
    console.log(line);
  }
}
|
||||
|
||||
// Script entry point: run the import as soon as this file is executed.
main();
|
||||
6
temp/webpage-downloads/run-spielplan-import.sh
Executable file
6
temp/webpage-downloads/run-spielplan-import.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash
# Runs the schedule import and appends its stdout and stderr to
# data/spielplan-import.log.
set -euo pipefail

# Work from the directory this script lives in, so the wrapper keeps working
# wherever the repository is checked out.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
cd -- "$script_dir"

mkdir -p data
/usr/bin/env node import-spielplan.js >> data/spielplan-import.log 2>&1
|
||||
18
temp/webpage-downloads/spielplan.html
Normal file
18
temp/webpage-downloads/spielplan.html
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user