diff --git a/backend/controllers/teamDocumentController.js b/backend/controllers/teamDocumentController.js index 2ab238e..1d2a1e9 100644 --- a/backend/controllers/teamDocumentController.js +++ b/backend/controllers/teamDocumentController.js @@ -1,5 +1,6 @@ import multer from 'multer'; import path from 'path'; +import fs from 'fs'; import TeamDocumentService from '../services/teamDocumentService.js'; import PDFParserService from '../services/pdfParserService.js'; import { getUserByToken } from '../utils/userUtils.js'; @@ -8,6 +9,11 @@ import { devLog } from '../utils/logger.js'; // Multer-Konfiguration für Datei-Uploads const storage = multer.diskStorage({ destination: (req, file, cb) => { + try { + fs.mkdirSync('uploads/temp', { recursive: true }); + } catch (mkdirError) { + console.error('[multer] - Failed to ensure temp upload directory exists:', mkdirError); + } cb(null, 'uploads/temp/'); }, filename: (req, file, cb) => { diff --git a/backend/node_modules/.package-lock.json b/backend/node_modules/.package-lock.json index fee7619..84b5d9c 100644 --- a/backend/node_modules/.package-lock.json +++ b/backend/node_modules/.package-lock.json @@ -1004,6 +1004,23 @@ "node": ">=6" } }, + "node_modules/canvas": { + "version": "2.11.2", + "resolved": "https://registry.npmjs.org/canvas/-/canvas-2.11.2.tgz", + "integrity": "sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==", + "hasInstallScript": true, + "ideallyInert": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@mapbox/node-pre-gyp": "^1.0.0", + "nan": "^2.17.0", + "simple-get": "^3.0.3" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -1302,6 +1319,20 @@ "ms": "2.0.0" } }, + "node_modules/decompress-response": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", + "integrity": "sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==", + "ideallyInert": true, + "license": "MIT", + "optional": true, + "dependencies": { + "mimic-response": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -2642,6 +2673,20 @@ "node": ">= 0.6" } }, + "node_modules/mimic-response": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-2.1.0.tgz", + "integrity": "sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==", + "ideallyInert": true, + "license": "MIT", + "optional": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -2796,6 +2841,14 @@ "node": ">=12" } }, + "node_modules/nan": { + "version": "2.23.1", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.23.1.tgz", + "integrity": "sha512-r7bBUGKzlqk8oPBDYxt6Z0aEdF1G1rwlMcLk8LCOMbOzf0mG+JUfUzG4fIMWwHWP0iyaLWEQZJmtB7nOHEm/qw==", + "ideallyInert": true, + "license": "MIT", + "optional": true + }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -3089,6 +3142,16 @@ "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==", "license": "MIT" }, + "node_modules/path2d-polyfill": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/path2d-polyfill/-/path2d-polyfill-2.0.1.tgz", + "integrity": "sha512-ad/3bsalbbWhmBo0D6FZ4RNMwsLsPpL6gnvhuSaU5Vm7b06Kr5ubSltQQ0T7YKsiJQO+g22zJ4dJKNTXIyOXtA==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=8" + } + }, "node_modules/pdf-parse": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-1.1.1.tgz", @@ -3117,6 +3180,19 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/pdfjs-dist": { + "version": "3.11.174", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-3.11.174.tgz", + "integrity": "sha512-TdTZPf1trZ8/UFu5Cx/GXB7GZM30LT+wWUNfsi6Bq8ePLnb+woNKtDymI2mxZYBpMbonNFqKmiz684DIfnd8dA==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "canvas": "^2.11.2", + "path2d-polyfill": "^2.0.1" + } + }, "node_modules/pg-connection-string": { "version": "2.6.4", "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.6.4.tgz", @@ -3639,6 +3715,41 @@ "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "ideallyInert": true, + "license": "MIT", + "optional": true + }, + "node_modules/simple-get": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-3.1.1.tgz", + "integrity": "sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==", + "ideallyInert": true, + "license": "MIT", + "optional": true, + "dependencies": { + "decompress-response": "^4.2.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, "node_modules/simple-swizzle": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", diff --git a/backend/package-lock.json b/backend/package-lock.json index 9263645..310629b 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -25,6 +25,7 @@ "node-cron": "^4.2.1", "nodemailer": "^7.0.9", "pdf-parse": "^1.1.1", + "pdfjs-dist": "^3.11.174", "sequelize": "^6.37.3", "sharp": "^0.33.5" }, @@ -1017,6 +1018,22 @@ "node": ">=6" } }, + "node_modules/canvas": { + "version": "2.11.2", + "resolved": "https://registry.npmjs.org/canvas/-/canvas-2.11.2.tgz", + "integrity": "sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@mapbox/node-pre-gyp": "^1.0.0", + "nan": "^2.17.0", + "simple-get": "^3.0.3" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -1315,6 +1332,19 @@ "ms": "2.0.0" } }, + "node_modules/decompress-response": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", + "integrity": "sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==", + "license": "MIT", + "optional": true, + "dependencies": { + "mimic-response": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -2654,6 +2684,19 @@ "node": ">= 0.6" } }, + "node_modules/mimic-response": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-2.1.0.tgz", + "integrity": "sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -2808,6 +2851,13 @@ "node": ">=12" } }, + "node_modules/nan": { + "version": "2.23.1", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.23.1.tgz", + "integrity": "sha512-r7bBUGKzlqk8oPBDYxt6Z0aEdF1G1rwlMcLk8LCOMbOzf0mG+JUfUzG4fIMWwHWP0iyaLWEQZJmtB7nOHEm/qw==", + "license": "MIT", + "optional": true + }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -3101,6 +3151,16 @@ "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==", "license": "MIT" }, + "node_modules/path2d-polyfill": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/path2d-polyfill/-/path2d-polyfill-2.0.1.tgz", + "integrity": "sha512-ad/3bsalbbWhmBo0D6FZ4RNMwsLsPpL6gnvhuSaU5Vm7b06Kr5ubSltQQ0T7YKsiJQO+g22zJ4dJKNTXIyOXtA==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=8" + } + }, "node_modules/pdf-parse": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-1.1.1.tgz", @@ -3129,6 +3189,19 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/pdfjs-dist": { + "version": "3.11.174", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-3.11.174.tgz", + "integrity": "sha512-TdTZPf1trZ8/UFu5Cx/GXB7GZM30LT+wWUNfsi6Bq8ePLnb+woNKtDymI2mxZYBpMbonNFqKmiz684DIfnd8dA==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "canvas": "^2.11.2", + "path2d-polyfill": "^2.0.1" + } + }, "node_modules/pg-connection-string": { "version": "2.6.4", "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.6.4.tgz", @@ -3651,6 +3724,39 @@ "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + "node_modules/simple-get": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-3.1.1.tgz", + "integrity": "sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==", + "license": "MIT", + "optional": true, + "dependencies": { + "decompress-response": "^4.2.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, "node_modules/simple-swizzle": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", diff --git a/backend/package.json b/backend/package.json index 2ea54d8..d154c2c 100644 --- a/backend/package.json +++ b/backend/package.json @@ -26,6 +26,7 @@ "jsonwebtoken": "^9.0.2", "multer": "^1.4.5-lts.1", "mysql2": "^3.10.3", + "pdfjs-dist": "^3.11.174", "node-cron": "^4.2.1", "nodemailer": "^7.0.9", "pdf-parse": "^1.1.1", diff --git a/backend/services/pdfParserService.js b/backend/services/pdfParserService.js index 44fdeb2..2b55b11 100644 --- a/backend/services/pdfParserService.js +++ b/backend/services/pdfParserService.js @@ -32,19 +32,28 @@ class PDFParserService { // Bestimme Dateityp basierend auf Dateiendung const fileExtension = path.extname(filePath).toLowerCase(); let fileContent; + let extractedLines = null; + let lineEntries = null; if (fileExtension === '.pdf') { - // Echte PDF-Parsing - const pdfBuffer = fs.readFileSync(filePath); - const pdfData = await pdfParse(pdfBuffer); - fileContent = pdfData.text; + try { + const { text, lines, entries } = await this.extractPdfTextWithLayout(filePath); + fileContent = text; + extractedLines = lines; + lineEntries = entries; + } catch (layoutError) { + console.error('[PDFParserService.parsePDF] - Layout extraction failed, falling back to pdf-parse:', layoutError); + const pdfBuffer = fs.readFileSync(filePath); + const pdfData = await pdfParse(pdfBuffer); + fileContent = pdfData.text; + } } else { // Fallback für TXT-Dateien (für Tests) fileContent = fs.readFileSync(filePath, 'utf8'); } // Parse den Text nach Spiel-Daten - const parsedData = this.extractMatchData(fileContent, clubId); + const parsedData = this.extractMatchData(fileContent, clubId, extractedLines, lineEntries); return parsedData; @@ -60,7 +69,7 @@ class PDFParserService { * @param {number} clubId - ID des Vereins * @returns {Object} Geparste Daten mit Matches und Metadaten */ - static extractMatchData(text, clubId) { + static extractMatchData(text, clubId, providedLines = null, providedLineEntries = null) { const matches = []; const errors = []; const metadata = { @@ -71,21 +80,33 @@ class PDFParserService { try { // Teile Text in Zeilen auf - const lines = text.split('\n').map(line => line.trim()).filter(line => line.length > 0); + const linesSource = providedLines && Array.isArray(providedLines) ? providedLines : text.split('\n'); + const lines = []; + const filteredLineEntries = []; + + linesSource.forEach((line, idx) => { + const trimmed = typeof line === 'string' ? line.trim() : ''; + if (trimmed.length > 0) { + lines.push(trimmed); + if (providedLineEntries && Array.isArray(providedLineEntries) && providedLineEntries[idx]) { + filteredLineEntries.push(providedLineEntries[idx]); + } + } + }); metadata.totalLines = lines.length; // Verschiedene Parsing-Strategien je nach PDF-Format const strategies = [ - { name: 'Standard Format', fn: this.parseStandardFormat }, - { name: 'Table Format', fn: this.parseTableFormat }, - { name: 'List Format', fn: this.parseListFormat } + { name: 'Standard Format', fn: (lns, club, entries) => PDFParserService.parseStandardFormat(lns, club, entries) }, + { name: 'Table Format', fn: (lns, club, entries) => PDFParserService.parseTableFormat(lns, club, entries) }, + { name: 'List Format', fn: (lns, club, entries) => PDFParserService.parseListFormat(lns, club, entries) } ]; for (const strategy of strategies) { try { - const result = strategy.fn(lines, clubId); + const result = strategy.fn(lines, clubId, filteredLineEntries.length === lines.length ? filteredLineEntries : null); if (result.matches.length > 0) { console.log(`[PDF Parser] Using strategy: ${strategy.name}, found ${result.matches.length} matches`); @@ -134,12 +155,29 @@ class PDFParserService { * @param {number} clubId - ID des Vereins * @returns {Object} Geparste Matches */ - static parseStandardFormat(lines, clubId) { + static parseStandardFormat(lines, clubId, lineEntries = null) { const matches = []; for (let i = 0; i < lines.length; i++) { const line = lines[i]; + const lineDetail = Array.isArray(lineEntries) ? lineEntries[i] : null; + const columnSegments = lineDetail ? this.segmentLineByPositions(lineDetail) : null; + let homeFromColumns = null; + let guestFromColumns = null; + let codeFromColumns = null; + + if (columnSegments && columnSegments.length >= 3) { + homeFromColumns = columnSegments[1]?.trim() || null; + guestFromColumns = columnSegments[2]?.trim() || null; + const lastSegment = columnSegments[columnSegments.length - 1]; + if (lastSegment) { + const candidateCode = lastSegment.replace(/\s+/g, '').trim(); + if (/^[A-Z0-9]{12}$/.test(candidateCode)) { + codeFromColumns = candidateCode; + } + } + } // Suche nach Datum-Pattern (dd.mm.yyyy oder dd/mm/yyyy) const dateMatch = line.match(/(\d{1,2})[./](\d{1,2})[./](\d{4})/); @@ -181,7 +219,7 @@ class PDFParserService { const cleanLine3 = cleanLine2.replace(/\([^)]*\)/g, ''); // Suche nach Code (12 Zeichen) oder PIN (4 Ziffern) am Ende - const codeMatch = cleanLine3.match(/([A-Z0-9]{12})$/); + let codeMatch = cleanLine3.match(/([A-Z0-9]{12})$/); const pinMatch = cleanLine3.match(/(\d{4})$/); let code = null; @@ -222,6 +260,11 @@ class PDFParserService { } } + if (!code && codeFromColumns) { + code = codeFromColumns; + teamsPart = teamsPart.replace(new RegExp(`${code}$`), '').trim(); + } + if (code || pinMatch) { @@ -275,39 +318,89 @@ class PDFParserService { // Strategie 1: Suche nach "Harheimer TC" als Heimteam oder Gastteam if (teamsPart.includes('Harheimer TC')) { const harheimerIndex = teamsPart.indexOf('Harheimer TC'); - - // Prüfe, ob "Harheimer TC" am Anfang oder am Ende steht let beforeHarheimer = teamsPart.substring(0, harheimerIndex).trim(); let afterHarheimer = teamsPart.substring(harheimerIndex + 'Harheimer TC'.length).trim(); - // Entferne Spielnummern aus beiden Teilen - beforeHarheimer = beforeHarheimer.replace(/^\d+/, '').trim(); - afterHarheimer = afterHarheimer.replace(/^\d+/, '').trim(); + beforeHarheimer = beforeHarheimer + .replace(/^\(\d+\)/, '') + .replace(/^\d+/, '') + .trim(); + afterHarheimer = afterHarheimer + .replace(/^\(\d+\)/, '') + .replace(/^\d+/, '') + .trim(); - if (beforeHarheimer && !afterHarheimer) { - // "Harheimer TC" ist am Ende → Harheimer ist Gastteam + const romanNumeralCandidates = ['XII', 'XI', 'X', 'IX', 'VIII', 'VII', 'VI', 'V', 'IV', 'III', 'II', 'I']; + + const matchLeadingRoman = (token) => { + if (!token) { + return null; + } + const normalizedToken = token.trim(); + for (const candidate of romanNumeralCandidates) { + if (normalizedToken.startsWith(candidate)) { + const nextChar = normalizedToken.charAt(candidate.length); + if (!nextChar || /\s|[A-ZÄÖÜẞ]/.test(nextChar)) { + const remainder = normalizedToken.slice(candidate.length).trimStart(); + return { roman: candidate, remainder }; + } + } + } + return null; + }; + + const extractLeadingRomanFromTokens = (tokenList) => { + const tokensCopy = Array.isArray(tokenList) ? [...tokenList] : []; + if (tokensCopy.length === 0) { + return { roman: null, tokens: tokensCopy }; + } + + const firstToken = tokensCopy[0]; + const match = matchLeadingRoman(firstToken); + + if (match) { + const { roman, remainder } = match; + if (remainder) { + tokensCopy[0] = remainder; + } else { + tokensCopy.shift(); + } + return { roman, tokens: tokensCopy }; + } + + return { roman: null, tokens: tokensCopy }; + }; + + if (!beforeHarheimer && afterHarheimer) { + const tokens = afterHarheimer.split(/\s+/).filter(Boolean); + const { roman: homeRoman, tokens: guestTokens } = extractLeadingRomanFromTokens(tokens); + const homeSuffix = homeRoman ? ` ${homeRoman}` : ''; + homeTeamName = `Harheimer TC${homeSuffix}`; + guestTeamName = guestTokens.join(' ').trim(); + } else if (beforeHarheimer && !afterHarheimer) { + // "Harheimer TC" ist Gastteam ohne weitere Tokens + homeTeamName = beforeHarheimer.replace(/\([^)]*\)/g, '').trim(); guestTeamName = 'Harheimer TC'; - homeTeamName = beforeHarheimer - .replace(/\([^)]*\)/g, '') // Entferne Klammern - .trim(); - } else if (!beforeHarheimer && afterHarheimer) { - // "Harheimer TC" ist am Anfang → Harheimer ist Heimteam - homeTeamName = 'Harheimer TC'; - guestTeamName = afterHarheimer - .replace(/\([^)]*\)/g, '') // Entferne Klammern - .trim(); } else if (beforeHarheimer && afterHarheimer) { - // "Harheimer TC" ist in der Mitte → verwende Position als Hinweis - // Normalerweise: Heimteam zuerst, dann Gastteam - homeTeamName = beforeHarheimer - .replace(/\([^)]*\)/g, '') // Entferne Klammern - .trim(); - guestTeamName = 'Harheimer TC'; + // "Harheimer TC" steht in der Mitte → Harheimer ist Gast, Tokens nach Harheimer gehören zu ihm + homeTeamName = beforeHarheimer.replace(/\([^)]*\)/g, '').trim(); + const tokens = afterHarheimer.split(/\s+/).filter(Boolean); + const { roman: guestRoman, tokens: remainingTokens } = extractLeadingRomanFromTokens(tokens); + const guestSuffix = guestRoman ? ` ${guestRoman}` : ''; + guestTeamName = `Harheimer TC${guestSuffix}`; + if (remainingTokens.length > 0) { + const trailingText = remainingTokens.join(' ').trim(); + if (trailingText) { + guestTeamName = `${guestTeamName} ${trailingText}`.trim(); + } + } } else { - // Nur "Harheimer TC" ohne andere Teams → ungültig + // Nur "Harheimer TC" ohne weitere Kontexte → überspringen continue; } - + + homeTeamName = homeTeamName.replace(/\([^)]*\)/g, '').trim(); + guestTeamName = guestTeamName.replace(/\([^)]*\)/g, '').trim(); } else { // Strategie 2: Suche nach Großbuchstaben am Anfang des zweiten Teams const teamSplitMatch = teamsPart.match(/^([A-Za-z0-9\s\-\.]+?)\s+([A-Z][A-Za-z0-9\s\-\.]+)$/); @@ -322,6 +415,13 @@ class PDFParserService { } } + if (homeFromColumns) { + homeTeamName = homeFromColumns; + } + if (guestFromColumns) { + guestTeamName = guestFromColumns; + } + if (homeTeamName && guestTeamName) { let debugInfo; if (code) { @@ -358,13 +458,59 @@ class PDFParserService { return { matches }; } + static segmentLineByPositions(lineDetail) { + if (!lineDetail || !Array.isArray(lineDetail.items)) { + return null; + } + + const intraWordGapThreshold = 1.5; + const columnGapThreshold = 12; + const segments = []; + + let currentSegment = ''; + let previousItem = null; + + lineDetail.items.forEach((item) => { + if (!item || typeof item.text !== 'string') { + return; + } + const text = item.text; + if (!text || text.trim().length === 0) { + return; + } + + if (previousItem) { + const previousEnd = previousItem.x + previousItem.width; + const gap = item.x - previousEnd; + + if (gap > columnGapThreshold) { + if (currentSegment.trim().length > 0) { + segments.push(currentSegment.trim()); + } + currentSegment = ''; + } else if (gap > intraWordGapThreshold) { + currentSegment += ' '; + } + } + + currentSegment += text; + previousItem = item; + }); + + if (currentSegment.trim().length > 0) { + segments.push(currentSegment.trim()); + } + + return segments.length > 0 ? segments : null; + } + /** * Tabellen-Format Parser * @param {Array} lines - Textzeilen * @param {number} clubId - ID des Vereins * @returns {Object} Geparste Matches */ - static parseTableFormat(lines, clubId) { + static parseTableFormat(lines, clubId, lineEntries = null) { const matches = []; // Suche nach Tabellen-Header @@ -428,7 +574,7 @@ class PDFParserService { * @param {number} clubId - ID des Vereins * @returns {Object} Geparste Matches */ - static parseListFormat(lines, clubId) { + static parseListFormat(lines, clubId, lineEntries = null) { const matches = []; for (let i = 0; i < lines.length; i++) { @@ -559,13 +705,10 @@ class PDFParserService { const matchingMatch = existingMatches.find(match => { if (!match.guestTeam) return false; - const guestTeamName = match.guestTeam.name.toLowerCase(); - const searchGuestName = matchData.guestTeamName.toLowerCase(); - - // Exakte Übereinstimmung oder Teilstring-Match - return guestTeamName === searchGuestName || - guestTeamName.includes(searchGuestName) || - searchGuestName.includes(guestTeamName); + const guestTeamName = match.guestTeam.name; + const searchGuestName = matchData.guestTeamName; + + return PDFParserService.namesRoughlyMatch(guestTeamName, searchGuestName); }); if (matchingMatch) { @@ -631,8 +774,7 @@ class PDFParserService { // Fuzzy-Matching für Team-Namen if (!homeTeam) { homeTeam = allTeams.find(t => - t.name.toLowerCase().includes(matchData.homeTeamName.toLowerCase()) || - matchData.homeTeamName.toLowerCase().includes(t.name.toLowerCase()) + PDFParserService.namesRoughlyMatch(t.name, matchData.homeTeamName) ); if (homeTeam) { @@ -642,8 +784,7 @@ class PDFParserService { if (!guestTeam) { guestTeam = allTeams.find(t => - t.name.toLowerCase().includes(matchData.guestTeamName.toLowerCase()) || - matchData.guestTeamName.toLowerCase().includes(t.name.toLowerCase()) + PDFParserService.namesRoughlyMatch(t.name, matchData.guestTeamName) ); if (guestTeam) { @@ -694,6 +835,150 @@ class PDFParserService { throw error; } } + + static async extractPdfTextWithLayout(filePath) { + const { default: pdfjsLib } = await import('pdfjs-dist/legacy/build/pdf.js'); + const pdfData = new Uint8Array(fs.readFileSync(filePath)); + const loadingTask = pdfjsLib.getDocument({ data: pdfData, disableWorker: true }); + const pdf = await loadingTask.promise; + + const lineEntries = []; + const lineTolerance = 2; // Toleranz für Zeilenhöhe + const spaceGapThreshold = 1.5; // Mindestabstand, um ein Leerzeichen einzufügen + + for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) { + const page = await pdf.getPage(pageNumber); + const textContent = await page.getTextContent({ normalizeWhitespace: false }); + const pageLines = []; + + textContent.items.forEach((item) => { + if (!item || typeof item.str !== 'string') { + return; + } + const text = item.str; + if (!text || text.trim().length === 0) { + return; + } + + const [scaleX, , , , x, y] = item.transform; + const width = (item.width || 0) * (scaleX || 1); + + let targetLine = pageLines.find((line) => Math.abs(line.y - y) < lineTolerance); + if (!targetLine) { + targetLine = { y, items: [] }; + pageLines.push(targetLine); + } + + targetLine.items.push({ + text, + x, + y, + width + }); + }); + + // Sortiere Zeilen von oben nach unten + pageLines.sort((a, b) => b.y - a.y); + + pageLines.forEach((line) => { + // Sortiere Zeichen von links nach rechts + line.items.sort((a, b) => a.x - b.x); + + let lineText = ''; + let previousItem = null; + + line.items.forEach((item) => { + if (previousItem) { + const previousEnd = previousItem.x + previousItem.width; + const gap = item.x - previousEnd; + if (gap > spaceGapThreshold) { + lineText += ' '; + } + } + + lineText += item.text; + previousItem = item; + }); + + const normalized = lineText.trim(); + if (normalized.length > 0) { + lineEntries.push({ + text: normalized, + items: line.items.map((item) => ({ + text: item.text, + x: item.x, + y: item.y, + width: item.width + })) + }); + } + }); + } + + await pdf.destroy(); + + const lines = lineEntries.map((entry) => entry.text); + const text = lines.join('\n'); + return { text, lines, entries: lineEntries }; + } + + static normalizeTeamName(name) { + if (!name || typeof name !== 'string') return ''; + return name + .toLowerCase() + .replace(/\u2026/g, '...') + .replace(/\s+/g, ' ') + .trim(); + } + + static matchWithEllipsis(pattern, target) { + const normalizedPattern = PDFParserService.normalizeTeamName(pattern); + const normalizedTarget = PDFParserService.normalizeTeamName(target); + + if (!normalizedPattern.includes('...')) { + return normalizedTarget.includes(normalizedPattern); + } + + const segments = normalizedPattern.split('...').map(segment => segment.trim()).filter(Boolean); + if (segments.length === 0) { + return true; + } + + let currentIndex = 0; + for (const segment of segments) { + const foundIndex = normalizedTarget.indexOf(segment, currentIndex); + if (foundIndex === -1) { + return false; + } + currentIndex = foundIndex + segment.length; + } + + return true; + } + + static namesRoughlyMatch(nameA, nameB) { + const normalizedA = PDFParserService.normalizeTeamName(nameA); + const normalizedB = PDFParserService.normalizeTeamName(nameB); + + if (!normalizedA || !normalizedB) { + return false; + } + + if (normalizedA === normalizedB) { + return true; + } + + if (normalizedA.includes('...') || normalizedB.includes('...')) { + if (PDFParserService.matchWithEllipsis(normalizedA, normalizedB)) { + return true; + } + if (PDFParserService.matchWithEllipsis(normalizedB, normalizedA)) { + return true; + } + } + + return normalizedA.includes(normalizedB) || normalizedB.includes(normalizedA); + } } export default PDFParserService; diff --git a/backend/uploads/team-documents/2_code_list_1762593197315.pdf b/backend/uploads/team-documents/2_code_list_1762593197315.pdf new file mode 100644 index 0000000..3810894 Binary files /dev/null and b/backend/uploads/team-documents/2_code_list_1762593197315.pdf differ diff --git a/frontend/src/views/MembersView.vue b/frontend/src/views/MembersView.vue index a927452..2513252 100644 --- a/frontend/src/views/MembersView.vue +++ b/frontend/src/views/MembersView.vue @@ -213,7 +213,7 @@ - + @@ -241,26 +241,26 @@ @delete-note="deleteNote" @close="closeNotesModal" /> - - - - - - + + + + + + - -
-
- Ausgewählte Datei: {{ pendingUploadFile?.name }} -
- Typ: {{ pendingUploadType === 'code_list' ? 'Code-Liste' : 'Pin-Liste' }} -
- Team: {{ teamToEdit?.name }} -
- Liga: {{ getTeamLeagueName() }} -
-
- - -
-
- - -
- 📄 Hochgeladene Dokumente - - - - - - - - - - - - - - - - - -
DateinameTypGrößeAktionen
{{ document.originalFileName }} - - {{ document.documentType === 'code_list' ? 'Code-Liste' : 'Pin-Liste' }} - - {{ formatFileSize(document.fileSize) }} - -
-
-
🔄 Automatische Jobs @@ -443,10 +382,8 @@ export default { const selectedSeasonId = ref(null); const currentSeason = ref(null); const teamDocuments = ref([]); - const pendingUploadFile = ref(null); - const pendingUploadType = ref(null); - const showLeagueSelection = ref(false); const parsingInProgress = ref(false); + const parsingDocuments = ref({}); // PDF-Dialog Variablen const showPDFViewer = ref(false); @@ -594,42 +531,34 @@ export default { const uploadCodeList = () => { if (!teamToEdit.value) return; + if (parsingInProgress.value) { + return; + } - // Erstelle ein verstecktes File-Input-Element const input = document.createElement('input'); input.type = 'file'; input.accept = '.pdf,.doc,.docx,.txt,.csv'; input.onchange = async (event) => { const file = event.target.files[0]; if (!file) return; - - // Speichere die Datei und den Typ für späteres Parsing - pendingUploadFile.value = file; - pendingUploadType.value = 'code_list'; - - // Zeige Liga-Auswahl für Parsing - showLeagueSelection.value = true; + await uploadAndParseDocument(file, 'code_list'); }; input.click(); }; const uploadPinList = () => { if (!teamToEdit.value) return; + if (parsingInProgress.value) { + return; + } - // Erstelle ein verstecktes File-Input-Element const input = document.createElement('input'); input.type = 'file'; input.accept = '.pdf,.doc,.docx,.txt,.csv'; input.onchange = async (event) => { const file = event.target.files[0]; if (!file) return; - - // Speichere die Datei und den Typ für späteres Parsing - pendingUploadFile.value = file; - pendingUploadType.value = 'pin_list'; - - // Zeige Liga-Auswahl für Parsing - showLeagueSelection.value = true; + await uploadAndParseDocument(file, 'pin_list'); }; input.click(); }; @@ -702,136 +631,153 @@ export default { } }; - const confirmUploadAndParse = async () => { - if (!pendingUploadFile.value || !teamToEdit.value?.leagueId) { - alert('Team ist keiner Liga zugeordnet!'); + const uploadAndParseDocument = async (file, documentType) => { + if (!teamToEdit.value?.leagueId) { + await showInfo( + 'Hinweis', + 'Dieses Team ist keiner Liga zugeordnet.', + 'Bitte ordnen Sie dem Team zuerst eine Liga zu, damit Dokumente verarbeitet werden können.', + 'warning' + ); return; } - + + if (parsingInProgress.value) { + return; + } + parsingInProgress.value = true; - + try { - - // Schritt 1: Datei als Team-Dokument hochladen const formData = new FormData(); - formData.append('document', pendingUploadFile.value); - formData.append('documentType', pendingUploadType.value); - + formData.append('document', file); + formData.append('documentType', documentType); + const uploadResponse = await apiClient.post(`/team-documents/club-team/${teamToEdit.value.id}/upload`, formData, { headers: { 'Content-Type': 'multipart/form-data' } }); - - - // Schritt 2: Datei parsen (nur für PDF/TXT-Dateien) - const fileExtension = pendingUploadFile.value.name.toLowerCase().split('.').pop(); + + const fileExtension = file.name.toLowerCase().split('.').pop(); + const documentLabel = documentType === 'code_list' ? 'Code-Liste' : 'Pin-Liste'; + if (fileExtension === 'pdf' || fileExtension === 'txt') { const parseResponse = await apiClient.post(`/team-documents/${uploadResponse.data.id}/parse?leagueid=${teamToEdit.value.leagueId}`); - - const { parseResult, saveResult } = parseResponse.data; - - let message = `${pendingUploadType.value === 'code_list' ? 'Code-Liste' : 'Pin-Liste'} erfolgreich hochgeladen und geparst!\n\n`; + + let message = `${documentLabel} erfolgreich hochgeladen und geparst!\n\n`; message += `Gefundene Spiele: ${parseResult.matchesFound}\n`; message += `Neue Spiele erstellt: ${saveResult.created}\n`; - message += `Spiele aktualisiert: ${saveResult.updated}\n`; - + message += `Spiele aktualisiert: ${saveResult.updated}`; + if (saveResult.errors.length > 0) { - message += `\nFehler: ${saveResult.errors.length}\n`; + message += `\n\nFehler: ${saveResult.errors.length}\n`; message += saveResult.errors.slice(0, 3).join('\n'); if (saveResult.errors.length > 3) { message += `\n... und ${saveResult.errors.length - 3} weitere`; } } - - // Debug-Informationen anzeigen wenn keine Matches gefunden wurden + + let dialogTitle = 'Erfolg'; + let dialogType = 'success'; + if (parseResult.matchesFound === 0) { - message += `\n\n--- DEBUG-INFORMATIONEN ---\n`; - message += `Text-Länge: ${parseResult.debugInfo.totalTextLength} Zeichen\n`; - message += `Zeilen: ${parseResult.debugInfo.totalLines}\n`; - message += `Erste Zeilen:\n`; - parseResult.debugInfo.firstFewLines.forEach((line, index) => { - message += `${index + 1}: "${line}"\n`; - }); - message += `\nLetzte Zeilen:\n`; - parseResult.debugInfo.lastFewLines.forEach((line, index) => { - message += `${parseResult.debugInfo.totalLines - 5 + index + 1}: "${line}"\n`; - }); - // Fehler-Dialog wenn nichts gefunden wurde - await showInfo('Fehler', message, '', 'error'); + dialogTitle = 'Keine Spiele gefunden'; + dialogType = 'warning'; + if (parseResult.debugInfo) { + message += `\n\nHinweis: Keine Spiele erkannt.\nZeilen im Dokument: ${parseResult.debugInfo.totalLines}`; + } } else if (saveResult.errors.length > 0) { - // Warnung wenn Spiele gefunden wurden, aber Fehler auftraten - await showInfo('Warnung', message, '', 'warning'); - } else { - // Erfolg wenn alles geklappt hat - await showInfo('Erfolg', message, '', 'success'); + dialogTitle = 'Warnung'; + dialogType = 'warning'; } + + await showInfo(dialogTitle, message, '', dialogType); } else { - // Für andere Dateitypen nur Upload-Bestätigung - await showInfo('Information', `${pendingUploadType.value === 'code_list' ? 'Code-Liste' : 'Pin-Liste'} "${pendingUploadFile.value.name}" wurde erfolgreich hochgeladen!`, '', 'info'); + await showInfo('Information', `${documentLabel} "${file.name}" wurde erfolgreich hochgeladen!`, '', 'info'); } - - // Dokumente neu laden + await loadTeamDocuments(); - } catch (error) { console.error('Fehler beim Hochladen und Parsen der Datei:', error); - await showInfo('Fehler', 'Fehler beim Hochladen und Parsen der Datei', '', 'error'); + const responseData = error?.response?.data || {}; + const errorMessage = responseData.message || responseData.error || error.message || 'Fehler beim Hochladen und Parsen der Datei'; + await showInfo('Fehler', errorMessage, '', 'error'); } finally { parsingInProgress.value = false; - pendingUploadFile.value = null; - pendingUploadType.value = null; - showLeagueSelection.value = false; } }; - - const cancelUpload = () => { - pendingUploadFile.value = null; - pendingUploadType.value = null; - showLeagueSelection.value = false; - }; - - const getTeamLeagueName = () => { - if (!teamToEdit.value?.leagueId) return 'Keine Liga zugeordnet'; - const league = leagues.value.find(l => l.id === teamToEdit.value.leagueId); - return league ? league.name : 'Unbekannte Liga'; - }; - - - - const parsePDF = async (document) => { - // Finde das Team für dieses Dokument +const parsePDF = async (document) => { const team = teams.value.find(t => t.id === document.clubTeamId); if (!team || !team.leagueId) { - alert('Team ist keiner Liga zugeordnet!'); + await showInfo( + 'Hinweis', + 'Dieses Team ist keiner Liga zugeordnet.', + 'Bitte ordnen Sie dem Team zuerst eine Liga zu, um PDF-Dateien zu parsen.', + 'warning' + ); return; } + + if (parsingDocuments.value[document.id]) { + return; + } + + parsingDocuments.value = { + ...parsingDocuments.value, + [document.id]: true + }; try { - const response = await apiClient.post(`/team-documents/${document.id}/parse?leagueid=${team.leagueId}`); - - const { parseResult, saveResult } = response.data; - - let message = `PDF erfolgreich geparst!\n\n`; - message += `Gefundene Spiele: ${parseResult.matchesFound}\n`; + + let message = `Gefundene Spiele: ${parseResult.matchesFound}\n`; message += `Neue Spiele erstellt: ${saveResult.created}\n`; - message += `Spiele aktualisiert: ${saveResult.updated}\n`; - - if (saveResult.errors.length > 0) { - message += `\nFehler: ${saveResult.errors.length}\n`; + message += `Spiele aktualisiert: ${saveResult.updated}`; + + let dialogTitle = 'Erfolg'; + let dialogType = 'success'; + + if (parseResult.matchesFound === 0) { + dialogTitle = 'Keine Spiele gefunden'; + dialogType = 'warning'; + if (parseResult.debugInfo) { + message += `\n\nHinweis: Keine Spiele erkannt.\nZeilen im Dokument: ${parseResult.debugInfo.totalLines}`; + } + } else if (saveResult.errors.length > 0) { + dialogTitle = 'Warnung'; + dialogType = 'warning'; + message += `\n\nFehler: ${saveResult.errors.length}\n`; message += saveResult.errors.slice(0, 3).join('\n'); if (saveResult.errors.length > 3) { message += `\n... und ${saveResult.errors.length - 3} weitere`; } } - - this.showInfo('Fehler', message, '', 'error'); + + await showInfo(dialogTitle, message, '', dialogType); + await loadTeamDocuments(); } catch (error) { console.error('Fehler beim Parsen der PDF:', error); - this.showInfo('Fehler', 'Fehler beim Parsen der PDF-Datei', '', 'error'); + const responseData = error?.response?.data || {}; + const status = error?.response?.status; + let errorMessage = responseData.message || responseData.error || error.message || 'Fehler beim Parsen der PDF-Datei'; + let details = ''; + + if (status === 404 && responseData.error === 'documentnotfound') { + errorMessage = 'Das ausgewählte Dokument wurde nicht gefunden.'; + } else if (status === 400 && responseData.error === 'missingleagueid') { + errorMessage = 'Für das ausgewählte Team wurde keine Liga übermittelt.'; + } else if (error.code === 'ENOENT' || errorMessage.includes('ENOENT')) { + errorMessage = 'Die PDF-Datei konnte nicht gefunden werden.'; + details = 'Bitte laden Sie die Datei erneut hoch und versuchen Sie es noch einmal.'; + } + + await showInfo('Fehler', errorMessage, details, 'error'); + } finally { + const { [document.id]: _ignored, ...rest } = parsingDocuments.value; + parsingDocuments.value = rest; } }; @@ -1289,10 +1235,8 @@ export default { selectedSeasonId, currentSeason, teamDocuments, - pendingUploadFile, - pendingUploadType, - showLeagueSelection, parsingInProgress, + parsingDocuments, showPDFViewer, pdfUrl, pdfDialogTitle, @@ -1317,9 +1261,6 @@ export default { uploadPinList, loadTeamDocuments, loadAllTeamDocuments, - confirmUploadAndParse, - cancelUpload, - getTeamLeagueName, parsePDF, getTeamDocuments, showPDFDialog,