Integrate PDF parsing functionality: Add 'pdf-parse' dependency to package.json and package-lock.json. Update worshipController to include logic for handling PDF imports, enhancing the event management process. Refactor routing to support new newsletter import features and improve event form handling for better user experience.
This commit is contained in:
@@ -5,6 +5,7 @@ const { isTokenBlacklisted, addTokenToBlacklist } = require('../utils/blacklist'
|
||||
const multer = require('multer');
|
||||
const upload = multer({ storage: multer.memoryStorage() });
|
||||
const mammoth = require('mammoth');
|
||||
const pdfParse = require('pdf-parse');
|
||||
const { Document, Packer, Paragraph, Table, TableRow, TableCell, TextRun, WidthType, AlignmentType, VerticalAlign, ShadingType, VerticalMerge, VerticalMergeType, FontFamily, HeadingLevel, PageMargin, SectionType, BorderStyle, HeightRule } = require('docx');
|
||||
|
||||
function isAuthorized(req) {
|
||||
@@ -246,6 +247,8 @@ function parseWorshipFromCell(cellText, date, dayName) {
|
||||
});
|
||||
|
||||
const fullText = cellText.trim();
|
||||
const hasNeighborInvitation = /einladung zum gottesdienst im nachbarschaftsraum/i.test(fullText) || /\[\[FLAG_NEIGHBOR_INVITATION\]\]/.test(fullText);
|
||||
const hasSelfInformation = /bitte informieren sie sich auch auf den internetseiten/i.test(fullText) || /\[\[FLAG_SELF_INFORMATION\]\]/.test(fullText);
|
||||
|
||||
// Wenn Zeilenumbrüche vorhanden sind, verwende die zeilenbasierte Logik
|
||||
if (lines.length > 1) {
|
||||
@@ -268,6 +271,8 @@ function parseWorshipFromCell(cellText, date, dayName) {
|
||||
neighborInvitation: false,
|
||||
introLine: ''
|
||||
};
|
||||
worship.neighborInvitation = hasNeighborInvitation;
|
||||
worship.selfInformation = hasSelfInformation;
|
||||
|
||||
console.log(` parseWorshipFromCell: Volltext: "${fullText.substring(0, 200)}..."`);
|
||||
|
||||
@@ -283,19 +288,33 @@ function parseWorshipFromCell(cellText, date, dayName) {
|
||||
const textAfterTime = fullText.substring(timeMatch[0].length).trim();
|
||||
|
||||
// Titel extrahieren: Alles bis zum ersten "Gestaltung:", "Dienst:", "Kollekte:" oder "Orgel:"
|
||||
const titleEndMatch = textAfterTime.match(/(Gestaltung|Dienst|Kollekte|Orgel):/i);
|
||||
const titleEndMatch = textAfterTime.match(/(Gestaltung|Dienst|Kollekte|Orgel|Bitte informieren):/i);
|
||||
if (titleEndMatch) {
|
||||
let title = textAfterTime.substring(0, titleEndMatch.index).trim();
|
||||
// Entferne häufige Wörter am Anfang
|
||||
title = title.replace(/^(Gottesdienst|Gemeinsamer Gottesdienst|Einladung zum Gottesdienst)\s*/i, '');
|
||||
// Entferne "in", "am", "zu" + Ort am Ende, wenn vorhanden (aber behalte den Rest)
|
||||
title = title.replace(/\s+(in|am|zu)\s+([A-ZÄÖÜ][A-ZÄÖÜa-zäöüß-]+)$/, '');
|
||||
title = title
|
||||
.replace(/\[\[FLAG_NEIGHBOR_INVITATION\]\]/g, '')
|
||||
.replace(/\[\[FLAG_SELF_INFORMATION\]\]/g, '')
|
||||
.replace(/bitte informieren sie sich auch auf den internetseiten.*$/i, '')
|
||||
.replace(/\|/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
worship.title = title || 'Gottesdienst';
|
||||
} else {
|
||||
// Falls keine Markierungen gefunden, nimm den gesamten Text als Titel
|
||||
let title = textAfterTime;
|
||||
title = title.replace(/^(Gottesdienst|Gemeinsamer Gottesdienst|Einladung zum Gottesdienst)\s*/i, '');
|
||||
worship.title = title.substring(0, 100) || 'Gottesdienst';
|
||||
title = title
|
||||
.replace(/\[\[FLAG_NEIGHBOR_INVITATION\]\]/g, '')
|
||||
.replace(/\[\[FLAG_SELF_INFORMATION\]\]/g, '')
|
||||
.replace(/bitte informieren sie sich auch auf den internetseiten.*$/i, '')
|
||||
.replace(/\|/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
worship.title = title.substring(0, 140) || 'Gottesdienst';
|
||||
}
|
||||
console.log(` parseWorshipFromCell: Titel extrahiert: "${worship.title}"`);
|
||||
|
||||
@@ -344,7 +363,13 @@ function parseWorshipFromCell(cellText, date, dayName) {
|
||||
// Falls keine Uhrzeit gefunden, versuche Titel direkt zu extrahieren
|
||||
const titleMatch = fullText.match(/^(.+?)(?=Gestaltung:|Dienst:|Kollekte:|Orgel:|$)/i);
|
||||
if (titleMatch) {
|
||||
worship.title = titleMatch[1].trim();
|
||||
worship.title = titleMatch[1]
|
||||
.replace(/\[\[FLAG_NEIGHBOR_INVITATION\]\]/g, '')
|
||||
.replace(/\[\[FLAG_SELF_INFORMATION\]\]/g, '')
|
||||
.replace(/bitte informieren sie sich auch auf den internetseiten.*$/i, '')
|
||||
.replace(/\|/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
} else {
|
||||
worship.title = fullText.substring(0, 100);
|
||||
}
|
||||
@@ -440,6 +465,15 @@ function parseWorshipFromCellWithLines(lines, date, dayName) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (/einladung zum gottesdienst im nachbarschaftsraum/i.test(line) || /\[\[FLAG_NEIGHBOR_INVITATION\]\]/.test(line)) {
|
||||
worship.neighborInvitation = true;
|
||||
continue;
|
||||
}
|
||||
if (/bitte informieren sie sich auch auf den internetseiten/i.test(line) || /\[\[FLAG_SELF_INFORMATION\]\]/.test(line)) {
|
||||
worship.selfInformation = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Gestalter
|
||||
if (line.toLowerCase().includes('gestaltung:')) {
|
||||
worship.organizer = line.replace(/^.*gestaltung:\s*/i, '').trim();
|
||||
@@ -471,6 +505,13 @@ function parseWorshipFromCellWithLines(lines, date, dayName) {
|
||||
}
|
||||
}
|
||||
|
||||
worship.title = worship.title
|
||||
.replace(/\[\[FLAG_NEIGHBOR_INVITATION\]\]/g, '')
|
||||
.replace(/\[\[FLAG_SELF_INFORMATION\]\]/g, '')
|
||||
.replace(/\|/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
|
||||
// Mindestanforderungen prüfen
|
||||
if (!worship.time || !worship.title) {
|
||||
console.log(` parseWorshipFromCellWithLines: Fehlgeschlagen - time: ${worship.time}, title: "${worship.title}"`);
|
||||
@@ -1244,6 +1285,444 @@ exports.saveImportedWorships = async (req, res) => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Converts the raw text extracted from a PDF into a list of cleaned lines.
 *
 * Each line has its whitespace runs collapsed to a single space and is
 * trimmed; empty lines and page markers of the form "-- 3 of 12 --" are
 * dropped.
 *
 * @param {string} rawText - Text content of the PDF.
 * @returns {string[]} Non-empty, normalized lines in document order.
 */
function normalizePdfLines(rawText) {
  const pageMarker = /^--\s*\d+\s+of\s+\d+\s*--$/i;
  const cleaned = [];
  for (const rawLine of rawText.split('\n')) {
    const line = rawLine.replace(/\s+/g, ' ').trim();
    if (line.length === 0) continue;
    if (pageMarker.test(line)) continue;
    cleaned.push(line);
  }
  return cleaned;
}
|
||||
|
||||
/**
 * Finds the index of the first line at or after `from` that satisfies the
 * predicate.
 *
 * @param {string[]} lines - Lines to scan.
 * @param {(line: string) => boolean} predicate - Called with the line only.
 * @param {number} [from=0] - Index at which scanning starts.
 * @returns {number} Index of the first match, or -1 when nothing matches.
 */
function findFirstIndex(lines, predicate, from = 0) {
  let position = from;
  while (position < lines.length) {
    if (predicate(lines[position])) {
      return position;
    }
    position += 1;
  }
  return -1;
}
|
||||
|
||||
/**
 * Returns the slice of `lines` that starts at the first line matching
 * `startPredicate` and ends just before the earliest line (after the start)
 * that matches any of the `endPredicates`.
 *
 * @param {string[]} lines - Lines to search.
 * @param {(line: string) => boolean} startPredicate - Marks the section start.
 * @param {Array<(line: string) => boolean>} [endPredicates=[]] - Candidate section ends.
 * @returns {string[]} The section including its start line; empty when no
 *   start is found. Runs to the end of `lines` when no end predicate matches.
 */
function getSection(lines, startPredicate, endPredicates = []) {
  const begin = findFirstIndex(lines, startPredicate);
  if (begin < 0) {
    return [];
  }
  const stop = endPredicates.reduce((boundary, isEnd) => {
    const hit = findFirstIndex(lines, isEnd, begin + 1);
    return hit >= 0 ? Math.min(boundary, hit) : boundary;
  }, lines.length);
  return lines.slice(begin, stop);
}
|
||||
|
||||
/**
 * Produces a comparison-friendly form of a string: lower-cased, German
 * umlauts and sharp s transliterated (ä→ae, ö→oe, ü→ue, ß→ss), whitespace
 * runs collapsed and the result trimmed.
 *
 * @param {*} input - Value to normalize; falsy values are treated as ''.
 * @returns {string} The normalized text.
 */
function normalizeText(input) {
  const transliterations = [
    [/ä/g, 'ae'],
    [/ö/g, 'oe'],
    [/ü/g, 'ue'],
    [/ß/g, 'ss'],
  ];
  let text = String(input || '').toLowerCase();
  for (const [pattern, replacement] of transliterations) {
    text = text.replace(pattern, replacement);
  }
  return text.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
/**
 * Checks whether a line equals a heading once both have been passed
 * through normalizeText.
 *
 * @param {string} line - Line from the document.
 * @param {string} heading - Heading text to compare against.
 * @returns {boolean} True when the normalized forms are identical.
 */
function isHeading(line, heading) {
  const actual = normalizeText(line);
  const expected = normalizeText(heading);
  return actual === expected;
}
|
||||
|
||||
/**
 * Returns the lines from the first occurrence of `startHeading` up to (but
 * not including) the earliest following line that equals one of the
 * `endHeadings`. Headings are compared via isHeading.
 *
 * @param {string[]} lines - Lines to search.
 * @param {string} startHeading - Heading that opens the section.
 * @param {string[]} [endHeadings=[]] - Headings that may close the section.
 * @returns {string[]} The section including its heading line; empty when the
 *   start heading is not present.
 */
function getSectionByHeading(lines, startHeading, endHeadings = []) {
  const matches = (heading) => (candidate) => isHeading(candidate, heading);

  const begin = findFirstIndex(lines, matches(startHeading));
  if (begin < 0) {
    return [];
  }

  let stop = lines.length;
  endHeadings.forEach((endHeading) => {
    const hit = findFirstIndex(lines, matches(endHeading), begin + 1);
    if (hit >= 0 && hit < stop) {
      stop = hit;
    }
  });
  return lines.slice(begin, stop);
}
|
||||
|
||||
/**
 * Keeps the lines that mention a date ("12.03.", "12.03.2025") or a clock
 * time ("10.30 Uhr", "10:30 Uhr"), dropping case-insensitive duplicates.
 *
 * The date pattern deliberately stops after the "day.month." part. The
 * previous pattern ended in an optional year followed by `\b`, and a word
 * boundary cannot occur between "." and a space, punctuation or end of
 * line, so year-less dates such as "am 12.03. um ..." were never detected.
 * The new pattern matches everything the old one matched plus those dates.
 *
 * @param {string[]} lines - Candidate lines.
 * @returns {string[]} Unique lines containing a date or time, in input order.
 */
function extractEventCandidates(lines) {
  const datePattern = /\b\d{1,2}\.\d{1,2}\./;
  const timePattern = /\b\d{1,2}[:.]\d{2}\s*uhr\b/i;
  const seen = new Set();

  return lines.filter((line) => {
    if (!datePattern.test(line) && !timePattern.test(line)) {
      return false;
    }
    const key = line.toLowerCase();
    if (seen.has(key)) {
      return false;
    }
    seen.add(key);
    return true;
  });
}
|
||||
|
||||
/**
 * Tells whether a line, ignoring surrounding whitespace and letter case,
 * is exactly one of the known section headings.
 *
 * @param {string} line - Line to check.
 * @returns {boolean} True when the trimmed line is a known heading.
 */
function looksLikeHeading(line) {
  const knownHeadings = /^(gottesdienste|regelmäßige termine|männer und frauen|kinder und jugend|senioren|besondere gottesdienste|und veranstaltungen)$/i;
  const candidate = line.trim();
  return knownHeadings.test(candidate);
}
|
||||
|
||||
/**
 * Tells whether a line contains a date ("7.12.", "07.12.25", "07.12.2025",
 * also in lists like "5.3., 12.3.") or a clock time followed by "Uhr"
 * ("10.30 Uhr", "10:30 Uhr", "10.00 - 11.30 Uhr").
 *
 * The date pattern stops after "day.month.". The previous patterns ended
 * in an optional year followed by `\b`; since there is no word boundary
 * between "." and a space, punctuation or end of line, year-less dates were
 * missed. The former date-list and time-range patterns are covered by the
 * two patterns below (each of their matches contains a plain date or a
 * plain "hh.mm Uhr"), so the result is a strict superset of what matched
 * before.
 *
 * @param {string} line - Line to inspect.
 * @returns {boolean} True when a date or time is present.
 */
function hasDateOrTime(line) {
  const datePattern = /\b\d{1,2}\.\d{1,2}\./;
  const timePattern = /\b\d{1,2}[:.]\d{2}\s*uhr\b/i;
  return datePattern.test(line) || timePattern.test(line);
}
|
||||
|
||||
/**
 * Builds one detail string per line that contains a date or time, enriched
 * with its direct neighbours as context.
 *
 * The previous and next line are attached (joined with " | ") when they are
 * non-empty, carry no date/time themselves, are not a known heading and are
 * shorter than 120 characters. Results are de-duplicated case-insensitively.
 *
 * @param {string[]} lines - Section lines.
 * @returns {string[]} Detail strings in document order.
 */
function buildDetailedItems(lines) {
  const isContext = (candidate) =>
    Boolean(candidate) &&
    !hasDateOrTime(candidate) &&
    !looksLikeHeading(candidate) &&
    candidate.length < 120;

  const emitted = new Set();
  const items = [];

  for (const [index, line] of lines.entries()) {
    if (!hasDateOrTime(line)) continue;

    const before = index > 0 ? lines[index - 1] : '';
    const after = index + 1 < lines.length ? lines[index + 1] : '';

    const segments = [];
    if (isContext(before)) segments.push(before);
    segments.push(line);
    if (isContext(after)) segments.push(after);

    const text = segments.join(' | ');
    const key = text.toLowerCase();
    if (emitted.has(key)) continue;
    emitted.add(key);
    items.push(text);
  }

  return items;
}
|
||||
|
||||
/**
 * Tells whether a line is boilerplate rather than event content: imprint,
 * editorial deadline, office hours, e-mail contact fragments, privacy
 * notice or logout text. Matching is done on the normalizeText form of the
 * line.
 *
 * @param {string} line - Line to classify.
 * @returns {boolean} True when the line contains one of the noise markers.
 */
function isNoiseLine(line) {
  const markers = [
    'impressum',
    'redaktionsschluss',
    'visdp',
    'buerozeiten',
    '@:',
    '@t-online.de',
    'datenschutzerklaerung',
    'logout',
  ];
  const normalized = normalizeText(line);
  return markers.some((marker) => normalized.includes(marker));
}
|
||||
|
||||
/**
 * Returns a copy of `lines` without the entries that isNoiseLine classifies
 * as boilerplate.
 *
 * @param {string[]} lines - Lines to filter.
 * @returns {string[]} The remaining lines in their original order.
 */
function filterNoise(lines) {
  const isContent = (line) => isNoiseLine(line) === false;
  return lines.filter(isContent);
}
|
||||
|
||||
/**
 * Collects a block for every line matching `pattern`: the matching line plus
 * following lines (within `maxLookahead`) that carry a date or time.
 *
 * Look-ahead stops early at a known heading, at a noise line, or once the
 * block holds `maxParts` parts. Parts are joined with " | " and identical
 * blocks are returned only once.
 *
 * @param {string[]} lines - Lines to scan.
 * @param {RegExp} pattern - Identifies the anchor line of a block.
 * @param {number} [maxLookahead=3] - How many following lines to inspect.
 * @param {number} [maxParts=3] - Maximum number of parts per block.
 * @returns {string[]} Unique blocks in document order.
 */
function extractNamedBlock(lines, pattern, maxLookahead = 3, maxParts = 3) {
  const explicitTime = /\bum\s+\d{1,2}[:.]\d{2}\s*uhr\b/i;
  const collected = [];

  for (let anchor = 0; anchor < lines.length; anchor += 1) {
    if (!pattern.test(lines[anchor])) continue;

    const segments = [lines[anchor]];
    const limit = Math.min(lines.length, anchor + 1 + maxLookahead);
    let cursor = anchor + 1;

    while (cursor < limit) {
      const candidate = lines[cursor];
      if (looksLikeHeading(candidate) || isNoiseLine(candidate)) break;
      if (hasDateOrTime(candidate) || explicitTime.test(candidate)) {
        segments.push(candidate);
      }
      if (segments.length >= maxParts) break;
      cursor += 1;
    }

    collected.push(segments.join(' | '));
  }

  return Array.from(new Set(collected));
}
|
||||
|
||||
/**
 * Selects the lines that match the given pattern.
 *
 * @param {string[]} lines - Lines to search.
 * @param {RegExp} pattern - Keyword pattern tested against each line.
 * @returns {string[]} Matching lines in their original order.
 */
function extractLinesByKeyword(lines, pattern) {
  const matchesKeyword = (line) => pattern.test(line);
  return lines.filter(matchesKeyword);
}
|
||||
|
||||
/**
 * Extracts the recurring-appointment entries from a section.
 *
 * A line is an anchor when it names one of the known groups (Kinderkirche,
 * Jungschar, Miriamtreff, ...) and is neither noise nor one of the excluded
 * announcement phrases. Up to two directly following lines are attached as
 * long as each carries schedule information (a date/time, a "Termine" label
 * or a weekday name); the first line without such information, a heading or
 * a noise line ends the look-ahead. Entries without any schedule information
 * are discarded. Parts are joined with " | " and duplicates are removed
 * case-insensitively.
 *
 * @param {string[]} lines - Lines of the recurring-appointments section.
 * @returns {string[]} Entry strings in document order.
 */
function extractRegularTermineDetails(lines) {
  const anchors = [
    /kinderkirche/i,
    /kigosabo/i,
    /jungschar/i,
    /konfirmationsunterricht/i,
    /konfirmanden\s*[„"]/i,
    /was geht abend/i,
    /vorkonfirmandenkurs/i,
    /pfadfinder/i,
    /miriamtreff/i,
    /m[aä]nnerpalaver/i,
    /frauenfr[üu]hst[üu]ck/i,
    /kinder- und jugendb[üu]cherei/i,
    /wunderkiste/i,
  ];
  // Lines that mention an anchor word but are announcements, not schedules.
  const exclusions = [
    /start des neuen konfirmanden-jahrganges/i,
    /konfirmanden\s*\/\s*geburtstagsgr[üu][ßs]e/i,
    /jahrgang der miriamgemeinde/i,
  ];
  const scheduleLabel = /termine[:\s]/i;
  // Weekday names are matched as substrings. The earlier pattern
  // /\bmontag|dienstag|...|sonntag\b/ applied its word boundaries only to the
  // first and last alternative (alternation binds looser than \b), so
  // "dienstags" matched while "sonntags" did not. Substring matching treats
  // all seven days alike, still accepts everything that matched before, and
  // agrees with how isLikelyDayNameLine looks for weekday names.
  const weekdayName = /montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag/i;

  const details = [];
  const seen = new Set();

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (!anchors.some((anchor) => anchor.test(line))) continue;
    if (isNoiseLine(line)) continue;
    if (exclusions.some((exclusion) => exclusion.test(line))) continue;

    const parts = [line];
    let hasScheduleSignal = hasDateOrTime(line) || scheduleLabel.test(line);

    // Inspect at most the two lines that follow the anchor.
    const lookaheadEnd = Math.min(lines.length, i + 3);
    for (let j = i + 1; j < lookaheadEnd; j++) {
      const next = lines[j];
      if (looksLikeHeading(next) || isNoiseLine(next)) break;
      const carriesSchedule =
        hasDateOrTime(next) || scheduleLabel.test(next) || weekdayName.test(next);
      if (!carriesSchedule) break;
      parts.push(next);
      hasScheduleSignal = true;
    }
    if (!hasScheduleSignal) continue;

    let text = parts.join(' | ');
    // Some PDFs deliver Miriamtreff and Männerpalaver back to back; for the
    // recurring-appointments category only the Miriamtreff part is kept.
    if (/^miriamtreff:/i.test(text) && /\|\s*m[aä]nnerpalaver/i.test(text)) {
      text = text.split(/\|\s*m[aä]nnerpalaver/i)[0].trim();
    }

    const key = text.toLowerCase();
    if (!seen.has(key)) {
      seen.add(key);
      details.push(text);
    }
  }

  return details;
}
|
||||
|
||||
/**
 * Tells whether a line contains a calendar date of the form "day.month."
 * with an optional year ("07.12.", "7.12.25", "07.12.2025").
 *
 * Only the "day.month." part is required. The previous pattern ended in an
 * optional year followed by `\b`; because no word boundary exists between
 * "." and a space, punctuation or end of line, year-less dates such as
 * "So. 07.12." were not recognised. Every line the old pattern accepted is
 * still accepted.
 *
 * @param {string} line - Line to inspect.
 * @returns {boolean} True when the line contains a date.
 */
function isDateHeaderLine(line) {
  return /\b\d{1,2}\.\d{1,2}\./.test(line);
}
|
||||
|
||||
/**
 * Heuristic for lines that name the day of a service, e.g. a weekday or a
 * church-year term such as Advent, Trinitatis or Ostern.
 *
 * Empty lines, lines with a date/time and known headings never qualify. The
 * normalized line must contain one of the day words and be shorter than 80
 * characters.
 *
 * @param {string} line - Line to classify.
 * @returns {boolean} True when the line looks like a day name.
 */
function isLikelyDayNameLine(line) {
  if (!line || hasDateOrTime(line) || looksLikeHeading(line)) {
    return false;
  }
  const dayWords = /advent|trinitatis|epiphanias|ostern|pfingsten|sonntag|montag|dienstag|mittwoch|donnerstag|freitag|samstag/;
  const normalized = normalizeText(line);
  return dayWords.test(normalized) && normalized.length < 80;
}
|
||||
|
||||
/**
 * Splits the content lines of one day into separate service entries, one per
 * line that starts with a clock time ("10.30 Uhr ...").
 *
 * - Empty lines, noise lines and known headings are skipped.
 * - The neighbourhood-invitation and self-information notices are not kept
 *   as text. They append a flag token to the entry currently being built
 *   (if any) and, from then on, to every entry that is started.
 * - Lines without a time that appear before the first time line are ignored;
 *   after a time line they are attached to the running entry.
 *
 * @param {string[]} lines - Content lines belonging to one date header.
 * @returns {string[]} Entries whose parts are joined with " | ".
 */
function splitWorshipLinesByTime(lines) {
  const NEIGHBOR_FLAG = '[[FLAG_NEIGHBOR_INVITATION]]';
  const SELF_INFO_FLAG = '[[FLAG_SELF_INFORMATION]]';
  const timePrefix = /^\d{1,2}[:.]\d{2}\s*uhr\b/i;
  const neighborPhrase = /einladung zum gottesdienst im nachbarschaftsraum/i;
  const selfInfoPhrase = /bitte informieren sie sich auch auf den internetseiten/i;

  const finished = [];
  let open = null;
  let neighborSeen = false;
  let selfInfoSeen = false;
  const hasOpenEntry = () => open !== null && open.length > 0;

  for (const line of lines) {
    if (!line || isNoiseLine(line) || looksLikeHeading(line)) continue;

    if (neighborPhrase.test(line)) {
      neighborSeen = true;
      if (hasOpenEntry()) open.push(NEIGHBOR_FLAG);
      continue;
    }

    if (selfInfoPhrase.test(line)) {
      selfInfoSeen = true;
      if (hasOpenEntry()) open.push(SELF_INFO_FLAG);
      continue;
    }

    if (timePrefix.test(line)) {
      // A new time closes the running entry (if any) and opens the next one.
      if (hasOpenEntry()) finished.push(open.join(' | '));
      open = [line];
      if (neighborSeen) open.push(NEIGHBOR_FLAG);
      if (selfInfoSeen) open.push(SELF_INFO_FLAG);
      continue;
    }

    // Lines without a time only count once a service has been opened; before
    // the first service they are mere context and must not create an entry.
    if (hasOpenEntry()) {
      open.push(line);
    }
  }

  if (hasOpenEntry()) finished.push(open.join(' | '));
  return finished;
}
|
||||
|
||||
/**
 * Groups the lines of the services section by date header and emits one
 * block string per service.
 *
 * A date line opens a new group. Until the first line starting with a clock
 * time is seen, lines that look like day names are collected into the header
 * ("<date> - <day name>"); all other lines are content. Content is split
 * into services via splitWorshipLinesByTime; if that yields nothing, the
 * whole content is emitted as a single block. Lines before the first date
 * header, noise lines and known headings are ignored.
 *
 * @param {string[]} lines - Lines of the services section.
 * @returns {string[]} Unique, trimmed blocks of the form "<header> | <entry>".
 */
function extractWorshipBlocks(lines) {
  const timePrefix = /^\d{1,2}[:.]\d{2}\s*uhr\b/i;
  const collected = [];
  let header = '';
  let dayNameParts = [];
  let contentLines = [];
  let contentStarted = false;

  const emitPending = () => {
    if (!header || contentLines.length === 0) return;

    const dayName = dayNameParts.join(' ').replace(/\s+/g, ' ').trim();
    const prefix = dayName ? `${header} - ${dayName}` : header;
    const entries = splitWorshipLinesByTime(contentLines);

    if (entries.length > 0) {
      for (const entry of entries) {
        collected.push(`${prefix} | ${entry}`);
      }
      return;
    }

    const joined = contentLines.join(' | ').trim();
    if (joined) {
      collected.push(`${prefix} | ${joined}`);
    }
  };

  for (const line of lines) {
    if (!line || isNoiseLine(line) || looksLikeHeading(line)) continue;

    if (isDateHeaderLine(line)) {
      emitPending();
      header = line;
      dayNameParts = [];
      contentLines = [];
      contentStarted = false;
      continue;
    }

    // Nothing can be attributed to a day before the first date header.
    if (!header) continue;

    if (!contentStarted && isLikelyDayNameLine(line)) {
      dayNameParts.push(line);
      continue;
    }

    if (timePrefix.test(line)) {
      contentStarted = true;
    }
    contentLines.push(line);
  }

  emitPending();

  const unique = new Set();
  for (const block of collected) {
    const trimmed = block.trim();
    if (trimmed) unique.add(trimmed);
  }
  return [...unique];
}
|
||||
|
||||
exports.importNewsletterPdf = async (req, res) => {
|
||||
try {
|
||||
if (!req.file) {
|
||||
return res.status(400).json({ message: 'Keine PDF-Datei hochgeladen.' });
|
||||
}
|
||||
|
||||
const fileName = req.file.originalname.toLowerCase();
|
||||
if (!fileName.endsWith('.pdf')) {
|
||||
return res.status(400).json({ message: 'Bitte eine PDF-Datei hochladen.' });
|
||||
}
|
||||
|
||||
const parsed = await pdfParse(req.file.buffer);
|
||||
const lines = normalizePdfLines(parsed.text || '');
|
||||
|
||||
const gottesdiensteLines = getSection(
|
||||
lines,
|
||||
(l) => l.toLowerCase() === 'gottesdienste' || /^8\s+gottesdienste$/i.test(l),
|
||||
[
|
||||
(l) => /besondere gottesdienste/i.test(l),
|
||||
]
|
||||
);
|
||||
|
||||
const regelmaessigSection = getSectionByHeading(
|
||||
lines,
|
||||
'Regelmäßige Termine',
|
||||
['Neues von den Senioren', 'Kinder und Jugendliche']
|
||||
);
|
||||
const maennerFrauenSection = getSectionByHeading(
|
||||
lines,
|
||||
'Männer und Frauen',
|
||||
['Musik', 'Kinder und Jugendliche']
|
||||
);
|
||||
const regelmaessigLines = [...regelmaessigSection, ...maennerFrauenSection];
|
||||
|
||||
const besondereLines = getSectionByHeading(
|
||||
lines,
|
||||
'Besondere Gottesdienste',
|
||||
['Regelmäßige Termine', 'Männer und Frauen', 'Kinder und Jugendliche', 'Neues von den Senioren']
|
||||
);
|
||||
|
||||
const miriamtreffLines = extractLinesByKeyword(lines, /miriamtreff/i);
|
||||
const frauenfruehstueckLines = extractNamedBlock(lines, /frauenfrühstück|frauenfruehstueck/i, 8, 5);
|
||||
|
||||
const kinderJugendLines = getSection(
|
||||
lines,
|
||||
(l) => /^kinder und jugendliche$/i.test(l),
|
||||
[
|
||||
(l) => /^senioren$/i.test(l),
|
||||
]
|
||||
);
|
||||
|
||||
const cleanedGottesdienste = filterNoise(gottesdiensteLines);
|
||||
const cleanedRegelmaessig = filterNoise(regelmaessigLines);
|
||||
const cleanedBesondere = filterNoise(besondereLines);
|
||||
const cleanedKinderJugend = filterNoise(kinderJugendLines);
|
||||
|
||||
const regelmaessigDetails = extractRegularTermineDetails(cleanedRegelmaessig);
|
||||
|
||||
const parsedWorshipBlocks = extractWorshipBlocks(cleanedGottesdienste);
|
||||
|
||||
const result = {
|
||||
gottesdienste: parsedWorshipBlocks,
|
||||
regelmaessigeTermine: regelmaessigDetails,
|
||||
besondereGottesdienste: extractEventCandidates(cleanedBesondere),
|
||||
miriamtreff: miriamtreffLines,
|
||||
kinderUndJugend: extractEventCandidates(cleanedKinderJugend),
|
||||
frauenfruehstueck: frauenfruehstueckLines,
|
||||
};
|
||||
|
||||
const details = {
|
||||
gottesdienste: parsedWorshipBlocks,
|
||||
regelmaessigeTermine: regelmaessigDetails,
|
||||
besondereGottesdienste: buildDetailedItems(cleanedBesondere),
|
||||
miriamtreff: miriamtreffLines,
|
||||
kinderUndJugend: buildDetailedItems(cleanedKinderJugend),
|
||||
frauenfruehstueck: frauenfruehstueckLines,
|
||||
sectionInfo: {
|
||||
gottesdiensteLines: gottesdiensteLines.length,
|
||||
regelmaessigLines: regelmaessigLines.length,
|
||||
besondereLines: besondereLines.length,
|
||||
kinderJugendLines: kinderJugendLines.length,
|
||||
}
|
||||
};
|
||||
|
||||
const questions = [];
|
||||
if (result.gottesdienste.length === 0) {
|
||||
questions.push('Keine Gottesdienste sicher extrahiert. Abschnittsgrenze oder Muster prüfen.');
|
||||
}
|
||||
if (result.regelmaessigeTermine.length === 0) {
|
||||
questions.push('Regelmäßige Termine leer. Soll dieser Bereich seitenübergreifend weiter gefasst werden?');
|
||||
}
|
||||
if (result.besondereGottesdienste.length === 0) {
|
||||
questions.push('Besondere Gottesdienste leer. Eventuell weitere Muster/Orte notwendig.');
|
||||
}
|
||||
if (result.miriamtreff.length === 0) {
|
||||
questions.push('Miriamtreff nicht gefunden. Soll auch "Männer und Frauen" als Fallback gelten?');
|
||||
}
|
||||
if (result.kinderUndJugend.length === 0) {
|
||||
questions.push('Kinder/Jugend leer. Soll zusätzlich der Abschnitt "Kinder und Jugend" (Seite 19) priorisiert werden?');
|
||||
}
|
||||
|
||||
res.status(200).json({
|
||||
message: 'PDF geparst. Bitte Vorschau prüfen und offene Fragen beantworten.',
|
||||
parsed: result,
|
||||
details,
|
||||
questions,
|
||||
meta: {
|
||||
pages: parsed.numpages || null,
|
||||
lineCount: lines.length,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Fehler beim PDF-Import des Gemeindebriefs:', error);
|
||||
res.status(500).json({ message: 'Fehler beim Parsen der PDF-Datei.', error: error.message });
|
||||
}
|
||||
};
|
||||
|
||||
// Export-Funktion für Gottesdienste
|
||||
exports.exportWorships = async (req, res) => {
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user