From 581e80bbc37203c4a7ff6546402a57854724a38d Mon Sep 17 00:00:00 2001 From: "Torsten Schulz (local)" Date: Fri, 6 Feb 2026 11:58:23 +0100 Subject: [PATCH] Enhance HTML conversion for Satzung uploads by removing page numbers and improving list handling This commit updates the text-to-HTML conversion function to remove page numbers and footers from the extracted text. It also introduces enhanced handling for enumerated lists, allowing for better formatting of items with specific patterns (e.g., a), b), c)). These changes improve the overall quality and readability of the generated HTML content. --- server/api/cms/satzung-upload.post.js | 38 ++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/server/api/cms/satzung-upload.post.js b/server/api/cms/satzung-upload.post.js index 20602a8..e850b3c 100644 --- a/server/api/cms/satzung-upload.post.js +++ b/server/api/cms/satzung-upload.post.js @@ -167,6 +167,11 @@ function convertTextToHtml(text) { .replace(/\n\s*\n/g, '\n\n') // Mehrfache Zeilenumbrüche reduzieren .trim() + // Seitenzahlen und Seitenfuß entfernen (z.B. "Seite 2 von 4", "-2-") + html = html + .replace(/^Seite\s+\d+\s+von\s+\d+.*$/gm, '') + .replace(/^-+\d+-+\s*$/gm, '') + // Überschriften erkennen und formatieren html = html.replace(/^(Vereinssatzung|Satzung)$/gm, '

$1

') html = html.replace(/^(§\s*\d+[^§\n]*)$/gm, '

$1

') @@ -181,15 +186,40 @@ function convertTextToHtml(text) { return paragraph } - // Listen erkennen - if (paragraph.includes('•') || paragraph.includes('-') || paragraph.match(/^\d+\./)) { + // Spezielle Behandlung für Aufzählungen mit a), b), c) ... + if (paragraph.match(/^[a-z]\)\s*$/mi)) { + const lines = paragraph.split('\n').map(l => l.trim()).filter(Boolean) + const items = [] + let current = '' + + for (const line of lines) { + if (/^[a-z]\)\s*$/i.test(line)) { + // neuer Aufzählungspunkt, vorherigen abschließen + if (current) items.push(current.trim()) + current = line + } else { + // Text zum aktuellen Aufzählungspunkt hinzufügen + current += (current ? ' ' : '') + line + } + } + if (current) items.push(current.trim()) + + const listItems = items.map(item => { + return `
  • ${item}
  • ` + }).join('') + + return `` + } + + // Allgemeine Listen erkennen (Bullet "•", Bindestrich- oder Nummern-Listen) + if (paragraph.includes('•') || paragraph.match(/^[\-•]\s/m) || paragraph.match(/^\d+\.\s/m)) { const listItems = paragraph.split(/\n/).map(item => { item = item.trim() if (item.match(/^[•-]\s/) || item.match(/^\d+\.\s/)) { return `
  • ${item.replace(/^[•-]\s/, '').replace(/^\d+\.\s/, '')}
  • ` } - return `
  • ${item}
  • ` - }).join('') + return item ? `
  • ${item}
  • ` : '' + }).filter(Boolean).join('') return `` }