From 717fdf3025684e33f8f58ed2bf41a460f4c0f5d8 Mon Sep 17 00:00:00 2001 From: "Torsten Schulz (local)" Date: Fri, 6 Feb 2026 11:58:23 +0100 Subject: [PATCH] Enhance HTML conversion for Satzung uploads by removing page numbers and improving list handling This commit updates the text-to-HTML conversion function to remove page numbers and footers from the extracted text. It also introduces enhanced handling for enumerated lists, allowing for better formatting of items with specific patterns (e.g., a), b), c)). These changes improve the overall quality and readability of the generated HTML content. --- server/api/cms/satzung-upload.post.js | 38 ++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/server/api/cms/satzung-upload.post.js b/server/api/cms/satzung-upload.post.js index 20602a8..e850b3c 100644 --- a/server/api/cms/satzung-upload.post.js +++ b/server/api/cms/satzung-upload.post.js @@ -167,6 +167,11 @@ function convertTextToHtml(text) { .replace(/\n\s*\n/g, '\n\n') // Mehrfache Zeilenumbrüche reduzieren .trim() + // Seitenzahlen und Seitenfuß entfernen (z.B. "Seite 2 von 4", "-2-") + html = html + .replace(/^Seite\s+\d+\s+von\s+\d+.*$/gm, '') + .replace(/^-+\d+-+\s*$/gm, '') + // Überschriften erkennen und formatieren html = html.replace(/^(Vereinssatzung|Satzung)$/gm, '

$1

') html = html.replace(/^(§\s*\d+[^§\n]*)$/gm, '

$1

') @@ -181,15 +186,40 @@ function convertTextToHtml(text) { return paragraph } - // Listen erkennen - if (paragraph.includes('•') || paragraph.includes('-') || paragraph.match(/^\d+\./)) { + // Spezielle Behandlung für Aufzählungen mit a), b), c) ... + if (paragraph.match(/^[a-z]\)\s*$/mi)) { + const lines = paragraph.split('\n').map(l => l.trim()).filter(Boolean) + const items = [] + let current = '' + + for (const line of lines) { + if (/^[a-z]\)\s*$/i.test(line)) { + // neuer Aufzählungspunkt, vorherigen abschließen + if (current) items.push(current.trim()) + current = line + } else { + // Text zum aktuellen Aufzählungspunkt hinzufügen + current += (current ? ' ' : '') + line + } + } + if (current) items.push(current.trim()) + + const listItems = items.map(item => { + return `
  • ${item}
  • ` + }).join('') + + return `` + } + + // Allgemeine Listen erkennen (Bullet "•", Bindestrich- oder Nummern-Listen) + if (paragraph.includes('•') || paragraph.match(/^[\-•]\s/m) || paragraph.match(/^\d+\.\s/m)) { const listItems = paragraph.split(/\n/).map(item => { item = item.trim() if (item.match(/^[•-]\s/) || item.match(/^\d+\.\s/)) { return `
  • ${item.replace(/^[•-]\s/, '').replace(/^\d+\.\s/, '')}
  • ` } - return `
  • ${item}
  • ` - }).join('') + return item ? `
  • ${item}
  • ` : '' + }).filter(Boolean).join('') return `` }