Enhance HTML conversion for Satzung uploads by removing page numbers and improving list handling
Some checks failed
Code Analysis (JS/Vue) / analyze (push) Failing after 50s

This commit updates the text-to-HTML conversion function to remove page-number and page-footer lines (e.g. "Seite 2 von 4", "-2-") from the extracted text. It also introduces dedicated handling for lettered enumerations whose markers (a), b), c), …) stand on a line of their own: each marker is merged with the text that follows it into a single list item. These changes improve the overall quality and readability of the generated HTML content.
This commit is contained in:
Torsten Schulz (local)
2026-02-06 11:58:23 +01:00
parent 32d4486123
commit 717fdf3025

View File

@@ -167,6 +167,11 @@ function convertTextToHtml(text) {
.replace(/\n\s*\n/g, '\n\n') // Mehrfache Zeilenumbrüche reduzieren
.trim()
// Seitenzahlen und Seitenfuß entfernen (z.B. "Seite 2 von 4", "-2-")
html = html
.replace(/^Seite\s+\d+\s+von\s+\d+.*$/gm, '')
.replace(/^-+\d+-+\s*$/gm, '')
// Überschriften erkennen und formatieren
html = html.replace(/^(Vereinssatzung|Satzung)$/gm, '<h1>$1</h1>')
html = html.replace(/^(§\s*\d+[^§\n]*)$/gm, '<h2>$1</h2>')
@@ -181,15 +186,40 @@ function convertTextToHtml(text) {
return paragraph
}
// Listen erkennen
if (paragraph.includes('•') || paragraph.includes('-') || paragraph.match(/^\d+\./)) {
// Spezielle Behandlung für Aufzählungen mit a), b), c) ...
if (paragraph.match(/^[a-z]\)\s*$/mi)) {
const lines = paragraph.split('\n').map(l => l.trim()).filter(Boolean)
const items = []
let current = ''
for (const line of lines) {
if (/^[a-z]\)\s*$/i.test(line)) {
// neuer Aufzählungspunkt, vorherigen abschließen
if (current) items.push(current.trim())
current = line
} else {
// Text zum aktuellen Aufzählungspunkt hinzufügen
current += (current ? ' ' : '') + line
}
}
if (current) items.push(current.trim())
const listItems = items.map(item => {
return `<li>${item}</li>`
}).join('')
return `<ul>${listItems}</ul>`
}
// Allgemeine Listen erkennen (Bullet "•", Bindestrich- oder Nummern-Listen)
if (paragraph.includes('•') || paragraph.match(/^[\-•]\s/m) || paragraph.match(/^\d+\.\s/m)) {
const listItems = paragraph.split(/\n/).map(item => {
item = item.trim()
if (item.match(/^[•-]\s/) || item.match(/^\d+\.\s/)) {
return `<li>${item.replace(/^[•-]\s/, '').replace(/^\d+\.\s/, '')}</li>`
}
return `<li>${item}</li>`
}).join('')
return item ? `<li>${item}</li>` : ''
}).filter(Boolean).join('')
return `<ul>${listItems}</ul>`
}