Enhance HTML conversion for Satzung uploads by removing page numbers and improving list handling
Some checks failed
Code Analysis (JS/Vue) / analyze (push) Failing after 50s
Some checks failed
Code Analysis (JS/Vue) / analyze (push) Failing after 50s
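This commit updates the text-to-HTML conversion function (convertTextToHtml) to remove page-number and footer lines such as "Seite 2 von 4" or "-2-" from the extracted text. It also adds dedicated handling for lettered enumerations whose markers (a), b), c), …) stand alone on a line: each marker is merged with the text lines that follow it into a single list item. In addition, general list detection is stricter: a hyphen or number marker must now appear at the start of a line and be followed by whitespace (previously any hyphen anywhere in the paragraph triggered list formatting), and empty list items are dropped. These changes improve the overall quality and readability of the generated HTML content.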
This commit is contained in:
@@ -167,6 +167,11 @@ function convertTextToHtml(text) {
|
|||||||
.replace(/\n\s*\n/g, '\n\n') // Mehrfache Zeilenumbrüche reduzieren
|
.replace(/\n\s*\n/g, '\n\n') // Mehrfache Zeilenumbrüche reduzieren
|
||||||
.trim()
|
.trim()
|
||||||
|
|
||||||
|
// Seitenzahlen und Seitenfuß entfernen (z.B. "Seite 2 von 4", "-2-")
|
||||||
|
html = html
|
||||||
|
.replace(/^Seite\s+\d+\s+von\s+\d+.*$/gm, '')
|
||||||
|
.replace(/^-+\d+-+\s*$/gm, '')
|
||||||
|
|
||||||
// Überschriften erkennen und formatieren
|
// Überschriften erkennen und formatieren
|
||||||
html = html.replace(/^(Vereinssatzung|Satzung)$/gm, '<h1>$1</h1>')
|
html = html.replace(/^(Vereinssatzung|Satzung)$/gm, '<h1>$1</h1>')
|
||||||
html = html.replace(/^(§\s*\d+[^§\n]*)$/gm, '<h2>$1</h2>')
|
html = html.replace(/^(§\s*\d+[^§\n]*)$/gm, '<h2>$1</h2>')
|
||||||
@@ -181,15 +186,40 @@ function convertTextToHtml(text) {
|
|||||||
return paragraph
|
return paragraph
|
||||||
}
|
}
|
||||||
|
|
||||||
// Listen erkennen
|
// Spezielle Behandlung für Aufzählungen mit a), b), c) ...
|
||||||
if (paragraph.includes('•') || paragraph.includes('-') || paragraph.match(/^\d+\./)) {
|
if (paragraph.match(/^[a-z]\)\s*$/mi)) {
|
||||||
|
const lines = paragraph.split('\n').map(l => l.trim()).filter(Boolean)
|
||||||
|
const items = []
|
||||||
|
let current = ''
|
||||||
|
|
||||||
|
for (const line of lines) {
|
||||||
|
if (/^[a-z]\)\s*$/i.test(line)) {
|
||||||
|
// neuer Aufzählungspunkt, vorherigen abschließen
|
||||||
|
if (current) items.push(current.trim())
|
||||||
|
current = line
|
||||||
|
} else {
|
||||||
|
// Text zum aktuellen Aufzählungspunkt hinzufügen
|
||||||
|
current += (current ? ' ' : '') + line
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (current) items.push(current.trim())
|
||||||
|
|
||||||
|
const listItems = items.map(item => {
|
||||||
|
return `<li>${item}</li>`
|
||||||
|
}).join('')
|
||||||
|
|
||||||
|
return `<ul>${listItems}</ul>`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allgemeine Listen erkennen (Bullet "•", Bindestrich- oder Nummern-Listen)
|
||||||
|
if (paragraph.includes('•') || paragraph.match(/^[\-•]\s/m) || paragraph.match(/^\d+\.\s/m)) {
|
||||||
const listItems = paragraph.split(/\n/).map(item => {
|
const listItems = paragraph.split(/\n/).map(item => {
|
||||||
item = item.trim()
|
item = item.trim()
|
||||||
if (item.match(/^[•-]\s/) || item.match(/^\d+\.\s/)) {
|
if (item.match(/^[•-]\s/) || item.match(/^\d+\.\s/)) {
|
||||||
return `<li>${item.replace(/^[•-]\s/, '').replace(/^\d+\.\s/, '')}</li>`
|
return `<li>${item.replace(/^[•-]\s/, '').replace(/^\d+\.\s/, '')}</li>`
|
||||||
}
|
}
|
||||||
return `<li>${item}</li>`
|
return item ? `<li>${item}</li>` : ''
|
||||||
}).join('')
|
}).filter(Boolean).join('')
|
||||||
return `<ul>${listItems}</ul>`
|
return `<ul>${listItems}</ul>`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user