Some checks failed
Code Analysis (JS/Vue) / analyze (push) Failing after 52s
This commit introduces a new mechanism for extracting text from uploaded PDF files using pdftotext, followed by a basic plausibility check of the extracted content. If the text meets the criteria, it is converted to HTML format and stored in the configuration, replacing the previous static content handling. This enhancement improves the accuracy and reliability of the Satzung content management.
205 lines
6.2 KiB
JavaScript
205 lines
6.2 KiB
JavaScript
import { exec, execFile } from 'child_process'
import fs from 'fs/promises'
import path from 'path'
import { promisify } from 'util'

import multer from 'multer'

import { getUserFromToken, hasAnyRole } from '../../utils/auth.js'
import { assertPdfMagicHeader } from '../../utils/upload-validation.js'
|
|
|
|
// Promise-returning wrapper around child_process.exec (runs the command through a shell
// and resolves with an object that carries the captured `stdout`).
const execAsync = promisify(exec)
|
|
|
|
/**
 * Resolve the absolute path of a file inside `server/data` for both the
 * development and the production directory layout.
 *
 * `filename` must always be a hardcoded constant (the handler in this file
 * passes 'config.json'), never user input. That invariant is what justifies
 * the path-traversal suppressions below.
 *
 * @param {string} filename - Name of the data file, relative to `server/data`.
 * @returns {string} Path of the data file based on the current working directory.
 */
// nosemgrep: javascript.lang.security.audit.path-traversal.path-join-resolve-traversal.path-join-resolve-traversal
const getDataPath = (filename) => {
  const cwd = process.cwd()

  // In production (.output/server) the working directory is `.output`,
  // so the data directory is one level up.
  if (cwd.endsWith('.output')) {
    // nosemgrep: javascript.lang.security.audit.path-traversal.path-join-resolve-traversal.path-join-resolve-traversal
    return path.join(cwd, '../server/data', filename)
  }

  // In development the working directory is the project root.
  // nosemgrep: javascript.lang.security.audit.path-traversal.path-join-resolve-traversal.path-join-resolve-traversal
  return path.join(cwd, 'server/data', filename)
}
|
|
|
|
// Multer configuration for PDF uploads.
// Destination and filename are deliberately left unset: multer's disk storage then
// writes the upload into the OS temp directory under a random name. The published
// file (public/documents/satzung.pdf) is only replaced after the upload has passed
// every validation step, so a rejected upload can no longer destroy the current PDF.
const storage = multer.diskStorage({})

const upload = multer({
  storage,
  // First line of defence only; the mimetype is client-supplied and re-checked
  // against the file's magic bytes inside the handler.
  fileFilter: (req, file, cb) => {
    if (file.mimetype === 'application/pdf') {
      cb(null, true)
    } else {
      cb(new Error('Nur PDF-Dateien sind erlaubt'), false)
    }
  },
  limits: {
    fileSize: 10 * 1024 * 1024 // 10 MB limit
  }
})

// execFile spawns pdftotext directly (no shell), so the file path is passed as a
// plain argument and never interpreted by a shell.
const execFileAsync = promisify(execFile)

// exec/execFile abort once stdout exceeds maxBuffer (1 MiB by default); allow more
// so that long documents do not fail extraction.
const PDFTOTEXT_MAX_OUTPUT_BYTES = 32 * 1024 * 1024

// Upper bound for a single pdftotext run so a malformed PDF cannot block the request forever.
const PDFTOTEXT_TIMEOUT_MS = 30000

/**
 * POST handler: upload a new Satzung PDF.
 *
 * Flow: authenticate (cookie `auth_token`, role `admin` or `vorstand`) → receive the
 * multipart field `pdf` into a temp file → verify PDF magic bytes → extract the text
 * with pdftotext → plausibility check → convert to HTML → publish the PDF and store
 * the HTML in config.json under `seiten.satzung`.
 *
 * Responds with `{ success, message, pdfUrl }`.
 * Throws 405 (wrong method), 401 (not authenticated), 403 (missing role),
 * 400 (no file) and 500 (extraction, plausibility or any other processing failure).
 */
export default defineEventHandler(async (event) => {
  if (event.method !== 'POST') {
    throw createError({
      statusCode: 405,
      statusMessage: 'Method Not Allowed'
    })
  }

  const token = getCookie(event, 'auth_token')
  const currentUser = token ? await getUserFromToken(token) : null

  if (!currentUser) {
    throw createError({
      statusCode: 401,
      statusMessage: 'Nicht authentifiziert'
    })
  }

  if (!hasAnyRole(currentUser, 'admin', 'vorstand')) {
    throw createError({
      statusCode: 403,
      statusMessage: 'Keine Berechtigung'
    })
  }

  try {
    // Run the multer middleware against the raw Node request/response.
    await new Promise((resolve, reject) => {
      upload.single('pdf')(event.node.req, event.node.res, (err) => {
        if (err) reject(err)
        else resolve()
      })
    })

    const file = event.node.req.file
    if (!file) {
      throw createError({
        statusCode: 400,
        statusMessage: 'Keine PDF-Datei hochgeladen'
      })
    }

    // Additional validation: check the magic bytes (the mimetype can be spoofed).
    await assertPdfMagicHeader(file.path)

    // 1. Extract the text with pdftotext (force UTF-8, write to stdout).
    let extractedText = ''
    try {
      const { stdout } = await execFileAsync(
        'pdftotext',
        ['-enc', 'UTF-8', file.path, '-'],
        { maxBuffer: PDFTOTEXT_MAX_OUTPUT_BYTES, timeout: PDFTOTEXT_TIMEOUT_MS }
      )
      extractedText = stdout || ''
    } catch (err) {
      console.error('pdftotext Fehler beim Verarbeiten der Satzung:', err)
      throw createError({
        statusCode: 500,
        statusMessage: 'Die Satzung konnte nicht aus dem PDF gelesen werden (pdftotext-Fehler). Bitte den Server-Administrator kontaktieren.'
      })
    }

    // Minimal plausibility check: enough text and the typical '§' marker of a statute.
    const cleaned = extractedText.trim()
    if (!cleaned || cleaned.length < 500 || !cleaned.includes('§')) {
      console.error('Satzung: extrahierter Text wirkt unplausibel oder zu kurz:', {
        length: cleaned.length
      })
      throw createError({
        statusCode: 500,
        statusMessage: 'Die Satzung konnte nicht zuverlässig aus dem PDF gelesen werden. Bitte die PDF-Datei prüfen.'
      })
    }

    // 2. Convert to HTML.
    const htmlContent = convertTextToHtml(cleaned)

    // 3. Load the config BEFORE publishing anything, so an unreadable or corrupt
    //    config.json does not leave a new PDF next to stale page content.
    const configPath = getDataPath('config.json')
    const configData = JSON.parse(await fs.readFile(configPath, 'utf-8'))

    if (!configData.seiten) {
      configData.seiten = {}
    }

    // 4. Publish the validated PDF under its fixed public name.
    const documentsDir = path.join(process.cwd(), 'public', 'documents')
    await fs.mkdir(documentsDir, { recursive: true })
    await fs.copyFile(file.path, path.join(documentsDir, 'satzung.pdf'))

    // 5. Store the PDF URL together with the parsed content.
    configData.seiten.satzung = {
      pdfUrl: '/documents/satzung.pdf',
      content: htmlContent
    }

    await fs.writeFile(configPath, JSON.stringify(configData, null, 2), 'utf-8')

    return {
      success: true,
      message: 'Satzung erfolgreich hochgeladen und verarbeitet',
      pdfUrl: '/documents/satzung.pdf'
    }
  } catch (error) {
    console.error('PDF Upload Error:', error)
    // Errors created via createError already carry the intended status code.
    if (error.statusCode) {
      throw error
    }
    throw createError({
      statusCode: 500,
      statusMessage: error.message || 'Fehler beim Verarbeiten der PDF-Datei'
    })
  } finally {
    // Always remove the temporary upload, on success as well as on failure.
    // A cleanup problem is logged but must not mask the actual result or error.
    const uploaded = event.node.req.file
    if (uploaded && uploaded.path) {
      await fs.rm(uploaded.path, { force: true }).catch((cleanupError) => {
        console.error('Satzung: temporäre Upload-Datei konnte nicht gelöscht werden:', cleanupError)
      })
    }
  }
})
|
|
|
|
/**
 * Convert plain text extracted from the Satzung PDF into simple HTML.
 *
 * Recognised structure:
 *  - a line consisting only of "Satzung" or "Vereinssatzung" becomes `<h1>`
 *  - a line starting with "§ <number>" that contains no further "§" becomes `<h2>`
 *  - a paragraph with bullet lines ("•"/"-" followed by whitespace), or whose first
 *    line is numbered ("1. "), becomes `<ul>` with one `<li>` per line (markers stripped)
 *  - everything else becomes `<p>`, with line breaks rendered as `<br>`
 *
 * All text is HTML-escaped before it is wrapped in tags, so characters such as
 * `<` or `&` coming from the PDF cannot break or inject markup.
 *
 * @param {string} text - Raw extracted text; `null`/`undefined` are treated as empty.
 * @returns {string} HTML blocks joined by single newlines ('' for empty input).
 */
function convertTextToHtml(text) {
  const escapeHtml = (value) =>
    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')

  const titlePattern = /^(?:Vereinssatzung|Satzung)$/
  const sectionPattern = /^§\s*\d+[^§]*$/
  const bulletPattern = /^[•-]\s+/
  const numberedPattern = /^\d+\.\s+/

  const normalized = String(text == null ? '' : text)
    .replace(/\r\n?/g, '\n') // normalise Windows and old Mac line endings
    .replace(/\f/g, '\n') // pdftotext separates pages with a form feed; treat it as a line break
    .replace(/\n\s*\n/g, '\n\n') // collapse runs of blank lines into one paragraph break
    .trim()

  if (!normalized) return ''

  const blocks = []

  // Emit the collected non-heading lines of a paragraph as either a list or a <p>.
  const flushBody = (lines) => {
    if (lines.length === 0) return

    // Only real markers at the start of a line make a list; a hyphen inside a word
    // ("E-Mail") or a date at the start of a sentence must not.
    const isList =
      lines.some((line) => bulletPattern.test(line)) || numberedPattern.test(lines[0])

    if (isList) {
      const items = lines
        .map((line) => line.replace(bulletPattern, '').replace(numberedPattern, ''))
        .map((item) => `<li>${escapeHtml(item)}</li>`)
        .join('')
      blocks.push(`<ul>${items}</ul>`)
      return
    }

    blocks.push(`<p>${lines.map((line) => escapeHtml(line)).join('<br>')}</p>`)
  }

  for (const paragraph of normalized.split('\n\n')) {
    let bodyLines = []

    for (const rawLine of paragraph.split('\n')) {
      const line = rawLine.trim()
      if (!line) continue

      let headingTag = null
      if (titlePattern.test(line)) headingTag = 'h1'
      else if (sectionPattern.test(line)) headingTag = 'h2'

      if (headingTag) {
        // A heading ends the running body, so text that follows it without a blank
        // line still gets wrapped in its own <p>/<ul> instead of being emitted raw.
        flushBody(bodyLines)
        bodyLines = []
        blocks.push(`<${headingTag}>${escapeHtml(line)}</${headingTag}>`)
      } else {
        bodyLines.push(line)
      }
    }

    flushBody(bodyLines)
  }

  return blocks.join('\n')
}
|