feat(clickTtHttpPageRoutes): add inline script normalization for improved HTML processing
- Introduced a new function, normalizeInlineScriptStrings, to standardize inline script content in HTML, ensuring proper handling of escape characters and line breaks inside quoted strings.
- Updated the proxy GET and POST routes to run the new normalization function before the navigation script is injected, enhancing the robustness of HTML processing during proxy interactions.
- Enhanced the existing URL validation logic (shouldProxyUrl) to include an additional allowed domain (liga.nu), improving overall proxy functionality.
This commit is contained in:
@@ -103,6 +103,66 @@ function applyOutsideScriptTags(html, transform) {
|
||||
return transformed;
|
||||
}
|
||||
|
||||
/**
 * Escapes raw line breaks that appear inside single- or double-quoted string
 * literals of inline <script> elements, so the scripts stay syntactically valid.
 *
 * The script body is scanned character by character with a small state
 * machine. Only the contents of '...' and "..." literals are changed: a raw
 * LF, CR or CRLF inside such a literal is replaced by the two characters `\n`.
 * Template literals, comments and all other code are copied through verbatim.
 * Scripts that carry a `src` attribute are returned untouched.
 *
 * Limitation: regular-expression literals are not recognised, so an unescaped
 * quote or a comment opener inside a regex literal can still confuse the scan.
 *
 * @param {string} html - HTML document or fragment; falsy values are returned as-is.
 * @returns {string} The HTML with normalised inline script string literals.
 */
function normalizeInlineScriptStrings(html) {
  if (!html) return html;

  return html.replace(/<script\b([^>]*)>([\s\S]*?)<\/script>/gi, (match, attrs, content) => {
    // External scripts have no inline code that needs normalising.
    if (/\bsrc\s*=/i.test(attrs || '')) return match;

    let out = '';
    let inSingle = false;
    let inDouble = false;
    let inTemplate = false;
    let inLineComment = false;
    let inBlockComment = false;
    let escaped = false;

    for (let i = 0; i < content.length; i += 1) {
      const ch = content[i];
      const next = content[i + 1];

      // Comments are copied verbatim. Quotes inside them (e.g. "don't") must
      // not toggle the string state, otherwise every following line break
      // would be rewritten and the code after the comment would be destroyed.
      if (inLineComment) {
        if (ch === '\n' || ch === '\r') inLineComment = false;
        out += ch;
        continue;
      }

      if (inBlockComment) {
        if (ch === '*' && next === '/') {
          out += '*/';
          i += 1;
          inBlockComment = false;
        } else {
          out += ch;
        }
        continue;
      }

      // The character following a backslash is always copied unchanged
      // (covers \' \" \\ and backslash line continuations).
      if (escaped) {
        out += ch;
        escaped = false;
        continue;
      }

      if (ch === '\\') {
        out += ch;
        escaped = true;
        continue;
      }

      // A comment can only start outside of any string or template literal;
      // "//" inside a quoted URL is therefore left alone.
      if (!inSingle && !inDouble && !inTemplate && ch === '/' && (next === '/' || next === '*')) {
        inLineComment = next === '/';
        inBlockComment = next === '*';
        out += ch + next;
        i += 1;
        continue;
      }

      if (!inDouble && !inTemplate && ch === '\'') {
        inSingle = !inSingle;
        out += ch;
        continue;
      }

      if (!inSingle && !inTemplate && ch === '"') {
        inDouble = !inDouble;
        out += ch;
        continue;
      }

      if (!inSingle && !inDouble && ch === '`') {
        inTemplate = !inTemplate;
        out += ch;
        continue;
      }

      if ((inSingle || inDouble) && (ch === '\n' || ch === '\r')) {
        // Collapse CRLF into a single escaped newline.
        if (ch === '\r' && next === '\n') {
          i += 1;
        }
        out += '\\n';
        continue;
      }

      out += ch;
    }

    return `<script${attrs}>${out}</script>`;
  });
}
|
||||
|
||||
/**
|
||||
* Schreibt Links im HTML um, sodass Klicks im iframe über unseren Proxy laufen (Folge-Logs).
|
||||
*/
|
||||
@@ -206,7 +266,7 @@ function injectProxyNavigationScript(html, proxyBaseUrl, pageBaseUrl, sid) {
|
||||
'try{return new URL(trimmed, base||PAGE_BASE_URL).href;}catch(e){return null;}',
|
||||
'}',
|
||||
'function shouldProxyUrl(value){',
|
||||
'try{var hostname=new URL(value).hostname;return /(^|\\\\.)click-tt\\\\.de$|(^|\\\\.)httv\\\\.de$/i.test(hostname);}catch(e){return false;}',
|
||||
'try{var hostname=new URL(value).hostname;return /(^|\\\\.)click-tt\\\\.de$|(^|\\\\.)httv\\\\.de$|(^|\\\\.)liga\\\\.nu$/i.test(hostname);}catch(e){return false;}',
|
||||
'}',
|
||||
'function buildProxyUrl(targetUrl){',
|
||||
"var separator=PROXY_BASE_URL.indexOf('?')>=0?'&':'?';",
|
||||
@@ -365,6 +425,7 @@ router.get('/proxy', async (req, res, next) => {
|
||||
html = rewriteLinksInHtml(html, proxyBase, effectivePageUrl, sid);
|
||||
html = rewriteFormActionsInHtml(html, proxyBase, effectivePageUrl, sid);
|
||||
html = rewriteMetaRefreshInHtml(html, proxyBase, effectivePageUrl, sid);
|
||||
html = normalizeInlineScriptStrings(html);
|
||||
html = injectProxyNavigationScript(html, proxyBase, effectivePageUrl, sid);
|
||||
|
||||
res.set({
|
||||
@@ -462,6 +523,7 @@ router.post('/proxy', async (req, res, next) => {
|
||||
responseBody = rewriteLinksInHtml(responseBody, proxyBase, effectivePageUrl, sid);
|
||||
responseBody = rewriteFormActionsInHtml(responseBody, proxyBase, effectivePageUrl, sid);
|
||||
responseBody = rewriteMetaRefreshInHtml(responseBody, proxyBase, effectivePageUrl, sid);
|
||||
responseBody = normalizeInlineScriptStrings(responseBody);
|
||||
responseBody = injectProxyNavigationScript(responseBody, proxyBase, effectivePageUrl, sid);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user