feat(clickTtHttpPageRoutes): enhance HTML processing for proxy handling

- Introduced a new function, applyOutsideScriptTags, to safely transform HTML while preserving script tags, improving the handling of links, meta-refresh, and form actions.
- Updated existing functions to utilize the new transformation method, ensuring that proxy URLs are correctly applied without disrupting script execution.
- Enhanced error handling in HTML processing functions to maintain robustness during URL rewriting operations.
This commit is contained in:
Torsten Schulz (local)
2026-03-10 23:18:45 +01:00
parent 4484f122d2
commit dee96a9445

View File

@@ -87,28 +87,46 @@ function buildProxyUrl(proxyBase, targetUrl, sid) {
return `${proxyBase}${sep}url=${encodeURIComponent(targetUrl)}${sid ? `&sid=${sid}` : ''}`;
}
/**
 * Applies `transform` to `html` while keeping every `<script>…</script>`
 * element out of its reach, then restores the scripts verbatim.
 *
 * Each script element is swapped for a `__CLICKTT_SCRIPT_<n>__` placeholder
 * before `transform` runs, so regex-based rewrites cannot touch markup that
 * only appears inside script source.
 *
 * @param {string} html - Markup to process; falsy values are returned as-is.
 * @param {(safeHtml: string) => string} transform - Rewrite applied to the
 *   markup with scripts replaced by placeholders.
 * @returns {string} The transformed markup with the original scripts restored.
 */
function applyOutsideScriptTags(html, transform) {
  if (!html) return html;

  const scripts = [];
  const placeholderHtml = html.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, (match) => {
    const token = `__CLICKTT_SCRIPT_${scripts.length}__`;
    scripts.push(match);
    return token;
  });

  const transformed = transform(placeholderHtml);
  // Nothing was extracted, so there is nothing to restore.
  if (scripts.length === 0) return transformed;

  // Restore through a replacer FUNCTION: a replacement STRING would expand
  // `$&`, `$'`, `` $` `` and `$$` found in the script source and corrupt it.
  // A single pass also means restored script text is never rescanned.
  return transformed.replace(/__CLICKTT_SCRIPT_(\d+)__/g, (token, index) => {
    const script = scripts[Number(index)];
    return script === undefined ? token : script;
  });
}
/**
 * Rewrites links in the HTML so that clicks inside the iframe go through our proxy (follow-up logs).
 *
 * Returns `html` unchanged when `html`, `proxyBaseUrl` or `pageBaseUrl` is falsy.
 * For each `<a … href="…">` with a quoted href, a value that starts with `/` or
 * does not start with `http` is resolved against `base.origin + base.pathname`
 * of `pageBaseUrl`; the tag is left as-is when `shouldProxyUrl` rejects the
 * resolved URL, otherwise the href is replaced by `buildProxyUrl(...)`.
 *
 * NOTE(review): this span is a diff rendering without +/- markers. Lines of the
 * previous body (direct `html.replace`) and of the new body (wrapped in
 * `applyOutsideScriptTags`) are interleaved, so it is not runnable as shown.
 */
function rewriteLinksInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
if (!html || !proxyBaseUrl || !pageBaseUrl) return html;
try {
const base = new URL(pageBaseUrl);
return html.replace(
/<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*)>/gi,
(match, before, href, after) => {
let absoluteUrl = href;
if (href.startsWith('/') || !href.startsWith('http')) {
absoluteUrl = new URL(href, base.origin + base.pathname).href;
// Variant that runs the same rewrite on markup with <script> elements masked out.
return applyOutsideScriptTags(html, (safeHtml) => {
try {
const base = new URL(pageBaseUrl);
return safeHtml.replace(
/<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*)>/gi,
(match, before, href, after) => {
let absoluteUrl = href;
if (href.startsWith('/') || !href.startsWith('http')) {
absoluteUrl = new URL(href, base.origin + base.pathname).href;
}
if (!shouldProxyUrl(absoluteUrl)) return match;
const proxyUrl = buildProxyUrl(proxyBaseUrl, absoluteUrl, sid);
return `<a ${before}href="${proxyUrl}"${after}>`;
}
if (!shouldProxyUrl(absoluteUrl)) return match;
const proxyUrl = buildProxyUrl(proxyBaseUrl, absoluteUrl, sid);
return `<a ${before}href="${proxyUrl}"${after}>`;
}
);
} catch {
return html;
}
);
} catch {
return safeHtml;
}
});
}
/**
@@ -116,31 +134,33 @@ function rewriteLinksInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
*/
function rewriteMetaRefreshInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
// Rewrites the redirect target of <meta http-equiv="refresh" content="N; url=…"> tags
// to a URL built by buildProxyUrl. Returns html unchanged when html, proxyBaseUrl,
// pageBaseUrl or sid is falsy.
// NOTE(review): this span is a diff rendering without +/- markers; lines of the previous
// body (direct html.replace) and the new body (wrapped in applyOutsideScriptTags) are
// interleaved, so it is not runnable as shown.
if (!html || !proxyBaseUrl || !pageBaseUrl || !sid) return html;
try {
const base = new URL(pageBaseUrl);
return html.replace(
/<meta\s[^>]*(?:http-equiv\s*=\s*["']refresh["'][^>]*content\s*=\s*["']([^"']+)["']|content\s*=\s*["']([^"']+)["'][^>]*http-equiv\s*=\s*["']refresh["'])[^>]*>/gi,
(match, content1, content2) => {
const content = content1 || content2;
if (!content) return match;
const urlMatch = content.match(/^\s*\d+\s*;\s*url\s*=\s*(.+)$/i);
if (!urlMatch) return match;
let targetUrl = urlMatch[1].trim();
if ((targetUrl.startsWith("'") && targetUrl.endsWith("'")) || (targetUrl.startsWith('"') && targetUrl.endsWith('"'))) {
targetUrl = targetUrl.slice(1, -1);
// Variant that runs the same rewrite on markup with <script> elements masked out.
return applyOutsideScriptTags(html, (safeHtml) => {
try {
const base = new URL(pageBaseUrl);
return safeHtml.replace(
// The two alternatives accept either attribute order: http-equiv before content, or content before http-equiv.
/<meta\s[^>]*(?:http-equiv\s*=\s*["']refresh["'][^>]*content\s*=\s*["']([^"']+)["']|content\s*=\s*["']([^"']+)["'][^>]*http-equiv\s*=\s*["']refresh["'])[^>]*>/gi,
(match, content1, content2) => {
const content = content1 || content2;
if (!content) return match;
// Only "<digits>; url=<target>" content values are rewritten.
const urlMatch = content.match(/^\s*\d+\s*;\s*url\s*=\s*(.+)$/i);
if (!urlMatch) return match;
let targetUrl = urlMatch[1].trim();
// Strip one pair of matching quotes around the target.
if ((targetUrl.startsWith("'") && targetUrl.endsWith("'")) || (targetUrl.startsWith('"') && targetUrl.endsWith('"'))) {
targetUrl = targetUrl.slice(1, -1);
}
// Resolve targets that start with "/" or do not start with "http" against the page URL.
if (targetUrl.startsWith('/') || !targetUrl.startsWith('http')) {
targetUrl = new URL(targetUrl, base.origin + base.pathname).href;
}
if (!shouldProxyUrl(targetUrl)) return match;
const proxyUrl = buildProxyUrl(proxyBaseUrl, targetUrl, sid);
const newContent = content.replace(/url\s*=\s*.+$/i, `url=${proxyUrl}`);
return match.replace(content, newContent);
}
if (targetUrl.startsWith('/') || !targetUrl.startsWith('http')) {
targetUrl = new URL(targetUrl, base.origin + base.pathname).href;
}
if (!shouldProxyUrl(targetUrl)) return match;
const proxyUrl = buildProxyUrl(proxyBaseUrl, targetUrl, sid);
const newContent = content.replace(/url\s*=\s*.+$/i, `url=${proxyUrl}`);
return match.replace(content, newContent);
}
);
} catch {
return html;
}
);
} catch {
return safeHtml;
}
});
}
/**
@@ -148,25 +168,27 @@ function rewriteMetaRefreshInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
*/
function rewriteFormActionsInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
// Rewrites quoted action attributes of <form> tags to a URL built by buildProxyUrl.
// Returns html unchanged when html, proxyBaseUrl or pageBaseUrl is falsy.
// NOTE(review): this span is a diff rendering without +/- markers; lines of the previous
// body (direct html.replace) and the new body (wrapped in applyOutsideScriptTags) are
// interleaved, so it is not runnable as shown.
if (!html || !proxyBaseUrl || !pageBaseUrl) return html;
try {
const base = new URL(pageBaseUrl);
return html.replace(
/<form\s+([^>]*?)action\s*=\s*["']([^"']*)["']([^>]*)>/gi,
(match, before, action, after) => {
const actionTrim = action.trim();
if (!actionTrim) return match; // action="" = same URL
let absoluteUrl = actionTrim;
if (actionTrim.startsWith('/') || !actionTrim.startsWith('http')) {
absoluteUrl = new URL(actionTrim, base.origin + base.pathname).href;
// Variant that runs the same rewrite on markup with <script> elements masked out.
return applyOutsideScriptTags(html, (safeHtml) => {
try {
const base = new URL(pageBaseUrl);
return safeHtml.replace(
/<form\s+([^>]*?)action\s*=\s*["']([^"']*)["']([^>]*)>/gi,
(match, before, action, after) => {
const actionTrim = action.trim();
if (!actionTrim) return match; // action="" = same URL
let absoluteUrl = actionTrim;
// Resolve actions that start with "/" or do not start with "http" against the page URL.
if (actionTrim.startsWith('/') || !actionTrim.startsWith('http')) {
absoluteUrl = new URL(actionTrim, base.origin + base.pathname).href;
}
if (!shouldProxyUrl(absoluteUrl)) return match;
const proxyUrl = buildProxyUrl(proxyBaseUrl, absoluteUrl, sid);
return `<form ${before}action="${proxyUrl}"${after}>`;
}
if (!shouldProxyUrl(absoluteUrl)) return match;
const proxyUrl = buildProxyUrl(proxyBaseUrl, absoluteUrl, sid);
return `<form ${before}action="${proxyUrl}"${after}>`;
}
);
} catch {
return html;
}
);
} catch {
return safeHtml;
}
});
}
function injectProxyNavigationScript(html, proxyBaseUrl, pageBaseUrl, sid) {
@@ -216,6 +238,25 @@ function injectProxyNavigationScript(html, proxyBaseUrl, pageBaseUrl, sid) {
'if(!targetUrl||!shouldProxyUrl(targetUrl))return;',
"form.setAttribute('action',buildProxyUrl(targetUrl));",
'},true);',
'if(window.fetch){',
'var nativeFetch=window.fetch.bind(window);',
'window.fetch=function patchedFetch(input, init){',
'var rawUrl=typeof input==="string"?input:(input&&input.url?input.url:null);',
'var targetUrl=normalizeUrl(rawUrl,PAGE_BASE_URL);',
'if(targetUrl&&shouldProxyUrl(targetUrl)){',
'if(typeof input==="string"){input=buildProxyUrl(targetUrl);}else if(input&&input.url){input=buildProxyUrl(targetUrl);}',
'}',
'return nativeFetch(input, init);',
'};',
'}',
'if(window.XMLHttpRequest&&window.XMLHttpRequest.prototype&&window.XMLHttpRequest.prototype.open){',
'var nativeOpen=window.XMLHttpRequest.prototype.open;',
'window.XMLHttpRequest.prototype.open=function patchedOpen(method, url){',
'var targetUrl=normalizeUrl(url,PAGE_BASE_URL);',
'if(targetUrl&&shouldProxyUrl(targetUrl)){arguments[1]=buildProxyUrl(targetUrl);}',
'return nativeOpen.apply(this, arguments);',
'};',
'}',
'if(window.HTMLFormElement&&window.HTMLFormElement.prototype&&window.HTMLFormElement.prototype.submit){',
'var nativeSubmit=window.HTMLFormElement.prototype.submit;',
'window.HTMLFormElement.prototype.submit=function patchedSubmit(){',