feat(clickTtHttpPageRoutes): enhance HTML processing for proxy handling
- Introduced a new function, applyOutsideScriptTags, to safely transform HTML while preserving script tags, improving the handling of links, meta-refresh, and form actions.
- Updated existing functions to utilize the new transformation method, ensuring that proxy URLs are correctly applied without disrupting script execution.
- Enhanced error handling in HTML processing functions to maintain robustness during URL rewriting operations.
This commit is contained in:
@@ -87,28 +87,46 @@ function buildProxyUrl(proxyBase, targetUrl, sid) {
|
||||
return `${proxyBase}${sep}url=${encodeURIComponent(targetUrl)}${sid ? `&sid=${sid}` : ''}`;
|
||||
}
|
||||
|
||||
/**
 * Runs `transform` over an HTML string while keeping every
 * `<script>…</script>` element out of its reach.
 *
 * Each script element is swapped for a `__CLICKTT_SCRIPT_<n>__` placeholder,
 * `transform` is called on the placeholder HTML, and the original script
 * elements are put back afterwards, byte for byte.
 *
 * @param {string} html - HTML to process; falsy values are returned unchanged.
 * @param {(safeHtml: string) => string} transform - Rewriting step that only
 *   ever sees the HTML with script elements replaced by placeholders.
 * @returns {string} The transformed HTML with the original scripts restored.
 */
function applyOutsideScriptTags(html, transform) {
  if (!html) return html;

  const scripts = [];
  const placeholderHtml = html.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, (match) => {
    const token = `__CLICKTT_SCRIPT_${scripts.length}__`;
    scripts.push(match);
    return token;
  });

  const transformed = transform(placeholderHtml);
  if (scripts.length === 0) return transformed;

  // Restore in ONE pass and with a function replacer:
  //  - a replacement *string* would expand `$&`, `$$`, `` $` `` and `$'`, all of
  //    which occur in real-world script code, and thereby corrupt the script;
  //  - a single pass never rescans a restored script, so a script that happens
  //    to contain placeholder-like text cannot swallow a later script.
  return transformed.replace(/__CLICKTT_SCRIPT_(\d+)__/g, (token, rawIndex) => {
    const index = Number(rawIndex);
    // Placeholder-like text that does not belong to a stored script stays as is.
    return index < scripts.length ? scripts[index] : token;
  });
}
|
||||
|
||||
/**
 * Rewrites links in the HTML so that clicks inside the iframe go through our
 * proxy (follow-up logs).
 *
 * Only `<a … href="…">` tags outside of `<script>` elements are touched; the
 * rewriting runs through `applyOutsideScriptTags`. Relative hrefs are resolved
 * against `pageBaseUrl`. A link is rewritten only when `shouldProxyUrl`
 * accepts its absolute URL; the new href is produced by `buildProxyUrl`.
 *
 * @param {string} html - HTML document or fragment.
 * @param {string} proxyBaseUrl - Base URL of the proxy endpoint.
 * @param {string} pageBaseUrl - URL of the page the HTML was loaded from.
 * @param {string} [sid] - Session id forwarded to `buildProxyUrl`.
 * @returns {string} HTML with proxied links. The input is returned unchanged
 *   when a required argument is missing or the rewriting fails.
 */
function rewriteLinksInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
  if (!html || !proxyBaseUrl || !pageBaseUrl) return html;

  return applyOutsideScriptTags(html, (safeHtml) => {
    try {
      const base = new URL(pageBaseUrl);
      return safeHtml.replace(
        /<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*)>/gi,
        (match, before, href, after) => {
          let absoluteUrl = href;
          if (href.startsWith('/') || !href.startsWith('http')) {
            try {
              absoluteUrl = new URL(href, base.origin + base.pathname).href;
            } catch {
              // One unresolvable href must not abort rewriting of all other links.
              return match;
            }
          }
          if (!shouldProxyUrl(absoluteUrl)) return match;
          const proxyUrl = buildProxyUrl(proxyBaseUrl, absoluteUrl, sid);
          return `<a ${before}href="${proxyUrl}"${after}>`;
        }
      );
    } catch {
      // Best effort: e.g. an invalid pageBaseUrl leaves the HTML untouched.
      return safeHtml;
    }
  });
}
|
||||
|
||||
/**
|
||||
@@ -116,31 +134,33 @@ function rewriteLinksInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
|
||||
*/
|
||||
/**
 * Rewrites `<meta http-equiv="refresh" content="N; url=…">` redirects so that
 * the redirect target is loaded through the proxy.
 *
 * Only meta tags outside of `<script>` elements are touched; the rewriting
 * runs through `applyOutsideScriptTags`. Both attribute orders (http-equiv
 * before content and vice versa) are recognised. Relative targets are
 * resolved against `pageBaseUrl`. A tag is rewritten only when
 * `shouldProxyUrl` accepts the absolute target.
 *
 * @param {string} html - HTML document or fragment.
 * @param {string} proxyBaseUrl - Base URL of the proxy endpoint.
 * @param {string} pageBaseUrl - URL of the page the HTML was loaded from.
 * @param {string} sid - Session id; required, otherwise nothing is rewritten.
 * @returns {string} HTML with proxied meta-refresh targets. The input is
 *   returned unchanged when an argument is missing or the rewriting fails.
 */
function rewriteMetaRefreshInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
  if (!html || !proxyBaseUrl || !pageBaseUrl || !sid) return html;

  return applyOutsideScriptTags(html, (safeHtml) => {
    try {
      const base = new URL(pageBaseUrl);
      return safeHtml.replace(
        /<meta\s[^>]*(?:http-equiv\s*=\s*["']refresh["'][^>]*content\s*=\s*["']([^"']+)["']|content\s*=\s*["']([^"']+)["'][^>]*http-equiv\s*=\s*["']refresh["'])[^>]*>/gi,
        (match, content1, content2) => {
          const content = content1 || content2;
          if (!content) return match;

          const urlMatch = content.match(/^\s*\d+\s*;\s*url\s*=\s*(.+)$/i);
          if (!urlMatch) return match;

          let targetUrl = urlMatch[1].trim();
          if ((targetUrl.startsWith("'") && targetUrl.endsWith("'")) || (targetUrl.startsWith('"') && targetUrl.endsWith('"'))) {
            targetUrl = targetUrl.slice(1, -1);
          }
          if (targetUrl.startsWith('/') || !targetUrl.startsWith('http')) {
            try {
              targetUrl = new URL(targetUrl, base.origin + base.pathname).href;
            } catch {
              // One unresolvable target must not abort rewriting of other tags.
              return match;
            }
          }
          if (!shouldProxyUrl(targetUrl)) return match;

          const proxyUrl = buildProxyUrl(proxyBaseUrl, targetUrl, sid);
          // Function replacers insert the text literally; a replacement string
          // would expand `$&`, `$1`, `$$` … should they occur in the proxy URL.
          const newContent = content.replace(/url\s*=\s*.+$/i, () => `url=${proxyUrl}`);
          return match.replace(content, () => newContent);
        }
      );
    } catch {
      // Best effort: e.g. an invalid pageBaseUrl leaves the HTML untouched.
      return safeHtml;
    }
  });
}
|
||||
|
||||
/**
|
||||
@@ -148,25 +168,27 @@ function rewriteMetaRefreshInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
|
||||
*/
|
||||
/**
 * Rewrites `<form … action="…">` attributes so that form submissions are sent
 * through the proxy.
 *
 * Only form tags outside of `<script>` elements are touched; the rewriting
 * runs through `applyOutsideScriptTags`. An empty action (submit to the same
 * URL) is left alone. Relative actions are resolved against `pageBaseUrl`.
 * A form is rewritten only when `shouldProxyUrl` accepts the absolute URL.
 *
 * @param {string} html - HTML document or fragment.
 * @param {string} proxyBaseUrl - Base URL of the proxy endpoint.
 * @param {string} pageBaseUrl - URL of the page the HTML was loaded from.
 * @param {string} [sid] - Session id forwarded to `buildProxyUrl`.
 * @returns {string} HTML with proxied form actions. The input is returned
 *   unchanged when a required argument is missing or the rewriting fails.
 */
function rewriteFormActionsInHtml(html, proxyBaseUrl, pageBaseUrl, sid) {
  if (!html || !proxyBaseUrl || !pageBaseUrl) return html;

  return applyOutsideScriptTags(html, (safeHtml) => {
    try {
      const base = new URL(pageBaseUrl);
      return safeHtml.replace(
        /<form\s+([^>]*?)action\s*=\s*["']([^"']*)["']([^>]*)>/gi,
        (match, before, action, after) => {
          const actionTrim = action.trim();
          if (!actionTrim) return match; // action="" = same URL

          let absoluteUrl = actionTrim;
          if (actionTrim.startsWith('/') || !actionTrim.startsWith('http')) {
            try {
              absoluteUrl = new URL(actionTrim, base.origin + base.pathname).href;
            } catch {
              // One unresolvable action must not abort rewriting of other forms.
              return match;
            }
          }
          if (!shouldProxyUrl(absoluteUrl)) return match;

          const proxyUrl = buildProxyUrl(proxyBaseUrl, absoluteUrl, sid);
          return `<form ${before}action="${proxyUrl}"${after}>`;
        }
      );
    } catch {
      // Best effort: e.g. an invalid pageBaseUrl leaves the HTML untouched.
      return safeHtml;
    }
  });
}
|
||||
|
||||
function injectProxyNavigationScript(html, proxyBaseUrl, pageBaseUrl, sid) {
|
||||
@@ -216,6 +238,25 @@ function injectProxyNavigationScript(html, proxyBaseUrl, pageBaseUrl, sid) {
|
||||
'if(!targetUrl||!shouldProxyUrl(targetUrl))return;',
|
||||
"form.setAttribute('action',buildProxyUrl(targetUrl));",
|
||||
'},true);',
|
||||
'if(window.fetch){',
|
||||
'var nativeFetch=window.fetch.bind(window);',
|
||||
'window.fetch=function patchedFetch(input, init){',
|
||||
'var rawUrl=typeof input==="string"?input:(input&&input.url?input.url:null);',
|
||||
'var targetUrl=normalizeUrl(rawUrl,PAGE_BASE_URL);',
|
||||
'if(targetUrl&&shouldProxyUrl(targetUrl)){',
|
||||
'if(typeof input==="string"){input=buildProxyUrl(targetUrl);}else if(input&&input.url){input=buildProxyUrl(targetUrl);}',
|
||||
'}',
|
||||
'return nativeFetch(input, init);',
|
||||
'};',
|
||||
'}',
|
||||
'if(window.XMLHttpRequest&&window.XMLHttpRequest.prototype&&window.XMLHttpRequest.prototype.open){',
|
||||
'var nativeOpen=window.XMLHttpRequest.prototype.open;',
|
||||
'window.XMLHttpRequest.prototype.open=function patchedOpen(method, url){',
|
||||
'var targetUrl=normalizeUrl(url,PAGE_BASE_URL);',
|
||||
'if(targetUrl&&shouldProxyUrl(targetUrl)){arguments[1]=buildProxyUrl(targetUrl);}',
|
||||
'return nativeOpen.apply(this, arguments);',
|
||||
'};',
|
||||
'}',
|
||||
'if(window.HTMLFormElement&&window.HTMLFormElement.prototype&&window.HTMLFormElement.prototype.submit){',
|
||||
'var nativeSubmit=window.HTMLFormElement.prototype.submit;',
|
||||
'window.HTMLFormElement.prototype.submit=function patchedSubmit(){',
|
||||
|
||||
Reference in New Issue
Block a user