From 3cb3d2a241aac61d47bf3562be0f3327d2f2b655 Mon Sep 17 00:00:00 2001 From: jwplukarski Date: Wed, 6 Mar 2024 15:45:27 -0600 Subject: [PATCH] Add recursive decode HTML characters --- src/__tests__/index.test.ts | 2 ++ src/index.ts | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index 22e1b68..c6a4e26 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -109,6 +109,8 @@ describe("sanitizeUrl", () => { "  javascript:alert('XSS');", "javasc ript: alert('XSS');", "javasc&#\u0000x09;ript:alert(1)", + "java&NewLine&newline;;script:alert('XSS')", + "java&NewLine&newline;;script:alert('XSS')", ]; attackVectors.forEach((vector) => { diff --git a/src/index.ts b/src/index.ts index 49f1749..dd04813 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,12 +24,19 @@ export function sanitizeUrl(url?: string): string { if (!url) { return BLANK_URL; } - - const sanitizedUrl = decodeHtmlCharacters(url) - .replace(htmlCtrlEntityRegex, "") - .replace(ctrlCharactersRegex, "") - .trim(); - + let charsToDecode; + let decodedUrl = url; + do { + decodedUrl = decodeHtmlCharacters(decodedUrl) + .replace(htmlCtrlEntityRegex, "") + .replace(ctrlCharactersRegex, "") + .trim(); + charsToDecode = + decodedUrl.match(ctrlCharactersRegex) || + decodedUrl.match(htmlEntitiesRegex) || + decodedUrl.match(htmlCtrlEntityRegex); + } while (charsToDecode && charsToDecode.length > 0); + const sanitizedUrl = decodedUrl; if (!sanitizedUrl) { return BLANK_URL; }