From 39a21cb9bdd0ceb101faba5696bd0a01136fc3f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 23 Oct 2024 15:44:18 +0200 Subject: [PATCH 1/4] fix: handle proxy authentication consistently --- src/agent/h1-proxy-agent.ts | 15 +-------------- src/auth.ts | 18 ++++++++++++++++++ src/hooks/proxy.ts | 10 ++++++++++ src/resolve-protocol.ts | 10 +++++----- 4 files changed, 34 insertions(+), 19 deletions(-) create mode 100644 src/auth.ts diff --git a/src/agent/h1-proxy-agent.ts b/src/agent/h1-proxy-agent.ts index 6b68010..c12af69 100644 --- a/src/agent/h1-proxy-agent.ts +++ b/src/agent/h1-proxy-agent.ts @@ -4,6 +4,7 @@ import https from 'node:https'; import { isIPv6 } from 'node:net'; import tls, { type ConnectionOptions } from 'node:tls'; import { URL } from 'node:url'; +import { getBasic } from '../auth.js'; interface AgentOptions extends http.AgentOptions { proxy: string | URL; @@ -30,20 +31,6 @@ const getPort = (url: URL): number => { throw new Error(`Unexpected protocol: ${url.protocol}`); }; -const getBasic = (url: URL): string => { - let basic = ''; - if (url.username || url.password) { - const username = decodeURIComponent(url.username); - const password = decodeURIComponent(url.password); - - basic = Buffer.from(`${username}:${password}`).toString('base64'); - - return `Basic ${basic}`; - } - - return basic; -}; - export class HttpRegularProxyAgent extends http.Agent { proxy!: URL; diff --git a/src/auth.ts b/src/auth.ts new file mode 100644 index 0000000..5c68dfe --- /dev/null +++ b/src/auth.ts @@ -0,0 +1,18 @@ +/** + * Returns the Basic auth string based on the `username` and `password` parts of the given URL. + * If the URL contains neither a username nor a password, returns `null`. 
+ * @param url URL object to process + * @returns `Basic BASE64` string + */ +export function getBasic(url: URL): string | null { + if (!url.username && !url.password) { + return null; + } + + const username = decodeURIComponent(url.username); + const password = decodeURIComponent(url.password); + + const basic = Buffer.from(`${username}:${password}`).toString('base64'); + + return `Basic ${basic}`; +} diff --git a/src/hooks/proxy.ts b/src/hooks/proxy.ts index 3ca15bd..f538209 100644 --- a/src/hooks/proxy.ts +++ b/src/hooks/proxy.ts @@ -3,6 +3,7 @@ import http2, { auto } from 'http2-wrapper'; import { URL } from 'node:url'; import { HttpProxyAgent, HttpRegularProxyAgent, HttpsProxyAgent } from '../agent/h1-proxy-agent.js'; import { TransformHeadersAgent } from '../agent/transform-headers-agent.js'; +import { getBasic } from '../auth.js'; const { HttpOverHttp2, @@ -37,10 +38,19 @@ function validateProxyProtocol(protocol: string) { async function getAgents(parsedProxyUrl: URL, rejectUnauthorized: boolean) { // Sockets must not be reused, the proxy server may rotate upstream proxies as well. 
+ const headers: Record<string, string> = {}; + const basic = getBasic(parsedProxyUrl); + + if (basic) { + headers.authorization = basic; + headers['proxy-authorization'] = basic; + } + // `http2-wrapper` Agent options const wrapperOptions = { proxyOptions: { url: parsedProxyUrl, + headers, // Based on the got https.rejectUnauthorized option rejectUnauthorized, diff --git a/src/resolve-protocol.ts b/src/resolve-protocol.ts index c6c5bab..0bd59ac 100644 --- a/src/resolve-protocol.ts +++ b/src/resolve-protocol.ts @@ -5,6 +5,7 @@ import { type Headers } from 'got'; import { auto, type ResolveProtocolConnectFunction, type ResolveProtocolFunction } from 'http2-wrapper'; import QuickLRU from 'quick-lru'; import { ProxyError } from './hooks/proxy.js'; +import { getBasic } from './auth.js'; const connect = async (proxyUrl: string, options: tls.ConnectionOptions, callback: () => void) => new Promise((resolve, reject) => { let host = `${options.host}:${options.port}`; @@ -20,12 +21,11 @@ const connect = async (proxyUrl: string, options: tls.ConnectionOptions, callbac }; const url = new URL(proxyUrl); - const username = decodeURIComponent(url.username); - const password = decodeURIComponent(url.password); + const basic = getBasic(url); - if (username || password) { - headers.authorization = `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}`; - headers['proxy-authorization'] = headers.authorization; + if (basic) { + headers.authorization = basic; + headers['proxy-authorization'] = basic; } const request = await auto(url, { From 881f435e1f2075a252a905e9d01dacb99fd3022e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 23 Oct 2024 15:47:05 +0200 Subject: [PATCH 2/4] chore: bump version + update changelog --- CHANGELOG.md | 4 ++++ package-lock.json | 4 ++-- package.json | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9675f6f..6c59b58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ 
+4.0.6 / 2024/10/23 +==================== +- Handles proxy authentication consistently throughout the codebase (solves e.g. this [`http2-wrapper`](https://github.com/szmarczak/http2-wrapper/issues/108) issue). + 4.0.6 / 2024/05/22 ==================== - Logging `CONNECT` error response body instead of the length only diff --git a/package-lock.json b/package-lock.json index 22a0c60..d95e88c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "got-scraping", - "version": "4.0.6", + "version": "4.0.7", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "got-scraping", - "version": "4.0.6", + "version": "4.0.7", "license": "Apache-2.0", "dependencies": { "got": "^14.2.1", diff --git a/package.json b/package.json index 7b7b4aa..754096e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "got-scraping", - "version": "4.0.6", + "version": "4.0.7", "description": "HTTP client made for scraping based on got.", "engines": { "node": ">=16" From 8109fdc316d7d7fe91f48b83ab48f69ca246cb6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 23 Oct 2024 16:02:06 +0200 Subject: [PATCH 3/4] fix: handle missing `username`/`password` correctly --- src/auth.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/auth.ts b/src/auth.ts index 5c68dfe..e38ae9c 100644 --- a/src/auth.ts +++ b/src/auth.ts @@ -9,8 +9,8 @@ export function getBasic(url: URL): string | null { return null; } - const username = decodeURIComponent(url.username); - const password = decodeURIComponent(url.password); + const username = decodeURIComponent(url.username ?? ''); + const password = decodeURIComponent(url.password ?? 
''); const basic = Buffer.from(`${username}:${password}`).toString('base64'); From 2121cd7ebf2a4007462e744326caddf102d4563e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 23 Oct 2024 16:03:03 +0200 Subject: [PATCH 4/4] chore: review PR comments --- CHANGELOG.md | 2 +- src/agent/h1-proxy-agent.ts | 6 +++--- src/auth.ts | 2 +- src/hooks/proxy.ts | 4 ++-- src/resolve-protocol.ts | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c59b58..1a864e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -4.0.6 / 2024/10/23 +4.0.7 / 2024/10/23 ==================== - Handles proxy authentication consistently throughout the codebase (solves e.g. this [`http2-wrapper`](https://github.com/szmarczak/http2-wrapper/issues/108) issue). diff --git a/src/agent/h1-proxy-agent.ts b/src/agent/h1-proxy-agent.ts index c12af69..3e8974a 100644 --- a/src/agent/h1-proxy-agent.ts +++ b/src/agent/h1-proxy-agent.ts @@ -4,7 +4,7 @@ import https from 'node:https'; import { isIPv6 } from 'node:net'; import tls, { type ConnectionOptions } from 'node:tls'; import { URL } from 'node:url'; -import { getBasic } from '../auth.js'; +import { buildBasicAuthHeader } from '../auth.js'; interface AgentOptions extends http.AgentOptions { proxy: string | URL; @@ -63,7 +63,7 @@ export class HttpRegularProxyAgent extends http.Agent { request.path = url.href; - const basic = getBasic(this.proxy); + const basic = buildBasicAuthHeader(this.proxy); if (basic) { request.setHeader('proxy-authorization', basic); } @@ -101,7 +101,7 @@ export class HttpProxyAgent extends http.Agent { host: hostport, }; - const basic = getBasic(this.proxy); + const basic = buildBasicAuthHeader(this.proxy); if (basic) { headers['proxy-authorization'] = basic; headers.authorization = basic; diff --git a/src/auth.ts b/src/auth.ts index e38ae9c..2e37078 100644 --- a/src/auth.ts +++ b/src/auth.ts @@ -4,7 +4,7 @@ * @param url URL object to process * 
@returns `Basic BASE64` string */ -export function getBasic(url: URL): string | null { +export function buildBasicAuthHeader(url: URL): string | null { if (!url.username && !url.password) { return null; } diff --git a/src/hooks/proxy.ts b/src/hooks/proxy.ts index f538209..e785675 100644 --- a/src/hooks/proxy.ts +++ b/src/hooks/proxy.ts @@ -3,7 +3,7 @@ import http2, { auto } from 'http2-wrapper'; import { URL } from 'node:url'; import { HttpProxyAgent, HttpRegularProxyAgent, HttpsProxyAgent } from '../agent/h1-proxy-agent.js'; import { TransformHeadersAgent } from '../agent/transform-headers-agent.js'; -import { getBasic } from '../auth.js'; +import { buildBasicAuthHeader } from '../auth.js'; const { HttpOverHttp2, @@ -39,7 +39,7 @@ async function getAgents(parsedProxyUrl: URL, rejectUnauthorized: boolean) { // Sockets must not be reused, the proxy server may rotate upstream proxies as well. const headers: Record<string, string> = {}; - const basic = getBasic(parsedProxyUrl); + const basic = buildBasicAuthHeader(parsedProxyUrl); if (basic) { headers.authorization = basic; diff --git a/src/resolve-protocol.ts b/src/resolve-protocol.ts index 0bd59ac..c3ca76d 100644 --- a/src/resolve-protocol.ts +++ b/src/resolve-protocol.ts @@ -5,7 +5,7 @@ import { type Headers } from 'got'; import { auto, type ResolveProtocolConnectFunction, type ResolveProtocolFunction } from 'http2-wrapper'; import QuickLRU from 'quick-lru'; import { ProxyError } from './hooks/proxy.js'; -import { getBasic } from './auth.js'; +import { buildBasicAuthHeader } from './auth.js'; const connect = async (proxyUrl: string, options: tls.ConnectionOptions, callback: () => void) => new Promise((resolve, reject) => { let host = `${options.host}:${options.port}`; @@ -21,7 +21,7 @@ const connect = async (proxyUrl: string, options: tls.ConnectionOptions, callbac }; const url = new URL(proxyUrl); - const basic = getBasic(url); + const basic = buildBasicAuthHeader(url); if (basic) { headers.authorization = basic;