diff --git a/CHANGELOG.md b/CHANGELOG.md index 9675f6f..1a864e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +4.0.7 / 2024/10/23 +==================== +- Handles proxy authentication consistently throughout the codebase (solves e.g. this [`http2-wrapper`](https://github.com/szmarczak/http2-wrapper/issues/108) issue). + 4.0.6 / 2024/05/22 ==================== - Logging `CONNECT` error response body instead of the length only diff --git a/package-lock.json b/package-lock.json index 22a0c60..d95e88c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "got-scraping", - "version": "4.0.6", + "version": "4.0.7", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "got-scraping", - "version": "4.0.6", + "version": "4.0.7", "license": "Apache-2.0", "dependencies": { "got": "^14.2.1", diff --git a/package.json b/package.json index 7b7b4aa..754096e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "got-scraping", - "version": "4.0.6", + "version": "4.0.7", "description": "HTTP client made for scraping based on got.", "engines": { "node": ">=16" diff --git a/src/agent/h1-proxy-agent.ts b/src/agent/h1-proxy-agent.ts index 6b68010..3e8974a 100644 --- a/src/agent/h1-proxy-agent.ts +++ b/src/agent/h1-proxy-agent.ts @@ -4,6 +4,7 @@ import https from 'node:https'; import { isIPv6 } from 'node:net'; import tls, { type ConnectionOptions } from 'node:tls'; import { URL } from 'node:url'; +import { buildBasicAuthHeader } from '../auth.js'; interface AgentOptions extends http.AgentOptions { proxy: string | URL; @@ -30,20 +31,6 @@ const getPort = (url: URL): number => { throw new Error(`Unexpected protocol: ${url.protocol}`); }; -const getBasic = (url: URL): string => { - let basic = ''; - if (url.username || url.password) { - const username = decodeURIComponent(url.username); - const password = decodeURIComponent(url.password); - - basic = Buffer.from(`${username}:${password}`).toString('base64'); - - 
return `Basic ${basic}`; - } - - return basic; -}; - export class HttpRegularProxyAgent extends http.Agent { proxy!: URL; @@ -76,7 +63,7 @@ export class HttpRegularProxyAgent extends http.Agent { request.path = url.href; - const basic = getBasic(this.proxy); + const basic = buildBasicAuthHeader(this.proxy); if (basic) { request.setHeader('proxy-authorization', basic); } @@ -114,7 +101,7 @@ export class HttpProxyAgent extends http.Agent { host: hostport, }; - const basic = getBasic(this.proxy); + const basic = buildBasicAuthHeader(this.proxy); if (basic) { headers['proxy-authorization'] = basic; headers.authorization = basic; diff --git a/src/auth.ts b/src/auth.ts new file mode 100644 index 0000000..2e37078 --- /dev/null +++ b/src/auth.ts @@ -0,0 +1,18 @@ +/** + * Returns the Basic auth string based on the `username` and `password` parts of the given URL. + * If the URL contains neither a username nor a password, returns `null`. + * @param url URL object to process + * @returns `Basic BASE64` string, or `null` when the URL carries no credentials + */ +export function buildBasicAuthHeader(url: URL): string | null { + if (!url.username && !url.password) { + return null; + } + + const username = decodeURIComponent(url.username ?? ''); + const password = decodeURIComponent(url.password ?? 
''); + + const basic = Buffer.from(`${username}:${password}`).toString('base64'); + + return `Basic ${basic}`; +} diff --git a/src/hooks/proxy.ts b/src/hooks/proxy.ts index 3ca15bd..e785675 100644 --- a/src/hooks/proxy.ts +++ b/src/hooks/proxy.ts @@ -3,6 +3,7 @@ import http2, { auto } from 'http2-wrapper'; import { URL } from 'node:url'; import { HttpProxyAgent, HttpRegularProxyAgent, HttpsProxyAgent } from '../agent/h1-proxy-agent.js'; import { TransformHeadersAgent } from '../agent/transform-headers-agent.js'; +import { buildBasicAuthHeader } from '../auth.js'; const { HttpOverHttp2, @@ -37,10 +38,19 @@ function validateProxyProtocol(protocol: string) { async function getAgents(parsedProxyUrl: URL, rejectUnauthorized: boolean) { // Sockets must not be reused, the proxy server may rotate upstream proxies as well. + const headers: Record = {}; + const basic = buildBasicAuthHeader(parsedProxyUrl); + + if (basic) { + headers.authorization = basic; + headers['proxy-authorization'] = basic; + } + // `http2-wrapper` Agent options const wrapperOptions = { proxyOptions: { url: parsedProxyUrl, + headers, // Based on the got https.rejectUnauthorized option rejectUnauthorized, diff --git a/src/resolve-protocol.ts b/src/resolve-protocol.ts index c6c5bab..c3ca76d 100644 --- a/src/resolve-protocol.ts +++ b/src/resolve-protocol.ts @@ -5,6 +5,7 @@ import { type Headers } from 'got'; import { auto, type ResolveProtocolConnectFunction, type ResolveProtocolFunction } from 'http2-wrapper'; import QuickLRU from 'quick-lru'; import { ProxyError } from './hooks/proxy.js'; +import { buildBasicAuthHeader } from './auth.js'; const connect = async (proxyUrl: string, options: tls.ConnectionOptions, callback: () => void) => new Promise((resolve, reject) => { let host = `${options.host}:${options.port}`; @@ -20,12 +21,11 @@ const connect = async (proxyUrl: string, options: tls.ConnectionOptions, callbac }; const url = new URL(proxyUrl); - const username = decodeURIComponent(url.username); - 
const password = decodeURIComponent(url.password); + const basic = buildBasicAuthHeader(url); - if (username || password) { - headers.authorization = `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}`; - headers['proxy-authorization'] = headers.authorization; + if (basic) { + headers.authorization = basic; + headers['proxy-authorization'] = basic; } const request = await auto(url, {