interface GetRedirectResponseOptions {
  cid: CID
  resource: Resource
  options?: Omit<VerifiedFetchInit, 'signal'> & AbortOptions
  logger: ComponentLogger
}

interface GetSubdomainRedirectOptions extends GetRedirectResponseOptions {
  resource: string
}

/**
 * Normalizes a UnixFS directory request so that its path ends with a trailing
 * slash, unless the path has an extension-like ending (a dot followed by 1-4
 * alphanumeric characters), in which case it is left untouched.
 *
 * - If given only a path, i.e. `/ipfs/QmHash`, the result is also only a path
 * (plus any query string and fragment that were part of the input).
 * - If given a full URL, the result is that same URL with the normalized path.
 *
 * This is only used for directory normalization with UnixFS directory requests.
 *
 * @param resource - a path (e.g. `/ipfs/QmHash`) or an absolute URL string
 * @returns the resource with a spec compliant (trailing slash) path
 * @throws {TypeError} if `resource` cannot be parsed as a URL, even relative to
 * a placeholder origin
 */
export function getSpecCompliantPath (resource: string): string {
  let url: URL
  let isInvalidURL = false
  try {
    url = new URL(resource)
  } catch {
    // not an absolute URL, so treat it as a path and parse it against a
    // placeholder origin that is never included in the returned value
    isInvalidURL = true
    url = new URL(resource, 'http://example.com')
  }
  const { pathname, search, hash } = url

  let specCompliantPath = pathname

  if (pathname.match(/\.[a-zA-Z0-9]{1,4}$/) == null && !pathname.endsWith('/')) {
    // no extension-like ending, add a trailing slash
    specCompliantPath = `${pathname}/`
  }

  if (isInvalidURL) {
    // keep the query string and fragment so that redirecting to the normalized
    // path does not silently drop e.g. `?filename=` or `?format=` parameters
    return `${specCompliantPath}${search}${hash}`
  }

  // the below is needed to get around a bug with some environments removing the trailing slash when calling url.href or url.toString()
  if (specCompliantPath.startsWith('//')) {
    // likely ipfs:// or ipns:// url
    return `${url.protocol}${specCompliantPath}${search}${hash}`
  }

  return `${url.protocol}//${url.host}${specCompliantPath}${search}${hash}`
}
/**
 * Handles determining if a redirect to subdomain is needed.
 *
 * The `x-forwarded-host` and `x-forwarded-proto` request headers are used to
 * work out which host/protocol the request was originally made to. When either
 * is present and the resource is not already a subdomain request, the
 * equivalent subdomain gateway URL is returned
 * (`<protocol>://<cid-or-name>.<ipfs|ipns>.<host>/<remaining path>`).
 *
 * @param args - redirect arguments
 * @param args.resource - the requested path or URL
 * @param args.options - fetch options, only `headers` is read
 * @param args.logger - used to create a component logger
 * @param args.cid - the resolved CID; v0 CIDs are converted to v1 for the
 * subdomain host
 * @returns the URL to redirect to, or `resource` unchanged if no redirect is
 * needed or could be determined
 * @throws {CodeError} with a `DNS_LABEL_INCOMPATIBLE_*` code when the CID,
 * PeerId or DNSLink is longer than the 63 character DNS label limit. Any other
 * error that carries a `code` property is rethrown as well.
 */
export async function getRedirectUrl ({ resource, options, logger, cid }: GetSubdomainRedirectOptions): Promise<string> {
  const log = logger.forComponent('helia:verified-fetch:get-subdomain-redirect')
  const headers = new Headers(options?.headers)
  const forwardedHost = headers.get('x-forwarded-host')
  const headerHost = headers.get('host')
  const forwardedProto = headers.get('x-forwarded-proto')

  try {
    const urlParts = matchURLString(resource)
    if (urlParts.cidOrPeerIdOrDnsLink.length > 63) {
      if (urlParts.protocol === 'ipfs') {
        throw new CodeError('CID incompatible with DNS label length limit of 63', 'DNS_LABEL_INCOMPATIBLE_CID_SUBDOMAIN')
      }
      throw new CodeError('PeerId or DNSLink incompatible with DNS label length limit of 63', 'DNS_LABEL_INCOMPATIBLE_SUBDOMAIN')
    }

    if (forwardedHost == null && forwardedProto == null) {
      // Not an error: without forwarding headers there is nothing to redirect
      // to, so hand back the resource as-is. Throwing a coded error here would
      // be rethrown by the catch clause below and make the final
      // `return resource` unreachable for the most common case.
      // NOTE(review): a plain `host` header is not treated as redirect info
      // here - confirm whether it should be.
      log.trace('no redirect info found in headers')
      return resource
    }

    const reqUrl = new URL(resource)
    reqUrl.protocol = forwardedProto ?? reqUrl.protocol
    const actualHost = forwardedHost ?? reqUrl.host
    const subdomain = `${urlParts.cidOrPeerIdOrDnsLink}.${urlParts.protocol}`
    if (actualHost.includes(subdomain)) {
      log.trace('request was for a subdomain already. Returning requested resource.')
      return resource
    }

    let subdomainHost = `${subdomain}.${actualHost}`

    if (urlParts.protocol === 'ipfs' && cid.version === 0) {
      // v0 CIDs are converted to v1 before being used as the subdomain label
      subdomainHost = `${cid.toV1()}.ipfs.${actualHost}`
    }

    const subdomainUrl = new URL(reqUrl)
    subdomainUrl.host = subdomainHost
    // the CID/name and protocol now live in the host, so strip them from the path
    subdomainUrl.pathname = reqUrl.pathname.replace(`/${urlParts.cidOrPeerIdOrDnsLink}`, '').replace(`/${urlParts.protocol}`, '')

    if (headerHost != null && headerHost === subdomainUrl.host) {
      log.trace('request was for a subdomain already. Returning requested resource.')
      return resource
    }

    return subdomainUrl.toString()
  } catch (err: any) {
    log.error('error while checking for subdomain support', err)
    // errors with a code (e.g. DNS_LABEL_INCOMPATIBLE_*) are meaningful to the
    // caller, anything else means "could not work out a redirect"
    if (err.code != null) {
      throw err
    }
  }

  return resource
}
@@ -476,6 +478,7 @@ export class VerifiedFetch { * TODO: move operations called by fetch to a queue of operations where we can * always exit early (and cleanly) if a given signal is aborted */ + // eslint-disable-next-line complexity async fetch (resource: Resource, opts?: VerifiedFetchOptions): Promise { this.log('fetch %s', resource) @@ -519,9 +522,22 @@ export class VerifiedFetch { let response: Response let reqFormat: RequestFormatShorthand | undefined - const redirectResponse = await getRedirectResponse({ resource, options, logger: this.helia.logger, cid }) - if (redirectResponse != null) { - return redirectResponse + // subdomain redirects don't make sense for `fetch(cid)`, only for `fetch(path)` or `fetch(url)` + // if a specific format is requested, that should be handled by the `accept === '...'` checks + // subdomain redirects for unixFS is handled in handleDagPb + if (typeof resource === 'string' && query.format == null && cid.code !== dagPbCode) { + try { + const redirectUrl = await getRedirectUrl({ resource, cid, options, logger: this.helia.logger }) + if (redirectUrl !== resource) { + this.log.trace('returning 301 permanent redirect to %s', redirectUrl) + return movedPermanentlyResponse(resource.toString(), redirectUrl) + } + } catch (err: any) { + if (err.code.startsWith('DNS_LABEL_INCOMPATIBLE') === true) { + return badRequestResponse(resource, err) + } + // ignore + } } const handlerArgs: FetchHandlerFunctionArg = { resource: resource.toString(), cid, path, accept, session: options?.session ?? 
import { prefixLogger } from '@libp2p/logger'
import { expect } from 'aegir/chai'
import { CID } from 'multiformats/cid'
import { getRedirectUrl, getSpecCompliantPath } from '../../src/utils/handle-redirects.js'

const logger = prefixLogger('test:handle-redirects')

// Unit tests for the redirect helpers: trailing-slash normalization of
// directory requests and path gateway -> subdomain gateway redirect URLs.
describe('handle-redirects', () => {
  const cid = CID.parse('bafkqabtimvwgy3yk')

  describe('getSpecCompliantPath', () => {
    // the below are all assuming the above identity CID is a unixFS directory CID
    it('should handle ipfs:// urls', () => {
      expect(getSpecCompliantPath(`ipfs://${cid}`)).to.equal(`ipfs://${cid}/`)
      expect(getSpecCompliantPath(`ipfs://${cid}/file.txt`)).to.equal(`ipfs://${cid}/file.txt`)
    })

    it('should handle ipns:// urls', () => {
      expect(getSpecCompliantPath(`ipns://${cid}`)).to.equal(`ipns://${cid}/`)
      expect(getSpecCompliantPath(`ipns://${cid}/file.txt`)).to.equal(`ipns://${cid}/file.txt`)
    })

    it('should handle http:// path urls', () => {
      expect(getSpecCompliantPath(`http://ipfs.io/ipfs/${cid}`)).to.equal(`http://ipfs.io/ipfs/${cid}/`)
      expect(getSpecCompliantPath(`http://ipfs.io/ipfs/${cid}/file.txt`)).to.equal(`http://ipfs.io/ipfs/${cid}/file.txt`)
    })

    it('should handle http:// subdomain urls', () => {
      // subdomain gateway urls carry the CID in the host, not in the path
      expect(getSpecCompliantPath(`http://${cid}.ipfs.dweb.link`)).to.equal(`http://${cid}.ipfs.dweb.link/`)
      expect(getSpecCompliantPath(`http://${cid}.ipfs.dweb.link/dir`)).to.equal(`http://${cid}.ipfs.dweb.link/dir/`)
      expect(getSpecCompliantPath(`http://${cid}.ipfs.dweb.link/file.txt`)).to.equal(`http://${cid}.ipfs.dweb.link/file.txt`)
    })
  })

  describe('getRedirectUrl', () => {
    it('returns path gateway url if headers is empty', async () => {
      const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
      const options = { headers: new Headers() }

      const url = await getRedirectUrl({ resource, options, logger, cid })
      expect(url).to.equal('http://ipfs.io/ipfs/bafkqabtimvwgy3yk')
    })

    it('returns subdomain gateway url if host is passed', async () => {
      const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
      const options = { headers: new Headers({ host: 'ipfs.io' }) }

      const url = await getRedirectUrl({ resource, options, logger, cid })
      expect(url).to.equal('http://bafkqabtimvwgy3yk.ipfs.ipfs.io/')
    })

    it('returns subdomain gateway url if x-forwarded-host is passed', async () => {
      const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
      const options = { headers: new Headers({ 'x-forwarded-host': 'dweb.link' }) }

      const url = await getRedirectUrl({ resource, options, logger, cid })
      expect(url).to.equal('http://bafkqabtimvwgy3yk.ipfs.dweb.link/')
    })

    it('returns https subdomain gateway url if proto & host are passed', async () => {
      const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
      const options = { headers: new Headers({ host: 'ipfs.io', 'x-forwarded-proto': 'https' }) }

      const url = await getRedirectUrl({ resource, options, logger, cid })
      expect(url).to.equal('https://bafkqabtimvwgy3yk.ipfs.ipfs.io/')
    })

    it('returns the given subdomain gateway url given a subdomain gateway url', async () => {
      const resource = 'https://bafkqabtimvwgy3yk.ipfs.inbrowser.dev'
      const options = { headers: new Headers({ host: 'bafkqabtimvwgy3yk.ipfs.inbrowser.dev' }) }

      const url = await getRedirectUrl({ resource, options, logger, cid })
      expect(url).to.equal('https://bafkqabtimvwgy3yk.ipfs.inbrowser.dev')
    })
  })
})