diff --git a/packages/verified-fetch/src/utils/parse-resource.ts b/packages/verified-fetch/src/utils/parse-resource.ts index 49e0b6d3..4b3b000c 100644 --- a/packages/verified-fetch/src/utils/parse-resource.ts +++ b/packages/verified-fetch/src/utils/parse-resource.ts @@ -32,8 +32,9 @@ export async function parseResource (resource: Resource, { ipns, logger }: Parse cid, protocol: 'ipfs', path: '', - query: {} - } + query: {}, + ttl: 29030400 // 1 year for ipfs content + } satisfies ParsedUrlStringResults } throw new TypeError(`Invalid resource. Cannot determine CID from resource: ${resource}`) diff --git a/packages/verified-fetch/src/utils/parse-url-string.ts b/packages/verified-fetch/src/utils/parse-url-string.ts index 4d99a460..bf3b6127 100644 --- a/packages/verified-fetch/src/utils/parse-url-string.ts +++ b/packages/verified-fetch/src/utils/parse-url-string.ts @@ -2,11 +2,11 @@ import { peerIdFromString } from '@libp2p/peer-id' import { CID } from 'multiformats/cid' import { TLRU } from './tlru.js' import type { RequestFormatShorthand } from '../types.js' -import type { IPNS, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns' +import type { DNSLinkResolveResult, IPNS, IPNSResolveResult, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns' import type { ComponentLogger } from '@libp2p/interface' import type { ProgressOptions } from 'progress-events' -const ipnsCache = new TLRU(1000) +const ipnsCache = new TLRU(1000) export interface ParseUrlStringInput { urlString: string @@ -23,30 +23,80 @@ export interface ParsedUrlQuery extends Record { filename?: string } -export interface ParsedUrlStringResults { - protocol: string - path: string - cid: CID +interface ParsedUrlStringResultsBase extends ResolveResult { + protocol: 'ipfs' | 'ipns' query: ParsedUrlQuery + + /** + * seconds as a number + */ + ttl?: number } +export type ParsedUrlStringResults = ParsedUrlStringResultsBase + const URL_REGEX = /^(?ip[fn]s):\/\/(?[^/?]+)\/?(?[^?]*)\??(?.*)$/ const PATH_REGEX = /^\/(?ip[fn]s)\/(?[^/?]+)\/?(?[^?]*)\??(?.*)$/ const PATH_GATEWAY_REGEX = /^https?:\/\/(.*[^/])\/(?ip[fn]s)\/(?[^/?]+)\/?(?[^?]*)\??(?.*)$/ const SUBDOMAIN_GATEWAY_REGEX = /^https?:\/\/(?[^/?]+)\.(?ip[fn]s)\.([^/?]+)\/?(?[^?]*)\??(?.*)$/ -function matchURLString (urlString: string): Record { +interface MatchUrlGroups { + protocol: 'ipfs' | 'ipns' + cidOrPeerIdOrDnsLink: string + path?: string + queryString?: string +} + +function matchUrlGroupsGuard (groups?: null | { [key in string]: string; } | MatchUrlGroups): groups is MatchUrlGroups { + const protocol = groups?.protocol + if (protocol == null) return false + const cidOrPeerIdOrDnsLink = groups?.cidOrPeerIdOrDnsLink + if (cidOrPeerIdOrDnsLink == null) return false + const path = groups?.path + const queryString = groups?.queryString + + return ['ipns', 'ipfs'].includes(protocol) && + typeof cidOrPeerIdOrDnsLink === 'string' && + (path == null || typeof path === 'string') && + (queryString == null || typeof queryString === 'string') +} + +function matchURLString (urlString: string): MatchUrlGroups { for (const pattern of [URL_REGEX, PATH_REGEX, PATH_GATEWAY_REGEX, SUBDOMAIN_GATEWAY_REGEX]) { const match = urlString.match(pattern) - if (match?.groups != null) { - return match.groups + if (matchUrlGroupsGuard(match?.groups)) { + return match.groups satisfies MatchUrlGroups } } throw new TypeError(`Invalid URL: ${urlString}, please use ipfs://, ipns://, or gateway URLs only`) } +/** + * determines the TTL for the resolved resource that will be used for the `Cache-Control` header's `max-age` directive. + * max-age is in seconds + * + * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives + * + * If we have ipnsTtlNs, it will be a BigInt representing "nanoseconds". We need to convert it back to seconds. + * + * For more TTL nuances: + * + * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200 + * @see https://github.com/ipfs/js-ipns/pull/308 + */ +function calculateTtl (resolveResult?: IPNSResolveResult | DNSLinkResolveResult): number | undefined { + if (resolveResult == null) { + return undefined + } + const dnsLinkTtl = (resolveResult as DNSLinkResolveResult).answer?.TTL + const ipnsTtlNs = (resolveResult as IPNSResolveResult).record?.ttl + // For some reason, ipns "nanoseconds" are 1e-8 of a second, instead of 1e-9. + const ipnsTtl = ipnsTtlNs != null ? Number(ipnsTtlNs / BigInt(1e8)) : undefined + return dnsLinkTtl ?? ipnsTtl +} + /** * For dnslinks see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header * DNSLink names include . which means they must be inlined into a single DNS label to provide unique origin and work with wildcard TLS certificates. @@ -89,32 +139,36 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin let cid: CID | undefined let resolvedPath: string | undefined const errors: Error[] = [] + let resolveResult: IPNSResolveResult | DNSLinkResolveResult | undefined if (protocol === 'ipfs') { try { cid = CID.parse(cidOrPeerIdOrDnsLink) + /** + * no ttl set. @link {setCacheControlHeader} + */ } catch (err) { log.error(err) errors.push(new TypeError('Invalid CID for ipfs:// URL')) } } else { - let resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink) + // protocol is ipns + resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink) if (resolveResult != null) { cid = resolveResult.cid resolvedPath = resolveResult.path log.trace('resolved %s to %c from cache', cidOrPeerIdOrDnsLink, cid) } else { - // protocol is ipns - log.trace('attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink) + log.trace('Attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink) let peerId = null try { + // try resolving as an IPNS name peerId = peerIdFromString(cidOrPeerIdOrDnsLink) resolveResult = await ipns.resolve(peerId, { onProgress: options?.onProgress }) - cid = resolveResult?.cid - resolvedPath = resolveResult?.path + cid = resolveResult.cid + resolvedPath = resolveResult.path log.trace('resolved %s to %c', cidOrPeerIdOrDnsLink, cid) - ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2) } catch (err) { if (peerId == null) { log.error('could not parse PeerId string "%s"', cidOrPeerIdOrDnsLink, err) @@ -126,6 +180,7 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin } if (cid == null) { + // cid is still null, try resolving as a DNSLink let decodedDnsLinkLabel = cidOrPeerIdOrDnsLink if (isInlinedDnsLink(cidOrPeerIdOrDnsLink)) { decodedDnsLinkLabel = dnsLinkLabelDecoder(cidOrPeerIdOrDnsLink) @@ -138,7 +193,6 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin cid = resolveResult?.cid resolvedPath = resolveResult?.path log.trace('resolved %s to %c', decodedDnsLinkLabel, cid) - ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2) } catch (err: any) { log.error('could not resolve DnsLink for "%s"', cidOrPeerIdOrDnsLink, err) errors.push(err) @@ -155,6 +209,13 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin throw new AggregateError(errors, `Invalid resource. Cannot determine CID from URL "${urlString}"`) } + const ttl = calculateTtl(resolveResult) + + if (resolveResult != null) { + // use the ttl for the resolved resouce for the cache, but fallback to 2 minutes if not available + ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, ttl ?? 60 * 1000 * 2) + } + // parse query string const query: Record = {} @@ -177,9 +238,10 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin return { protocol, cid, - path: joinPaths(resolvedPath, urlPath), - query - } + path: joinPaths(resolvedPath, urlPath ?? ''), + query, + ttl + } satisfies ParsedUrlStringResults } /** diff --git a/packages/verified-fetch/src/utils/response-headers.ts b/packages/verified-fetch/src/utils/response-headers.ts index 19d2bb98..1d980832 100644 --- a/packages/verified-fetch/src/utils/response-headers.ts +++ b/packages/verified-fetch/src/utils/response-headers.ts @@ -1,3 +1,39 @@ +interface CacheControlHeaderOptions { + /** + * This should be seconds as a number. + * + * See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives + */ + ttl?: number + protocol: 'ipfs' | 'ipns' + response: Response +} + +/** + * Implementations may place an upper bound on any TTL received, as noted in Section 8 of [rfc2181]. + * If TTL value is unknown, implementations should not send a Cache-Control + * No matter if TTL value is known or not, implementations should always send a Last-Modified header with the timestamp of the record resolution. + * + * @see https://specs.ipfs.tech/http-gateways/path-gateway/#cache-control-response-header + */ +export function setCacheControlHeader ({ ttl, protocol, response }: CacheControlHeaderOptions): void { + let headerValue: string + if (protocol === 'ipfs') { + headerValue = 'public, max-age=29030400, immutable' + } else if (ttl == null) { + /** + * default limit for unknown TTL: "use 5 minute as default fallback when it is not available." + * + * @see https://github.com/ipfs/boxo/issues/329#issuecomment-1995236409 + */ + headerValue = 'public, max-age=300' + } else { + headerValue = `public, max-age=${ttl}` + } + + response.headers.set('cache-control', headerValue) +} + /** * This function returns the value of the `Content-Range` header for a given range. * If you know the total size of the body, pass it as `byteSize` diff --git a/packages/verified-fetch/src/verified-fetch.ts b/packages/verified-fetch/src/verified-fetch.ts index 77ae665e..15280d91 100644 --- a/packages/verified-fetch/src/verified-fetch.ts +++ b/packages/verified-fetch/src/verified-fetch.ts @@ -22,6 +22,7 @@ import { getETag } from './utils/get-e-tag.js' import { getStreamFromAsyncIterable } from './utils/get-stream-from-async-iterable.js' import { tarStream } from './utils/get-tar-stream.js' import { parseResource } from './utils/parse-resource.js' +import { setCacheControlHeader } from './utils/response-headers.js' import { badRequestResponse, movedPermanentlyResponse, notAcceptableResponse, notSupportedResponse, okResponse, badRangeResponse, okRangeResponse, badGatewayResponse } from './utils/responses.js' import { selectOutputType, queryFormatToAcceptHeader } from './utils/select-output-type.js' import { walkPath } from './utils/walk-path.js' @@ -441,11 +442,15 @@ export class VerifiedFetch { let cid: ParsedUrlStringResults['cid'] let path: ParsedUrlStringResults['path'] let query: ParsedUrlStringResults['query'] + let ttl: ParsedUrlStringResults['ttl'] + let protocol: ParsedUrlStringResults['protocol'] try { const result = await parseResource(resource, { ipns: this.ipns, logger: this.helia.logger }, options) cid = result.cid path = result.path query = result.query + ttl = result.ttl + protocol = result.protocol } catch (err) { this.log.error('error parsing resource %s', resource, err) @@ -516,7 +521,8 @@ export class VerifiedFetch { } response.headers.set('etag', getETag({ cid, reqFormat, weak: false })) - response.headers.set('cache-control', 'public, max-age=29030400, immutable') + + setCacheControlHeader({ response, ttl, protocol }) // https://specs.ipfs.tech/http-gateways/path-gateway/#x-ipfs-path-response-header response.headers.set('X-Ipfs-Path', resource.toString()) diff --git a/packages/verified-fetch/test/cache-control-header.spec.ts b/packages/verified-fetch/test/cache-control-header.spec.ts new file mode 100644 index 00000000..5c234fea --- /dev/null +++ b/packages/verified-fetch/test/cache-control-header.spec.ts @@ -0,0 +1,125 @@ +import { dagCbor } from '@helia/dag-cbor' +import { ipns } from '@helia/ipns' +import { stop } from '@libp2p/interface' +import { createEd25519PeerId } from '@libp2p/peer-id-factory' +import { dns } from '@multiformats/dns' +import { expect } from 'aegir/chai' +import Sinon from 'sinon' +import { stubInterface } from 'sinon-ts' +import { VerifiedFetch } from '../src/verified-fetch.js' +import { createHelia } from './fixtures/create-offline-helia.js' +import type { Helia } from '@helia/interface' +import type { IPNS } from '@helia/ipns' +import type { DNSResponse } from '@multiformats/dns' + +function answerFake (data: string, TTL: number, name: string, type: number): DNSResponse { + const fake = stubInterface() + fake.Answer = [{ + data, + TTL, + name, + type + }] + return fake +} +describe('cache-control header', () => { + let helia: Helia + let name: IPNS + let verifiedFetch: VerifiedFetch + let customDnsResolver: Sinon.SinonStub> + + beforeEach(async () => { + customDnsResolver = Sinon.stub() + helia = await createHelia({ + dns: dns({ + resolvers: { + '.': customDnsResolver + } + }) + }) + name = ipns(helia) + verifiedFetch = new VerifiedFetch({ + helia + }) + }) + + afterEach(async () => { + await stop(helia, verifiedFetch) + }) + + it('should allow return the correct max-age in the cache header for immutable responses', async () => { + const obj = { + hello: 'world' + } + const c = dagCbor(helia) + const cid = await c.add(obj) + + const resp = await verifiedFetch.fetch(cid) + + expect(resp).to.be.ok() + expect(resp.status).to.equal(200) + expect(resp.headers.get('Cache-Control')).to.equal('public, max-age=29030400, immutable') + }) + + it('should return not contain immutable in the cache-control header for an IPNS name', async () => { + const obj = { + hello: 'world' + } + const c = dagCbor(helia) + const cid = await c.add(obj) + + const oneHourInMs = 1000 * 60 * 60 + const peerId = await createEd25519PeerId() + + // ipns currently only allows customising the lifetime which is also used as the TTL + await name.publish(peerId, cid, { lifetime: oneHourInMs }) + + const resp = await verifiedFetch.fetch(`ipns://${peerId}`) + expect(resp).to.be.ok() + expect(resp.status).to.equal(200) + + expect(resp.headers.get('Cache-Control')).to.not.containIgnoreCase('immutable') + }) + + it('should return the correct max-age in the cache-control header for an IPNS name', async () => { + const obj = { + hello: 'world' + } + const c = dagCbor(helia) + const cid = await c.add(obj) + + const oneHourInSeconds = 60 * 60 + const peerId = await createEd25519PeerId() + + /** + * ipns currently only allows customising the lifetime which is also used as the TTL + * + * lifetime is coming back as 100000 times larger than expected + * + * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200 + * @see https://github.com/ipfs/js-ipns/pull/308 + */ + await name.publish(peerId, cid, { lifetime: oneHourInSeconds * 1000 }) // pass to ipns as milliseconds + + const resp = await verifiedFetch.fetch(`ipns://${peerId}`) + expect(resp).to.be.ok() + expect(resp.status).to.equal(200) + + expect(resp.headers.get('Cache-Control')).to.equal(`public, max-age=${oneHourInSeconds}`) + }) + + it('should not contain immutable in the cache-control header for a DNSLink name', async () => { + const obj = { + hello: 'world' + } + const c = dagCbor(helia) + const cid = await c.add(obj) + customDnsResolver.withArgs('_dnslink.example-domain.com').resolves(answerFake(`dnslink=/ipfs/${cid}`, 666, '_dnslink.example-domain.com', 16)) + + const resp = await verifiedFetch.fetch('ipns://example-domain.com') + expect(resp).to.be.ok() + expect(resp.status).to.equal(200) + + expect(resp.headers.get('Cache-Control')).to.equal('public, max-age=666') + }) +}) diff --git a/packages/verified-fetch/test/fixtures/ipns-stubs.ts b/packages/verified-fetch/test/fixtures/ipns-stubs.ts new file mode 100644 index 00000000..759790c3 --- /dev/null +++ b/packages/verified-fetch/test/fixtures/ipns-stubs.ts @@ -0,0 +1,19 @@ +import { stubInterface, type StubbedInstance } from 'sinon-ts' +import type { PeerId } from '@libp2p/interface' +import type { IPNSRecord } from 'ipns' + +export interface IpnsRecordStubOptions { + peerId: PeerId + ttl?: bigint +} + +/** + * When stubbing an IPNSRecord, we need to provide a PeerId and some ttl value or else we will get + * "SyntaxError: Cannot convert stub to a BigInt" when parse-url-string.ts calls `calculateTtl` + */ +export function ipnsRecordStub ({ peerId, ttl }: IpnsRecordStubOptions): StubbedInstance { + return stubInterface({ + value: peerId.toString(), + ttl + }) +} diff --git a/packages/verified-fetch/test/utils/parse-url-string.spec.ts b/packages/verified-fetch/test/utils/parse-url-string.spec.ts index cd91d8cd..fbf2341c 100644 --- a/packages/verified-fetch/test/utils/parse-url-string.spec.ts +++ b/packages/verified-fetch/test/utils/parse-url-string.spec.ts @@ -3,11 +3,11 @@ import { defaultLogger } from '@libp2p/logger' import { createEd25519PeerId } from '@libp2p/peer-id-factory' import { type Answer } from '@multiformats/dns' import { expect } from 'aegir/chai' -import { type IPNSRecord } from 'ipns' import { CID } from 'multiformats/cid' import { match } from 'sinon' import { stubInterface } from 'sinon-ts' import { parseUrlString } from '../../src/utils/parse-url-string.js' +import { ipnsRecordStub } from '../fixtures/ipns-stubs.js' import type { IPNS } from '@helia/ipns' import type { ComponentLogger, PeerId } from '@libp2p/interface' import type { StubbedInstance } from 'sinon-ts' @@ -75,8 +75,7 @@ describe('parseUrlString', () => { ipns, logger }) - ).to.eventually.be.rejected - .with.property('message', 'Could not parse PeerId in ipns url "mydomain.com", Non-base64 character') + ).to.eventually.be.rejected.with.property('message', 'Could not parse PeerId in ipns url "mydomain.com", Non-base64 character') }) }) @@ -441,7 +440,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(testPeerId)).resolves({ cid: CID.parse('QmQJ8fxavY54CUsxMSx9aE9Rdcmvhx8awJK2jzJp4iAqCr'), path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -458,7 +457,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(testPeerId)).resolves({ cid: CID.parse('QmQJ8fxavY54CUsxMSx9aE9Rdcmvhx8awJK2jzJp4iAqCr'), path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -475,7 +474,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(testPeerId)).resolves({ cid: CID.parse('QmQJ8fxavY54CUsxMSx9aE9Rdcmvhx8awJK2jzJp4iAqCr'), path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -492,7 +491,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(testPeerId)).resolves({ cid: CID.parse('QmQJ8fxavY54CUsxMSx9aE9Rdcmvhx8awJK2jzJp4iAqCr'), path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -511,7 +510,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(testPeerId)).resolves({ cid: CID.parse('QmQJ8fxavY54CUsxMSx9aE9Rdcmvhx8awJK2jzJp4iAqCr'), path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -535,7 +534,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(peerId)).resolves({ cid, path: recordPath, - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -557,7 +556,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(peerId)).resolves({ cid, path: recordPath, - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -579,7 +578,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(peerId)).resolves({ cid, path: recordPath, - record: stubInterface() + record: ipnsRecordStub({ peerId: testPeerId }) }) await assertMatchUrl( @@ -603,7 +602,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(peerId)).resolves({ cid, path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId }) }) }) @@ -691,7 +690,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(peerId)).resolves({ cid, path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId }) }) }) @@ -793,7 +792,7 @@ describe('parseUrlString', () => { ipns.resolve.withArgs(matchPeerId(value as PeerId)).resolves({ cid, path: '', - record: stubInterface() + record: ipnsRecordStub({ peerId: value as PeerId }) }) } else if (type === 'dnslink-encoded') { const matchValue = (value as string).replace(/-/g, '.')