Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: set cache-control header correctly #19

Merged
merged 9 commits into from
Mar 21, 2024
5 changes: 3 additions & 2 deletions packages/verified-fetch/src/utils/parse-resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ export async function parseResource (resource: Resource, { ipns, logger }: Parse
cid,
protocol: 'ipfs',
path: '',
query: {}
}
query: {},
ttl: 29030400 // 1 year for ipfs content
} satisfies ParsedUrlStringResults
}

throw new TypeError(`Invalid resource. Cannot determine CID from resource: ${resource}`)
Expand Down
84 changes: 66 additions & 18 deletions packages/verified-fetch/src/utils/parse-url-string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ import { peerIdFromString } from '@libp2p/peer-id'
import { CID } from 'multiformats/cid'
import { TLRU } from './tlru.js'
import type { RequestFormatShorthand } from '../types.js'
import type { IPNS, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns'
import type { DNSLinkResolveResult, IPNS, IPNSResolveResult, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns'
import type { ComponentLogger } from '@libp2p/interface'
import type { ProgressOptions } from 'progress-events'

const ipnsCache = new TLRU<ResolveResult>(1000)
const ipnsCache = new TLRU<DNSLinkResolveResult | IPNSResolveResult>(1000)

export interface ParseUrlStringInput {
urlString: string
Expand All @@ -23,30 +23,66 @@ export interface ParsedUrlQuery extends Record<string, string | unknown> {
filename?: string
}

/**
 * Shape returned after a resource string has been parsed (and, for IPNS /
 * DNSLink names, resolved).
 *
 * Only `protocol`, `query` and `ttl` are declared here; any further fields
 * (callers in this package read `cid` and `path`) are inherited from
 * `ResolveResult`.
 */
interface ParsedUrlStringResultsBase extends ResolveResult {
  protocol: 'ipfs' | 'ipns'
  query: ParsedUrlQuery

  /**
   * seconds as a number
   */
  ttl?: number
}

export type ParsedUrlStringResults = ParsedUrlStringResultsBase

const URL_REGEX = /^(?<protocol>ip[fn]s):\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const PATH_REGEX = /^\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const PATH_GATEWAY_REGEX = /^https?:\/\/(.*[^/])\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const SUBDOMAIN_GATEWAY_REGEX = /^https?:\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\.(?<protocol>ip[fn]s)\.([^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/

/**
 * The named capture groups shared by every URL pattern above.
 */
interface MatchUrlGroups {
  protocol: 'ipfs' | 'ipns'
  cidOrPeerIdOrDnsLink: string
  path?: string
  queryString?: string
}

/**
 * Tries each supported URL shape in turn (`ipfs://` / `ipns://` URLs,
 * `/ipfs/...` paths, path gateways, subdomain gateways) and returns the named
 * capture groups of the first pattern that matches.
 *
 * @param urlString - the resource string to decompose
 * @returns the protocol, the CID / PeerId / DNSLink label, and the (possibly
 * empty) path and query string
 * @throws {TypeError} when `urlString` matches none of the supported shapes
 */
function matchURLString (urlString: string): MatchUrlGroups {
  for (const pattern of [URL_REGEX, PATH_REGEX, PATH_GATEWAY_REGEX, SUBDOMAIN_GATEWAY_REGEX]) {
    const groups = urlString.match(pattern)?.groups

    if (groups == null) {
      continue
    }

    const { protocol, cidOrPeerIdOrDnsLink, path, queryString } = groups

    // Every pattern restricts `protocol` to ip[fn]s and requires a non-empty
    // identifier, so this check always passes on a match; it exists so the
    // compiler can verify the return type instead of relying on a forced cast.
    if ((protocol === 'ipfs' || protocol === 'ipns') && cidOrPeerIdOrDnsLink != null) {
      return { protocol, cidOrPeerIdOrDnsLink, path, queryString }
    }
  }

  throw new TypeError(`Invalid URL: ${urlString}, please use ipfs://, ipns://, or gateway URLs only`)
}

/**
 * Determines the TTL for the resolved resource that will be used for the
 * `Cache-Control` header's `max-age` directive. max-age is in seconds.
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
 *
 * A DNSLink result carries its TTL on `answer.TTL` and it is returned as-is.
 * An IPNS result carries it on `record.ttl` as a BigInt nominally representing
 * "nanoseconds", which has to be converted back to seconds.
 *
 * For more TTL nuances:
 *
 * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200
 * @see https://github.com/ipfs/js-ipns/pull/308
 *
 * @param resolveResult - the IPNS or DNSLink resolution result, if any
 * @returns the TTL in seconds, or `undefined` when there is no resolve result
 * or it carries no TTL
 */
function calculateTtl (resolveResult?: IPNSResolveResult | DNSLinkResolveResult): number | undefined {
  if (resolveResult == null) {
    return undefined
  }

  // For some reason, ipns "nanoseconds" are 1e-8 of a second, instead of 1e-9.
  const ipnsTtlUnitsPerSecond = BigInt(1e8)

  // The casts only probe for the field that belongs to each result kind; the
  // optional chaining makes the probe safe when the field is absent.
  const dnsLinkTtl = (resolveResult as DNSLinkResolveResult).answer?.TTL
  const ipnsTtlNs = (resolveResult as IPNSResolveResult).record?.ttl
  const ipnsTtl = ipnsTtlNs != null ? Number(ipnsTtlNs / ipnsTtlUnitsPerSecond) : undefined

  // A DNSLink TTL wins when both happen to be present.
  return dnsLinkTtl ?? ipnsTtl
}

/**
* For dnslinks see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header
* DNSLink names include . which means they must be inlined into a single DNS label to provide unique origin and work with wildcard TLS certificates.
Expand Down Expand Up @@ -89,32 +125,36 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
let cid: CID | undefined
let resolvedPath: string | undefined
const errors: Error[] = []
let resolveResult: IPNSResolveResult | DNSLinkResolveResult | undefined

if (protocol === 'ipfs') {
try {
cid = CID.parse(cidOrPeerIdOrDnsLink)
/**
* no ttl set. @link {setCacheControlHeader}
*/
} catch (err) {
log.error(err)
errors.push(new TypeError('Invalid CID for ipfs://<cid> URL'))
}
} else {
let resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink)
// protocol is ipns
resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink)

if (resolveResult != null) {
cid = resolveResult.cid
resolvedPath = resolveResult.path
log.trace('resolved %s to %c from cache', cidOrPeerIdOrDnsLink, cid)
} else {
// protocol is ipns
log.trace('attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink)
log.trace('Attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink)
let peerId = null
try {
// try resolving as an IPNS name
peerId = peerIdFromString(cidOrPeerIdOrDnsLink)
resolveResult = await ipns.resolve(peerId, { onProgress: options?.onProgress })
cid = resolveResult?.cid
resolvedPath = resolveResult?.path
cid = resolveResult.cid
resolvedPath = resolveResult.path
log.trace('resolved %s to %c', cidOrPeerIdOrDnsLink, cid)
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2)
} catch (err) {
if (peerId == null) {
log.error('could not parse PeerId string "%s"', cidOrPeerIdOrDnsLink, err)
Expand All @@ -126,6 +166,7 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
}

if (cid == null) {
// cid is still null, try resolving as a DNSLink
let decodedDnsLinkLabel = cidOrPeerIdOrDnsLink
if (isInlinedDnsLink(cidOrPeerIdOrDnsLink)) {
decodedDnsLinkLabel = dnsLinkLabelDecoder(cidOrPeerIdOrDnsLink)
Expand All @@ -138,7 +179,6 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
cid = resolveResult?.cid
resolvedPath = resolveResult?.path
log.trace('resolved %s to %c', decodedDnsLinkLabel, cid)
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2)
} catch (err: any) {
log.error('could not resolve DnsLink for "%s"', cidOrPeerIdOrDnsLink, err)
errors.push(err)
Expand All @@ -155,6 +195,13 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
throw new AggregateError(errors, `Invalid resource. Cannot determine CID from URL "${urlString}"`)
}

const ttl = calculateTtl(resolveResult)

if (resolveResult != null) {
// use the ttl for the resolved resource for the cache, but fallback to 2 minutes if not available
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, ttl ?? 60 * 1000 * 2)
}

// parse query string
const query: Record<string, any> = {}

Expand All @@ -177,9 +224,10 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
return {
protocol,
cid,
path: joinPaths(resolvedPath, urlPath),
query
}
path: joinPaths(resolvedPath, urlPath ?? ''),
query,
ttl
} satisfies ParsedUrlStringResults
}

/**
Expand Down
38 changes: 38 additions & 0 deletions packages/verified-fetch/src/utils/response-headers.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,41 @@
interface CacheControlHeaderOptions {
  /**
   * This should be seconds as a number.
   *
   * See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
   */
  ttl?: number
  protocol: 'ipfs' | 'ipns'
  response: Response
}

/**
 * Sets the `cache-control` header on `response` (mutating it in place).
 *
 * - `ipfs` content always gets `public, max-age=29030400, immutable`; any
 * `ttl` passed in is ignored.
 * - `ipns` content with a known `ttl` gets `public, max-age=<ttl>`.
 * - `ipns` content with an unknown `ttl` gets a 5 minute `max-age`.
 *
 * Implementations may place an upper bound on any TTL received, as noted in Section 8 of [rfc2181].
 * If TTL value is unknown, implementations should not send a Cache-Control
 * No matter if TTL value is known or not, implementations should always send a Last-Modified header with the timestamp of the record resolution.
 *
 * @see https://specs.ipfs.tech/http-gateways/path-gateway/#cache-control-response-header
 */
export function setCacheControlHeader ({ ttl, protocol, response }: CacheControlHeaderOptions): void {
  let headerValue: string

  if (protocol === 'ipfs') {
    headerValue = 'public, max-age=29030400, immutable'
  } else if (ttl == null) {
    /**
     * default limit for unknown TTL: "use 5 minute as default fallback when it is not available."
     *
     * @see https://github.com/ipfs/boxo/issues/329#issuecomment-1995236409
     */
    headerValue = 'public, max-age=300'
  } else {
    headerValue = `public, max-age=${ttl}`
  }

  // every branch above assigns a value, so the header is always set
  response.headers.set('cache-control', headerValue)
}

/**
* This function returns the value of the `Content-Range` header for a given range.
* If you know the total size of the body, pass it as `byteSize`
Expand Down
8 changes: 7 additions & 1 deletion packages/verified-fetch/src/verified-fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { getETag } from './utils/get-e-tag.js'
import { getStreamFromAsyncIterable } from './utils/get-stream-from-async-iterable.js'
import { tarStream } from './utils/get-tar-stream.js'
import { parseResource } from './utils/parse-resource.js'
import { setCacheControlHeader } from './utils/response-headers.js'
import { badRequestResponse, movedPermanentlyResponse, notAcceptableResponse, notSupportedResponse, okResponse, badRangeResponse, okRangeResponse, badGatewayResponse } from './utils/responses.js'
import { selectOutputType, queryFormatToAcceptHeader } from './utils/select-output-type.js'
import { walkPath } from './utils/walk-path.js'
Expand Down Expand Up @@ -441,11 +442,15 @@ export class VerifiedFetch {
let cid: ParsedUrlStringResults['cid']
let path: ParsedUrlStringResults['path']
let query: ParsedUrlStringResults['query']
let ttl: ParsedUrlStringResults['ttl']
let protocol: ParsedUrlStringResults['protocol']
try {
const result = await parseResource(resource, { ipns: this.ipns, logger: this.helia.logger }, options)
cid = result.cid
path = result.path
query = result.query
ttl = result.ttl
protocol = result.protocol
} catch (err) {
this.log.error('error parsing resource %s', resource, err)

Expand Down Expand Up @@ -516,7 +521,8 @@ export class VerifiedFetch {
}

response.headers.set('etag', getETag({ cid, reqFormat, weak: false }))
response.headers.set('cache-control', 'public, max-age=29030400, immutable')

setCacheControlHeader({ response, ttl, protocol })
// https://specs.ipfs.tech/http-gateways/path-gateway/#x-ipfs-path-response-header
response.headers.set('X-Ipfs-Path', resource.toString())

Expand Down
131 changes: 131 additions & 0 deletions packages/verified-fetch/test/cache-control-header.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import { dagCbor } from '@helia/dag-cbor'
import { ipns } from '@helia/ipns'
import { stop } from '@libp2p/interface'
import { createEd25519PeerId } from '@libp2p/peer-id-factory'
import { dns } from '@multiformats/dns'
import { expect } from 'aegir/chai'
import Sinon from 'sinon'
import { stubInterface } from 'sinon-ts'
import { VerifiedFetch } from '../src/verified-fetch.js'
import { createHelia } from './fixtures/create-offline-helia.js'
import type { Helia } from '@helia/interface'
import type { IPNS } from '@helia/ipns'
import type { DNSResponse } from '@multiformats/dns'

/**
 * Builds a stubbed DNS response whose `Answer` section holds exactly one
 * record made from the given fields.
 */
function answerFake (data: string, TTL: number, name: string, type: number): DNSResponse {
  const response = stubInterface<DNSResponse>()
  const record = { data, TTL, name, type }

  response.Answer = [record]

  return response
}
/**
 * Verifies the `Cache-Control` header that verified-fetch puts on responses
 * for immutable (`ipfs`) content, IPNS names and DNSLink names.
 */
describe('cache-control header', () => {
  let helia: Helia
  let name: IPNS
  let verifiedFetch: VerifiedFetch
  let customDnsResolver: Sinon.SinonStub<any[], Promise<DNSResponse>>

  beforeEach(async () => {
    customDnsResolver = Sinon.stub()
    helia = await createHelia({
      dns: dns({
        resolvers: {
          '.': customDnsResolver
        }
      })
    })
    name = ipns(helia)
    verifiedFetch = new VerifiedFetch({
      helia
    })
  })

  afterEach(async () => {
    await stop(helia, verifiedFetch)
  })

  it('should return the correct max-age in the cache-control header for immutable responses', async () => {
    const obj = {
      hello: 'world'
    }
    const c = dagCbor(helia)
    const cid = await c.add(obj)

    const resp = await verifiedFetch.fetch(cid)

    expect(resp).to.be.ok()
    expect(resp.status).to.equal(200)
    expect(resp.headers.get('Cache-Control')).to.equal('public, max-age=29030400, immutable')
  })

  it('should not contain immutable in the cache-control header for an IPNS name', async () => {
    const obj = {
      hello: 'world'
    }
    const c = dagCbor(helia)
    const cid = await c.add(obj)

    const oneHourInMs = 1000 * 60 * 60
    const peerId = await createEd25519PeerId()

    // ipns currently only allows customising the lifetime which is also used as the TTL
    await name.publish(peerId, cid, { lifetime: oneHourInMs })

    const resp = await verifiedFetch.fetch(`ipns://${peerId}`)
    expect(resp).to.be.ok()
    expect(resp.status).to.equal(200)

    expect(resp.headers.get('Cache-Control')).to.not.containIgnoreCase('immutable')
  })

  it('should return the correct max-age in the cache-control header for an IPNS name', async () => {
    const obj = {
      hello: 'world'
    }
    const c = dagCbor(helia)
    const cid = await c.add(obj)

    const oneHourInSeconds = 60 * 60
    const peerId = await createEd25519PeerId()

    /**
     * ipns currently only allows customising the lifetime which is also used as the TTL
     *
     * lifetime is coming back as 100000 times larger than expected
     *
     * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200
     * @see https://github.com/ipfs/js-ipns/pull/308
     */
    await name.publish(peerId, cid, { lifetime: oneHourInSeconds * 1000 }) // pass to ipns as milliseconds

    const resp = await verifiedFetch.fetch(`ipns://${peerId}`)
    expect(resp).to.be.ok()
    expect(resp.status).to.equal(200)

    expect(resp.headers.get('Cache-Control')).to.equal(`public, max-age=${oneHourInSeconds}`)
  })

  it('should return the DNS record TTL as max-age in the cache-control header for a DNSLink name', async () => {
    // replace the default instance with one that uses the stubbed DNS resolver
    verifiedFetch = new VerifiedFetch({
      helia
    }, {
      dnsResolvers: [customDnsResolver]
    })

    const obj = {
      hello: 'world'
    }
    const c = dagCbor(helia)
    const cid = await c.add(obj)
    // 16 is the TXT record type; 666 is the TTL expected back as max-age
    customDnsResolver.withArgs('_dnslink.example-domain.com').resolves(answerFake(`dnslink=/ipfs/${cid}`, 666, '_dnslink.example-domain.com', 16))

    const resp = await verifiedFetch.fetch('ipns://example-domain.com')
    expect(resp).to.be.ok()
    expect(resp.status).to.equal(200)

    // an exact match also proves that `immutable` is absent
    expect(resp.headers.get('Cache-Control')).to.equal('public, max-age=666')
  })
})
Loading
Loading