diff --git a/copy/compression.go b/copy/compression.go
index 0164dd91da..081c49312f 100644
--- a/copy/compression.go
+++ b/copy/compression.go
@@ -35,10 +35,10 @@ var (
 // bpDetectCompressionStepData contains data that the copy pipeline needs about the “detect compression” step.
 type bpDetectCompressionStepData struct {
-	isCompressed      bool
-	format            compressiontypes.Algorithm        // Valid if isCompressed
-	decompressor      compressiontypes.DecompressorFunc // Valid if isCompressed
-	srcCompressorName string                            // Compressor name to possibly record in the blob info cache for the source blob.
+	isCompressed                 bool
+	format                       compressiontypes.Algorithm        // Valid if isCompressed
+	decompressor                 compressiontypes.DecompressorFunc // Valid if isCompressed
+	srcCompressorBaseVariantName string                            // Compressor name to possibly record in the blob info cache for the source blob.
 }
 
 // blobPipelineDetectCompressionStep updates *stream to detect its current compression format.
@@ -58,9 +58,9 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI
 		decompressor: decompressor,
 	}
 	if res.isCompressed {
-		res.srcCompressorName = format.Name()
+		res.srcCompressorBaseVariantName = format.BaseVariantName()
 	} else {
-		res.srcCompressorName = internalblobinfocache.Uncompressed
+		res.srcCompressorBaseVariantName = internalblobinfocache.Uncompressed
 	}
 
 	if expectedBaseFormat, known := expectedBaseCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.BaseVariantName() != expectedBaseFormat.Name() {
@@ -71,13 +71,13 @@
 // bpCompressionStepData contains data that the copy pipeline needs about the compression step.
 type bpCompressionStepData struct {
-	operation              bpcOperation                // What we are actually doing
-	uploadedOperation      types.LayerCompression      // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
-	uploadedAlgorithm      *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
-	uploadedAnnotations    map[string]string           // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
-	srcCompressorName      string                      // Compressor name to record in the blob info cache for the source blob.
-	uploadedCompressorName string                      // Compressor name to record in the blob info cache for the uploaded blob.
-	closers                []io.Closer                 // Objects to close after the upload is done, if any.
+	operation                    bpcOperation                // What we are actually doing
+	uploadedOperation            types.LayerCompression      // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
+	uploadedAlgorithm            *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
+	uploadedAnnotations          map[string]string           // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
+	srcCompressorBaseVariantName string                      // Compressor base variant name to record in the blob info cache for the source blob.
+	uploadedCompressorName       string                      // Compressor name to record in the blob info cache for the uploaded blob.
+	closers                      []io.Closer                 // Objects to close after the upload is done, if any.
 }
 
 type bpcOperation int
@@ -129,11 +129,11 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp
 		// We can’t do anything with an encrypted blob unless decrypted.
logrus.Debugf("Using original blob without modification for encrypted blob") return &bpCompressionStepData{ - operation: bpcOpPreserveOpaque, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: nil, - srcCompressorName: internalblobinfocache.UnknownCompression, - uploadedCompressorName: internalblobinfocache.UnknownCompression, + operation: bpcOpPreserveOpaque, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: nil, + srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression, + uploadedCompressorName: internalblobinfocache.UnknownCompression, }, nil } return nil, nil @@ -158,13 +158,13 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: bpcOpCompressUncompressed, - uploadedOperation: types.Compress, - uploadedAlgorithm: uploadedAlgorithm, - uploadedAnnotations: annotations, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: uploadedAlgorithm.Name(), - closers: []io.Closer{reader}, + operation: bpcOpCompressUncompressed, + uploadedOperation: types.Compress, + uploadedAlgorithm: uploadedAlgorithm, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: uploadedAlgorithm.Name(), + closers: []io.Closer{reader}, }, nil } return nil, nil @@ -199,13 +199,13 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp } succeeded = true return &bpCompressionStepData{ - operation: bpcOpRecompressCompressed, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: ic.compressionFormat, - uploadedAnnotations: annotations, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: ic.compressionFormat.Name(), - closers: []io.Closer{decompressed, recompressed}, + operation: bpcOpRecompressCompressed, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: ic.compressionFormat, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: ic.compressionFormat.Name(), + closers: []io.Closer{decompressed, recompressed}, }, nil } return nil, nil @@ -226,12 +226,12 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: bpcOpDecompressCompressed, - uploadedOperation: types.Decompress, - uploadedAlgorithm: nil, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: internalblobinfocache.Uncompressed, - closers: []io.Closer{s}, + operation: bpcOpDecompressCompressed, + uploadedOperation: types.Decompress, + uploadedAlgorithm: nil, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: internalblobinfocache.Uncompressed, + closers: []io.Closer{s}, }, nil } return nil, nil @@ -269,11 +269,14 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom algorithm = nil } return &bpCompressionStepData{ - operation: bpcOp, - uploadedOperation: uploadedOp, - uploadedAlgorithm: algorithm, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: detected.srcCompressorName, + operation: bpcOp, + uploadedOperation: uploadedOp, + uploadedAlgorithm: algorithm, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + // We only record the base variant of the format on upload; we didn’t do anything with + // the TOC, we don’t know whether it matches the blob digest, so we 
+		// reuse of any kind between the blob digest and the TOC digest.
+		uploadedCompressorName: detected.srcCompressorBaseVariantName,
 	}
 }
 
@@ -333,9 +336,9 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
 			return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation)
 		}
 	}
-	if d.srcCompressorName == "" || d.uploadedCompressorName == "" {
-		return fmt.Errorf("internal error: missing compressor names (src: %q, uploaded: %q)",
-			d.srcCompressorName, d.uploadedCompressorName)
+	if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" {
+		return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)",
+			d.srcCompressorBaseVariantName, d.uploadedCompressorName)
 	}
 	if d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
 		if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
@@ -347,15 +350,19 @@
 			// between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName
 			// with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about
 			// inconsistent data to be logged.
-			c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, d.uploadedCompressorName)
+			c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
+				BaseVariantCompressor: d.uploadedCompressorName,
+			})
 		}
 	}
 	if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
-		d.srcCompressorName != internalblobinfocache.UnknownCompression {
-		if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
-			// HACK: Don’t record zstd:chunked algorithms, see above.
-			c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, d.srcCompressorName)
-		}
+		d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
+		// If the source is already using some TOC-dependent variant, we either copied the
+		// blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest,
+		// so record neither the variant name, nor the TOC digest.
+		c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{
+			BaseVariantCompressor: d.srcCompressorBaseVariantName,
+		})
 	}
 	return nil
 }
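Reviewer note (not part of the patch): the reason detection now records format.BaseVariantName() rather than format.Name() is that TOC-dependent variants collapse into their base algorithm, so a zstd:chunked source blob is cached simply as zstd. A minimal standalone sketch of that mapping, using only the public compression API and the algorithm-name constants this patch already references:

    package main

    import (
    	"fmt"

    	"github.com/containers/image/v5/pkg/compression"
    	compressiontypes "github.com/containers/image/v5/pkg/compression/types"
    )

    func main() {
    	for _, name := range []string{
    		compressiontypes.GzipAlgorithmName,
    		compressiontypes.ZstdAlgorithmName,
    		compressiontypes.ZstdChunkedAlgorithmName,
    	} {
    		algo, err := compression.AlgorithmByName(name)
    		if err != nil {
    			panic(err)
    		}
    		// gzip and zstd report themselves; zstd:chunked reports zstd.
    		fmt.Printf("%s -> base variant %s\n", algo.Name(), algo.BaseVariantName())
    	}
    }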
diff --git a/internal/blobinfocache/blobinfocache.go b/internal/blobinfocache/blobinfocache.go
index 60f8e6a027..f31ee3124d 100644
--- a/internal/blobinfocache/blobinfocache.go
+++ b/internal/blobinfocache/blobinfocache.go
@@ -34,7 +34,7 @@ func (bic *v1OnlyBlobInfoCache) UncompressedDigestForTOC(tocDigest digest.Digest
 func (bic *v1OnlyBlobInfoCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
 }
 
-func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
+func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) {
 }
 
 func (bic *v1OnlyBlobInfoCache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2 {
diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go
index ed340ba478..276c8073e3 100644
--- a/internal/blobinfocache/types.go
+++ b/internal/blobinfocache/types.go
@@ -35,19 +35,25 @@ type BlobInfoCache2 interface {
 	// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
 	RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest)
 
-	// RecordDigestCompressorName records a compressor for the blob with the specified digest,
-	// or Uncompressed or UnknownCompression.
-	// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a
-	// digest just because some remote author claims so (e.g. because a manifest says so);
+	// RecordDigestCompressorData records data for the blob with the specified digest.
+	// WARNING: Only call this with LOCALLY VERIFIED data:
+	//   - don’t record a compressor for a digest just because some remote author claims so
+	//     (e.g. because a manifest says so);
 	// otherwise the cache could be poisoned and cause us to make incorrect edits to type
 	// information in a manifest.
-	RecordDigestCompressorName(anyDigest digest.Digest, compressorName string)
+	RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData)
 
 	// CandidateLocations2 returns a prioritized, limited, number of blobs and their locations (if known)
 	// that could possibly be reused within the specified (transport scope) (if they still
 	// exist, which is not guaranteed).
 	CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2
 }
 
+// DigestCompressorData is information known about how a blob is compressed.
+// (This is worded generically, but basically targeted at the zstd / zstd:chunked situation.)
+type DigestCompressorData struct {
+	BaseVariantCompressor string // A compressor’s base variant name, or Uncompressed or UnknownCompression.
+}
+
 // CandidateLocations2Options are used in CandidateLocations2.
 type CandidateLocations2Options struct {
 	// If !CanSubstitute, the returned candidates will match the submitted digest exactly; if
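Reviewer note (not part of the patch): wrapping the bare string in a DigestCompressorData struct presumably leaves room to add further fields later without another interface change. The WARNING amounts to a calling discipline; a small sketch of it below, with a hypothetical recordVerifiedCompressor helper. Since internal/blobinfocache is module-internal, this only compiles inside containers/image itself:

    package example

    import (
    	"github.com/containers/image/v5/internal/blobinfocache"
    	"github.com/opencontainers/go-digest"
    )

    // recordVerifiedCompressor records compression data only for a digest we
    // computed ourselves from the blob bytes, never one copied from a manifest.
    func recordVerifiedCompressor(cache blobinfocache.BlobInfoCache2, blob []byte, baseVariantName string) {
    	d := digest.FromBytes(blob) // locally verified
    	cache.RecordDigestCompressorData(d, blobinfocache.DigestCompressorData{
    		BaseVariantCompressor: baseVariantName,
    	})
    }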
diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go
index b13246b212..036d2c0149 100644
--- a/pkg/blobinfocache/boltdb/boltdb.go
+++ b/pkg/blobinfocache/boltdb/boltdb.go
@@ -295,12 +295,14 @@ func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompresse
 	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
 }
 
-// RecordDigestCompressorName records that the blob with digest anyDigest was compressed with the specified
-// compressor, or is blobinfocache.Uncompressed.
-// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
-// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
-// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
-func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
+// RecordDigestCompressorData records data for the blob with the specified digest.
+// WARNING: Only call this with LOCALLY VERIFIED data:
+//   - don’t record a compressor for a digest just because some remote author claims so
+//     (e.g. because a manifest says so);
+//
+// otherwise the cache could be poisoned and cause us to make incorrect edits to type
+// information in a manifest.
+func (bdc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) {
 	_ = bdc.update(func(tx *bolt.Tx) error {
 		b, err := tx.CreateBucketIfNotExists(digestCompressorBucket)
 		if err != nil {
@@ -308,14 +310,14 @@ func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressor
 		}
 		key := []byte(anyDigest.String())
 		if previousBytes := b.Get(key); previousBytes != nil {
-			if string(previousBytes) != compressorName {
-				logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), compressorName)
+			if string(previousBytes) != data.BaseVariantCompressor {
+				logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), data.BaseVariantCompressor)
 			}
 		}
-		if compressorName == blobinfocache.UnknownCompression {
+		if data.BaseVariantCompressor == blobinfocache.UnknownCompression {
 			return b.Delete(key)
 		}
-		return b.Put(key, []byte(compressorName))
+		return b.Put(key, []byte(data.BaseVariantCompressor))
 	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
 }
 
@@ -367,8 +369,10 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
 			compressorName = string(compressorNameValue)
 		}
 	}
-	ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName)
-	if !ok {
+	template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressorName,
+	})
+	if template == nil {
 		return candidates
 	}
 
@@ -382,28 +386,11 @@
 			if err := t.UnmarshalBinary(v); err != nil {
 				return err
 			}
-			candidates = append(candidates, prioritize.CandidateWithTime{
-				Candidate: blobinfocache.BICReplacementCandidate2{
-					Digest:               digest,
-					CompressionOperation: compressionOp,
-					CompressionAlgorithm: compressionAlgo,
-					Location:             types.BICLocationReference{Opaque: string(k)},
-				},
-				LastSeen: t,
-			})
+			candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: string(k)}, t))
 			return nil
 		}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
 	} else if v2Options != nil {
-		candidates = append(candidates, prioritize.CandidateWithTime{
-			Candidate: blobinfocache.BICReplacementCandidate2{
-				Digest:               digest,
-				CompressionOperation: compressionOp,
-				CompressionAlgorithm: compressionAlgo,
-				UnknownLocation:      true,
-				Location:             types.BICLocationReference{Opaque: ""},
-			},
-			LastSeen: time.Time{},
-		})
+		candidates = append(candidates, template.CandidateWithUnknownLocation())
 	}
 	return candidates
 }
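Reviewer note (not part of the patch): with the template in place, appendReplacementCandidates in every backend reduces to the same shape; the boltdb hunks above and the memory/sqlite hunks below all follow this pattern. Distilled (knownLocations stands in for each backend's own lookup):

    template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{
    	BaseVariantCompressor: compressorName,
    })
    if template == nil {
    	return candidates // the recorded compression is unusable under v2Options
    }
    for location, lastSeen := range knownLocations {
    	candidates = append(candidates, template.CandidateWithLocation(location, lastSeen))
    }
    if len(knownLocations) == 0 && v2Options != nil {
    	candidates = append(candidates, template.CandidateWithUnknownLocation())
    }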
diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go
index 9cd9c8f7d3..03548209f9 100644
--- a/pkg/blobinfocache/internal/prioritize/prioritize.go
+++ b/pkg/blobinfocache/internal/prioritize/prioritize.go
@@ -25,57 +25,103 @@ const replacementAttempts = 5
 // This is a heuristic/guess, and could well use a different value.
 const replacementUnknownLocationAttempts = 2
 
-// CandidateCompression returns (true, compressionOp, compressionAlgo) if a blob
-// with compressionName (which can be Uncompressed or UnknownCompression) is acceptable for a CandidateLocations* call with v2Options.
+// CandidateTemplate is a subset of BICReplacementCandidate2 with data related to a specific digest,
+// which can be later combined with information about a location.
+type CandidateTemplate struct {
+	digest               digest.Digest
+	compressionOperation types.LayerCompression // Either types.Decompress for uncompressed, or types.Compress for compressed
+	compressionAlgorithm *compression.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed
+}
+
+// CandidateTemplateWithCompression returns a CandidateTemplate if a blob with data is acceptable
+// for a CandidateLocations* call with v2Options.
 //
 // v2Options can be set to nil if the call is CandidateLocations (i.e. compression is not required to be known);
 // if not nil, the call is assumed to be CandidateLocations2.
-//
-// The (compressionOp, compressionAlgo) values are suitable for BICReplacementCandidate2
-func CandidateCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, compressorName string) (bool, types.LayerCompression, *compression.Algorithm) {
+func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, data blobinfocache.DigestCompressorData) *CandidateTemplate {
 	if v2Options == nil {
-		return true, types.PreserveOriginal, nil // Anything goes. The (compressionOp, compressionAlgo) values are not used.
+		return &CandidateTemplate{ // Anything goes. The compressionOperation, compressionAlgorithm values are not used.
+			digest: digest,
+		}
 	}
 
-	var op types.LayerCompression
-	var algo *compression.Algorithm
-	switch compressorName {
+	requiredCompression := "nil"
+	if v2Options.RequiredCompression != nil {
+		requiredCompression = v2Options.RequiredCompression.Name()
+	}
+	switch data.BaseVariantCompressor {
 	case blobinfocache.Uncompressed:
-		op = types.Decompress
-		algo = nil
+		if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{
+			PossibleManifestFormats: v2Options.PossibleManifestFormats,
+			RequiredCompression:     v2Options.RequiredCompression,
+		}, nil) {
+			logrus.Debugf("Ignoring BlobInfoCache record of digest %q, uncompressed format does not match required %s or MIME types %#v",
+				digest.String(), requiredCompression, v2Options.PossibleManifestFormats)
+			return nil
+		}
+		return &CandidateTemplate{
+			digest:               digest,
+			compressionOperation: types.Decompress,
+			compressionAlgorithm: nil,
+		}
 	case blobinfocache.UnknownCompression:
 		logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String())
-		return false, types.PreserveOriginal, nil // Not allowed with CandidateLocations2
+		return nil // Not allowed with CandidateLocations2
 	default:
-		op = types.Compress
-		algo_, err := compression.AlgorithmByName(compressorName)
+		algo, err := compression.AlgorithmByName(data.BaseVariantCompressor)
 		if err != nil {
 			logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v",
-				digest.String(), compressorName, err)
-			return false, types.PreserveOriginal, nil // The BICReplacementCandidate2.CompressionAlgorithm field is required
+				digest.String(), data.BaseVariantCompressor, err)
+			return nil // The BICReplacementCandidate2.CompressionAlgorithm field is required
 		}
-		algo = &algo_
-	}
-	if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{
-		PossibleManifestFormats: v2Options.PossibleManifestFormats,
-		RequiredCompression:     v2Options.RequiredCompression,
-	}, algo) {
-		requiredCompresssion := "nil"
-		if v2Options.RequiredCompression != nil {
-			requiredCompresssion = v2Options.RequiredCompression.Name()
+		if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{
+			PossibleManifestFormats: v2Options.PossibleManifestFormats,
+			RequiredCompression:     v2Options.RequiredCompression,
+		}, &algo) {
+			logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v",
+				digest.String(), data.BaseVariantCompressor, requiredCompression, v2Options.PossibleManifestFormats)
+			return nil
+		}
+		return &CandidateTemplate{
+			digest:               digest,
+			compressionOperation: types.Compress,
+			compressionAlgorithm: &algo,
 		}
-		logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v",
-			digest.String(), compressorName, requiredCompresssion, v2Options.PossibleManifestFormats)
-		return false, types.PreserveOriginal, nil
 	}
-
-	return true, op, algo
 }
 
 // CandidateWithTime is the input to types.BICReplacementCandidate prioritization.
 type CandidateWithTime struct {
-	Candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate
-	LastSeen  time.Time                              // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation)
+	candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate
+	lastSeen  time.Time                              // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation)
+}
+
+// CandidateWithLocation returns a complete CandidateWithTime combining (template from CandidateTemplateWithCompression, location, lastSeen)
+func (template CandidateTemplate) CandidateWithLocation(location types.BICLocationReference, lastSeen time.Time) CandidateWithTime {
+	return CandidateWithTime{
+		candidate: blobinfocache.BICReplacementCandidate2{
+			Digest:               template.digest,
+			CompressionOperation: template.compressionOperation,
+			CompressionAlgorithm: template.compressionAlgorithm,
+			UnknownLocation:      false,
+			Location:             location,
+		},
+		lastSeen: lastSeen,
+	}
+}
+
+// CandidateWithUnknownLocation returns a complete CandidateWithTime for a template from CandidateTemplateWithCompression and an unknown location.
+func (template CandidateTemplate) CandidateWithUnknownLocation() CandidateWithTime {
+	return CandidateWithTime{
+		candidate: blobinfocache.BICReplacementCandidate2{
+			Digest:               template.digest,
+			CompressionOperation: template.compressionOperation,
+			CompressionAlgorithm: template.compressionAlgorithm,
+			UnknownLocation:      true,
+			Location:             types.BICLocationReference{Opaque: ""},
+		},
+		lastSeen: time.Time{},
+	}
 }
 
 // candidateSortState is a closure for a comparison used by slices.SortFunc on candidates to prioritize,
@@ -91,35 +137,35 @@ func (css *candidateSortState) compare(xi, xj CandidateWithTime) int {
 	// Other digest values are primarily sorted by time (more recent first), secondarily by digest (to provide a deterministic order)
 
 	// First, deal with the primaryDigest/uncompressedDigest cases:
-	if xi.Candidate.Digest != xj.Candidate.Digest {
+	if xi.candidate.Digest != xj.candidate.Digest {
 		// - The two digests are different, and one (or both) of the digests is primaryDigest or uncompressedDigest: time does not matter
-		if xi.Candidate.Digest == css.primaryDigest {
+		if xi.candidate.Digest == css.primaryDigest {
 			return -1
 		}
-		if xj.Candidate.Digest == css.primaryDigest {
+		if xj.candidate.Digest == css.primaryDigest {
 			return 1
 		}
 		if css.uncompressedDigest != "" {
-			if xi.Candidate.Digest == css.uncompressedDigest {
+			if xi.candidate.Digest == css.uncompressedDigest {
 				return 1
 			}
-			if xj.Candidate.Digest == css.uncompressedDigest {
+			if xj.candidate.Digest == css.uncompressedDigest {
 				return -1
 			}
 		}
 	} else { // xi.Candidate.Digest == xj.Candidate.Digest
 		// The two digests are the same, and are either primaryDigest or uncompressedDigest: order by time
-		if xi.Candidate.Digest == css.primaryDigest || (css.uncompressedDigest != "" && xi.Candidate.Digest == css.uncompressedDigest) {
-			return -xi.LastSeen.Compare(xj.LastSeen)
+		if xi.candidate.Digest == css.primaryDigest || (css.uncompressedDigest != "" && xi.candidate.Digest == css.uncompressedDigest) {
+			return -xi.lastSeen.Compare(xj.lastSeen)
 		}
 	}
 
 	// Neither of the digests are primaryDigest/uncompressedDigest:
-	if cmp := xi.LastSeen.Compare(xj.LastSeen); cmp != 0 { // Order primarily by time
+	if cmp := xi.lastSeen.Compare(xj.lastSeen); cmp != 0 { // Order primarily by time
 		return -cmp
 	}
 	// Fall back to digest, if timestamps end up _exactly_ the same (how?!)
-	return cmp.Compare(xi.Candidate.Digest, xj.Candidate.Digest)
+	return cmp.Compare(xi.candidate.Digest, xj.candidate.Digest)
 }
 
 // destructivelyPrioritizeReplacementCandidatesWithMax is destructivelyPrioritizeReplacementCandidates with parameters for the
@@ -138,7 +184,7 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime,
 		uncompressedDigest: uncompressedDigest,
 	}).compare)
 	for _, candidate := range cs {
-		if candidate.Candidate.UnknownLocation {
+		if candidate.candidate.UnknownLocation {
 			unknownLocationCandidates = append(unknownLocationCandidates, candidate)
 		} else {
 			knownLocationCandidates = append(knownLocationCandidates, candidate)
@@ -150,11 +196,11 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime,
 	unknownLocationCandidatesUsed := min(noLocationLimit, remainingCapacity, len(unknownLocationCandidates))
 	res := make([]blobinfocache.BICReplacementCandidate2, knownLocationCandidatesUsed)
 	for i := 0; i < knownLocationCandidatesUsed; i++ {
-		res[i] = knownLocationCandidates[i].Candidate
+		res[i] = knownLocationCandidates[i].candidate
 	}
 	// If candidates with unknown location are found, lets add them to final list
 	for i := 0; i < unknownLocationCandidatesUsed; i++ {
-		res = append(res, unknownLocationCandidates[i].Candidate)
+		res = append(res, unknownLocationCandidates[i].candidate)
 	}
 	return res
 }
diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go
index ab47fe0625..0d9fe12c1d 100644
--- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go
+++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go
@@ -8,9 +8,11 @@ import (
 
 	"github.com/containers/image/v5/internal/blobinfocache"
 	"github.com/containers/image/v5/pkg/compression"
+	compressiontypes "github.com/containers/image/v5/pkg/compression/types"
 	"github.com/containers/image/v5/types"
 	"github.com/opencontainers/go-digest"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 const (
@@ -53,6 +55,155 @@ var (
 	}
 )
 
+func TestCandidateTemplateWithCompression(t *testing.T) {
+	uncompressedData := blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: blobinfocache.Uncompressed,
+	}
+	gzipData := blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressiontypes.GzipAlgorithmName,
+	}
+	zstdData := blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressiontypes.ZstdAlgorithmName,
+	}
+
+	for _, c := range []struct {
+		name                string
+		requiredCompression *compressiontypes.Algorithm
+		data                blobinfocache.DigestCompressorData
+		v2Matches           bool
+		// if v2Matches:
+		v2Op   types.LayerCompression
+		v2Algo string
+	}{
+		{
+			name:                "unknown",
+			requiredCompression: nil,
+			data: blobinfocache.DigestCompressorData{
+				BaseVariantCompressor: blobinfocache.UnknownCompression,
+			},
+			v2Matches: false,
+		},
+		{
+			name:                "uncompressed",
+			requiredCompression: nil,
+			data:                uncompressedData,
+			v2Matches:           true,
+			v2Op:                types.Decompress,
+			v2Algo:              "",
+		},
+		{
+			name:                "uncompressed, want gzip",
+			requiredCompression: &compression.Gzip,
+			data:                uncompressedData,
+			v2Matches:           false,
+		},
+		{
+			name:                "gzip",
+			requiredCompression: nil,
+			data:                gzipData,
+			v2Matches:           true,
+			v2Op:                types.Compress,
+			v2Algo:              compressiontypes.GzipAlgorithmName,
+		},
+		{
+			name:                "gzip, want zstd",
+			requiredCompression: &compression.Zstd,
+			data:                gzipData,
+			v2Matches:           false,
+		},
+		{
+			name:                "unknown base",
+			requiredCompression: nil,
+			data: blobinfocache.DigestCompressorData{
+				BaseVariantCompressor: "this value is unknown",
+			},
+			v2Matches: false,
+		},
+		{
+			name:                "zstd",
+			requiredCompression: nil,
+			data:                zstdData,
+			v2Matches:           true,
+			v2Op:                types.Compress,
+			v2Algo:              compressiontypes.ZstdAlgorithmName,
+		},
+		{
+			name:                "zstd, want gzip",
+			requiredCompression: &compression.Gzip,
+			data:                zstdData,
+			v2Matches:           false,
+		},
+		{
+			name:                "zstd, want zstd",
+			requiredCompression: &compression.Zstd,
+			data:                zstdData,
+			v2Matches:           true,
+			v2Op:                types.Compress,
+			v2Algo:              compressiontypes.ZstdAlgorithmName,
+		},
+		{
+			name:                "zstd, want zstd:chunked",
+			requiredCompression: &compression.ZstdChunked,
+			data:                zstdData,
+			v2Matches:           false,
+		},
+	} {
+		res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.data)
+		assert.Equal(t, &CandidateTemplate{
+			digest:               digestCompressedPrimary,
+			compressionOperation: types.PreserveOriginal,
+			compressionAlgorithm: nil,
+		}, res, c.name)
+
+		// These tests only use RequiredCompression in CandidateLocations2Options for clarity;
+		// CandidateCompressionMatchesReuseConditions should have its own tests of handling the full set of options.
+		res = CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{
+			RequiredCompression: c.requiredCompression,
+		}, digestCompressedPrimary, c.data)
+		if !c.v2Matches {
+			assert.Nil(t, res, c.name)
+		} else {
+			require.NotNil(t, res, c.name)
+			assert.Equal(t, digestCompressedPrimary, res.digest, c.name)
+			assert.Equal(t, c.v2Op, res.compressionOperation, c.name)
+			if c.v2Algo == "" {
+				assert.Nil(t, res.compressionAlgorithm, c.name)
+			} else {
+				require.NotNil(t, res.compressionAlgorithm, c.name)
+				assert.Equal(t, c.v2Algo, res.compressionAlgorithm.Name())
+			}
+		}
+	}
+}
+
+func TestCandidateWithLocation(t *testing.T) {
+	template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressiontypes.ZstdAlgorithmName,
+	})
+	require.NotNil(t, template)
+	loc := types.BICLocationReference{Opaque: "opaque"}
+	time := time.Now()
+	res := template.CandidateWithLocation(loc, time)
+	assert.Equal(t, digestCompressedPrimary, res.candidate.Digest)
+	assert.Equal(t, types.Compress, res.candidate.CompressionOperation)
+	assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name())
+	assert.Equal(t, false, res.candidate.UnknownLocation)
+	assert.Equal(t, loc, res.candidate.Location)
+	assert.Equal(t, time, res.lastSeen)
+}
+
+func TestCandidateWithUnknownLocation(t *testing.T) {
+	template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressiontypes.ZstdAlgorithmName,
+	})
+	require.NotNil(t, template)
+	res := template.CandidateWithUnknownLocation()
+	assert.Equal(t, digestCompressedPrimary, res.candidate.Digest)
+	assert.Equal(t, types.Compress, res.candidate.CompressionOperation)
+	assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name())
+	assert.Equal(t, true, res.candidate.UnknownLocation)
+}
+
 func TestCandidateSortStateLess(t *testing.T) {
 	type p struct {
 		d digest.Digest
diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go
index 9d932f51c8..30b1ab4b12 100644
--- a/pkg/blobinfocache/internal/test/test.go
+++ b/pkg/blobinfocache/internal/test/test.go
@@ -314,7 +314,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa
 	// ----------------------------
 	// If a record exists with compression without Location then
 	// then return a record without location and with `UnknownLocation: true`
-	cache.RecordDigestCompressorName(digestUnknownLocation, compressiontypes.Bzip2AlgorithmName)
+	cache.RecordDigestCompressorData(digestUnknownLocation, blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressiontypes.Bzip2AlgorithmName,
+	})
 	res = cache.CandidateLocations2(transport, scope, digestUnknownLocation, blobinfocache.CandidateLocations2Options{
 		CanSubstitute: true,
 	})
@@ -355,7 +357,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa
 	// that shouldn’t happen in real-world usage.
 	if scopeIndex != 0 {
 		for _, e := range digestNameSetPrioritization {
-			cache.RecordDigestCompressorName(e.d, blobinfocache.UnknownCompression)
+			cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{
+				BaseVariantCompressor: blobinfocache.UnknownCompression,
+			})
 		}
 	}
 
@@ -423,7 +427,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa
 
 	// Set the "known" compression values
 	for _, e := range digestNameSetPrioritization {
-		cache.RecordDigestCompressorName(e.d, e.m)
+		cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{
+			BaseVariantCompressor: e.m,
+		})
 	}
 
 	// No substitutions allowed:
@@ -505,7 +511,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa
 		cache.RecordKnownLocation(transport, scope, e.d, types.BICLocationReference{Opaque: scopeName + e.n})
 	}
 	for _, e := range digestNameSetFiltering {
-		cache.RecordDigestCompressorName(e.d, e.m)
+		cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{
+			BaseVariantCompressor: e.m,
+		})
 	}
 
 	// No filtering
diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go
index 61e858deb8..067c6b7e11 100644
--- a/pkg/blobinfocache/memory/memory.go
+++ b/pkg/blobinfocache/memory/memory.go
@@ -144,19 +144,24 @@ func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope type
 	locationScope[location] = time.Now() // Possibly overwriting an older entry.
 }
 
-// RecordDigestCompressorName records that the blob with the specified digest is either compressed with the specified
-// algorithm, or uncompressed, or that we no longer know.
-func (mem *cache) RecordDigestCompressorName(blobDigest digest.Digest, compressorName string) {
+// RecordDigestCompressorData records data for the blob with the specified digest.
+// WARNING: Only call this with LOCALLY VERIFIED data:
+//   - don’t record a compressor for a digest just because some remote author claims so
+//     (e.g. because a manifest says so);
+//
+// otherwise the cache could be poisoned and cause us to make incorrect edits to type
+// information in a manifest.
+func (mem *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) {
 	mem.mutex.Lock()
 	defer mem.mutex.Unlock()
-	if previous, ok := mem.compressors[blobDigest]; ok && previous != compressorName {
-		logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", blobDigest, previous, compressorName)
+	if previous, ok := mem.compressors[anyDigest]; ok && previous != data.BaseVariantCompressor {
+		logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor)
 	}
-	if compressorName == blobinfocache.UnknownCompression {
-		delete(mem.compressors, blobDigest)
+	if data.BaseVariantCompressor == blobinfocache.UnknownCompression {
+		delete(mem.compressors, anyDigest)
 		return
 	}
-	mem.compressors[blobDigest] = compressorName
+	mem.compressors[anyDigest] = data.BaseVariantCompressor
 }
 
 // appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in memory
@@ -170,34 +175,19 @@
 	if v, ok := mem.compressors[digest]; ok {
 		compressorName = v
 	}
-	ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName)
-	if !ok {
+	template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressorName,
+	})
+	if template == nil {
 		return candidates
 	}
 	locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: scope, blobDigest: digest}] // nil if not present
 	if len(locations) > 0 {
 		for l, t := range locations {
-			candidates = append(candidates, prioritize.CandidateWithTime{
-				Candidate: blobinfocache.BICReplacementCandidate2{
-					Digest:               digest,
-					CompressionOperation: compressionOp,
-					CompressionAlgorithm: compressionAlgo,
-					Location:             l,
-				},
-				LastSeen: t,
-			})
+			candidates = append(candidates, template.CandidateWithLocation(l, t))
 		}
 	} else if v2Options != nil {
-		candidates = append(candidates, prioritize.CandidateWithTime{
-			Candidate: blobinfocache.BICReplacementCandidate2{
-				Digest:               digest,
-				CompressionOperation: compressionOp,
-				CompressionAlgorithm: compressionAlgo,
-				UnknownLocation:      true,
-				Location:             types.BICLocationReference{Opaque: ""},
-			},
-			LastSeen: time.Time{},
-		})
+		candidates = append(candidates, template.CandidateWithUnknownLocation())
 	}
 	return candidates
 }
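Reviewer note (not part of the patch): the recording semantics shared by all three backends (warn and overwrite on conflicting data, delete on UnknownCompression) can be exercised as below; a sketch that, like the interface itself, only compiles inside this module, narrowing the public cache via internal/blobinfocache.FromBlobInfoCache:

    bic := blobinfocache.FromBlobInfoCache(memory.New())
    d := digest.FromString("some locally verified blob")
    bic.RecordDigestCompressorData(d, blobinfocache.DigestCompressorData{
    	BaseVariantCompressor: compressiontypes.GzipAlgorithmName,
    })
    bic.RecordDigestCompressorData(d, blobinfocache.DigestCompressorData{
    	BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, // logs a “previously recorded as …” warning, then overwrites
    })
    bic.RecordDigestCompressorData(d, blobinfocache.DigestCompressorData{
    	BaseVariantCompressor: blobinfocache.UnknownCompression, // deletes the entry instead of storing “unknown”
    })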
diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go
index eb6a4a5c2e..8d2bf72898 100644
--- a/pkg/blobinfocache/sqlite/sqlite.go
+++ b/pkg/blobinfocache/sqlite/sqlite.go
@@ -457,29 +457,30 @@ func (sqc *cache) RecordKnownLocation(transport types.ImageTransport, scope type
 	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
 }
 
-// RecordDigestCompressorName records a compressor for the blob with the specified digest,
-// or Uncompressed or UnknownCompression.
-// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a
-// digest just because some remote author claims so (e.g. because a manifest says so);
+// RecordDigestCompressorData records data for the blob with the specified digest.
+// WARNING: Only call this with LOCALLY VERIFIED data:
+//   - don’t record a compressor for a digest just because some remote author claims so
+//     (e.g. because a manifest says so);
+//
 // otherwise the cache could be poisoned and cause us to make incorrect edits to type
 // information in a manifest.
-func (sqc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
+func (sqc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) {
 	_, _ = transaction(sqc, func(tx *sql.Tx) (void, error) {
 		previous, gotPrevious, err := querySingleValue[string](tx, "SELECT compressor FROM DigestCompressors WHERE digest = ?", anyDigest.String())
 		if err != nil {
 			return void{}, fmt.Errorf("looking for compressor of for %q", anyDigest)
 		}
-		if gotPrevious && previous != compressorName {
-			logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, compressorName)
+		if gotPrevious && previous != data.BaseVariantCompressor {
+			logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor)
 		}
-		if compressorName == blobinfocache.UnknownCompression {
+		if data.BaseVariantCompressor == blobinfocache.UnknownCompression {
 			if _, err := tx.Exec("DELETE FROM DigestCompressors WHERE digest = ?", anyDigest.String()); err != nil {
 				return void{}, fmt.Errorf("deleting compressor for digest %q: %w", anyDigest, err)
 			}
 		} else {
 			if _, err := tx.Exec("INSERT OR REPLACE INTO DigestCompressors(digest, compressor) VALUES (?, ?)",
-				anyDigest.String(), compressorName); err != nil {
-				return void{}, fmt.Errorf("recording compressor %q for %q: %w", compressorName, anyDigest, err)
+				anyDigest.String(), data.BaseVariantCompressor); err != nil {
+				return void{}, fmt.Errorf("recording compressor %q for %q: %w", data.BaseVariantCompressor, anyDigest, err)
 			}
 		}
 		return void{}, nil
@@ -502,8 +503,10 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
 			compressorName = compressor
 		}
 	}
-	ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName)
-	if !ok {
+	template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{
+		BaseVariantCompressor: compressorName,
+	})
+	if template == nil {
 		return candidates, nil
 	}
 
@@ -522,15 +525,7 @@
 		if err := rows.Scan(&location, &time); err != nil {
 			return nil, fmt.Errorf("scanning candidate: %w", err)
 		}
-		candidates = append(candidates, prioritize.CandidateWithTime{
-			Candidate: blobinfocache.BICReplacementCandidate2{
-				Digest:               digest,
-				CompressionOperation: compressionOp,
-				CompressionAlgorithm: compressionAlgo,
-				Location:             types.BICLocationReference{Opaque: location},
-			},
-			LastSeen: time,
-		})
+		candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: location}, time))
 		rowAdded = true
 	}
 	if err := rows.Err(); err != nil {
@@ -538,16 +533,7 @@
 	if !rowAdded && v2Options != nil {
-		candidates = append(candidates, prioritize.CandidateWithTime{
-			Candidate: blobinfocache.BICReplacementCandidate2{
-				Digest:               digest,
-				CompressionOperation: compressionOp,
-				CompressionAlgorithm: compressionAlgo,
-				UnknownLocation:      true,
-				Location:             types.BICLocationReference{Opaque: ""},
-			},
-			LastSeen: time.Time{},
-		})
+		candidates = append(candidates, template.CandidateWithUnknownLocation())
 	}
 	return candidates, nil
 }
diff --git a/storage/storage_dest.go b/storage/storage_dest.go
index 0524e74ca3..842a3ab068 100644
--- a/storage/storage_dest.go
+++ b/storage/storage_dest.go
@@ -548,8 +548,10 @@ func reusedBlobFromLayerLookup(layers []storage.Layer, blobDigest digest.Digest,
 		}
 	} else if options.CanSubstitute && layers[0].UncompressedDigest != "" {
 		return true, private.ReusedBlob{
-			Digest: layers[0].UncompressedDigest,
-			Size:   layers[0].UncompressedSize,
+			Digest:               layers[0].UncompressedDigest,
+			Size:                 layers[0].UncompressedSize,
+			CompressionOperation: types.Decompress,
+			CompressionAlgorithm: nil,
 		}
 	}
 }
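Reviewer note (not part of the patch): the storage_dest.go change makes substitution of an uncompressed layer report the same (operation, algorithm) convention that CandidateTemplateWithCompression emits, so manifest edits after reuse carry the right compression state. Restated as a sketch (my paraphrase; the real code also applies manifest-format checks, and the string literals mirror the values of the internal Uncompressed/UnknownCompression constants):

    package example

    import (
    	"github.com/containers/image/v5/pkg/compression"
    	"github.com/containers/image/v5/types"
    )

    // operationForBaseVariant maps a recorded base-variant compressor name to the
    // (CompressionOperation, CompressionAlgorithm) pair reported on candidates;
    // ok is false when no candidate may be emitted at all.
    func operationForBaseVariant(baseVariant string) (op types.LayerCompression, algo *compression.Algorithm, ok bool) {
    	switch baseVariant {
    	case "uncompressed": // blobinfocache.Uncompressed
    		return types.Decompress, nil, true
    	case "unknown": // blobinfocache.UnknownCompression
    		return types.PreserveOriginal, nil, false
    	default:
    		a, err := compression.AlgorithmByName(baseVariant)
    		if err != nil {
    			return types.PreserveOriginal, nil, false
    		}
    		return types.Compress, &a, true
    	}
    }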