From 62f10249f8e38c419a36a9d5de141ad7c42c8a9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miloslav=20Trma=C4=8D?=
Date: Fri, 15 Mar 2024 22:16:46 +0100
Subject: [PATCH 1/6] Fix data returned when returning uncompressed data on a
 c/storage layer match
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The API rules expect us to set CompressionOperation and CompressionAlgorithm,
the fields that drive manifest editing updates, when we substitute the
uncompressed digest for the requested one.

Signed-off-by: Miloslav Trmač
---
 storage/storage_dest.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/storage/storage_dest.go b/storage/storage_dest.go
index 0524e74ca3..842a3ab068 100644
--- a/storage/storage_dest.go
+++ b/storage/storage_dest.go
@@ -548,8 +548,10 @@ func reusedBlobFromLayerLookup(layers []storage.Layer, blobDigest digest.Digest,
 		}
 	} else if options.CanSubstitute && layers[0].UncompressedDigest != "" {
 		return true, private.ReusedBlob{
-			Digest: layers[0].UncompressedDigest,
-			Size:   layers[0].UncompressedSize,
+			Digest:               layers[0].UncompressedDigest,
+			Size:                 layers[0].UncompressedSize,
+			CompressionOperation: types.Decompress,
+			CompressionAlgorithm: nil,
 		}
 	}
 }

From efdc63990c1f9b4ed48ca095eb3008ffe10d8d8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miloslav=20Trma=C4=8D?=
Date: Mon, 25 Mar 2024 20:39:20 +0100
Subject: [PATCH 2/6] Turn CandidateCompression into CandidateTemplateWithCompression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

... and add CandidateWithLocation and CandidateWithUnknownLocation, so that
the BIC implementations only need to deal with one value instead of carrying
around three; we will want to add one more, and encapsulating them all into a
single template will make it transparent to the cache implementations.

Should not change behavior.

Signed-off-by: Miloslav Trmač
---
 pkg/blobinfocache/boltdb/boltdb.go            |  25 +---
 .../internal/prioritize/prioritize.go         |  60 ++++++--
 .../internal/prioritize/prioritize_test.go    | 133 ++++++++++++++++++
 pkg/blobinfocache/memory/memory.go            |  25 +---
 pkg/blobinfocache/sqlite/sqlite.go            |  25 +---
 5 files changed, 195 insertions(+), 73 deletions(-)

diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go
index b13246b212..e9855fa3c1 100644
--- a/pkg/blobinfocache/boltdb/boltdb.go
+++ b/pkg/blobinfocache/boltdb/boltdb.go
@@ -367,8 +367,8 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
 			compressorName = string(compressorNameValue)
 		}
 	}
-	ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName)
-	if !ok {
+	template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName)
+	if template == nil {
 		return candidates
 	}
 
@@ -382,28 +382,11 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
 			if err := t.UnmarshalBinary(v); err != nil {
 				return err
 			}
-			candidates = append(candidates, prioritize.CandidateWithTime{
-				Candidate: blobinfocache.BICReplacementCandidate2{
-					Digest:               digest,
-					CompressionOperation: compressionOp,
-					CompressionAlgorithm: compressionAlgo,
-					Location:             types.BICLocationReference{Opaque: string(k)},
-				},
-				LastSeen: t,
-			})
+			candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: string(k)}, t))
 			return nil
 		}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
} else if v2Options != nil { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - UnknownLocation: true, - Location: types.BICLocationReference{Opaque: ""}, - }, - LastSeen: time.Time{}, - }) + candidates = append(candidates, template.CandidateWithUnknownLocation()) } return candidates } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 9cd9c8f7d3..18478b5e82 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -25,16 +25,24 @@ const replacementAttempts = 5 // This is a heuristic/guess, and could well use a different value. const replacementUnknownLocationAttempts = 2 -// CandidateCompression returns (true, compressionOp, compressionAlgo) if a blob -// with compressionName (which can be Uncompressed or UnknownCompression) is acceptable for a CandidateLocations* call with v2Options. +// CandidateTemplate is a subset of BICReplacementCandidate2 with data related to a specific digest, +// which can be later combined with information about a location. +type CandidateTemplate struct { + digest digest.Digest + compressionOperation types.LayerCompression // Either types.Decompress for uncompressed, or types.Compress for compressed + compressionAlgorithm *compression.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed +} + +// CandidateTemplateWithCompression returns a CandidateTemplate if a blob with compressionName (which can be Uncompressed +// or UnknownCompression) is acceptable for a CandidateLocations* call with v2Options. // // v2Options can be set to nil if the call is CandidateLocations (i.e. compression is not required to be known); // if not nil, the call is assumed to be CandidateLocations2. -// -// The (compressionOp, compressionAlgo) values are suitable for BICReplacementCandidate2 -func CandidateCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, compressorName string) (bool, types.LayerCompression, *compression.Algorithm) { +func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, compressorName string) *CandidateTemplate { if v2Options == nil { - return true, types.PreserveOriginal, nil // Anything goes. The (compressionOp, compressionAlgo) values are not used. + return &CandidateTemplate{ // Anything goes. The compressionOperation, compressionAlgorithm values are not used. 
+ digest: digest, + } } var op types.LayerCompression @@ -45,14 +53,14 @@ func CandidateCompression(v2Options *blobinfocache.CandidateLocations2Options, d algo = nil case blobinfocache.UnknownCompression: logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String()) - return false, types.PreserveOriginal, nil // Not allowed with CandidateLocations2 + return nil // Not allowed with CandidateLocations2 default: op = types.Compress algo_, err := compression.AlgorithmByName(compressorName) if err != nil { logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v", digest.String(), compressorName, err) - return false, types.PreserveOriginal, nil // The BICReplacementCandidate2.CompressionAlgorithm field is required + return nil // The BICReplacementCandidate2.CompressionAlgorithm field is required } algo = &algo_ } @@ -66,10 +74,14 @@ func CandidateCompression(v2Options *blobinfocache.CandidateLocations2Options, d } logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v", digest.String(), compressorName, requiredCompresssion, v2Options.PossibleManifestFormats) - return false, types.PreserveOriginal, nil + return nil } - return true, op, algo + return &CandidateTemplate{ + digest: digest, + compressionOperation: op, + compressionAlgorithm: algo, + } } // CandidateWithTime is the input to types.BICReplacementCandidate prioritization. @@ -78,6 +90,34 @@ type CandidateWithTime struct { LastSeen time.Time // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation) } +// CandidateWithLocation returns a complete CandidateWithTime combining (template from CandidateTemplateWithCompression, location, lastSeen) +func (template CandidateTemplate) CandidateWithLocation(location types.BICLocationReference, lastSeen time.Time) CandidateWithTime { + return CandidateWithTime{ + Candidate: blobinfocache.BICReplacementCandidate2{ + Digest: template.digest, + CompressionOperation: template.compressionOperation, + CompressionAlgorithm: template.compressionAlgorithm, + UnknownLocation: false, + Location: location, + }, + LastSeen: lastSeen, + } +} + +// CandidateWithUnknownLocation returns a complete CandidateWithTime for a template from CandidateTemplateWithCompression and an unknown location. +func (template CandidateTemplate) CandidateWithUnknownLocation() CandidateWithTime { + return CandidateWithTime{ + Candidate: blobinfocache.BICReplacementCandidate2{ + Digest: template.digest, + CompressionOperation: template.compressionOperation, + CompressionAlgorithm: template.compressionAlgorithm, + UnknownLocation: true, + Location: types.BICLocationReference{Opaque: ""}, + }, + LastSeen: time.Time{}, + } +} + // candidateSortState is a closure for a comparison used by slices.SortFunc on candidates to prioritize, // along with the specially-treated digest values relevant to the ordering. 
type candidateSortState struct { diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index ab47fe0625..6f48e78c06 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -8,9 +8,11 @@ import ( "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/pkg/compression" + compressiontypes "github.com/containers/image/v5/pkg/compression/types" "github.com/containers/image/v5/types" "github.com/opencontainers/go-digest" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) const ( @@ -53,6 +55,137 @@ var ( } ) +func TestCandidateTemplateWithCompression(t *testing.T) { + for _, c := range []struct { + name string + requiredCompression *compressiontypes.Algorithm + compressor string + v2Matches bool + // if v2Matches: + v2Op types.LayerCompression + v2Algo string + }{ + { + name: "unknown", + requiredCompression: nil, + compressor: blobinfocache.UnknownCompression, + v2Matches: false, + }, + { + name: "uncompressed", + requiredCompression: nil, + compressor: blobinfocache.Uncompressed, + v2Matches: true, + v2Op: types.Decompress, + v2Algo: "", + }, + { + name: "uncompressed, want gzip", + requiredCompression: &compression.Gzip, + compressor: blobinfocache.Uncompressed, + v2Matches: false, + }, + { + name: "gzip", + requiredCompression: nil, + compressor: compressiontypes.GzipAlgorithmName, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.GzipAlgorithmName, + }, + { + name: "gzip, want zstd", + requiredCompression: &compression.Zstd, + compressor: compressiontypes.GzipAlgorithmName, + v2Matches: false, + }, + { + name: "unknown base", + requiredCompression: nil, + compressor: "this value is unknown", + v2Matches: false, + }, + { + name: "zstd", + requiredCompression: nil, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.ZstdAlgorithmName, + }, + { + name: "zstd, want gzip", + requiredCompression: &compression.Gzip, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: false, + }, + { + name: "zstd, want zstd", + requiredCompression: &compression.Zstd, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.ZstdAlgorithmName, + }, + { + name: "zstd, want zstd:chunked", + requiredCompression: &compression.ZstdChunked, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: false, + }, + } { + res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.compressor) + assert.Equal(t, &CandidateTemplate{ + digest: digestCompressedPrimary, + compressionOperation: types.PreserveOriginal, + compressionAlgorithm: nil, + }, res, c.name) + + // These tests only use RequiredCompression in CandidateLocations2Options for clarity; + // CandidateCompressionMatchesReuseConditions should have its own tests of handling the full set of options. 
+ res = CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{ + RequiredCompression: c.requiredCompression, + }, digestCompressedPrimary, c.compressor) + if !c.v2Matches { + assert.Nil(t, res, c.name) + } else { + require.NotNil(t, res, c.name) + assert.Equal(t, digestCompressedPrimary, res.digest, c.name) + assert.Equal(t, c.v2Op, res.compressionOperation, c.name) + if c.v2Algo == "" { + assert.Nil(t, res.compressionAlgorithm, c.name) + } else { + require.NotNil(t, res.compressionAlgorithm, c.name) + assert.Equal(t, c.v2Algo, res.compressionAlgorithm.Name()) + } + } + } +} + +func TestCandidateWithLocation(t *testing.T) { + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + require.NotNil(t, template) + loc := types.BICLocationReference{Opaque: "opaque"} + time := time.Now() + res := template.CandidateWithLocation(loc, time) + assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) + assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) + assert.Equal(t, false, res.Candidate.UnknownLocation) + assert.Equal(t, loc, res.Candidate.Location) + assert.Equal(t, time, res.LastSeen) +} + +func TestCandidateWithUnknownLocation(t *testing.T) { + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + require.NotNil(t, template) + res := template.CandidateWithUnknownLocation() + assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) + assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) + assert.Equal(t, true, res.Candidate.UnknownLocation) +} + func TestCandidateSortStateLess(t *testing.T) { type p struct { d digest.Digest diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go index 61e858deb8..ad1de7c841 100644 --- a/pkg/blobinfocache/memory/memory.go +++ b/pkg/blobinfocache/memory/memory.go @@ -170,34 +170,17 @@ func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateW if v, ok := mem.compressors[digest]; ok { compressorName = v } - ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName) - if !ok { + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + if template == nil { return candidates } locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: scope, blobDigest: digest}] // nil if not present if len(locations) > 0 { for l, t := range locations { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - Location: l, - }, - LastSeen: t, - }) + candidates = append(candidates, template.CandidateWithLocation(l, t)) } } else if v2Options != nil { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - UnknownLocation: true, - Location: types.BICLocationReference{Opaque: ""}, - }, - LastSeen: time.Time{}, - }) + candidates = append(candidates, 
template.CandidateWithUnknownLocation()) } return candidates } diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go index eb6a4a5c2e..4061af9720 100644 --- a/pkg/blobinfocache/sqlite/sqlite.go +++ b/pkg/blobinfocache/sqlite/sqlite.go @@ -502,8 +502,8 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW compressorName = compressor } } - ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName) - if !ok { + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + if template == nil { return candidates, nil } @@ -522,15 +522,7 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW if err := rows.Scan(&location, &time); err != nil { return nil, fmt.Errorf("scanning candidate: %w", err) } - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - Location: types.BICLocationReference{Opaque: location}, - }, - LastSeen: time, - }) + candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: location}, time)) rowAdded = true } if err := rows.Err(); err != nil { @@ -538,16 +530,7 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW } if !rowAdded && v2Options != nil { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - UnknownLocation: true, - Location: types.BICLocationReference{Opaque: ""}, - }, - LastSeen: time.Time{}, - }) + candidates = append(candidates, template.CandidateWithUnknownLocation()) } return candidates, nil } From 2b45d35846a551fbe4cadb33284156bb4ae9bbff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 30 Jul 2024 19:27:46 +0200 Subject: [PATCH 3/6] Make the fields of CandidateWithTime private MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... just because we now can, and to nudge all future caches to be designed around CandidateTemplateWithCompression. Should not change behavior. Signed-off-by: Miloslav Trmač --- .../internal/prioritize/prioritize.go | 36 +++++++++---------- .../internal/prioritize/prioritize_test.go | 20 +++++------ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 18478b5e82..64a0c57855 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -86,35 +86,35 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation // CandidateWithTime is the input to types.BICReplacementCandidate prioritization. 
type CandidateWithTime struct { - Candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate - LastSeen time.Time // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation) + candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate + lastSeen time.Time // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation) } // CandidateWithLocation returns a complete CandidateWithTime combining (template from CandidateTemplateWithCompression, location, lastSeen) func (template CandidateTemplate) CandidateWithLocation(location types.BICLocationReference, lastSeen time.Time) CandidateWithTime { return CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ + candidate: blobinfocache.BICReplacementCandidate2{ Digest: template.digest, CompressionOperation: template.compressionOperation, CompressionAlgorithm: template.compressionAlgorithm, UnknownLocation: false, Location: location, }, - LastSeen: lastSeen, + lastSeen: lastSeen, } } // CandidateWithUnknownLocation returns a complete CandidateWithTime for a template from CandidateTemplateWithCompression and an unknown location. func (template CandidateTemplate) CandidateWithUnknownLocation() CandidateWithTime { return CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ + candidate: blobinfocache.BICReplacementCandidate2{ Digest: template.digest, CompressionOperation: template.compressionOperation, CompressionAlgorithm: template.compressionAlgorithm, UnknownLocation: true, Location: types.BICLocationReference{Opaque: ""}, }, - LastSeen: time.Time{}, + lastSeen: time.Time{}, } } @@ -131,35 +131,35 @@ func (css *candidateSortState) compare(xi, xj CandidateWithTime) int { // Other digest values are primarily sorted by time (more recent first), secondarily by digest (to provide a deterministic order) // First, deal with the primaryDigest/uncompressedDigest cases: - if xi.Candidate.Digest != xj.Candidate.Digest { + if xi.candidate.Digest != xj.candidate.Digest { // - The two digests are different, and one (or both) of the digests is primaryDigest or uncompressedDigest: time does not matter - if xi.Candidate.Digest == css.primaryDigest { + if xi.candidate.Digest == css.primaryDigest { return -1 } - if xj.Candidate.Digest == css.primaryDigest { + if xj.candidate.Digest == css.primaryDigest { return 1 } if css.uncompressedDigest != "" { - if xi.Candidate.Digest == css.uncompressedDigest { + if xi.candidate.Digest == css.uncompressedDigest { return 1 } - if xj.Candidate.Digest == css.uncompressedDigest { + if xj.candidate.Digest == css.uncompressedDigest { return -1 } } } else { // xi.Candidate.Digest == xj.Candidate.Digest // The two digests are the same, and are either primaryDigest or uncompressedDigest: order by time - if xi.Candidate.Digest == css.primaryDigest || (css.uncompressedDigest != "" && xi.Candidate.Digest == css.uncompressedDigest) { - return -xi.LastSeen.Compare(xj.LastSeen) + if xi.candidate.Digest == css.primaryDigest || (css.uncompressedDigest != "" && xi.candidate.Digest == css.uncompressedDigest) { + return -xi.lastSeen.Compare(xj.lastSeen) } } // Neither of the digests are primaryDigest/uncompressedDigest: - if cmp := xi.LastSeen.Compare(xj.LastSeen); cmp != 0 { // Order primarily by time + if cmp := xi.lastSeen.Compare(xj.lastSeen); cmp != 0 { // Order primarily by time return -cmp } // Fall back to digest, if timestamps end up _exactly_ the same (how?!) 
- return cmp.Compare(xi.Candidate.Digest, xj.Candidate.Digest) + return cmp.Compare(xi.candidate.Digest, xj.candidate.Digest) } // destructivelyPrioritizeReplacementCandidatesWithMax is destructivelyPrioritizeReplacementCandidates with parameters for the @@ -178,7 +178,7 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, uncompressedDigest: uncompressedDigest, }).compare) for _, candidate := range cs { - if candidate.Candidate.UnknownLocation { + if candidate.candidate.UnknownLocation { unknownLocationCandidates = append(unknownLocationCandidates, candidate) } else { knownLocationCandidates = append(knownLocationCandidates, candidate) @@ -190,11 +190,11 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, unknownLocationCandidatesUsed := min(noLocationLimit, remainingCapacity, len(unknownLocationCandidates)) res := make([]blobinfocache.BICReplacementCandidate2, knownLocationCandidatesUsed) for i := 0; i < knownLocationCandidatesUsed; i++ { - res[i] = knownLocationCandidates[i].Candidate + res[i] = knownLocationCandidates[i].candidate } // If candidates with unknown location are found, lets add them to final list for i := 0; i < unknownLocationCandidatesUsed; i++ { - res = append(res, unknownLocationCandidates[i].Candidate) + res = append(res, unknownLocationCandidates[i].candidate) } return res } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index 6f48e78c06..2ea60e3394 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -168,22 +168,22 @@ func TestCandidateWithLocation(t *testing.T) { loc := types.BICLocationReference{Opaque: "opaque"} time := time.Now() res := template.CandidateWithLocation(loc, time) - assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) - assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) - assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) - assert.Equal(t, false, res.Candidate.UnknownLocation) - assert.Equal(t, loc, res.Candidate.Location) - assert.Equal(t, time, res.LastSeen) + assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) + assert.Equal(t, types.Compress, res.candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, false, res.candidate.UnknownLocation) + assert.Equal(t, loc, res.candidate.Location) + assert.Equal(t, time, res.lastSeen) } func TestCandidateWithUnknownLocation(t *testing.T) { template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) require.NotNil(t, template) res := template.CandidateWithUnknownLocation() - assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) - assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) - assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) - assert.Equal(t, true, res.Candidate.UnknownLocation) + assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) + assert.Equal(t, types.Compress, res.candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, true, res.candidate.UnknownLocation) } func TestCandidateSortStateLess(t *testing.T) { From 
849938da193920f9cb18d6597ed9fe05db16d125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Mon, 25 Mar 2024 21:20:15 +0100 Subject: [PATCH 4/6] Reformat CandidateTemplateWithCompression a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add more logic to the default case, so sharing the CandidateCompressionMatchesReuseConditions call is not going to be as easy. Split the two code paths. Should not change behavior. Signed-off-by: Miloslav Trmač --- .../internal/prioritize/prioritize.go | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 64a0c57855..9c7621f28a 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -45,42 +45,48 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation } } - var op types.LayerCompression - var algo *compression.Algorithm + requiredCompression := "nil" + if v2Options.RequiredCompression != nil { + requiredCompression = v2Options.RequiredCompression.Name() + } switch compressorName { case blobinfocache.Uncompressed: - op = types.Decompress - algo = nil + if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ + PossibleManifestFormats: v2Options.PossibleManifestFormats, + RequiredCompression: v2Options.RequiredCompression, + }, nil) { + logrus.Debugf("Ignoring BlobInfoCache record of digest %q, uncompressed format does not match required %s or MIME types %#v", + digest.String(), requiredCompression, v2Options.PossibleManifestFormats) + return nil + } + return &CandidateTemplate{ + digest: digest, + compressionOperation: types.Decompress, + compressionAlgorithm: nil, + } case blobinfocache.UnknownCompression: logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String()) return nil // Not allowed with CandidateLocations2 default: - op = types.Compress - algo_, err := compression.AlgorithmByName(compressorName) + algo, err := compression.AlgorithmByName(compressorName) if err != nil { logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v", digest.String(), compressorName, err) return nil // The BICReplacementCandidate2.CompressionAlgorithm field is required } - algo = &algo_ - } - if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ - PossibleManifestFormats: v2Options.PossibleManifestFormats, - RequiredCompression: v2Options.RequiredCompression, - }, algo) { - requiredCompresssion := "nil" - if v2Options.RequiredCompression != nil { - requiredCompresssion = v2Options.RequiredCompression.Name() + if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ + PossibleManifestFormats: v2Options.PossibleManifestFormats, + RequiredCompression: v2Options.RequiredCompression, + }, &algo) { + logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v", + digest.String(), compressorName, requiredCompression, v2Options.PossibleManifestFormats) + return nil + } + return &CandidateTemplate{ + digest: digest, + compressionOperation: types.Compress, + compressionAlgorithm: &algo, } - logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v", - digest.String(), compressorName, 
requiredCompresssion, v2Options.PossibleManifestFormats)
-		return nil
-	}
-
-	return &CandidateTemplate{
-		digest:               digest,
-		compressionOperation: op,
-		compressionAlgorithm: algo,
 	}
 }

From d09a403fd686813a1a5ce5e40b153e9029a69a7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miloslav=20Trma=C4=8D?=
Date: Wed, 13 Mar 2024 21:58:07 +0100
Subject: [PATCH 5/6] Introduce blobinfocache.DigestCompressorData
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will want to record more than a single algorithm name. For now, just
introduce the structure and modify users, we'll add the new fields later.

Should not change behavior.

Signed-off-by: Miloslav Trmač
---
 copy/compression.go                           |  8 ++-
 internal/blobinfocache/blobinfocache.go       |  2 +-
 internal/blobinfocache/types.go               | 16 ++++--
 pkg/blobinfocache/boltdb/boltdb.go            | 26 ++++++----
 .../internal/prioritize/prioritize.go         | 14 ++---
 .../internal/prioritize/prioritize_test.go    | 52 +++++++++++++------
 pkg/blobinfocache/internal/test/test.go       | 16 ++++--
 pkg/blobinfocache/memory/memory.go            | 25 +++++----
 pkg/blobinfocache/sqlite/sqlite.go            | 25 +++++----
 9 files changed, 117 insertions(+), 67 deletions(-)

diff --git a/copy/compression.go b/copy/compression.go
index 0164dd91da..e5800aa4d5 100644
--- a/copy/compression.go
+++ b/copy/compression.go
@@ -347,14 +347,18 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
 			// between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName
 			// with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about
 			// inconsistent data to be logged.
-			c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, d.uploadedCompressorName)
+			c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
+				BaseVariantCompressor: d.uploadedCompressorName,
+			})
 		}
 	}
 	if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
 		d.srcCompressorName != internalblobinfocache.UnknownCompression {
 		if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
 			// HACK: Don’t record zstd:chunked algorithms, see above.
- c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, d.srcCompressorName) + c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{ + BaseVariantCompressor: d.srcCompressorName, + }) } } return nil diff --git a/internal/blobinfocache/blobinfocache.go b/internal/blobinfocache/blobinfocache.go index 60f8e6a027..f31ee3124d 100644 --- a/internal/blobinfocache/blobinfocache.go +++ b/internal/blobinfocache/blobinfocache.go @@ -34,7 +34,7 @@ func (bic *v1OnlyBlobInfoCache) UncompressedDigestForTOC(tocDigest digest.Digest func (bic *v1OnlyBlobInfoCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) { } -func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) { +func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) { } func (bic *v1OnlyBlobInfoCache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2 { diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go index ed340ba478..276c8073e3 100644 --- a/internal/blobinfocache/types.go +++ b/internal/blobinfocache/types.go @@ -35,19 +35,25 @@ type BlobInfoCache2 interface { // (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) - // RecordDigestCompressorName records a compressor for the blob with the specified digest, - // or Uncompressed or UnknownCompression. - // WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a - // digest just because some remote author claims so (e.g. because a manifest says so); + // RecordDigestCompressorData records data for the blob with the specified digest. + // WARNING: Only call this with LOCALLY VERIFIED data: + // - don’t record a compressor for a digest just because some remote author claims so + // (e.g. because a manifest says so); // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. - RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) + RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) // CandidateLocations2 returns a prioritized, limited, number of blobs and their locations (if known) // that could possibly be reused within the specified (transport scope) (if they still // exist, which is not guaranteed). CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2 } +// DigestCompressorData is information known about how a blob is compressed. +// (This is worded generically, but basically targeted at the zstd / zstd:chunked situation.) +type DigestCompressorData struct { + BaseVariantCompressor string // A compressor’s base variant name, or Uncompressed or UnknownCompression. +} + // CandidateLocations2Options are used in CandidateLocations2. 
type CandidateLocations2Options struct { // If !CanSubstitute, the returned candidates will match the submitted digest exactly; if diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go index e9855fa3c1..036d2c0149 100644 --- a/pkg/blobinfocache/boltdb/boltdb.go +++ b/pkg/blobinfocache/boltdb/boltdb.go @@ -295,12 +295,14 @@ func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompresse }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } -// RecordDigestCompressorName records that the blob with digest anyDigest was compressed with the specified -// compressor, or is blobinfocache.Uncompressed. -// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. -// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs. -// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) -func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) { +// RecordDigestCompressorData records data for the blob with the specified digest. +// WARNING: Only call this with LOCALLY VERIFIED data: +// - don’t record a compressor for a digest just because some remote author claims so +// (e.g. because a manifest says so); +// +// otherwise the cache could be poisoned and cause us to make incorrect edits to type +// information in a manifest. +func (bdc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { _ = bdc.update(func(tx *bolt.Tx) error { b, err := tx.CreateBucketIfNotExists(digestCompressorBucket) if err != nil { @@ -308,14 +310,14 @@ func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressor } key := []byte(anyDigest.String()) if previousBytes := b.Get(key); previousBytes != nil { - if string(previousBytes) != compressorName { - logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), compressorName) + if string(previousBytes) != data.BaseVariantCompressor { + logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), data.BaseVariantCompressor) } } - if compressorName == blobinfocache.UnknownCompression { + if data.BaseVariantCompressor == blobinfocache.UnknownCompression { return b.Delete(key) } - return b.Put(key, []byte(compressorName)) + return b.Put(key, []byte(data.BaseVariantCompressor)) }) // FIXME? Log error (but throttle the log volume on repeated accesses)? 
} @@ -367,7 +369,9 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW compressorName = string(compressorNameValue) } } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressorName, + }) if template == nil { return candidates } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 9c7621f28a..03548209f9 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -33,12 +33,12 @@ type CandidateTemplate struct { compressionAlgorithm *compression.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed } -// CandidateTemplateWithCompression returns a CandidateTemplate if a blob with compressionName (which can be Uncompressed -// or UnknownCompression) is acceptable for a CandidateLocations* call with v2Options. +// CandidateTemplateWithCompression returns a CandidateTemplate if a blob with data is acceptable +// for a CandidateLocations* call with v2Options. // // v2Options can be set to nil if the call is CandidateLocations (i.e. compression is not required to be known); // if not nil, the call is assumed to be CandidateLocations2. -func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, compressorName string) *CandidateTemplate { +func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, data blobinfocache.DigestCompressorData) *CandidateTemplate { if v2Options == nil { return &CandidateTemplate{ // Anything goes. The compressionOperation, compressionAlgorithm values are not used. 
digest: digest, @@ -49,7 +49,7 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation if v2Options.RequiredCompression != nil { requiredCompression = v2Options.RequiredCompression.Name() } - switch compressorName { + switch data.BaseVariantCompressor { case blobinfocache.Uncompressed: if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ PossibleManifestFormats: v2Options.PossibleManifestFormats, @@ -68,10 +68,10 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String()) return nil // Not allowed with CandidateLocations2 default: - algo, err := compression.AlgorithmByName(compressorName) + algo, err := compression.AlgorithmByName(data.BaseVariantCompressor) if err != nil { logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v", - digest.String(), compressorName, err) + digest.String(), data.BaseVariantCompressor, err) return nil // The BICReplacementCandidate2.CompressionAlgorithm field is required } if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ @@ -79,7 +79,7 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation RequiredCompression: v2Options.RequiredCompression, }, &algo) { logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v", - digest.String(), compressorName, requiredCompression, v2Options.PossibleManifestFormats) + digest.String(), data.BaseVariantCompressor, requiredCompression, v2Options.PossibleManifestFormats) return nil } return &CandidateTemplate{ diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index 2ea60e3394..0d9fe12c1d 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -56,10 +56,20 @@ var ( ) func TestCandidateTemplateWithCompression(t *testing.T) { + uncompressedData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.Uncompressed, + } + gzipData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.GzipAlgorithmName, + } + zstdData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + } + for _, c := range []struct { name string requiredCompression *compressiontypes.Algorithm - compressor string + data blobinfocache.DigestCompressorData v2Matches bool // if v2Matches: v2Op types.LayerCompression @@ -68,13 +78,15 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "unknown", requiredCompression: nil, - compressor: blobinfocache.UnknownCompression, - v2Matches: false, + data: blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.UnknownCompression, + }, + v2Matches: false, }, { name: "uncompressed", requiredCompression: nil, - compressor: blobinfocache.Uncompressed, + data: uncompressedData, v2Matches: true, v2Op: types.Decompress, v2Algo: "", @@ -82,13 +94,13 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "uncompressed, want gzip", requiredCompression: &compression.Gzip, - compressor: blobinfocache.Uncompressed, + data: uncompressedData, v2Matches: false, }, { name: "gzip", requiredCompression: nil, - compressor: compressiontypes.GzipAlgorithmName, + data: gzipData, v2Matches: true, v2Op: 
types.Compress, v2Algo: compressiontypes.GzipAlgorithmName, @@ -96,19 +108,21 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "gzip, want zstd", requiredCompression: &compression.Zstd, - compressor: compressiontypes.GzipAlgorithmName, + data: gzipData, v2Matches: false, }, { name: "unknown base", requiredCompression: nil, - compressor: "this value is unknown", - v2Matches: false, + data: blobinfocache.DigestCompressorData{ + BaseVariantCompressor: "this value is unknown", + }, + v2Matches: false, }, { name: "zstd", requiredCompression: nil, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.ZstdAlgorithmName, @@ -116,13 +130,13 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "zstd, want gzip", requiredCompression: &compression.Gzip, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: false, }, { name: "zstd, want zstd", requiredCompression: &compression.Zstd, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.ZstdAlgorithmName, @@ -130,11 +144,11 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "zstd, want zstd:chunked", requiredCompression: &compression.ZstdChunked, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: false, }, } { - res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.compressor) + res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.data) assert.Equal(t, &CandidateTemplate{ digest: digestCompressedPrimary, compressionOperation: types.PreserveOriginal, @@ -145,7 +159,7 @@ func TestCandidateTemplateWithCompression(t *testing.T) { // CandidateCompressionMatchesReuseConditions should have its own tests of handling the full set of options. 
res = CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{ RequiredCompression: c.requiredCompression, - }, digestCompressedPrimary, c.compressor) + }, digestCompressedPrimary, c.data) if !c.v2Matches { assert.Nil(t, res, c.name) } else { @@ -163,7 +177,9 @@ func TestCandidateTemplateWithCompression(t *testing.T) { } func TestCandidateWithLocation(t *testing.T) { - template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + }) require.NotNil(t, template) loc := types.BICLocationReference{Opaque: "opaque"} time := time.Now() @@ -177,7 +193,9 @@ func TestCandidateWithLocation(t *testing.T) { } func TestCandidateWithUnknownLocation(t *testing.T) { - template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + }) require.NotNil(t, template) res := template.CandidateWithUnknownLocation() assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go index 9d932f51c8..30b1ab4b12 100644 --- a/pkg/blobinfocache/internal/test/test.go +++ b/pkg/blobinfocache/internal/test/test.go @@ -314,7 +314,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // ---------------------------- // If a record exists with compression without Location then // then return a record without location and with `UnknownLocation: true` - cache.RecordDigestCompressorName(digestUnknownLocation, compressiontypes.Bzip2AlgorithmName) + cache.RecordDigestCompressorData(digestUnknownLocation, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.Bzip2AlgorithmName, + }) res = cache.CandidateLocations2(transport, scope, digestUnknownLocation, blobinfocache.CandidateLocations2Options{ CanSubstitute: true, }) @@ -355,7 +357,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // that shouldn’t happen in real-world usage. 
if scopeIndex != 0 { for _, e := range digestNameSetPrioritization { - cache.RecordDigestCompressorName(e.d, blobinfocache.UnknownCompression) + cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.UnknownCompression, + }) } } @@ -423,7 +427,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // Set the "known" compression values for _, e := range digestNameSetPrioritization { - cache.RecordDigestCompressorName(e.d, e.m) + cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: e.m, + }) } // No substitutions allowed: @@ -505,7 +511,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa cache.RecordKnownLocation(transport, scope, e.d, types.BICLocationReference{Opaque: scopeName + e.n}) } for _, e := range digestNameSetFiltering { - cache.RecordDigestCompressorName(e.d, e.m) + cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: e.m, + }) } // No filtering diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go index ad1de7c841..067c6b7e11 100644 --- a/pkg/blobinfocache/memory/memory.go +++ b/pkg/blobinfocache/memory/memory.go @@ -144,19 +144,24 @@ func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope type locationScope[location] = time.Now() // Possibly overwriting an older entry. } -// RecordDigestCompressorName records that the blob with the specified digest is either compressed with the specified -// algorithm, or uncompressed, or that we no longer know. -func (mem *cache) RecordDigestCompressorName(blobDigest digest.Digest, compressorName string) { +// RecordDigestCompressorData records data for the blob with the specified digest. +// WARNING: Only call this with LOCALLY VERIFIED data: +// - don’t record a compressor for a digest just because some remote author claims so +// (e.g. because a manifest says so); +// +// otherwise the cache could be poisoned and cause us to make incorrect edits to type +// information in a manifest. 
+func (mem *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { mem.mutex.Lock() defer mem.mutex.Unlock() - if previous, ok := mem.compressors[blobDigest]; ok && previous != compressorName { - logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", blobDigest, previous, compressorName) + if previous, ok := mem.compressors[anyDigest]; ok && previous != data.BaseVariantCompressor { + logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor) } - if compressorName == blobinfocache.UnknownCompression { - delete(mem.compressors, blobDigest) + if data.BaseVariantCompressor == blobinfocache.UnknownCompression { + delete(mem.compressors, anyDigest) return } - mem.compressors[blobDigest] = compressorName + mem.compressors[anyDigest] = data.BaseVariantCompressor } // appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in memory @@ -170,7 +175,9 @@ func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateW if v, ok := mem.compressors[digest]; ok { compressorName = v } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressorName, + }) if template == nil { return candidates } diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go index 4061af9720..8d2bf72898 100644 --- a/pkg/blobinfocache/sqlite/sqlite.go +++ b/pkg/blobinfocache/sqlite/sqlite.go @@ -457,29 +457,30 @@ func (sqc *cache) RecordKnownLocation(transport types.ImageTransport, scope type }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } -// RecordDigestCompressorName records a compressor for the blob with the specified digest, -// or Uncompressed or UnknownCompression. -// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a -// digest just because some remote author claims so (e.g. because a manifest says so); +// RecordDigestCompressorData records data for the blob with the specified digest. +// WARNING: Only call this with LOCALLY VERIFIED data: +// - don’t record a compressor for a digest just because some remote author claims so +// (e.g. because a manifest says so); +// // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. 
-func (sqc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) { +func (sqc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { _, _ = transaction(sqc, func(tx *sql.Tx) (void, error) { previous, gotPrevious, err := querySingleValue[string](tx, "SELECT compressor FROM DigestCompressors WHERE digest = ?", anyDigest.String()) if err != nil { return void{}, fmt.Errorf("looking for compressor of for %q", anyDigest) } - if gotPrevious && previous != compressorName { - logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, compressorName) + if gotPrevious && previous != data.BaseVariantCompressor { + logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor) } - if compressorName == blobinfocache.UnknownCompression { + if data.BaseVariantCompressor == blobinfocache.UnknownCompression { if _, err := tx.Exec("DELETE FROM DigestCompressors WHERE digest = ?", anyDigest.String()); err != nil { return void{}, fmt.Errorf("deleting compressor for digest %q: %w", anyDigest, err) } } else { if _, err := tx.Exec("INSERT OR REPLACE INTO DigestCompressors(digest, compressor) VALUES (?, ?)", - anyDigest.String(), compressorName); err != nil { - return void{}, fmt.Errorf("recording compressor %q for %q: %w", compressorName, anyDigest, err) + anyDigest.String(), data.BaseVariantCompressor); err != nil { + return void{}, fmt.Errorf("recording compressor %q for %q: %w", data.BaseVariantCompressor, anyDigest, err) } } return void{}, nil @@ -502,7 +503,9 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW compressorName = compressor } } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressorName, + }) if template == nil { return candidates, nil } From babdac8191e9162b47ced80a2e1b39cd2d3882b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 13 Mar 2024 22:13:30 +0100 Subject: [PATCH 6/6] Always record only the base variant information about consumed source blobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... because we don't trust the TOC data, if any. This allows us to remove the zstd:chunked hack; we, at least, now record those blobs as zstd. Signed-off-by: Miloslav Trmač --- copy/compression.go | 109 +++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index e5800aa4d5..081c49312f 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -35,10 +35,10 @@ var ( // bpDetectCompressionStepData contains data that the copy pipeline needs about the “detect compression” step. type bpDetectCompressionStepData struct { - isCompressed bool - format compressiontypes.Algorithm // Valid if isCompressed - decompressor compressiontypes.DecompressorFunc // Valid if isCompressed - srcCompressorName string // Compressor name to possibly record in the blob info cache for the source blob. 
+ isCompressed bool + format compressiontypes.Algorithm // Valid if isCompressed + decompressor compressiontypes.DecompressorFunc // Valid if isCompressed + srcCompressorBaseVariantName string // Compressor name to possibly record in the blob info cache for the source blob. } // blobPipelineDetectCompressionStep updates *stream to detect its current compression format. @@ -58,9 +58,9 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI decompressor: decompressor, } if res.isCompressed { - res.srcCompressorName = format.Name() + res.srcCompressorBaseVariantName = format.BaseVariantName() } else { - res.srcCompressorName = internalblobinfocache.Uncompressed + res.srcCompressorBaseVariantName = internalblobinfocache.Uncompressed } if expectedBaseFormat, known := expectedBaseCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.BaseVariantName() != expectedBaseFormat.Name() { @@ -71,13 +71,13 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI // bpCompressionStepData contains data that the copy pipeline needs about the compression step. type bpCompressionStepData struct { - operation bpcOperation // What we are actually doing - uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do) - uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. - uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. - srcCompressorName string // Compressor name to record in the blob info cache for the source blob. - uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob. - closers []io.Closer // Objects to close after the upload is done, if any. + operation bpcOperation // What we are actually doing + uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do) + uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. + uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. + srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob. + uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob. + closers []io.Closer // Objects to close after the upload is done, if any. } type bpcOperation int @@ -129,11 +129,11 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp // We can’t do anything with an encrypted blob unless decrypted. 
logrus.Debugf("Using original blob without modification for encrypted blob") return &bpCompressionStepData{ - operation: bpcOpPreserveOpaque, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: nil, - srcCompressorName: internalblobinfocache.UnknownCompression, - uploadedCompressorName: internalblobinfocache.UnknownCompression, + operation: bpcOpPreserveOpaque, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: nil, + srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression, + uploadedCompressorName: internalblobinfocache.UnknownCompression, }, nil } return nil, nil @@ -158,13 +158,13 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: bpcOpCompressUncompressed, - uploadedOperation: types.Compress, - uploadedAlgorithm: uploadedAlgorithm, - uploadedAnnotations: annotations, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: uploadedAlgorithm.Name(), - closers: []io.Closer{reader}, + operation: bpcOpCompressUncompressed, + uploadedOperation: types.Compress, + uploadedAlgorithm: uploadedAlgorithm, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: uploadedAlgorithm.Name(), + closers: []io.Closer{reader}, }, nil } return nil, nil @@ -199,13 +199,13 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp } succeeded = true return &bpCompressionStepData{ - operation: bpcOpRecompressCompressed, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: ic.compressionFormat, - uploadedAnnotations: annotations, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: ic.compressionFormat.Name(), - closers: []io.Closer{decompressed, recompressed}, + operation: bpcOpRecompressCompressed, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: ic.compressionFormat, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: ic.compressionFormat.Name(), + closers: []io.Closer{decompressed, recompressed}, }, nil } return nil, nil @@ -226,12 +226,12 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: bpcOpDecompressCompressed, - uploadedOperation: types.Decompress, - uploadedAlgorithm: nil, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: internalblobinfocache.Uncompressed, - closers: []io.Closer{s}, + operation: bpcOpDecompressCompressed, + uploadedOperation: types.Decompress, + uploadedAlgorithm: nil, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: internalblobinfocache.Uncompressed, + closers: []io.Closer{s}, }, nil } return nil, nil @@ -269,11 +269,14 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom algorithm = nil } return &bpCompressionStepData{ - operation: bpcOp, - uploadedOperation: uploadedOp, - uploadedAlgorithm: algorithm, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: detected.srcCompressorName, + operation: bpcOp, + uploadedOperation: uploadedOp, + uploadedAlgorithm: algorithm, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + // We only record the base variant of the format on upload; we didn’t do anything with + // the TOC, we don’t know whether it matches the blob digest, so we 
don’t want to trigger + // reuse of any kind between the blob digest and the TOC digest. + uploadedCompressorName: detected.srcCompressorBaseVariantName, } } @@ -333,9 +336,9 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation) } } - if d.srcCompressorName == "" || d.uploadedCompressorName == "" { - return fmt.Errorf("internal error: missing compressor names (src: %q, uploaded: %q)", - d.srcCompressorName, d.uploadedCompressorName) + if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" { + return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)", + d.srcCompressorBaseVariantName, d.uploadedCompressorName) } if d.uploadedCompressorName != internalblobinfocache.UnknownCompression { if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName { @@ -353,13 +356,13 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf } } if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest && - d.srcCompressorName != internalblobinfocache.UnknownCompression { - if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName { - // HACK: Don’t record zstd:chunked algorithms, see above. - c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{ - BaseVariantCompressor: d.srcCompressorName, - }) - } + d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression { + // If the source is already using some TOC-dependent variant, we either copied the + // blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest, + // so record neither the variant name, nor the TOC digest. + c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{ + BaseVariantCompressor: d.srcCompressorBaseVariantName, + }) } return nil }
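
For reference, patches 2–5 converge on a single shape for appendReplacementCandidates
across all three cache backends (boltdb, memory, sqlite): validate the recorded
compression once via CandidateTemplateWithCompression, then stamp out one candidate per
known location. Below is a minimal free-standing Go sketch of that shape, modeled on the
memory backend above. It is an illustration, not part of the series: the function name
and the simplified locations map parameter are hypothetical, and the imports assume code
living inside the containers/image module (prioritize and the internal blobinfocache
package are not importable from outside it).

	package blobinfocacheexample // hypothetical package, for illustration only

	import (
		"time"

		"github.com/containers/image/v5/internal/blobinfocache"
		"github.com/containers/image/v5/pkg/blobinfocache/internal/prioritize"
		"github.com/containers/image/v5/types"
		"github.com/opencontainers/go-digest"
	)

	// appendReplacementCandidates builds prioritize.CandidateWithTime values for one digest,
	// mirroring the structure the three cache backends share after this series.
	func appendReplacementCandidates(candidates []prioritize.CandidateWithTime,
		v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest,
		compressorName string, locations map[types.BICLocationReference]time.Time) []prioritize.CandidateWithTime {
		// Validate the compression record against v2Options once, up front; a nil
		// template means the record is unusable for this call (e.g. UnknownCompression
		// with CandidateLocations2, or a mismatch with RequiredCompression).
		template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{
			BaseVariantCompressor: compressorName, // Uncompressed, UnknownCompression, or a base variant such as "gzip"/"zstd"
		})
		if template == nil {
			return candidates
		}
		if len(locations) > 0 {
			for l, t := range locations {
				// The template owns the digest and compression fields; the backend
				// only contributes the location and last-seen time.
				candidates = append(candidates, template.CandidateWithLocation(l, t))
			}
		} else if v2Options != nil {
			// CandidateLocations2 callers may still use a blob whose location is unknown.
			candidates = append(candidates, template.CandidateWithUnknownLocation())
		}
		return candidates
	}

Keeping the BICReplacementCandidate2 assembly inside CandidateTemplate (and, after
patch 3, making the fields of CandidateWithTime private) means richer metadata such as
DigestCompressorData can grow new fields without touching the individual cache backends.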