Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zstd:chunked refactoring for early review #2503

Merged
merged 6 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 59 additions & 52 deletions copy/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ var (

// bpDetectCompressionStepData contains data that the copy pipeline needs about the “detect compression” step.
type bpDetectCompressionStepData struct {
isCompressed bool
format compressiontypes.Algorithm // Valid if isCompressed
decompressor compressiontypes.DecompressorFunc // Valid if isCompressed
srcCompressorName string // Compressor name to possibly record in the blob info cache for the source blob.
isCompressed bool
format compressiontypes.Algorithm // Valid if isCompressed
decompressor compressiontypes.DecompressorFunc // Valid if isCompressed
srcCompressorBaseVariantName string // Compressor name to possibly record in the blob info cache for the source blob.
}

// blobPipelineDetectCompressionStep updates *stream to detect its current compression format.
Expand All @@ -58,9 +58,9 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI
decompressor: decompressor,
}
if res.isCompressed {
res.srcCompressorName = format.Name()
res.srcCompressorBaseVariantName = format.BaseVariantName()
} else {
res.srcCompressorName = internalblobinfocache.Uncompressed
res.srcCompressorBaseVariantName = internalblobinfocache.Uncompressed
}

if expectedBaseFormat, known := expectedBaseCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.BaseVariantName() != expectedBaseFormat.Name() {
Expand All @@ -71,13 +71,13 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI

// bpCompressionStepData contains data that the copy pipeline needs about the compression step.
type bpCompressionStepData struct {
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorName string // Compressor name to record in the blob info cache for the source blob.
uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob.
uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
}

type bpcOperation int
Expand Down Expand Up @@ -129,11 +129,11 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp
// We can’t do anything with an encrypted blob unless decrypted.
logrus.Debugf("Using original blob without modification for encrypted blob")
return &bpCompressionStepData{
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
}, nil
}
return nil, nil
Expand All @@ -158,13 +158,13 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: uploadedAlgorithm.Name(),
closers: []io.Closer{reader},
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: uploadedAlgorithm.Name(),
closers: []io.Closer{reader},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -199,13 +199,13 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp
}
succeeded = true
return &bpCompressionStepData{
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: ic.compressionFormat.Name(),
closers: []io.Closer{decompressed, recompressed},
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: ic.compressionFormat.Name(),
closers: []io.Closer{decompressed, recompressed},
}, nil
}
return nil, nil
Expand All @@ -226,12 +226,12 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
closers: []io.Closer{s},
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
closers: []io.Closer{s},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -269,11 +269,14 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom
algorithm = nil
}
return &bpCompressionStepData{
operation: bpcOp,
uploadedOperation: uploadedOp,
uploadedAlgorithm: algorithm,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: detected.srcCompressorName,
operation: bpcOp,
uploadedOperation: uploadedOp,
uploadedAlgorithm: algorithm,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
// We only record the base variant of the format on upload; we didn’t do anything with
// the TOC, we don’t know whether it matches the blob digest, so we don’t want to trigger
// reuse of any kind between the blob digest and the TOC digest.
uploadedCompressorName: detected.srcCompressorBaseVariantName,
}
}

Expand Down Expand Up @@ -333,9 +336,9 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation)
}
}
if d.srcCompressorName == "" || d.uploadedCompressorName == "" {
return fmt.Errorf("internal error: missing compressor names (src: %q, uploaded: %q)",
d.srcCompressorName, d.uploadedCompressorName)
if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" {
return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)",
d.srcCompressorBaseVariantName, d.uploadedCompressorName)
}
if d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
Expand All @@ -347,15 +350,19 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
// between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName
// with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about
// inconsistent data to be logged.
c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, d.uploadedCompressorName)
c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.uploadedCompressorName,
})
}
}
if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
d.srcCompressorName != internalblobinfocache.UnknownCompression {
if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Don’t record zstd:chunked algorithms, see above.
c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, d.srcCompressorName)
}
d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
// If the source is already using some TOC-dependent variant, we either copied the
// blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest,
// so record neither the variant name, nor the TOC digest.
c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.srcCompressorBaseVariantName,
})
}
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion internal/blobinfocache/blobinfocache.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (bic *v1OnlyBlobInfoCache) UncompressedDigestForTOC(tocDigest digest.Digest
func (bic *v1OnlyBlobInfoCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
}

func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) {
}

func (bic *v1OnlyBlobInfoCache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2 {
Expand Down
16 changes: 11 additions & 5 deletions internal/blobinfocache/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,25 @@ type BlobInfoCache2 interface {
// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest)

// RecordDigestCompressorName records a compressor for the blob with the specified digest,
// or Uncompressed or UnknownCompression.
// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a
// digest just because some remote author claims so (e.g. because a manifest says so);
// RecordDigestCompressorData records data for the blob with the specified digest.
// WARNING: Only call this with LOCALLY VERIFIED data:
// - don’t record a compressor for a digest just because some remote author claims so
// (e.g. because a manifest says so);
// otherwise the cache could be poisoned and cause us to make incorrect edits to type
// information in a manifest.
RecordDigestCompressorName(anyDigest digest.Digest, compressorName string)
RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData)
// CandidateLocations2 returns a prioritized, limited, number of blobs and their locations (if known)
// that could possibly be reused within the specified (transport scope) (if they still
// exist, which is not guaranteed).
CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2
}

// DigestCompressorData is information known about how a blob is compressed.
// (This is worded generically, but basically targeted at the zstd / zstd:chunked situation.)
type DigestCompressorData struct {
BaseVariantCompressor string // A compressor’s base variant name, or Uncompressed or UnknownCompression.
}

// CandidateLocations2Options are used in CandidateLocations2.
type CandidateLocations2Options struct {
// If !CanSubstitute, the returned candidates will match the submitted digest exactly; if
Expand Down
49 changes: 18 additions & 31 deletions pkg/blobinfocache/boltdb/boltdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,27 +295,29 @@ func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompresse
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
}

// RecordDigestCompressorName records that the blob with digest anyDigest was compressed with the specified
// compressor, or is blobinfocache.Uncompressed.
// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
// RecordDigestCompressorData records data for the blob with the specified digest.
// WARNING: Only call this with LOCALLY VERIFIED data:
// - don’t record a compressor for a digest just because some remote author claims so
// (e.g. because a manifest says so);
//
// otherwise the cache could be poisoned and cause us to make incorrect edits to type
// information in a manifest.
func (bdc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) {
_ = bdc.update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucketIfNotExists(digestCompressorBucket)
if err != nil {
return err
}
key := []byte(anyDigest.String())
if previousBytes := b.Get(key); previousBytes != nil {
if string(previousBytes) != compressorName {
logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), compressorName)
if string(previousBytes) != data.BaseVariantCompressor {
logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), data.BaseVariantCompressor)
}
}
if compressorName == blobinfocache.UnknownCompression {
if data.BaseVariantCompressor == blobinfocache.UnknownCompression {
return b.Delete(key)
}
return b.Put(key, []byte(compressorName))
return b.Put(key, []byte(data.BaseVariantCompressor))
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
}

Expand Down Expand Up @@ -367,8 +369,10 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
compressorName = string(compressorNameValue)
}
}
ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName)
if !ok {
template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{
BaseVariantCompressor: compressorName,
})
if template == nil {
return candidates
}

Expand All @@ -382,28 +386,11 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
if err := t.UnmarshalBinary(v); err != nil {
return err
}
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressionOperation: compressionOp,
CompressionAlgorithm: compressionAlgo,
Location: types.BICLocationReference{Opaque: string(k)},
},
LastSeen: t,
})
candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: string(k)}, t))
return nil
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
} else if v2Options != nil {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressionOperation: compressionOp,
CompressionAlgorithm: compressionAlgo,
UnknownLocation: true,
Location: types.BICLocationReference{Opaque: ""},
},
LastSeen: time.Time{},
})
candidates = append(candidates, template.CandidateWithUnknownLocation())
}
return candidates
}
Expand Down
Loading