Skip to content

Commit

Permalink
Merge pull request #2503 from mtrmac/chunked-bic2-partial
Browse files Browse the repository at this point in the history
zstd:chunked refactoring for early review
  • Loading branch information
giuseppe authored Aug 6, 2024
2 parents 7847869 + babdac8 commit e3e9287
Show file tree
Hide file tree
Showing 10 changed files with 382 additions and 199 deletions.
111 changes: 59 additions & 52 deletions copy/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ var (

// bpDetectCompressionStepData contains data that the copy pipeline needs about the “detect compression” step.
type bpDetectCompressionStepData struct {
isCompressed bool
format compressiontypes.Algorithm // Valid if isCompressed
decompressor compressiontypes.DecompressorFunc // Valid if isCompressed
srcCompressorName string // Compressor name to possibly record in the blob info cache for the source blob.
isCompressed bool
format compressiontypes.Algorithm // Valid if isCompressed
decompressor compressiontypes.DecompressorFunc // Valid if isCompressed
srcCompressorBaseVariantName string // Compressor name to possibly record in the blob info cache for the source blob.
}

// blobPipelineDetectCompressionStep updates *stream to detect its current compression format.
Expand All @@ -58,9 +58,9 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI
decompressor: decompressor,
}
if res.isCompressed {
res.srcCompressorName = format.Name()
res.srcCompressorBaseVariantName = format.BaseVariantName()
} else {
res.srcCompressorName = internalblobinfocache.Uncompressed
res.srcCompressorBaseVariantName = internalblobinfocache.Uncompressed
}

if expectedBaseFormat, known := expectedBaseCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.BaseVariantName() != expectedBaseFormat.Name() {
Expand All @@ -71,13 +71,13 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI

// bpCompressionStepData contains data that the copy pipeline needs about the compression step.
type bpCompressionStepData struct {
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorName string // Compressor name to record in the blob info cache for the source blob.
uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob.
uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
}

type bpcOperation int
Expand Down Expand Up @@ -129,11 +129,11 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp
// We can’t do anything with an encrypted blob unless decrypted.
logrus.Debugf("Using original blob without modification for encrypted blob")
return &bpCompressionStepData{
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
}, nil
}
return nil, nil
Expand All @@ -158,13 +158,13 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: uploadedAlgorithm.Name(),
closers: []io.Closer{reader},
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: uploadedAlgorithm.Name(),
closers: []io.Closer{reader},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -199,13 +199,13 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp
}
succeeded = true
return &bpCompressionStepData{
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: ic.compressionFormat.Name(),
closers: []io.Closer{decompressed, recompressed},
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: ic.compressionFormat.Name(),
closers: []io.Closer{decompressed, recompressed},
}, nil
}
return nil, nil
Expand All @@ -226,12 +226,12 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
closers: []io.Closer{s},
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
closers: []io.Closer{s},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -269,11 +269,14 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom
algorithm = nil
}
return &bpCompressionStepData{
operation: bpcOp,
uploadedOperation: uploadedOp,
uploadedAlgorithm: algorithm,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: detected.srcCompressorName,
operation: bpcOp,
uploadedOperation: uploadedOp,
uploadedAlgorithm: algorithm,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
// We only record the base variant of the format on upload; we didn’t do anything with
// the TOC, we don’t know whether it matches the blob digest, so we don’t want to trigger
// reuse of any kind between the blob digest and the TOC digest.
uploadedCompressorName: detected.srcCompressorBaseVariantName,
}
}

Expand Down Expand Up @@ -333,9 +336,9 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation)
}
}
if d.srcCompressorName == "" || d.uploadedCompressorName == "" {
return fmt.Errorf("internal error: missing compressor names (src: %q, uploaded: %q)",
d.srcCompressorName, d.uploadedCompressorName)
if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" {
return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)",
d.srcCompressorBaseVariantName, d.uploadedCompressorName)
}
if d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
Expand All @@ -347,15 +350,19 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
// between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName
// with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about
// inconsistent data to be logged.
c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, d.uploadedCompressorName)
c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.uploadedCompressorName,
})
}
}
if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
d.srcCompressorName != internalblobinfocache.UnknownCompression {
if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Don’t record zstd:chunked algorithms, see above.
c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, d.srcCompressorName)
}
d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
// If the source is already using some TOC-dependent variant, we either copied the
// blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest,
// so record neither the variant name, nor the TOC digest.
c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.srcCompressorBaseVariantName,
})
}
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion internal/blobinfocache/blobinfocache.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (bic *v1OnlyBlobInfoCache) UncompressedDigestForTOC(tocDigest digest.Digest
func (bic *v1OnlyBlobInfoCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
}

func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) {
}

func (bic *v1OnlyBlobInfoCache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2 {
Expand Down
16 changes: 11 additions & 5 deletions internal/blobinfocache/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,25 @@ type BlobInfoCache2 interface {
// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest)

// RecordDigestCompressorName records a compressor for the blob with the specified digest,
// or Uncompressed or UnknownCompression.
// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a
// digest just because some remote author claims so (e.g. because a manifest says so);
// RecordDigestCompressorData records data for the blob with the specified digest.
// WARNING: Only call this with LOCALLY VERIFIED data:
// - don’t record a compressor for a digest just because some remote author claims so
// (e.g. because a manifest says so);
// otherwise the cache could be poisoned and cause us to make incorrect edits to type
// information in a manifest.
RecordDigestCompressorName(anyDigest digest.Digest, compressorName string)
RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData)
// CandidateLocations2 returns a prioritized, limited, number of blobs and their locations (if known)
// that could possibly be reused within the specified (transport scope) (if they still
// exist, which is not guaranteed).
CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2
}

// DigestCompressorData is information known about how a blob is compressed.
// (This is worded generically, but basically targeted at the zstd / zstd:chunked situation.)
type DigestCompressorData struct {
BaseVariantCompressor string // A compressor’s base variant name, or Uncompressed or UnknownCompression.
}

// CandidateLocations2Options are used in CandidateLocations2.
type CandidateLocations2Options struct {
// If !CanSubstitute, the returned candidates will match the submitted digest exactly; if
Expand Down
49 changes: 18 additions & 31 deletions pkg/blobinfocache/boltdb/boltdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,27 +295,29 @@ func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompresse
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
}

// RecordDigestCompressorName records that the blob with digest anyDigest was compressed with the specified
// compressor, or is blobinfocache.Uncompressed.
// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
// RecordDigestCompressorData records data for the blob with the specified digest.
// WARNING: Only call this with LOCALLY VERIFIED data:
// - don’t record a compressor for a digest just because some remote author claims so
// (e.g. because a manifest says so);
//
// otherwise the cache could be poisoned and cause us to make incorrect edits to type
// information in a manifest.
func (bdc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) {
_ = bdc.update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucketIfNotExists(digestCompressorBucket)
if err != nil {
return err
}
key := []byte(anyDigest.String())
if previousBytes := b.Get(key); previousBytes != nil {
if string(previousBytes) != compressorName {
logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), compressorName)
if string(previousBytes) != data.BaseVariantCompressor {
logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), data.BaseVariantCompressor)
}
}
if compressorName == blobinfocache.UnknownCompression {
if data.BaseVariantCompressor == blobinfocache.UnknownCompression {
return b.Delete(key)
}
return b.Put(key, []byte(compressorName))
return b.Put(key, []byte(data.BaseVariantCompressor))
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
}

Expand Down Expand Up @@ -367,8 +369,10 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
compressorName = string(compressorNameValue)
}
}
ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName)
if !ok {
template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{
BaseVariantCompressor: compressorName,
})
if template == nil {
return candidates
}

Expand All @@ -382,28 +386,11 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
if err := t.UnmarshalBinary(v); err != nil {
return err
}
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressionOperation: compressionOp,
CompressionAlgorithm: compressionAlgo,
Location: types.BICLocationReference{Opaque: string(k)},
},
LastSeen: t,
})
candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: string(k)}, t))
return nil
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
} else if v2Options != nil {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressionOperation: compressionOp,
CompressionAlgorithm: compressionAlgo,
UnknownLocation: true,
Location: types.BICLocationReference{Opaque: ""},
},
LastSeen: time.Time{},
})
candidates = append(candidates, template.CandidateWithUnknownLocation())
}
return candidates
}
Expand Down
Loading

0 comments on commit e3e9287

Please sign in to comment.