From 4c1f2d89b5c73477fec78ef68cd3fc973ba46da3 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Mon, 20 Apr 2020 13:47:16 +0200 Subject: [PATCH] Support layer deltas Deltas are a way to avoid downloading a full copy if a layer tar file if you have a previous version of the layer available locally. In testing these deltas have been shown to be around 10x to 100x smaller than the .tar.gz files for typical Linux base images. In the typical client-side case we have some previous version of the image stored in container-storage somewhere, which means that we have an uncompressed files available, but not the actual tarball (compressed or not). This means we can use github.com/alexlarsson/tar-diff which takes two tar files and produces a delta file which when applied on the untar:ed content of the first tarfile produces the (bitwise identical) content of the uncompressed second tarfile. It just happens that the uncompressed tarfile is exactly what we need to reproduce, because that is how the layers are refered to in the image config (the DiffIDs). How this works is that we use OCI artifacts to store, for each regular image a manifest with information about the available deltas for the image. This image looks like a regular manifest, except each layer contains a tar-diff (as a blob) an uses the existing annotations key to record which DiffIDs the layer applies to. For example, a manifest would look like this: ``` { "schemaVersion": 2, "config": { "mediaType": "application/vnd.oci.image.config.v1+json", "digest": "sha256:ca3d163bab055381827226140568f3bef7eaac187cebd76878e0b63e9e442356", "size": 3 }, "layers": [ { "mediaType": "application/vnd.redhat.tardiff", "digest": "sha256:49402288de20a465616174a38aca4746f46be2c3f9519fe4d14fc7f83f44a32a", "size": 7059734, "annotations": { "com.redhat.deltaFrom": "sha256:b9137868142acd7ce4d62216e2b03e63e9800e2b647bf682492d3e9c5e66277c", "com.redhat.deltaTo": "sha256:c88d2d437799c2879fded33ee358429e1eb954968a25f3153e2e0e26fef7ef28" } } ] } ``` The config blob is just an json file containing "{}". Ideally it should not be of type application/vnd.oci.image.config.v1+json, because that is reserved for docker-style images. However, as explained in https://github.com/deislabs/oras/issues/129, docker hub doesn't currently support any other type. For registries that support OCI artifacts we should instead use some other type so that tooling can know that this is not a regular image. The way we attach the delta manifest to the image is that we store it in the same repo and a tag name based on the manifest digest like "delta-${shortid}". The delta layers record which DiffID they apply to, which is what we want to use to look up the pre-existing layers to use as delta source material, and it is what the delta apply will generate. This means however that using the deltas only works if we're allowed to substitute blobs, but this doesn't seem to be an issue in the typical case. --- copy/copy.go | 141 +++++++++++++++++++++++++++++++++-- directory/directory_dest.go | 5 ++ directory/directory_src.go | 8 ++ docker/archive/dest.go | 6 ++ docker/daemon/daemon_dest.go | 6 ++ docker/docker_image_dest.go | 5 ++ docker/docker_image_src.go | 37 +++++++++ docker/tarfile/src.go | 8 ++ go.mod | 1 + go.sum | 2 + image/memory.go | 4 + image/unparsed.go | 11 +++ manifest/oci.go | 11 ++- oci/archive/oci_dest.go | 5 ++ oci/archive/oci_src.go | 8 ++ oci/layout/oci_dest.go | 5 ++ oci/layout/oci_src.go | 8 ++ openshift/openshift.go | 16 ++++ storage/storage_image.go | 55 ++++++++++++++ tarball/tarball_src.go | 8 ++ types/types.go | 19 +++++ 21 files changed, 363 insertions(+), 6 deletions(-) diff --git a/copy/copy.go b/copy/copy.go index e8610254cf..ca766d94b3 100644 --- a/copy/copy.go +++ b/copy/copy.go @@ -8,10 +8,12 @@ import ( "io/ioutil" "os" "reflect" + "sort" "strings" "sync" "time" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/docker/reference" "github.com/containers/image/v5/image" "github.com/containers/image/v5/internal/pkg/platform" @@ -790,6 +792,11 @@ func (ic *imageCopier) copyLayers(ctx context.Context) error { srcInfosUpdated = true } + deltaLayers, err := ic.src.DeltaLayers(ctx) + if err != nil { + return err + } + type copyLayerData struct { destInfo types.BlobInfo diffID digest.Digest @@ -810,7 +817,7 @@ func (ic *imageCopier) copyLayers(ctx context.Context) error { } data := make([]copyLayerData, numLayers) - copyLayerHelper := func(index int, srcLayer types.BlobInfo, toEncrypt bool, pool *mpb.Progress) { + copyLayerHelper := func(index int, srcLayer types.BlobInfo, toEncrypt bool, pool *mpb.Progress, deltaLayers []types.BlobInfo) { defer copySemaphore.Release(1) defer copyGroup.Done() cld := copyLayerData{} @@ -825,7 +832,7 @@ func (ic *imageCopier) copyLayers(ctx context.Context) error { logrus.Debugf("Skipping foreign layer %q copy to %s", cld.destInfo.Digest, ic.c.dest.Reference().Transport().Name()) } } else { - cld.destInfo, cld.diffID, cld.err = ic.copyLayer(ctx, srcLayer, toEncrypt, pool) + cld.destInfo, cld.diffID, cld.err = ic.copyLayer(ctx, index, srcLayer, toEncrypt, pool, deltaLayers) } data[index] = cld } @@ -857,7 +864,7 @@ func (ic *imageCopier) copyLayers(ctx context.Context) error { if err != nil { return errors.Wrapf(err, "Can't acquire semaphore") } - go copyLayerHelper(i, srcLayer, encLayerBitmap[i], progressPool) + go copyLayerHelper(i, srcLayer, encLayerBitmap[i], progressPool, deltaLayers) } // Wait for all layers to be copied @@ -1040,9 +1047,83 @@ type diffIDResult struct { err error } +// Get all the deltas that apply to this layer +func (ic *imageCopier) getMatchingDeltaLayers(ctx context.Context, srcIndex int, deltaLayers []types.BlobInfo) (digest.Digest, []*types.BlobInfo) { + if deltaLayers == nil { + return "", nil + } + config, _ := ic.src.OCIConfig(ctx) + if config == nil || config.RootFS.DiffIDs == nil || len(config.RootFS.DiffIDs) <= srcIndex { + return "", nil + } + + layerDiffId := config.RootFS.DiffIDs[srcIndex] + + var matchingLayers []*types.BlobInfo + for i := range deltaLayers { + deltaLayer := &deltaLayers[i] + to := deltaLayer.Annotations["com.redhat.deltaTo"] + if to == layerDiffId.String() { + matchingLayers = append(matchingLayers, deltaLayer) + } + } + + return layerDiffId, matchingLayers +} + +// Looks at which of the matching delta froms have locally available data and picks the best one +func (ic *imageCopier) resolveDeltaLayer(ctx context.Context, matchingDeltas []*types.BlobInfo) (io.ReadCloser, tar_patch.DataSource, types.BlobInfo, error) { + // Sort smallest deltas so we favour the smallest useable one + sort.Slice(matchingDeltas, func(i, j int) bool { + return matchingDeltas[i].Size < matchingDeltas[j].Size + }) + + for i := range matchingDeltas { + matchingDelta := matchingDeltas[i] + from := matchingDelta.Annotations["com.redhat.deltaFrom"] + fromDigest, err := digest.Parse(from) + if err != nil { + continue // Silently ignore if server specified a werid format + } + + dataSource, err := ic.c.dest.GetLayerDeltaData(ctx, fromDigest) + if err != nil { + return nil, nil, types.BlobInfo{}, err // Internal error + } + if dataSource == nil { + continue // from layer doesn't exist + } + + logrus.Debugf("Using delta %v for DiffID %v", matchingDelta.Digest, fromDigest) + + deltaStream, _, err := ic.c.rawSource.GetBlob(ctx, *matchingDelta, ic.c.blobInfoCache) + if err != nil { + return nil, nil, types.BlobInfo{}, errors.Wrapf(err, "Error reading delta blob %s", matchingDelta.Digest) + } + return deltaStream, dataSource, *matchingDelta, nil + } + return nil, nil, types.BlobInfo{}, nil +} + +func (ic *imageCopier) canUseDeltas(srcInfo types.BlobInfo) (bool, string) { + // Deltas rewrite the manifest to refer to the uncompressed digest, so we must be able to substiture blobs + if !ic.canSubstituteBlobs { + return false, "" + } + + switch srcInfo.MediaType { + case manifest.DockerV2Schema2LayerMediaType, manifest.DockerV2SchemaLayerMediaTypeUncompressed: + return true, manifest.DockerV2SchemaLayerMediaTypeUncompressed + case imgspecv1.MediaTypeImageLayer, imgspecv1.MediaTypeImageLayerGzip, imgspecv1.MediaTypeImageLayerZstd: + return true, imgspecv1.MediaTypeImageLayer + } + + return false, "" +} + // copyLayer copies a layer with srcInfo (with known Digest and Annotations and possibly known Size) in src to dest, perhaps compressing it if canCompress, // and returns a complete blobInfo of the copied layer, and a value for LayerDiffIDs if diffIDIsNeeded -func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, toEncrypt bool, pool *mpb.Progress) (types.BlobInfo, digest.Digest, error) { +func (ic *imageCopier) copyLayer(ctx context.Context, srcIndex int, srcInfo types.BlobInfo, toEncrypt bool, pool *mpb.Progress, deltaLayers []types.BlobInfo) (types.BlobInfo, digest.Digest, error) { cachedDiffID := ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be "" // Diffs are needed if we are encrypting an image or trying to decrypt an image diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == "" || toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.ociDecryptConfig != nil) @@ -1061,6 +1142,49 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to } } + // First look for a delta that matches this layer and substitute the result of that + if ok, deltaResultMediaType := ic.canUseDeltas(srcInfo); ok { + // Get deltas going TO this layer + deltaDiffID, matchingDeltas := ic.getMatchingDeltaLayers(ctx, srcIndex, deltaLayers) + // Get best possible FROM delta + deltaStream, deltaDataSource, matchingDelta, err := ic.resolveDeltaLayer(ctx, matchingDeltas) + if err != nil { + return types.BlobInfo{}, "", err + } + if deltaStream != nil { + bar := ic.c.createProgressBar(pool, matchingDelta, "delta", "done") + + wrappedDeltaStream := bar.ProxyReader(deltaStream) + + // Convert deltaStream to uncompressed tar layer stream + pr, pw := io.Pipe() + go func() { + tar_patch.Apply(deltaStream, deltaDataSource, pw) + deltaDataSource.Close() + deltaStream.Close() + wrappedDeltaStream.Close() + pw.Close() + }() + defer pr.Close() + + // Copy uncompressed tar layer to destination, verifying the diffID + blobInfo, err := ic.c.copyBlobFromStream(ctx, pr, types.BlobInfo{Digest: deltaDiffID, Size: -1, MediaType: deltaResultMediaType, Annotations: srcInfo.Annotations}, nil, ic.canModifyManifest, false, toEncrypt, nil) + if err != nil { + return types.BlobInfo{}, "", err + } + + bar.SetTotal(matchingDelta.Size, true) + + // We verified this when streaming the applied delta above + diffID := deltaDiffID + + // Record the fact that this blob is uncompressed + ic.c.blobInfoCache.RecordDigestUncompressedPair(diffID, diffID) + + return blobInfo, diffID, err + } + } + // Fallback: copy the layer, computing the diffID if we need to do so srcStream, srcBlobSize, err := ic.c.rawSource.GetBlob(ctx, srcInfo, ic.c.blobInfoCache) if err != nil { @@ -1210,7 +1334,9 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr return types.BlobInfo{}, errors.Wrapf(err, "Error reading blob %s", srcInfo.Digest) } isCompressed := decompressor != nil - destStream = bar.ProxyReader(destStream) + if bar != nil { + destStream = bar.ProxyReader(destStream) + } // === Send a copy of the original, uncompressed, stream, to a separate path if necessary. var originalLayerReader io.Reader // DO NOT USE this other than to drain the input if no other consumer in the pipeline has done so. @@ -1229,6 +1355,11 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr logrus.Debugf("Using original blob without modification for encrypted blob") compressionOperation = types.PreserveOriginal inputInfo = srcInfo + } else if canModifyBlob && manifest.IsNoCompressType(srcInfo.MediaType) { + // This is a blob we should not repack, such as a delta + logrus.Debugf("Using original blob without modification for no-compress type") + compressionOperation = types.PreserveOriginal + inputInfo = srcInfo } else if canModifyBlob && c.dest.DesiredLayerCompression() == types.Compress && !isCompressed { logrus.Debugf("Compressing blob on the fly") compressionOperation = types.Compress diff --git a/directory/directory_dest.go b/directory/directory_dest.go index d70b6c07fb..70bf710b22 100644 --- a/directory/directory_dest.go +++ b/directory/directory_dest.go @@ -8,6 +8,7 @@ import ( "path/filepath" "runtime" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/types" "github.com/opencontainers/go-digest" "github.com/pkg/errors" @@ -245,6 +246,10 @@ func (d *dirImageDestination) Commit(context.Context, types.UnparsedImage) error return nil } +func (d *dirImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + return nil, nil +} + // returns true if path exists func pathExists(path string) (bool, error) { _, err := os.Stat(path) diff --git a/directory/directory_src.go b/directory/directory_src.go index ad9129d401..956675319f 100644 --- a/directory/directory_src.go +++ b/directory/directory_src.go @@ -44,6 +44,14 @@ func (s *dirImageSource) GetManifest(ctx context.Context, instanceDigest *digest return m, manifest.GuessMIMEType(m), err } +func (s *dirImageSource) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + return nil, "", nil +} + +func (s *dirImageSource) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + return nil, nil +} + // HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently. func (s *dirImageSource) HasThreadSafeGetBlob() bool { return false diff --git a/docker/archive/dest.go b/docker/archive/dest.go index 1cf197429b..b44cfc08c7 100644 --- a/docker/archive/dest.go +++ b/docker/archive/dest.go @@ -5,8 +5,10 @@ import ( "io" "os" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/docker/tarfile" "github.com/containers/image/v5/types" + digest "github.com/opencontainers/go-digest" "github.com/pkg/errors" ) @@ -70,3 +72,7 @@ func (d *archiveImageDestination) Close() error { func (d *archiveImageDestination) Commit(ctx context.Context, unparsedToplevel types.UnparsedImage) error { return d.Destination.Commit(ctx) } + +func (d *archiveImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + return nil, nil +} diff --git a/docker/daemon/daemon_dest.go b/docker/daemon/daemon_dest.go index c6afd4bde0..79f4cc8754 100644 --- a/docker/daemon/daemon_dest.go +++ b/docker/daemon/daemon_dest.go @@ -4,10 +4,12 @@ import ( "context" "io" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/docker/reference" "github.com/containers/image/v5/docker/tarfile" "github.com/containers/image/v5/types" "github.com/docker/docker/client" + digest "github.com/opencontainers/go-digest" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -146,3 +148,7 @@ func (d *daemonImageDestination) Commit(ctx context.Context, unparsedToplevel ty return err } } + +func (d *daemonImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + return nil, nil +} diff --git a/docker/docker_image_dest.go b/docker/docker_image_dest.go index ab74e1607d..4d660094bf 100644 --- a/docker/docker_image_dest.go +++ b/docker/docker_image_dest.go @@ -14,6 +14,7 @@ import ( "path/filepath" "strings" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/docker/reference" "github.com/containers/image/v5/internal/iolimits" "github.com/containers/image/v5/manifest" @@ -642,3 +643,7 @@ sigExists: func (d *dockerImageDestination) Commit(context.Context, types.UnparsedImage) error { return nil } + +func (d *dockerImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + return nil, nil +} diff --git a/docker/docker_image_src.go b/docker/docker_image_src.go index 9c0c20c64e..68995deb43 100644 --- a/docker/docker_image_src.go +++ b/docker/docker_image_src.go @@ -201,6 +201,43 @@ func (s *dockerImageSource) fetchManifest(ctx context.Context, tagOrDigest strin return manblob, simplifyContentType(res.Header.Get("Content-Type")), nil } +func (s *dockerImageSource) getDeltaManifestTagName(ctx context.Context, instanceDigest *digest.Digest) (string, error) { + digest, err := s.manifestDigest(ctx, instanceDigest) + if err != nil { + return "", err + } + + return "delta-" + digest.Encoded()[:12], nil +} + +func (s *dockerImageSource) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + tagname, err := s.getDeltaManifestTagName(ctx, instanceDigest) + if err != nil { + return nil, "", err + } + // Don't return error if the manifest doesn't exist, only for internal errors + // Deltas are an optional optimization anyway + mb, mt, _ := s.fetchManifest(ctx, tagname) + return mb, mt, nil +} + +func (s *dockerImageSource) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + tagname, err := s.getDeltaManifestTagName(ctx, instanceDigest) + if err != nil { + return nil, err + } + deltaRef, err := reference.WithTag(s.ref.ref, tagname) + if err != nil { + return nil, err + } + + dr, err := newReference(deltaRef) + if err != nil { + return nil, err + } + return newImageDestination(s.c.sys, dr) +} + // ensureManifestIsLoaded sets s.cachedManifest and s.cachedManifestMIMEType // // ImageSource implementations are not required or expected to do any caching, diff --git a/docker/tarfile/src.go b/docker/tarfile/src.go index 4d2368c70a..8dbc8b4aae 100644 --- a/docker/tarfile/src.go +++ b/docker/tarfile/src.go @@ -410,6 +410,14 @@ func (s *Source) GetManifest(ctx context.Context, instanceDigest *digest.Digest) return s.generatedManifest, manifest.DockerV2Schema2MediaType, nil } +func (s *Source) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + return nil, "", nil +} + +func (s *Source) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + return nil, nil +} + // uncompressedReadCloser is an io.ReadCloser that closes both the uncompressed stream and the underlying input. type uncompressedReadCloser struct { io.Reader diff --git a/go.mod b/go.mod index ba53d9910e..757c5d3b6b 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/14rcole/gopopulate v0.0.0-20180821133914-b175b219e774 // indirect github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect github.com/BurntSushi/toml v0.3.1 + github.com/alexlarsson/tar-diff v0.0.0-20200420105158-0c605fa029cc github.com/containers/libtrust v0.0.0-20190913040956-14b96171aa3b github.com/containers/ocicrypt v1.0.2 github.com/containers/storage v1.19.0 diff --git a/go.sum b/go.sum index 5a43b61b51..2a05603b47 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpH github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alexlarsson/tar-diff v0.0.0-20200420105158-0c605fa029cc h1:uRrjrYWDr57NOSfx3SH/eHaYQVdteSA+HqkoWN7WWC8= +github.com/alexlarsson/tar-diff v0.0.0-20200420105158-0c605fa029cc/go.mod h1:kOnQH0N/DjC4jcEkwQ7MU4sJEOVubfnCNYjDTVBcLhE= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/image/memory.go b/image/memory.go index 4c96b37d88..a48e2475f1 100644 --- a/image/memory.go +++ b/image/memory.go @@ -49,6 +49,10 @@ func (i *memoryImage) Manifest(ctx context.Context) ([]byte, string, error) { return i.serializedManifest, i.genericManifest.manifestMIMEType(), nil } +func (i *memoryImage) DeltaLayers(ctx context.Context) ([]types.BlobInfo, error) { + return nil, nil +} + // Signatures is like ImageSource.GetSignatures, but the result is cached; it is OK to call this however often you need. func (i *memoryImage) Signatures(ctx context.Context) ([][]byte, error) { // Modifying an image invalidates signatures; a caller asking the updated image for signatures diff --git a/image/unparsed.go b/image/unparsed.go index 4e3028d855..8bd28bf204 100644 --- a/image/unparsed.go +++ b/image/unparsed.go @@ -66,6 +66,17 @@ func (i *UnparsedImage) Manifest(ctx context.Context) ([]byte, string, error) { return i.cachedManifest, i.cachedManifestMIMEType, nil } +func (i *UnparsedImage) DeltaLayers(ctx context.Context) ([]types.BlobInfo, error) { + // Note that GetDeltaManifest can return nil with a nil error. This is ok if no deltas exist + mb, mt, err := i.src.GetDeltaManifest(ctx, i.instanceDigest) + if mb == nil { + return nil, err + } + + m, err := manifestInstanceFromBlob(ctx, nil, i.src, mb, mt) + return m.LayerInfos(), nil +} + // expectedManifestDigest returns a the expected value of the manifest digest, and an indicator whether it is known. // The bool return value seems redundant with digest != ""; it is used explicitly // to refuse (unexpected) situations when the digest exists but is "". diff --git a/manifest/oci.go b/manifest/oci.go index aafe6693bc..22cdc3cce2 100644 --- a/manifest/oci.go +++ b/manifest/oci.go @@ -32,6 +32,11 @@ type OCI1 struct { imgspecv1.Manifest } +const ( + // MediaTypeDescriptor specifies the media type for a content descriptor. + MediaTypeTarDiff = "application/vnd.redhat.tardiff" +) + // SupportedOCI1MediaType checks if the specified string is a supported OCI1 // media type. // @@ -42,7 +47,7 @@ type OCI1 struct { // useful for validation anyway. func SupportedOCI1MediaType(m string) error { switch m { - case imgspecv1.MediaTypeDescriptor, imgspecv1.MediaTypeImageConfig, imgspecv1.MediaTypeImageLayer, imgspecv1.MediaTypeImageLayerGzip, imgspecv1.MediaTypeImageLayerNonDistributable, imgspecv1.MediaTypeImageLayerNonDistributableGzip, imgspecv1.MediaTypeImageLayerNonDistributableZstd, imgspecv1.MediaTypeImageLayerZstd, imgspecv1.MediaTypeImageManifest, imgspecv1.MediaTypeLayoutHeader, ociencspec.MediaTypeLayerEnc, ociencspec.MediaTypeLayerGzipEnc: + case imgspecv1.MediaTypeDescriptor, imgspecv1.MediaTypeImageConfig, imgspecv1.MediaTypeImageLayer, imgspecv1.MediaTypeImageLayerGzip, imgspecv1.MediaTypeImageLayerNonDistributable, imgspecv1.MediaTypeImageLayerNonDistributableGzip, imgspecv1.MediaTypeImageLayerNonDistributableZstd, imgspecv1.MediaTypeImageLayerZstd, imgspecv1.MediaTypeImageManifest, imgspecv1.MediaTypeLayoutHeader, ociencspec.MediaTypeLayerEnc, ociencspec.MediaTypeLayerGzipEnc, MediaTypeTarDiff: return nil default: return fmt.Errorf("unsupported OCIv1 media type: %q", m) @@ -281,3 +286,7 @@ func getDecryptedMediaType(mediatype string) (string, error) { return strings.TrimSuffix(mediatype, "+encrypted"), nil } + +func IsNoCompressType(mediatype string) bool { + return mediatype == MediaTypeTarDiff +} diff --git a/oci/archive/oci_dest.go b/oci/archive/oci_dest.go index 0509eaa83b..c5650cb150 100644 --- a/oci/archive/oci_dest.go +++ b/oci/archive/oci_dest.go @@ -5,6 +5,7 @@ import ( "io" "os" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/types" "github.com/containers/storage/pkg/archive" digest "github.com/opencontainers/go-digest" @@ -140,6 +141,10 @@ func (d *ociArchiveImageDestination) Commit(ctx context.Context, unparsedTopleve return tarDirectory(src, dst) } +func (d *ociArchiveImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + return nil, nil +} + // tar converts the directory at src and saves it to dst func tarDirectory(src, dst string) error { // input is a stream of bytes from the archive of the directory at path diff --git a/oci/archive/oci_src.go b/oci/archive/oci_src.go index 8f07b33070..e17fc51b29 100644 --- a/oci/archive/oci_src.go +++ b/oci/archive/oci_src.go @@ -89,6 +89,14 @@ func (s *ociArchiveImageSource) GetManifest(ctx context.Context, instanceDigest return s.unpackedSrc.GetManifest(ctx, instanceDigest) } +func (s *ociArchiveImageSource) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + return nil, "", nil +} + +func (s *ociArchiveImageSource) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + return nil, nil +} + // HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently. func (s *ociArchiveImageSource) HasThreadSafeGetBlob() bool { return false diff --git a/oci/layout/oci_dest.go b/oci/layout/oci_dest.go index fb0449ca52..8e85a8595c 100644 --- a/oci/layout/oci_dest.go +++ b/oci/layout/oci_dest.go @@ -9,6 +9,7 @@ import ( "path/filepath" "runtime" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/manifest" "github.com/containers/image/v5/types" digest "github.com/opencontainers/go-digest" @@ -314,6 +315,10 @@ func (d *ociImageDestination) Commit(context.Context, types.UnparsedImage) error return ioutil.WriteFile(d.ref.indexPath(), indexJSON, 0644) } +func (d *ociImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + return nil, nil +} + func ensureDirectoryExists(path string) error { if _, err := os.Stat(path); err != nil && os.IsNotExist(err) { if err := os.MkdirAll(path, 0755); err != nil { diff --git a/oci/layout/oci_src.go b/oci/layout/oci_src.go index f515203df7..ae022a5177 100644 --- a/oci/layout/oci_src.go +++ b/oci/layout/oci_src.go @@ -103,6 +103,14 @@ func (s *ociImageSource) GetManifest(ctx context.Context, instanceDigest *digest return m, mimeType, nil } +func (s *ociImageSource) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + return nil, "", nil +} + +func (s *ociImageSource) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + return nil, nil +} + // HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently. func (s *ociImageSource) HasThreadSafeGetBlob() bool { return false diff --git a/openshift/openshift.go b/openshift/openshift.go index 28bfc456d5..58ba67b6be 100644 --- a/openshift/openshift.go +++ b/openshift/openshift.go @@ -11,6 +11,7 @@ import ( "net/url" "strings" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/docker" "github.com/containers/image/v5/docker/reference" "github.com/containers/image/v5/internal/iolimits" @@ -211,6 +212,17 @@ func (s *openshiftImageSource) GetManifest(ctx context.Context, instanceDigest * return s.docker.GetManifest(ctx, instanceDigest) } +func (s *openshiftImageSource) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + if err := s.ensureImageIsResolved(ctx); err != nil { + return nil, "", err + } + return s.docker.GetDeltaManifest(ctx, instanceDigest) +} + +func (s *openshiftImageSource) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + return s.docker.GetDeltaManifestDestination(ctx, instanceDigest) +} + // HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently. func (s *openshiftImageSource) HasThreadSafeGetBlob() bool { return false @@ -511,6 +523,10 @@ func (d *openshiftImageDestination) Commit(ctx context.Context, unparsedToplevel return d.docker.Commit(ctx, unparsedToplevel) } +func (d *openshiftImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + return d.docker.GetLayerDeltaData(ctx, diffID) +} + // These structs are subsets of github.com/openshift/origin/pkg/image/api/v1 and its dependencies. type imageStream struct { Status imageStreamStatus `json:"status,omitempty"` diff --git a/storage/storage_image.go b/storage/storage_image.go index df4b67c7a7..92e2cecab0 100644 --- a/storage/storage_image.go +++ b/storage/storage_image.go @@ -15,6 +15,7 @@ import ( "sync" "sync/atomic" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/docker/reference" "github.com/containers/image/v5/image" "github.com/containers/image/v5/internal/tmpdir" @@ -226,6 +227,14 @@ func (s *storageImageSource) GetManifest(ctx context.Context, instanceDigest *di return s.cachedManifest, manifest.GuessMIMEType(s.cachedManifest), err } +func (s *storageImageSource) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + return nil, "", nil +} + +func (s *storageImageSource) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + return nil, nil +} + // LayerInfosForCopy() returns the list of layer blobs that make up the root filesystem of // the image, after they've been decompressed. func (s *storageImageSource) LayerInfosForCopy(ctx context.Context, instanceDigest *digest.Digest) ([]types.BlobInfo, error) { @@ -900,6 +909,52 @@ func (s *storageImageDestination) Commit(ctx context.Context, unparsedToplevel t return nil } +type LayerDeltaDataSource struct { + fs *tar_patch.FilesystemDataSource + store storage.Store + id string +} + +func (s *LayerDeltaDataSource) Close() error { + err := s.fs.Close() + s.store.Unmount(s.id, false) + return err +} + +func (s *LayerDeltaDataSource) Read(data []byte) (n int, err error) { + return s.fs.Read(data) +} + +func (s *LayerDeltaDataSource) SetCurrentFile(file string) error { + return s.fs.SetCurrentFile(file) +} + +func (s *LayerDeltaDataSource) Seek(offset int64, whence int) (int64, error) { + return s.fs.Seek(offset, whence) +} + +func (s *storageImageDestination) GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) { + layers, err := s.imageRef.transport.store.LayersByUncompressedDigest(diffID) + if err != nil && err != storage.ErrLayerUnknown { + return nil, err // Internal error + } + if layers == nil || len(layers) == 0 { + return nil, nil // Unknown layer + } + + layerId := layers[len(layers)-1].ID + mountPoint, err := s.imageRef.transport.store.Mount(layerId, "") + if err != nil { + return nil, err + } + + return &LayerDeltaDataSource{ + fs: tar_patch.NewFilesystemDataSource(mountPoint), + store: s.imageRef.transport.store, + id: layerId, + }, nil +} + var manifestMIMETypes = []string{ imgspecv1.MediaTypeImageManifest, manifest.DockerV2Schema2MediaType, diff --git a/tarball/tarball_src.go b/tarball/tarball_src.go index 694ad17bd1..4115eed055 100644 --- a/tarball/tarball_src.go +++ b/tarball/tarball_src.go @@ -247,6 +247,14 @@ func (is *tarballImageSource) GetManifest(ctx context.Context, instanceDigest *d return is.manifest, imgspecv1.MediaTypeImageManifest, nil } +func (s *tarballImageSource) GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) { + return nil, "", nil +} + +func (s *tarballImageSource) GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (types.ImageDestination, error) { + return nil, nil +} + // GetSignatures returns the image's signatures. It may use a remote (= slow) service. // This source implementation does not support manifest lists, so the passed-in instanceDigest should always be nil, // as there can be no secondary manifests. diff --git a/types/types.go b/types/types.go index d469e03b53..d128191a44 100644 --- a/types/types.go +++ b/types/types.go @@ -5,6 +5,7 @@ import ( "io" "time" + "github.com/alexlarsson/tar-diff/pkg/tar-patch" "github.com/containers/image/v5/docker/reference" compression "github.com/containers/image/v5/pkg/compression/types" digest "github.com/opencontainers/go-digest" @@ -256,6 +257,17 @@ type ImageSource interface { // The Digest field is guaranteed to be provided; Size may be -1. // WARNING: The list may contain duplicates, and they are semantically relevant. LayerInfosForCopy(ctx context.Context, instanceDigest *digest.Digest) ([]BlobInfo, error) + // GetDeltaManifest returns the delta manifest for the current image, as well as its type, if it exist. + // No error is returned if no delta manifest exists, just a nil slice + // It may use a remote (= slow) service. + // If instanceDigest is not nil, it contains a digest of the specific manifest instance to retrieve deltas for (when the primary manifest is a manifest list); + // this never happens if the primary manifest is not a manifest list (e.g. if the source never returns manifest lists). + GetDeltaManifest(ctx context.Context, instanceDigest *digest.Digest) ([]byte, string, error) + // GetDeltaManifestDestination returns an ImageDestination that can be used to update the delta manifest for this Image. + // If deltas are not supported it will return nil + // If instanceDigest is not nil, it contains a digest of the specific manifest instance to retrieve deltas for (when the primary manifest is a manifest list); + // this never happens if the primary manifest is not a manifest list (e.g. if the source never returns manifest lists). + GetDeltaManifestDestination(ctx context.Context, instanceDigest *digest.Digest) (ImageDestination, error) } // ImageDestination is a service, possibly remote (= slow), to store components of a single image. @@ -329,6 +341,10 @@ type ImageDestination interface { // - Uploaded data MAY be visible to others before Commit() is called // - Uploaded data MAY be removed or MAY remain around if Close() is called without Commit() (i.e. rollback is allowed but not guaranteed) Commit(ctx context.Context, unparsedToplevel UnparsedImage) error + // Tries to get access to the uncompressed data of a given DiffID that is locally available + // This data is used to apply a delta from this layer + // If deltas are not supported or the layer is not available, nil is returned (and no error) + GetLayerDeltaData(ctx context.Context, diffID digest.Digest) (tar_patch.DataSource, error) } // ManifestTypeRejectedError is returned by ImageDestination.PutManifest if the destination is in principle available, @@ -407,6 +423,9 @@ type Image interface { // Size returns an approximation of the amount of disk space which is consumed by the image in its current // location. If the size is not known, -1 will be returned. Size() (int64, error) + // Downloads and parses the delta manifest for the image, returning the available delta layers + // If no deltas available, returns nil without an error + DeltaLayers(ctx context.Context) ([]BlobInfo, error) } // ImageCloser is an Image with a Close() method which must be called by the user.