From 2db28783c3f793b16696d610909cd3d48a01475c Mon Sep 17 00:00:00 2001 From: tomsweeneyredhat Date: Thu, 25 Jul 2024 18:21:50 -0400 Subject: [PATCH 01/37] Bump to c/image v5.33.0-dev As the title says. Bumping the main branch back to a dev version. Signed-off-by: tomsweeneyredhat --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index 4d889f8091..d98e04b9a5 100644 --- a/version/version.go +++ b/version/version.go @@ -6,12 +6,12 @@ const ( // VersionMajor is for an API incompatible changes VersionMajor = 5 // VersionMinor is for functionality in a backwards-compatible manner - VersionMinor = 32 + VersionMinor = 33 // VersionPatch is for backwards-compatible bug fixes VersionPatch = 0 // VersionDev indicates development branch. Releases will be empty string. - VersionDev = "" + VersionDev = "-dev" ) // Version is the specification version that the package types support. From 67e1306281d179a071fbec6e8b292bc15bfb1385 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 28 Jul 2024 04:00:29 +0000 Subject: [PATCH 02/37] fix(deps): update module github.com/vbauerster/mpb/v8 to v8.7.5 Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 19ebeab665..bd3ae509e7 100644 --- a/go.mod +++ b/go.mod @@ -35,7 +35,7 @@ require ( github.com/sylabs/sif/v2 v2.18.0 github.com/ulikunitz/xz v0.5.12 github.com/vbatts/tar-split v0.11.5 - github.com/vbauerster/mpb/v8 v8.7.4 + github.com/vbauerster/mpb/v8 v8.7.5 github.com/xeipuuv/gojsonschema v1.2.0 go.etcd.io/bbolt v1.3.10 golang.org/x/crypto v0.25.0 @@ -94,7 +94,7 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/letsencrypt/boulder v0.0.0-20240418210053-89b07f4543e0 // indirect github.com/mailru/easyjson v0.7.7 // indirect - 
github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/miekg/pkcs11 v1.1.1 // indirect github.com/mistifyio/go-zfs/v3 v3.0.1 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect diff --git a/go.sum b/go.sum index ee767adc30..c3cc707c58 100644 --- a/go.sum +++ b/go.sum @@ -214,8 +214,8 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= @@ -335,8 +335,8 @@ github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/vbatts/tar-split v0.11.5 h1:3bHCTIheBm1qFTcgh9oPu+nNBtX+XJIupG/vacinCts= github.com/vbatts/tar-split v0.11.5/go.mod h1:yZbwRsSeGjusneWgA781EKej9HF8vme8okylkAeNKLk= -github.com/vbauerster/mpb/v8 v8.7.4 h1:p4f16iMfUt3PkAC73SCzAtgtSf8TYDqEbJUT3odPrPo= -github.com/vbauerster/mpb/v8 v8.7.4/go.mod h1:r1B5k2Ljj5KJFCekfihbiqyV4VaaRTANYmvWA2btufI= +github.com/vbauerster/mpb/v8 v8.7.5 h1:hUF3zaNsuaBBwzEFoCvfuX3cpesQXZC0Phm/JcHZQ+c= 
+github.com/vbauerster/mpb/v8 v8.7.5/go.mod h1:bRCnR7K+mj5WXKsy0NWB6Or+wctYGvVwKn6huwvxKa0= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= From b8c1dd7af032c17398d3f3aa241b227d972aea01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 19 Jul 2024 02:07:43 +0200 Subject: [PATCH 03/37] Compute the schema1 DiffID list based on m.LayerInfos() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are already calling m.LayerInfos() anyway, so there is ~no extra cost. And using LayerInfos means we don't need to worry about reversing the order of layers, and we will have access to the layer index, allowing us to acccess the indexTo* fields in the future. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 92dfebd333..0e28db5b26 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -522,26 +522,27 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige func (s *storageImageDestination) computeID(m manifest.Manifest) string { // This is outside of the scope of HasThreadSafePutBlob, so we don’t need to hold s.lock. + layerInfos := m.LayerInfos() + // Build the diffID list. We need the decompressed sums that we've been calculating to // fill in the DiffIDs. It's expected (but not enforced by us) that the number of // diffIDs corresponds to the number of non-EmptyLayer entries in the history. 
var diffIDs []digest.Digest - switch m := m.(type) { + switch m.(type) { case *manifest.Schema1: - // Build a list of the diffIDs we've generated for the non-throwaway FS layers, - // in reverse of the order in which they were originally listed. - for i, compat := range m.ExtractedV1Compatibility { - if compat.ThrowAway { + // Build a list of the diffIDs we've generated for the non-throwaway FS layers + for _, li := range layerInfos { + if li.EmptyLayer { continue } - blobSum := m.FSLayers[i].BlobSum + blobSum := li.Digest diffID, ok := s.lockProtected.blobDiffIDs[blobSum] if !ok { // this can, in principle, legitimately happen when a layer is reused by TOC. logrus.Infof("error looking up diffID for layer %q", blobSum.String()) return "" } - diffIDs = append([]digest.Digest{diffID}, diffIDs...) + diffIDs = append(diffIDs, diffID) } case *manifest.Schema2, *manifest.OCI1: // We know the ID calculation doesn't actually use the diffIDs, so we don't need to populate @@ -570,7 +571,7 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string { } tocIDInput := "" hasLayerPulledByTOC := false - for i := range m.LayerInfos() { + for i := range layerInfos { layerValue := "" // An empty string is not a valid digest, so this is unambiguous with the TOC case. tocDigest, ok := s.lockProtected.indexToTOCDigest[i] // "" if not a TOC if ok { From b520fadb2177c7c212a298b6f1fea3d18e7c55bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Sat, 13 Apr 2024 15:52:11 +0200 Subject: [PATCH 04/37] Improve the documentation of layer identification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Don't claim that we only use compressed digests. - Explicitly document that we assume TOC digests to be unambiguous - Actually define the term "DiffID". - Be more precise in computeID about the criteria being layer identity, not where we pull the layer from. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 0e28db5b26..a846df3eda 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -84,7 +84,11 @@ type storageImageDestinationLockProtected struct { currentIndex int // The index of the layer to be committed (i.e., lower indices have already been committed) indexToAddedLayerInfo map[int]addedLayerInfo // Mapping from layer (by index) to blob to add to the image - // In general, a layer is identified either by (compressed) digest, or by TOC digest. + // Externally, a layer is identified either by (compressed) digest, or by TOC digest + // (and we assume the TOC digest also uniquely identifies the contents, i.e. there aren’t two + // different formats/ways to parse a single TOC); internally, we use uncompressed digest (“DiffID”) or a TOC digest. + // We may or may not know the relationships between these three values. + // // When creating a layer, the c/storage layer metadata and image IDs must _only_ be based on trusted values // we have computed ourselves. (Layer reuse can then look up against such trusted values, but it might not // recompute those values for incomding layers — the point of the reuse is that we don’t need to consume the incoming layer.) @@ -554,11 +558,11 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string { // We want to use the same ID for “the same” images, but without risking unwanted sharing / malicious image corruption. // // Traditionally that means the same ~config digest, as computed by m.ImageID; - // but if we pull a layer by TOC, we verify the layer against neither the (compressed) blob digest in the manifest, + // but if we identify a layer by TOC, we verify the layer against neither the (compressed) blob digest in the manifest, // nor against the config’s RootFS.DiffIDs. 
We don’t really want to do either, to allow partial layer pulls where we never see // most of the data. // - // So, if a layer is pulled by TOC (and we do validate against the TOC), the fact that we used the TOC, and the value of the TOC, + // So, if a layer is identified by TOC (and we do validate against the TOC), the fact that we used the TOC, and the value of the TOC, // must enter into the image ID computation. // But for images where no TOC was used, continue to use IDs computed the traditional way, to maximize image reuse on upgrades, // and to introduce the changed behavior only when partial pulls are used. From e7a01b8151b6a3faf67ba90c89b4d1b113f58320 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 19 Jul 2024 20:08:17 +0200 Subject: [PATCH 05/37] Allow returning (and reporting) unexpected errors from computeID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some errors are severe enough that just logging and continuing is not really worthwhile. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 17 ++++++++++------- storage/storage_test.go | 14 +++++++++++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index a846df3eda..16377219ed 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -523,7 +523,7 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige // the manifest is not of a type that we recognize, we return an empty value, indicating // that since we don't have a recommendation, a random ID should be used if one needs // to be allocated. -func (s *storageImageDestination) computeID(m manifest.Manifest) string { +func (s *storageImageDestination) computeID(m manifest.Manifest) (string, error) { // This is outside of the scope of HasThreadSafePutBlob, so we don’t need to hold s.lock. 
layerInfos := m.LayerInfos() @@ -544,7 +544,7 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string { if !ok { // this can, in principle, legitimately happen when a layer is reused by TOC. logrus.Infof("error looking up diffID for layer %q", blobSum.String()) - return "" + return "", nil } diffIDs = append(diffIDs, diffID) } @@ -552,7 +552,7 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string { // We know the ID calculation doesn't actually use the diffIDs, so we don't need to populate // the diffID list. default: - return "" + return "", nil } // We want to use the same ID for “the same” images, but without risking unwanted sharing / malicious image corruption. @@ -571,7 +571,7 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string { // (skopeo copy --format v2s2 docker://…/zstd-chunked-image containers-storage:… ). So this is not happening only in the OCI case above. ordinaryImageID, err := m.ImageID(diffIDs) if err != nil { - return "" + return "", err } tocIDInput := "" hasLayerPulledByTOC := false @@ -586,13 +586,13 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string { } if !hasLayerPulledByTOC { - return ordinaryImageID + return ordinaryImageID, nil } // ordinaryImageID is a digest of a config, which is a JSON value. // To avoid the risk of collisions, start the input with @ so that the input is not a valid JSON. tocImageID := digest.FromString("@With TOC:" + tocIDInput).Encoded() logrus.Debugf("Ordinary storage image ID %s; a layer was looked up by TOC, so using image ID %s", ordinaryImageID, tocImageID) - return tocImageID + return tocImageID, nil } // getConfigBlob exists only to let us retrieve the configuration blob so that the manifest package can dig @@ -1160,7 +1160,10 @@ func (s *storageImageDestination) Commit(ctx context.Context, unparsedToplevel t // Create the image record, pointing to the most-recently added layer. 
intendedID := s.imageRef.id if intendedID == "" { - intendedID = s.computeID(man) + intendedID, err = s.computeID(man) + if err != nil { + return err + } } oldNames := []string{} img, err := s.imageRef.transport.store.CreateImage(intendedID, nil, lastLayer, "", options) diff --git a/storage/storage_test.go b/storage/storage_test.go index 662aa3fbf3..d94890eb4a 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -315,9 +315,21 @@ func createUncommittedImageDest(t *testing.T, ref types.ImageReference, cache ty desc := layer.storeBlob(t, dest, cache, manifest.DockerV2Schema2LayerMediaType) layerDescriptors = append(layerDescriptors, desc) } - configDescriptor := manifest.Schema2Descriptor{} // might be good enough + + var configDescriptor manifest.Schema2Descriptor if config != nil { configDescriptor = config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType) + } else { + // Use a random digest so that different calls to createUncommittedImageDest with config == nil don’t try to + // use the same image ID. + digestBytes := make([]byte, digest.Canonical.Size()) + _, err := rand.Read(digestBytes) + require.NoError(t, err) + configDescriptor = manifest.Schema2Descriptor{ + MediaType: manifest.DockerV2Schema2ConfigMediaType, + Size: 1, + Digest: digest.NewDigestFromBytes(digest.Canonical, digestBytes), + } } manifest := manifest.Schema2FromComponents(configDescriptor, layerDescriptors) From 6dc3f0c94d922c3e92fabc6691808f939cd786d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 19 Jul 2024 20:54:42 +0200 Subject: [PATCH 06/37] Centralize collecting _consistent_ layer data into trustedLayerIdentityDataLocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currrently we "only" have indexToTOCDigest and blobDiffIDs, but we will make this more complex. 
Centralizing the consumption of these fields into trustedLayerIdentityDataLocked ensure that all consumers interpret the data exactly consistently (and it also allows us to use a single "trusted" variable instead of 2/3 individual ones). Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 164 +++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 54 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 16377219ed..204474a953 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -95,6 +95,9 @@ type storageImageDestinationLockProtected struct { // Layer identification: For a layer, at least one of indexToTOCDigest and blobDiffIDs must be available before commitLayer is called. // The presence of an indexToTOCDigest is what decides how the layer is identified, i.e. which fields must be trusted. + // + // Users of these fields should use trustedLayerIdentityDataLocked, which centralizes the validity logic, + // instead of interpreting these fields, especially blobDiffIDs, directly. blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest, IFF the layer was created/found/reused by TOC digest @@ -519,6 +522,53 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige return false, private.ReusedBlob{}, nil } +// trustedLayerIdentityData is a _consistent_ set of information known about a single layer. 
+type trustedLayerIdentityData struct { + layerIdentifiedByTOC bool // true if we decided the layer should be identified by tocDigest, false if by diffID + + diffID digest.Digest // A digest of the uncompressed full contents of the layer, or "" if unknown; must be set if !layerIdentifiedByTOC + tocDigest digest.Digest // A digest of the TOC digest, or "" if unknown; must be set if layerIdentifiedByTOC + blobDigest digest.Digest // A digest of the (possibly-compressed) layer as presented, or "" if unknown/untrusted. +} + +// trustedLayerIdentityDataLocked returns a _consistent_ set of information for a layer with (layerIndex, blobDigest). +// blobDigest is the (possibly-compressed) layer digest referenced in the manifest. +// It returns (trusted, true) if the layer was found, or (_, false) if insufficient data is available. +// +// The caller must hold s.lock. +func (s *storageImageDestination) trustedLayerIdentityDataLocked(layerIndex int, blobDigest digest.Digest) (trustedLayerIdentityData, bool) { + // The decision about layerIdentifiedByTOC must be _stable_ once the data for layerIndex is set, + // even if s.lockProtected.blobDiffIDs changes later and we can subsequently find an entry that wasn’t originally available. + // + // If we previously didn't have a blobDigest match and decided to use the TOC, but _later_ we happen to find + // a blobDigest match, we might in principle want to reconsider, set layerIdentifiedByTOC to false, and use the file: + // but the layer in question, and possibly child layers, might already have been committed to storage. + // A late-arriving addition to s.lockProtected.blobDiffIDs would mean that we would want to set + // new layer IDs for potentially the whole parent chain = throw away the just-created layers and create them all again. + // + // Such a within-image layer reuse is expected to be pretty rare; instead, ignore the unexpected file match + // and proceed to the originally-planned TOC match. 
+ if tocDigest, ok := s.lockProtected.indexToTOCDigest[layerIndex]; ok { + return trustedLayerIdentityData{ + layerIdentifiedByTOC: true, + diffID: "", + tocDigest: tocDigest, + // Don’t set blobDigest even if we can find a blobDiffIDs[blobDigest] entry: layerIdentifiedByTOC == true, + // tocDigest is the layer identifier, and an attacker might have used a manifest with non-matching tocDigest and blobDigest. + blobDigest: "", + }, true + } + if diffID, ok := s.lockProtected.blobDiffIDs[blobDigest]; ok { + return trustedLayerIdentityData{ + layerIdentifiedByTOC: false, + diffID: diffID, + tocDigest: "", + blobDigest: blobDigest, + }, true + } + return trustedLayerIdentityData{}, false +} + // computeID computes a recommended image ID based on information we have so far. If // the manifest is not of a type that we recognize, we return an empty value, indicating // that since we don't have a recommendation, a random ID should be used if one needs @@ -535,18 +585,22 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) (string, error) switch m.(type) { case *manifest.Schema1: // Build a list of the diffIDs we've generated for the non-throwaway FS layers - for _, li := range layerInfos { + for i, li := range layerInfos { if li.EmptyLayer { continue } - blobSum := li.Digest - diffID, ok := s.lockProtected.blobDiffIDs[blobSum] - if !ok { - // this can, in principle, legitimately happen when a layer is reused by TOC. - logrus.Infof("error looking up diffID for layer %q", blobSum.String()) - return "", nil + trusted, ok := s.trustedLayerIdentityDataLocked(i, li.Digest) + if !ok { // We have already committed all layers if we get to this point, so the data must have been available. 
+ return "", fmt.Errorf("internal inconsistency: layer (%d, %q) not found", i, li.Digest) } - diffIDs = append(diffIDs, diffID) + if trusted.diffID == "" { + if trusted.layerIdentifiedByTOC { + logrus.Infof("v2s1 image uses a layer identified by TOC with unknown diffID; choosing a random image ID") + return "", nil + } + return "", fmt.Errorf("internal inconsistency: layer (%d, %q) is not identified by TOC and has no diffID", i, li.Digest) + } + diffIDs = append(diffIDs, trusted.diffID) } case *manifest.Schema2, *manifest.OCI1: // We know the ID calculation doesn't actually use the diffIDs, so we don't need to populate @@ -575,12 +629,15 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) (string, error) } tocIDInput := "" hasLayerPulledByTOC := false - for i := range layerInfos { - layerValue := "" // An empty string is not a valid digest, so this is unambiguous with the TOC case. - tocDigest, ok := s.lockProtected.indexToTOCDigest[i] // "" if not a TOC - if ok { + for i, li := range layerInfos { + trusted, ok := s.trustedLayerIdentityDataLocked(i, li.Digest) + if !ok { // We have already committed all layers if we get to this point, so the data must have been available. + return "", fmt.Errorf("internal inconsistency: layer (%d, %q) not found", i, li.Digest) + } + layerValue := "" // An empty string is not a valid digest, so this is unambiguous with the TOC case. + if trusted.layerIdentifiedByTOC { hasLayerPulledByTOC = true - layerValue = tocDigest.String() + layerValue = trusted.tocDigest.String() } tocIDInput += layerValue + "|" // "|" can not be present in a TOC digest, so this is an unambiguous separator. } @@ -676,14 +733,14 @@ func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDig s.lock.Lock() defer s.lock.Unlock() - if d, found := s.lockProtected.indexToTOCDigest[layerIndex]; found { - return "@TOC=" + d.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. 
+ trusted, ok := s.trustedLayerIdentityDataLocked(layerIndex, blobDigest) + if !ok { + return "", false } - - if d, found := s.lockProtected.blobDiffIDs[blobDigest]; found { - return d.Encoded(), true // This looks like chain IDs, and it uses the traditional value. + if trusted.layerIdentifiedByTOC { + return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. } - return "", false + return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. } // commitLayer commits the specified layer with the given index to the storage. @@ -837,39 +894,38 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // Check if we previously cached a file with that blob's contents. If we didn't, // then we need to read the desired contents from a layer. - var trustedUncompressedDigest, trustedOriginalDigest digest.Digest // For storage.LayerOptions + var filename string + var gotFilename bool s.lock.Lock() - tocDigest := s.lockProtected.indexToTOCDigest[index] // "" if not set - optionalDiffID := s.lockProtected.blobDiffIDs[layerDigest] // "" if not set - filename, gotFilename := s.lockProtected.filenames[layerDigest] + trusted, ok := s.trustedLayerIdentityDataLocked(index, layerDigest) + if ok && trusted.blobDigest != "" { + filename, gotFilename = s.lockProtected.filenames[trusted.blobDigest] + } s.lock.Unlock() - if gotFilename && tocDigest == "" { - // If tocDigest != "", if we now happen to find a layerDigest match, the newLayerID has already been computed as TOC-based, - // and we don't know the relationship of the layerDigest and TOC digest. - // We could recompute newLayerID to be DiffID-based and use the file, but such a within-image layer - // reuse is expected to be pretty rare; instead, ignore the unexpected file match and proceed to the - // originally-planned TOC match. 
- - // Because tocDigest == "", optionaldiffID must have been set; and even if it weren’t, PutLayer will recompute the digest from the stream. - trustedUncompressedDigest = optionalDiffID - trustedOriginalDigest = layerDigest // The code setting .filenames[layerDigest] is responsible for the contents matching. + if !ok { // We have already determined newLayerID, so the data must have been available. + return nil, fmt.Errorf("internal inconsistency: layer (%d, %q) not found", index, layerDigest) + } + var trustedOriginalDigest digest.Digest // For storage.LayerOptions + if gotFilename { + // The code setting .filenames[trusted.blobDigest] is responsible for ensuring that the file contents match trusted.blobDigest. + trustedOriginalDigest = trusted.blobDigest } else { // Try to find the layer with contents matching the data we use. var layer *storage.Layer // = nil - if tocDigest != "" { - layers, err2 := s.imageRef.transport.store.LayersByTOCDigest(tocDigest) + if trusted.tocDigest != "" { + layers, err2 := s.imageRef.transport.store.LayersByTOCDigest(trusted.tocDigest) if err2 == nil && len(layers) > 0 { layer = &layers[0] } else { - return nil, fmt.Errorf("locating layer for TOC digest %q: %w", tocDigest, err2) + return nil, fmt.Errorf("locating layer for TOC digest %q: %w", trusted.tocDigest, err2) } } else { - // Because tocDigest == "", optionaldiffID must have been set - layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(optionalDiffID) + // When trusted.tocDigest == "", trusted.layerIdentifiedByTOC == false, and trusted.diffID must have been set + layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(trusted.diffID) if err2 == nil && len(layers) > 0 { layer = &layers[0] - } else { - layers, err2 = s.imageRef.transport.store.LayersByCompressedDigest(layerDigest) + } else if trusted.blobDigest != "" { + layers, err2 = s.imageRef.transport.store.LayersByCompressedDigest(trusted.blobDigest) if err2 == nil && len(layers) > 0 { 
layer = &layers[0] } @@ -907,20 +963,19 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D return nil, fmt.Errorf("storing blob to file %q: %w", filename, err) } - if optionalDiffID == "" && layer.UncompressedDigest != "" { - optionalDiffID = layer.UncompressedDigest + if trusted.diffID == "" && layer.UncompressedDigest != "" { + trusted.diffID = layer.UncompressedDigest // This data might have been unavailable in tryReusingBlobAsPending, and is only known now. } - // The stream we have is uncompressed, this matches contents of the stream. - // If tocDigest != "", trustedUncompressedDigest might still be ""; in that case PutLayer will compute the value from the stream. - trustedUncompressedDigest = optionalDiffID - // FIXME? trustedOriginalDigest could be set to layerDigest IF tocDigest == "" (otherwise layerDigest is untrusted). + // The stream we have is uncompressed, and it matches trusted.diffID (if known). + // + // FIXME? trustedOriginalDigest could be set to trusted.blobDigest if known, to allow more layer reuse. // But for c/storage to reasonably use it (as a CompressedDigest value), we should also ensure the CompressedSize of the created // layer is correct, and the API does not currently make it possible (.CompressedSize is set from the input stream). // // We can legitimately set storage.LayerOptions.OriginalDigest to "", - // but that would just result in PutLayer computing the digest of the input stream == optionalDiffID. + // but that would just result in PutLayer computing the digest of the input stream == trusted.diffID. // So, instead, set .OriginalDigest to the value we know already, to avoid that digest computation. - trustedOriginalDigest = optionalDiffID + trustedOriginalDigest = trusted.diffID // Allow using the already-collected layer contents without extracting the layer again. 
// @@ -928,11 +983,11 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // We don’t have the original compressed data here to trivially set filenames[layerDigest]. // In particular we can’t achieve the correct Layer.CompressedSize value with the current c/storage API. // Within-image layer reuse is probably very rare, for now we prefer to avoid that complexity. - if trustedUncompressedDigest != "" { + if trusted.diffID != "" { s.lock.Lock() - s.lockProtected.blobDiffIDs[trustedUncompressedDigest] = trustedUncompressedDigest - s.lockProtected.filenames[trustedUncompressedDigest] = filename - s.lockProtected.fileSizes[trustedUncompressedDigest] = fileSize + s.lockProtected.blobDiffIDs[trusted.diffID] = trusted.diffID + s.lockProtected.filenames[trusted.diffID] = filename + s.lockProtected.fileSizes[trusted.diffID] = fileSize s.lock.Unlock() } } @@ -945,8 +1000,9 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // Build the new layer using the diff, regardless of where it came from. // TODO: This can take quite some time, and should ideally be cancellable using ctx.Done(). layer, _, err := s.imageRef.transport.store.PutLayer(newLayerID, parentLayer, nil, "", false, &storage.LayerOptions{ - OriginalDigest: trustedOriginalDigest, - UncompressedDigest: trustedUncompressedDigest, + OriginalDigest: trustedOriginalDigest, + // This might be "" if trusted.layerIdentifiedByTOC; in that case PutLayer will compute the value from the stream. 
+ UncompressedDigest: trusted.diffID, }, file) if err != nil && !errors.Is(err, storage.ErrDuplicateID) { return nil, fmt.Errorf("adding layer with blob %q: %w", layerDigest, err) From 757d726e1264d942813e8121f6a9d7a4beeb22b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 29 Feb 2024 13:37:37 +0100 Subject: [PATCH 07/37] Add TOC digest <-> uncompressed digest mapping to BIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new code is not called, so it should not change behavior (apart from extending the BoltDB/SQLite schema). Signed-off-by: Miloslav Trmač --- internal/blobinfocache/blobinfocache.go | 7 +++ internal/blobinfocache/types.go | 9 ++++ pkg/blobinfocache/boltdb/boltdb.go | 52 ++++++++++++++++++++++ pkg/blobinfocache/internal/test/test.go | 24 ++++++++++ pkg/blobinfocache/memory/memory.go | 42 ++++++++++++++---- pkg/blobinfocache/none/none.go | 13 ++++++ pkg/blobinfocache/sqlite/sqlite.go | 59 +++++++++++++++++++++++++ 7 files changed, 198 insertions(+), 8 deletions(-) diff --git a/internal/blobinfocache/blobinfocache.go b/internal/blobinfocache/blobinfocache.go index 893aa959d4..60f8e6a027 100644 --- a/internal/blobinfocache/blobinfocache.go +++ b/internal/blobinfocache/blobinfocache.go @@ -27,6 +27,13 @@ func (bic *v1OnlyBlobInfoCache) Open() { func (bic *v1OnlyBlobInfoCache) Close() { } +func (bic *v1OnlyBlobInfoCache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest { + return "" +} + +func (bic *v1OnlyBlobInfoCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) { +} + func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) { } diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go index c9e4aaa485..ed340ba478 100644 --- a/internal/blobinfocache/types.go +++ b/internal/blobinfocache/types.go @@ -26,6 +26,15 @@ type BlobInfoCache2 interface { // Close destroys 
state created by Open(). Close() + // UncompressedDigestForTOC returns an uncompressed digest corresponding to anyDigest. + // Returns "" if the uncompressed digest is unknown. + UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest + // RecordTOCUncompressedPair records that the tocDigest corresponds to uncompressed. + // WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. + // because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs. + // (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) + RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) + // RecordDigestCompressorName records a compressor for the blob with the specified digest, // or Uncompressed or UnknownCompression. // WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go index 07230f8738..b13246b212 100644 --- a/pkg/blobinfocache/boltdb/boltdb.go +++ b/pkg/blobinfocache/boltdb/boltdb.go @@ -25,6 +25,8 @@ var ( // uncompressedDigestBucket stores a mapping from any digest to an uncompressed digest. uncompressedDigestBucket = []byte("uncompressedDigest") + // uncompressedDigestByTOCBucket stores a mapping from a TOC digest to an uncompressed digest. + uncompressedDigestByTOCBucket = []byte("uncompressedDigestByTOC") // digestCompressorBucket stores a mapping from any digest to a compressor, or blobinfocache.Uncompressed (not blobinfocache.UnknownCompression). // It may not exist in caches created by older versions, even if uncompressedDigestBucket is present. digestCompressorBucket = []byte("digestCompressor") @@ -243,6 +245,56 @@ func (bdc *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre }) // FIXME? 
Log error (but throttle the log volume on repeated accesses)? } +// UncompressedDigestForTOC returns an uncompressed digest corresponding to anyDigest. +// Returns "" if the uncompressed digest is unknown. +func (bdc *cache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest { + var res digest.Digest + if err := bdc.view(func(tx *bolt.Tx) error { + if b := tx.Bucket(uncompressedDigestByTOCBucket); b != nil { + if uncompressedBytes := b.Get([]byte(tocDigest.String())); uncompressedBytes != nil { + d, err := digest.Parse(string(uncompressedBytes)) + if err == nil { + res = d + return nil + } + // FIXME? Log err (but throttle the log volume on repeated accesses)? + } + } + res = "" + return nil + }); err != nil { // Including os.IsNotExist(err) + return "" // FIXME? Log err (but throttle the log volume on repeated accesses)? + } + return res +} + +// RecordTOCUncompressedPair records that the tocDigest corresponds to uncompressed. +// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. +// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs. +// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) 
+func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) { + _ = bdc.update(func(tx *bolt.Tx) error { + b, err := tx.CreateBucketIfNotExists(uncompressedDigestByTOCBucket) + if err != nil { + return err + } + key := []byte(tocDigest.String()) + if previousBytes := b.Get(key); previousBytes != nil { + previous, err := digest.Parse(string(previousBytes)) + if err != nil { + return err + } + if previous != uncompressed { + logrus.Warnf("Uncompressed digest for blob with TOC %q previously recorded as %q, now %q", tocDigest, previous, uncompressed) + } + } + if err := b.Put(key, []byte(uncompressed.String())); err != nil { + return err + } + return nil + }) // FIXME? Log error (but throttle the log volume on repeated accesses)? +} + // RecordDigestCompressorName records that the blob with digest anyDigest was compressed with the specified // compressor, or is blobinfocache.Uncompressed. // WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. 
diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go index 66d6d0c4a6..9d932f51c8 100644 --- a/pkg/blobinfocache/internal/test/test.go +++ b/pkg/blobinfocache/internal/test/test.go @@ -43,6 +43,8 @@ func GenericCache(t *testing.T, newTestCache func(t *testing.T) blobinfocache.Bl }{ {"UncompressedDigest", testGenericUncompressedDigest}, {"RecordDigestUncompressedPair", testGenericRecordDigestUncompressedPair}, + {"UncompressedDigestForTOC", testGenericUncompressedDigestForTOC}, + {"RecordTOCUncompressedPair", testGenericRecordTOCUncompressedPair}, {"RecordKnownLocations", testGenericRecordKnownLocations}, {"CandidateLocations", testGenericCandidateLocations}, {"CandidateLocations2", testGenericCandidateLocations2}, @@ -99,6 +101,28 @@ func testGenericRecordDigestUncompressedPair(t *testing.T, cache blobinfocache.B } } +func testGenericUncompressedDigestForTOC(t *testing.T, cache blobinfocache.BlobInfoCache2) { + // Nothing is known. + assert.Equal(t, digest.Digest(""), cache.UncompressedDigestForTOC(digestUnknown)) + + cache.RecordTOCUncompressedPair(digestCompressedA, digestUncompressed) + cache.RecordTOCUncompressedPair(digestCompressedB, digestUncompressed) + // Known TOC→uncompressed mapping + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedA)) + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedB)) +} + +func testGenericRecordTOCUncompressedPair(t *testing.T, cache blobinfocache.BlobInfoCache2) { + for i := 0; i < 2; i++ { // Record the same data twice to ensure redundant writes don’t break things. 
+ // Known TOC→uncompressed mapping + cache.RecordTOCUncompressedPair(digestCompressedA, digestUncompressed) + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedA)) + // Two mappings to the same uncompressed digest + cache.RecordTOCUncompressedPair(digestCompressedB, digestUncompressed) + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedB)) + } +} + func testGenericRecordKnownLocations(t *testing.T, cache blobinfocache.BlobInfoCache2) { transport := mocks.NameImageTransport("==BlobInfocache transport mock") for i := 0; i < 2; i++ { // Record the same data twice to ensure redundant writes don’t break things. diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go index 32aaa4d6f7..61e858deb8 100644 --- a/pkg/blobinfocache/memory/memory.go +++ b/pkg/blobinfocache/memory/memory.go @@ -24,10 +24,11 @@ type locationKey struct { type cache struct { mutex sync.Mutex // The following fields can only be accessed with mutex held. 
- uncompressedDigests map[digest.Digest]digest.Digest - digestsByUncompressed map[digest.Digest]*set.Set[digest.Digest] // stores a set of digests for each uncompressed digest - knownLocations map[locationKey]map[types.BICLocationReference]time.Time // stores last known existence time for each location reference - compressors map[digest.Digest]string // stores a compressor name, or blobinfocache.Uncompressed (not blobinfocache.UnknownCompression), for each digest + uncompressedDigests map[digest.Digest]digest.Digest + uncompressedDigestsByTOC map[digest.Digest]digest.Digest + digestsByUncompressed map[digest.Digest]*set.Set[digest.Digest] // stores a set of digests for each uncompressed digest + knownLocations map[locationKey]map[types.BICLocationReference]time.Time // stores last known existence time for each location reference + compressors map[digest.Digest]string // stores a compressor name, or blobinfocache.Uncompressed (not blobinfocache.UnknownCompression), for each digest } // New returns a BlobInfoCache implementation which is in-memory only. @@ -44,10 +45,11 @@ func New() types.BlobInfoCache { func new2() *cache { return &cache{ - uncompressedDigests: map[digest.Digest]digest.Digest{}, - digestsByUncompressed: map[digest.Digest]*set.Set[digest.Digest]{}, - knownLocations: map[locationKey]map[types.BICLocationReference]time.Time{}, - compressors: map[digest.Digest]string{}, + uncompressedDigests: map[digest.Digest]digest.Digest{}, + uncompressedDigestsByTOC: map[digest.Digest]digest.Digest{}, + digestsByUncompressed: map[digest.Digest]*set.Set[digest.Digest]{}, + knownLocations: map[locationKey]map[types.BICLocationReference]time.Time{}, + compressors: map[digest.Digest]string{}, } } @@ -104,6 +106,30 @@ func (mem *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre anyDigestSet.Add(anyDigest) } +// UncompressedDigestForTOC returns an uncompressed digest corresponding to anyDigest. +// Returns "" if the uncompressed digest is unknown. 
+func (mem *cache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest { + mem.mutex.Lock() + defer mem.mutex.Unlock() + if d, ok := mem.uncompressedDigestsByTOC[tocDigest]; ok { + return d + } + return "" +} + +// RecordTOCUncompressedPair records that the tocDigest corresponds to uncompressed. +// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. +// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs. +// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) +func (mem *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) { + mem.mutex.Lock() + defer mem.mutex.Unlock() + if previous, ok := mem.uncompressedDigestsByTOC[tocDigest]; ok && previous != uncompressed { + logrus.Warnf("Uncompressed digest for blob with TOC %q previously recorded as %q, now %q", tocDigest, previous, uncompressed) + } + mem.uncompressedDigestsByTOC[tocDigest] = uncompressed +} + // RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope, // and can be reused given the opaque location data. func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, location types.BICLocationReference) { diff --git a/pkg/blobinfocache/none/none.go b/pkg/blobinfocache/none/none.go index 4b7122f928..9a2219e795 100644 --- a/pkg/blobinfocache/none/none.go +++ b/pkg/blobinfocache/none/none.go @@ -34,6 +34,19 @@ func (noCache) UncompressedDigest(anyDigest digest.Digest) digest.Digest { func (noCache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompressed digest.Digest) { } +// UncompressedDigestForTOC returns an uncompressed digest corresponding to anyDigest. 
+// Returns "" if the uncompressed digest is unknown. +func (noCache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest { + return "" +} + +// RecordTOCUncompressedPair records that the tocDigest corresponds to uncompressed. +// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. +// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs. +// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) +func (noCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) { +} + // RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope, // and can be reused given the opaque location data. func (noCache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, location types.BICLocationReference) { diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go index a5be85a652..eb6a4a5c2e 100644 --- a/pkg/blobinfocache/sqlite/sqlite.go +++ b/pkg/blobinfocache/sqlite/sqlite.go @@ -295,6 +295,14 @@ func ensureDBHasCurrentSchema(db *sql.DB) error { `PRIMARY KEY (transport, scope, digest, location) )`, }, + { + "DigestTOCUncompressedPairs", + `CREATE TABLE IF NOT EXISTS DigestTOCUncompressedPairs(` + + // index implied by PRIMARY KEY + `tocDigest TEXT PRIMARY KEY NOT NULL,` + + `uncompressedDigest TEXT NOT NULL + )`, + }, } _, err := dbTransaction(db, func(tx *sql.Tx) (void, error) { @@ -385,6 +393,57 @@ func (sqc *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } +// UncompressedDigestForTOC returns an uncompressed digest corresponding to anyDigest. 
+// Returns "" if the uncompressed digest is unknown. +func (sqc *cache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest { + res, err := transaction(sqc, func(tx *sql.Tx) (digest.Digest, error) { + uncompressedString, found, err := querySingleValue[string](tx, "SELECT uncompressedDigest FROM DigestTOCUncompressedPairs WHERE tocDigest = ?", tocDigest.String()) + if err != nil { + return "", err + } + if found { + d, err := digest.Parse(uncompressedString) + if err != nil { + return "", err + } + return d, nil + + } + return "", nil + }) + if err != nil { + return "" // FIXME? Log err (but throttle the log volume on repeated accesses)? + } + return res +} + +// RecordTOCUncompressedPair records that the tocDigest corresponds to uncompressed. +// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. +// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs. +// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) 
+func (sqc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) { + _, _ = transaction(sqc, func(tx *sql.Tx) (void, error) { + previousString, gotPrevious, err := querySingleValue[string](tx, "SELECT uncompressedDigest FROM DigestTOCUncompressedPairs WHERE tocDigest = ?", tocDigest.String()) + if err != nil { + return void{}, fmt.Errorf("looking for uncompressed digest for blob with TOC %q", tocDigest) + } + if gotPrevious { + previous, err := digest.Parse(previousString) + if err != nil { + return void{}, err + } + if previous != uncompressed { + logrus.Warnf("Uncompressed digest for blob with TOC %q previously recorded as %q, now %q", tocDigest, previous, uncompressed) + } + } + if _, err := tx.Exec("INSERT OR REPLACE INTO DigestTOCUncompressedPairs(tocDigest, uncompressedDigest) VALUES (?, ?)", + tocDigest.String(), uncompressed.String()); err != nil { + return void{}, fmt.Errorf("recording uncompressed digest %q for blob with TOC %q: %w", uncompressed, tocDigest, err) + } + return void{}, nil + }) // FIXME? Log error (but throttle the log volume on repeated accesses)? +} + // RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope, // and can be reused given the opaque location data. func (sqc *cache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, location types.BICLocationReference) { From 0ad8604de10df06572e37827b38979d4949c7e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 29 Feb 2024 15:35:58 +0100 Subject: [PATCH 08/37] Set up storage destination to support TOC-based layers identified in storage by DiffID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we can, prefer identifying layers by DiffID, because multiple TOCs can map to the same DiffID; and because it maximizes reuse with non-TOC layers. 
For now, the new situation is unreachable. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 118 ++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 46 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 204474a953..b332382a8f 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -91,18 +91,29 @@ type storageImageDestinationLockProtected struct { // // When creating a layer, the c/storage layer metadata and image IDs must _only_ be based on trusted values // we have computed ourselves. (Layer reuse can then look up against such trusted values, but it might not - // recompute those values for incomding layers — the point of the reuse is that we don’t need to consume the incoming layer.) - - // Layer identification: For a layer, at least one of indexToTOCDigest and blobDiffIDs must be available before commitLayer is called. - // The presence of an indexToTOCDigest is what decides how the layer is identified, i.e. which fields must be trusted. + // recompute those values for incoming layers — the point of the reuse is that we don’t need to consume the incoming layer.) + // + // Layer identification: For a layer, at least one of (indexToDiffID, indexToTOCDigest, blobDiffIDs) must be available + // before commitLayer is called. + // The layer is identified by the first of the three fields which exists, in that order (and the value must be trusted). // + // WARNING: All values in indexToDiffID, indexToTOCDigest, and blobDiffIDs are _individually_ trusted, but blobDiffIDs is more subtle. + // The values in indexTo* are all consistent, because the code writing them processed them all at once, and consistently. + // But it is possible for a layer’s indexToDiffID an indexToTOCDigest to be based on a TOC, without setting blobDiffIDs + // for the compressed digest of that index, and for blobDiffIDs[compressedDigest] to be set _separately_ while processing some + // other layer entry. 
In particular it is possible for indexToDiffID[index] and blobDiffIDs[compressedDigestAtIndex]] to refer + // to mismatching contents. // Users of these fields should use trustedLayerIdentityDataLocked, which centralizes the validity logic, // instead of interpreting these fields, especially blobDiffIDs, directly. - blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs - indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest, IFF the layer was created/found/reused by TOC digest + // + // Ideally we wouldn’t have blobDiffIDs, and we would just keep records by index, but the public API does not require the caller + // to provide layer indices; and configs don’t have layer indices. blobDiffIDs needs to exist for those cases. + indexToDiffID map[int]digest.Digest // Mapping from layer index to DiffID + indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest + blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs. CAREFUL: See the WARNING above. // Layer data: Before commitLayer is called, either at least one of (diffOutputs, indexToAdditionalLayer, filenames) - // should be available; or indexToTOCDigest/blobDiffIDs should be enough to locate an existing c/storage layer. + // should be available; or indexToDiffID/indexToTOCDigest/blobDiffIDs should be enough to locate an existing c/storage layer. // They are looked up in the order they are mentioned above. 
diffOutputs map[int]*graphdriver.DriverWithDifferOutput // Mapping from layer index to a partially-pulled layer intermediate data indexToAdditionalLayer map[int]storage.AdditionalLayer // Mapping from layer index to their corresponding additional layer @@ -152,9 +163,12 @@ func newImageDestination(sys *types.SystemContext, imageRef storageReference) (* }, indexToStorageID: make(map[int]string), lockProtected: storageImageDestinationLockProtected{ - indexToAddedLayerInfo: make(map[int]addedLayerInfo), - blobDiffIDs: make(map[digest.Digest]digest.Digest), - indexToTOCDigest: make(map[int]digest.Digest), + indexToAddedLayerInfo: make(map[int]addedLayerInfo), + + indexToDiffID: make(map[int]digest.Digest), + indexToTOCDigest: make(map[int]digest.Digest), + blobDiffIDs: make(map[digest.Digest]digest.Digest), + diffOutputs: make(map[int]*graphdriver.DriverWithDifferOutput), indexToAdditionalLayer: make(map[int]storage.AdditionalLayer), filenames: make(map[digest.Digest]string), @@ -548,25 +562,40 @@ func (s *storageImageDestination) trustedLayerIdentityDataLocked(layerIndex int, // // Such a within-image layer reuse is expected to be pretty rare; instead, ignore the unexpected file match // and proceed to the originally-planned TOC match. + + res := trustedLayerIdentityData{} + diffID, layerIdentifiedByDiffID := s.lockProtected.indexToDiffID[layerIndex] + if layerIdentifiedByDiffID { + res.layerIdentifiedByTOC = false + res.diffID = diffID + } if tocDigest, ok := s.lockProtected.indexToTOCDigest[layerIndex]; ok { - return trustedLayerIdentityData{ - layerIdentifiedByTOC: true, - diffID: "", - tocDigest: tocDigest, - // Don’t set blobDigest even if we can find a blobDiffIDs[blobDigest] entry: layerIdentifiedByTOC == true, - // tocDigest is the layer identifier, and an attacker might have used a manifest with non-matching tocDigest and blobDigest. 
- blobDigest: "", - }, true - } - if diffID, ok := s.lockProtected.blobDiffIDs[blobDigest]; ok { - return trustedLayerIdentityData{ - layerIdentifiedByTOC: false, - diffID: diffID, - tocDigest: "", - blobDigest: blobDigest, - }, true - } - return trustedLayerIdentityData{}, false + res.tocDigest = tocDigest + if !layerIdentifiedByDiffID { + res.layerIdentifiedByTOC = true + } + } + if otherDiffID, ok := s.lockProtected.blobDiffIDs[blobDigest]; ok { + if !layerIdentifiedByDiffID && !res.layerIdentifiedByTOC { + // This is the only data we have, so it is clearly self-consistent. + res.layerIdentifiedByTOC = false + res.diffID = otherDiffID + res.blobDigest = blobDigest + layerIdentifiedByDiffID = true + } else { + // We have set up the layer identity without referring to blobDigest: + // an attacker might have used a manifest with non-matching tocDigest and blobDigest. + // But, if we know a trusted diffID value from other sources, and it matches the one for blobDigest, + // we know blobDigest is fine as well. + if res.diffID != "" && otherDiffID == res.diffID { + res.blobDigest = blobDigest + } + } + } + if !layerIdentifiedByDiffID && !res.layerIdentifiedByTOC { + return trustedLayerIdentityData{}, false // We found nothing at all + } + return res, true } // computeID computes a recommended image ID based on information we have so far. If @@ -912,28 +941,25 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } else { // Try to find the layer with contents matching the data we use. 
var layer *storage.Layer // = nil - if trusted.tocDigest != "" { - layers, err2 := s.imageRef.transport.store.LayersByTOCDigest(trusted.tocDigest) - if err2 == nil && len(layers) > 0 { + if trusted.diffID != "" { + if layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(trusted.diffID); err2 == nil && len(layers) > 0 { layer = &layers[0] - } else { - return nil, fmt.Errorf("locating layer for TOC digest %q: %w", trusted.tocDigest, err2) } - } else { - // When trusted.tocDigest == "", trusted.layerIdentifiedByTOC == false, and trusted.diffID must have been set - layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(trusted.diffID) - if err2 == nil && len(layers) > 0 { + } + if layer == nil && trusted.tocDigest != "" { + if layers, err2 := s.imageRef.transport.store.LayersByTOCDigest(trusted.tocDigest); err2 == nil && len(layers) > 0 { layer = &layers[0] - } else if trusted.blobDigest != "" { - layers, err2 = s.imageRef.transport.store.LayersByCompressedDigest(trusted.blobDigest) - if err2 == nil && len(layers) > 0 { - layer = &layers[0] - } } - if layer == nil { - return nil, fmt.Errorf("locating layer for blob %q: %w", layerDigest, err2) + } + if layer == nil && trusted.blobDigest != "" { + if layers, err2 := s.imageRef.transport.store.LayersByCompressedDigest(trusted.blobDigest); err2 == nil && len(layers) > 0 { + layer = &layers[0] } } + if layer == nil { + return nil, fmt.Errorf("layer for blob %q/%q/%q not found", trusted.blobDigest, trusted.tocDigest, trusted.diffID) + } + // Read the layer's contents. 
noCompression := archive.Uncompressed diffOptions := &storage.DiffOptions{ @@ -941,7 +967,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions) if err2 != nil { - return nil, fmt.Errorf("reading layer %q for blob %q: %w", layer.ID, layerDigest, err2) + return nil, fmt.Errorf("reading layer %q for blob %q/%q/%q: %w", layer.ID, trusted.blobDigest, trusted.tocDigest, trusted.diffID, err2) } // Copy the layer diff to a file. Diff() takes a lock that it holds // until the ReadCloser that it returns is closed, and PutLayer() wants @@ -1005,7 +1031,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D UncompressedDigest: trusted.diffID, }, file) if err != nil && !errors.Is(err, storage.ErrDuplicateID) { - return nil, fmt.Errorf("adding layer with blob %q: %w", layerDigest, err) + return nil, fmt.Errorf("adding layer with blob %q/%q/%q: %w", trusted.blobDigest, trusted.tocDigest, trusted.diffID, err) } return layer, nil } From 924d853f069aa1a7af6f124ac1bd645d6144d9ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 29 Feb 2024 15:47:18 +0100 Subject: [PATCH 09/37] Split reusedBlobFromLayerLookup from tryReusingBlobAsPending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add one more instance of this, so share the code. Should not change behavior (it does remove one unreachable code path). 
Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 59 ++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index b332382a8f..04b04dc79f 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -486,22 +486,9 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err) } - if len(layers) > 0 { - if size != -1 { - s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest - return true, private.ReusedBlob{ - Digest: blobDigest, - Size: size, - }, nil - } - if !options.CanSubstitute { - return false, private.ReusedBlob{}, fmt.Errorf("Internal error: options.CanSubstitute was expected to be true for blob with digest %s", blobDigest) - } - s.lockProtected.blobDiffIDs[uncompressedDigest] = uncompressedDigest - return true, private.ReusedBlob{ - Digest: uncompressedDigest, - Size: layers[0].UncompressedSize, - }, nil + if found, reused := reusedBlobFromLayerLookup(layers, blobDigest, size, options); found { + s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest + return true, reused, nil } } } @@ -509,26 +496,13 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige if options.TOCDigest != "" && options.LayerIndex != nil { // Check if we have a chunked layer in storage with the same TOC digest. 
layers, err := s.imageRef.transport.store.LayersByTOCDigest(options.TOCDigest) - if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with TOC digest %q: %w`, options.TOCDigest, err) } - if len(layers) > 0 { - if size != -1 { - s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest - return true, private.ReusedBlob{ - Digest: blobDigest, - Size: size, - MatchedByTOCDigest: true, - }, nil - } else if options.CanSubstitute && layers[0].UncompressedDigest != "" { - s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest - return true, private.ReusedBlob{ - Digest: layers[0].UncompressedDigest, - Size: layers[0].UncompressedSize, - MatchedByTOCDigest: true, - }, nil - } + if found, reused := reusedBlobFromLayerLookup(layers, blobDigest, size, options); found { + s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest + reused.MatchedByTOCDigest = true + return true, reused, nil } } @@ -536,6 +510,25 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige return false, private.ReusedBlob{}, nil } +// reusedBlobFromLayerLookup returns (true, ReusedBlob) if layers contain a usable match; or (false, ...) if not. +// The caller is still responsible for setting the layer identification fields, to allow the layer to be found again. 
+func reusedBlobFromLayerLookup(layers []storage.Layer, blobDigest digest.Digest, blobSize int64, options *private.TryReusingBlobOptions) (bool, private.ReusedBlob) { + if len(layers) > 0 { + if blobSize != -1 { + return true, private.ReusedBlob{ + Digest: blobDigest, + Size: blobSize, + } + } else if options.CanSubstitute && layers[0].UncompressedDigest != "" { + return true, private.ReusedBlob{ + Digest: layers[0].UncompressedDigest, + Size: layers[0].UncompressedSize, + } + } + } + return false, private.ReusedBlob{} +} + // trustedLayerIdentityData is a _consistent_ set of information known about a single layer. type trustedLayerIdentityData struct { layerIdentifiedByTOC bool // true if we decided the layer should be identified by tocDigest, false if by diffID From d910afa2ec29e6b7cfd62abcdb8d4d6c6d836a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 29 Feb 2024 15:49:25 +0100 Subject: [PATCH 10/37] Use BlobInfoCache to share more layers if (TOC->Uncompressed) mapping is known MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Multiple TOC values might correspond to a single DiffID (e.g. if different compression levels are used); try to share them all, identified by DiffID (so that we also reuse with non-TOC pulls). - LayersByTOCDigest only uses a single TOC digest per layer; BlobInfoCache allows multiple matches, matches layers which have been since deleted, and potentially matches TOC digests which we have created by pushing but haven't pulled yet. - On reuse, we can now use DiffID-based layer identities even if the reuse was TOC~driven. 
Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 04b04dc79f..444073ef3d 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -344,20 +344,30 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces s.lock.Lock() if out.UncompressedDigest != "" { + s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest + if out.TOCDigest != "" { + options.Cache.RecordTOCUncompressedPair(out.TOCDigest, out.UncompressedDigest) + } + // Don’t set indexToTOCDigest on this path: + // - Using UncompressedDigest allows image reuse with non-partially-pulled layers, so we want to set indexToDiffID. + // - If UncompressedDigest has been computed, that means the layer was read completely, and the TOC has been created from scratch. + // That TOC is quite unlikely to match any other TOC value. + // The computation of UncompressedDigest means the whole layer has been consumed; while doing that, chunked.GetDiffer is // responsible for ensuring blobDigest has been validated. if out.CompressedDigest != blobDigest { return private.UploadedBlob{}, fmt.Errorf("internal error: ApplyDiffWithDiffer returned CompressedDigest %q not matching expected %q", out.CompressedDigest, blobDigest) } - s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest + // So, record also information about blobDigest, that might benefit reuse. // We trust ApplyDiffWithDiffer to validate or create both values correctly. + s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest) } else { - // Don’t identify layers by TOC if UncompressedDigest is available. 
- // - Using UncompressedDigest allows image reuse with non-partially-pulled layers - // - If UncompressedDigest has been computed, that means the layer was read completely, and the TOC has been created from scratch. - // That TOC is quite unlikely to match with any other TOC value. + // Use diffID for layer identity if it is known. + if uncompressedDigest := options.Cache.UncompressedDigestForTOC(out.TOCDigest); uncompressedDigest != "" { + s.lockProtected.indexToDiffID[options.LayerIndex] = uncompressedDigest + } s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest } s.lockProtected.diffOutputs[options.LayerIndex] = out @@ -494,12 +504,29 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige } if options.TOCDigest != "" && options.LayerIndex != nil { + // Check if we know which which UncompressedDigest the TOC digest resolves to, and we have a match for that. + // Prefer this over LayersByTOCDigest because we can identify the layer using UncompressedDigest, maximizing reuse. + uncompressedDigest := options.Cache.UncompressedDigestForTOC(options.TOCDigest) + if uncompressedDigest != "" { + layers, err = s.imageRef.transport.store.LayersByUncompressedDigest(uncompressedDigest) + if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { + return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err) + } + if found, reused := reusedBlobFromLayerLookup(layers, blobDigest, size, options); found { + s.lockProtected.indexToDiffID[*options.LayerIndex] = uncompressedDigest + reused.MatchedByTOCDigest = true + return true, reused, nil + } + } // Check if we have a chunked layer in storage with the same TOC digest. 
layers, err := s.imageRef.transport.store.LayersByTOCDigest(options.TOCDigest) if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with TOC digest %q: %w`, options.TOCDigest, err) } if found, reused := reusedBlobFromLayerLookup(layers, blobDigest, size, options); found { + if uncompressedDigest != "" { + s.lockProtected.indexToDiffID[*options.LayerIndex] = uncompressedDigest + } s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest reused.MatchedByTOCDigest = true return true, reused, nil From acdd0641d32567fcbf28a9d749bc9d40d316812d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 9 Apr 2024 23:49:23 +0200 Subject: [PATCH 11/37] Record the (TOC digest, uncompressed digest) data when we compress layers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- copy/compression.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/copy/compression.go b/copy/compression.go index 318450a3aa..0164dd91da 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -11,6 +11,7 @@ import ( "github.com/containers/image/v5/pkg/compression" compressiontypes "github.com/containers/image/v5/pkg/compression/types" "github.com/containers/image/v5/types" + chunkedToc "github.com/containers/storage/pkg/chunked/toc" imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1" "github.com/sirupsen/logrus" ) @@ -308,6 +309,15 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf // No useful information case bpcOpCompressUncompressed: c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest) + if d.uploadedAnnotations != nil { + tocDigest, err := chunkedToc.GetTOCDigest(d.uploadedAnnotations) + if err != nil { + return fmt.Errorf("parsing just-created compression annotations: %w", err) + } + if tocDigest != nil { + 
c.blobInfoCache.RecordTOCUncompressedPair(*tocDigest, srcInfo.Digest) + } + } case bpcOpDecompressCompressed: c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest) case bpcOpRecompressCompressed, bpcOpPreserveCompressed: From 403d0a24ec11d4ee3f91bbcea202c817f87af518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 30 Apr 2024 19:29:34 +0200 Subject: [PATCH 12/37] Use the uncompressed digest we got from a BlobInfoCache for chunked layers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rely on it instead of triggering the "untrusted DiffID" logic - Also propagate it to storage Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 444073ef3d..0524e74ca3 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -889,6 +889,16 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D diffOutput, ok := s.lockProtected.diffOutputs[index] s.lock.Unlock() if ok { + // If we know a trusted DiffID value (e.g. from a BlobInfoCache), set it in diffOutput. + // That way it will be persisted in storage even if the cache is deleted; also + // we can use the value below to avoid the untrustedUncompressedDigest logic (and notably + // the costly commit delay until a manifest is available). 
+ s.lock.Lock() + if d, ok := s.lockProtected.indexToDiffID[index]; ok { + diffOutput.UncompressedDigest = d + } + s.lock.Unlock() + var untrustedUncompressedDigest digest.Digest if diffOutput.UncompressedDigest == "" { d, err := s.untrustedLayerDiffID(index) From f49cb6278b61f1defa7a2e9fc5918555d347fd8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 23 Jul 2024 18:57:36 +0200 Subject: [PATCH 13/37] HACK: Don't compress with zstd:chunked when encrypting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- copy/single.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/copy/single.go b/copy/single.go index 17cc8c833e..9f50f1dd95 100644 --- a/copy/single.go +++ b/copy/single.go @@ -149,6 +149,28 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar ic.compressionFormat = c.options.DestinationCtx.CompressionFormat ic.compressionLevel = c.options.DestinationCtx.CompressionLevel } + // HACK: Don’t combine zstd:chunked and encryption. + // zstd:chunked can only usefully be consumed using range requests of parts of the layer, which would require the encryption + // to support decrypting arbitrary subsets of the stream. That’s plausible but not supported using the encryption API we have. + // Also, the chunked metadata is exposed in annotations unencrypted, which reveals the TOC digest = layer identity without + // encryption. (That can be determined from the unencrypted config anyway, but, still...) + // + // Ideally this should query a well-defined property of the compression algorithm (and $somehow determine the right fallback) instead of + // hard-coding zstd:chunked / zstd. 
+ if ic.c.options.OciEncryptLayers != nil { + format := ic.compressionFormat + if format == nil { + format = defaultCompressionFormat + } + if format.Name() == compression.ZstdChunked.Name() { + if ic.requireCompressionFormatMatch { + return copySingleImageResult{}, errors.New("explicitly requested to combine zstd:chunked with encryption, which is not beneficial; use plain zstd instead") + } + logrus.Warnf("Compression using zstd:chunked is not beneficial for encrypted layers, using plain zstd instead") + ic.compressionFormat = &compression.Zstd + } + } + // Decide whether we can substitute blobs with semantic equivalents: // - Don’t do that if we can’t modify the manifest at all // - Ensure _this_ copy sees exactly the intended data when either processing a signed image or signing it. From fa715781721d6e166864a6e5e2548c9064278742 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 2 Aug 2024 12:32:28 -0700 Subject: [PATCH 14/37] ostree: rename a var This is to prevent codespell from complaining about pathc -> patch. 
Signed-off-by: Kir Kolyshkin --- ostree/ostree_src.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ostree/ostree_src.go b/ostree/ostree_src.go index 85a89f253f..0b597ce266 100644 --- a/ostree/ostree_src.go +++ b/ostree/ostree_src.go @@ -151,9 +151,9 @@ func openRepo(path string) (*C.struct_OstreeRepo, error) { var cerr *C.GError cpath := C.CString(path) defer C.free(unsafe.Pointer(cpath)) - pathc := C.g_file_new_for_path(cpath) - defer C.g_object_unref(C.gpointer(pathc)) - repo := C.ostree_repo_new(pathc) + file := C.g_file_new_for_path(cpath) + defer C.g_object_unref(C.gpointer(file)) + repo := C.ostree_repo_new(file) r := glib.GoBool(glib.GBoolean(C.ostree_repo_open(repo, nil, &cerr))) if !r { C.g_object_unref(C.gpointer(repo)) From 0b40d7dfa5bf26812a1d969d2a9c8c10d2505c08 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 2 Aug 2024 12:39:28 -0700 Subject: [PATCH 15/37] pkg/shortnames: compact code Use ok instead of isT and isV, and limit the variables scope. Signed-off-by: Kir Kolyshkin --- pkg/shortnames/shortnames.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pkg/shortnames/shortnames.go b/pkg/shortnames/shortnames.go index 61490932e5..9317600e58 100644 --- a/pkg/shortnames/shortnames.go +++ b/pkg/shortnames/shortnames.go @@ -61,14 +61,12 @@ func parseUnnormalizedShortName(input string) (bool, reference.Named, error) { // the tag or digest and stores it in the return values so that both can be // re-added to a possible resolved alias' or USRs at a later point. 
func splitUserInput(named reference.Named) (isTagged bool, isDigested bool, normalized reference.Named, tag string, digest digest.Digest) { - tagged, isT := named.(reference.NamedTagged) - if isT { + if tagged, ok := named.(reference.NamedTagged); ok { isTagged = true tag = tagged.Tag() } - digested, isD := named.(reference.Digested) - if isD { + if digested, ok := named.(reference.Digested); ok { isDigested = true digest = digested.Digest() } From e5aa22570fc4b7140a870df361b77045991e3c89 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 2 Aug 2024 12:42:20 -0700 Subject: [PATCH 16/37] signature: fixup to silence codespell s/doesnt/does-not/ Signed-off-by: Kir Kolyshkin --- signature/policy_eval_signedby_test.go | 2 +- signature/policy_eval_sigstore_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/signature/policy_eval_signedby_test.go b/signature/policy_eval_signedby_test.go index 0e9bb83f73..bca2e722cf 100644 --- a/signature/policy_eval_signedby_test.go +++ b/signature/policy_eval_signedby_test.go @@ -174,7 +174,7 @@ func TestPRSignedByIsSignatureAuthorAccepted(t *testing.T) { assert.IsType(t, InvalidSignatureError{}, err) // A valid signature with a rejected identity. - nonmatchingPRM, err := NewPRMExactReference("this/doesnt:match") + nonmatchingPRM, err := NewPRMExactReference("this/does-not:match") require.NoError(t, err) pr, err = NewPRSignedByKeyPath(ktGPG, "fixtures/public-key.gpg", nonmatchingPRM) require.NoError(t, err) diff --git a/signature/policy_eval_sigstore_test.go b/signature/policy_eval_sigstore_test.go index b460071237..8f606c8d0e 100644 --- a/signature/policy_eval_sigstore_test.go +++ b/signature/policy_eval_sigstore_test.go @@ -490,7 +490,7 @@ func TestPRrSigstoreSignedIsSignatureAccepted(t *testing.T) { assertRejected(sar, err) // A valid signature with a rejected identity. 
- nonmatchingPRM, err := NewPRMExactReference("this/doesnt:match") + nonmatchingPRM, err := NewPRMExactReference("this/does-not:match") require.NoError(t, err) pr, err = newPRSigstoreSigned( PRSigstoreSignedWithKeyPath("fixtures/cosign.pub"), @@ -644,7 +644,7 @@ func TestPRSigstoreSignedIsRunningImageAllowed(t *testing.T) { assertRunningAllowed(t, allowed, err) // 2 invalid signajtures: use dir-img-cosign-valid-2, but a non-matching Docker reference - image = dirImageMock(t, "fixtures/dir-img-cosign-valid-2", "this/doesnt:match") + image = dirImageMock(t, "fixtures/dir-img-cosign-valid-2", "this/does-not:match") pr, err = NewPRSigstoreSigned( PRSigstoreSignedWithKeyPath("fixtures/cosign.pub"), PRSigstoreSignedWithSignedIdentity(prm), From f6370ddd230d5e395f3ab68773d2d62095244718 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 2 Aug 2024 12:49:04 -0700 Subject: [PATCH 17/37] pkg/sysregistriesv2: silence codespell This just changes fo to f, without disrupting the test. Signed-off-by: Kir Kolyshkin --- pkg/sysregistriesv2/system_registries_v2_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/sysregistriesv2/system_registries_v2_test.go b/pkg/sysregistriesv2/system_registries_v2_test.go index 04e020641b..24d52d0879 100644 --- a/pkg/sysregistriesv2/system_registries_v2_test.go +++ b/pkg/sysregistriesv2/system_registries_v2_test.go @@ -628,10 +628,10 @@ func TestRewriteReferenceFailedDuringParseNamed(t *testing.T) { {"example.com/foo/image:latest", "example.com//foo", "example.com/path"}, {"example.com/image:latest", "image", "anotherimage"}, {"example.com/foo/image:latest", "example.com/foo/", "example.com"}, - {"example.com/foo/image", "example.com/fo", "example.com/foo"}, - {"example.com/foo:latest", "example.com/fo", "example.com/foo"}, + {"example.com/foo/image", "example.com/f", "example.com/foo"}, + {"example.com/foo:latest", "example.com/f", "example.com/foo"}, 
{"example.com/foo@sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - "example.com/fo", "example.com/foo"}, + "example.com/f", "example.com/foo"}, {"docker.io/library/image", "example.com", "example.com"}, {"docker.io/library/image", "*.com", "example.com"}, {"foo.docker.io/library/image", "*.example.com", "example.com/image"}, From adfc928b13971e89a0b9607b67b7020126f721a2 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 2 Aug 2024 13:03:20 -0700 Subject: [PATCH 18/37] copy: fix a comment This fixes a comment in the test code ("expection", "out", and punctuation) to the best of my knowledge. Fixes: c84a3fad ("copy/multiple_test: multiple copy requests of same compression") Signed-off-by: Kir Kolyshkin --- copy/multiple_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copy/multiple_test.go b/copy/multiple_test.go index f753c247ea..e89d70c814 100644 --- a/copy/multiple_test.go +++ b/copy/multiple_test.go @@ -93,8 +93,8 @@ func TestPrepareCopyInstancesforInstanceCopyClone(t *testing.T) { actualResponse := convertInstanceCopyToSimplerInstanceCopy(instancesToCopy) assert.Equal(t, expectedResponse, actualResponse) - // Test option with multiple copy request for same compression format - // above expection should stay same, if out ensureCompressionVariantsExist requests zstd twice + // Test option with multiple copy request for same compression format. + // The above expectation should stay the same, if ensureCompressionVariantsExist requests zstd twice. 
ensureCompressionVariantsExist = []OptionCompressionVariant{{Algorithm: compression.Zstd}, {Algorithm: compression.Zstd}} instancesToCopy, err = prepareInstanceCopies(list, sourceInstances, &Options{EnsureCompressionVariantsExist: ensureCompressionVariantsExist}) require.NoError(t, err) From 271ebf7b79084a41af4911336a1b944e9bbcaeb0 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 2 Aug 2024 13:37:05 -0700 Subject: [PATCH 19/37] copy: fix a typo in log message Found by codespell 2.3.0: Error: ./copy/single.go:217: resuing ==> reusing, resuming, rescuing Signed-off-by: Kir Kolyshkin --- copy/single.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy/single.go b/copy/single.go index 9f50f1dd95..714dc81368 100644 --- a/copy/single.go +++ b/copy/single.go @@ -214,7 +214,7 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar shouldUpdateSigs := len(sigs) > 0 || len(c.signers) != 0 // TODO: Consider allowing signatures updates only and skipping the image's layers/manifest copy if possible noPendingManifestUpdates := ic.noPendingManifestUpdates() - logrus.Debugf("Checking if we can skip copying: has signatures=%t, OCI encryption=%t, no manifest updates=%t, compression match required for resuing blobs=%t", shouldUpdateSigs, destRequiresOciEncryption, noPendingManifestUpdates, opts.requireCompressionFormatMatch) + logrus.Debugf("Checking if we can skip copying: has signatures=%t, OCI encryption=%t, no manifest updates=%t, compression match required for reusing blobs=%t", shouldUpdateSigs, destRequiresOciEncryption, noPendingManifestUpdates, opts.requireCompressionFormatMatch) if !shouldUpdateSigs && !destRequiresOciEncryption && noPendingManifestUpdates && !ic.requireCompressionFormatMatch { matchedResult, err := ic.compareImageDestinationManifestEqual(ctx, targetInstance) if err != nil { From e2e81f25c2f6e91e4905f7b8df744192bec720c3 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 2 Aug 2024 13:05:41 -0700 
Subject: [PATCH 20/37] Simplify codespellrc, fix remaining typos Current .codespellrc contains too many exclusions, and as a result it misses some legitimate typos. A few previous commits silenced or fixed some of the codespell warnings in preparation for this one, which minimizes exclusions, and fixes the actual remaining typos. The fixes here are the result of codespell -w. ./copy/sign_test.go:119: overidden ==> overridden ./copy/encryption.go:51: pratice ==> practice ./copy/multiple_test.go:80: crated ==> created ./copy/progress_bars.go:124: progres ==> progress Signed-off-by: Kir Kolyshkin --- .codespellrc | 8 ++++---- copy/encryption.go | 2 +- copy/multiple_test.go | 2 +- copy/progress_bars.go | 2 +- copy/sign_test.go | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.codespellrc b/.codespellrc index 2d68648595..0b6bdcb31f 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,6 +1,6 @@ # See https://github.com/codespell-project/codespell#using-a-config-file [codespell] -skip = .git,*.pdf,*.svg,.codespellrc,go.sum,system_registries_v2_test.go,Makefile,build,buildah,buildah.spec,imgtype,copy,AUTHORS,bin,vendor,CHANGELOG.md,changelog.txt,seccomp.json,.cirrus.yml,*.xz,*.gz,*.tar,*.tgz,*ico,*.png,*.1,*.5,*.orig,*.rej,*.gpg -check-hidden = true -ignore-regex = \b(isT|BU|this/doesnt:match)\b -ignore-words-list = te,pathc +skip = ./vendor,./.git,./go.sum,*.gpg + +# NOTE words added to the list below need to be lowercased. +ignore-words-list = te,bu diff --git a/copy/encryption.go b/copy/encryption.go index 4259d355b1..b5a88a914b 100644 --- a/copy/encryption.go +++ b/copy/encryption.go @@ -48,7 +48,7 @@ func (ic *imageCopier) blobPipelineDecryptionStep(stream *sourceStream, srcInfo Annotations: stream.info.Annotations, } // DecryptLayer supposedly returns a digest of the decrypted stream. - // In pratice, that value is never set in the current implementation. + // In practice, that value is never set in the current implementation. 
// And we shouldn’t use it anyway, because it is not trusted: encryption can be made to a public key, // i.e. it doesn’t authenticate the origin of the metadata in any way. reader, _, err := ocicrypt.DecryptLayer(ic.c.options.OciDecryptConfig, stream.reader, desc, false) diff --git a/copy/multiple_test.go b/copy/multiple_test.go index e89d70c814..71cd62936b 100644 --- a/copy/multiple_test.go +++ b/copy/multiple_test.go @@ -77,7 +77,7 @@ func TestPrepareCopyInstancesforInstanceCopyClone(t *testing.T) { // * Still copy gzip variants if they exist in the original // * Not create new Zstd variants if they exist in the original. - // We crated a list of three instances `sourceInstances` and since in oci1.index.zstd-selection.json + // We created a list of three instances `sourceInstances` and since in oci1.index.zstd-selection.json // amd64 already has a zstd instance i.e sourceInstance[1] so it should not create replication for // `sourceInstance[0]` and `sourceInstance[1]` but should do it for `sourceInstance[2]` for `arm64` // and still copy `sourceInstance[2]`. diff --git a/copy/progress_bars.go b/copy/progress_bars.go index 053650b8d3..08128ce8d8 100644 --- a/copy/progress_bars.go +++ b/copy/progress_bars.go @@ -121,7 +121,7 @@ func (c *copier) printCopyInfo(kind string, info types.BlobInfo) { } } -// mark100PercentComplete marks the progres bars as 100% complete; +// mark100PercentComplete marks the progress bars as 100% complete; // it may do so by possibly advancing the current state if it is below the known total. 
func (bar *progressBar) mark100PercentComplete() { if bar.originalSize > 0 { diff --git a/copy/sign_test.go b/copy/sign_test.go index 49f3fce5f4..1f4a9349fa 100644 --- a/copy/sign_test.go +++ b/copy/sign_test.go @@ -116,7 +116,7 @@ func TestCreateSignatures(t *testing.T) { successfullySignedIdentity: "docker.io/library/busybox:latest", }, { - name: "docker:// with overidden identity", + name: "docker:// with overridden identity", dest: dockerDest, identity: "myregistry.io/myrepo:mytag", successfullySignedIdentity: "myregistry.io/myrepo:mytag", From a6202cb6fb0d4cc2e44be922565e72c5ac12be93 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 12:16:51 +0000 Subject: [PATCH 21/37] fix(deps): update module golang.org/x/oauth2 to v0.22.0 Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index bd3ae509e7..dc5145416c 100644 --- a/go.mod +++ b/go.mod @@ -40,7 +40,7 @@ require ( go.etcd.io/bbolt v1.3.10 golang.org/x/crypto v0.25.0 golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 - golang.org/x/oauth2 v0.21.0 + golang.org/x/oauth2 v0.22.0 golang.org/x/sync v0.7.0 golang.org/x/term v0.22.0 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index c3cc707c58..29d5943bb0 100644 --- a/go.sum +++ b/go.sum @@ -421,8 +421,8 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.22.0 
h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= From b4ca7d165c1233cff140a404ff7344a4ccace0b2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:28:03 +0000 Subject: [PATCH 22/37] fix(deps): update module golang.org/x/sync to v0.8.0 Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index dc5145416c..9d3dc2ec8f 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( golang.org/x/crypto v0.25.0 golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 golang.org/x/oauth2 v0.22.0 - golang.org/x/sync v0.7.0 + golang.org/x/sync v0.8.0 golang.org/x/term v0.22.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 29d5943bb0..737c8d10db 100644 --- a/go.sum +++ b/go.sum @@ -431,8 +431,8 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod 
h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= From 5ffda1ddddb1a94d6bd5a421ff510db479f11960 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 25 Jul 2024 11:40:32 -0700 Subject: [PATCH 23/37] manifest: LayerInfos: preallocate the slice Signed-off-by: Kir Kolyshkin --- manifest/docker_schema2.go | 2 +- manifest/oci.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/manifest/docker_schema2.go b/manifest/docker_schema2.go index 818166834d..7e53f4f54e 100644 --- a/manifest/docker_schema2.go +++ b/manifest/docker_schema2.go @@ -202,7 +202,7 @@ func (m *Schema2) ConfigInfo() types.BlobInfo { // The Digest field is guaranteed to be provided; Size may be -1. // WARNING: The list may contain duplicates, and they are semantically relevant. func (m *Schema2) LayerInfos() []LayerInfo { - blobs := []LayerInfo{} + blobs := make([]LayerInfo, 0, len(m.LayersDescriptors)) for _, layer := range m.LayersDescriptors { blobs = append(blobs, LayerInfo{ BlobInfo: BlobInfoFromSchema2Descriptor(layer), diff --git a/manifest/oci.go b/manifest/oci.go index 497cf476e3..f714574ee9 100644 --- a/manifest/oci.go +++ b/manifest/oci.go @@ -95,7 +95,7 @@ func (m *OCI1) ConfigInfo() types.BlobInfo { // The Digest field is guaranteed to be provided; Size may be -1. // WARNING: The list may contain duplicates, and they are semantically relevant. 
func (m *OCI1) LayerInfos() []LayerInfo { - blobs := []LayerInfo{} + blobs := make([]LayerInfo, 0, len(m.Layers)) for _, layer := range m.Layers { blobs = append(blobs, LayerInfo{ BlobInfo: BlobInfoFromOCI1Descriptor(layer), From 62f10249f8e38c419a36a9d5de141ad7c42c8a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 15 Mar 2024 22:16:46 +0100 Subject: [PATCH 24/37] Fix data returned when returning uncompressed data on a c/storage layer match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rules expect us to set manifest editing updates. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 0524e74ca3..842a3ab068 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -548,8 +548,10 @@ func reusedBlobFromLayerLookup(layers []storage.Layer, blobDigest digest.Digest, } } else if options.CanSubstitute && layers[0].UncompressedDigest != "" { return true, private.ReusedBlob{ - Digest: layers[0].UncompressedDigest, - Size: layers[0].UncompressedSize, + Digest: layers[0].UncompressedDigest, + Size: layers[0].UncompressedSize, + CompressionOperation: types.Decompress, + CompressionAlgorithm: nil, } } } From efdc63990c1f9b4ed48ca095eb3008ffe10d8d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Mon, 25 Mar 2024 20:39:20 +0100 Subject: [PATCH 25/37] Turn CandidateCompression into CandidateTemplateWithCompression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... and add CandidateWithLocation and CandidateWithUnknownLocation , so that the BIC implementations only need to deal with one value instead of carrying around three; we will want to add one more, and encapsulating them all into a single template will make it transparent to the cache implementations. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- pkg/blobinfocache/boltdb/boltdb.go | 25 +--- .../internal/prioritize/prioritize.go | 60 ++++++-- .../internal/prioritize/prioritize_test.go | 133 ++++++++++++++++++ pkg/blobinfocache/memory/memory.go | 25 +--- pkg/blobinfocache/sqlite/sqlite.go | 25 +--- 5 files changed, 195 insertions(+), 73 deletions(-) diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go index b13246b212..e9855fa3c1 100644 --- a/pkg/blobinfocache/boltdb/boltdb.go +++ b/pkg/blobinfocache/boltdb/boltdb.go @@ -367,8 +367,8 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW compressorName = string(compressorNameValue) } } - ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName) - if !ok { + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + if template == nil { return candidates } @@ -382,28 +382,11 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW if err := t.UnmarshalBinary(v); err != nil { return err } - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - Location: types.BICLocationReference{Opaque: string(k)}, - }, - LastSeen: t, - }) + candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: string(k)}, t)) return nil }) // FIXME? Log error (but throttle the log volume on repeated accesses)? 
} else if v2Options != nil { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - UnknownLocation: true, - Location: types.BICLocationReference{Opaque: ""}, - }, - LastSeen: time.Time{}, - }) + candidates = append(candidates, template.CandidateWithUnknownLocation()) } return candidates } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 9cd9c8f7d3..18478b5e82 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -25,16 +25,24 @@ const replacementAttempts = 5 // This is a heuristic/guess, and could well use a different value. const replacementUnknownLocationAttempts = 2 -// CandidateCompression returns (true, compressionOp, compressionAlgo) if a blob -// with compressionName (which can be Uncompressed or UnknownCompression) is acceptable for a CandidateLocations* call with v2Options. +// CandidateTemplate is a subset of BICReplacementCandidate2 with data related to a specific digest, +// which can be later combined with information about a location. +type CandidateTemplate struct { + digest digest.Digest + compressionOperation types.LayerCompression // Either types.Decompress for uncompressed, or types.Compress for compressed + compressionAlgorithm *compression.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed +} + +// CandidateTemplateWithCompression returns a CandidateTemplate if a blob with compressionName (which can be Uncompressed +// or UnknownCompression) is acceptable for a CandidateLocations* call with v2Options. // // v2Options can be set to nil if the call is CandidateLocations (i.e. compression is not required to be known); // if not nil, the call is assumed to be CandidateLocations2. 
-// -// The (compressionOp, compressionAlgo) values are suitable for BICReplacementCandidate2 -func CandidateCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, compressorName string) (bool, types.LayerCompression, *compression.Algorithm) { +func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, compressorName string) *CandidateTemplate { if v2Options == nil { - return true, types.PreserveOriginal, nil // Anything goes. The (compressionOp, compressionAlgo) values are not used. + return &CandidateTemplate{ // Anything goes. The compressionOperation, compressionAlgorithm values are not used. + digest: digest, + } } var op types.LayerCompression @@ -45,14 +53,14 @@ func CandidateCompression(v2Options *blobinfocache.CandidateLocations2Options, d algo = nil case blobinfocache.UnknownCompression: logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String()) - return false, types.PreserveOriginal, nil // Not allowed with CandidateLocations2 + return nil // Not allowed with CandidateLocations2 default: op = types.Compress algo_, err := compression.AlgorithmByName(compressorName) if err != nil { logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v", digest.String(), compressorName, err) - return false, types.PreserveOriginal, nil // The BICReplacementCandidate2.CompressionAlgorithm field is required + return nil // The BICReplacementCandidate2.CompressionAlgorithm field is required } algo = &algo_ } @@ -66,10 +74,14 @@ func CandidateCompression(v2Options *blobinfocache.CandidateLocations2Options, d } logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v", digest.String(), compressorName, requiredCompresssion, v2Options.PossibleManifestFormats) - return false, types.PreserveOriginal, nil + return nil } - return true, op, algo + return 
&CandidateTemplate{ + digest: digest, + compressionOperation: op, + compressionAlgorithm: algo, + } } // CandidateWithTime is the input to types.BICReplacementCandidate prioritization. @@ -78,6 +90,34 @@ type CandidateWithTime struct { LastSeen time.Time // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation) } +// CandidateWithLocation returns a complete CandidateWithTime combining (template from CandidateTemplateWithCompression, location, lastSeen) +func (template CandidateTemplate) CandidateWithLocation(location types.BICLocationReference, lastSeen time.Time) CandidateWithTime { + return CandidateWithTime{ + Candidate: blobinfocache.BICReplacementCandidate2{ + Digest: template.digest, + CompressionOperation: template.compressionOperation, + CompressionAlgorithm: template.compressionAlgorithm, + UnknownLocation: false, + Location: location, + }, + LastSeen: lastSeen, + } +} + +// CandidateWithUnknownLocation returns a complete CandidateWithTime for a template from CandidateTemplateWithCompression and an unknown location. +func (template CandidateTemplate) CandidateWithUnknownLocation() CandidateWithTime { + return CandidateWithTime{ + Candidate: blobinfocache.BICReplacementCandidate2{ + Digest: template.digest, + CompressionOperation: template.compressionOperation, + CompressionAlgorithm: template.compressionAlgorithm, + UnknownLocation: true, + Location: types.BICLocationReference{Opaque: ""}, + }, + LastSeen: time.Time{}, + } +} + // candidateSortState is a closure for a comparison used by slices.SortFunc on candidates to prioritize, // along with the specially-treated digest values relevant to the ordering. 
type candidateSortState struct { diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index ab47fe0625..6f48e78c06 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -8,9 +8,11 @@ import ( "github.com/containers/image/v5/internal/blobinfocache" "github.com/containers/image/v5/pkg/compression" + compressiontypes "github.com/containers/image/v5/pkg/compression/types" "github.com/containers/image/v5/types" "github.com/opencontainers/go-digest" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) const ( @@ -53,6 +55,137 @@ var ( } ) +func TestCandidateTemplateWithCompression(t *testing.T) { + for _, c := range []struct { + name string + requiredCompression *compressiontypes.Algorithm + compressor string + v2Matches bool + // if v2Matches: + v2Op types.LayerCompression + v2Algo string + }{ + { + name: "unknown", + requiredCompression: nil, + compressor: blobinfocache.UnknownCompression, + v2Matches: false, + }, + { + name: "uncompressed", + requiredCompression: nil, + compressor: blobinfocache.Uncompressed, + v2Matches: true, + v2Op: types.Decompress, + v2Algo: "", + }, + { + name: "uncompressed, want gzip", + requiredCompression: &compression.Gzip, + compressor: blobinfocache.Uncompressed, + v2Matches: false, + }, + { + name: "gzip", + requiredCompression: nil, + compressor: compressiontypes.GzipAlgorithmName, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.GzipAlgorithmName, + }, + { + name: "gzip, want zstd", + requiredCompression: &compression.Zstd, + compressor: compressiontypes.GzipAlgorithmName, + v2Matches: false, + }, + { + name: "unknown base", + requiredCompression: nil, + compressor: "this value is unknown", + v2Matches: false, + }, + { + name: "zstd", + requiredCompression: nil, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: true, + 
v2Op: types.Compress, + v2Algo: compressiontypes.ZstdAlgorithmName, + }, + { + name: "zstd, want gzip", + requiredCompression: &compression.Gzip, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: false, + }, + { + name: "zstd, want zstd", + requiredCompression: &compression.Zstd, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.ZstdAlgorithmName, + }, + { + name: "zstd, want zstd:chunked", + requiredCompression: &compression.ZstdChunked, + compressor: compressiontypes.ZstdAlgorithmName, + v2Matches: false, + }, + } { + res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.compressor) + assert.Equal(t, &CandidateTemplate{ + digest: digestCompressedPrimary, + compressionOperation: types.PreserveOriginal, + compressionAlgorithm: nil, + }, res, c.name) + + // These tests only use RequiredCompression in CandidateLocations2Options for clarity; + // CandidateCompressionMatchesReuseConditions should have its own tests of handling the full set of options. 
+ res = CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{ + RequiredCompression: c.requiredCompression, + }, digestCompressedPrimary, c.compressor) + if !c.v2Matches { + assert.Nil(t, res, c.name) + } else { + require.NotNil(t, res, c.name) + assert.Equal(t, digestCompressedPrimary, res.digest, c.name) + assert.Equal(t, c.v2Op, res.compressionOperation, c.name) + if c.v2Algo == "" { + assert.Nil(t, res.compressionAlgorithm, c.name) + } else { + require.NotNil(t, res.compressionAlgorithm, c.name) + assert.Equal(t, c.v2Algo, res.compressionAlgorithm.Name()) + } + } + } +} + +func TestCandidateWithLocation(t *testing.T) { + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + require.NotNil(t, template) + loc := types.BICLocationReference{Opaque: "opaque"} + time := time.Now() + res := template.CandidateWithLocation(loc, time) + assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) + assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) + assert.Equal(t, false, res.Candidate.UnknownLocation) + assert.Equal(t, loc, res.Candidate.Location) + assert.Equal(t, time, res.LastSeen) +} + +func TestCandidateWithUnknownLocation(t *testing.T) { + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + require.NotNil(t, template) + res := template.CandidateWithUnknownLocation() + assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) + assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) + assert.Equal(t, true, res.Candidate.UnknownLocation) +} + func TestCandidateSortStateLess(t *testing.T) { type p struct { d digest.Digest 
diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go index 61e858deb8..ad1de7c841 100644 --- a/pkg/blobinfocache/memory/memory.go +++ b/pkg/blobinfocache/memory/memory.go @@ -170,34 +170,17 @@ func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateW if v, ok := mem.compressors[digest]; ok { compressorName = v } - ok, compressionOp, compressionAlgo := prioritize.CandidateCompression(v2Options, digest, compressorName) - if !ok { + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + if template == nil { return candidates } locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: scope, blobDigest: digest}] // nil if not present if len(locations) > 0 { for l, t := range locations { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - Location: l, - }, - LastSeen: t, - }) + candidates = append(candidates, template.CandidateWithLocation(l, t)) } } else if v2Options != nil { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - UnknownLocation: true, - Location: types.BICLocationReference{Opaque: ""}, - }, - LastSeen: time.Time{}, - }) + candidates = append(candidates, template.CandidateWithUnknownLocation()) } return candidates } diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go index eb6a4a5c2e..4061af9720 100644 --- a/pkg/blobinfocache/sqlite/sqlite.go +++ b/pkg/blobinfocache/sqlite/sqlite.go @@ -502,8 +502,8 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW compressorName = compressor } } - ok, compressionOp, compressionAlgo := 
prioritize.CandidateCompression(v2Options, digest, compressorName) - if !ok { + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + if template == nil { return candidates, nil } @@ -522,15 +522,7 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW if err := rows.Scan(&location, &time); err != nil { return nil, fmt.Errorf("scanning candidate: %w", err) } - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - Location: types.BICLocationReference{Opaque: location}, - }, - LastSeen: time, - }) + candidates = append(candidates, template.CandidateWithLocation(types.BICLocationReference{Opaque: location}, time)) rowAdded = true } if err := rows.Err(); err != nil { @@ -538,16 +530,7 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW } if !rowAdded && v2Options != nil { - candidates = append(candidates, prioritize.CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ - Digest: digest, - CompressionOperation: compressionOp, - CompressionAlgorithm: compressionAlgo, - UnknownLocation: true, - Location: types.BICLocationReference{Opaque: ""}, - }, - LastSeen: time.Time{}, - }) + candidates = append(candidates, template.CandidateWithUnknownLocation()) } return candidates, nil } From 2b45d35846a551fbe4cadb33284156bb4ae9bbff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 30 Jul 2024 19:27:46 +0200 Subject: [PATCH 26/37] Make the fields of CandidateWithTime private MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... just because we now can, and to nudge all future caches to be designed around CandidateTemplateWithCompression. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- .../internal/prioritize/prioritize.go | 36 +++++++++---------- .../internal/prioritize/prioritize_test.go | 20 +++++------ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 18478b5e82..64a0c57855 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -86,35 +86,35 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation // CandidateWithTime is the input to types.BICReplacementCandidate prioritization. type CandidateWithTime struct { - Candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate - LastSeen time.Time // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation) + candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate + lastSeen time.Time // Time the candidate was last known to exist (either read or written) (not set for Candidate.UnknownLocation) } // CandidateWithLocation returns a complete CandidateWithTime combining (template from CandidateTemplateWithCompression, location, lastSeen) func (template CandidateTemplate) CandidateWithLocation(location types.BICLocationReference, lastSeen time.Time) CandidateWithTime { return CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ + candidate: blobinfocache.BICReplacementCandidate2{ Digest: template.digest, CompressionOperation: template.compressionOperation, CompressionAlgorithm: template.compressionAlgorithm, UnknownLocation: false, Location: location, }, - LastSeen: lastSeen, + lastSeen: lastSeen, } } // CandidateWithUnknownLocation returns a complete CandidateWithTime for a template from CandidateTemplateWithCompression and an unknown location. 
func (template CandidateTemplate) CandidateWithUnknownLocation() CandidateWithTime { return CandidateWithTime{ - Candidate: blobinfocache.BICReplacementCandidate2{ + candidate: blobinfocache.BICReplacementCandidate2{ Digest: template.digest, CompressionOperation: template.compressionOperation, CompressionAlgorithm: template.compressionAlgorithm, UnknownLocation: true, Location: types.BICLocationReference{Opaque: ""}, }, - LastSeen: time.Time{}, + lastSeen: time.Time{}, } } @@ -131,35 +131,35 @@ func (css *candidateSortState) compare(xi, xj CandidateWithTime) int { // Other digest values are primarily sorted by time (more recent first), secondarily by digest (to provide a deterministic order) // First, deal with the primaryDigest/uncompressedDigest cases: - if xi.Candidate.Digest != xj.Candidate.Digest { + if xi.candidate.Digest != xj.candidate.Digest { // - The two digests are different, and one (or both) of the digests is primaryDigest or uncompressedDigest: time does not matter - if xi.Candidate.Digest == css.primaryDigest { + if xi.candidate.Digest == css.primaryDigest { return -1 } - if xj.Candidate.Digest == css.primaryDigest { + if xj.candidate.Digest == css.primaryDigest { return 1 } if css.uncompressedDigest != "" { - if xi.Candidate.Digest == css.uncompressedDigest { + if xi.candidate.Digest == css.uncompressedDigest { return 1 } - if xj.Candidate.Digest == css.uncompressedDigest { + if xj.candidate.Digest == css.uncompressedDigest { return -1 } } } else { // xi.Candidate.Digest == xj.Candidate.Digest // The two digests are the same, and are either primaryDigest or uncompressedDigest: order by time - if xi.Candidate.Digest == css.primaryDigest || (css.uncompressedDigest != "" && xi.Candidate.Digest == css.uncompressedDigest) { - return -xi.LastSeen.Compare(xj.LastSeen) + if xi.candidate.Digest == css.primaryDigest || (css.uncompressedDigest != "" && xi.candidate.Digest == css.uncompressedDigest) { + return -xi.lastSeen.Compare(xj.lastSeen) } } // Neither 
of the digests are primaryDigest/uncompressedDigest: - if cmp := xi.LastSeen.Compare(xj.LastSeen); cmp != 0 { // Order primarily by time + if cmp := xi.lastSeen.Compare(xj.lastSeen); cmp != 0 { // Order primarily by time return -cmp } // Fall back to digest, if timestamps end up _exactly_ the same (how?!) - return cmp.Compare(xi.Candidate.Digest, xj.Candidate.Digest) + return cmp.Compare(xi.candidate.Digest, xj.candidate.Digest) } // destructivelyPrioritizeReplacementCandidatesWithMax is destructivelyPrioritizeReplacementCandidates with parameters for the @@ -178,7 +178,7 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, uncompressedDigest: uncompressedDigest, }).compare) for _, candidate := range cs { - if candidate.Candidate.UnknownLocation { + if candidate.candidate.UnknownLocation { unknownLocationCandidates = append(unknownLocationCandidates, candidate) } else { knownLocationCandidates = append(knownLocationCandidates, candidate) @@ -190,11 +190,11 @@ func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, unknownLocationCandidatesUsed := min(noLocationLimit, remainingCapacity, len(unknownLocationCandidates)) res := make([]blobinfocache.BICReplacementCandidate2, knownLocationCandidatesUsed) for i := 0; i < knownLocationCandidatesUsed; i++ { - res[i] = knownLocationCandidates[i].Candidate + res[i] = knownLocationCandidates[i].candidate } // If candidates with unknown location are found, lets add them to final list for i := 0; i < unknownLocationCandidatesUsed; i++ { - res = append(res, unknownLocationCandidates[i].Candidate) + res = append(res, unknownLocationCandidates[i].candidate) } return res } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index 6f48e78c06..2ea60e3394 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -168,22 +168,22 
@@ func TestCandidateWithLocation(t *testing.T) { loc := types.BICLocationReference{Opaque: "opaque"} time := time.Now() res := template.CandidateWithLocation(loc, time) - assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) - assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) - assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) - assert.Equal(t, false, res.Candidate.UnknownLocation) - assert.Equal(t, loc, res.Candidate.Location) - assert.Equal(t, time, res.LastSeen) + assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) + assert.Equal(t, types.Compress, res.candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, false, res.candidate.UnknownLocation) + assert.Equal(t, loc, res.candidate.Location) + assert.Equal(t, time, res.lastSeen) } func TestCandidateWithUnknownLocation(t *testing.T) { template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) require.NotNil(t, template) res := template.CandidateWithUnknownLocation() - assert.Equal(t, digestCompressedPrimary, res.Candidate.Digest) - assert.Equal(t, types.Compress, res.Candidate.CompressionOperation) - assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.Candidate.CompressionAlgorithm.Name()) - assert.Equal(t, true, res.Candidate.UnknownLocation) + assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) + assert.Equal(t, types.Compress, res.candidate.CompressionOperation) + assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, true, res.candidate.UnknownLocation) } func TestCandidateSortStateLess(t *testing.T) { From 849938da193920f9cb18d6597ed9fe05db16d125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Mon, 25 Mar 2024 21:20:15 +0100 Subject: [PATCH 27/37] 
Reformat CandidateTemplateWithCompression a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add more logic to the default case, so sharing the CandidateCompressionMatchesReuseConditions call is not going to be as easy. Split the two code paths. Should not change behavior. Signed-off-by: Miloslav Trmač --- .../internal/prioritize/prioritize.go | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 64a0c57855..9c7621f28a 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -45,42 +45,48 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation } } - var op types.LayerCompression - var algo *compression.Algorithm + requiredCompression := "nil" + if v2Options.RequiredCompression != nil { + requiredCompression = v2Options.RequiredCompression.Name() + } switch compressorName { case blobinfocache.Uncompressed: - op = types.Decompress - algo = nil + if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ + PossibleManifestFormats: v2Options.PossibleManifestFormats, + RequiredCompression: v2Options.RequiredCompression, + }, nil) { + logrus.Debugf("Ignoring BlobInfoCache record of digest %q, uncompressed format does not match required %s or MIME types %#v", + digest.String(), requiredCompression, v2Options.PossibleManifestFormats) + return nil + } + return &CandidateTemplate{ + digest: digest, + compressionOperation: types.Decompress, + compressionAlgorithm: nil, + } case blobinfocache.UnknownCompression: logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String()) return nil // Not allowed with CandidateLocations2 default: - op = types.Compress - algo_, err := compression.AlgorithmByName(compressorName) + algo, err 
:= compression.AlgorithmByName(compressorName) if err != nil { logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v", digest.String(), compressorName, err) return nil // The BICReplacementCandidate2.CompressionAlgorithm field is required } - algo = &algo_ - } - if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ - PossibleManifestFormats: v2Options.PossibleManifestFormats, - RequiredCompression: v2Options.RequiredCompression, - }, algo) { - requiredCompresssion := "nil" - if v2Options.RequiredCompression != nil { - requiredCompresssion = v2Options.RequiredCompression.Name() + if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ + PossibleManifestFormats: v2Options.PossibleManifestFormats, + RequiredCompression: v2Options.RequiredCompression, + }, &algo) { + logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v", + digest.String(), compressorName, requiredCompression, v2Options.PossibleManifestFormats) + return nil + } + return &CandidateTemplate{ + digest: digest, + compressionOperation: types.Compress, + compressionAlgorithm: &algo, } } From d09a403fd686813a1a5ce5e40b153e9029a69a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 13 Mar 2024 21:58:07 +0100 Subject: [PATCH 28/37] Introduce blobinfocache.DigestCompressorData MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will want to record more than a single algorithm name.
For now, just introduce the structure and modify users, we'll add the new fields later. Should not change behavior. Signed-off-by: Miloslav Trmač --- copy/compression.go | 8 ++- internal/blobinfocache/blobinfocache.go | 2 +- internal/blobinfocache/types.go | 16 ++++-- pkg/blobinfocache/boltdb/boltdb.go | 26 ++++++---- .../internal/prioritize/prioritize.go | 14 ++--- .../internal/prioritize/prioritize_test.go | 52 +++++++++++++------ pkg/blobinfocache/internal/test/test.go | 16 ++++-- pkg/blobinfocache/memory/memory.go | 25 +++++---- pkg/blobinfocache/sqlite/sqlite.go | 25 +++++---- 9 files changed, 117 insertions(+), 67 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index 0164dd91da..e5800aa4d5 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -347,14 +347,18 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf // between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName // with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about // inconsistent data to be logged. - c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, d.uploadedCompressorName) + c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{ + BaseVariantCompressor: d.uploadedCompressorName, + }) } } if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest && d.srcCompressorName != internalblobinfocache.UnknownCompression { if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName { // HACK: Don’t record zstd:chunked algorithms, see above. 
- c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, d.srcCompressorName) + c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{ + BaseVariantCompressor: d.srcCompressorName, + }) } } return nil diff --git a/internal/blobinfocache/blobinfocache.go b/internal/blobinfocache/blobinfocache.go index 60f8e6a027..f31ee3124d 100644 --- a/internal/blobinfocache/blobinfocache.go +++ b/internal/blobinfocache/blobinfocache.go @@ -34,7 +34,7 @@ func (bic *v1OnlyBlobInfoCache) UncompressedDigestForTOC(tocDigest digest.Digest func (bic *v1OnlyBlobInfoCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) { } -func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) { +func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) { } func (bic *v1OnlyBlobInfoCache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2 { diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go index ed340ba478..276c8073e3 100644 --- a/internal/blobinfocache/types.go +++ b/internal/blobinfocache/types.go @@ -35,19 +35,25 @@ type BlobInfoCache2 interface { // (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) - // RecordDigestCompressorName records a compressor for the blob with the specified digest, - // or Uncompressed or UnknownCompression. - // WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a - // digest just because some remote author claims so (e.g. because a manifest says so); + // RecordDigestCompressorData records data for the blob with the specified digest. 
+ // WARNING: Only call this with LOCALLY VERIFIED data: + // - don’t record a compressor for a digest just because some remote author claims so + // (e.g. because a manifest says so); // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. - RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) + RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) // CandidateLocations2 returns a prioritized, limited, number of blobs and their locations (if known) // that could possibly be reused within the specified (transport scope) (if they still // exist, which is not guaranteed). CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, options CandidateLocations2Options) []BICReplacementCandidate2 } +// DigestCompressorData is information known about how a blob is compressed. +// (This is worded generically, but basically targeted at the zstd / zstd:chunked situation.) +type DigestCompressorData struct { + BaseVariantCompressor string // A compressor’s base variant name, or Uncompressed or UnknownCompression. +} + // CandidateLocations2Options are used in CandidateLocations2. type CandidateLocations2Options struct { // If !CanSubstitute, the returned candidates will match the submitted digest exactly; if diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go index e9855fa3c1..036d2c0149 100644 --- a/pkg/blobinfocache/boltdb/boltdb.go +++ b/pkg/blobinfocache/boltdb/boltdb.go @@ -295,12 +295,14 @@ func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompresse }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } -// RecordDigestCompressorName records that the blob with digest anyDigest was compressed with the specified -// compressor, or is blobinfocache.Uncompressed. 
-// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g. -// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs. -// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.) -func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) { +// RecordDigestCompressorData records data for the blob with the specified digest. +// WARNING: Only call this with LOCALLY VERIFIED data: +// - don’t record a compressor for a digest just because some remote author claims so +// (e.g. because a manifest says so); +// +// otherwise the cache could be poisoned and cause us to make incorrect edits to type +// information in a manifest. +func (bdc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { _ = bdc.update(func(tx *bolt.Tx) error { b, err := tx.CreateBucketIfNotExists(digestCompressorBucket) if err != nil { @@ -308,14 +310,14 @@ func (bdc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressor } key := []byte(anyDigest.String()) if previousBytes := b.Get(key); previousBytes != nil { - if string(previousBytes) != compressorName { - logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), compressorName) + if string(previousBytes) != data.BaseVariantCompressor { + logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), data.BaseVariantCompressor) } } - if compressorName == blobinfocache.UnknownCompression { + if data.BaseVariantCompressor == blobinfocache.UnknownCompression { return b.Delete(key) } - return b.Put(key, []byte(compressorName)) + return b.Put(key, []byte(data.BaseVariantCompressor)) }) // FIXME? 
Log error (but throttle the log volume on repeated accesses)? } @@ -367,7 +369,9 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW compressorName = string(compressorNameValue) } } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressorName, + }) if template == nil { return candidates } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 9c7621f28a..03548209f9 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -33,12 +33,12 @@ type CandidateTemplate struct { compressionAlgorithm *compression.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed } -// CandidateTemplateWithCompression returns a CandidateTemplate if a blob with compressionName (which can be Uncompressed -// or UnknownCompression) is acceptable for a CandidateLocations* call with v2Options. +// CandidateTemplateWithCompression returns a CandidateTemplate if a blob with data is acceptable +// for a CandidateLocations* call with v2Options. // // v2Options can be set to nil if the call is CandidateLocations (i.e. compression is not required to be known); // if not nil, the call is assumed to be CandidateLocations2. -func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, compressorName string) *CandidateTemplate { +func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, data blobinfocache.DigestCompressorData) *CandidateTemplate { if v2Options == nil { return &CandidateTemplate{ // Anything goes. The compressionOperation, compressionAlgorithm values are not used. 
digest: digest, @@ -49,7 +49,7 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation if v2Options.RequiredCompression != nil { requiredCompression = v2Options.RequiredCompression.Name() } - switch compressorName { + switch data.BaseVariantCompressor { case blobinfocache.Uncompressed: if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ PossibleManifestFormats: v2Options.PossibleManifestFormats, @@ -68,10 +68,10 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String()) return nil // Not allowed with CandidateLocations2 default: - algo, err := compression.AlgorithmByName(compressorName) + algo, err := compression.AlgorithmByName(data.BaseVariantCompressor) if err != nil { logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v", - digest.String(), compressorName, err) + digest.String(), data.BaseVariantCompressor, err) return nil // The BICReplacementCandidate2.CompressionAlgorithm field is required } if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ @@ -79,7 +79,7 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation RequiredCompression: v2Options.RequiredCompression, }, &algo) { logrus.Debugf("Ignoring BlobInfoCache record of digest %q, compression %q does not match required %s or MIME types %#v", - digest.String(), compressorName, requiredCompression, v2Options.PossibleManifestFormats) + digest.String(), data.BaseVariantCompressor, requiredCompression, v2Options.PossibleManifestFormats) return nil } return &CandidateTemplate{ diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index 2ea60e3394..0d9fe12c1d 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ 
b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -56,10 +56,20 @@ var ( ) func TestCandidateTemplateWithCompression(t *testing.T) { + uncompressedData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.Uncompressed, + } + gzipData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.GzipAlgorithmName, + } + zstdData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + } + for _, c := range []struct { name string requiredCompression *compressiontypes.Algorithm - compressor string + data blobinfocache.DigestCompressorData v2Matches bool // if v2Matches: v2Op types.LayerCompression @@ -68,13 +78,15 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "unknown", requiredCompression: nil, - compressor: blobinfocache.UnknownCompression, - v2Matches: false, + data: blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.UnknownCompression, + }, + v2Matches: false, }, { name: "uncompressed", requiredCompression: nil, - compressor: blobinfocache.Uncompressed, + data: uncompressedData, v2Matches: true, v2Op: types.Decompress, v2Algo: "", @@ -82,13 +94,13 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "uncompressed, want gzip", requiredCompression: &compression.Gzip, - compressor: blobinfocache.Uncompressed, + data: uncompressedData, v2Matches: false, }, { name: "gzip", requiredCompression: nil, - compressor: compressiontypes.GzipAlgorithmName, + data: gzipData, v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.GzipAlgorithmName, @@ -96,19 +108,21 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "gzip, want zstd", requiredCompression: &compression.Zstd, - compressor: compressiontypes.GzipAlgorithmName, + data: gzipData, v2Matches: false, }, { name: "unknown base", requiredCompression: nil, - compressor: "this value is unknown", - v2Matches: false, + data: 
blobinfocache.DigestCompressorData{ + BaseVariantCompressor: "this value is unknown", + }, + v2Matches: false, }, { name: "zstd", requiredCompression: nil, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.ZstdAlgorithmName, @@ -116,13 +130,13 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "zstd, want gzip", requiredCompression: &compression.Gzip, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: false, }, { name: "zstd, want zstd", requiredCompression: &compression.Zstd, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.ZstdAlgorithmName, @@ -130,11 +144,11 @@ func TestCandidateTemplateWithCompression(t *testing.T) { { name: "zstd, want zstd:chunked", requiredCompression: &compression.ZstdChunked, - compressor: compressiontypes.ZstdAlgorithmName, + data: zstdData, v2Matches: false, }, } { - res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.compressor) + res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.data) assert.Equal(t, &CandidateTemplate{ digest: digestCompressedPrimary, compressionOperation: types.PreserveOriginal, @@ -145,7 +159,7 @@ func TestCandidateTemplateWithCompression(t *testing.T) { // CandidateCompressionMatchesReuseConditions should have its own tests of handling the full set of options. 
res = CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{ RequiredCompression: c.requiredCompression, - }, digestCompressedPrimary, c.compressor) + }, digestCompressedPrimary, c.data) if !c.v2Matches { assert.Nil(t, res, c.name) } else { @@ -163,7 +177,9 @@ func TestCandidateTemplateWithCompression(t *testing.T) { } func TestCandidateWithLocation(t *testing.T) { - template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + }) require.NotNil(t, template) loc := types.BICLocationReference{Opaque: "opaque"} time := time.Now() @@ -177,7 +193,9 @@ func TestCandidateWithLocation(t *testing.T) { } func TestCandidateWithUnknownLocation(t *testing.T) { - template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, compressiontypes.ZstdAlgorithmName) + template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + }) require.NotNil(t, template) res := template.CandidateWithUnknownLocation() assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go index 9d932f51c8..30b1ab4b12 100644 --- a/pkg/blobinfocache/internal/test/test.go +++ b/pkg/blobinfocache/internal/test/test.go @@ -314,7 +314,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // ---------------------------- // If a record exists with compression without Location then // then return a record without location and with `UnknownLocation: true` - 
cache.RecordDigestCompressorName(digestUnknownLocation, compressiontypes.Bzip2AlgorithmName) + cache.RecordDigestCompressorData(digestUnknownLocation, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.Bzip2AlgorithmName, + }) res = cache.CandidateLocations2(transport, scope, digestUnknownLocation, blobinfocache.CandidateLocations2Options{ CanSubstitute: true, }) @@ -355,7 +357,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // that shouldn’t happen in real-world usage. if scopeIndex != 0 { for _, e := range digestNameSetPrioritization { - cache.RecordDigestCompressorName(e.d, blobinfocache.UnknownCompression) + cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.UnknownCompression, + }) } } @@ -423,7 +427,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // Set the "known" compression values for _, e := range digestNameSetPrioritization { - cache.RecordDigestCompressorName(e.d, e.m) + cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: e.m, + }) } // No substitutions allowed: @@ -505,7 +511,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa cache.RecordKnownLocation(transport, scope, e.d, types.BICLocationReference{Opaque: scopeName + e.n}) } for _, e := range digestNameSetFiltering { - cache.RecordDigestCompressorName(e.d, e.m) + cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: e.m, + }) } // No filtering diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go index ad1de7c841..067c6b7e11 100644 --- a/pkg/blobinfocache/memory/memory.go +++ b/pkg/blobinfocache/memory/memory.go @@ -144,19 +144,24 @@ func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope type locationScope[location] = time.Now() // Possibly overwriting an older 
entry. } -// RecordDigestCompressorName records that the blob with the specified digest is either compressed with the specified -// algorithm, or uncompressed, or that we no longer know. -func (mem *cache) RecordDigestCompressorName(blobDigest digest.Digest, compressorName string) { +// RecordDigestCompressorData records data for the blob with the specified digest. +// WARNING: Only call this with LOCALLY VERIFIED data: +// - don’t record a compressor for a digest just because some remote author claims so +// (e.g. because a manifest says so); +// +// otherwise the cache could be poisoned and cause us to make incorrect edits to type +// information in a manifest. +func (mem *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { mem.mutex.Lock() defer mem.mutex.Unlock() - if previous, ok := mem.compressors[blobDigest]; ok && previous != compressorName { - logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", blobDigest, previous, compressorName) + if previous, ok := mem.compressors[anyDigest]; ok && previous != data.BaseVariantCompressor { + logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor) } - if compressorName == blobinfocache.UnknownCompression { - delete(mem.compressors, blobDigest) + if data.BaseVariantCompressor == blobinfocache.UnknownCompression { + delete(mem.compressors, anyDigest) return } - mem.compressors[blobDigest] = compressorName + mem.compressors[anyDigest] = data.BaseVariantCompressor } // appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in memory @@ -170,7 +175,9 @@ func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateW if v, ok := mem.compressors[digest]; ok { compressorName = v } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + template := 
prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressorName, + }) if template == nil { return candidates } diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go index 4061af9720..8d2bf72898 100644 --- a/pkg/blobinfocache/sqlite/sqlite.go +++ b/pkg/blobinfocache/sqlite/sqlite.go @@ -457,29 +457,30 @@ func (sqc *cache) RecordKnownLocation(transport types.ImageTransport, scope type }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } -// RecordDigestCompressorName records a compressor for the blob with the specified digest, -// or Uncompressed or UnknownCompression. -// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a -// digest just because some remote author claims so (e.g. because a manifest says so); +// RecordDigestCompressorData records data for the blob with the specified digest. +// WARNING: Only call this with LOCALLY VERIFIED data: +// - don’t record a compressor for a digest just because some remote author claims so +// (e.g. because a manifest says so); +// // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. 
-func (sqc *cache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) { +func (sqc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { _, _ = transaction(sqc, func(tx *sql.Tx) (void, error) { previous, gotPrevious, err := querySingleValue[string](tx, "SELECT compressor FROM DigestCompressors WHERE digest = ?", anyDigest.String()) if err != nil { return void{}, fmt.Errorf("looking for compressor of for %q", anyDigest) } - if gotPrevious && previous != compressorName { - logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, compressorName) + if gotPrevious && previous != data.BaseVariantCompressor { + logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor) } - if compressorName == blobinfocache.UnknownCompression { + if data.BaseVariantCompressor == blobinfocache.UnknownCompression { if _, err := tx.Exec("DELETE FROM DigestCompressors WHERE digest = ?", anyDigest.String()); err != nil { return void{}, fmt.Errorf("deleting compressor for digest %q: %w", anyDigest, err) } } else { if _, err := tx.Exec("INSERT OR REPLACE INTO DigestCompressors(digest, compressor) VALUES (?, ?)", - anyDigest.String(), compressorName); err != nil { - return void{}, fmt.Errorf("recording compressor %q for %q: %w", compressorName, anyDigest, err) + anyDigest.String(), data.BaseVariantCompressor); err != nil { + return void{}, fmt.Errorf("recording compressor %q for %q: %w", data.BaseVariantCompressor, anyDigest, err) } } return void{}, nil @@ -502,7 +503,9 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW compressorName = compressor } } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressorName) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ + 
BaseVariantCompressor: compressorName, + }) if template == nil { return candidates, nil } From babdac8191e9162b47ced80a2e1b39cd2d3882b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 13 Mar 2024 22:13:30 +0100 Subject: [PATCH 29/37] Always record only the base variant information about consumed source blobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... because we don't trust the TOC data, if any. This allows us to remove the zstd:chunked hack; we, at least, now record those blobs as zstd. Signed-off-by: Miloslav Trmač --- copy/compression.go | 109 +++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index e5800aa4d5..081c49312f 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -35,10 +35,10 @@ var ( // bpDetectCompressionStepData contains data that the copy pipeline needs about the “detect compression” step. type bpDetectCompressionStepData struct { - isCompressed bool - format compressiontypes.Algorithm // Valid if isCompressed - decompressor compressiontypes.DecompressorFunc // Valid if isCompressed - srcCompressorName string // Compressor name to possibly record in the blob info cache for the source blob. + isCompressed bool + format compressiontypes.Algorithm // Valid if isCompressed + decompressor compressiontypes.DecompressorFunc // Valid if isCompressed + srcCompressorBaseVariantName string // Compressor name to possibly record in the blob info cache for the source blob. } // blobPipelineDetectCompressionStep updates *stream to detect its current compression format. 
@@ -58,9 +58,9 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI decompressor: decompressor, } if res.isCompressed { - res.srcCompressorName = format.Name() + res.srcCompressorBaseVariantName = format.BaseVariantName() } else { - res.srcCompressorName = internalblobinfocache.Uncompressed + res.srcCompressorBaseVariantName = internalblobinfocache.Uncompressed } if expectedBaseFormat, known := expectedBaseCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.BaseVariantName() != expectedBaseFormat.Name() { @@ -71,13 +71,13 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI // bpCompressionStepData contains data that the copy pipeline needs about the compression step. type bpCompressionStepData struct { - operation bpcOperation // What we are actually doing - uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do) - uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. - uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. - srcCompressorName string // Compressor name to record in the blob info cache for the source blob. - uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob. - closers []io.Closer // Objects to close after the upload is done, if any. + operation bpcOperation // What we are actually doing + uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do) + uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. 
+ uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. + srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob. + uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob. + closers []io.Closer // Objects to close after the upload is done, if any. } type bpcOperation int @@ -129,11 +129,11 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp // We can’t do anything with an encrypted blob unless decrypted. logrus.Debugf("Using original blob without modification for encrypted blob") return &bpCompressionStepData{ - operation: bpcOpPreserveOpaque, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: nil, - srcCompressorName: internalblobinfocache.UnknownCompression, - uploadedCompressorName: internalblobinfocache.UnknownCompression, + operation: bpcOpPreserveOpaque, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: nil, + srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression, + uploadedCompressorName: internalblobinfocache.UnknownCompression, }, nil } return nil, nil @@ -158,13 +158,13 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: bpcOpCompressUncompressed, - uploadedOperation: types.Compress, - uploadedAlgorithm: uploadedAlgorithm, - uploadedAnnotations: annotations, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: uploadedAlgorithm.Name(), - closers: []io.Closer{reader}, + operation: bpcOpCompressUncompressed, + uploadedOperation: types.Compress, + uploadedAlgorithm: uploadedAlgorithm, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: 
uploadedAlgorithm.Name(), + closers: []io.Closer{reader}, }, nil } return nil, nil @@ -199,13 +199,13 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp } succeeded = true return &bpCompressionStepData{ - operation: bpcOpRecompressCompressed, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: ic.compressionFormat, - uploadedAnnotations: annotations, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: ic.compressionFormat.Name(), - closers: []io.Closer{decompressed, recompressed}, + operation: bpcOpRecompressCompressed, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: ic.compressionFormat, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: ic.compressionFormat.Name(), + closers: []io.Closer{decompressed, recompressed}, }, nil } return nil, nil @@ -226,12 +226,12 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: bpcOpDecompressCompressed, - uploadedOperation: types.Decompress, - uploadedAlgorithm: nil, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: internalblobinfocache.Uncompressed, - closers: []io.Closer{s}, + operation: bpcOpDecompressCompressed, + uploadedOperation: types.Decompress, + uploadedAlgorithm: nil, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorName: internalblobinfocache.Uncompressed, + closers: []io.Closer{s}, }, nil } return nil, nil @@ -269,11 +269,14 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom algorithm = nil } return &bpCompressionStepData{ - operation: bpcOp, - uploadedOperation: uploadedOp, - uploadedAlgorithm: algorithm, - srcCompressorName: detected.srcCompressorName, - uploadedCompressorName: detected.srcCompressorName, + operation: bpcOp, + uploadedOperation: uploadedOp, + 
uploadedAlgorithm: algorithm, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + // We only record the base variant of the format on upload; we didn’t do anything with + // the TOC, we don’t know whether it matches the blob digest, so we don’t want to trigger + // reuse of any kind between the blob digest and the TOC digest. + uploadedCompressorName: detected.srcCompressorBaseVariantName, } } @@ -333,9 +336,9 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation) } } - if d.srcCompressorName == "" || d.uploadedCompressorName == "" { - return fmt.Errorf("internal error: missing compressor names (src: %q, uploaded: %q)", - d.srcCompressorName, d.uploadedCompressorName) + if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" { + return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)", + d.srcCompressorBaseVariantName, d.uploadedCompressorName) } if d.uploadedCompressorName != internalblobinfocache.UnknownCompression { if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName { @@ -353,13 +356,13 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf } } if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest && - d.srcCompressorName != internalblobinfocache.UnknownCompression { - if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName { - // HACK: Don’t record zstd:chunked algorithms, see above. 
- c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{ - BaseVariantCompressor: d.srcCompressorName, - }) - } + d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression { + // If the source is already using some TOC-dependent variant, we either copied the + // blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest, + // so record neither the variant name, nor the TOC digest. + c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{ + BaseVariantCompressor: d.srcCompressorBaseVariantName, + }) } return nil } From 074735e808fb2836312e034357419f926e7fc6c0 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 19:26:22 +0000 Subject: [PATCH 30/37] fix(deps): update module golang.org/x/crypto to v0.26.0 Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 8 ++++---- go.sum | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 9d3dc2ec8f..dbaf490a28 100644 --- a/go.mod +++ b/go.mod @@ -38,11 +38,11 @@ require ( github.com/vbauerster/mpb/v8 v8.7.5 github.com/xeipuuv/gojsonschema v1.2.0 go.etcd.io/bbolt v1.3.10 - golang.org/x/crypto v0.25.0 + golang.org/x/crypto v0.26.0 golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 golang.org/x/oauth2 v0.22.0 golang.org/x/sync v0.8.0 - golang.org/x/term v0.22.0 + golang.org/x/term v0.23.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -133,8 +133,8 @@ require ( go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/mod v0.18.0 // indirect golang.org/x/net v0.26.0 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/sys v0.23.0 // indirect + golang.org/x/text v0.17.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect google.golang.org/grpc v1.64.1 // 
indirect google.golang.org/protobuf v1.33.0 // indirect diff --git a/go.sum b/go.sum index 737c8d10db..508dcb0c1b 100644 --- a/go.sum +++ b/go.sum @@ -389,8 +389,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= @@ -450,23 +450,23 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term 
v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= From cb4dfedb4781bd6aa772a315f976d51516f6f524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 6 Aug 2024 21:53:34 +0200 Subject: [PATCH 31/37] Unrelated: Fix a bug in SQLite BlobInfoCache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit If we don't know an uncompressed digest, don't try using "". Signed-off-by: Miloslav Trmač --- pkg/blobinfocache/sqlite/sqlite.go | 55 +++++++++++++++--------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go index 8d2bf72898..9d000a5c7a 100644 --- a/pkg/blobinfocache/sqlite/sqlite.go +++ b/pkg/blobinfocache/sqlite/sqlite.go @@ -561,40 +561,41 @@ func (sqc *cache) candidateLocations(transport types.ImageTransport, scope types if err != nil { return nil, err } - - // FIXME? We could integrate this with appendReplacementCandidates into a single join instead of N+1 queries. - // (In the extreme, we could turn _everything_ this function does into a single query. - // And going even further, even DestructivelyPrioritizeReplacementCandidates could be turned into SQL.) - // For now, we prioritize simplicity, and sharing both code and implementation structure with the other cache implementations. - rows, err := tx.Query("SELECT anyDigest FROM DigestUncompressedPairs WHERE uncompressedDigest = ?", uncompressedDigest.String()) - if err != nil { - return nil, fmt.Errorf("querying for other digests: %w", err) - } - defer rows.Close() - for rows.Next() { - var otherDigestString string - if err := rows.Scan(&otherDigestString); err != nil { - return nil, fmt.Errorf("scanning other digest: %w", err) - } - otherDigest, err := digest.Parse(otherDigestString) + if uncompressedDigest != "" { + // FIXME? We could integrate this with appendReplacementCandidates into a single join instead of N+1 queries. + // (In the extreme, we could turn _everything_ this function does into a single query. + // And going even further, even DestructivelyPrioritizeReplacementCandidates could be turned into SQL.) + // For now, we prioritize simplicity, and sharing both code and implementation structure with the other cache implementations. 
+ rows, err := tx.Query("SELECT anyDigest FROM DigestUncompressedPairs WHERE uncompressedDigest = ?", uncompressedDigest.String()) if err != nil { - return nil, err + return nil, fmt.Errorf("querying for other digests: %w", err) } - if otherDigest != primaryDigest && otherDigest != uncompressedDigest { - res, err = sqc.appendReplacementCandidates(res, tx, transport, scope, otherDigest, v2Options) + defer rows.Close() + for rows.Next() { + var otherDigestString string + if err := rows.Scan(&otherDigestString); err != nil { + return nil, fmt.Errorf("scanning other digest: %w", err) + } + otherDigest, err := digest.Parse(otherDigestString) if err != nil { return nil, err } + if otherDigest != primaryDigest && otherDigest != uncompressedDigest { + res, err = sqc.appendReplacementCandidates(res, tx, transport, scope, otherDigest, v2Options) + if err != nil { + return nil, err + } + } + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterating through other digests: %w", err) } - } - if err := rows.Err(); err != nil { - return nil, fmt.Errorf("iterating through other digests: %w", err) - } - if uncompressedDigest != primaryDigest { - res, err = sqc.appendReplacementCandidates(res, tx, transport, scope, uncompressedDigest, v2Options) - if err != nil { - return nil, err + if uncompressedDigest != primaryDigest { + res, err = sqc.appendReplacementCandidates(res, tx, transport, scope, uncompressedDigest, v2Options) + if err != nil { + return nil, err + } } } } From 5dcb348296bd98f90ea7a6a93f7da89afd2ad075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 6 Aug 2024 10:05:09 +0200 Subject: [PATCH 32/37] Fix a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should not change behavior. 
Signed-off-by: Miloslav Trmač --- pkg/blobinfocache/internal/test/test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go index 30b1ab4b12..9ad471a64f 100644 --- a/pkg/blobinfocache/internal/test/test.go +++ b/pkg/blobinfocache/internal/test/test.go @@ -422,7 +422,7 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa }) assertCandidatesMatch2(t, scopeName, []candidate{}, res) - // Tests of lookups / prioritization when compression is unknown + // Tests of lookups / prioritization when compression is known // ------------------------------------------------------------- // Set the "known" compression values From f9d27e8c6d95a04eb7156ce2dfb0be3273051244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 13 Mar 2024 21:58:07 +0100 Subject: [PATCH 33/37] Add digest -> specific variant, annotation data to BIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cache implementations are recording both the base and specific compression variant; CandidateLocations2 all call CandidateTemplateWithCompression to choose the appropriate variants to return based on CandidateLocations2Options. This way, neither the BIC implementations nor the transports are not responsible for converting zstd:chunked entries to zstd entries if the user wants the latter. 
Signed-off-by: Miloslav Trmač --- copy/compression.go | 8 +- internal/blobinfocache/types.go | 21 ++-- internal/manifest/manifest.go | 5 - internal/manifest/manifest_test.go | 3 + pkg/blobinfocache/boltdb/boltdb.go | 92 +++++++++++++--- .../internal/prioritize/prioritize.go | 70 ++++++++---- .../internal/prioritize/prioritize_test.go | 103 +++++++++++++++--- pkg/blobinfocache/internal/test/test.go | 86 ++++++++++----- pkg/blobinfocache/memory/memory.go | 38 +++++-- pkg/blobinfocache/sqlite/sqlite.go | 74 +++++++++++-- 10 files changed, 387 insertions(+), 113 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index 081c49312f..e859d0139c 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -351,7 +351,9 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf // with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about // inconsistent data to be logged. c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{ - BaseVariantCompressor: d.uploadedCompressorName, + BaseVariantCompressor: d.uploadedCompressorName, + SpecificVariantCompressor: internalblobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, }) } } @@ -361,7 +363,9 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf // blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest, // so record neither the variant name, nor the TOC digest. 
c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{ - BaseVariantCompressor: d.srcCompressorBaseVariantName, + BaseVariantCompressor: d.srcCompressorBaseVariantName, + SpecificVariantCompressor: internalblobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, }) } return nil diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go index 276c8073e3..acf82ee639 100644 --- a/internal/blobinfocache/types.go +++ b/internal/blobinfocache/types.go @@ -37,8 +37,11 @@ type BlobInfoCache2 interface { // RecordDigestCompressorData records data for the blob with the specified digest. // WARNING: Only call this with LOCALLY VERIFIED data: - // - don’t record a compressor for a digest just because some remote author claims so - // (e.g. because a manifest says so); + // - don’t record a compressor for a digest just because some remote author claims so + // (e.g. because a manifest says so); + // - don’t record the non-base variant or annotations if we are not _sure_ that the base variant + // and the blob’s digest match the non-base variant’s annotations (e.g. because we saw them + // in a manifest) // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. RecordDigestCompressorData(anyDigest digest.Digest, data DigestCompressorData) @@ -52,6 +55,9 @@ type BlobInfoCache2 interface { // (This is worded generically, but basically targeted at the zstd / zstd:chunked situation.) type DigestCompressorData struct { BaseVariantCompressor string // A compressor’s base variant name, or Uncompressed or UnknownCompression. 
+ // The following fields are only valid if the base variant is neither Uncompressed nor UnknownCompression: + SpecificVariantCompressor string // A non-base variant compressor (or UnknownCompression if the true format is just the base variant) + SpecificVariantAnnotations map[string]string // Annotations required to benefit from the base variant. } // CandidateLocations2Options are used in CandidateLocations2. @@ -66,9 +72,10 @@ type CandidateLocations2Options struct { // BICReplacementCandidate2 is an item returned by BlobInfoCache2.CandidateLocations2. type BICReplacementCandidate2 struct { - Digest digest.Digest - CompressionOperation types.LayerCompression // Either types.Decompress for uncompressed, or types.Compress for compressed - CompressionAlgorithm *compressiontypes.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed - UnknownLocation bool // is true when `Location` for this blob is not set - Location types.BICLocationReference // not set if UnknownLocation is set to `true` + Digest digest.Digest + CompressionOperation types.LayerCompression // Either types.Decompress for uncompressed, or types.Compress for compressed + CompressionAlgorithm *compressiontypes.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed + CompressionAnnotations map[string]string // If necessary, annotations necessary to use CompressionAlgorithm + UnknownLocation bool // is true when `Location` for this blob is not set + Location types.BICLocationReference // not set if UnknownLocation is set to `true` } diff --git a/internal/manifest/manifest.go b/internal/manifest/manifest.go index ee0ddc772a..3fb52104a6 100644 --- a/internal/manifest/manifest.go +++ b/internal/manifest/manifest.go @@ -205,11 +205,6 @@ type ReuseConditions struct { // (which can be nil to represent uncompressed or unknown) matches reuseConditions. 
func CandidateCompressionMatchesReuseConditions(c ReuseConditions, candidateCompression *compressiontypes.Algorithm) bool { if c.RequiredCompression != nil { - if c.RequiredCompression.Name() == compressiontypes.ZstdChunkedAlgorithmName { - // HACK: Never match when the caller asks for zstd:chunked, because we don’t record the annotations required to use the chunked blobs. - // The caller must re-compress to build those annotations. - return false - } if candidateCompression == nil || (c.RequiredCompression.Name() != candidateCompression.Name() && c.RequiredCompression.Name() != candidateCompression.BaseVariantName()) { return false diff --git a/internal/manifest/manifest_test.go b/internal/manifest/manifest_test.go index ae2cc32ca0..ebb3ef5593 100644 --- a/internal/manifest/manifest_test.go +++ b/internal/manifest/manifest_test.go @@ -192,6 +192,9 @@ func TestCandidateCompressionMatchesReuseConditions(t *testing.T) { }{ // RequiredCompression restrictions {&compression.Zstd, nil, &compression.Zstd, true}, + {&compression.Zstd, nil, &compression.ZstdChunked, true}, + {&compression.ZstdChunked, nil, &compression.Zstd, false}, + {&compression.ZstdChunked, nil, &compression.ZstdChunked, true}, {&compression.Gzip, nil, &compression.Zstd, false}, {&compression.Zstd, nil, nil, false}, {nil, nil, &compression.Zstd, true}, diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go index 036d2c0149..2d4137ffd2 100644 --- a/pkg/blobinfocache/boltdb/boltdb.go +++ b/pkg/blobinfocache/boltdb/boltdb.go @@ -2,6 +2,8 @@ package boltdb import ( + "bytes" + "encoding/json" "errors" "fmt" "io/fs" @@ -30,6 +32,10 @@ var ( // digestCompressorBucket stores a mapping from any digest to a compressor, or blobinfocache.Uncompressed (not blobinfocache.UnknownCompression). // It may not exist in caches created by older versions, even if uncompressedDigestBucket is present. 
digestCompressorBucket = []byte("digestCompressor") + // digestSpecificVariantCompressorBucket stores a mapping from any digest to a (compressor, NUL byte, annotations as JSON), valid + // only if digestCompressorBucket contains a value. The compressor is not `UnknownCompression`. + digestSpecificVariantCompressorBucket = []byte("digestSpecificVariantCompressor") + // It may not exist in caches created by older versions, even if digestCompressorBucket is present. // digestByUncompressedBucket stores a bucket per uncompressed digest, with the bucket containing a set of digests for that uncompressed digest // (as a set of key=digest, value="" pairs) digestByUncompressedBucket = []byte("digestByUncompressed") @@ -299,25 +305,68 @@ func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompresse // WARNING: Only call this with LOCALLY VERIFIED data: // - don’t record a compressor for a digest just because some remote author claims so // (e.g. because a manifest says so); +// - don’t record the non-base variant or annotations if we are not _sure_ that the base variant +// and the blob’s digest match the non-base variant’s annotations (e.g. because we saw them +// in a manifest) // // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. 
func (bdc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { _ = bdc.update(func(tx *bolt.Tx) error { + key := []byte(anyDigest.String()) + b, err := tx.CreateBucketIfNotExists(digestCompressorBucket) if err != nil { return err } - key := []byte(anyDigest.String()) + warned := false if previousBytes := b.Get(key); previousBytes != nil { if string(previousBytes) != data.BaseVariantCompressor { logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, string(previousBytes), data.BaseVariantCompressor) + warned = true } } if data.BaseVariantCompressor == blobinfocache.UnknownCompression { - return b.Delete(key) + if err := b.Delete(key); err != nil { + return err + } + if b := tx.Bucket(digestSpecificVariantCompressorBucket); b != nil { + if err := b.Delete(key); err != nil { + return err + } + } } - return b.Put(key, []byte(data.BaseVariantCompressor)) + if err := b.Put(key, []byte(data.BaseVariantCompressor)); err != nil { + return err + } + + if data.SpecificVariantCompressor != blobinfocache.UnknownCompression { + b, err := tx.CreateBucketIfNotExists(digestSpecificVariantCompressorBucket) + if err != nil { + return err + } + if !warned { // Don’t warn twice about the same digest + if previousBytes := b.Get(key); previousBytes != nil { + if prevSVCBytes, _, ok := bytes.Cut(previousBytes, []byte{0}); ok { + prevSVC := string(prevSVCBytes) + if data.SpecificVariantCompressor != prevSVC { + logrus.Warnf("Specific compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, prevSVC, data.SpecificVariantCompressor) + } + } + } + } + annotations, err := json.Marshal(data.SpecificVariantAnnotations) + if err != nil { + return err + } + data := bytes.Clone([]byte(data.SpecificVariantCompressor)) + data = append(data, 0) + data = append(data, annotations...) + if err := b.Put(key, data); err != nil { + return err + } + } + return nil }) // FIXME? 
Log error (but throttle the log volume on repeated accesses)? } @@ -354,24 +403,36 @@ func (bdc *cache) RecordKnownLocation(transport types.ImageTransport, scope type // appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in scopeBucket // (which might be nil) with corresponding compression -// info from compressionBucket (which might be nil), and returns the result of appending them +// info from compressionBucket and specificVariantCompresssionBucket (which might be nil), and returns the result of appending them // to candidates. // v2Options is not nil if the caller is CandidateLocations2: this allows including candidates with unknown location, and filters out candidates // with unknown compression. -func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, scopeBucket, compressionBucket *bolt.Bucket, digest digest.Digest, - v2Options *blobinfocache.CandidateLocations2Options) []prioritize.CandidateWithTime { +func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, scopeBucket, compressionBucket, specificVariantCompresssionBucket *bolt.Bucket, + digest digest.Digest, v2Options *blobinfocache.CandidateLocations2Options) []prioritize.CandidateWithTime { digestKey := []byte(digest.String()) - compressorName := blobinfocache.UnknownCompression + compressionData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, + } if compressionBucket != nil { // the bucket won't exist if the cache was created by a v1 implementation and // hasn't yet been updated by a v2 implementation if compressorNameValue := compressionBucket.Get(digestKey); len(compressorNameValue) > 0 { - compressorName = string(compressorNameValue) + compressionData.BaseVariantCompressor = string(compressorNameValue) + } + if specificVariantCompresssionBucket != nil { + if 
svcData := specificVariantCompresssionBucket.Get(digestKey); svcData != nil { + if compressorBytes, annotationBytes, ok := bytes.Cut(svcData, []byte{0}); ok { + compressionData.SpecificVariantCompressor = string(compressorBytes) + if err := json.Unmarshal(annotationBytes, &compressionData.SpecificVariantAnnotations); err != nil { + return candidates // FIXME? Log error (but throttle the log volume on repeated accesses)? + } + } + } } } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressorName, - }) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressionData) if template == nil { return candidates } @@ -416,11 +477,12 @@ func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types if scopeBucket != nil { scopeBucket = scopeBucket.Bucket([]byte(scope.Opaque)) } - // compressionBucket won't have been created if previous writers never recorded info about compression, + // compressionBucket and svCompressionBucket won't have been created if previous writers never recorded info about compression, // and we don't want to fail just because of that compressionBucket := tx.Bucket(digestCompressorBucket) + specificVariantCompressionBucket := tx.Bucket(digestSpecificVariantCompressorBucket) - res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, primaryDigest, v2Options) + res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, specificVariantCompressionBucket, primaryDigest, v2Options) if canSubstitute { if uncompressedDigestValue = bdc.uncompressedDigest(tx, primaryDigest); uncompressedDigestValue != "" { b := tx.Bucket(digestByUncompressedBucket) @@ -433,7 +495,7 @@ func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types return err } if d != primaryDigest && d != uncompressedDigestValue { - res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, 
d, v2Options) + res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, specificVariantCompressionBucket, d, v2Options) } return nil }); err != nil { @@ -442,7 +504,7 @@ func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types } } if uncompressedDigestValue != primaryDigest { - res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, uncompressedDigestValue, v2Options) + res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, specificVariantCompressionBucket, uncompressedDigestValue, v2Options) } } } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize.go b/pkg/blobinfocache/internal/prioritize/prioritize.go index 03548209f9..40ed09bed5 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize.go @@ -28,9 +28,10 @@ const replacementUnknownLocationAttempts = 2 // CandidateTemplate is a subset of BICReplacementCandidate2 with data related to a specific digest, // which can be later combined with information about a location. type CandidateTemplate struct { - digest digest.Digest - compressionOperation types.LayerCompression // Either types.Decompress for uncompressed, or types.Compress for compressed - compressionAlgorithm *compression.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed + digest digest.Digest + compressionOperation types.LayerCompression // Either types.Decompress for uncompressed, or types.Compress for compressed + compressionAlgorithm *compression.Algorithm // An algorithm when the candidate is compressed, or nil when it is uncompressed + compressionAnnotations map[string]string // If necessary, annotations necessary to use compressionAlgorithm } // CandidateTemplateWithCompression returns a CandidateTemplate if a blob with data is acceptable @@ -40,7 +41,7 @@ type CandidateTemplate struct { // if not nil, the call is assumed to be CandidateLocations2. 
func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocations2Options, digest digest.Digest, data blobinfocache.DigestCompressorData) *CandidateTemplate { if v2Options == nil { - return &CandidateTemplate{ // Anything goes. The compressionOperation, compressionAlgorithm values are not used. + return &CandidateTemplate{ // Anything goes. The compressionOperation, compressionAlgorithm and compressionAnnotations values are not used. digest: digest, } } @@ -60,14 +61,40 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation return nil } return &CandidateTemplate{ - digest: digest, - compressionOperation: types.Decompress, - compressionAlgorithm: nil, + digest: digest, + compressionOperation: types.Decompress, + compressionAlgorithm: nil, + compressionAnnotations: nil, } case blobinfocache.UnknownCompression: logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unknown compression", digest.String()) return nil // Not allowed with CandidateLocations2 default: + // See if we can use the specific variant, first. 
+ if data.SpecificVariantCompressor != blobinfocache.UnknownCompression { + algo, err := compression.AlgorithmByName(data.SpecificVariantCompressor) + if err != nil { + logrus.Debugf("Not considering unrecognized specific compression variant %q for BlobInfoCache record of digest %q: %v", + data.SpecificVariantCompressor, digest.String(), err) + } else { + if !manifest.CandidateCompressionMatchesReuseConditions(manifest.ReuseConditions{ + PossibleManifestFormats: v2Options.PossibleManifestFormats, + RequiredCompression: v2Options.RequiredCompression, + }, &algo) { + logrus.Debugf("Ignoring specific compression variant %q for BlobInfoCache record of digest %q, it does not match required %s or MIME types %#v", + data.SpecificVariantCompressor, digest.String(), requiredCompression, v2Options.PossibleManifestFormats) + } else { + return &CandidateTemplate{ + digest: digest, + compressionOperation: types.Compress, + compressionAlgorithm: &algo, + compressionAnnotations: data.SpecificVariantAnnotations, + } + } + } + } + + // Try the base variant. 
algo, err := compression.AlgorithmByName(data.BaseVariantCompressor) if err != nil { logrus.Debugf("Ignoring BlobInfoCache record of digest %q with unrecognized compression %q: %v", @@ -83,9 +110,10 @@ func CandidateTemplateWithCompression(v2Options *blobinfocache.CandidateLocation return nil } return &CandidateTemplate{ - digest: digest, - compressionOperation: types.Compress, - compressionAlgorithm: &algo, + digest: digest, + compressionOperation: types.Compress, + compressionAlgorithm: &algo, + compressionAnnotations: nil, } } } @@ -100,11 +128,12 @@ type CandidateWithTime struct { func (template CandidateTemplate) CandidateWithLocation(location types.BICLocationReference, lastSeen time.Time) CandidateWithTime { return CandidateWithTime{ candidate: blobinfocache.BICReplacementCandidate2{ - Digest: template.digest, - CompressionOperation: template.compressionOperation, - CompressionAlgorithm: template.compressionAlgorithm, - UnknownLocation: false, - Location: location, + Digest: template.digest, + CompressionOperation: template.compressionOperation, + CompressionAlgorithm: template.compressionAlgorithm, + CompressionAnnotations: template.compressionAnnotations, + UnknownLocation: false, + Location: location, }, lastSeen: lastSeen, } @@ -114,11 +143,12 @@ func (template CandidateTemplate) CandidateWithLocation(location types.BICLocati func (template CandidateTemplate) CandidateWithUnknownLocation() CandidateWithTime { return CandidateWithTime{ candidate: blobinfocache.BICReplacementCandidate2{ - Digest: template.digest, - CompressionOperation: template.compressionOperation, - CompressionAlgorithm: template.compressionAlgorithm, - UnknownLocation: true, - Location: types.BICLocationReference{Opaque: ""}, + Digest: template.digest, + CompressionOperation: template.compressionOperation, + CompressionAlgorithm: template.compressionAlgorithm, + CompressionAnnotations: template.compressionAnnotations, + UnknownLocation: true, + Location: 
types.BICLocationReference{Opaque: ""}, }, lastSeen: time.Time{}, } diff --git a/pkg/blobinfocache/internal/prioritize/prioritize_test.go b/pkg/blobinfocache/internal/prioritize/prioritize_test.go index 0d9fe12c1d..a344d7e271 100644 --- a/pkg/blobinfocache/internal/prioritize/prioritize_test.go +++ b/pkg/blobinfocache/internal/prioritize/prioritize_test.go @@ -56,14 +56,26 @@ var ( ) func TestCandidateTemplateWithCompression(t *testing.T) { + chunkedAnnotations := map[string]string{"a": "b"} uncompressedData := blobinfocache.DigestCompressorData{ - BaseVariantCompressor: blobinfocache.Uncompressed, + BaseVariantCompressor: blobinfocache.Uncompressed, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, } gzipData := blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressiontypes.GzipAlgorithmName, + BaseVariantCompressor: compressiontypes.GzipAlgorithmName, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, } zstdData := blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, + } + zstdChunkedData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + SpecificVariantCompressor: compressiontypes.ZstdChunkedAlgorithmName, + SpecificVariantAnnotations: chunkedAnnotations, } for _, c := range []struct { @@ -72,14 +84,17 @@ func TestCandidateTemplateWithCompression(t *testing.T) { data blobinfocache.DigestCompressorData v2Matches bool // if v2Matches: - v2Op types.LayerCompression - v2Algo string + v2Op types.LayerCompression + v2Algo string + v2Annotations map[string]string }{ { name: "unknown", requiredCompression: nil, data: blobinfocache.DigestCompressorData{ - BaseVariantCompressor: blobinfocache.UnknownCompression, + 
BaseVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, }, v2Matches: false, }, @@ -90,6 +105,7 @@ func TestCandidateTemplateWithCompression(t *testing.T) { v2Matches: true, v2Op: types.Decompress, v2Algo: "", + v2Annotations: nil, }, { name: "uncompressed, want gzip", @@ -104,6 +120,7 @@ func TestCandidateTemplateWithCompression(t *testing.T) { v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.GzipAlgorithmName, + v2Annotations: nil, }, { name: "gzip, want zstd", @@ -115,7 +132,9 @@ func TestCandidateTemplateWithCompression(t *testing.T) { name: "unknown base", requiredCompression: nil, data: blobinfocache.DigestCompressorData{ - BaseVariantCompressor: "this value is unknown", + BaseVariantCompressor: "this value is unknown", + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, }, v2Matches: false, }, @@ -126,6 +145,7 @@ func TestCandidateTemplateWithCompression(t *testing.T) { v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.ZstdAlgorithmName, + v2Annotations: nil, }, { name: "zstd, want gzip", @@ -140,6 +160,7 @@ func TestCandidateTemplateWithCompression(t *testing.T) { v2Matches: true, v2Op: types.Compress, v2Algo: compressiontypes.ZstdAlgorithmName, + v2Annotations: nil, }, { name: "zstd, want zstd:chunked", @@ -147,12 +168,59 @@ func TestCandidateTemplateWithCompression(t *testing.T) { data: zstdData, v2Matches: false, }, + { + name: "zstd:chunked", + requiredCompression: nil, + data: zstdChunkedData, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.ZstdChunkedAlgorithmName, + v2Annotations: chunkedAnnotations, + }, + { + name: "zstd:chunked, want gzip", + requiredCompression: &compression.Gzip, + data: zstdChunkedData, + v2Matches: false, + }, + { + name: "zstd:chunked, want zstd", // Note that we return the full chunked data in this case. 
+ requiredCompression: &compression.Zstd, + data: zstdChunkedData, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.ZstdChunkedAlgorithmName, + v2Annotations: chunkedAnnotations, + }, + { + name: "zstd:chunked, want zstd:chunked", + requiredCompression: &compression.ZstdChunked, + data: zstdChunkedData, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.ZstdChunkedAlgorithmName, + v2Annotations: chunkedAnnotations, + }, + { + name: "zstd:unknown", + requiredCompression: nil, + data: blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + SpecificVariantCompressor: "this value is unknown", + SpecificVariantAnnotations: chunkedAnnotations, + }, + v2Matches: true, + v2Op: types.Compress, + v2Algo: compressiontypes.ZstdAlgorithmName, + v2Annotations: nil, + }, } { res := CandidateTemplateWithCompression(nil, digestCompressedPrimary, c.data) assert.Equal(t, &CandidateTemplate{ - digest: digestCompressedPrimary, - compressionOperation: types.PreserveOriginal, - compressionAlgorithm: nil, + digest: digestCompressedPrimary, + compressionOperation: types.PreserveOriginal, + compressionAlgorithm: nil, + compressionAnnotations: nil, }, res, c.name) // These tests only use RequiredCompression in CandidateLocations2Options for clarity; @@ -172,13 +240,16 @@ func TestCandidateTemplateWithCompression(t *testing.T) { require.NotNil(t, res.compressionAlgorithm, c.name) assert.Equal(t, c.v2Algo, res.compressionAlgorithm.Name()) } + assert.Equal(t, c.v2Annotations, res.compressionAnnotations, c.name) } } } func TestCandidateWithLocation(t *testing.T) { template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + SpecificVariantCompressor: compressiontypes.ZstdChunkedAlgorithmName, + 
SpecificVariantAnnotations: map[string]string{"a": "b"}, }) require.NotNil(t, template) loc := types.BICLocationReference{Opaque: "opaque"} @@ -186,7 +257,8 @@ func TestCandidateWithLocation(t *testing.T) { res := template.CandidateWithLocation(loc, time) assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) assert.Equal(t, types.Compress, res.candidate.CompressionOperation) - assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, compressiontypes.ZstdChunkedAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, map[string]string{"a": "b"}, res.candidate.CompressionAnnotations) assert.Equal(t, false, res.candidate.UnknownLocation) assert.Equal(t, loc, res.candidate.Location) assert.Equal(t, time, res.lastSeen) @@ -194,13 +266,16 @@ func TestCandidateWithLocation(t *testing.T) { func TestCandidateWithUnknownLocation(t *testing.T) { template := CandidateTemplateWithCompression(&blobinfocache.CandidateLocations2Options{}, digestCompressedPrimary, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + SpecificVariantCompressor: compressiontypes.ZstdChunkedAlgorithmName, + SpecificVariantAnnotations: map[string]string{"a": "b"}, }) require.NotNil(t, template) res := template.CandidateWithUnknownLocation() assert.Equal(t, digestCompressedPrimary, res.candidate.Digest) assert.Equal(t, types.Compress, res.candidate.CompressionOperation) - assert.Equal(t, compressiontypes.ZstdAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, compressiontypes.ZstdChunkedAlgorithmName, res.candidate.CompressionAlgorithm.Name()) + assert.Equal(t, map[string]string{"a": "b"}, res.candidate.CompressionAnnotations) assert.Equal(t, true, res.candidate.UnknownLocation) } diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go index 
9ad471a64f..cb4a4e6b01 100644 --- a/pkg/blobinfocache/internal/test/test.go +++ b/pkg/blobinfocache/internal/test/test.go @@ -25,7 +25,7 @@ const ( compressorNameU = blobinfocache.Uncompressed compressorNameA = compressiontypes.GzipAlgorithmName compressorNameB = compressiontypes.ZstdAlgorithmName - compressorNameCU = compressiontypes.ZstdChunkedAlgorithmName + compressorNameCU = compressiontypes.XzAlgorithmName digestUnknownLocation = digest.Digest("sha256:7777777777777777777777777777777777777777777777777777777777777777") digestFilteringUncompressed = digest.Digest("sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") @@ -150,6 +150,7 @@ func testGenericRecordKnownLocations(t *testing.T, cache blobinfocache.BlobInfoC type candidate struct { d digest.Digest cn string + ca map[string]string lr string } @@ -194,11 +195,12 @@ func assertCandidatesMatch2(t *testing.T, scopeName string, expected []candidate algo = &algo_ } e[i] = blobinfocache.BICReplacementCandidate2{ - Digest: ev.d, - CompressionOperation: op, - CompressionAlgorithm: algo, - UnknownLocation: false, - Location: types.BICLocationReference{Opaque: scopeName + ev.lr}, + Digest: ev.d, + CompressionOperation: op, + CompressionAlgorithm: algo, + CompressionAnnotations: ev.ca, + UnknownLocation: false, + Location: types.BICLocationReference{Opaque: scopeName + ev.lr}, } } assertCandidatesMatch2Native(t, e, actual) @@ -283,14 +285,16 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa {"B", digestCompressedB, compressorNameB}, {"CU", digestCompressedUnrelated, compressorNameCU}, } + chunkedAnnotations := map[string]string{"a": "b"} digestNameSetFiltering := []struct { // Used primarily to test filtering in CandidateLocations2Options - n string - d digest.Digest - m string + n string + d digest.Digest + base, specific string + annotations map[string]string }{ - {"gzip", digestGzip, compressiontypes.GzipAlgorithmName}, - {"zstd", digestZstd, 
compressiontypes.ZstdAlgorithmName}, - {"zstdChunked", digestZstdChunked, compressiontypes.ZstdChunkedAlgorithmName}, + {"gzip", digestGzip, compressiontypes.GzipAlgorithmName, blobinfocache.UnknownCompression, nil}, + {"zstd", digestZstd, compressiontypes.ZstdAlgorithmName, blobinfocache.UnknownCompression, nil}, + {"zstdChunked", digestZstdChunked, compressiontypes.ZstdAlgorithmName, compressiontypes.ZstdChunkedAlgorithmName, chunkedAnnotations}, } for _, e := range digestNameSetFiltering { // digestFilteringUncompressed exists only to allow the three entries to be considered as candidates cache.RecordDigestUncompressedPair(e.d, digestFilteringUncompressed) @@ -315,7 +319,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // If a record exists with compression without Location then // then return a record without location and with `UnknownLocation: true` cache.RecordDigestCompressorData(digestUnknownLocation, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressiontypes.Bzip2AlgorithmName, + BaseVariantCompressor: compressiontypes.Bzip2AlgorithmName, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, }) res = cache.CandidateLocations2(transport, scope, digestUnknownLocation, blobinfocache.CandidateLocations2Options{ CanSubstitute: true, @@ -358,7 +364,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa if scopeIndex != 0 { for _, e := range digestNameSetPrioritization { cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: blobinfocache.UnknownCompression, + BaseVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, }) } } @@ -428,7 +436,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa // Set the "known" compression values for _, e := range 
digestNameSetPrioritization { cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: e.m, + BaseVariantCompressor: e.m, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, }) } @@ -512,7 +522,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa } for _, e := range digestNameSetFiltering { cache.RecordDigestCompressorData(e.d, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: e.m, + BaseVariantCompressor: e.base, + SpecificVariantCompressor: e.specific, + SpecificVariantAnnotations: e.annotations, }) } @@ -521,9 +533,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa CanSubstitute: true, }) assertCandidatesMatch2(t, scopeName, []candidate{ // Sorted in the reverse of digestNameSetFiltering order - {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, lr: "zstdChunked"}, - {d: digestZstd, cn: compressiontypes.ZstdAlgorithmName, lr: "zstd"}, - {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, lr: "gzip"}, + {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, ca: chunkedAnnotations, lr: "zstdChunked"}, + {d: digestZstd, cn: compressiontypes.ZstdAlgorithmName, ca: nil, lr: "zstd"}, + {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, ca: nil, lr: "gzip"}, }, res) // Manifest format filters @@ -532,16 +544,16 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa PossibleManifestFormats: []string{manifest.DockerV2Schema2MediaType}, }) assertCandidatesMatch2(t, scopeName, []candidate{ - {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, lr: "gzip"}, + {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, ca: nil, lr: "gzip"}, }, res) res = cache.CandidateLocations2(transport, scope, digestFilteringUncompressed, blobinfocache.CandidateLocations2Options{ CanSubstitute: true, PossibleManifestFormats: 
[]string{imgspecv1.MediaTypeImageManifest}, }) assertCandidatesMatch2(t, scopeName, []candidate{ // Sorted in the reverse of digestNameSetFiltering order - {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, lr: "zstdChunked"}, - {d: digestZstd, cn: compressiontypes.ZstdAlgorithmName, lr: "zstd"}, - {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, lr: "gzip"}, + {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, ca: chunkedAnnotations, lr: "zstdChunked"}, + {d: digestZstd, cn: compressiontypes.ZstdAlgorithmName, ca: nil, lr: "zstd"}, + {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, ca: nil, lr: "gzip"}, }, res) // Compression algorithm filters @@ -550,21 +562,37 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa RequiredCompression: &compression.Gzip, }) assertCandidatesMatch2(t, scopeName, []candidate{ - {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, lr: "gzip"}, + {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, ca: nil, lr: "gzip"}, }, res) res = cache.CandidateLocations2(transport, scope, digestFilteringUncompressed, blobinfocache.CandidateLocations2Options{ CanSubstitute: true, RequiredCompression: &compression.ZstdChunked, }) - // Right now, zstd:chunked requests never match a candidate, see CandidateCompressionMatchesReuseConditions(). - assertCandidatesMatch2(t, scopeName, []candidate{}, res) + assertCandidatesMatch2(t, scopeName, []candidate{ + {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, ca: chunkedAnnotations, lr: "zstdChunked"}, + }, res) res = cache.CandidateLocations2(transport, scope, digestFilteringUncompressed, blobinfocache.CandidateLocations2Options{ CanSubstitute: true, RequiredCompression: &compression.Zstd, }) - assertCandidatesMatch2(t, scopeName, []candidate{ // When the user asks for zstd, zstd:chunked candidates are also acceptable. 
- {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, lr: "zstdChunked"}, - {d: digestZstd, cn: compressiontypes.ZstdAlgorithmName, lr: "zstd"}, + assertCandidatesMatch2(t, scopeName, []candidate{ // When the user asks for zstd, zstd:chunked candidates are also acceptable, and include the chunked information. + {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, ca: chunkedAnnotations, lr: "zstdChunked"}, + {d: digestZstd, cn: compressiontypes.ZstdAlgorithmName, ca: nil, lr: "zstd"}, + }, res) + + // After RecordDigestCompressorData with zstd:chunked details, a later call with zstd-only does not drop the chunked details. + cache.RecordDigestCompressorData(digestZstdChunked, blobinfocache.DigestCompressorData{ + BaseVariantCompressor: compressiontypes.ZstdAlgorithmName, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, + }) + res = cache.CandidateLocations2(transport, scope, digestFilteringUncompressed, blobinfocache.CandidateLocations2Options{ + CanSubstitute: true, + }) + assertCandidatesMatch2(t, scopeName, []candidate{ // Sorted in the reverse of digestNameSetFiltering order + {d: digestZstdChunked, cn: compressiontypes.ZstdChunkedAlgorithmName, ca: chunkedAnnotations, lr: "zstdChunked"}, + {d: digestZstd, cn: compressiontypes.ZstdAlgorithmName, ca: nil, lr: "zstd"}, + {d: digestGzip, cn: compressiontypes.GzipAlgorithmName, ca: nil, lr: "gzip"}, }, res) } } diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go index 067c6b7e11..9d4125d664 100644 --- a/pkg/blobinfocache/memory/memory.go +++ b/pkg/blobinfocache/memory/memory.go @@ -28,7 +28,7 @@ type cache struct { uncompressedDigestsByTOC map[digest.Digest]digest.Digest digestsByUncompressed map[digest.Digest]*set.Set[digest.Digest] // stores a set of digests for each uncompressed digest knownLocations map[locationKey]map[types.BICLocationReference]time.Time // stores last known existence time for 
each location reference - compressors map[digest.Digest]string // stores a compressor name, or blobinfocache.Uncompressed (not blobinfocache.UnknownCompression), for each digest + compressors map[digest.Digest]blobinfocache.DigestCompressorData // stores compression data for each digest; BaseVariantCompressor != UnknownCompression } // New returns a BlobInfoCache implementation which is in-memory only. @@ -49,7 +49,7 @@ func new2() *cache { uncompressedDigestsByTOC: map[digest.Digest]digest.Digest{}, digestsByUncompressed: map[digest.Digest]*set.Set[digest.Digest]{}, knownLocations: map[locationKey]map[types.BICLocationReference]time.Time{}, - compressors: map[digest.Digest]string{}, + compressors: map[digest.Digest]blobinfocache.DigestCompressorData{}, } } @@ -148,20 +148,36 @@ func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope type // WARNING: Only call this with LOCALLY VERIFIED data: // - don’t record a compressor for a digest just because some remote author claims so // (e.g. because a manifest says so); +// - don’t record the non-base variant or annotations if we are not _sure_ that the base variant +// and the blob’s digest match the non-base variant’s annotations (e.g. because we saw them +// in a manifest) // // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. 
func (mem *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobinfocache.DigestCompressorData) { mem.mutex.Lock() defer mem.mutex.Unlock() - if previous, ok := mem.compressors[anyDigest]; ok && previous != data.BaseVariantCompressor { - logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor) + if previous, ok := mem.compressors[anyDigest]; ok { + if previous.BaseVariantCompressor != data.BaseVariantCompressor { + logrus.Warnf("Base compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous.BaseVariantCompressor, data.BaseVariantCompressor) + } else if previous.SpecificVariantCompressor != blobinfocache.UnknownCompression && data.SpecificVariantCompressor != blobinfocache.UnknownCompression && + previous.SpecificVariantCompressor != data.SpecificVariantCompressor { + logrus.Warnf("Specific compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous.SpecificVariantCompressor, data.SpecificVariantCompressor) + } + // We don’t check SpecificVariantAnnotations for equality, it’s possible that their generation is not deterministic. + + // Preserve specific variant information if the incoming data does not have it. 
+ if data.BaseVariantCompressor != blobinfocache.UnknownCompression && data.SpecificVariantCompressor == blobinfocache.UnknownCompression && + previous.SpecificVariantCompressor != blobinfocache.UnknownCompression { + data.SpecificVariantCompressor = previous.SpecificVariantCompressor + data.SpecificVariantAnnotations = previous.SpecificVariantAnnotations + } } if data.BaseVariantCompressor == blobinfocache.UnknownCompression { delete(mem.compressors, anyDigest) return } - mem.compressors[anyDigest] = data.BaseVariantCompressor + mem.compressors[anyDigest] = data } // appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in memory @@ -171,13 +187,15 @@ func (mem *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobi // with unknown compression. func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, v2Options *blobinfocache.CandidateLocations2Options) []prioritize.CandidateWithTime { - compressorName := blobinfocache.UnknownCompression + compressionData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, + } if v, ok := mem.compressors[digest]; ok { - compressorName = v + compressionData = v } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressorName, - }) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressionData) if template == nil { return candidates } diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go index 9d000a5c7a..1a79310239 100644 --- a/pkg/blobinfocache/sqlite/sqlite.go +++ b/pkg/blobinfocache/sqlite/sqlite.go @@ -3,6 +3,7 @@ package sqlite import ( "database/sql" + "encoding/json" "errors" 
"fmt" "sync" @@ -303,6 +304,16 @@ func ensureDBHasCurrentSchema(db *sql.DB) error { `uncompressedDigest TEXT NOT NULL )`, }, + { + "DigestSpecificVariantCompressors", // If changing the schema incompatibly, merge this with DigestCompressors. + `CREATE TABLE IF NOT EXISTS DigestSpecificVariantCompressors(` + + // index implied by PRIMARY KEY + `digest TEXT PRIMARY KEY NOT NULL,` + + // The compressor is not `UnknownCompression`. + `specificVariantCompressor TEXT NOT NULL, + specificVariantAnnotations BLOB NOT NULL + )`, + }, } _, err := dbTransaction(db, func(tx *sql.Tx) (void, error) { @@ -461,6 +472,9 @@ func (sqc *cache) RecordKnownLocation(transport types.ImageTransport, scope type // WARNING: Only call this with LOCALLY VERIFIED data: // - don’t record a compressor for a digest just because some remote author claims so // (e.g. because a manifest says so); +// - don’t record the non-base variant or annotations if we are not _sure_ that the base variant +// and the blob’s digest match the non-base variant’s annotations (e.g. because we saw them +// in a manifest) // // otherwise the cache could be poisoned and cause us to make incorrect edits to type // information in a manifest. 
@@ -468,21 +482,46 @@ func (sqc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobi _, _ = transaction(sqc, func(tx *sql.Tx) (void, error) { previous, gotPrevious, err := querySingleValue[string](tx, "SELECT compressor FROM DigestCompressors WHERE digest = ?", anyDigest.String()) if err != nil { - return void{}, fmt.Errorf("looking for compressor of for %q", anyDigest) + return void{}, fmt.Errorf("looking for compressor of %q", anyDigest) } + warned := false if gotPrevious && previous != data.BaseVariantCompressor { logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", anyDigest, previous, data.BaseVariantCompressor) + warned = true } if data.BaseVariantCompressor == blobinfocache.UnknownCompression { if _, err := tx.Exec("DELETE FROM DigestCompressors WHERE digest = ?", anyDigest.String()); err != nil { return void{}, fmt.Errorf("deleting compressor for digest %q: %w", anyDigest, err) } + if _, err := tx.Exec("DELETE FROM DigestSpecificVariantCompressors WHERE digest = ?", anyDigest.String()); err != nil { + return void{}, fmt.Errorf("deleting specific variant compressor for digest %q: %w", anyDigest, err) + } } else { if _, err := tx.Exec("INSERT OR REPLACE INTO DigestCompressors(digest, compressor) VALUES (?, ?)", anyDigest.String(), data.BaseVariantCompressor); err != nil { return void{}, fmt.Errorf("recording compressor %q for %q: %w", data.BaseVariantCompressor, anyDigest, err) } } + + if data.SpecificVariantCompressor != blobinfocache.UnknownCompression { + if !warned { // Don’t warn twice about the same digest + prevSVC, found, err := querySingleValue[string](tx, "SELECT specificVariantCompressor FROM DigestSpecificVariantCompressors WHERE digest = ?", anyDigest.String()) + if err != nil { + return void{}, fmt.Errorf("looking for specific variant compressor of %q", anyDigest) + } + if found && data.SpecificVariantCompressor != prevSVC { + logrus.Warnf("Specific compressor for blob with digest %s previously 
recorded as %s, now %s", anyDigest, prevSVC, data.SpecificVariantCompressor) + } + } + annotations, err := json.Marshal(data.SpecificVariantAnnotations) + if err != nil { + return void{}, err + } + if _, err := tx.Exec("INSERT OR REPLACE INTO DigestSpecificVariantCompressors(digest, specificVariantCompressor, specificVariantAnnotations) VALUES (?, ?, ?)", + anyDigest.String(), data.SpecificVariantCompressor, annotations); err != nil { + return void{}, fmt.Errorf("recording specific variant compressor %q/%q for %q: %w", data.SpecificVariantCompressor, annotations, anyDigest, err) + } + } return void{}, nil }) // FIXME? Log error (but throttle the log volume on repeated accesses)? } @@ -493,19 +532,32 @@ func (sqc *cache) RecordDigestCompressorData(anyDigest digest.Digest, data blobi // with unknown compression. func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, tx *sql.Tx, transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, v2Options *blobinfocache.CandidateLocations2Options) ([]prioritize.CandidateWithTime, error) { - compressorName := blobinfocache.UnknownCompression + compressionData := blobinfocache.DigestCompressorData{ + BaseVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantCompressor: blobinfocache.UnknownCompression, + SpecificVariantAnnotations: nil, + } if v2Options != nil { - compressor, found, err := querySingleValue[string](tx, "SELECT compressor FROM DigestCompressors WHERE digest = ?", digest.String()) - if err != nil { - return nil, fmt.Errorf("scanning compressorName: %w", err) - } - if found { - compressorName = compressor + var baseVariantCompressor string + var specificVariantCompressor sql.NullString + var annotationBytes []byte + switch err := tx.QueryRow("SELECT compressor, specificVariantCompressor, specificVariantAnnotations "+ + "FROM DigestCompressors LEFT JOIN DigestSpecificVariantCompressors USING (digest) WHERE digest = ?", digest.String()). 
+ Scan(&baseVariantCompressor, &specificVariantCompressor, &annotationBytes); { + case errors.Is(err, sql.ErrNoRows): // Do nothing + case err != nil: + return nil, fmt.Errorf("scanning compressor data: %w", err) + default: + compressionData.BaseVariantCompressor = baseVariantCompressor + if specificVariantCompressor.Valid && annotationBytes != nil { + compressionData.SpecificVariantCompressor = specificVariantCompressor.String + if err := json.Unmarshal(annotationBytes, &compressionData.SpecificVariantAnnotations); err != nil { + return nil, err + } + } } } - template := prioritize.CandidateTemplateWithCompression(v2Options, digest, blobinfocache.DigestCompressorData{ - BaseVariantCompressor: compressorName, - }) + template := prioritize.CandidateTemplateWithCompression(v2Options, digest, compressionData) if template == nil { return candidates, nil } From 76af27cd12776cb0d7a24cc8e7feeec24ba48273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 13 Mar 2024 22:26:18 +0100 Subject: [PATCH 34/37] Record the specific variant, and TOC annotations, for blobs we compress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce distinct uploadedCompressorBaseVariantName and uploadedCompressorSpecificVariantName fields; that way we now never call RecordDigestCompressorData with inconsistent zstd / zstd:chunked in one field, so we can always record data when we see, or create, a zstd:chunked layer, removing the current hack. 
Signed-off-by: Miloslav Trmač --- copy/compression.go | 108 +++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/copy/compression.go b/copy/compression.go index e859d0139c..cfb4c8d107 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -71,13 +71,14 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI // bpCompressionStepData contains data that the copy pipeline needs about the compression step. type bpCompressionStepData struct { - operation bpcOperation // What we are actually doing - uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do) - uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. - uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. - srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob. - uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob. - closers []io.Closer // Objects to close after the upload is done, if any. + operation bpcOperation // What we are actually doing + uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do) + uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits. + uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed. + srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob. 
+ uploadedCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the uploaded blob. + uploadedCompressorSpecificVariantName string // Compressor specific variant name to record in the blob info cache for the uploaded blob. + closers []io.Closer // Objects to close after the upload is done, if any. } type bpcOperation int @@ -129,11 +130,12 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp // We can’t do anything with an encrypted blob unless decrypted. logrus.Debugf("Using original blob without modification for encrypted blob") return &bpCompressionStepData{ - operation: bpcOpPreserveOpaque, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: nil, - srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression, - uploadedCompressorName: internalblobinfocache.UnknownCompression, + operation: bpcOpPreserveOpaque, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: nil, + srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression, + uploadedCompressorBaseVariantName: internalblobinfocache.UnknownCompression, + uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression, }, nil } return nil, nil @@ -157,14 +159,19 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp Digest: "", Size: -1, } + specificVariantName := uploadedAlgorithm.Name() + if specificVariantName == uploadedAlgorithm.BaseVariantName() { + specificVariantName = internalblobinfocache.UnknownCompression + } return &bpCompressionStepData{ - operation: bpcOpCompressUncompressed, - uploadedOperation: types.Compress, - uploadedAlgorithm: uploadedAlgorithm, - uploadedAnnotations: annotations, - srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, - uploadedCompressorName: uploadedAlgorithm.Name(), - closers: []io.Closer{reader}, + operation: bpcOpCompressUncompressed, + uploadedOperation: types.Compress, + 
uploadedAlgorithm: uploadedAlgorithm, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorBaseVariantName: uploadedAlgorithm.BaseVariantName(), + uploadedCompressorSpecificVariantName: specificVariantName, + closers: []io.Closer{reader}, }, nil } return nil, nil @@ -197,15 +204,20 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp Digest: "", Size: -1, } + specificVariantName := ic.compressionFormat.Name() + if specificVariantName == ic.compressionFormat.BaseVariantName() { + specificVariantName = internalblobinfocache.UnknownCompression + } succeeded = true return &bpCompressionStepData{ - operation: bpcOpRecompressCompressed, - uploadedOperation: types.PreserveOriginal, - uploadedAlgorithm: ic.compressionFormat, - uploadedAnnotations: annotations, - srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, - uploadedCompressorName: ic.compressionFormat.Name(), - closers: []io.Closer{decompressed, recompressed}, + operation: bpcOpRecompressCompressed, + uploadedOperation: types.PreserveOriginal, + uploadedAlgorithm: ic.compressionFormat, + uploadedAnnotations: annotations, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorBaseVariantName: ic.compressionFormat.BaseVariantName(), + uploadedCompressorSpecificVariantName: specificVariantName, + closers: []io.Closer{decompressed, recompressed}, }, nil } return nil, nil @@ -226,12 +238,13 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp Size: -1, } return &bpCompressionStepData{ - operation: bpcOpDecompressCompressed, - uploadedOperation: types.Decompress, - uploadedAlgorithm: nil, - srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, - uploadedCompressorName: internalblobinfocache.Uncompressed, - closers: []io.Closer{s}, + operation: bpcOpDecompressCompressed, + uploadedOperation: types.Decompress, + 
uploadedAlgorithm: nil, + srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorBaseVariantName: internalblobinfocache.Uncompressed, + uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression, + closers: []io.Closer{s}, }, nil } return nil, nil @@ -276,7 +289,8 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom // We only record the base variant of the format on upload; we didn’t do anything with // the TOC, we don’t know whether it matches the blob digest, so we don’t want to trigger // reuse of any kind between the blob digest and the TOC digest. - uploadedCompressorName: detected.srcCompressorBaseVariantName, + uploadedCompressorBaseVariantName: detected.srcCompressorBaseVariantName, + uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression, } } @@ -336,26 +350,16 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation) } } - if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" { - return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)", - d.srcCompressorBaseVariantName, d.uploadedCompressorName) + if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorBaseVariantName == "" || d.uploadedCompressorSpecificVariantName == "" { + return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded base: %q, uploaded specific: %q)", + d.srcCompressorBaseVariantName, d.uploadedCompressorBaseVariantName, d.uploadedCompressorSpecificVariantName) } - if d.uploadedCompressorName != internalblobinfocache.UnknownCompression { - if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName { - // HACK: Don’t record zstd:chunked algorithms. 
- // There is already a similar hack in internal/imagedestination/impl/helpers.CandidateMatchesTryReusingBlobOptions, - // and that one prevents reusing zstd:chunked blobs, so recording the algorithm here would be mostly harmless. - // - // We skip that here anyway to work around the inability of blobPipelineDetectCompressionStep to differentiate - // between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName - // with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about - // inconsistent data to be logged. - c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{ - BaseVariantCompressor: d.uploadedCompressorName, - SpecificVariantCompressor: internalblobinfocache.UnknownCompression, - SpecificVariantAnnotations: nil, - }) - } + if d.uploadedCompressorBaseVariantName != internalblobinfocache.UnknownCompression { + c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{ + BaseVariantCompressor: d.uploadedCompressorBaseVariantName, + SpecificVariantCompressor: d.uploadedCompressorSpecificVariantName, + SpecificVariantAnnotations: d.uploadedAnnotations, + }) } if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest && d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression { From 243b49d8abd26f4ecc2342d13091d6844afbf9f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 15 Mar 2024 22:17:56 +0100 Subject: [PATCH 35/37] Extend private.ReusedBlob to allow zstd:chunked reuses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a CompressionAnnotations field - Allow turning a known-zstd blob into a zstd:chunked one if we know the right annotations This just adds the fields, nothing sets them yet, should not change behavior. 
Signed-off-by: Miloslav Trmač --- copy/single.go | 29 +++++++++++++++++++-------- copy/single_test.go | 30 +++++++++++++++++++++++----- internal/imagedestination/wrapper.go | 3 +++ internal/private/private.go | 5 +++++ 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/copy/single.go b/copy/single.go index 714dc81368..d70c928557 100644 --- a/copy/single.go +++ b/copy/single.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "maps" "reflect" "slices" "strings" @@ -888,21 +889,33 @@ func updatedBlobInfoFromReuse(inputInfo types.BlobInfo, reusedBlob private.Reuse // Handling of compression, encryption, and the related MIME types and the like are all the responsibility // of the generic code in this package. res := types.BlobInfo{ - Digest: reusedBlob.Digest, - Size: reusedBlob.Size, - URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior. - Annotations: inputInfo.Annotations, // FIXME: This should remove zstd:chunked annotations (but those annotations being left with incorrect values should not break pulls) - MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation. + Digest: reusedBlob.Digest, + Size: reusedBlob.Size, + URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior. + // FIXME: This should remove zstd:chunked annotations IF the original was chunked and the new one isn’t + // (but those annotations being left with incorrect values should not break pulls). + Annotations: maps.Clone(inputInfo.Annotations), + MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation. CompressionOperation: reusedBlob.CompressionOperation, CompressionAlgorithm: reusedBlob.CompressionAlgorithm, CryptoOperation: inputInfo.CryptoOperation, // Expected to be unset anyway. 
} // The transport is only expected to fill CompressionOperation and CompressionAlgorithm - // if the blob was substituted; otherwise, fill it in based + // if the blob was substituted; otherwise, it is optional, and if not set, fill it in based // on what we know from the srcInfos we were given. if reusedBlob.Digest == inputInfo.Digest { - res.CompressionOperation = inputInfo.CompressionOperation - res.CompressionAlgorithm = inputInfo.CompressionAlgorithm + if res.CompressionOperation == types.PreserveOriginal { + res.CompressionOperation = inputInfo.CompressionOperation + } + if res.CompressionAlgorithm == nil { + res.CompressionAlgorithm = inputInfo.CompressionAlgorithm + } + } + if len(reusedBlob.CompressionAnnotations) != 0 { + if res.Annotations == nil { + res.Annotations = map[string]string{} + } + maps.Copy(res.Annotations, reusedBlob.CompressionAnnotations) } return res } diff --git a/copy/single_test.go b/copy/single_test.go index 144b5ed2aa..890a63bce8 100644 --- a/copy/single_test.go +++ b/copy/single_test.go @@ -55,22 +55,42 @@ func TestUpdatedBlobInfoFromReuse(t *testing.T) { }, { // Reuse with substitution reused: private.ReusedBlob{ - Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - Size: 513543640, - CompressionOperation: types.Decompress, - CompressionAlgorithm: nil, + Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + Size: 513543640, + CompressionOperation: types.Decompress, + CompressionAlgorithm: nil, + CompressionAnnotations: map[string]string{"decompressed": "value"}, }, expected: types.BlobInfo{ Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", Size: 513543640, URLs: nil, - Annotations: map[string]string{"test-annotation-2": "two"}, + Annotations: map[string]string{"test-annotation-2": "two", "decompressed": "value"}, MediaType: imgspecv1.MediaTypeImageLayerGzip, CompressionOperation: types.Decompress, CompressionAlgorithm: nil, // 
CryptoOperation is set to the zero value }, }, + { // Reuse turning zstd into zstd:chunked + reused: private.ReusedBlob{ + Digest: "sha256:6a5a5368e0c2d3e5909184fa28ddfd56072e7ff3ee9a945876f7eee5896ef5bb", + Size: 51354364, + CompressionOperation: types.Compress, + CompressionAlgorithm: &compression.ZstdChunked, + CompressionAnnotations: map[string]string{"zstd-toc": "value"}, + }, + expected: types.BlobInfo{ + Digest: "sha256:6a5a5368e0c2d3e5909184fa28ddfd56072e7ff3ee9a945876f7eee5896ef5bb", + Size: 51354364, + URLs: nil, + Annotations: map[string]string{"test-annotation-2": "two", "zstd-toc": "value"}, + MediaType: imgspecv1.MediaTypeImageLayerGzip, + CompressionOperation: types.Compress, + CompressionAlgorithm: &compression.ZstdChunked, + // CryptoOperation is set to the zero value + }, + }, } { res := updatedBlobInfoFromReuse(srcInfo, c.reused) assert.Equal(t, c.expected, res, fmt.Sprintf("%#v", c.reused)) diff --git a/internal/imagedestination/wrapper.go b/internal/imagedestination/wrapper.go index cdd3c5e5d0..f5a38541ae 100644 --- a/internal/imagedestination/wrapper.go +++ b/internal/imagedestination/wrapper.go @@ -76,6 +76,9 @@ func (w *wrapped) TryReusingBlobWithOptions(ctx context.Context, info types.Blob Size: blob.Size, CompressionOperation: blob.CompressionOperation, CompressionAlgorithm: blob.CompressionAlgorithm, + // CompressionAnnotations could be set to blob.Annotations, but that may contain unrelated + // annotations, and we didn’t use the blob.Annotations field previously, so we’ll + // continue not using it. }, nil } diff --git a/internal/private/private.go b/internal/private/private.go index 63fb9326de..d81ea6703e 100644 --- a/internal/private/private.go +++ b/internal/private/private.go @@ -134,9 +134,14 @@ type ReusedBlob struct { Size int64 // Must be provided // The following compression fields should be set when the reuse substitutes // a differently-compressed blob. 
+ // They may be set also to change from a base variant to a specific variant of an algorithm. CompressionOperation types.LayerCompression // Compress/Decompress, matching the reused blob; PreserveOriginal if N/A CompressionAlgorithm *compression.Algorithm // Algorithm if compressed, nil if decompressed or N/A + // Annotations that should be added, for CompressionAlgorithm. Note that they might need to be + // added even if the digest doesn’t change (if we found the annotations in a cache). + CompressionAnnotations map[string]string + MatchedByTOCDigest bool // Whether the layer was reused/matched by TOC digest. Used only for UI purposes. } From ac2ca25a970c650ee1052d53c4817a9f41be4dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 15 Mar 2024 22:28:07 +0100 Subject: [PATCH 36/37] Allow dockerImageDestination to reuse zstd:chunked blobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Return the required annotations, if we have them - If we have a zstd blob and the BIC contains the annotations, we don't check for the blob's presence initially. In that case, don't skip it if we find the BIC annotations. Signed-off-by: Miloslav Trmač --- docker/docker_image_dest.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/docker/docker_image_dest.go b/docker/docker_image_dest.go index 7f7a74bd37..ed3d4a2c0b 100644 --- a/docker/docker_image_dest.go +++ b/docker/docker_image_dest.go @@ -332,6 +332,7 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context, return false, private.ReusedBlob{}, errors.New("Can not check for a blob with unknown digest") } + originalCandidateKnownToBeMissing := false if impl.OriginalCandidateMatchesTryReusingBlobOptions(options) { // First, check whether the blob happens to already exist at the destination. 
haveBlob, reusedInfo, err := d.tryReusingExactBlob(ctx, info, options.Cache) @@ -341,9 +342,17 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context, if haveBlob { return true, reusedInfo, nil } + originalCandidateKnownToBeMissing = true } else { logrus.Debugf("Ignoring exact blob match, compression %s does not match required %s or MIME types %#v", optionalCompressionName(options.OriginalCompression), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats) + // We can get here with a blob detected to be zstd when the user wants a zstd:chunked. + // In that case we keep originalCandidateKnownToBeMissing = false, so that if we find + // a BIC entry for this blob, we do use that entry and return a zstd:chunked entry + // with the BIC’s annotations. + // This is not quite correct, it only works if the BIC also contains an acceptable _location_. + // Ideally, we could look up just the compression algorithm/annotations for info.digest, + // and use it even if no location candidate exists and the original candidate is present. } // Then try reusing blobs from other locations. @@ -387,7 +396,8 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context, // for it in the current repo. candidateRepo = reference.TrimNamed(d.ref.ref) } - if candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest { + if originalCandidateKnownToBeMissing && + candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest { logrus.Debug("... 
Already tried the primary destination") continue } @@ -427,10 +437,12 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context, options.Cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), candidate.Digest, newBICLocationReference(d.ref)) return true, private.ReusedBlob{ - Digest: candidate.Digest, - Size: size, - CompressionOperation: candidate.CompressionOperation, - CompressionAlgorithm: candidate.CompressionAlgorithm}, nil + Digest: candidate.Digest, + Size: size, + CompressionOperation: candidate.CompressionOperation, + CompressionAlgorithm: candidate.CompressionAlgorithm, + CompressionAnnotations: candidate.CompressionAnnotations, + }, nil } return false, private.ReusedBlob{}, nil From 3d38daee42ab55b706c30345654a6de148917e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 15 Mar 2024 22:33:11 +0100 Subject: [PATCH 37/37] Detect zstd:chunked format in source blobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... instead of only treating it as zstd. 
Signed-off-by: Miloslav Trmač --- copy/compression.go | 10 ++++++++++ copy/single.go | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/copy/compression.go b/copy/compression.go index cfb4c8d107..fb5e1b174e 100644 --- a/copy/compression.go +++ b/copy/compression.go @@ -52,6 +52,16 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI } stream.reader = reader + if decompressor != nil && format.Name() == compressiontypes.ZstdAlgorithmName { + tocDigest, err := chunkedToc.GetTOCDigest(srcInfo.Annotations) + if err != nil { + return bpDetectCompressionStepData{}, err + } + if tocDigest != nil { + format = compression.ZstdChunked + } + + } res := bpDetectCompressionStepData{ isCompressed: decompressor != nil, format: format, diff --git a/copy/single.go b/copy/single.go index d70c928557..ba414a22d6 100644 --- a/copy/single.go +++ b/copy/single.go @@ -163,7 +163,7 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar if format == nil { format = defaultCompressionFormat } - if format.Name() == compression.ZstdChunked.Name() { + if format.Name() == compressiontypes.ZstdChunkedAlgorithmName { if ic.requireCompressionFormatMatch { return copySingleImageResult{}, errors.New("explicitly requested to combine zstd:chunked with encryption, which is not beneficial; use plain zstd instead") }