Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/cmd/builder/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func Build(ctx context.Context, client *containerd.Client, options types.Builder
if _, err := imageService.Create(ctx, image); err != nil {
// if already exists; skip.
if errors.Is(err, errdefs.ErrAlreadyExists) {
if err = imageService.Delete(ctx, targetRef); err != nil {
if err = imageService.Delete(ctx, targetRef, images.SynchronousDelete()); err != nil {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may explain some of the "internal: not found" errrors we sometimes see when building.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed this behavior is not intuitive at all. I expect that delete to be sync by default and the async is activated by parameter, Good catch

return err
}
if _, err = imageService.Create(ctx, image); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions pkg/cmd/image/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func Convert(ctx context.Context, client *containerd.Client, srcRawRef, targetRa
}

// converter.Convert() gains the lease by itself
newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...)
newImg, err := converterutil.Convert(ctx, client, targetRef, srcRef, convertOpts...)
if err != nil {
return err
}
Expand All @@ -208,7 +208,7 @@ func Convert(ctx context.Context, client *containerd.Client, srcRawRef, targetRa
return err
}
is := client.ImageService()
_ = is.Delete(ctx, newI.Name)
_ = is.Delete(ctx, newI.Name, images.SynchronousDelete())
finimg, err := is.Create(ctx, *newI)
if err != nil {
return err
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/image/crypt.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/containerd/imgcrypt/v2/images/encryption/parsehelpers"

"github.com/containerd/nerdctl/v2/pkg/api/types"
nerdconverter "github.com/containerd/nerdctl/v2/pkg/imgutil/converter"
"github.com/containerd/nerdctl/v2/pkg/platformutil"
"github.com/containerd/nerdctl/v2/pkg/referenceutil"
)
Expand Down Expand Up @@ -93,7 +94,7 @@ func Crypt(ctx context.Context, client *containerd.Client, srcRawRef, targetRawR
convertOpts = append(convertOpts, converter.WithIndexConvertFunc(convertFunc))

// converter.Convert() gains the lease by itself
newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...)
newImg, err := nerdconverter.Convert(ctx, client, targetRef, srcRef, convertOpts...)
if err != nil {
return err
}
Expand Down
10 changes: 8 additions & 2 deletions pkg/cmd/image/push.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (

"github.com/containerd/nerdctl/v2/pkg/api/types"
"github.com/containerd/nerdctl/v2/pkg/errutil"
nerdconverter "github.com/containerd/nerdctl/v2/pkg/imgutil/converter"
"github.com/containerd/nerdctl/v2/pkg/imgutil/dockerconfigresolver"
"github.com/containerd/nerdctl/v2/pkg/imgutil/push"
"github.com/containerd/nerdctl/v2/pkg/ipfs"
Expand Down Expand Up @@ -119,7 +120,12 @@ func Push(ctx context.Context, client *containerd.Client, rawRef string, options
pushRef = ref + "-tmp-reduced-platform"
// Push fails with "400 Bad Request" when the manifest is multi-platform but we do not locally have multi-platform blobs.
// So we create a tmp reduced-platform image to avoid the error.
platImg, err := converter.Convert(ctx, client, pushRef, ref, converter.WithPlatform(platMC))
// Ensure all the layers are here: https://github.com/containerd/nerdctl/issues/3425
err = EnsureAllContent(ctx, client, ref, platMC, options.GOptions)
if err != nil {
return err
}
platImg, err := nerdconverter.Convert(ctx, client, pushRef, ref, converter.WithPlatform(platMC))
if err != nil {
if len(options.Platforms) == 0 {
return fmt.Errorf("failed to create a tmp single-platform image %q: %w", pushRef, err)
Expand All @@ -132,7 +138,7 @@ func Push(ctx context.Context, client *containerd.Client, rawRef string, options

if options.Estargz {
pushRef = ref + "-tmp-esgz"
esgzImg, err := converter.Convert(ctx, client, pushRef, ref, converter.WithPlatform(platMC), converter.WithLayerConvertFunc(eStargzConvertFunc()))
esgzImg, err := nerdconverter.Convert(ctx, client, pushRef, ref, converter.WithPlatform(platMC), converter.WithLayerConvertFunc(eStargzConvertFunc()))
if err != nil {
return fmt.Errorf("failed to convert to eStargz: %v", err)
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/image/remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ func Remove(ctx context.Context, client *containerd.Client, args []string, optio

if cid, ok := runningImages[found.Image.Name]; ok {
if options.Force {
// FIXME: this is suspicious, but passing the opt seem to break some tests
// if err = is.Delete(ctx, found.Image.Name, delOpts...); err != nil {
if err = is.Delete(ctx, found.Image.Name); err != nil {
return err
}
Expand Down Expand Up @@ -126,6 +128,8 @@ func Remove(ctx context.Context, client *containerd.Client, args []string, optio

if cid, ok := runningImages[found.Image.Name]; ok {
if options.Force {
// FIXME: this is suspicious, but passing the opt seem to break some tests
// if err = is.Delete(ctx, found.Image.Name, delOpts...); err != nil {
if err = is.Delete(ctx, found.Image.Name); err != nil {
return false, err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/image/tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"

containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/errdefs"
"github.com/containerd/log"

Expand Down Expand Up @@ -81,7 +82,7 @@ func Tag(ctx context.Context, client *containerd.Client, options types.ImageTagO
img.Name = parsedReference.String()
if _, err = imageService.Create(ctx, img); err != nil {
if errdefs.IsAlreadyExists(err) {
if err = imageService.Delete(ctx, img.Name); err != nil {
if err = imageService.Delete(ctx, img.Name, images.SynchronousDelete()); err != nil {
return err
}
if _, err = imageService.Create(ctx, img); err != nil {
Expand Down
55 changes: 55 additions & 0 deletions pkg/imgutil/converter/convert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package converter

import (
"context"

"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/images/converter"
)

// Something seems wrong in converter.Convert.
// When dstRef != srcRef, convert will first forcefully delete dstRef,
// *asynchronously*, then create the image.
// This seems to cause a race conditions, and the deletion may kick in after the creation.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the lease is not maintained during the creation ? 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No clue.

My gut intuition is that this thing faceplants when the old ref is actually exactly the same image as new ref.

Note that patching containerd to also use a sync delete does NOT fix the issue. 🤔

Anyhow, for me the bottom-line really is:

  • do not use leases + delete - clealy buggy
  • do not use async delete - counter-intuitive

// This here is to workaround the bug, by manually creating the image first,
// then converting it in place (which avoid the problematic code-path).
// See containerd upstream discussion https://github.com/containerd/containerd/pull/11628 and
// nerdctl issues:
// https://github.com/containerd/nerdctl/issues/3509#issuecomment-2398236766
// https://github.com/containerd/nerdctl/issues/3513
// Note this should be remove if/when containerd merges in a fix.

func Convert(ctx context.Context, client converter.Client, dstRef, srcRef string, opts ...converter.Opt) (*images.Image, error) {
imageService := client.ImageService()

img, err := imageService.Get(ctx, srcRef)
if err != nil {
return nil, err
}

img.Name = dstRef

_ = imageService.Delete(ctx, img.Name, images.SynchronousDelete())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to delete/re-create destRef if it already exists ?

Copy link
Contributor Author

@apostasie apostasie Apr 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not seem like we do need to delete indeed (note that I am not 100% familiar with it yet).

Indeed, my containerd patch does away with the delete (which is the source of the problem), and instead tries to update, then if it fails tries to create.

This patch here is opting for the least amount of change, but of course happy to clean-up a bit more if we want.

lmk your preference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fahedouch

Actually, I don't think this is the end of it (though I am convinced this patch will significantly enhance the situation).

I think in a follow-up PR we should thoroughly review our codebase and:

  • make sure we use leases where need be (GC might still happen and trigger similar symptoms)
  • evaluate why we are calling delete and what happens if we do not

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM to unblock the situation. But in the short and medium term, I don't want to move the containerd logic to nerdctl.


if _, err = imageService.Create(ctx, img); err != nil {
return nil, err
}

return converter.Convert(ctx, client, dstRef, dstRef, opts...)
}