Skip to content

Commit

Permalink
Go back to capi patch for CRD race condition (#2923)
Browse files Browse the repository at this point in the history
The patch in upstream controller-runtime left one scenario unsolved and
we happen to now be hitting it. I'm working on fixing this upstream but
in the meantime we are going back to the original patch.
  • Loading branch information
g-gaston authored Feb 14, 2024
1 parent 468e978 commit 7b53de4
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 113 deletions.
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ https://github.com/kubernetes-sigs/apiserver-network-proxy
** sigs.k8s.io/cluster-api; version v1.6.0 --
https://github.com/kubernetes-sigs/cluster-api

** sigs.k8s.io/controller-runtime; version v0.16.4 --
** sigs.k8s.io/controller-runtime; version v0.16.3 --
https://github.com/kubernetes-sigs/controller-runtime

** sigs.k8s.io/json; version v0.0.0-20221116044647-bc3834ca7abd --
Expand Down
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ https://github.com/kubernetes-sigs/cluster-api
** sigs.k8s.io/cluster-api/test/infrastructure/kind; version v1.6.0 --
https://github.com/kubernetes-sigs/cluster-api

** sigs.k8s.io/controller-runtime; version v0.16.4 --
** sigs.k8s.io/controller-runtime; version v0.16.3 --
https://github.com/kubernetes-sigs/controller-runtime

** sigs.k8s.io/json; version v0.0.0-20221116044647-bc3834ca7abd --
Expand Down
20 changes: 10 additions & 10 deletions projects/kubernetes-sigs/cluster-api/CHECKSUMS
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
edb650666212458d2933b237cc8e971a28c325ad991aa7867a6700a9955a1836 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
8bf3a6e1497a0bc7365b48696e6310fcadef949eeb50b77a8e8769622b6451e4 _output/bin/cluster-api/linux-amd64/clusterctl
09a9e9ce38948db589e036bce31fa566454b4a0cf4a39bb63226b7d2c95c43d4 _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
3be6e1f055c662f872cb4059ce4489659db439312a4eb0326f16142322008f27 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
64e208ec15e035c6818af46e4a097f890db3b1473f31c59d1d5a6b564f97aa83 _output/bin/cluster-api/linux-amd64/manager
78195694b39ddbac6a08be081b0ca457c8c795731d370fd86d918ef966987b52 _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
aebe6f8a5af764f6f9bad56bab9bd811ce85e3bd2cec1effcc60c816aa96026d _output/bin/cluster-api/linux-arm64/clusterctl
0c6aed4aaf5c128759db833844c0663cb9a80d07975598f7f0176bfac045ebbc _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
a7da98ce118f366689f05e36ef9f163e2395509a4e75b4139d8f147a6c1e9e0e _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
1375fa164a8b2679d4c3801101954ad8522ea34ab1f9c7db1df752cf34d6ff71 _output/bin/cluster-api/linux-arm64/manager
9cc600dbbd7b6aba2f799da0dc566676188cc95dfbf833beca162295aa18d57e _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
8aa761dd700376a5ac1750c41b6e412b1b8b66ea24e4c65ca503b90836a68ab6 _output/bin/cluster-api/linux-amd64/clusterctl
f6b7ce3009f058f28b1136d1144a5c0dcb3a44278bd9947e034d4428d1300c62 _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
4c7fac42e79e0c4c8847f3b976835da8c7c39cfa592819639d24a5534f7ee189 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
2950db4742b814f727389f8d315bab4905f40cf907e9de089743b7e67fe96897 _output/bin/cluster-api/linux-amd64/manager
0c1588ede3ab75a0213416d0772a7422326fea282cdee39671038e45cb7f5fcb _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
1ac5248d227318f8ee776c69959250ae2d3c05d9e66f549aafab4fc35dcfecec _output/bin/cluster-api/linux-arm64/clusterctl
64bb7ffb8f6feaf6a5d78cea83f90ac3d4605b5179befbc391584a53ee65705e _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
9dccfc56c3b0179700312295d02d29b008b2fd9c2c988728c979751764371eeb _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
ddae3144474404243a96bce43ad42af71e38f05b8fd006848f4f6f7b3c017af0 _output/bin/cluster-api/linux-arm64/manager

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
From 12158d086f242c6e7fa2930689fa01ef38ab9ece Mon Sep 17 00:00:00 2001
From: Guillermo Gaston <[email protected]>
Date: Sat, 20 Jan 2024 22:05:04 +0000
Subject: [PATCH 37/37] Restart controller if RESTMapping outdated cache is
detected when reconciling external object

---
controllers/external/util.go | 28 +++++++-
controllers/external/util_test.go | 116 +++++++++++++++++++++++++++++-
2 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/controllers/external/util.go b/controllers/external/util.go
index 5b6443c78..62ed07959 100644
--- a/controllers/external/util.go
+++ b/controllers/external/util.go
@@ -19,13 +19,17 @@ package external
import (
"context"
"strings"
+ "syscall"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apiserver/pkg/storage/names"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
+ "sigs.k8s.io/controller-runtime/pkg/log"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)
@@ -40,12 +44,34 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name
obj.SetKind(ref.Kind)
obj.SetName(ref.Name)
key := client.ObjectKey{Name: obj.GetName(), Namespace: namespace}
- if err := c.Get(ctx, key, obj); err != nil {
+ err := c.Get(ctx, key, obj)
+ if isV1alpha4NotFoundFromDiscoveryError(err) {
+ logErrorAndGracefulShutdown(
+ ctx,
+ err,
+ "Client RESTMapper returned an error from an invalid cache referencing infrastructure.cluster.x-k8s.io/v1alpha4, exiting the program to force a new cache to be built",
+ )
+ }
+ if err != nil {
return nil, errors.Wrapf(err, "failed to retrieve %s external object %q/%q", obj.GetKind(), key.Namespace, key.Name)
}
return obj, nil
}

+func isV1alpha4NotFoundFromDiscoveryError(err error) bool {
+ discoverFailedErr := &apiutil.ErrResourceDiscoveryFailed{}
+ noResourceMatchErr := &meta.NoResourceMatchError{}
+ return errors.As(err, &discoverFailedErr) &&
+ errors.As(err, &noResourceMatchErr) && // This is the error that ErrResourceDiscoveryFailed will unwrap when the original error is NotFound.
+ strings.Contains(err.Error(), "cluster.x-k8s.io/v1alpha4")
+}
+
+func logErrorAndGracefulShutdown(ctx context.Context, err error, msg string) {
+ logger := log.FromContext(ctx)
+ logger.Error(err, msg)
+ syscall.Kill(syscall.Getpid(), syscall.SIGINT)
+}
+
// Delete uses the client and reference to delete an external, unstructured object.
func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error {
obj := new(unstructured.Unstructured)
diff --git a/controllers/external/util_test.go b/controllers/external/util_test.go
index 012445478..570cd4dae 100644
--- a/controllers/external/util_test.go
+++ b/controllers/external/util_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package external

import (
+ "fmt"
"testing"

. "github.com/onsi/gomega"
@@ -25,16 +26,16 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime/schema"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

-var (
- ctx = ctrl.SetupSignalHandler()
-)
+var ctx = ctrl.SetupSignalHandler()

const (
testClusterName = "test-cluster"
@@ -323,3 +324,112 @@ func TestCloneTemplateMissingSpecTemplate(t *testing.T) {
})
g.Expect(err).To(HaveOccurred())
}
+
+func TestIsV1alpha4NotFoundFromDiscoveryError(t *testing.T) {
+ tests := []struct {
+ name string
+ err error
+ want bool
+ }{
+ {
+ name: "the error we are looking for",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ want: true,
+ },
+ {
+ name: "the error we are looking for infra api for but wrapped",
+ err: fmt.Errorf("failed to get restmapping: %w",
+ &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ ),
+ want: true,
+ },
+ {
+ name: "the error we are looking for bootstrap api for but wrapped",
+ err: fmt.Errorf("failed to get restmapping: %w",
+ &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "bootstrap.cluster.x-k8s.io",
+ Version: "v1beta1",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "bootstrap.cluster.x-k8s.io/v1alpha4"),
+ },
+ ),
+ want: true,
+ },
+ {
+ name: "v1alpha4 not found with different group",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "different.group",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "different.group/v1alpha4"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io not found error with different version",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "differentkind",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/differentkind"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 but different error that is not NotFound",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: errors.New("some other error"),
+ },
+ want: false,
+ },
+ {
+ name: "plain not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ },
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ Message: "infrastructure.cluster.x-k8s.io/v1alpha4",
+ },
+ },
+ want: false,
+ },
+ {
+ name: "not error",
+ err: nil,
+ want: false,
+ },
+ {
+ name: "other error",
+ err: errors.New("some other error"),
+ want: false,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ g := NewWithT(t)
+ g.Expect(isV1alpha4NotFoundFromDiscoveryError(test.err)).To(Equal(test.want))
+ })
+ }
+}
--
2.34.1

0 comments on commit 7b53de4

Please sign in to comment.