diff --git a/docs/book/src/topics/eks/creating-a-cluster.md b/docs/book/src/topics/eks/creating-a-cluster.md
index fcb85db130..0ef75009c6 100644
--- a/docs/book/src/topics/eks/creating-a-cluster.md
+++ b/docs/book/src/topics/eks/creating-a-cluster.md
@@ -34,4 +34,12 @@ kubectl --namespace=default get secret managed-test-user-kubeconfig \
 
 This kubeconfig is used internally by CAPI and shouldn't be used outside of the management server. It is used by CAPI to perform operations, such as draining a node. The name of the secret that contains the kubeconfig will be `[cluster-name]-kubeconfig` where you need to replace **[cluster-name]** with the name of your cluster. Note that there is NO `-user` in the name.
 
-The kubeconfig is regenerated every `sync-period` as the token that is embedded in the kubeconfig is only valid for a short period of time. When EKS support is enabled the maximum sync period is 10 minutes. If you try to set `--sync-period` to greater than 10 minutes then an error will be raised.
+The CAPI kubeconfig secret for EKS clusters contains three keys:
+
+| key        | purpose                                                                                                                                                                 |
+|------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| value      | a complete kubeconfig with the cluster admin user and its token embedded                                                                                                  |
+| relative   | a kubeconfig with the cluster admin user that references the token file by a relative path; it assumes all of the secret's keys are mounted into the same directory       |
+| token-file | the same token that is embedded in the complete kubeconfig, split out into its own file so that existing apimachinery clients can reload the token when the secret rotates |
+
+The secret contents are regenerated every `sync-period` because the token embedded in the kubeconfig and in the token file is only valid for a short period of time. When EKS support is enabled, the maximum sync period is 10 minutes; setting `--sync-period` to more than 10 minutes raises an error.
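For orientation, the `relative` kubeconfig becomes directly usable once all three secret keys are mounted into one directory, because clientcmd resolves the kubeconfig's `token-file` reference relative to the kubeconfig's own location. Below is a minimal consumer sketch under that assumption; the mount path `/var/run/eks-kubeconfig` and the node listing are illustrative, not part of this change:

```go
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Assumes the secret's keys ("value", "relative", "token-file") are all
	// mounted into the same directory, as the docs table describes. The
	// mount path is hypothetical.
	cfg, err := clientcmd.BuildConfigFromFlags("", "/var/run/eks-kubeconfig/relative")
	if err != nil {
		panic(err)
	}

	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}

	// Any API call works; listing nodes just shows the client picked up
	// whatever token is currently in the mounted token file.
	nodes, err := cs.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("cluster has %d nodes\n", len(nodes.Items))
}
```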
diff --git a/pkg/cloud/services/eks/config.go b/pkg/cloud/services/eks/config.go
index 8559c2fa7f..a894f18557 100644
--- a/pkg/cloud/services/eks/config.go
+++ b/pkg/cloud/services/eks/config.go
@@ -31,9 +31,12 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/client-go/tools/clientcmd/api"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
 	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/kubeconfig"
 	"sigs.k8s.io/cluster-api/util/secret"
 )
@@ -42,6 +45,9 @@ const (
 	tokenPrefix       = "k8s-aws-v1." //nolint:gosec
 	clusterNameHeader = "x-k8s-aws-id"
 	tokenAgeMins      = 15
+
+	relativeKubeconfigKey = "relative"
+	relativeTokenFileKey  = "token-file"
 )
 
 func (s *Service) reconcileKubeconfig(ctx context.Context, cluster *eks.Cluster) error {
@@ -110,28 +116,44 @@ func (s *Service) createCAPIKubeconfigSecret(ctx context.Context, cluster *eks.C
 	clusterName := s.scope.KubernetesClusterName()
 	userName := s.getKubeConfigUserName(clusterName, false)
 
-	cfg, err := s.createBaseKubeConfig(cluster, userName)
+	config, err := s.createBaseKubeConfig(cluster, userName)
 	if err != nil {
 		return fmt.Errorf("creating base kubeconfig: %w", err)
 	}
+	clusterConfig := config.DeepCopy()
 
 	token, err := s.generateToken()
 	if err != nil {
 		return fmt.Errorf("generating presigned token: %w", err)
 	}
 
-	cfg.AuthInfos = map[string]*api.AuthInfo{
+	clusterConfig.AuthInfos = map[string]*api.AuthInfo{
 		userName: {
 			Token: token,
 		},
 	}
 
-	out, err := clientcmd.Write(*cfg)
+	out, err := clientcmd.Write(*clusterConfig)
 	if err != nil {
 		return errors.Wrap(err, "failed to serialize config to yaml")
 	}
 
-	kubeconfigSecret := kubeconfig.GenerateSecretWithOwner(*clusterRef, out, controllerOwnerRef)
+	secretData := make(map[string][]byte)
+	secretData[secret.KubeconfigDataName] = out
+
+	config.AuthInfos = map[string]*api.AuthInfo{
+		userName: {
+			TokenFile: "./" + relativeTokenFileKey,
+		},
+	}
+	out, err = clientcmd.Write(*config)
+	if err != nil {
+		return errors.Wrap(err, "failed to serialize config to yaml")
+	}
+	secretData[relativeKubeconfigKey] = out
+	secretData[relativeTokenFileKey] = []byte(token)
+
+	kubeconfigSecret := generateSecretWithOwner(*clusterRef, secretData, controllerOwnerRef)
 	if err := s.scope.Client.Create(ctx, kubeconfigSecret); err != nil {
 		return errors.Wrap(err, "failed to create kubeconfig secret")
 	}
@@ -142,32 +164,49 @@ func (s *Service) updateCAPIKubeconfigSecret(ctx context.Context, configSecret *corev1.Secret, cluster *eks.Cluster) error {
 	s.scope.Debug("Updating EKS kubeconfigs for cluster", "cluster-name", s.scope.KubernetesClusterName())
 
+	controllerOwnerRef := *metav1.NewControllerRef(s.scope.ControlPlane, ekscontrolplanev1.GroupVersion.WithKind("AWSManagedControlPlane"))
-	data, ok := configSecret.Data[secret.KubeconfigDataName]
-	if !ok {
-		return errors.Errorf("missing key %q in secret data", secret.KubeconfigDataName)
+	if !util.HasOwnerRef(configSecret.OwnerReferences, controllerOwnerRef) {
+		return fmt.Errorf("EKS kubeconfig %s/%s missing expected AWSManagedControlPlane ownership", configSecret.Namespace, configSecret.Name)
 	}
 
-	config, err := clientcmd.Load(data)
+	clusterName := s.scope.KubernetesClusterName()
+	userName := s.getKubeConfigUserName(clusterName, false)
+	config, err := s.createBaseKubeConfig(cluster, userName)
 	if err != nil {
-		return errors.Wrap(err, "failed to convert kubeconfig Secret into a clientcmdapi.Config")
+		return fmt.Errorf("creating base kubeconfig: %w", err)
 	}
+	clusterConfig := config.DeepCopy()
 
 	token, err := s.generateToken()
 	if err != nil {
 		return fmt.Errorf("generating presigned token: %w", err)
 	}
 
-	userName := s.getKubeConfigUserName(*cluster.Name, false)
-	config.AuthInfos[userName].Token = token
+	clusterConfig.AuthInfos = map[string]*api.AuthInfo{
+		userName: {
+			Token: token,
+		},
+	}
 
-	out, err := clientcmd.Write(*config)
+	out, err := clientcmd.Write(*clusterConfig)
 	if err != nil {
 		return errors.Wrap(err, "failed to serialize config to yaml")
 	}
-
 	configSecret.Data[secret.KubeconfigDataName] = out
+
+	config.AuthInfos = map[string]*api.AuthInfo{
+		userName: {
+			TokenFile: "./" + relativeTokenFileKey,
+		},
+	}
+	out, err = clientcmd.Write(*config)
+	if err != nil {
+		return errors.Wrap(err, "failed to serialize config to yaml")
+	}
+	configSecret.Data[relativeKubeconfigKey] = out
+	configSecret.Data[relativeTokenFileKey] = []byte(token)
+
 	err = s.scope.Client.Update(ctx, configSecret)
 	if err != nil {
 		return fmt.Errorf("updating kubeconfig secret: %w", err)
@@ -283,3 +322,21 @@ func (s *Service) getKubeConfigUserName(clusterName string, isUser bool) string
 
 	return fmt.Sprintf("%s-capi-admin", clusterName)
 }
+
+// generateSecretWithOwner returns a Kubernetes secret for the given Cluster name, namespace, kubeconfig data, and ownerReference.
+func generateSecretWithOwner(clusterName client.ObjectKey, data map[string][]byte, owner metav1.OwnerReference) *corev1.Secret {
+	return &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      secret.Name(clusterName.Name, secret.Kubeconfig),
+			Namespace: clusterName.Namespace,
+			Labels: map[string]string{
+				clusterv1.ClusterNameLabel: clusterName.Name,
+			},
+			OwnerReferences: []metav1.OwnerReference{
+				owner,
+			},
+		},
+		Data: data,
+		Type: clusterv1.ClusterSecretType,
+	}
+}
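Both `createCAPIKubeconfigSecret` and `updateCAPIKubeconfigSecret` rely on `s.generateToken()`, which this diff does not show. For orientation: EKS bearer tokens of the `k8s-aws-v1.` form are presigned STS `GetCallerIdentity` URLs, with the cluster name carried in the `x-k8s-aws-id` header (`clusterNameHeader` above) and a lifetime on the order of `tokenAgeMins` minutes. A minimal sketch of that scheme with aws-sdk-go v1 follows; it illustrates the token format and is not claimed to be the provider's exact implementation:

```go
package main

import (
	"encoding/base64"
	"fmt"
	"time"

	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/sts"
)

// presignedToken mints a k8s-aws-v1. style token: presign an STS
// GetCallerIdentity request that carries the cluster name header, then
// base64url-encode the presigned URL.
func presignedToken(clusterName string) (string, error) {
	sess, err := session.NewSession()
	if err != nil {
		return "", err
	}
	stsClient := sts.New(sess)

	req, _ := stsClient.GetCallerIdentityRequest(&sts.GetCallerIdentityInput{})
	// clusterNameHeader in the const block above.
	req.HTTPRequest.Header.Add("x-k8s-aws-id", clusterName)

	// tokenAgeMins in the const block above is 15.
	presignedURL, err := req.Presign(15 * time.Minute)
	if err != nil {
		return "", err
	}
	// tokenPrefix in the const block above.
	return "k8s-aws-v1." + base64.RawURLEncoding.EncodeToString([]byte(presignedURL)), nil
}

func main() {
	token, err := presignedToken("cluster-foo")
	if err != nil {
		panic(err)
	}
	fmt.Println(token[:20], "...")
}
```

This is also why the secret has to be refreshed every `sync-period`: the presigned URL, and therefore the token, expires after roughly fifteen minutes no matter where it is stored.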
diff --git a/pkg/cloud/services/eks/config_test.go b/pkg/cloud/services/eks/config_test.go
new file mode 100644
index 0000000000..6d6f4ce2ec
--- /dev/null
+++ b/pkg/cloud/services/eks/config_test.go
@@ -0,0 +1,268 @@
+package eks
+
+import (
+	"context"
+	"net/http"
+	"net/url"
+	"testing"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/request"
+	"github.com/aws/aws-sdk-go/service/eks"
+	"github.com/aws/aws-sdk-go/service/sts"
+	"github.com/golang/mock/gomock"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
+	ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/sts/mock_stsiface"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util/secret"
+)
+
+func Test_createCAPIKubeconfigSecret(t *testing.T) {
+	testCases := []struct {
+		name        string
+		input       *eks.Cluster
+		serviceFunc func() *Service
+		wantErr     bool
+	}{
+		{
+			name: "create kubeconfig secret",
+			input: &eks.Cluster{
+				CertificateAuthority: &eks.Certificate{Data: aws.String("")},
+				Endpoint:             aws.String("https://F00BA4.gr4.us-east-2.eks.amazonaws.com"),
+			},
+			serviceFunc: func() *Service {
+				mockCtrl := gomock.NewController(t)
+				stsMock := mock_stsiface.NewMockSTSAPI(mockCtrl)
+				op := request.Request{
+					Operation: &request.Operation{
+						Name:       "GetCallerIdentity",
+						HTTPMethod: "POST",
+						HTTPPath:   "/",
+					},
+					HTTPRequest: &http.Request{
+						Header: make(http.Header),
+						URL: &url.URL{
+							Scheme: "https",
+							Host:   "F00BA4.gr4.us-east-2.eks.amazonaws.com",
+						},
+					},
+				}
+				stsMock.EXPECT().GetCallerIdentityRequest(gomock.Any()).Return(&op, &sts.GetCallerIdentityOutput{})
+
+				scheme := runtime.NewScheme()
+				_ = infrav1.AddToScheme(scheme)
+				_ = ekscontrolplanev1.AddToScheme(scheme)
+				_ = corev1.AddToScheme(scheme)
+
+				client := fake.NewClientBuilder().WithScheme(scheme).Build()
+				managedScope, _ := scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{
+					Client: client,
+					Cluster: &clusterv1.Cluster{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "ns",
+							Name:      "capi-cluster-foo",
+						},
+					},
+					ControlPlane: &ekscontrolplanev1.AWSManagedControlPlane{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "ns",
+							Name:      "capi-cluster-foo",
+							UID:       types.UID("1"),
+						},
+						Spec: ekscontrolplanev1.AWSManagedControlPlaneSpec{
+							EKSClusterName: "cluster-foo",
+						},
+					},
+				})
+
+				service := NewService(managedScope)
+				service.STSClient = stsMock
+				return service
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			service := tc.serviceFunc()
+			clusterRef := types.NamespacedName{
+				Namespace: service.scope.Namespace(),
+				Name:      service.scope.Name(),
+			}
+			err := service.createCAPIKubeconfigSecret(context.TODO(), tc.input, &clusterRef)
+			if tc.wantErr {
+				g.Expect(err).ToNot(BeNil())
+			} else {
+				g.Expect(err).To(BeNil())
+				var kubeconfigSecret corev1.Secret
+				g.Expect(service.scope.Client.Get(context.TODO(), types.NamespacedName{Namespace: "ns", Name: "capi-cluster-foo-kubeconfig"}, &kubeconfigSecret)).To(BeNil())
+				g.Expect(kubeconfigSecret.Data).ToNot(BeNil())
+				g.Expect(len(kubeconfigSecret.Data)).To(BeIdenticalTo(3))
+				g.Expect(kubeconfigSecret.Data[secret.KubeconfigDataName]).ToNot(BeEmpty())
+				g.Expect(kubeconfigSecret.Data[relativeKubeconfigKey]).ToNot(BeEmpty())
+				g.Expect(kubeconfigSecret.Data[relativeTokenFileKey]).ToNot(BeEmpty())
+			}
+		})
+	}
+}
+
+func Test_updateCAPIKubeconfigSecret(t *testing.T) {
+	type testCase struct {
+		name        string
+		input       *eks.Cluster
+		secret      *corev1.Secret
+		serviceFunc func(tc testCase) *Service
+		wantErr     bool
+	}
+	testCases := []testCase{
+		{
+			name: "update kubeconfig secret",
+			input: &eks.Cluster{
+				Name:                 aws.String("cluster-foo"),
+				CertificateAuthority: &eks.Certificate{Data: aws.String("")},
+				Endpoint:             aws.String("https://F00BA4.gr4.us-east-2.eks.amazonaws.com"),
+			},
+			secret: &corev1.Secret{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: "ns",
+					Name:      "capi-cluster-foo-kubeconfig",
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: "controlplane.cluster.x-k8s.io/v1beta2",
+							Kind:       "AWSManagedControlPlane",
+							Name:       "capi-cluster-foo",
+							UID:        "1",
+							Controller: aws.Bool(true),
+						},
+					},
+				},
+				Data: make(map[string][]byte),
+			},
+			serviceFunc: func(tc testCase) *Service {
+				mockCtrl := gomock.NewController(t)
+				stsMock := mock_stsiface.NewMockSTSAPI(mockCtrl)
+				op := request.Request{
+					Operation: &request.Operation{
+						Name:       "GetCallerIdentity",
+						HTTPMethod: "POST",
+						HTTPPath:   "/",
+					},
+					HTTPRequest: &http.Request{
+						Header: make(http.Header),
+						URL: &url.URL{
+							Scheme: "https",
+							Host:   "F00BA4.gr4.us-east-2.eks.amazonaws.com",
+						},
+					},
+				}
+				stsMock.EXPECT().GetCallerIdentityRequest(gomock.Any()).Return(&op, &sts.GetCallerIdentityOutput{})
+
+				scheme := runtime.NewScheme()
+				_ = infrav1.AddToScheme(scheme)
+				_ = ekscontrolplanev1.AddToScheme(scheme)
+				_ = corev1.AddToScheme(scheme)
+
+				client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tc.secret).Build()
+				managedScope, _ := scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{
+					Client: client,
+					Cluster: &clusterv1.Cluster{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "ns",
+							Name:      "capi-cluster-foo",
+						},
+					},
+					ControlPlane: &ekscontrolplanev1.AWSManagedControlPlane{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "ns",
+							Name:      "capi-cluster-foo",
+							UID:       "1",
+						},
+						Spec: ekscontrolplanev1.AWSManagedControlPlaneSpec{
+							EKSClusterName: "cluster-foo",
+						},
+					},
+				})
+
+				service := NewService(managedScope)
+				service.STSClient = stsMock
+				return service
+			},
+		},
+		{
+			name: "detect incorrect ownership on the kubeconfig secret",
+			input: &eks.Cluster{
+				Name:                 aws.String("cluster-foo"),
+				CertificateAuthority: &eks.Certificate{Data: aws.String("")},
+				Endpoint:             aws.String("https://F00BA4.gr4.us-east-2.eks.amazonaws.com"),
+			},
+			secret: &corev1.Secret{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: "ns",
+					Name:      "capi-cluster-foo-kubeconfig",
+				},
+				Data: make(map[string][]byte),
+			},
+			serviceFunc: func(tc testCase) *Service {
+				scheme := runtime.NewScheme()
+				_ = infrav1.AddToScheme(scheme)
+				_ = ekscontrolplanev1.AddToScheme(scheme)
+				_ = corev1.AddToScheme(scheme)
+
+				client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tc.secret).Build()
+				managedScope, _ := scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{
+					Client: client,
+					Cluster: &clusterv1.Cluster{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "ns",
+							Name:      "capi-cluster-foo",
+						},
+					},
+					ControlPlane: &ekscontrolplanev1.AWSManagedControlPlane{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "ns",
+							Name:      "capi-cluster-foo",
+							UID:       "1",
+						},
+						Spec: ekscontrolplanev1.AWSManagedControlPlaneSpec{
+							EKSClusterName: "cluster-foo",
+						},
+					},
+				})
+
+				service := NewService(managedScope)
+				return service
+			},
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			service := tc.serviceFunc(tc)
+			err := service.updateCAPIKubeconfigSecret(context.TODO(), tc.secret, tc.input)
+			if tc.wantErr {
+				g.Expect(err).ToNot(BeNil())
+			} else {
+				g.Expect(err).To(BeNil())
+				var kubeconfigSecret corev1.Secret
+				g.Expect(service.scope.Client.Get(context.TODO(), types.NamespacedName{Namespace: "ns", Name: "capi-cluster-foo-kubeconfig"}, &kubeconfigSecret)).To(BeNil())
+				g.Expect(kubeconfigSecret.Data).ToNot(BeNil())
+				g.Expect(len(kubeconfigSecret.Data)).To(BeIdenticalTo(3))
+				g.Expect(kubeconfigSecret.Data[secret.KubeconfigDataName]).ToNot(BeEmpty())
+				g.Expect(kubeconfigSecret.Data[relativeKubeconfigKey]).ToNot(BeEmpty())
+				g.Expect(kubeconfigSecret.Data[relativeTokenFileKey]).ToNot(BeEmpty())
+			}
+		})
+	}
+}
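The tests assert that all three keys land in the secret. The reason the token is split into its own `token-file` key (and referenced via `TokenFile` in the `relative` kubeconfig) is that client-go re-reads a bearer token file rather than caching the credential for the life of the process, so the controller's `sync-period` rotation reaches long-running clients without a restart. A minimal sketch of that consumption pattern with `rest.Config`; the endpoint and file paths are placeholders, and in practice the CA is embedded in the kubeconfigs rather than shipped as a separate file:

```go
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

func main() {
	// BearerTokenFile tells client-go to read the token from disk and
	// periodically refresh it, which is what lets a rotated secret take
	// effect for long-running clients. Host and paths are illustrative.
	cfg := &rest.Config{
		Host:            "https://F00BA4.gr4.us-east-2.eks.amazonaws.com",
		BearerTokenFile: "/var/run/eks-kubeconfig/token-file",
		TLSClientConfig: rest.TLSClientConfig{
			// Placeholder: the real secret embeds the CA in the kubeconfigs.
			CAFile: "/var/run/eks-kubeconfig/ca.crt",
		},
	}

	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}
	ns, err := cs.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("saw %d namespaces\n", len(ns.Items))
}
```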
diff --git a/test/e2e/shared/suite.go b/test/e2e/shared/suite.go
index a71769ed2b..bf1212e4c2 100644
--- a/test/e2e/shared/suite.go
+++ b/test/e2e/shared/suite.go
@@ -140,6 +140,7 @@ func Node1BeforeSuite(e2eCtx *E2EContext) []byte {
 		By(fmt.Sprintf("Trying to create CloudFormation stack... attempt %d", count))
 		success := true
 		if err := createCloudFormationStack(e2eCtx.AWSSession, bootstrapTemplate, bootstrapTags); err != nil {
+			By(fmt.Sprintf("Failed to create CloudFormation stack in attempt %d: %s", count, err.Error()))
 			deleteCloudFormationStack(e2eCtx.AWSSession, bootstrapTemplate)
 			success = false
 		}