From 3b30af8d53ee8b16277dec85a584739403edacd5 Mon Sep 17 00:00:00 2001 From: Christian Bianchi Date: Fri, 1 Sep 2023 08:01:11 +0200 Subject: [PATCH] Cilium eni (#1621) * Add ENI mode for Cilium on AWS. * Add ENI mode for Cilium on AWS. * Add ENI mode for Cilium on AWS. * Add ENI mode for Cilium on AWS. * Add ENI mode for Cilium on AWS. * refactoring * refactoring * nancy * nancy * set cluster name and remove the CNI config for configmap * unused * change to boolean * revert to working state * fix subnettagfilter * fix subnettagfilter * desired * install true and exclusive * remove extraEnv * change to conflist * rename CNI_CONF_NAME * switch to conflist for cilium * switch to conflist for cilium * selector * selector comment * release version fix to string * Add AWS operator version * release version * eni without kubeproxy * fix * move error definition to own file * Define IPV4 routing * Free Extra IPs --------- Co-authored-by: Pau --- .nancy-ignore | 1 + CHANGELOG.md | 2 +- flag/service/installation/installation.go | 5 ++ flag/service/service.go | 12 +-- .../cluster-operator/templates/configmap.yaml | 2 + helm/cluster-operator/values.yaml | 3 + main.go | 1 + pkg/label/version.go | 2 + service/controller/cluster.go | 2 + service/controller/key/cilium.go | 10 +++ service/controller/key/common.go | 4 + service/controller/key/provider.go | 5 ++ .../resource/clusterconfigmap/desired.go | 88 ++++++++++++++++++- .../resource/clusterconfigmap/error.go | 8 ++ .../resource/clusterconfigmap/resource.go | 4 + service/service.go | 1 + 16 files changed, 141 insertions(+), 9 deletions(-) create mode 100644 flag/service/installation/installation.go create mode 100644 service/controller/key/provider.go diff --git a/.nancy-ignore b/.nancy-ignore index 089f611cb..8b3896e2c 100644 --- a/.nancy-ignore +++ b/.nancy-ignore @@ -17,6 +17,7 @@ sonatype-2022-6522 CVE-2020-8561 CVE-2023-29401 CVE-2023-26125 +CVE-2023-3978 # golang/google.golang.org/grpc@v1.52.0 CVE-2023-32731 
until=2023-08-30 diff --git a/CHANGELOG.md b/CHANGELOG.md index aa9b7be32..27f308786 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ### Added +- Add ENI mode for Cilium on AWS. - Consider new control-plane label. ### Changed diff --git a/flag/service/installation/installation.go b/flag/service/installation/installation.go new file mode 100644 index 000000000..4f38115f1 --- /dev/null +++ b/flag/service/installation/installation.go @@ -0,0 +1,5 @@ +package installation + +type Installation struct { + Name string +} diff --git a/flag/service/service.go b/flag/service/service.go index 7a4cbab53..8e7cb068b 100644 --- a/flag/service/service.go +++ b/flag/service/service.go @@ -4,6 +4,7 @@ import ( "github.com/giantswarm/operatorkit/v8/pkg/flag/service/kubernetes" "github.com/giantswarm/cluster-operator/v5/flag/service/image" + "github.com/giantswarm/cluster-operator/v5/flag/service/installation" "github.com/giantswarm/cluster-operator/v5/flag/service/kubeconfig" "github.com/giantswarm/cluster-operator/v5/flag/service/provider" "github.com/giantswarm/cluster-operator/v5/flag/service/release" @@ -11,9 +12,10 @@ import ( // Service is an intermediate data structure for command line configuration flags. 
type Service struct { - Image image.Image - KubeConfig kubeconfig.KubeConfig - Kubernetes kubernetes.Kubernetes - Provider provider.Provider - Release release.Release + Image image.Image + Installation installation.Installation + KubeConfig kubeconfig.KubeConfig + Kubernetes kubernetes.Kubernetes + Provider provider.Provider + Release release.Release } diff --git a/helm/cluster-operator/templates/configmap.yaml b/helm/cluster-operator/templates/configmap.yaml index 0601763aa..606f9347b 100644 --- a/helm/cluster-operator/templates/configmap.yaml +++ b/helm/cluster-operator/templates/configmap.yaml @@ -39,6 +39,8 @@ data: caFile: '' crtFile: '' keyFile: '' + installation: + name: '{{ .Values.installation.name }}' provider: kind: '{{ .Values.provider.kind }}' release: diff --git a/helm/cluster-operator/values.yaml b/helm/cluster-operator/values.yaml index 6ab2e83c1..121575ce0 100644 --- a/helm/cluster-operator/values.yaml +++ b/helm/cluster-operator/values.yaml @@ -33,6 +33,9 @@ kubernetes: provider: kind: "" +installation: + name: "" + release: app: config: diff --git a/main.go b/main.go index 999bf9853..24f8eca5c 100644 --- a/main.go +++ b/main.go @@ -123,6 +123,7 @@ func mainE() error { daemonCommand.PersistentFlags().String(f.Service.Kubernetes.TLS.CrtFile, "", "Certificate file path to use to authenticate with Kubernetes.") daemonCommand.PersistentFlags().String(f.Service.Kubernetes.TLS.KeyFile, "", "Key file path to use to authenticate with Kubernetes.") + daemonCommand.PersistentFlags().String(f.Service.Installation.Name, "", "Name of the installation.") daemonCommand.PersistentFlags().String(f.Service.Provider.Kind, "", "Provider of the installation. 
One of aws, azure, kvm.") daemonCommand.PersistentFlags().String(f.Service.Release.App.Config.Default, "", "Default properties for app.") diff --git a/pkg/label/version.go b/pkg/label/version.go index 1d2250076..a4cad452f 100644 --- a/pkg/label/version.go +++ b/pkg/label/version.go @@ -9,4 +9,6 @@ const ( OperatorVersion = "cluster-operator.giantswarm.io/version" // ReleaseVersion is a label specifying a tenant cluster release version. ReleaseVersion = "release.giantswarm.io/version" + // AWSReleaseVersion is a label specifying a tenant cluster AWS operator release version. + AWSReleaseVersion = "aws-operator.giantswarm.io/version" ) diff --git a/service/controller/cluster.go b/service/controller/cluster.go index eb67454ff..738eaa38a 100644 --- a/service/controller/cluster.go +++ b/service/controller/cluster.go @@ -70,6 +70,7 @@ type ClusterConfig struct { DNSIP string ClusterDomain string KiamWatchDogEnabled bool + Installation string NewCommonClusterObjectFunc func() infrastructurev1alpha3.CommonClusterObject Provider string RawAppDefaultConfig string @@ -260,6 +261,7 @@ func newClusterResources(config ClusterConfig) ([]resource.Interface, error) { ClusterIPRange: config.ClusterIPRange, DNSIP: config.DNSIP, + Installation: config.Installation, Provider: config.Provider, } diff --git a/service/controller/key/cilium.go b/service/controller/key/cilium.go index 833bad318..97459881a 100644 --- a/service/controller/key/cilium.go +++ b/service/controller/key/cilium.go @@ -10,3 +10,13 @@ func ForceDisableCiliumKubeProxyReplacement(cluster apiv1beta1.Cluster) bool { return found && v == "true" } + +func AWSEniModeEnabled(cluster apiv1beta1.Cluster) bool { + mode, found := cluster.Annotations[annotation.CiliumIpamModeAnnotation] + if !found { + // we default to 'kubernetes' mode + return false + } + + return mode == annotation.CiliumIpamModeENI +} diff --git a/service/controller/key/common.go b/service/controller/key/common.go index 7345199d5..642076ef6 100644 --- 
a/service/controller/key/common.go +++ b/service/controller/key/common.go @@ -79,6 +79,10 @@ func ReleaseVersion(getter LabelsGetter) string { return getter.GetLabels()[label.ReleaseVersion] } +func AWSOperatorReleaseVersion(getter LabelsGetter) string { + return getter.GetLabels()[label.AWSReleaseVersion] +} + func IsBundle(appName string) bool { return strings.HasSuffix(appName, "-bundle") } diff --git a/service/controller/key/provider.go b/service/controller/key/provider.go new file mode 100644 index 000000000..ed09ee2e2 --- /dev/null +++ b/service/controller/key/provider.go @@ -0,0 +1,5 @@ +package key + +func IsAWS(provider string) bool { + return provider == "aws" +} diff --git a/service/controller/resource/clusterconfigmap/desired.go b/service/controller/resource/clusterconfigmap/desired.go index 5c3820d63..65aa4a7bd 100644 --- a/service/controller/resource/clusterconfigmap/desired.go +++ b/service/controller/resource/clusterconfigmap/desired.go @@ -6,6 +6,8 @@ import ( "regexp" "strconv" + releasev1alpha1 "github.com/giantswarm/release-operator/v4/api/v1alpha1" + "github.com/giantswarm/apiextensions/v6/pkg/apis/infrastructure/v1alpha3" "github.com/giantswarm/microerror" "gopkg.in/yaml.v3" @@ -58,7 +60,7 @@ func (r *Resource) GetDesiredState(ctx context.Context, obj interface{}) ([]*cor // enableCiliumNetworkPolicy is only enabled by default for AWS clusters. 
var enableCiliumNetworkPolicy bool { - if r.provider == "aws" { + if key.IsAWS(r.provider) { useProxyProtocol = true enableCiliumNetworkPolicy = true } @@ -95,7 +97,7 @@ func (r *Resource) GetDesiredState(ctx context.Context, obj interface{}) ([]*cor }, } - if r.provider == "aws" { + if key.IsAWS(r.provider) { var irsa bool var accountID string var vpcID string @@ -150,7 +152,8 @@ func (r *Resource) GetDesiredState(ctx context.Context, obj interface{}) ([]*cor }, } - if key.ForceDisableCiliumKubeProxyReplacement(cr) { + // We only need this if the cluster is in overlay mode during the upgrade + if key.ForceDisableCiliumKubeProxyReplacement(cr) && !key.AWSEniModeEnabled(cr) { ciliumValues["kubeProxyReplacement"] = "disabled" } else { ciliumValues["kubeProxyReplacement"] = "strict" @@ -159,6 +162,85 @@ func (r *Resource) GetDesiredState(ctx context.Context, obj interface{}) ([]*cor ciliumValues["cleanupKubeProxy"] = true } + if key.IsAWS(r.provider) && key.AWSEniModeEnabled(cr) { + // Add selector to not interfere with nodes still running in AWS CNI + awsCluster := &v1alpha3.AWSCluster{} + err := r.ctrlClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, awsCluster) + if err != nil { + return nil, microerror.Mask(err) + } + + var re releasev1alpha1.Release + err = r.ctrlClient.Get( + ctx, + types.NamespacedName{Name: key.ReleaseName(key.ReleaseVersion(&cr))}, + &re, + ) + if err != nil { + return nil, microerror.Mask(err) + } + + var awsOperatorRelease string + for _, v := range re.Spec.Components { + if v.Name == "aws-operator" { + awsOperatorRelease = v.Version + } + } + + if awsOperatorRelease == "" { + return nil, microerror.Mask(releaseNotFound) + } + + // This is a hack to only introduce the selector during the upgrade on the new nodes, old ones work with AWS CNI + if key.ForceDisableCiliumKubeProxyReplacement(cr) { + ciliumValues["nodeSelector"] = map[string]interface{}{ + "aws-operator.giantswarm.io/version": awsOperatorRelease, + } + 
} + + ciliumValues["eni"] = map[string]interface{}{ + "enabled": true, + //"awsEnablePrefixDelegation": true, + } + + ciliumValues["ipam"] = map[string]interface{}{ + "mode": "eni", + } + + // there is autodiscoverability on the VPC CIDrs + ciliumValues["ipv4NativeRoutingCIDR"] = podCIDR + + // https://docs.cilium.io/en/v1.13/network/concepts/routing/#id5 + ciliumValues["endpointRoutes"] = map[string]interface{}{ + "enabled": true, + } + + ciliumValues["operator"] = map[string]interface{}{ + "extraArgs": []string{ + "--aws-release-excess-ips=true", + }, + } + + ciliumValues["egressMasqueradeInterfaces"] = "eth+" + ciliumValues["tunnel"] = "disabled" + // Used by cilium to tag ENIs it creates and be able to filter and clean them up. + ciliumValues["cluster"] = map[string]interface{}{ + "name": key.ClusterID(&cr), + } + ciliumValues["cni"] = map[string]interface{}{ + "customConf": true, + "exclusive": true, + "configMap": "cilium-cni-configuration", + } + ciliumValues["extraEnv"] = []map[string]string{ + { + "name": "CNI_CONF_NAME", + "value": "21-cilium.conflist", + }, + } + + } + configMapSpecs := []configMapSpec{ { Name: key.ClusterConfigMapName(&cr), diff --git a/service/controller/resource/clusterconfigmap/error.go b/service/controller/resource/clusterconfigmap/error.go index 3feb7dae2..5c2c1cdc0 100644 --- a/service/controller/resource/clusterconfigmap/error.go +++ b/service/controller/resource/clusterconfigmap/error.go @@ -19,3 +19,11 @@ var wrongTypeError = &microerror.Error{ func IsWrongType(err error) bool { return microerror.Cause(err) == wrongTypeError } + +var releaseNotFound = &microerror.Error{ + Kind: "releaseNotFound", +} + +func IsReleaseNotFound(err error) bool { + return microerror.Cause(err) == releaseNotFound +} diff --git a/service/controller/resource/clusterconfigmap/resource.go b/service/controller/resource/clusterconfigmap/resource.go index 9f67238bc..da6b7a731 100644 --- a/service/controller/resource/clusterconfigmap/resource.go +++ 
b/service/controller/resource/clusterconfigmap/resource.go @@ -26,6 +26,7 @@ type Config struct { ClusterIPRange string DNSIP string + Installation string Provider string } @@ -69,6 +70,9 @@ func New(config Config) (*Resource, error) { if config.DNSIP == "" { return nil, microerror.Maskf(invalidConfigError, "%T.DNSIP must not be empty", config) } + if config.Installation == "" { + return nil, microerror.Maskf(invalidConfigError, "%T.Installation must not be empty", config) + } if config.Provider == "" { return nil, microerror.Maskf(invalidConfigError, "%T.Provider must not be empty", config) } diff --git a/service/service.go b/service/service.go index 4987f3729..fda137cc7 100644 --- a/service/service.go +++ b/service/service.go @@ -280,6 +280,7 @@ func New(config Config) (*Service, error) { DNSIP: dnsIP, ClusterDomain: config.Viper.GetString(config.Flag.Guest.Cluster.Kubernetes.ClusterDomain), KiamWatchDogEnabled: config.Viper.GetBool(config.Flag.Service.Release.App.Config.KiamWatchDogEnabled), + Installation: config.Viper.GetString(config.Flag.Service.Installation.Name), NewCommonClusterObjectFunc: newCommonClusterObjectFunc(provider), Provider: provider, RawAppDefaultConfig: config.Viper.GetString(config.Flag.Service.Release.App.Config.Default),