From 236a1d33ac3461a28da6ccf2e5e09887a0764d31 Mon Sep 17 00:00:00 2001 From: Guido van der Hart Date: Fri, 6 Sep 2024 11:42:07 +0200 Subject: [PATCH] feat(securityGroups): Improve robustness of security group reconcilers --- api/v1beta1/types.go | 3 + ...tructure.cluster.x-k8s.io_oscclusters.yaml | 6 +- ....cluster.x-k8s.io_oscclustertemplates.yaml | 10 +-- ...tructure.cluster.x-k8s.io_oscmachines.yaml | 2 +- ....cluster.x-k8s.io_oscmachinetemplates.yaml | 6 +- config/rbac/role.yaml | 68 +--------------- .../osccluster_securitygroup_controller.go | 80 +++++++++---------- 7 files changed, 57 insertions(+), 118 deletions(-) diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go index b9039439a..9fd33dfe2 100644 --- a/api/v1beta1/types.go +++ b/api/v1beta1/types.go @@ -278,6 +278,9 @@ type OscSecurityGroupRule struct { // The ip range of the security group rule // +optional IpRange string `json:"ipRange,omitempty"` + // The name of the security group to use as target + // +optional + TargetSecurityGroupName string `json:"targetSecurityGroupName,omitempty"` // The beginning of the port range // +optional FromPortRange int32 `json:"fromPortRange,omitempty"` diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclusters.yaml index f3816e0f4..da9a8c7b7 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclusters.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.2 name: oscclusters.infrastructure.cluster.x-k8s.io spec: group: infrastructure.cluster.x-k8s.io @@ -364,6 +364,10 @@ spec: resourceId: description: The security group rule id type: string + targetSecurityGroupName: + description: The name of the security group to use + as target + type: string toPortRange: description: The end of the port range format: int32 diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclustertemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclustertemplates.yaml index b8c2140b0..cb1a53340 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclustertemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscclustertemplates.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.2 name: oscclustertemplates.infrastructure.cluster.x-k8s.io spec: group: infrastructure.cluster.x-k8s.io @@ -49,26 +49,22 @@ spec: ObjectMeta is metadata that all persisted resources must have, which includes all objects users must create. This is a copy of customizable fields from metav1.ObjectMeta. - ObjectMeta is embedded in `Machine.Spec`, `MachineDeployment.Template` and `MachineSet.Template`, which are not top-level Kubernetes objects. Given that metav1.ObjectMeta has lots of special cases and read-only fields which end up in the generated CRD validation, having it as a subset simplifies the API and some issues that can impact user experience. - During the [upgrade to controller-tools@v2](https://github.com/kubernetes-sigs/cluster-api/pull/1054) for v1alpha2, we noticed a failure would occur running Cluster API test suite against the new CRDs, specifically `spec.metadata.creationTimestamp in body must be of type string: "null"`. The investigation showed that `controller-tools@v2` behaves differently than its previous version when handling types from [metav1](k8s.io/apimachinery/pkg/apis/meta/v1) package. - In more details, we found that embedded (non-top level) types that embedded `metav1.ObjectMeta` had validation properties, including for `creationTimestamp` (metav1.Time). The `metav1.Time` type specifies a custom json marshaller that, when IsZero() is true, returns `null` which breaks validation because the field isn't marked as nullable. - In future versions, controller-tools@v2 might allow overriding the type and validation for embedded types. When that happens, this hack should be revisited. properties: @@ -436,6 +432,10 @@ spec: resourceId: description: The security group rule id type: string + targetSecurityGroupName: + description: The name of the security group + to use as target + type: string toPortRange: description: The end of the port range format: int32 diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachines.yaml index cfabece85..ce0c58800 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachines.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.2 name: oscmachines.infrastructure.cluster.x-k8s.io spec: group: infrastructure.cluster.x-k8s.io diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachinetemplates.yaml index 4c71d4260..ed321481d 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_oscmachinetemplates.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.2 name: oscmachinetemplates.infrastructure.cluster.x-k8s.io spec: group: infrastructure.cluster.x-k8s.io @@ -50,26 +50,22 @@ spec: ObjectMeta is metadata that all persisted resources must have, which includes all objects users must create. This is a copy of customizable fields from metav1.ObjectMeta. - ObjectMeta is embedded in `Machine.Spec`, `MachineDeployment.Template` and `MachineSet.Template`, which are not top-level Kubernetes objects. Given that metav1.ObjectMeta has lots of special cases and read-only fields which end up in the generated CRD validation, having it as a subset simplifies the API and some issues that can impact user experience. - During the [upgrade to controller-tools@v2](https://github.com/kubernetes-sigs/cluster-api/pull/1054) for v1alpha2, we noticed a failure would occur running Cluster API test suite against the new CRDs, specifically `spec.metadata.creationTimestamp in body must be of type string: "null"`. The investigation showed that `controller-tools@v2` behaves differently than its previous version when handling types from [metav1](k8s.io/apimachinery/pkg/apis/meta/v1) package. - In more details, we found that embedded (non-top level) types that embedded `metav1.ObjectMeta` had validation properties, including for `creationTimestamp` (metav1.Time). The `metav1.Time` type specifies a custom json marshaller that, when IsZero() is true, returns `null` which breaks validation because the field isn't marked as nullable. - In future versions, controller-tools@v2 might allow overriding the type and validation for embedded types. When that happens, this hack should be revisited. properties: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 4b210e48e..0d134a7ad 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -27,29 +27,8 @@ rules: - cluster.x-k8s.io resources: - clusters - verbs: - - get - - list - - watch -- apiGroups: - - cluster.x-k8s.io - resources: - clusters/status - verbs: - - get - - list - - watch -- apiGroups: - - cluster.x-k8s.io - resources: - machines - verbs: - - get - - list - - watch -- apiGroups: - - cluster.x-k8s.io - resources: - machines/status verbs: - get @@ -59,32 +38,8 @@ rules: - infrastructure.cluster.x-k8s.io resources: - oscclusters - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - infrastructure.cluster.x-k8s.io - resources: - - oscclusters/finalizers - verbs: - - update -- apiGroups: - - infrastructure.cluster.x-k8s.io - resources: - - oscclusters/status - verbs: - - get - - patch - - update -- apiGroups: - - infrastructure.cluster.x-k8s.io - resources: - oscmachines + - oscmachinetemplates verbs: - create - delete @@ -96,32 +51,15 @@ rules: - apiGroups: - infrastructure.cluster.x-k8s.io resources: + - oscclusters/finalizers - oscmachines/finalizers verbs: - update - apiGroups: - infrastructure.cluster.x-k8s.io resources: + - oscclusters/status - oscmachines/status - verbs: - - get - - patch - - update -- apiGroups: - - infrastructure.cluster.x-k8s.io - resources: - - oscmachinetemplates - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - infrastructure.cluster.x-k8s.io - resources: - oscmachinetemplates/status verbs: - get diff --git a/controllers/osccluster_securitygroup_controller.go b/controllers/osccluster_securitygroup_controller.go index 55b28bc00..b2918f8fe 100644 --- a/controllers/osccluster_securitygroup_controller.go +++ b/controllers/osccluster_securitygroup_controller.go @@ -184,14 +184,25 @@ func reconcileSecurityGroupRule(ctx context.Context, clusterScope *scope.Cluster ToPortRange := securityGroupRuleSpec.ToPortRange associateSecurityGroupId := securityGroupsRef.ResourceMap[securityGroupName] clusterScope.V(4).Info("Get associateSecurityGroupId", "securityGroup", associateSecurityGroupId) + + targetSecurityGroupName := securityGroupRuleSpec.TargetSecurityGroupName + targetSecurityGroupId := "" + if targetSecurityGroupName != "" { + targetSecurityGroupId = securityGroupsRef.ResourceMap[targetSecurityGroupName] + clusterScope.V(4).Info("Get targetSecurityGroupId", "securityGroup", targetSecurityGroupId) + if targetSecurityGroupId == "" { + return reconcile.Result{RequeueAfter: 30 * time.Second}, fmt.Errorf("the target securityGroup %s does not exist (yet) for OscCluster %s/%s", targetSecurityGroupName, clusterScope.GetNamespace(), clusterScope.GetName()) + } + } + clusterScope.V(4).Info("Check if the desired securityGroupRule exist", "securityGroupRuleName", securityGroupRuleName) - securityGroupFromSecurityGroupRule, err := securityGroupSvc.GetSecurityGroupFromSecurityGroupRule(associateSecurityGroupId, Flow, IpProtocol, IpRange, "", FromPortRange, ToPortRange) + securityGroupFromSecurityGroupRule, err := securityGroupSvc.GetSecurityGroupFromSecurityGroupRule(associateSecurityGroupId, Flow, IpProtocol, IpRange, targetSecurityGroupId, FromPortRange, ToPortRange) if err != nil { return reconcile.Result{}, err } if securityGroupFromSecurityGroupRule == nil { clusterScope.V(4).Info("Create the desired securityGroupRule", "securityGroupRuleName", securityGroupRuleName) - securityGroupFromSecurityGroupRule, err = securityGroupSvc.CreateSecurityGroupRule(associateSecurityGroupId, Flow, IpProtocol, IpRange, "", FromPortRange, ToPortRange) + securityGroupFromSecurityGroupRule, err = securityGroupSvc.CreateSecurityGroupRule(associateSecurityGroupId, Flow, IpProtocol, IpRange, targetSecurityGroupId, FromPortRange, ToPortRange) if err != nil { return reconcile.Result{}, fmt.Errorf("%w Can not create securityGroupRule for OscCluster %s/%s", err, clusterScope.GetNamespace(), clusterScope.GetName()) } @@ -203,47 +214,34 @@ func reconcileSecurityGroupRule(ctx context.Context, clusterScope *scope.Cluster // deleteSecurityGroup reconcile the deletion of securityGroup of the cluster. func deleteSecurityGroup(ctx context.Context, clusterScope *scope.ClusterScope, securityGroupId string, securityGroupSvc security.OscSecurityGroupInterface, clock_time clock.Clock) (reconcile.Result, error) { - - currentTimeout := clock_time.Now().Add(time.Second * 600) - var loadbalancer_delete = false - for !loadbalancer_delete { - err, httpRes := securityGroupSvc.DeleteSecurityGroup(securityGroupId) + err, httpRes := securityGroupSvc.DeleteSecurityGroup(securityGroupId) + if err != nil { + time.Sleep(20 * time.Second) + buffer := new(strings.Builder) + _, err := io.Copy(buffer, httpRes.Body) if err != nil { - time.Sleep(20 * time.Second) - buffer := new(strings.Builder) - _, err := io.Copy(buffer, httpRes.Body) - if err != nil { - return reconcile.Result{}, nil - } - httpResBody := buffer.String() - clusterScope.V(4).Info("Find body", "httpResBody", httpResBody) - httpResBodyData := []byte(httpResBody) - httpResBodyParsed, err := gabs.ParseJSON(httpResBodyData) - - if err != nil { - return reconcile.Result{}, fmt.Errorf("%w Can not delete securityGroup for Osccluster %s/%s", err, clusterScope.GetNamespace(), clusterScope.GetName()) - } - httpResCode := strings.Replace(strings.Replace(fmt.Sprintf("%v", httpResBodyParsed.Path("Errors.Code").Data()), "[", "", 1), "]", "", 1) - httpResType := strings.Replace(strings.Replace(fmt.Sprintf("%v", httpResBodyParsed.Path("Errors.Type").Data()), "[", "", 1), "]", "", 1) - var unexpectedErr bool = true - - if httpResCode == "9085" && httpResType == "ResourceConflict" { - clusterScope.V(2).Info("LoadBalancer is not deleting yet") - unexpectedErr = false - } - if unexpectedErr { - return reconcile.Result{}, fmt.Errorf(" Can not delete securityGroup because of the uncatch error for Osccluster %s/%s", clusterScope.GetNamespace(), clusterScope.GetName()) - } - clusterScope.V(2).Info("Wait until loadBalancer is deleting") - } else { - loadbalancer_delete = true + return reconcile.Result{}, nil } + httpResBody := buffer.String() + clusterScope.V(4).Info("Find body", "httpResBody", httpResBody) + httpResBodyData := []byte(httpResBody) + httpResBodyParsed, err := gabs.ParseJSON(httpResBodyData) - if clock_time.Now().After(currentTimeout) { - return reconcile.Result{}, fmt.Errorf("%w Can not delete securityGroup because to waiting loadbalancer to be delete timeout for Osccluster %s/%s", err, clusterScope.GetNamespace(), clusterScope.GetName()) + if err != nil { + return reconcile.Result{}, fmt.Errorf("%w cannot delete securityGroup for Osccluster %s/%s", err, clusterScope.GetNamespace(), clusterScope.GetName()) } + httpResCode := strings.Replace(strings.Replace(fmt.Sprintf("%v", httpResBodyParsed.Path("Errors.Code").Data()), "[", "", 1), "]", "", 1) + httpResType := strings.Replace(strings.Replace(fmt.Sprintf("%v", httpResBodyParsed.Path("Errors.Type").Data()), "[", "", 1), "]", "", 1) + if httpResCode == "9085" && httpResType == "ResourceConflict" { + clusterScope.V(2).Info("LoadBalancer is not deleted yet") + return reconcile.Result{RequeueAfter: 30 * time.Second}, nil + } + if httpResCode != "200" { + return reconcile.Result{}, fmt.Errorf("cannot delete securityGroup (%s - %s) for Osccluster %s/%s", httpResCode, httpResType, clusterScope.GetNamespace(), clusterScope.GetName()) + } } + return reconcile.Result{}, nil } @@ -321,9 +319,9 @@ func reconcileSecurityGroup(ctx context.Context, clusterScope *scope.ClusterScop for _, securityGroupRuleSpec := range *securityGroupRulesSpec { clusterScope.V(4).Info("Get sgrule", "sgRuleName", securityGroupRuleSpec.Name) clusterScope.V(4).Info("Create securityGroupRule for the desired securityGroup", "securityGroupName", securityGroupName) - _, err = reconcileSecurityGroupRule(ctx, clusterScope, securityGroupRuleSpec, securityGroupName, securityGroupSvc) + reconcileSecurityGroupRule, err := reconcileSecurityGroupRule(ctx, clusterScope, securityGroupRuleSpec, securityGroupName, securityGroupSvc) if err != nil { - return reconcile.Result{}, err + return reconcileSecurityGroupRule, err } } } @@ -336,9 +334,9 @@ func reconcileSecurityGroup(ctx context.Context, clusterScope *scope.ClusterScop for _, securityGroupRuleSpec := range *securityGroupRulesSpec { clusterScope.V(4).Info("Get sgrule", "sgRuleName", securityGroupRuleSpec.Name) clusterScope.V(4).Info("Create securityGroupRule for the desired securityGroup", "securityGroupName", securityGroupName) - _, err = reconcileSecurityGroupRule(ctx, clusterScope, securityGroupRuleSpec, securityGroupName, securityGroupSvc) + reconcileSecurityGroupRule, err := reconcileSecurityGroupRule(ctx, clusterScope, securityGroupRuleSpec, securityGroupName, securityGroupSvc) if err != nil { - return reconcile.Result{}, err + return reconcileSecurityGroupRule, err } } }