Skip to content

Commit

Permalink
OCPBUGS-16728: Require Service Deletion for LB Type Updates
Browse files Browse the repository at this point in the history
Due to the AWS CCM leaking resources when the load balancer type
is changed on a service, the cloud team is now blocking updates
to the load balancer type. As a result, the Ingress Operator
will require the service to be deleted and recreated when the
Ingress Controller's load balancer type changes.

This change introduces a `Progressing: True` status message when
the load balancer type is modified, instructing the user on how to
effectuate the change.

This commit does not alter the function of the LB type in the status,
which has always (incorrectly) represented the desired state. However,
with this change, a new scenario is introduced where the desired LB
type may differ from the actual state. Consequently, the LB type
status may no longer reliably reflect the actual state as it
previously did, potentially surprising users who mistakenly
assumed it always indicated the current state.
  • Loading branch information
gcs278 committed Nov 28, 2024
1 parent 2a26973 commit acacecb
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 71 deletions.
4 changes: 4 additions & 0 deletions pkg/operator/controller/ingress/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,10 @@ func setDefaultDomain(ic *operatorv1.IngressController, ingressConfig *configv1.
}

func setDefaultPublishingStrategy(ic *operatorv1.IngressController, platformStatus *configv1.PlatformStatus, domainMatchesBaseDomain bool, ingressConfig *configv1.Ingress, alreadyAdmitted bool) bool {
// WARNING: setDefaultPublishingStrategy follows an outdated pattern of using spec and status. It uses the status
// to represent the *desired* state (spec + defaulting), but per Kubernetes standards, the status should reflect
// the *actual* state. As a result, this function is considered legacy. For new API fields, use spec as the desired
// state and syncIngressControllerStatus to populate the status with the actual state.
effectiveStrategy := ic.Spec.EndpointPublishingStrategy.DeepCopy()
if effectiveStrategy == nil {
var strategyType operatorv1.EndpointPublishingStrategyType
Expand Down
71 changes: 61 additions & 10 deletions pkg/operator/controller/ingress/load_balancer_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,6 @@ var (
// local-with-fallback annotation for kube-proxy (see
// <https://bugzilla.redhat.com/show_bug.cgi?id=1960284>).
localWithFallbackAnnotation,
// AWS load balancer type annotation to set either CLB/ELB or NLB
AWSLBTypeAnnotation,
// awsLBProxyProtocolAnnotation is used to enable the PROXY protocol on any
// AWS load balancer services created.
//
Expand Down Expand Up @@ -294,6 +292,11 @@ var (
managedAtServiceCreationLBServiceAnnotations = func() map[configv1.PlatformType]sets.Set[string] {
result := map[configv1.PlatformType]sets.Set[string]{
configv1.AWSPlatformType: sets.New[string](
// Annotation to set either CLB/ELB or NLB.
// This annotation was previously fully managed (https://issues.redhat.com/browse/NE-865),
// but now required service recreation due to issues with AWS leaking resources when updating
// LB Type without recreating the service (see https://issues.redhat.com/browse/OCPBUGS-16728).
AWSLBTypeAnnotation,
// Annotation for configuring load balancer subnets.
// This requires service recreation because NLBs do not support updates to subnets,
// the operator cannot detect errors if the subnets are invalid, and to prevent
Expand Down Expand Up @@ -794,29 +797,61 @@ func loadBalancerServiceIsUpgradeable(ic *operatorv1.IngressController, deployme
return nil
}

// effectuateMessage returns a message describing how to effectuate a
// change that requires the service to be deleted.
func effectuateMessage(changedMsg, ocPatchRevertCmd string, service *corev1.Service) string {
return fmt.Sprintf(
"%[1]s To effectuate this change, you must delete the service: `oc -n %[2]s delete svc/%[3]s`; "+
"the service load-balancer will then be deprovisioned and a new one created. This will most likely "+
"cause the new load-balancer to have a different host name and IP address and cause disruption. To "+
"return to the previous state, you can revert the change to the IngressController: `%[4]s`. "+
"Direct updates to the service annotations are not supported.",
changedMsg, service.Namespace, service.Name, ocPatchRevertCmd,
)
}

// loadBalancerServiceIsProgressing returns an error value indicating if the
// load balancer service is in progressing status.
// Tricky: Status fields have mixed functions. Older status fields often represent the
// *desired* state, while some newer fields reflect the *actual* state of the object.
func loadBalancerServiceIsProgressing(ic *operatorv1.IngressController, service *corev1.Service, platform *configv1.PlatformStatus, subnetsAWSEnabled bool, eipAllocationsAWSEnabled bool) error {
var errs []error
// Tricky: Scope in the status indicates the *desired* scope.
wantScope := ic.Status.EndpointPublishingStrategy.LoadBalancer.Scope
haveScope := operatorv1.ExternalLoadBalancer
if IsServiceInternal(service) {
haveScope = operatorv1.InternalLoadBalancer
}
if wantScope != haveScope {
err := fmt.Errorf("The IngressController scope was changed from %q to %q.", haveScope, wantScope)
changedMsg := fmt.Sprintf("The IngressController scope was changed from %q to %q.", haveScope, wantScope)
err := fmt.Errorf(changedMsg)
if _, ok := platformsWithMutableScope[platform.Type]; !ok {
err = fmt.Errorf("%[1]s To effectuate this change, you must delete the service: `oc -n %[2]s delete svc/%[3]s`; the service load-balancer will then be deprovisioned and a new one created. This will most likely cause the new load-balancer to have a different host name and IP address from the old one's. Alternatively, you can revert the change to the IngressController: `oc -n openshift-ingress-operator patch ingresscontrollers/%[4]s --type=merge --patch='{\"spec\":{\"endpointPublishingStrategy\":{\"loadBalancer\":{\"scope\":\"%[5]s\"}}}}'`", err.Error(), service.Namespace, service.Name, ic.Name, haveScope)
ocPatchRevertCmd := fmt.Sprintf("oc -n openshift-ingress-operator patch ingresscontrollers/%[1]s --type=merge --patch='{\"spec\":{\"endpointPublishingStrategy\":{\"loadBalancer\":{\"scope\":\"%[2]s\"}}}}'", ic.Name, haveScope)
err = fmt.Errorf(effectuateMessage(changedMsg, ocPatchRevertCmd, service))
}
errs = append(errs, err)
}

haveLBType := getAWSLoadBalancerTypeFromServiceAnnotation(service)
if platform.Type == configv1.AWSPlatformType {
// Tricky: LB Type in the status indicates the *desired* scope.
wantLBType := getAWSLoadBalancerTypeInStatus(ic)

if wantLBType != haveLBType {
changedMsg := fmt.Sprintf("The IngressController load balancer type was changed from %q to %q.", haveLBType, wantLBType)
ocPatchRevertCmd := fmt.Sprintf("oc -n openshift-ingress-operator patch ingresscontrollers/%[1]s --type=merge --patch='{\"spec\":{\"endpointPublishingStrategy\":{\"type\":\"LoadBalancerService\",\"loadBalancer\":{\"providerParameters\":{\"type\":\"AWS\",\"aws\":{\"type\":\"%[2]s\"}}}}}}'", ic.Name, haveLBType)
err := fmt.Errorf(effectuateMessage(changedMsg, ocPatchRevertCmd, service))
errs = append(errs, err)
}
}

if platform.Type == configv1.AWSPlatformType && subnetsAWSEnabled {
var (
// Tricky: Subnets in the status indicates the *actual* scope.
wantSubnets, haveSubnets *operatorv1.AWSSubnets
paramsFieldName string
)
switch getAWSLoadBalancerTypeInStatus(ic) {
switch haveLBType {
case operatorv1.AWSNetworkLoadBalancer:
if nlbParams := getAWSNetworkLoadBalancerParametersInSpec(ic); nlbParams != nil {
wantSubnets = nlbParams.Subnets
Expand All @@ -841,14 +876,15 @@ func loadBalancerServiceIsProgressing(ic *operatorv1.IngressController, service
haveSubnetsPrettyJson := convertAWSSubnetListToPatchJson(haveSubnets, "{}", "[]")
wantSubnetsPrettyJson := convertAWSSubnetListToPatchJson(wantSubnets, "{}", "[]")
changedMsg := fmt.Sprintf("The IngressController subnets were changed from %q to %q.", haveSubnetsPrettyJson, wantSubnetsPrettyJson)
ocPatchRevertCmd := fmt.Sprintf("oc -n openshift-ingress-operator patch ingresscontrollers/%[1]s --type=merge --patch='{\"spec\":{\"endpointPublishingStrategy\":{\"type\":\"LoadBalancerService\",\"loadBalancer\":{\"providerParameters\":{\"type\":\"AWS\",\"aws\":{\"type\":\"%[2]s\",\"%[3]s\":{\"subnets\":%[4]s}}}}}}}'", ic.Name, getAWSLoadBalancerTypeInStatus(ic), paramsFieldName, haveSubnetsPatchJson)
err := fmt.Errorf("%[1]s To effectuate this change, you must delete the service: `oc -n %[2]s delete svc/%[3]s`; the service load-balancer will then be deprovisioned and a new one created. This will most likely cause the new load-balancer to have a different host name and IP address and cause disruption. To return to the previous state, you can revert the change to the IngressController: `%[4]s`", changedMsg, service.Namespace, service.Name, ocPatchRevertCmd)
ocPatchRevertCmd := fmt.Sprintf("oc -n openshift-ingress-operator patch ingresscontrollers/%[1]s --type=merge --patch='{\"spec\":{\"endpointPublishingStrategy\":{\"type\":\"LoadBalancerService\",\"loadBalancer\":{\"providerParameters\":{\"type\":\"AWS\",\"aws\":{\"type\":\"%[2]s\",\"%[3]s\":{\"subnets\":%[4]s}}}}}}}'", ic.Name, haveLBType, paramsFieldName, haveSubnetsPatchJson)
err := fmt.Errorf(effectuateMessage(changedMsg, ocPatchRevertCmd, service))
errs = append(errs, err)
}
}

if platform.Type == configv1.AWSPlatformType && eipAllocationsAWSEnabled && getAWSLoadBalancerTypeInStatus(ic) == operatorv1.AWSNetworkLoadBalancer {
if platform.Type == configv1.AWSPlatformType && eipAllocationsAWSEnabled && haveLBType == operatorv1.AWSNetworkLoadBalancer {
var (
// Tricky: EIP Allocations in the status indicate the *actual* scope.
wantEIPAllocations, haveEIPAllocations []operatorv1.EIPAllocation
)
if nlbParams := getAWSNetworkLoadBalancerParametersInSpec(ic); nlbParams != nil {
Expand All @@ -864,8 +900,8 @@ func loadBalancerServiceIsProgressing(ic *operatorv1.IngressController, service
haveEIPAllocationsPrettyJson := convertAWSEIPAllocationsListToPatchJson(haveEIPAllocations, "[]")
wantEIPAllocationsPrettyJson := convertAWSEIPAllocationsListToPatchJson(wantEIPAllocations, "[]")
changedMsg := fmt.Sprintf("The IngressController eipAllocations were changed from %q to %q.", haveEIPAllocationsPrettyJson, wantEIPAllocationsPrettyJson)
ocPatchRevertCmd := fmt.Sprintf("oc -n openshift-ingress-operator patch ingresscontrollers/%[1]s --type=merge --patch='{\"spec\":{\"endpointPublishingStrategy\":{\"type\":\"LoadBalancerService\",\"loadBalancer\":{\"providerParameters\":{\"type\":\"AWS\",\"aws\":{\"type\":\"%[2]s\",\"%[3]s\":{\"eipAllocations\":%[4]s}}}}}}}'", ic.Name, getAWSLoadBalancerTypeInStatus(ic), "networkLoadBalancer", haveEIPAllocationsPatchJson)
err := fmt.Errorf("%[1]s To effectuate this change, you must delete the service: `oc -n %[2]s delete svc/%[3]s`; the service load-balancer will then be deprovisioned and a new one created. This will most likely cause the new load-balancer to have a different host name and IP address and cause disruption. To return to the previous state, you can revert the change to the IngressController: `%[4]s`", changedMsg, service.Namespace, service.Name, ocPatchRevertCmd)
ocPatchRevertCmd := fmt.Sprintf("oc -n openshift-ingress-operator patch ingresscontrollers/%[1]s --type=merge --patch='{\"spec\":{\"endpointPublishingStrategy\":{\"type\":\"LoadBalancerService\",\"loadBalancer\":{\"providerParameters\":{\"type\":\"AWS\",\"aws\":{\"type\":\"%[2]s\",\"%[3]s\":{\"eipAllocations\":%[4]s}}}}}}}'", ic.Name, haveLBType, "networkLoadBalancer", haveEIPAllocationsPatchJson)
err := fmt.Errorf(effectuateMessage(changedMsg, ocPatchRevertCmd, service))
errs = append(errs, err)
}
}
Expand Down Expand Up @@ -978,6 +1014,21 @@ func loadBalancerSourceRangesMatch(ic *operatorv1.IngressController, current *co
return fmt.Errorf("You have manually edited an operator-managed object. You must revert your modifications by removing the Spec.LoadBalancerSourceRanges field of LoadBalancer-typed service %q. You can use the new AllowedSourceRanges API field on the ingresscontroller to configure this setting instead.", current.Name)
}

// getAWSLoadBalancerTypeFromServiceAnnotation gets the effective load balancer type by looking at the
// service.beta.kubernetes.io/aws-load-balancer-type annotation of the LoadBalancer-type Service.
// If the annotation isn't specified, the function returns the default of Classic.
func getAWSLoadBalancerTypeFromServiceAnnotation(service *corev1.Service) operatorv1.AWSLoadBalancerType {
if service == nil {
return ""
}

if a, ok := service.Annotations[AWSLBTypeAnnotation]; ok && a == AWSNLBAnnotation {
return operatorv1.AWSNetworkLoadBalancer
}

return operatorv1.AWSClassicLoadBalancer
}

// getSubnetsFromServiceAnnotation gets the effective subnets by looking at the
// service.beta.kubernetes.io/aws-load-balancer-subnets annotation of the LoadBalancer-type Service.
// If no subnets are specified in the annotation, this function returns nil.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,14 @@ func Test_loadBalancerServiceChanged(t *testing.T) {
svc.Annotations["service.beta.kubernetes.io/aws-load-balancer-subnets"] = "foo-subnet"
svc.Annotations["service.beta.kubernetes.io/aws-load-balancer-type"] = "NLB"
},
expect: true,
expect: false,
},
{
description: "if the service.beta.kubernetes.io/aws-load-balancer-type is added",
mutate: func(svc *corev1.Service) {
svc.Annotations["service.beta.kubernetes.io/aws-load-balancer-type"] = "NLB"
},
expect: false,
},
{
description: "if the service.beta.kubernetes.io/aws-load-balancer-eip-allocations annotation added",
Expand Down
4 changes: 2 additions & 2 deletions pkg/operator/controller/ingress/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ func computeLoadBalancerProgressingStatus(ic *operatorv1.IngressController, serv
func updateIngressControllerAWSSubnetStatus(ic *operatorv1.IngressController, service *corev1.Service) {
// Set the subnets status based on the actual service annotation and the load balancer type,
// as NLBs and CLBs have separate subnet configuration fields.
switch getAWSLoadBalancerTypeInStatus(ic) {
switch getAWSLoadBalancerTypeFromServiceAnnotation(service) {
case operatorv1.AWSNetworkLoadBalancer:
// NetworkLoadBalancerParameters should be initialized by setDefaultPublishingStrategy
// when an IngressController is admitted, so we don't need to initialize here.
Expand All @@ -968,7 +968,7 @@ func updateIngressControllerAWSSubnetStatus(ic *operatorv1.IngressController, se
// sync its status to the effective eipAllocations on the LoadBalancer-type service.
func updateIngressControllerAWSEIPAllocationStatus(ic *operatorv1.IngressController, service *corev1.Service) {
// Set the eipAllocations status based on the actual service annotation and on the load balancer type `NLB`.
switch getAWSLoadBalancerTypeInStatus(ic) {
switch getAWSLoadBalancerTypeFromServiceAnnotation(service) {
case operatorv1.AWSNetworkLoadBalancer:
// NetworkLoadBalancerParameters should be initialized by setDefaultPublishingStrategy
// when an IngressController is admitted, so we don't need to initialize here.
Expand Down
Loading

0 comments on commit acacecb

Please sign in to comment.