From e5b696a4a9078568a47371936af98f8f8590b0f6 Mon Sep 17 00:00:00 2001 From: candita Date: Fri, 9 Aug 2024 19:52:28 -0400 Subject: [PATCH] Update the condition's lastTransitionTime only if the condition status changes. OCPBUGS-37491: Ingress operator status not degraded when canary route fails The fix involves recognizing when the condition status changes, and only updating the condition's transition time when this happens. * pkg/operator/controller/dns/controller.go - update logic for setting condition transition time. * pkg/operator/controller/ingress/status.go - update logic for setting condition transition time. * pkg/operator/controller/status/controller.go - update logic for setting condition transition time. * pkg/operator/controller/status/controller_test.go - test lastTransitionTime changes. --- pkg/operator/controller/dns/controller.go | 16 +-- pkg/operator/controller/ingress/status.go | 16 +-- .../controller/ingress/status_test.go | 105 +++++++++++++++++- pkg/operator/controller/status/controller.go | 16 +-- 4 files changed, 122 insertions(+), 31 deletions(-) diff --git a/pkg/operator/controller/dns/controller.go b/pkg/operator/controller/dns/controller.go index 8c6fea4490..18724d2d6f 100644 --- a/pkg/operator/controller/dns/controller.go +++ b/pkg/operator/controller/dns/controller.go @@ -439,7 +439,7 @@ func mergeStatuses(zones []configv1.DNSZone, statuses, updates []iov1.DNSZoneSta var clock utilclock.Clock = utilclock.RealClock{} // mergeConditions adds or updates matching conditions, and updates -// the transition time if details of a condition have changed. Returns +// the transition time if the status of a condition changed. Returns // the updated condition array. func mergeConditions(conditions, updates []iov1.DNSZoneCondition) []iov1.DNSZoneCondition { now := metav1.NewTime(clock.Now()) @@ -449,13 +449,13 @@ func mergeConditions(conditions, updates []iov1.DNSZoneCondition) []iov1.DNSZone for j, cond := range conditions { if cond.Type == update.Type { add = false - if conditionChanged(cond, update) { - conditions[j].Status = update.Status - conditions[j].Reason = update.Reason - conditions[j].Message = update.Message + if update.Status != conditions[j].Status { conditions[j].LastTransitionTime = now - break } + conditions[j].Reason = update.Reason + conditions[j].Message = update.Message + conditions[j].Status = update.Status + break } } if add { @@ -467,10 +467,6 @@ func mergeConditions(conditions, updates []iov1.DNSZoneCondition) []iov1.DNSZone return conditions } -func conditionChanged(a, b iov1.DNSZoneCondition) bool { - return a.Status != b.Status || a.Reason != b.Reason || a.Message != b.Message -} - // migrateRecordStatusConditions purges deprecated fields from DNS Record // status. Removes "Failed" (DNSRecordFailedConditionType) and replaces with // "Published" (DNSRecordPublishedConditionType). If the status needs to be diff --git a/pkg/operator/controller/ingress/status.go b/pkg/operator/controller/ingress/status.go index 3baea68b83..dad7b9fa96 100644 --- a/pkg/operator/controller/ingress/status.go +++ b/pkg/operator/controller/ingress/status.go @@ -126,7 +126,7 @@ func (r *reconciler) syncIngressControllerSelectorStatus(ic *operatorv1.IngressC } // MergeConditions adds or updates matching conditions, and updates -// the transition time if details of a condition have changed. Returns +// the transition time if the status of a condition changed. Returns // the updated condition array. func MergeConditions(conditions []operatorv1.OperatorCondition, updates ...operatorv1.OperatorCondition) []operatorv1.OperatorCondition { now := metav1.NewTime(clock.Now()) @@ -136,13 +136,13 @@ func MergeConditions(conditions []operatorv1.OperatorCondition, updates ...opera for j, cond := range conditions { if cond.Type == update.Type { add = false - if conditionChanged(cond, update) { - conditions[j].Status = update.Status - conditions[j].Reason = update.Reason - conditions[j].Message = update.Message + if update.Status != conditions[j].Status { conditions[j].LastTransitionTime = now - break } + conditions[j].Reason = update.Reason + conditions[j].Message = update.Message + conditions[j].Status = update.Status + break } } if add { @@ -835,10 +835,6 @@ func conditionsEqual(a, b []operatorv1.OperatorCondition) bool { return cmp.Equal(a, b, conditionCmpOpts...) } -func conditionChanged(a, b operatorv1.OperatorCondition) bool { - return a.Status != b.Status || a.Reason != b.Reason || a.Message != b.Message -} - // computeLoadBalancerStatus returns the set of current // LoadBalancer-prefixed conditions for the given ingress controller, which are // used later to determine the ingress controller's Degraded or Available status. diff --git a/pkg/operator/controller/ingress/status_test.go b/pkg/operator/controller/ingress/status_test.go index 7502430eec..25abb25ff8 100644 --- a/pkg/operator/controller/ingress/status_test.go +++ b/pkg/operator/controller/ingress/status_test.go @@ -6,13 +6,14 @@ import ( "crypto/x509" "crypto/x509/pkix" "encoding/pem" - "k8s.io/utils/pointer" "math/big" "reflect" "strings" "testing" "time" + "k8s.io/utils/pointer" + "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -2749,19 +2750,121 @@ func Test_MergeConditions(t *testing.T) { cond("A", "False", "Reason", start), cond("B", "True", "Reason", start), cond("Ignored", "True", "Reason", start), + cond("D", "True", "Reason", start), }, updates: []operatorv1.OperatorCondition{ cond("A", "True", "Reason", middle), cond("B", "True", "Reason", middle), cond("C", "False", "Reason", middle), + cond("D", "True", "NewReason", start), }, expected: []operatorv1.OperatorCondition{ cond("A", "True", "Reason", later), cond("B", "True", "Reason", start), + // Test case C has lastTransitionTime update because it is a new condition. cond("C", "False", "Reason", later), cond("Ignored", "True", "Reason", start), + // Test case D does not change lastTransitionTime because status did not change. + cond("D", "True", "NewReason", start), }, }, + "an update to only the message in CanaryChecksSucceeding should not change lastTransitionTime": { + conditions: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "False", + LastTransitionTime: metav1.NewTime(start), + Reason: "CanaryChecksRepetitiveFailures", + Message: "Canary route checks for the default ingress controller are failing. Last 1 error messages:\nerror sending canary HTTP Request: Timeout: Get \"https://canary-openshift-ingress-canary.apps.example.local\": context deadline exceeded (Client.Timeout exceeded while awaiting headers) (x5 over 4m40s)", + }}, + updates: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "False", + LastTransitionTime: metav1.NewTime(start), + Reason: "CanaryChecksRepetitiveFailures", + Message: "Canary route checks for the default ingress controller are failing. Last 1 error messages:\nerror sending canary HTTP Request: Timeout: Get \"https://canary-openshift-ingress-canary.apps.example.local\": context deadline exceeded (Client.Timeout exceeded while awaiting headers) (x14 over 15m10s)", + }}, + expected: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "False", + // A change to the condition's message should not change the lastTransitionTime. + LastTransitionTime: metav1.NewTime(start), + Reason: "CanaryChecksRepetitiveFailures", + // Use the updated message. + Message: "Canary route checks for the default ingress controller are failing. Last 1 error messages:\nerror sending canary HTTP Request: Timeout: Get \"https://canary-openshift-ingress-canary.apps.example.local\": context deadline exceeded (Client.Timeout exceeded while awaiting headers) (x14 over 15m10s)", + }}, + }, + "an update to message, reason, and status in CanaryChecksSucceeding should change lastTransitionTime, message, reason, and status": { + conditions: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "True", + LastTransitionTime: metav1.NewTime(start), + Reason: "CanaryChecksSucceeding", + Message: "Canary route checks for the default ingress controller are successful", + }}, + updates: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "False", + LastTransitionTime: metav1.NewTime(middle), + Reason: "CanaryChecksRepetitiveFailures", + Message: "Canary route checks for the default ingress controller are failing. Last 1 error messages:\nerror sending canary HTTP Request: Timeout: Get \"https://canary-openshift-ingress-canary.apps.example.local\": context deadline exceeded (Client.Timeout exceeded while awaiting headers) (x5 over 4m40s)", + }}, + expected: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "False", + // Update lastTransitionTime if status changed. + LastTransitionTime: metav1.NewTime(later), + Reason: "CanaryChecksRepetitiveFailures", + Message: "Canary route checks for the default ingress controller are failing. Last 1 error messages:\nerror sending canary HTTP Request: Timeout: Get \"https://canary-openshift-ingress-canary.apps.example.local\": context deadline exceeded (Client.Timeout exceeded while awaiting headers) (x5 over 4m40s)", + }}, + }, + "an update to only the status in CanaryChecksSucceeding should change lastTransitionTime and status": { + conditions: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "Unknown", + LastTransitionTime: metav1.NewTime(start), + Reason: "CanaryChecksSucceeding", + Message: "Canary route checks for the default ingress controller are successful", + }}, + updates: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "True", + LastTransitionTime: metav1.NewTime(middle), + Reason: "CanaryChecksSucceeding", + Message: "Canary route checks for the default ingress controller are successful", + }}, + expected: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "True", + // Update lastTransitionTime if status changed. + LastTransitionTime: metav1.NewTime(later), + Reason: "CanaryChecksSucceeding", + Message: "Canary route checks for the default ingress controller are successful", + }}, + }, + "an update to only the lastTransitionTime in CanaryChecksSucceeding should not change anything": { + conditions: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "True", + LastTransitionTime: metav1.NewTime(start), + Reason: "CanaryChecksSucceeding", + Message: "Canary route checks for the default ingress controller are successful", + }}, + updates: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "True", + LastTransitionTime: metav1.NewTime(middle), + Reason: "CanaryChecksSucceeding", + Message: "Canary route checks for the default ingress controller are successful", + }}, + expected: []operatorv1.OperatorCondition{{ + Type: "CanaryChecksSucceeding", + Status: "True", + // Don't update lastTransitionTime since status did not change. + LastTransitionTime: metav1.NewTime(start), + Reason: "CanaryChecksSucceeding", + Message: "Canary route checks for the default ingress controller are successful", + }}, + }, } // Simulate the passage of time between original condition creation diff --git a/pkg/operator/controller/status/controller.go b/pkg/operator/controller/status/controller.go index 517ff24d84..7f57cb840c 100644 --- a/pkg/operator/controller/status/controller.go +++ b/pkg/operator/controller/status/controller.go @@ -555,7 +555,7 @@ func computeOperatorAvailableCondition(ingresses []operatorv1.IngressController) } // mergeConditions adds or updates matching conditions, and updates -// the transition time if details of a condition have changed. Returns +// the transition time if the status of a condition changed. Returns // the updated condition array. func mergeConditions(conditions []configv1.ClusterOperatorStatusCondition, updates ...configv1.ClusterOperatorStatusCondition) []configv1.ClusterOperatorStatusCondition { now := metav1.NewTime(clock.Now()) @@ -565,13 +565,13 @@ func mergeConditions(conditions []configv1.ClusterOperatorStatusCondition, updat for j, cond := range conditions { if cond.Type == update.Type { add = false - if conditionChanged(cond, update) { - conditions[j].Status = update.Status - conditions[j].Reason = update.Reason - conditions[j].Message = update.Message + if update.Status != conditions[j].Status { conditions[j].LastTransitionTime = now - break } + conditions[j].Reason = update.Reason + conditions[j].Message = update.Message + conditions[j].Status = update.Status + break } } if add { @@ -583,10 +583,6 @@ func mergeConditions(conditions []configv1.ClusterOperatorStatusCondition, updat return conditions } -func conditionChanged(a, b configv1.ClusterOperatorStatusCondition) bool { - return a.Status != b.Status || a.Reason != b.Reason || a.Message != b.Message -} - // operatorStatusesEqual compares two ClusterOperatorStatus values. Returns // true if the provided ClusterOperatorStatus values should be considered equal // for the purpose of determining whether an update is necessary, false otherwise.