Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-31550: Gateway API - recreating SMCP which breaks Gateway API #1115

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions pkg/operator/controller/gatewayclass/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@ package gatewayclass

import (
"context"
"sync"

logf "github.com/openshift/cluster-ingress-operator/pkg/log"
operatorcontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller"

maistrav2 "github.com/maistra/istio-operator/pkg/apis/maistra/v2"
operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"

"k8s.io/client-go/tools/record"

gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1"

"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"

"sigs.k8s.io/controller-runtime/pkg/cache"
Expand All @@ -35,6 +41,7 @@ const (
)

var log = logf.Logger.WithName(controllerName)
var gatewayClassController controller.Controller

// NewUnmanaged creates and returns a controller that watches gatewayclasses and
// installs and configures Istio. This is an unmanaged controller, which means
Expand All @@ -61,6 +68,15 @@ func NewUnmanaged(mgr manager.Manager, config Config) (controller.Controller, er
if err := c.Watch(source.Kind[client.Object](operatorCache, &gatewayapiv1.GatewayClass{}, &handler.EnqueueRequestForObject{}, isOurGatewayClass, notIstioGatewayClass)); err != nil {
return nil, err
}

isServiceMeshSubscription := predicate.NewPredicateFuncs(func(o client.Object) bool {
return o.GetName() == operatorcontroller.ServiceMeshSubscriptionName().Name
})
if err = c.Watch(source.Kind[client.Object](operatorCache, &operatorsv1alpha1.Subscription{},
enqueueRequestForDefaultGatewayClassController(config.OperandNamespace), isServiceMeshSubscription)); err != nil {
return nil, err
}
gatewayClassController = c
return c, nil
}

Expand All @@ -81,6 +97,23 @@ type reconciler struct {
client client.Client
cache cache.Cache
recorder record.EventRecorder

startSMCPWatch sync.Once
}

func enqueueRequestForDefaultGatewayClassController(namespace string) handler.EventHandler {
return handler.EnqueueRequestsFromMapFunc(
func(ctx context.Context, a client.Object) []reconcile.Request {
return []reconcile.Request{
{
NamespacedName: types.NamespacedName{
Namespace: namespace,
Name: OpenShiftDefaultGatewayClassName,
},
},
}
},
)
}

// Reconcile expects request to refer to a GatewayClass and creates or
Expand All @@ -90,15 +123,29 @@ func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (

var gatewayclass gatewayapiv1.GatewayClass
if err := r.cache.Get(ctx, request.NamespacedName, &gatewayclass); err != nil {
log.Error(err, "failed to get gatewayclass", "request", request)
return reconcile.Result{}, err
}

var errs []error
if _, _, err := r.ensureServiceMeshOperatorSubscription(ctx); err != nil {
log.Error(err, "failed to ensure ServiceMeshOperatorSubscription", "request", request)
errs = append(errs, err)
}
if _, _, err := r.ensureServiceMeshControlPlane(ctx, &gatewayclass); err != nil {
log.Error(err, "failed to ensure ServiceMeshControlPlane", "request", request)
errs = append(errs, err)
} else {
r.startSMCPWatch.Do(func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would we want to start a watch every time we reconcile?

Copy link
Contributor Author

@anirudhAgniRedhat anirudhAgniRedhat Dec 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @candita, We are using sync.Once here, to start the watch for SMCP, which should run only once. So, this should not run on every reconciliation.

Since we need to create the SMCP watch after the OSSM operator is installed, we cannot start the watch as usual.

isOurSMCP := predicate.NewPredicateFuncs(func(o client.Object) bool {
return o.GetName() == operatorcontroller.ServiceMeshControlPlaneName(r.config.OperandNamespace).Name
})
if err = gatewayClassController.Watch(source.Kind[client.Object](r.cache, &maistrav2.ServiceMeshControlPlane{}, enqueueRequestForDefaultGatewayClassController(r.config.OperandNamespace), isOurSMCP)); err != nil {
log.Error(err, "failed to watch ServiceMeshControlPlane", "request", request)
errs = append(errs, err)
}
})
}

return reconcile.Result{}, utilerrors.NewAggregate(errs)
}
4 changes: 4 additions & 0 deletions pkg/operator/controller/names.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ const (
// Remote worker label, used for node affinity of router deployment.
// Router should not run on remote worker nodes
RemoteWorkerLabel = "node.openshift.io/remote-worker"

// OpenshiftOperatorNamespace is the default namespace for
// the openshift operator resources.
OpenshiftOperatorNamespace = "openshift-operators"
)

// IngressClusterOperatorName returns the namespaced name of the ClusterOperator
Expand Down
1 change: 1 addition & 0 deletions pkg/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ func New(config operatorconfig.Config, kubeConfig *rest.Config) (*Operator, erro
operatorcontroller.DefaultOperandNamespace: {},
operatorcontroller.DefaultCanaryNamespace: {},
operatorcontroller.GlobalMachineSpecifiedConfigNamespace: {},
operatorcontroller.OpenshiftOperatorNamespace: {},
},
},
// Use a non-caching client everywhere. The default split client does not
Expand Down
17 changes: 17 additions & 0 deletions test/e2e/gateway_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func testGatewayAPIResources(t *testing.T) {
// - the OSSM Istio operator is installed successfully and has status Running and Ready. e.g. istio-operator-9f5c88857-2xfrr -n openshift-operators
// - Istiod is installed successfully and has status Running and Ready. e.g istiod-openshift-gateway-867bb8d5c7-4z6mp -n openshift-ingress
// - the SMCP is created successfully (OSSM 2.x).
// - deletes SMCP and subscription and tests if it gets recreated
func testGatewayAPIIstioInstallation(t *testing.T) {
t.Helper()

Expand All @@ -118,6 +119,22 @@ func testGatewayAPIIstioInstallation(t *testing.T) {
if err := assertSMCP(t); err != nil {
t.Fatalf("failed to find expected SMCP: %v", err)
}
// delete existing SMCP to test it gets recreated
if err := deleteExistingSMCP(t); err != nil {
t.Fatalf("failed to delete existing SMCP: %v", err)
}
// check if SMCP gets recreated
if err := assertSMCP(t); err != nil {
t.Fatalf("failed to find expected SMCP: %v", err)
}
// delete existing Subscription to test it gets recreated
if err := deleteExistingSubscription(t, openshiftOperatorsNamespace, expectedSubscriptionName); err != nil {
t.Fatalf("failed to delete existing Subscription %s: %v", expectedSubscriptionName, err)
}
// checks if subscription gets recreated.
if err := assertSubscription(t, openshiftOperatorsNamespace, expectedSubscriptionName); err != nil {
t.Fatalf("failed to find expected Subscription %s: %v", expectedSubscriptionName, err)
}
}

// testGatewayAPIObjects tests that Gateway API objects can be created successfully.
Expand Down
93 changes: 93 additions & 0 deletions test/e2e/util_gatewayapi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,53 @@ func assertSubscription(t *testing.T, namespace, subName string) error {
return err
}

// deleteExistingSubscription deletes if the subscription of the given name exists and returns an error if not.
func deleteExistingSubscription(t *testing.T, namespace, subName string) error {
t.Helper()
existingSubscription := &operatorsv1alpha1.Subscription{}
newSubscription := &operatorsv1alpha1.Subscription{}
nsName := types.NamespacedName{Namespace: namespace, Name: subName}

err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 30*time.Second, false, func(context context.Context) (bool, error) {
if err := kclient.Get(context, nsName, existingSubscription); err != nil {
t.Logf("failed to get Subscription %s: %v", nsName.Name, err)
return false, nil
}
return true, nil
})
if err != nil {
t.Errorf("failed to get Subscription %s: %v", nsName.Name, err)
return err
}
// deleting Subscription.
err = kclient.Delete(context.Background(), existingSubscription)
if err != nil {
t.Errorf("failed to delete Subscription %s: %v", nsName.Name, err)
return err
}
err = wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 1*time.Minute, false, func(ctx context.Context) (bool, error) {
if err := kclient.Get(ctx, nsName, newSubscription); err != nil {
if kerrors.IsNotFound(err) {
return true, nil
}
t.Logf("failed to delete Subscription %s: %v", nsName.Name, err)
return false, nil
}
// if new Subscription got recreated while the poll ensures the Subscription is deleted.
if newSubscription != nil && newSubscription.UID != existingSubscription.UID {
return true, nil
}
t.Logf("Subscription %s still exists", nsName.Name)
return false, nil
})
if err != nil {
return fmt.Errorf("timed out waiting for Subscription %s to be deleted: %v", nsName.Name, err)
}
t.Logf("deleted Subscription %s", nsName.Name)
return nil

}

// assertOSSMOperator checks if the OSSM Istio operator gets successfully installed
// and returns an error if not.
func assertOSSMOperator(t *testing.T) error {
Expand Down Expand Up @@ -625,6 +672,52 @@ func assertSMCP(t *testing.T) error {
return err
}

// deleteExistingSMCP deletes if the SMCP exists and returns an error if not.
func deleteExistingSMCP(t *testing.T) error {
t.Helper()
existingSMCP := &maistrav2.ServiceMeshControlPlane{}
newSMCP := &maistrav2.ServiceMeshControlPlane{}
nsName := types.NamespacedName{Namespace: operatorcontroller.DefaultOperandNamespace, Name: openshiftSMCPName}

err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 30*time.Second, false, func(context context.Context) (bool, error) {
if err := kclient.Get(context, nsName, existingSMCP); err != nil {
t.Logf("failed to get smcp %s: %v", nsName.Name, err)
return false, nil
}
return true, nil
})
if err != nil {
t.Errorf("failed to get smcp %s: %v", nsName.Name, err)
return err
}
// deleting SMCP.
err = kclient.Delete(context.Background(), existingSMCP)
if err != nil {
t.Errorf("failed to delete smcp %s: %v", nsName.Name, err)
return err
}
err = wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 1*time.Minute, false, func(ctx context.Context) (bool, error) {
if err := kclient.Get(ctx, nsName, newSMCP); err != nil {
if kerrors.IsNotFound(err) {
return true, nil
}
t.Logf("failed to delete SMCP %s: %v", nsName.Name, err)
return false, nil
}
// if new SMCP got recreated while the poll ensures the SMCP is deleted.
if newSMCP != nil && newSMCP.UID != existingSMCP.UID {
return true, nil
}
t.Logf("smcp %s still exists", nsName.Name)
return false, nil
})
if err != nil {
return fmt.Errorf("timed out waiting for SMCP %s to be deleted: %v", nsName.Name, err)
}
t.Logf("deleted smcp %s", nsName.Name)
return nil
}

// assertDNSRecord checks to make sure a DNSRecord exists in a ready state,
// and returns an error if not.
func assertDNSRecord(t *testing.T, recordName types.NamespacedName) error {
Expand Down