From ae3c3bef3451728a3f3d465dcf48e5fdad4f1cb2 Mon Sep 17 00:00:00 2001 From: Mulham Raee Date: Mon, 29 Jan 2024 14:27:24 +0100 Subject: [PATCH] reconcile ControlPlane version --- ...ne.cluster.x-k8s.io_rosacontrolplanes.yaml | 3 +- .../rosa/api/v1beta2/conditions_consts.go | 3 + .../api/v1beta2/rosacontrolplane_types.go | 6 +- .../rosa/api/v1beta2/zz_generated.deepcopy.go | 5 - .../rosacontrolplane_controller.go | 70 +++++++++++- pkg/rosa/versions.go | 100 ++++++++++++++++++ 6 files changed, 178 insertions(+), 9 deletions(-) create mode 100644 pkg/rosa/versions.go diff --git a/config/crd/bases/controlplane.cluster.x-k8s.io_rosacontrolplanes.yaml b/config/crd/bases/controlplane.cluster.x-k8s.io_rosacontrolplanes.yaml index cd41d50b64..db89c2c86b 100644 --- a/config/crd/bases/controlplane.cluster.x-k8s.io_rosacontrolplanes.yaml +++ b/config/crd/bases/controlplane.cluster.x-k8s.io_rosacontrolplanes.yaml @@ -270,7 +270,8 @@ spec: supportRoleARN: type: string version: - description: Openshift version, for example "openshift-v4.14.5". + description: Openshift version, for example "4.14.5". + pattern: ^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$ type: string workerRoleARN: type: string diff --git a/controlplane/rosa/api/v1beta2/conditions_consts.go b/controlplane/rosa/api/v1beta2/conditions_consts.go index 79351f148e..797e04a0a5 100644 --- a/controlplane/rosa/api/v1beta2/conditions_consts.go +++ b/controlplane/rosa/api/v1beta2/conditions_consts.go @@ -21,4 +21,7 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" const ( // ROSAControlPlaneReadyCondition condition reports on the successful reconciliation of ROSAControlPlane. ROSAControlPlaneReadyCondition clusterv1.ConditionType = "ROSAControlPlaneReady" + + // ROSAControlPlaneUpgradingCondition condition reports whether ROSAControlPlane is upgrading or not. 
+ ROSAControlPlaneUpgradingCondition clusterv1.ConditionType = "ROSAControlPlaneUpgrading" ) diff --git a/controlplane/rosa/api/v1beta2/rosacontrolplane_types.go b/controlplane/rosa/api/v1beta2/rosacontrolplane_types.go index 1bb86d4bb7..185aa3a9fe 100644 --- a/controlplane/rosa/api/v1beta2/rosacontrolplane_types.go +++ b/controlplane/rosa/api/v1beta2/rosacontrolplane_types.go @@ -48,8 +48,10 @@ type RosaControlPlaneSpec struct { //nolint: maligned // The AWS Region the cluster lives in. Region *string `json:"region"` - // Openshift version, for example "openshift-v4.14.5". - Version *string `json:"version"` + // Openshift version, for example "4.14.5". + // + // +kubebuilder:validation:Pattern:=`^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$` + Version string `json:"version"` // ControlPlaneEndpoint represents the endpoint used to communicate with the control plane. 
// +optional diff --git a/controlplane/rosa/api/v1beta2/zz_generated.deepcopy.go b/controlplane/rosa/api/v1beta2/zz_generated.deepcopy.go index 39ebe113b7..017f4f60dd 100644 --- a/controlplane/rosa/api/v1beta2/zz_generated.deepcopy.go +++ b/controlplane/rosa/api/v1beta2/zz_generated.deepcopy.go @@ -123,11 +123,6 @@ func (in *RosaControlPlaneSpec) DeepCopyInto(out *RosaControlPlaneSpec) { *out = new(string) **out = **in } - if in.Version != nil { - in, out := &in.Version, &out.Version - *out = new(string) - **out = **in - } out.ControlPlaneEndpoint = in.ControlPlaneEndpoint out.RolesRef = in.RolesRef if in.OIDCID != nil { diff --git a/controlplane/rosa/controllers/rosacontrolplane_controller.go b/controlplane/rosa/controllers/rosacontrolplane_controller.go index 700847215d..a01289587d 100644 --- a/controlplane/rosa/controllers/rosacontrolplane_controller.go +++ b/controlplane/rosa/controllers/rosacontrolplane_controller.go @@ -27,6 +27,7 @@ import ( "time" cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -188,6 +189,15 @@ func (r *ROSAControlPlaneReconciler) reconcileNormal(ctx context.Context, rosaSc } defer rosaClient.Close() + isValid, err := validateControlPlaneSpec(rosaClient, rosaScope) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to validate ROSAControlPlane.spec: %w", err) + } + if !isValid { + // don't requeue because input is invalid and manual intervention is needed. 
+ return ctrl.Result{}, nil + } + cluster, err := rosaClient.GetCluster() if err != nil { return ctrl.Result{}, err @@ -213,6 +223,9 @@ func (r *ROSAControlPlaneReconciler) reconcileNormal(ctx context.Context, rosaSc if err := r.reconcileKubeconfig(ctx, rosaScope, rosaClient, cluster); err != nil { return ctrl.Result{}, fmt.Errorf("failed to reconcile kubeconfig: %w", err) } + if err := r.reconcileClusterVersion(rosaScope, rosaClient, cluster); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil case cmv1.ClusterStateError: errorMessage := cluster.Status().ProvisionErrorMessage() @@ -255,7 +268,7 @@ func (r *ROSAControlPlaneReconciler) reconcileNormal(ctx context.Context, rosaSc DisableUserWorkloadMonitoring(true). Version( cmv1.NewVersion(). - ID(*rosaScope.ControlPlane.Spec.Version). + ID(fmt.Sprintf("openshift-v%s", rosaScope.ControlPlane.Spec.Version)). ChannelGroup("stable"), ). ExpirationTimestamp(time.Now().Add(1 * time.Hour)). @@ -394,6 +407,41 @@ func (r *ROSAControlPlaneReconciler) reconcileDelete(ctx context.Context, rosaSc return ctrl.Result{RequeueAfter: time.Second * 60}, nil } +func (r *ROSAControlPlaneReconciler) reconcileClusterVersion(rosaScope *scope.ROSAControlPlaneScope, rosaClient *rosa.RosaClient, cluster *cmv1.Cluster) error { + version := rosaScope.ControlPlane.Spec.Version + if version == cluster.Version().RawID() { + conditions.MarkFalse(rosaScope.ControlPlane, rosacontrolplanev1.ROSAControlPlaneUpgradingCondition, "upgraded", clusterv1.ConditionSeverityInfo, "") + return nil + } + + scheduledUpgrade, err := rosaClient.CheckExistingScheduledUpgrade(cluster) + if err != nil { + return fmt.Errorf("failed to get existing scheduled upgrades: %w", err) + } + + if scheduledUpgrade == nil { + scheduledUpgrade, err = rosaClient.ScheduleControlPlaneUpgrade(cluster, version, time.Now()) + if err != nil { + return fmt.Errorf("failed to schedule control plane upgrade to version %s: %w", version, err) + } + } + + condition := 
&clusterv1.Condition{ + Type: rosacontrolplanev1.ROSAControlPlaneUpgradingCondition, + Status: corev1.ConditionTrue, + Reason: string(scheduledUpgrade.State().Value()), + Message: fmt.Sprintf("Upgrading to version %s", scheduledUpgrade.Version()), + } + conditions.Set(rosaScope.ControlPlane, condition) + + // if cluster is already upgrading to another version we need to wait until the current upgrade is finished, return an error to requeue and try later. + if scheduledUpgrade.Version() != version { + return fmt.Errorf("there is already a %s upgrade to version %s", scheduledUpgrade.State().Value(), scheduledUpgrade.Version()) + } + + return nil +} + func (r *ROSAControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, rosaScope *scope.ROSAControlPlaneScope, rosaClient *rosa.RosaClient, cluster *cmv1.Cluster) error { rosaScope.Debug("Reconciling ROSA kubeconfig for cluster", "cluster-name", rosaScope.RosaClusterName()) @@ -510,6 +558,26 @@ func (r *ROSAControlPlaneReconciler) reconcileClusterAdminPassword(ctx context.C return password, nil } +func validateControlPlaneSpec(rosaClient *rosa.RosaClient, rosaScope *scope.ROSAControlPlaneScope) (bool, error) { + // reset previous message. 
+ rosaScope.ControlPlane.Status.FailureMessage = nil + + version := rosaScope.ControlPlane.Spec.Version + isSupported, err := rosaClient.IsVersionSupported(version) + if err != nil { + return false, err + } + + if !isSupported { + message := fmt.Sprintf("version %s is not supported", version) + rosaScope.ControlPlane.Status.FailureMessage = &message + return false, nil + } + + // TODO: add more input validations + return true, nil +} + func (r *ROSAControlPlaneReconciler) rosaClusterToROSAControlPlane(log *logger.Logger) handler.MapFunc { return func(ctx context.Context, o client.Object) []ctrl.Request { rosaCluster, ok := o.(*expinfrav1.ROSACluster) diff --git a/pkg/rosa/versions.go b/pkg/rosa/versions.go new file mode 100644 index 0000000000..255ac94190 --- /dev/null +++ b/pkg/rosa/versions.go @@ -0,0 +1,100 @@ +package rosa + +import ( + "fmt" + "time" + + cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1" +) + +// IsVersionSupported checks whether the input version is supported for ROSA clusters. +func (c *RosaClient) IsVersionSupported(versionID string) (bool, error) { + filter := fmt.Sprintf("raw_id='%s' AND channel_group = '%s'", versionID, "stable") + response, err := c.ocm.ClustersMgmt().V1(). + Versions(). + List(). + Search(filter). + Page(1).Size(1). + Parameter("product", "hcp"). + Send() + if err != nil { + return false, handleErr(response.Error(), err) + } + if response.Total() == 0 { + return false, nil + } + + version := response.Items().Get(0) + return version.ROSAEnabled() && version.HostedControlPlaneEnabled(), nil +} + +// CheckExistingScheduledUpgrade checks and returns the current upgrade schedule if any. 
+func (c *RosaClient) CheckExistingScheduledUpgrade(cluster *cmv1.Cluster) (*cmv1.ControlPlaneUpgradePolicy, error) { + upgradePolicies, err := c.getControlPlaneUpgradePolicies(cluster.ID()) + if err != nil { + return nil, err + } + for _, upgradePolicy := range upgradePolicies { + if upgradePolicy.UpgradeType() == cmv1.UpgradeTypeControlPlane { + return upgradePolicy, nil + } + } + return nil, nil +} + +// ScheduleControlPlaneUpgrade schedules a new control plane upgrade to the specified version at the specified time. +func (c *RosaClient) ScheduleControlPlaneUpgrade(cluster *cmv1.Cluster, version string, nextRun time.Time) (*cmv1.ControlPlaneUpgradePolicy, error) { + // earliestNextRun is set to at least 5 min from now by the OCM API. + // we set it to 6 min here to account for latency. + earliestNextRun := time.Now().Add(time.Minute * 6) + if nextRun.Before(earliestNextRun) { + nextRun = earliestNextRun + } + + upgradePolicy, err := cmv1.NewControlPlaneUpgradePolicy(). + UpgradeType(cmv1.UpgradeTypeControlPlane). + ScheduleType(cmv1.ScheduleTypeManual). + Version(version). + NextRun(nextRun). + Build() + if err != nil { + return nil, err + } + + response, err := c.ocm.ClustersMgmt().V1(). + Clusters().Cluster(cluster.ID()). + ControlPlane(). + UpgradePolicies(). + Add().Body(upgradePolicy). + Send() + if err != nil { + return nil, handleErr(response.Error(), err) + } + + return response.Body(), nil +} + +func (c *RosaClient) getControlPlaneUpgradePolicies(clusterID string) (controlPlaneUpgradePolicies []*cmv1.ControlPlaneUpgradePolicy, err error) { + collection := c.ocm.ClustersMgmt().V1(). + Clusters(). + Cluster(clusterID). + ControlPlane(). + UpgradePolicies() + page := 1 + size := 100 + for { + response, err := collection.List(). + Page(page). + Size(size). + Send() + if err != nil { + return nil, handleErr(response.Error(), err) + } + controlPlaneUpgradePolicies = append(controlPlaneUpgradePolicies, response.Items().Slice()...) 
+ if response.Size() < size { + break + } + page++ + } + return +}