Skip to content

Commit

Permalink
Merge pull request #4761 from muraee/rosa-cp-upgrade
Browse files Browse the repository at this point in the history
✨ ROSA: Reconcile ROSAControlPlane version
  • Loading branch information
k8s-ci-robot authored Jan 29, 2024
2 parents 836e77c + ae3c3be commit e10643a
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,8 @@ spec:
supportRoleARN:
type: string
version:
description: Openshift version, for example "openshift-v4.14.5".
description: Openshift version, for example "4.14.5".
pattern: ^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$
type: string
workerRoleARN:
type: string
Expand Down
3 changes: 3 additions & 0 deletions controlplane/rosa/api/v1beta2/conditions_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
// Condition types reported on ROSAControlPlane objects.
const (
	// ROSAControlPlaneReadyCondition reports on the successful reconciliation of ROSAControlPlane.
	ROSAControlPlaneReadyCondition clusterv1.ConditionType = "ROSAControlPlaneReady"

	// ROSAControlPlaneUpgradingCondition reports whether ROSAControlPlane is upgrading or not.
	// The controller sets it to true while a control plane upgrade policy is scheduled, and to
	// false with reason "upgraded" once the cluster version matches the requested spec version.
	ROSAControlPlaneUpgradingCondition clusterv1.ConditionType = "ROSAControlPlaneUpgrading"
)
6 changes: 4 additions & 2 deletions controlplane/rosa/api/v1beta2/rosacontrolplane_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ type RosaControlPlaneSpec struct { //nolint: maligned
// The AWS Region the cluster lives in.
Region *string `json:"region"`

// Openshift version, for example "openshift-v4.14.5".
Version *string `json:"version"`
// Openshift version, for example "4.14.5".
//
// +kubebuilder:validation:Pattern:=`^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$`
Version string `json:"version"`

// ControlPlaneEndpoint represents the endpoint used to communicate with the control plane.
// +optional
Expand Down
5 changes: 0 additions & 5 deletions controlplane/rosa/api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 69 additions & 1 deletion controlplane/rosa/controllers/rosacontrolplane_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"time"

cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -188,6 +189,15 @@ func (r *ROSAControlPlaneReconciler) reconcileNormal(ctx context.Context, rosaSc
}
defer rosaClient.Close()

isValid, err := validateControlPlaneSpec(rosaClient, rosaScope)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to validate ROSAControlPlane.spec: %w", err)
}
if !isValid {
// don't requeue because input is invalid and manual intervention is needed.
return ctrl.Result{}, nil
}

cluster, err := rosaClient.GetCluster()
if err != nil {
return ctrl.Result{}, err
Expand All @@ -213,6 +223,9 @@ func (r *ROSAControlPlaneReconciler) reconcileNormal(ctx context.Context, rosaSc
if err := r.reconcileKubeconfig(ctx, rosaScope, rosaClient, cluster); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to reconcile kubeconfig: %w", err)
}
if err := r.reconcileClusterVersion(rosaScope, rosaClient, cluster); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
case cmv1.ClusterStateError:
errorMessage := cluster.Status().ProvisionErrorMessage()
Expand Down Expand Up @@ -255,7 +268,7 @@ func (r *ROSAControlPlaneReconciler) reconcileNormal(ctx context.Context, rosaSc
DisableUserWorkloadMonitoring(true).
Version(
cmv1.NewVersion().
ID(*rosaScope.ControlPlane.Spec.Version).
ID(fmt.Sprintf("openshift-v%s", rosaScope.ControlPlane.Spec.Version)).
ChannelGroup("stable"),
).
ExpirationTimestamp(time.Now().Add(1 * time.Hour)).
Expand Down Expand Up @@ -394,6 +407,41 @@ func (r *ROSAControlPlaneReconciler) reconcileDelete(ctx context.Context, rosaSc
return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}

// reconcileClusterVersion moves the cluster towards the version requested in
// ROSAControlPlane.spec.version.
//
// If the cluster already reports the requested version, the upgrading condition
// is marked false and nothing else is done. Otherwise the existing control plane
// upgrade policy is reused, or a new one is scheduled starting now, and the
// upgrading condition is set to true with the policy state as its reason.
//
// An error is returned when looking up or scheduling the upgrade fails, and
// also when the active policy targets a different version than the requested
// one, so that the reconcile is retried once that upgrade has finished.
func (r *ROSAControlPlaneReconciler) reconcileClusterVersion(rosaScope *scope.ROSAControlPlaneScope, rosaClient *rosa.RosaClient, cluster *cmv1.Cluster) error {
	desired := rosaScope.ControlPlane.Spec.Version

	// Nothing to upgrade: the cluster is already at the requested version.
	if cluster.Version().RawID() == desired {
		conditions.MarkFalse(rosaScope.ControlPlane, rosacontrolplanev1.ROSAControlPlaneUpgradingCondition, "upgraded", clusterv1.ConditionSeverityInfo, "")
		return nil
	}

	policy, err := rosaClient.CheckExistingScheduledUpgrade(cluster)
	if err != nil {
		return fmt.Errorf("failed to get existing scheduled upgrades: %w", err)
	}

	// No upgrade is pending yet, so request one for the desired version.
	if policy == nil {
		if policy, err = rosaClient.ScheduleControlPlaneUpgrade(cluster, desired, time.Now()); err != nil {
			return fmt.Errorf("failed to schedule control plane upgrade to version %s: %w", desired, err)
		}
	}

	state := policy.State().Value()
	target := policy.Version()

	// Surface the upgrade in the status before deciding whether it is the one we asked for.
	conditions.Set(rosaScope.ControlPlane, &clusterv1.Condition{
		Type:    rosacontrolplanev1.ROSAControlPlaneUpgradingCondition,
		Status:  corev1.ConditionTrue,
		Reason:  string(state),
		Message: fmt.Sprintf("Upgrading to version %s", target),
	})

	if target == desired {
		return nil
	}

	// The cluster is busy upgrading to another version; fail so the request is
	// requeued and retried after the current upgrade completes.
	return fmt.Errorf("there is already a %s upgrade to version %s", state, target)
}

func (r *ROSAControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, rosaScope *scope.ROSAControlPlaneScope, rosaClient *rosa.RosaClient, cluster *cmv1.Cluster) error {
rosaScope.Debug("Reconciling ROSA kubeconfig for cluster", "cluster-name", rosaScope.RosaClusterName())

Expand Down Expand Up @@ -510,6 +558,26 @@ func (r *ROSAControlPlaneReconciler) reconcileClusterAdminPassword(ctx context.C
return password, nil
}

// validateControlPlaneSpec checks the user-provided ROSAControlPlane spec.
//
// It clears any previous status failure message, then asks the ROSA client
// whether spec.version is supported. It returns (true, nil) when the spec is
// valid, (false, nil) with status.failureMessage populated when the version is
// not supported, and (false, err) when the support lookup itself fails.
func validateControlPlaneSpec(rosaClient *rosa.RosaClient, rosaScope *scope.ROSAControlPlaneScope) (bool, error) {
	// Drop the message left by an earlier validation pass.
	rosaScope.ControlPlane.Status.FailureMessage = nil

	requested := rosaScope.ControlPlane.Spec.Version
	supported, err := rosaClient.IsVersionSupported(requested)
	switch {
	case err != nil:
		return false, err
	case supported:
		// TODO: add more input validations
		return true, nil
	}

	reason := fmt.Sprintf("version %s is not supported", requested)
	rosaScope.ControlPlane.Status.FailureMessage = &reason
	return false, nil
}

func (r *ROSAControlPlaneReconciler) rosaClusterToROSAControlPlane(log *logger.Logger) handler.MapFunc {
return func(ctx context.Context, o client.Object) []ctrl.Request {
rosaCluster, ok := o.(*expinfrav1.ROSACluster)
Expand Down
100 changes: 100 additions & 0 deletions pkg/rosa/versions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package rosa

import (
"fmt"
"time"

cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
)

// IsVersionSupported reports whether versionID can be used for ROSA clusters.
//
// It searches the OCM versions list for a "stable" channel entry with the given
// raw ID, requesting the "hcp" product. The version is considered supported only
// when such an entry exists and is enabled for both ROSA and hosted control
// planes. A failed lookup is returned as an error.
func (c *RosaClient) IsVersionSupported(versionID string) (bool, error) {
	request := c.ocm.ClustersMgmt().V1().
		Versions().
		List().
		Search(fmt.Sprintf("raw_id='%s' AND channel_group = '%s'", versionID, "stable")).
		Page(1).
		Size(1).
		Parameter("product", "hcp")

	response, err := request.Send()
	if err != nil {
		return false, handleErr(response.Error(), err)
	}

	// No matching version at all.
	if response.Total() == 0 {
		return false, nil
	}

	match := response.Items().Get(0)
	if !match.ROSAEnabled() {
		return false, nil
	}
	return match.HostedControlPlaneEnabled(), nil
}

// CheckExistingScheduledUpgrade looks up the control plane upgrade policies of
// the cluster and returns the first one whose upgrade type is ControlPlane.
// It returns (nil, nil) when there is no such policy, and an error when the
// policies cannot be listed.
func (c *RosaClient) CheckExistingScheduledUpgrade(cluster *cmv1.Cluster) (*cmv1.ControlPlaneUpgradePolicy, error) {
	policies, err := c.getControlPlaneUpgradePolicies(cluster.ID())
	if err != nil {
		return nil, err
	}

	for i := range policies {
		if policies[i].UpgradeType() != cmv1.UpgradeTypeControlPlane {
			continue
		}
		return policies[i], nil
	}

	return nil, nil
}

// ScheduleControlPlaneUpgrade creates a manual control plane upgrade policy that
// upgrades the cluster to version at nextRun and returns the policy as stored by
// OCM. If nextRun is earlier than six minutes from now it is pushed out to that
// point. An error is returned when the policy cannot be built or the API call fails.
func (c *RosaClient) ScheduleControlPlaneUpgrade(cluster *cmv1.Cluster, version string, nextRun time.Time) (*cmv1.ControlPlaneUpgradePolicy, error) {
	// The OCM API requires the next run to be at least 5 minutes in the future;
	// use 6 minutes to leave room for request latency.
	if floor := time.Now().Add(6 * time.Minute); floor.After(nextRun) {
		nextRun = floor
	}

	builder := cmv1.NewControlPlaneUpgradePolicy().
		UpgradeType(cmv1.UpgradeTypeControlPlane).
		ScheduleType(cmv1.ScheduleTypeManual).
		Version(version).
		NextRun(nextRun)

	policy, err := builder.Build()
	if err != nil {
		return nil, err
	}

	policiesClient := c.ocm.ClustersMgmt().V1().
		Clusters().
		Cluster(cluster.ID()).
		ControlPlane().
		UpgradePolicies()

	response, err := policiesClient.Add().Body(policy).Send()
	if err != nil {
		return nil, handleErr(response.Error(), err)
	}

	return response.Body(), nil
}

// getControlPlaneUpgradePolicies lists every control plane upgrade policy of the
// cluster identified by clusterID. Policies are fetched page by page, 100 at a
// time, until a page comes back with fewer items than requested. On a failed
// request the policies collected so far are discarded and the error is returned.
func (c *RosaClient) getControlPlaneUpgradePolicies(clusterID string) (controlPlaneUpgradePolicies []*cmv1.ControlPlaneUpgradePolicy, err error) {
	const pageSize = 100

	policiesClient := c.ocm.ClustersMgmt().V1().
		Clusters().
		Cluster(clusterID).
		ControlPlane().
		UpgradePolicies()

	for page := 1; ; page++ {
		response, listErr := policiesClient.List().
			Page(page).
			Size(pageSize).
			Send()
		if listErr != nil {
			return nil, handleErr(response.Error(), listErr)
		}

		controlPlaneUpgradePolicies = append(controlPlaneUpgradePolicies, response.Items().Slice()...)

		// A short page means there is nothing left to fetch.
		if response.Size() < pageSize {
			return controlPlaneUpgradePolicies, nil
		}
	}
}

0 comments on commit e10643a

Please sign in to comment.