From 409975292c463e48e8327c2df915ad7b0ce483f5 Mon Sep 17 00:00:00 2001 From: babusrithar Date: Thu, 21 Sep 2023 14:20:02 -0400 Subject: [PATCH] CRDB-31650 : Add UseSpot instance option. --- pkg/cmd/roachtest/spec/cluster_spec.go | 7 +++++-- pkg/cmd/roachtest/spec/option.go | 13 +++++++++++++ pkg/roachprod/vm/gce/gcloud.go | 12 ++++++------ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/pkg/cmd/roachtest/spec/cluster_spec.go b/pkg/cmd/roachtest/spec/cluster_spec.go index 8c40fc6f96f7..8292ca9cb798 100644 --- a/pkg/cmd/roachtest/spec/cluster_spec.go +++ b/pkg/cmd/roachtest/spec/cluster_spec.go @@ -98,7 +98,8 @@ type ClusterSpec struct { ReusePolicy clusterReusePolicy TerminateOnMigration bool UbuntuVersion vm.UbuntuVersion - + // UseSpot requests a spot instance (or the cloud provider's equivalent). + UseSpot bool // FileSystem determines the underlying FileSystem // to be used. The default is ext4. FileSystem fileSystemType @@ -200,6 +201,7 @@ func getGCEOpts( minCPUPlatform string, arch vm.CPUArch, volumeType string, + useSpot bool, ) vm.ProviderOpts { opts := gce.DefaultProviderOpts() opts.MachineType = machineType @@ -224,6 +226,7 @@ func getGCEOpts( opts.UseMultipleDisks = !RAID0 } opts.TerminateOnMigration = terminateOnMigration + opts.UseSpot = useSpot if volumeType != "" { opts.PDVolumeType = volumeType } @@ -427,7 +430,7 @@ func (s *ClusterSpec) RoachprodOpts( case GCE: providerOpts = getGCEOpts(machineType, zones, s.VolumeSize, ssdCount, createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration, - s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch), s.GCE.VolumeType, + s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch), s.GCE.VolumeType, s.UseSpot, ) case Azure: providerOpts = getAzureOpts(machineType, zones) diff --git a/pkg/cmd/roachtest/spec/option.go b/pkg/cmd/roachtest/spec/option.go index b0e2a0adecd8..1d38816f5d87 100644 --- a/pkg/cmd/roachtest/spec/option.go +++ b/pkg/cmd/roachtest/spec/option.go @@ -185,6 +185,19 @@ func 
TerminateOnMigration() Option { } } +// UseSpotInstances is an Option which requests spot instances (or the cloud +// provider's equivalent) instead of on-demand VMs. Spot VMs are +// cheaper but can be terminated at any time by the cloud provider. +// This option is only supported by GCE for now. +// See https://cloud.google.com/compute/docs/instances/spot, +// https://azure.microsoft.com/en-in/products/virtual-machines/spot +// and https://aws.amazon.com/ec2/spot/ for more details. +func UseSpotInstances() Option { + return func(spec *ClusterSpec) { + spec.UseSpot = true + } +} + // SetFileSystem is an Option which can be used to set // the underlying file system to be used. func SetFileSystem(fs fileSystemType) Option { diff --git a/pkg/roachprod/vm/gce/gcloud.go b/pkg/roachprod/vm/gce/gcloud.go index 1a6c13d17a99..8e4a4887557f 100644 --- a/pkg/roachprod/vm/gce/gcloud.go +++ b/pkg/roachprod/vm/gce/gcloud.go @@ -271,9 +271,9 @@ func DefaultProviderOpts() *ProviderOpts { PDVolumeType: "pd-ssd", PDVolumeSize: 500, TerminateOnMigration: false, + UseSpot: false, useSharedUser: true, preemptible: false, - useSpot: false, } } @@ -295,17 +295,17 @@ type ProviderOpts struct { PDVolumeType string PDVolumeSize int UseMultipleDisks bool + // UseSpot requests spot instances (i.e., the latest version of preemptibles, which can run > 24 hours). + UseSpot bool + // GCE allows two availability policies in case of a maintenance event (see --maintenance-policy via gcloud), // 'TERMINATE' or 'MIGRATE'. The default is 'MIGRATE' which we denote by 'TerminateOnMigration == false'. TerminateOnMigration bool - // useSharedUser indicates that the shared user rather than the personal // user should be used to ssh into the remote machines. useSharedUser bool // use preemptible instances preemptible bool - // use spot instances (i.e., latest version of preemptibles which can run > 24 hours) - useSpot bool } // Provider is the GCE implementation of the vm.Provider interface. 
@@ -835,7 +835,7 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) { strings.Join(defaultZones, ","))) flags.BoolVar(&o.preemptible, ProviderName+"-preemptible", false, "use preemptible GCE instances (lifetime cannot exceed 24h)") - flags.BoolVar(&o.useSpot, ProviderName+"-use-spot", false, + flags.BoolVar(&o.UseSpot, ProviderName+"-use-spot", false, "use spot GCE instances (like preemptible but lifetime can exceed 24h)") flags.BoolVar(&o.TerminateOnMigration, ProviderName+"-terminateOnMigration", false, "use 'TERMINATE' maintenance policy (for GCE live migrations)") @@ -1043,7 +1043,7 @@ func (p *Provider) Create( // Preemptible instances require the following arguments set explicitly args = append(args, "--maintenance-policy", "TERMINATE") args = append(args, "--no-restart-on-failure") - } else if providerOpts.useSpot { + } else if providerOpts.UseSpot { args = append(args, "--provisioning-model", "SPOT") } else { if providerOpts.TerminateOnMigration {