Skip to content
This repository has been archived by the owner on Jun 8, 2019. It is now read-only.

Commit

Permalink
Add support for slow-scaling Azure instance classes
Browse files: browse the repository at this point in the history
This is useful for rarer instance classes where physical capacity is low: growing each scale set by only one instance at a time helps avoid allocation failures.
  • Loading branch information
cberner committed Jul 14, 2017
1 parent 341e415 commit 05d402a
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 10 deletions.
14 changes: 10 additions & 4 deletions autoscaler/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ def get_retry_after(self, response):


class AzureGroups(object):
def __init__(self, resource_groups, client: AzureApi):
def __init__(self, resource_groups, slow_scale_classes, client: AzureApi):
self.resource_groups = resource_groups
self.slow_scale_classes = slow_scale_classes
self.client = client

def get_all_groups(self, kube_nodes):
Expand All @@ -76,7 +77,8 @@ def get_all_groups(self, kube_nodes):
scale_sets_by_type.setdefault((scale_set.location, scale_set.instance_type), []).append(scale_set)
for key, scale_sets in scale_sets_by_type.items():
location, instance_type = key
groups.append(AzureVirtualScaleSet(location, resource_group.name, self.client, instance_type, scale_sets, kube_nodes))
slow_scale = _get_azure_class(instance_type) in self.slow_scale_classes
groups.append(AzureVirtualScaleSet(location, resource_group.name, self.client, instance_type, slow_scale, scale_sets, kube_nodes))

return groups

Expand All @@ -96,7 +98,7 @@ def _get_azure_class(type_):
class AzureVirtualScaleSet(AutoScalingGroup):
provider = 'azure'

def __init__(self, region, resource_group, client: AzureApi, instance_type, scale_sets: List[AzureScaleSet], kube_nodes):
def __init__(self, region, resource_group, client: AzureApi, instance_type, slow_scale: bool, scale_sets: List[AzureScaleSet], kube_nodes):
self.client = client
self.instance_type = instance_type
self.tags = {}
Expand All @@ -111,6 +113,7 @@ def __init__(self, region, resource_group, client: AzureApi, instance_type, scal
# HACK: for matching node selectors
self.selectors['azure/type'] = self.instance_type
self.selectors['azure/class'] = _get_azure_class(self.instance_type)
self.slow_scale = slow_scale

self.min_size = 0
self.max_size = 10000
Expand Down Expand Up @@ -176,7 +179,10 @@ def set_desired_capacity(self, new_desired_capacity):
futures = []
for scale_set in self.scale_sets.values():
if scale_set.capacity < _SCALE_SET_SIZE_LIMIT:
new_group_capacity = min(_SCALE_SET_SIZE_LIMIT, scale_set.capacity + scale_out)
if self.slow_scale:
new_group_capacity = scale_set.capacity + 1
else:
new_group_capacity = min(_SCALE_SET_SIZE_LIMIT, scale_set.capacity + scale_out)
scale_out -= (new_group_capacity - scale_set.capacity)
if scale_set.provisioning_state == 'Updating':
logger.warn("Update of {} already in progress".format(scale_set.name))
Expand Down
4 changes: 2 additions & 2 deletions autoscaler/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class Cluster(object):

def __init__(self, aws_regions, aws_access_key, aws_secret_key,
azure_client_id, azure_client_secret, azure_subscription_id, azure_tenant_id,
azure_resource_group_names, kubeconfig,
azure_resource_group_names, azure_slow_scale_classes, kubeconfig,
idle_threshold, type_idle_threshold,
instance_init_time, cluster_name, notifier,
max_scale_in_fraction=0.1,
Expand Down Expand Up @@ -149,7 +149,7 @@ def __init__(self, aws_regions, aws_access_key, aws_secret_key,
monitor_client.config.retry_policy.policy = azure.AzureBoundedRetry.from_retry(monitor_client.config.retry_policy.policy)
self.azure_client = AzureWriteThroughCachedApi(AzureWrapper(compute_client, monitor_client))

self.azure_groups = azure.AzureGroups(resource_groups, self.azure_client)
self.azure_groups = azure.AzureGroups(resource_groups, azure_slow_scale_classes, self.azure_client)

# config
self.azure_resource_group_names = azure_resource_group_names
Expand Down
4 changes: 3 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
@click.option("--type-idle-threshold", default=3600*24*7)
@click.option("--over-provision", default=5)
@click.option("--max-scale-in-fraction", default=0.1)
@click.option("--azure-slow-scale-classes", default="")
@click.option("--azure-resource-groups")
@click.option("--azure-client-id", default=None, envvar='AZURE_CLIENT_ID')
@click.option("--azure-client-secret", default=None, envvar='AZURE_CLIENT_SECRET')
Expand All @@ -51,7 +52,7 @@
"for more verbosity.",
type=click.IntRange(0, 3, clamp=True),
count=True)
def main(cluster_name, aws_regions, azure_resource_groups, sleep, kubeconfig,
def main(cluster_name, aws_regions, azure_resource_groups, azure_slow_scale_classes, sleep, kubeconfig,
azure_client_id, azure_client_secret, azure_subscription_id, azure_tenant_id,
aws_access_key, aws_secret_key, datadog_api_key,
idle_threshold, type_idle_threshold, max_scale_in_fraction,
Expand All @@ -76,6 +77,7 @@ def main(cluster_name, aws_regions, azure_resource_groups, sleep, kubeconfig,
azure_subscription_id=azure_subscription_id,
azure_tenant_id=azure_tenant_id,
azure_resource_group_names=azure_resource_groups.split(',') if azure_resource_groups else [],
azure_slow_scale_classes=azure_slow_scale_classes.split(',') if azure_slow_scale_classes else [],
kubeconfig=kubeconfig,
idle_threshold=idle_threshold,
instance_init_time=instance_init_time,
Expand Down
31 changes: 28 additions & 3 deletions test/test_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,38 @@ def test_scale_up(self):
resource_group = 'test-resource-group'
scale_set = AzureScaleSet(region, resource_group, 'test-scale-set', instance_type, 0, 'Succeeded')

virtual_scale_set = AzureVirtualScaleSet(region, resource_group, AzureWrapper(mock_client, monitor_client), instance_type, [scale_set], [])
virtual_scale_set = AzureVirtualScaleSet(region, resource_group, AzureWrapper(mock_client, monitor_client), instance_type, False, [scale_set], [])

virtual_scale_set.scale(5)

mock_client.virtual_machine_scale_sets.create_or_update.assert_called_once()
self.assertEqual(mock_client.virtual_machine_scale_sets.create_or_update.call_args[1]['parameters'].sku.capacity, 5)

def test_slow_scale_up(self):
    """Check scale-up behaviour when the slow-scale flag is enabled.

    A virtual scale set is built over two scale sets with ``slow_scale``
    set to True, then asked to scale to 2. The compute client must see
    exactly two ``create_or_update`` calls, each requesting a SKU
    capacity of 1.
    """
    region = 'test'
    resource_group = 'test-resource-group'
    instance_type = 'Standard_D1_v2'

    # Compute client stub: no existing VMs; create_or_update is recorded
    # by the auto-created child mock.
    compute_client = mock.Mock(**{
        'virtual_machine_scale_set_vms.list.return_value': [],
    })
    # Monitor client stub: empty activity log.
    activity_client = mock.Mock(**{
        'activity_logs.list.return_value': [],
    })

    # The fifth positional argument is 0 for both scale sets
    # (presumably the starting capacity -- confirm against AzureScaleSet).
    scale_sets = [
        AzureScaleSet(region, resource_group, scale_set_name, instance_type, 0, 'Succeeded')
        for scale_set_name in ('test-scale-set', 'test-scale-set2')
    ]

    api = AzureWrapper(compute_client, activity_client)
    virtual_scale_set = AzureVirtualScaleSet(region, resource_group, api, instance_type, True, scale_sets, [])

    virtual_scale_set.scale(2)

    create_or_update = compute_client.virtual_machine_scale_sets.create_or_update
    self.assertEqual(create_or_update.call_count, 2)
    # Exactly two calls were recorded (asserted above); each must ask
    # for a capacity of 1.
    for recorded_call in create_or_update.call_args_list:
        requested = recorded_call[1]['parameters']
        self.assertEqual(requested.sku.capacity, 1)

def test_out_of_quota(self):
region = 'test'
mock_client = mock.Mock()
Expand All @@ -51,7 +76,7 @@ def test_out_of_quota(self):
resource_group = 'test-resource-group'
scale_set = AzureScaleSet(region, resource_group, 'test-scale-set', instance_type, 0, 'Succeeded',
timeout_until=datetime.now(pytz.utc) + timedelta(minutes=10), timeout_reason="fake reason")
virtual_scale_set = AzureVirtualScaleSet(region, resource_group, AzureWrapper(mock_client, monitor_client), instance_type, [scale_set], [])
virtual_scale_set = AzureVirtualScaleSet(region, resource_group, AzureWrapper(mock_client, monitor_client), instance_type, False, [scale_set], [])
self.assertTrue(virtual_scale_set.is_timed_out())

def test_scale_in(self):
Expand Down Expand Up @@ -80,7 +105,7 @@ def test_scale_in(self):
instance_type = 'Standard_D1_v2'
scale_set = AzureScaleSet(region, resource_group, 'test-scale-set', instance_type, 1, 'Succeeded')

virtual_scale_set = AzureVirtualScaleSet(region, resource_group, AzureWrapper(mock_client, monitor_client), instance_type, [scale_set], [test_node])
virtual_scale_set = AzureVirtualScaleSet(region, resource_group, AzureWrapper(mock_client, monitor_client), instance_type, False, [scale_set], [test_node])

self.assertEqual(virtual_scale_set.instance_ids, {instance.vm_id})
self.assertEqual(virtual_scale_set.nodes, [test_node])
Expand Down
1 change: 1 addition & 0 deletions test/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def setUp(self):
azure_subscription_id='',
azure_tenant_id='',
azure_resource_group_names=[],
azure_slow_scale_classes=[],
kubeconfig='~/.kube/config',
idle_threshold=60,
instance_init_time=60,
Expand Down

0 comments on commit 05d402a

Please sign in to comment.