Skip to content

Commit

Permalink
azure: support for existing subnet and internal IPs
Browse files Browse the repository at this point in the history
This change adds the ability to configure azure.subnet_id

fixes #2910
  • Loading branch information
tomkukral committed Dec 3, 2024
1 parent 13ce397 commit 2b44133
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 3 deletions.
15 changes: 15 additions & 0 deletions docs/source/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,21 @@ Available fields and semantics:
# Reference: https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview
storage_account: user-storage-account-name
# Specify subnet_id to use for instances (optional).
# SkyPilot creates a new vnet and subnet by default, but it will reuse an existing subnet if one is declared.
subnet_id: /subscriptions/<subscription-id>/resourceGroups/<resource-group-name>/providers/Microsoft.Network/virtualNetworks/<vnet-name>/subnets/<subnet-name>
# Should instances be assigned private IPs only? (optional)
#
# Set to true to use private IPs to communicate between the local client and
# any SkyPilot nodes. This requires the networking stack be properly set up.
#
# When set to true, SkyPilot will only use private subnets to launch nodes and won't expose
# instances on public IP addresses.
# Reference: https://learn.microsoft.com/en-us/azure/virtual-network/virtual-network-manage-subnet?tabs=azure-portal
# Default: false.
use_internal_ips: true
# Advanced Kubernetes configurations (optional).
kubernetes:
# The networking mode for accessing SSH jump pod (optional).
Expand Down
10 changes: 10 additions & 0 deletions sky/clouds/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,14 @@ def make_deploy_resources_variables(
if resource_group_name is None:
resource_group_name = f'{cluster_name.name_on_cloud}-{region_name}'

# Determine subnet_id if configured
subnet_id = skypilot_config.get_nested(('azure', 'subnet_id'), None)

# Determine if internal IPs should be used
use_internal_ips = skypilot_config.get_nested(
('azure', 'use_internal_ips'), False)


# Setup commands to eliminate the banner and restart sshd.
# This script will modify /etc/ssh/sshd_config and add a bash script
# into .bashrc. The bash script will restart sshd if it has not been
Expand Down Expand Up @@ -423,6 +431,8 @@ def _failover_disk_tier() -> Optional[resources_utils.DiskTier]:
'azure_subscription_id': self.get_project_id(dryrun),
'resource_group': resource_group_name,
'use_external_resource_group': use_external_resource_group,
'subnet_id': subnet_id,
'use_internal_ips': use_internal_ips,
}

# Setting disk performance tier for high disk tier.
Expand Down
7 changes: 7 additions & 0 deletions sky/provision/azure/azure-config-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
"metadata": {
"description": "Name of the Network Security Group associated with the SkyPilot cluster."
}
},
"existingSubnet": {
"type": "string",
"metadata": {
"description": "Existing subnet id to use."
}
}
},
"variables": {
Expand Down Expand Up @@ -86,6 +92,7 @@
"apiVersion": "2019-11-01",
"name": "[variables('vnetName')]",
"location": "[variables('location')]",
"condition": "[equals(parameters('existingSubnet'), '')]",
"properties": {
"addressSpace": {
"addressPrefixes": [
Expand Down
17 changes: 14 additions & 3 deletions sky/provision/azure/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ def bootstrap_instances(
'use_external_resource_group field')
use_external_resource_group = provider_config['use_external_resource_group']

subnet_id = provider_config.get('subnet_id', '')

if 'tags' in provider_config:
params['tags'] = provider_config['tags']

Expand Down Expand Up @@ -142,12 +144,17 @@ def bootstrap_instances(
cluster_id, nsg_name = get_cluster_id_and_nsg_name(
resource_group=provider_config['resource_group'],
cluster_name_on_cloud=cluster_name_on_cloud)

# subnet_mask is ignored if subnet_id (of existing subnet) is provided
subnet_mask = provider_config.get('subnet_mask')
if subnet_mask is None:
# choose a random subnet, skipping most common value of 0
random.seed(cluster_id)
subnet_mask = f'10.{random.randint(1, 254)}.0.0/16'
logger.info(f'Using subnet mask: {subnet_mask}')

if subnet_id == '':
# log only when subnet_mask will be used
logger.info(f'Using subnet mask: {subnet_mask}')

parameters = {
'properties': {
Expand All @@ -165,7 +172,10 @@ def bootstrap_instances(
},
'location': {
'value': params['location']
}
},
'existingSubnet': {
'value': subnet_id
},
},
}
}
Expand Down Expand Up @@ -215,6 +225,7 @@ def bootstrap_instances(
# append output resource ids to be used with vm creation
provider_config['msi'] = outputs['msi']['value']
provider_config['nsg'] = outputs['nsg']['value']
provider_config['subnet'] = outputs['subnet']['value']
provider_config[
'subnet'] = outputs['subnet']['value'] if subnet_id == '' else subnet_id

return config
4 changes: 4 additions & 0 deletions sky/templates/azure-ray.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ provider:
# leakage.
disable_launch_config_check: true

{%- if subnet_id is not none %}
subnet_id: {{subnet_id}}
{%- endif %}
use_internal_ips: {{use_internal_ips}}

auth:
ssh_user: azureuser
Expand Down
6 changes: 6 additions & 0 deletions sky/utils/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,12 @@ def get_config_schema():
'resource_group_vm': {
'type': 'string',
},
'subnet_id': {
'type': 'string',
},
'use_internal_ips': {
'type': 'boolean',
},
}
},
'kubernetes': {
Expand Down

0 comments on commit 2b44133

Please sign in to comment.