Skip to content

Commit

Permalink
enable azure gpu on frontend (#4226)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-g-town authored Feb 2, 2024
1 parent ad7ad6d commit 05acfee
Show file tree
Hide file tree
Showing 6 changed files with 394 additions and 228 deletions.
122 changes: 98 additions & 24 deletions dashboard/src/components/AzureProvisionerSettings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ import InputRow from "./form-components/InputRow";
import Button from "./porter/Button";
import Error from "./porter/Error";
import Icon from "./porter/Icon";
import InputSlider from "./porter/InputSlider";
import Link from "./porter/Link";
import Select from "./porter/Select";
import Spacer from "./porter/Spacer";
import Step from "./porter/Step";
import Text from "./porter/Text";
Expand All @@ -53,6 +55,7 @@ type Props = RouteComponentProps & {
provisionerError?: string;
credentialId: string;
clusterId?: number;
gpuModal?: boolean;
};

const VALID_CIDR_RANGE_PATTERN =
Expand All @@ -71,6 +74,11 @@ const AzureProvisionerSettings: React.FC<Props> = (props) => {
const [clusterName, setClusterName] = useState("");
const [azureLocation, setAzureLocation] = useState("eastus");
const [machineType, setMachineType] = useState("Standard_B2als_v2");
const [gpuMinInstances, setGpuMinInstances] = useState(1);
const [gpuMaxInstances, setGpuMaxInstances] = useState(5);
const [gpuInstanceType, setGpuInstanceType] = useState(
"Standard_NC4as_T4_v3"
);
const [isExpanded, setIsExpanded] = useState(false);
const [minInstances, setMinInstances] = useState(1);
const [maxInstances, setMaxInstances] = useState(10);
Expand All @@ -85,13 +93,22 @@ const AzureProvisionerSettings: React.FC<Props> = (props) => {
regionFilteredMachineTypeOptions,
setRegionFilteredMachineTypeOptions,
] = useState<MachineTypeOption[]>(azureSupportedMachineTypes(azureLocation));
const [
regionFilteredGPUMachineTypeOptions,
setRegionFilteredGPUMachineTypeOptions,
] = useState<MachineTypeOption[]>(
azureSupportedMachineTypes(azureLocation, true)
);

const { showIntercomWithMessage } = useIntercom();

useEffect(() => {
setRegionFilteredMachineTypeOptions(
azureSupportedMachineTypes(azureLocation)
);
setRegionFilteredGPUMachineTypeOptions(
azureSupportedMachineTypes(azureLocation, true)
);
}, [azureLocation]);

const markStepStarted = async (
Expand Down Expand Up @@ -188,6 +205,42 @@ const AzureProvisionerSettings: React.FC<Props> = (props) => {
console.log(err);
}

const nodePools = [
new AKSNodePool({
instanceType: "Standard_B2als_v2",
minInstances: 1,
maxInstances: 3,
nodePoolType: NodePoolType.SYSTEM,
mode: "User",
}),
new AKSNodePool({
instanceType: "Standard_B2as_v2",
minInstances: 1,
maxInstances: 3,
nodePoolType: NodePoolType.MONITORING,
mode: "User",
}),
new AKSNodePool({
instanceType: machineType,
minInstances: minInstances || 1,
maxInstances: maxInstances || 10,
nodePoolType: NodePoolType.APPLICATION,
mode: "User",
}),
];

// Conditionally append the GPU AKSNodePool if gpuModal is enabled
if (props.gpuModal) {
nodePools.push(
new AKSNodePool({
instanceType: gpuInstanceType,
minInstances: gpuMinInstances || 0,
maxInstances: gpuMaxInstances || 5,
nodePoolType: NodePoolType.CUSTOM,
})
);
}

const data = new Contract({
cluster: new Cluster({
projectId: currentProject.id,
Expand All @@ -201,29 +254,7 @@ const AzureProvisionerSettings: React.FC<Props> = (props) => {
clusterVersion: clusterVersion || "v1.27.3",
cidrRange: cidrRange || "10.78.0.0/16",
location: azureLocation,
nodePools: [
new AKSNodePool({
instanceType: "Standard_B2als_v2",
minInstances: 1,
maxInstances: 3,
nodePoolType: NodePoolType.SYSTEM,
mode: "User",
}),
new AKSNodePool({
instanceType: "Standard_B2as_v2",
minInstances: 1,
maxInstances: 3,
nodePoolType: NodePoolType.MONITORING,
mode: "User",
}),
new AKSNodePool({
instanceType: machineType,
minInstances: minInstances || 1,
maxInstances: maxInstances || 10,
nodePoolType: NodePoolType.APPLICATION,
mode: "User",
}),
],
nodePools,
skuTier,
}),
},
Expand Down Expand Up @@ -317,7 +348,10 @@ const AzureProvisionerSettings: React.FC<Props> = (props) => {

// TODO: pass in contract as the already parsed object, rather than JSON (requires changes to AWS/GCP provisioning)
const contract = Contract.fromJsonString(
JSON.stringify(props.selectedClusterVersion)
JSON.stringify(props.selectedClusterVersion),
{
ignoreUnknownFields: true,
}
);

if (
Expand Down Expand Up @@ -471,6 +505,46 @@ const AzureProvisionerSettings: React.FC<Props> = (props) => {
);
};

if (props.gpuModal) {
return (
<>
<Select
options={regionFilteredGPUMachineTypeOptions}
width="350px"
disabled={isReadOnly}
value={gpuInstanceType}
setValue={(x: string) => {
setGpuInstanceType(x);
}}
label="GPU Instance type"
/>
<Spacer y={1} />
<InputSlider
label="Max Instances: "
unit="nodes"
min={0}
max={5}
step={1}
width="350px"
disabled={isReadOnly}
value={gpuMaxInstances.toString()}
setValue={(x: number) => {
setGpuMaxInstances(x);
}}
/>
<Button
disabled={isDisabled()}
onClick={createCluster}
status={getStatus()}
>
Provision
</Button>

<Spacer y={0.5} />
</>
);
}

return (
<>
<StyledForm>{renderForm()}</StyledForm>
Expand Down
103 changes: 100 additions & 3 deletions dashboard/src/components/azureUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,34 @@ export const AzureLocationOptions = [
/**
 * Describes one Azure VM size that can be offered as a machine-type choice.
 */
export type MachineTypeOption = {
// Azure VM size identifier (e.g. "Standard_B2als_v2"); this is the value matched by lookups.
value: string;
// Text for the option; presumably what the UI displays — the entries in this file set it equal to `value`.
label: string;
// Hardware summary: vCPU count, RAM (unit not stated here; presumably GiB — TODO confirm),
// and an optional GPU count. A missing or zero GPU is treated as "not a GPU type" by
// azureSupportedMachineTypes.
resources: { vCPU: number; RAM: number; GPU?: number };
// Azure region names (lowercase, e.g. "eastus") in which this size is available.
supportedRegions: Set<string>;
};

/**
 * Lists the machine types from AzureMachineTypeOptions that are available in a region.
 *
 * @param region - Azure region name, checked against each option's `supportedRegions` set.
 * @param gpu - When truthy, only options with a truthy `resources.GPU` are returned;
 *   when omitted or false, only options without a GPU are returned. Both sides are
 *   coerced to booleans, so an undefined GPU count compares equal to an omitted flag.
 * @returns The matching options, in the order they appear in AzureMachineTypeOptions.
 */
export const azureSupportedMachineTypes = (
region: string
region: string,
gpu?: boolean
): MachineTypeOption[] => {
return AzureMachineTypeOptions.filter((option) =>
option.supportedRegions.has(region)
return AzureMachineTypeOptions.filter(
(option) =>
option.supportedRegions.has(region) && !!option.resources.GPU === !!gpu
);
};

/**
 * Looks up the full option record for an Azure machine type.
 *
 * @param type - Machine type identifier, compared by strict equality against
 *   each option's `value` (e.g. "Standard_NC4as_T4_v3").
 * @returns The first entry of AzureMachineTypeOptions whose `value` matches,
 *   or `undefined` when no entry matches.
 */
export const azureMachineTypeDetails = (
  type: string
): MachineTypeOption | undefined => {
  // `find` stops at the first match and yields `undefined` on a miss, so no
  // intermediate array or explicit length check is needed.
  return AzureMachineTypeOptions.find((option) => option.value === type);
};

// Retrieve updated list of supported regions by running the following command: az vm list-skus --all --output table | grep <INSTANCE_TYPE> | grep 1,2,3 | grep None | awk '{print "\047" tolower($2) "\047"}' | paste -s -d, -
// last updated 12/19/2020
//
Expand All @@ -44,6 +61,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_B2als_v2",
label: "Standard_B2als_v2",
resources: { vCPU: 2, RAM: 4 },
supportedRegions: new Set<string>([
"australiaeast",
"brazilsouth",
Expand Down Expand Up @@ -71,6 +89,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_B2as_v2",
label: "Standard_B2as_v2",
resources: { vCPU: 2, RAM: 8 },
supportedRegions: new Set<string>([
"australiaeast",
"brazilsouth",
Expand Down Expand Up @@ -98,6 +117,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_A2_v2",
label: "Standard_A2_v2",
resources: { vCPU: 2, RAM: 4 },
supportedRegions: new Set<string>([
"australiaeast",
"canadacentral",
Expand All @@ -122,6 +142,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_A4_v2",
label: "Standard_A4_v2",
resources: { vCPU: 4, RAM: 8 },
supportedRegions: new Set<string>([
"australiaeast",
"canadacentral",
Expand All @@ -146,6 +167,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_DS1_v2",
label: "Standard_DS1_v2",
resources: { vCPU: 1, RAM: 3.5 },
supportedRegions: new Set<string>([
"australiaeast",
"canadacentral",
Expand All @@ -170,6 +192,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_DS2_v2",
label: "Standard_DS2_v2",
resources: { vCPU: 2, RAM: 7 },
supportedRegions: new Set<string>([
"australiaeast",
"canadacentral",
Expand Down Expand Up @@ -202,6 +225,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_D2ads_v5",
label: "Standard_D2ads_v5",
resources: { vCPU: 2, RAM: 8 },
supportedRegions: new Set<string>([
"australiaeast",
"canadacentral",
Expand All @@ -221,6 +245,7 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
{
value: "Standard_B4als_v2",
label: "Standard_B4als_v2",
resources: { vCPU: 4, RAM: 8 },
supportedRegions: new Set<string>([
"australiaeast",
"brazilsouth",
Expand All @@ -245,4 +270,76 @@ const AzureMachineTypeOptions: MachineTypeOption[] = [
"westus3",
]),
},
{
value: "Standard_NC4as_T4_v3",
label: "Standard_NC4as_T4_v3",
resources: { vCPU: 4, RAM: 28, GPU: 1 },
supportedRegions: new Set<string>([
"australiaeast",
"centralindia",
"eastus",
"eastus2",
"japaneast",
"northeurope",
"southcentralus",
"southeastasia",
"uksouth",
"westeurope",
"westus2",
]),
},
{
value: "Standard_NC8as_T4_v3",
label: "Standard_NC8as_T4_v3",
resources: { vCPU: 8, RAM: 56, GPU: 1 },
supportedRegions: new Set<string>([
"australiaeast",
"centralindia",
"eastus",
"eastus2",
"japaneast",
"northeurope",
"southcentralus",
"southeastasia",
"uksouth",
"westeurope",
"westus2",
]),
},
{
value: "Standard_NC16as_T4_v3",
label: "Standard_NC16as_T4_v3",
resources: { vCPU: 16, RAM: 110, GPU: 1 },
supportedRegions: new Set<string>([
"australiaeast",
"centralindia",
"eastus",
"eastus2",
"japaneast",
"northeurope",
"southcentralus",
"southeastasia",
"uksouth",
"westeurope",
"westus2",
]),
},
{
value: "Standard_NC64as_T4_v3",
label: "Standard_NC64as_T4_v3",
resources: { vCPU: 64, RAM: 440, GPU: 4 },
supportedRegions: new Set<string>([
"australiaeast",
"centralindia",
"eastus",
"eastus2",
"japaneast",
"northeurope",
"southcentralus",
"southeastasia",
"uksouth",
"westeurope",
"westus2",
]),
},
];
Loading

0 comments on commit 05acfee

Please sign in to comment.