-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.ts
218 lines (203 loc) · 5.42 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import * as pulumi from "@pulumi/pulumi";
import * as random from "@pulumi/random";
import * as azure from "@pulumi/azure-native";
import * as k8s from "@pulumi/kubernetes";
import TraefikRoute from "./TraefikRoute";
// Resource group that every Azure resource in this program is placed in.
// Only the Pulumi logical name is given; no explicit arguments are passed.
const resourceGroupLogicalName = "mlplatform-rg";
const resourceGroup = new azure.resources.ResourceGroup(
  resourceGroupLogicalName
);
// Randomly generated administrator password for the PostgreSQL server:
// 16 characters long, with special characters disabled.
const mlflowDBPasswordArgs: random.RandomPasswordArgs = {
  special: false,
  length: 16,
};
const mlflowDBPassword = new random.RandomPassword(
  "mlflow-db-password",
  mlflowDBPasswordArgs
);
// PostgreSQL 15 server in "centralus"; MLflow uses it as its backend store.
const mlflowDBServerArgs: azure.dbforpostgresql.ServerArgs = {
  resourceGroupName: resourceGroup.name,
  location: "centralus",
  version: "15",
  // Compute and storage sizing: Burstable Standard_B1ms with 32 GB of storage.
  sku: { name: "Standard_B1ms", tier: "Burstable" },
  storage: { storageSizeGB: 32 },
  // Backups are retained for 7 days.
  backup: { backupRetentionDays: 7 },
  // Administrator credentials; the password comes from the RandomPassword resource.
  administratorLogin: "postgres",
  administratorLoginPassword: mlflowDBPassword.result,
};
const mlflowDBServer = new azure.dbforpostgresql.Server(
  "mldbserver",
  mlflowDBServerArgs
);
// The "mlflowDB" database on the PostgreSQL server (UTF8 / en_US.utf8).
// An explicit dependency on the server is declared in addition to the
// implicit one that comes from referencing `mlflowDBServer.name`.
const databaseOptions: pulumi.CustomResourceOptions = {
  dependsOn: [mlflowDBServer],
};
const database = new azure.dbforpostgresql.Database(
  "mlflowDB",
  {
    databaseName: "mlflowDB",
    serverName: mlflowDBServer.name,
    resourceGroupName: resourceGroup.name,
    collation: "en_US.utf8",
    charset: "UTF8",
  },
  databaseOptions
);
// AKS cluster with a system-assigned identity, the "azure" network plugin
// and a standard-SKU load balancer.
const cluster = new azure.containerservice.ManagedCluster("mlplatform-k8s", {
  resourceGroupName: resourceGroup.name,
  dnsPrefix: "mlplatform",
  identity: { type: "SystemAssigned" },
  networkProfile: {
    loadBalancerSku: "standard",
    networkPlugin: "azure",
  },
  // An agent pool is a group of VMs that all share the same configuration.
  agentPoolProfiles: [
    {
      name: "agentpool",
      mode: "System",
      type: "VirtualMachineScaleSets",
      osType: "Linux",
      vmSize: "standard_b2s",
      maxPods: 110,
      // Starts with 2 nodes; autoscaling may move the pool between 1 and 5.
      count: 2,
      enableAutoScaling: true,
      minCount: 1,
      maxCount: 5,
    },
  ],
});
// Firewall rule that lets the AKS workloads reach the PostgreSQL server.
//
// The previous range (0.0.0.0 - 255.255.255.255) opened the database to the
// entire internet. A rule whose start and end address are both 0.0.0.0 is the
// Azure convention for "allow access from Azure services and resources", which
// covers traffic leaving the AKS cluster without exposing the server to
// arbitrary public addresses.
// NOTE(review): for tighter isolation, restrict this further to the cluster's
// outbound IP(s) or move the server onto private networking.
const aksPostgresFirewallRule = new azure.dbforpostgresql.FirewallRule(
  "aks-postgres",
  {
    resourceGroupName: resourceGroup.name,
    serverName: mlflowDBServer.name,
    startIpAddress: "0.0.0.0",
    endIpAddress: "0.0.0.0",
  }
);
// Storage account "mlinfrastorage" (Standard_GRS, "westus"); the blob
// container for MLflow artifacts is created inside it.
const storageAccount = new azure.storage.StorageAccount("ml-storage", {
  // Identity and placement.
  resourceGroupName: resourceGroup.name,
  accountName: "mlinfrastorage",
  location: "westus",
  kind: azure.storage.Kind.Storage,
  sku: { name: azure.storage.SkuName.Standard_GRS },

  // Access model: no public blob access; shared-key auth stays enabled
  // because the account keys are read and handed to MLflow.
  allowBlobPublicAccess: false,
  allowSharedKeyAccess: true,
  defaultToOAuthAuthentication: false,

  // Encryption with Microsoft-managed keys, without infrastructure encryption.
  encryption: {
    requireInfrastructureEncryption: false,
    keySource: azure.storage.KeySource.Microsoft_Storage,
  },

  // Key and SAS expiration policies.
  keyPolicy: { keyExpirationPeriodInDays: 20 },
  sasPolicy: {
    sasExpirationPeriod: "1.15:59:59",
    expirationAction: azure.storage.ExpirationAction.Log,
  },
});
// Blob container inside the storage account; MLflow uses it as its artifact root.
const blobContainerArgs: azure.storage.BlobContainerArgs = {
  resourceGroupName: resourceGroup.name,
  accountName: storageAccount.name,
};
const blobContainer = new azure.storage.BlobContainer(
  "artifact-storage",
  blobContainerArgs
);
// Look up the storage account's access keys once both the resource group
// name and the storage account name have resolved.
const storageAccountKeys = pulumi
  .all([resourceGroup.name, storageAccount.name])
  .apply(([resourceGroupName, accountName]) => {
    return azure.storage.listStorageAccountKeys({
      resourceGroupName,
      accountName,
    });
  });
/**
 * Kubeconfig for the AKS cluster, exported as a secret stack output.
 *
 * The user credentials are fetched once the resource group and cluster names
 * are known, and the first returned kubeconfig is base64-decoded.
 *
 * Failures are propagated rather than logged and swallowed: a failed lookup
 * or an empty credential list fails the deployment instead of silently
 * yielding an undefined or empty kubeconfig for the Kubernetes provider.
 */
export const kubeconfig = pulumi.secret(
  pulumi
    .all([resourceGroup.name, cluster.name])
    .apply(async ([resourceGroupName, clusterName]) => {
      const credentials =
        await azure.containerservice.listManagedClusterUserCredentials({
          resourceGroupName,
          resourceName: clusterName,
        });
      const encoded = credentials.kubeconfigs?.[0]?.value;
      if (!encoded) {
        throw new Error(
          `AKS cluster "${clusterName}" returned no user kubeconfig`
        );
      }
      // The API returns the kubeconfig base64-encoded.
      return Buffer.from(encoded, "base64").toString();
    })
);
/**
 * Primary access key of the storage account, exported as a secret so it is
 * encrypted in the stack state and masked in CLI output instead of being
 * published as a plaintext stack output.
 *
 * Fails the deployment with a clear message if the key listing is empty.
 */
export const primaryStorageKey = pulumi.secret(
  storageAccountKeys.apply((result) => {
    const key = result.keys[0]?.value;
    if (!key) {
      throw new Error("Storage account returned no access keys");
    }
    return key;
  })
);
// Kubernetes provider bound to the AKS cluster through the exported kubeconfig.
// Every Kubernetes/Helm resource in this file is created through it.
const k8sprovider = new k8s.Provider("k8s-provider", { kubeconfig });
/**
 * MLflow, installed on AKS from the community-charts Helm repository.
 *
 * Backend store: the PostgreSQL server and database declared earlier in this file.
 * Artifact root: the Azure Blob container in the storage account.
 *
 * Troubleshooting: if the deployment fails with the pod in CrashLoopBackOff,
 * inspect the container logs:
 *   kubectl logs <mlflow-pod-name> -c mlflow
 */
const mlflow = new k8s.helm.v3.Chart(
  "mlflow",
  {
    chart: "mlflow",
    fetchOpts: { repo: "https://community-charts.github.io/helm-charts" },
    values: {
      backendStore: {
        postgres: {
          enabled: true,
          host: mlflowDBServer.fullyQualifiedDomainName,
          port: 5432,
          database: database.name,
          // Must match the administratorLogin configured on the server.
          user: "postgres",
          password: mlflowDBPassword.result,
        },
      },
      artifactRoot: {
        azureBlob: {
          enabled: true,
          accessKey: primaryStorageKey,
          storageAccount: storageAccount.name,
          container: blobContainer.name,
          // The account name is taken from the resource rather than repeated
          // as a literal, so renaming the storage account cannot leave a
          // stale name in the connection string.
          connectionString: pulumi.interpolate`DefaultEndpointsProtocol=https;AccountName=${storageAccount.name};AccountKey=${primaryStorageKey};EndpointSuffix=core.windows.net`,
        },
      },
    },
  },
  {
    provider: k8sprovider,
  }
);
/**
 * Traefik, installed from its official Helm repository with the chart's
 * default values. The route for /mlflow is declared right after it.
 */
const traefikChartRepo = "https://helm.traefik.io/traefik";
const traefik = new k8s.helm.v3.Chart(
  "traefik",
  { chart: "traefik", fetchOpts: { repo: traefikChartRepo } },
  { provider: k8sprovider }
);
// Route requests under the /mlflow prefix to the Service named "mlflow"
// that the MLflow Helm chart creates, in the "default" namespace.
const mlflowService = mlflow.getResource("v1/Service", "mlflow");
new TraefikRoute(
  "mlflow-traefik-route",
  {
    namespace: "default",
    prefix: "/mlflow",
    service: mlflowService,
  },
  { provider: k8sprovider }
);