# pipeline.yml (forked from Azure/azureml-examples)
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline

description: Pipeline using distributed job to train model based on cifar-10 dataset

display_name: cifar-10-pipeline-example
experiment_name: cifar-10-pipeline-example

# Three command jobs. train_model and eval_model consume the `cifar` output of
# get_data; eval_model additionally consumes the `model_dir` output of
# train_model. These input bindings are the only links between the steps.
jobs:
  # Step 1: download the CIFAR-10 archive, unpack it into the `cifar` output
  # folder, then delete the local archive.
  get_data:
    type: command
    # `>-` folds the lines below into a single shell command line.
    # The commands are chained with `&&` so that a failed download or a failed
    # extraction fails the step; with `;` the step's exit status would be that
    # of the trailing `rm`, which can succeed after `tar` has failed.
    command: >-
      wget https://azuremlexamples.blob.core.windows.net/datasets/cifar-10-python.tar.gz &&
      tar -xvzf cifar-10-python.tar.gz -C ${{outputs.cifar}} &&
      rm cifar-10-python.tar.gz
    compute: azureml:gpu-cluster
    environment: azureml://registries/azureml/environments/sklearn-1.5/labels/latest
    outputs:
      cifar:
        type: uri_folder
        mode: upload

  # Step 2: run src/train-model/main.py against the extracted dataset and
  # write its results to the `model_dir` output folder.
  train_model:
    type: command
    command: >-
      python main.py
      --data-dir ${{inputs.cifar}}
      --epochs ${{inputs.epochs}}
      --model-dir ${{outputs.model_dir}}
    code: src/train-model
    inputs:
      epochs: 1
      cifar: ${{parent.jobs.get_data.outputs.cifar}}
    outputs:
      model_dir:
        type: uri_folder
        mode: upload
    environment: azureml://registries/azureml/environments/acpt-pytorch-2.2-cuda12.1/labels/latest
    compute: azureml:gpu-cluster
    # PyTorch distribution: one process per instance across two instances.
    distribution:
      type: pytorch
      process_count_per_instance: 1
    resources:
      instance_count: 2

  # Step 3: run src/eval-model/main.py on the same dataset, reading the model
  # from the `model` sub-path of train_model's `model_dir` output.
  eval_model:
    type: command
    command: >-
      python main.py
      --data-dir ${{inputs.cifar}}
      --model-dir ${{inputs.model_dir}}/model
    code: src/eval-model
    environment: azureml://registries/azureml/environments/acpt-pytorch-2.2-cuda12.1/labels/latest
    compute: azureml:gpu-cluster
    # Same distribution and instance count as train_model.
    distribution:
      type: pytorch
      process_count_per_instance: 1
    resources:
      instance_count: 2
    inputs:
      cifar: ${{parent.jobs.get_data.outputs.cifar}}
      model_dir: ${{parent.jobs.train_model.outputs.model_dir}}