-
Notifications
You must be signed in to change notification settings - Fork 72
80 lines (71 loc) · 2.6 KB
/
hubverse-aws-upload.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
name: Upload hub data to a hubverse-hosted AWS S3 bucket

# Trigger: every push to the main branch
on:
  push:
    branches:
      - main

env:
  # Hubverse AWS account number
  # (quoted: an account ID is an identifier, not a number, so keep it a string)
  AWS_ACCOUNT: '767397675902'

permissions:
  contents: read
  # id-token write required for AWS auth
  id-token: write
jobs:
  # Single job: read the hub's cloud settings and, when cloud storage is
  # enabled, mirror the hub's directories to its S3 bucket with rclone.
  upload:
    # Don't run on forked repositories
    if: github.event.repository.fork != true
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Get hub cloud config
        # save cloud-related fields from admin config as environment variables
        # (jq json parser is installed on Github-hosted runners)
        #
        # Each lookup is the first command of an `&&` list, so a jq failure
        # (e.g. unreadable or invalid admin.json) does not abort the step: the
        # variable is left empty, and every later step is skipped because it
        # requires CLOUD_ENABLED == 'true'.
        run: |
          admin_config='./hub-config/admin.json'
          cloud_enabled=$(jq -r '.cloud.enabled' "$admin_config") \
            && echo "CLOUD_ENABLED=$cloud_enabled"
          cloud_storage_location=$(jq -r '.cloud.host.storage_location' "$admin_config") \
            && echo "CLOUD_STORAGE_LOCATION=$cloud_storage_location"
          # export both values to the steps that follow
          {
            echo "CLOUD_ENABLED=$cloud_enabled"
            echo "CLOUD_STORAGE_LOCATION=$cloud_storage_location"
          } >> "$GITHUB_ENV"

      - name: Configure AWS credentials
        # request credentials to assume the hub's AWS role via OpenID Connect
        # (the role name is the hub's cloud storage location)
        if: env.CLOUD_ENABLED == 'true'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.AWS_ACCOUNT }}:role/${{ env.CLOUD_STORAGE_LOCATION }}
          aws-region: us-east-1

      - name: Install rclone
        if: env.CLOUD_ENABLED == 'true'
        # An explicit `shell: bash` makes GitHub run the script with
        # `-eo pipefail`, and `curl --fail` returns non-zero on HTTP errors, so
        # a failed download fails the step instead of being piped to `sudo bash`.
        shell: bash
        run: |
          curl -fsSL https://rclone.org/install.sh | sudo bash
          rclone version

      - name: Sync files to cloud storage
        # sync specified hub directories to S3
        # (to exclude a directory, remove it from the hub_directories list below)
        if: env.CLOUD_ENABLED == 'true'
        shell: bash
        env:
          BUCKET_NAME: ${{ env.CLOUD_STORAGE_LOCATION }}
        run: |
          # on-the-fly S3 remote (credentials come from the environment), so no
          # rclone config file is needed
          remote=":s3,provider=AWS,env_auth:$BUCKET_NAME"
          rclone_flags=(--checksum --verbose --stats-one-line --config=/dev/null)

          hub_directories=(
            'auxiliary-data'
            'hub-config'
            'model-abstracts'
            'model-metadata'
            'target-data'
          )
          for DIRECTORY in "${hub_directories[@]}"
          do
            # directories that are absent from the checkout are skipped
            if [ -d "./$DIRECTORY" ]
            then
              rclone sync "./$DIRECTORY/" "$remote/$DIRECTORY" "${rclone_flags[@]}"
            fi
          done

          # unlike other data, model-outputs are synced to a "raw" location
          # so we can transform it before presenting to users
          # (guarded like the directories above so a hub without a
          # model-output directory does not fail the upload)
          if [ -d ./model-output ]
          then
            rclone sync ./model-output/ "$remote/raw/model-output" "${rclone_flags[@]}"
          else
            echo "::notice::No model-output directory found; skipping raw model-output sync"
          fi