Skip to content

Merge pull request #1479 from ShitalAdhikari/main #373

Merge pull request #1479 from ShitalAdhikari/main

Merge pull request #1479 from ShitalAdhikari/main #373

name: Upload hub data to a hubverse-hosted AWS S3 bucket

# Trigger: run on every push to the main branch.
on:
  push:
    branches:
      - main
# Workflow-level environment variables, visible to every step.
env:
  # Hubverse AWS account number
  # (quoted so the all-digit ID is always treated as a string; it is
  # interpolated into the IAM role ARN when requesting AWS credentials)
  AWS_ACCOUNT: '767397675902'
# Permissions granted to the workflow token.
permissions:
  contents: read
  # id-token write required for AWS auth
  id-token: write
jobs:
  # Reads the hub's cloud settings from hub-config/admin.json and, when
  # cloud.enabled is 'true', syncs the hub's directories to its S3 bucket.
  upload:
    # Don't run on forked repositories
    if: github.event.repository.fork != true
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Get hub cloud config
        # save cloud-related fields from admin config as environment variables
        # (jq json parser is installed on Github-hosted runners)
        run: |
          # echo each value to the log, then export it to later steps
          cloud_enabled=$(jq -r '.cloud.enabled' ./hub-config/admin.json) \
            && echo "CLOUD_ENABLED=$cloud_enabled"
          cloud_storage_location=$(jq -r '.cloud.host.storage_location' ./hub-config/admin.json) \
            && echo "CLOUD_STORAGE_LOCATION=$cloud_storage_location"
          echo "CLOUD_ENABLED=$cloud_enabled" >> "$GITHUB_ENV"
          echo "CLOUD_STORAGE_LOCATION=$cloud_storage_location" >> "$GITHUB_ENV"

      - name: Configure AWS credentials
        # request credentials to assume the hub's AWS role via OpenID Connect
        if: env.CLOUD_ENABLED == 'true'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          # the role name is the hub's cloud storage location
          role-to-assume: arn:aws:iam::${{ env.AWS_ACCOUNT }}:role/${{ env.CLOUD_STORAGE_LOCATION }}
          aws-region: us-east-1

      - name: Install rclone
        if: env.CLOUD_ENABLED == 'true'
        run: |
          # -f: fail on HTTP errors instead of piping an error page to bash
          curl -fsSL https://rclone.org/install.sh | sudo bash
          rclone version

      - name: Sync files to cloud storage
        # sync specified hub directories to S3
        # (to exclude a directory, remove it from the hub_directories list below)
        if: env.CLOUD_ENABLED == 'true'
        run: |
          hub_directories=(
            'auxiliary-data'
            'hub-config'
            'model-abstracts'
            'model-metadata'
            'target-data'
          )
          # directories that do not exist in this hub are skipped
          for DIRECTORY in "${hub_directories[@]}"
          do
            if [ -d "./$DIRECTORY" ]
            then
              rclone sync \
                "./$DIRECTORY/" \
                ":s3,provider=AWS,env_auth:$BUCKET_NAME/$DIRECTORY" \
                --checksum --verbose --stats-one-line --config=/dev/null
            fi
          done
          # unlike other data, model-outputs are synced to a "raw" location
          # so we can transform it before presenting to users
          # NOTE(review): unlike the loop above, this sync has no directory
          # check -- confirm ./model-output always exists in the hub
          rclone sync ./model-output/ ":s3,provider=AWS,env_auth:$BUCKET_NAME/raw/model-output" \
            --checksum --verbose --stats-one-line --config=/dev/null
        shell: bash
        env:
          BUCKET_NAME: ${{ env.CLOUD_STORAGE_LOCATION }}