Merge pull request #1480 from benjaminbenteke/main #374
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: runs on every push to the main branch.
name: Upload hub data to a hubverse-hosted AWS S3 bucket

on:
  push:
    branches:
      - main

env:
  # Hubverse AWS account number
  # (quoted so the all-digit ID is always handled as a string)
  AWS_ACCOUNT: '767397675902'

permissions:
  contents: read
  # id-token write required for AWS auth
  id-token: write
# Single job: read the hub's cloud settings from hub-config/admin.json and,
# when cloud sync is enabled, push the hub's directories to S3 with rclone.
jobs:
  upload:
    # Don't run on forked repositories
    if: github.event.repository.fork != true
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Get hub cloud config
        # save cloud-related fields from admin config as environment variables
        # (jq json parser is installed on Github-hosted runners)
        run: |
          # echo each value to the job log, then export both via GITHUB_ENV
          # so the later steps can read them through the env context
          cloud_enabled=$(jq -r '.cloud.enabled' ./hub-config/admin.json) \
            && echo "CLOUD_ENABLED=$cloud_enabled"
          cloud_storage_location=$(jq -r '.cloud.host.storage_location' ./hub-config/admin.json) \
            && echo "CLOUD_STORAGE_LOCATION=$cloud_storage_location"
          echo "CLOUD_ENABLED=$cloud_enabled" >> "$GITHUB_ENV"
          echo "CLOUD_STORAGE_LOCATION=$cloud_storage_location" >> "$GITHUB_ENV"

      - name: Configure AWS credentials
        # request credentials to assume the hub's AWS role via OpenID Connect
        if: env.CLOUD_ENABLED == 'true'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          # the role name is the hub's configured storage location
          role-to-assume: arn:aws:iam::${{ env.AWS_ACCOUNT }}:role/${{ env.CLOUD_STORAGE_LOCATION }}
          aws-region: us-east-1

      - name: Install rclone
        if: env.CLOUD_ENABLED == 'true'
        # "shell: bash" turns on pipefail and curl -f fails on HTTP errors,
        # so a failed download fails this step instead of being piped to bash
        run: |
          curl -fsSL https://rclone.org/install.sh | sudo bash
          rclone version
        shell: bash

      - name: Sync files to cloud storage
        # sync specified hub directories to S3
        # (to exclude a directory, remove it from the hub_directories list below)
        if: env.CLOUD_ENABLED == 'true'
        run: |
          hub_directories=(
            'auxiliary-data'
            'hub-config'
            'model-abstracts'
            'model-metadata'
            'target-data'
          )
          # only directories that exist in the checkout are synced
          for DIRECTORY in "${hub_directories[@]}"
          do
            if [ -d "./$DIRECTORY" ]
            then
              rclone sync \
                "./$DIRECTORY/" \
                ":s3,provider=AWS,env_auth:$BUCKET_NAME/$DIRECTORY" \
                --checksum --verbose --stats-one-line --config=/dev/null
            fi
          done
          # unlike other data, model-outputs are synced to a "raw" location
          # so we can transform it before presenting to users
          # NOTE(review): unlike the loop above, this sync has no directory
          # check - confirm that ./model-output is always present in a hub
          rclone sync ./model-output/ ":s3,provider=AWS,env_auth:$BUCKET_NAME/raw/model-output" \
            --checksum --verbose --stats-one-line --config=/dev/null
        shell: bash
        env:
          BUCKET_NAME: ${{ env.CLOUD_STORAGE_LOCATION }}