# Scrape Groceries #24
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Workflow triggers and shared environment for the grocery scrape jobs.
name: Scrape Groceries
on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # Daily at 01:00 UTC (11am AEST / 12pm AEDT).
    - cron: "0 1 * * *"
env:
  AWS_REGION: ap-southeast-2
jobs:
  # Runs the woolies sync, keeps ./output/woolies/ as a workflow artifact for
  # the merge job, and syncs the same directory to S3.
  scrape-woolies:
    permissions:
      contents: read  # Required for checkout action
      id-token: write  # This is required for requesting the JWT
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install -r requirements.txt
      - run: python3 main.py sync woolies
      # The artifact actions are on v4 because v3 has been retired by GitHub.
      # Upload and download steps must share the same major version.
      - uses: actions/upload-artifact@v4
        with:
          name: woolies_snapshot
          path: ./output/woolies/
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
          aws-region: "${{ env.AWS_REGION }}"
      - run: aws s3 sync ./output/woolies/ s3://grocery-scrape-au/woolies/

  # Same pipeline as scrape-woolies, for the coles store.
  scrape-coles:
    permissions:
      contents: read  # Required for checkout action
      id-token: write  # This is required for requesting the JWT
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install -r requirements.txt
      - run: python3 main.py sync coles
      - uses: actions/upload-artifact@v4
        with:
          name: coles_snapshot
          path: ./output/coles/
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
          aws-region: "${{ env.AWS_REGION }}"
      - run: aws s3 sync ./output/coles/ s3://grocery-scrape-au/coles/

  # Waits for both scrape jobs, restores their snapshots next to the existing
  # canonical history file, runs the analysis and publishes the results.
  merge-price-history:
    permissions:
      contents: read  # Required for checkout action
      id-token: write  # This is required for requesting the JWT
    runs-on: ubuntu-latest
    needs:
      - scrape-woolies
      - scrape-coles
    steps:
      - uses: actions/checkout@v4
      - name: Download coles artifact
        uses: actions/download-artifact@v4
        with:
          name: coles_snapshot
          path: ./output/coles/
      - name: Download woolies artifact
        uses: actions/download-artifact@v4
        with:
          name: woolies_snapshot
          path: ./output/woolies/
      - run: pip3 install -r requirements.txt
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
          aws-region: "${{ env.AWS_REGION }}"
      # `aws s3 sync` operates on prefixes, not single objects, so the one
      # file is selected with filters: it is copied down if it exists and
      # skipped otherwise.
      - run: >-
          aws s3 sync s3://grocery-scrape-au/ ./output/
          --exclude "*" --include "latest-canonical.json.gz"
      - run: python3 main.py analysis --compress
      - uses: actions/upload-artifact@v4
        with:
          name: latest_canonical
          path: ./output/latest-canonical.json.gz
      - name: Upload all files after finished analysis
        # The content-encoding is necessary so that S3 sends the correct
        # content-encoding header on GET
        run: |
          aws s3 sync ./output/ s3://grocery-scrape-au/
          aws s3 cp --content-encoding gzip static/data/latest-canonical.woolies.compressed.json.gz s3://hotprices.org/data/
          aws s3 cp --content-encoding gzip static/data/latest-canonical.coles.compressed.json.gz s3://hotprices.org/data/