-
Notifications
You must be signed in to change notification settings - Fork 4
85 lines (85 loc) · 3.14 KB
/
scrape-groceries.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
name: Scrape Groceries

# Triggers: manual runs (workflow_dispatch) plus one scheduled run per day.
on:
  workflow_dispatch:
  schedule:
    # Daily at 01:00 UTC, i.e. 11:00 AEST (UTC+10) / 12:00 AEDT (UTC+11).
    - cron: "0 1 * * *"

# Workflow-level environment variables, shared by all jobs.
env:
  AWS_REGION: ap-southeast-2
jobs:
  # Scrape the Woolworths catalogue, keep the snapshot as a run artifact for
  # the merge job, and archive it in S3.
  scrape-woolies:
    permissions:
      contents: read # Required for checkout action
      id-token: write # Required for requesting the OIDC JWT used to assume the AWS role
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install -r requirements.txt
      - run: python3 main.py sync woolies
      # upload-artifact v3 is retired; v4 artifacts can only be read by
      # download-artifact v4, so every artifact step in this file uses v4.
      - uses: actions/upload-artifact@v4
        with:
          name: woolies_snapshot
          path: ./output/woolies/
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
          aws-region: "${{ env.AWS_REGION }}"
      - run: aws s3 sync ./output/woolies/ s3://grocery-scrape-au/woolies/

  # Same pipeline as scrape-woolies, for Coles.
  scrape-coles:
    permissions:
      contents: read # Required for checkout action
      id-token: write # Required for requesting the OIDC JWT used to assume the AWS role
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install -r requirements.txt
      - run: python3 main.py sync coles
      - uses: actions/upload-artifact@v4
        with:
          name: coles_snapshot
          path: ./output/coles/
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
          aws-region: "${{ env.AWS_REGION }}"
      - run: aws s3 sync ./output/coles/ s3://grocery-scrape-au/coles/

  # Runs after both scrapers succeed: pulls their snapshots and the existing
  # price history, runs the analysis, then publishes the results.
  merge-price-history:
    permissions:
      contents: read # Required for checkout action
      id-token: write # Required for requesting the OIDC JWT used to assume the AWS role
    runs-on: ubuntu-latest
    needs:
      - scrape-woolies
      - scrape-coles
    steps:
      - uses: actions/checkout@v4
      - name: Download coles artifact
        uses: actions/download-artifact@v4
        with:
          name: coles_snapshot
          path: ./output/coles/
      - name: Download woolies artifact
        uses: actions/download-artifact@v4
        with:
          name: woolies_snapshot
          path: ./output/woolies/
      - run: pip3 install -r requirements.txt
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
          aws-region: "${{ env.AWS_REGION }}"
      # Copies the history file down if it exists, skips otherwise.
      # `aws s3 sync` only works on prefixes/directories, so the bucket root is
      # synced with filters that narrow it to the single history object. Unlike
      # `aws s3 cp`, this does not fail when the object is missing.
      - name: Download existing price history
        run: >-
          aws s3 sync s3://grocery-scrape-au/ ./output/
          --exclude "*"
          --include "latest-canonical.json.gz"
      - run: python3 main.py analysis
      - uses: actions/upload-artifact@v4
        with:
          name: latest_canonical
          path: ./output/latest-canonical.json.gz
      - name: Upload all files after finished analysis
        # The content-encoding is necessary so that S3 sends the correct
        # content-encoding header on GET.
        # NOTE(review): the static/data/*.json.gz files are presumably written
        # by the analysis step above - confirm.
        run: |
          aws s3 sync ./output/ s3://grocery-scrape-au/
          aws s3 cp --content-encoding gzip \
            static/data/latest-canonical.woolies.compressed.json.gz \
            s3://hotprices.org/data/
          aws s3 cp --content-encoding gzip \
            static/data/latest-canonical.coles.compressed.json.gz \
            s3://hotprices.org/data/