Skip to content

Commit

Permalink
Move functionality into package
Browse files Browse the repository at this point in the history
  • Loading branch information
Javex committed Sep 26, 2023
1 parent 544fd3a commit 82fc384
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ jobs:
steps:
- uses: actions/checkout@v4
- run: pip3 install -r requirements.txt
- run: python3 woolies.py
- run: python3 main.py woolies
- uses: actions/upload-artifact@v3
with:
name: woolies_snapshot
path: ./woolies/*.json
path: ./output/woolies/*.json
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
aws-region: ap-southeast-2
- run: aws s3 sync ./woolies/ s3://grocery-scrape-au/woolies/
- run: aws s3 sync ./output/woolies/ s3://grocery-scrape-au/woolies/
scrape-coles:
permissions:
contents: read # Required for checkout action
Expand All @@ -33,14 +33,14 @@ jobs:
steps:
- uses: actions/checkout@v4
- run: pip3 install -r requirements.txt
- run: python3 coles.py
- run: python3 main.py coles
- uses: actions/upload-artifact@v3
with:
name: coles_snapshot
path: ./coles/*.json
path: ./output/coles/*.json
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: "${{ vars.TARGET_ROLE_ARN }}"
aws-region: ap-southeast-2
- run: aws s3 sync ./coles/ s3://grocery-scrape-au/coles/
- run: aws s3 sync ./output/coles/ s3://grocery-scrape-au/coles/
Empty file added hotprices_au/__init__.py
Empty file.
20 changes: 4 additions & 16 deletions coles.py → hotprices_au/coles.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import requests
import json
import sys
import pathlib
from datetime import datetime
from bs4 import BeautifulSoup

from . import output


class ColesScraper:

Expand Down Expand Up @@ -71,20 +72,7 @@ def get_categories(self):
return categories


def save_data(categories):
    """Write the scraped categories to ``coles/<YYYY-MM-DD>.json``.

    The file name is derived from the current local date, and the
    ``coles`` directory is resolved relative to the current working
    directory. An existing file for the same date is overwritten.

    Args:
        categories: JSON-serialisable scrape result.
    """
    # Plain literal: the original f-string had no placeholders.
    save_dir = pathlib.Path("coles")
    save_dir.mkdir(exist_ok=True)
    date_str = datetime.now().strftime("%Y-%m-%d")
    fpath = save_dir / f"{date_str}.json"
    fpath.write_text(json.dumps(categories))


def main():
quick = False
if len(sys.argv) > 1 and sys.argv[1] == "--quick":
quick = True
def main(quick):
coles = ColesScraper(store_id='0584', quick=quick)
categories = coles.get_categories()
#categories = load_cache()
Expand All @@ -108,7 +96,7 @@ def main():
if quick:
break
#save_cache(categories)
save_data(categories)
output.save_data('coles', categories)
#print(json.dumps(category, indent=4))


Expand Down
13 changes: 13 additions & 0 deletions hotprices_au/output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pathlib
import json
from datetime import datetime


def save_data(store, categories):
    """Write a store's scraped categories to ``output/<store>/<YYYY-MM-DD>.json``.

    The file name is derived from the current local date, and the
    ``output`` directory is resolved relative to the current working
    directory. Missing parent directories are created, and an existing
    file for the same date is overwritten.

    Args:
        store: Name of the sub-directory under ``output`` to write into.
        categories: JSON-serialisable scrape result.
    """
    save_dir = pathlib.Path(f"output/{store}")
    # parents=True because ``output`` itself may not exist yet.
    save_dir.mkdir(parents=True, exist_ok=True)
    date_str = datetime.now().strftime("%Y-%m-%d")
    fpath = save_dir / f"{date_str}.json"
    fpath.write_text(json.dumps(categories))
35 changes: 4 additions & 31 deletions woolies.py → hotprices_au/woolies.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import requests
import json
import sys
import pathlib
from datetime import datetime

from . import output


class WooliesAPI:
Expand Down Expand Up @@ -88,31 +87,7 @@ def get_categories(self):
return categories


def load_cache():
    """Load cached data from ``woolies_all.json`` in the working directory.

    Returns:
        The decoded JSON content of the cache file.

    Raises:
        FileNotFoundError: If the cache file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open('woolies_all.json') as f:
        # json.load reads and decodes the file object directly.
        return json.load(f)


def save_cache(cache_data):
    """Serialise *cache_data* as JSON into ``woolies_all.json``.

    The file is written to the current working directory and any
    previous content is replaced.

    Args:
        cache_data: JSON-serialisable data to store.
    """
    # Serialise before opening, so a non-serialisable value raises
    # without truncating an existing cache file.
    serialised = json.dumps(cache_data)
    with open('woolies_all.json', 'w') as f:
        f.write(serialised)


def save_data(categories):
    """Write the scraped categories to ``woolies/<YYYY-MM-DD>.json``.

    The file name is derived from the current local date, and the
    ``woolies`` directory is resolved relative to the current working
    directory. An existing file for the same date is overwritten.

    Args:
        categories: JSON-serialisable scrape result.
    """
    # Plain literal: the original f-string had no placeholders.
    save_dir = pathlib.Path("woolies")
    save_dir.mkdir(exist_ok=True)
    date_str = datetime.now().strftime("%Y-%m-%d")
    fpath = save_dir / f"{date_str}.json"
    fpath.write_text(json.dumps(categories))


def main():
quick = False
if len(sys.argv) > 1 and sys.argv[1] == "--quick":
quick = True
def main(quick):
woolies = WooliesAPI(quick=quick)
categories = woolies.get_categories()
#categories = load_cache()
Expand All @@ -137,9 +112,7 @@ def main():

if quick:
break
#save_cache(categories)
save_data(categories)
#print(json.dumps(category, indent=4))
output.save_data('woolies', categories)


if __name__ == '__main__':
Expand Down

0 comments on commit 82fc384

Please sign in to comment.