diff --git a/.github/workflows/scrape-woolies.yml b/.github/workflows/scrape-groceries.yml similarity index 80% rename from .github/workflows/scrape-woolies.yml rename to .github/workflows/scrape-groceries.yml index 74da27d..caa4001 100644 --- a/.github/workflows/scrape-woolies.yml +++ b/.github/workflows/scrape-groceries.yml @@ -14,17 +14,17 @@ jobs: steps: - uses: actions/checkout@v4 - run: pip3 install -r requirements.txt - - run: python3 woolies.py + - run: python3 main.py woolies - uses: actions/upload-artifact@v3 with: name: woolies_snapshot - path: ./woolies/*.json + path: ./output/woolies/*.json - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: "${{ vars.TARGET_ROLE_ARN }}" aws-region: ap-southeast-2 - - run: aws s3 sync ./woolies/ s3://grocery-scrape-au/woolies/ + - run: aws s3 sync ./output/woolies/ s3://grocery-scrape-au/woolies/ scrape-coles: permissions: contents: read # Required for checkout action @@ -33,14 +33,14 @@ jobs: steps: - uses: actions/checkout@v4 - run: pip3 install -r requirements.txt - - run: python3 coles.py + - run: python3 main.py coles - uses: actions/upload-artifact@v3 with: name: coles_snapshot - path: ./coles/*.json + path: ./output/coles/*.json - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: "${{ vars.TARGET_ROLE_ARN }}" aws-region: ap-southeast-2 - - run: aws s3 sync ./coles/ s3://grocery-scrape-au/coles/ \ No newline at end of file + - run: aws s3 sync ./output/coles/ s3://grocery-scrape-au/coles/ \ No newline at end of file diff --git a/hotprices_au/__init__.py b/hotprices_au/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/coles.py b/hotprices_au/coles.py similarity index 89% rename from coles.py rename to hotprices_au/coles.py index bd0b08c..e611b45 100644 --- a/coles.py +++ b/hotprices_au/coles.py @@ -1,10 +1,11 @@ import requests import json -import sys import pathlib from datetime import datetime from bs4 import BeautifulSoup +from . import output + class ColesScraper: @@ -71,20 +72,7 @@ def get_categories(self): return categories -def save_data(categories): - now = datetime.now() - date_str = now.strftime("%Y-%m-%d") - fname = f"{date_str}.json" - save_dir = pathlib.Path(f"coles") - save_dir.mkdir(exist_ok=True) - fpath = save_dir / fname - fpath.write_text(json.dumps(categories)) - - -def main(): - quick = False - if len(sys.argv) > 1 and sys.argv[1] == "--quick": - quick = True +def main(quick): coles = ColesScraper(store_id='0584', quick=quick) categories = coles.get_categories() #categories = load_cache() @@ -108,7 +96,7 @@ def main(): if quick: break #save_cache(categories) - save_data(categories) + output.save_data('coles', categories) #print(json.dumps(category, indent=4)) diff --git a/hotprices_au/output.py b/hotprices_au/output.py new file mode 100644 index 0000000..aae03aa --- /dev/null +++ b/hotprices_au/output.py @@ -0,0 +1,13 @@ +import pathlib +import json +from datetime import datetime + + +def save_data(store, categories): + now = datetime.now() + date_str = now.strftime("%Y-%m-%d") + fname = f"{date_str}.json" + save_dir = pathlib.Path(f"output/{store}") + save_dir.mkdir(parents=True, exist_ok=True) + fpath = save_dir / fname + fpath.write_text(json.dumps(categories)) \ No newline at end of file diff --git a/woolies.py b/hotprices_au/woolies.py similarity index 83% rename from woolies.py rename to hotprices_au/woolies.py index a533fff..3b4af8c 100644 --- a/woolies.py +++ b/hotprices_au/woolies.py @@ -1,8 +1,7 @@ import requests import json -import sys -import pathlib -from datetime import datetime + +from . import output class WooliesAPI: @@ -88,31 +87,7 @@ def get_categories(self): return categories -def load_cache(): - with open('woolies_all.json') as f: - cache_data = json.loads(f.read()) - return cache_data - - -def save_cache(cache_data): - with open('woolies_all.json', 'w') as f: - f.write(json.dumps(cache_data)) - - -def save_data(categories): - now = datetime.now() - date_str = now.strftime("%Y-%m-%d") - fname = f"{date_str}.json" - save_dir = pathlib.Path(f"woolies") - save_dir.mkdir(exist_ok=True) - fpath = save_dir / fname - fpath.write_text(json.dumps(categories)) - - -def main(): - quick = False - if len(sys.argv) > 1 and sys.argv[1] == "--quick": - quick = True +def main(quick): woolies = WooliesAPI(quick=quick) categories = woolies.get_categories() #categories = load_cache() @@ -137,9 +112,7 @@ def main(): if quick: break - #save_cache(categories) - save_data(categories) - #print(json.dumps(category, indent=4)) + output.save_data('woolies', categories) if __name__ == '__main__':