Commit 9d9c49e
Add debug option to fetch individual category & page
Javex committed Oct 30, 2024
1 parent 81c19dc commit 9d9c49e
Showing 3 changed files with 32 additions and 7 deletions.
27 changes: 22 additions & 5 deletions hotprices_au/sites/coles.py
@@ -37,14 +37,19 @@ def start(self):
         self.session.headers["ocp-apim-subscription-key"] = self.api_key
         self.version = next_data_json["buildId"]
 
-    def get_category(self, cat_slug):
+    def get_category(self, cat_slug, page_filter: int):
         params = {
             "slug": cat_slug,
             "page": 1,
         }
         product_count = 0
         error_count = 0
         while True:
+            # If there's a filter and we're not on the right page then skip
+            if page_filter != None and params["page"] != page_filter:
+                params["page"] += 1
+                continue
+
             print(f'Page {params["page"]}')
             response = self.session.get(
                 f"https://www.coles.com.au/_next/data/{self.version}/en/browse/{cat_slug}.json",
@@ -54,8 +59,10 @@ def get_category(self, cat_slug):
                 response.raise_for_status()
             except requests.HTTPError:
                 error_count += 1
-                print(response.text)
-                if error_count > ERROR_COUNT_MAX:
+                print(f'Error fetching page {params["page"]}')
+                # Need to also raise an error if there's a page filter as there
+                # are no more pages to try
+                if error_count > ERROR_COUNT_MAX or page_filter is not None:
                     raise
                 else:
                     params["page"] += 1
@@ -238,15 +245,25 @@ def parse_str_unit(size):
     return units.parse_str_unit(size)
 
 
-def main(quick, save_path):
+def main(quick, save_path, category, page: int):
+    """
+    category: Slug or name or category to fetch, will fetch only that one.
+    page: Page number to fetch.
+    """
     coles = ColesScraper(store_id="0584", quick=quick)
     categories = coles.get_categories()
+    # Rename to avoid the overwrite below
+    category_filter = category.lower()
     # categories = load_cache()
     for category_obj in categories:
         cat_slug = category_obj["seoToken"]
         cat_desc = category_obj["name"]
+        if category_filter is not None and (
+            category_filter != cat_desc.lower() or category_filter != cat_slug.lower()
+        ):
+            continue
         print(f"Fetching category {cat_slug} ({cat_desc})")
-        category = coles.get_category(cat_slug)
+        category = coles.get_category(cat_slug, page_filter=page)
         all_category_bundles = list(category)
         category_obj["Products"] = all_category_bundles
 
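For reference, the intent of the new page_filter argument can be sketched as a tiny standalone generator. This is an illustrative sketch only, not the committed implementation: fetch_page is a hypothetical callable standing in for the paginated Coles API request, returning a list of products for a page or an empty list when the category is exhausted.

# Sketch of the page-skip behaviour; fetch_page is a hypothetical stand-in
# for the real paginated request in ColesScraper.get_category.
def iter_category_pages(fetch_page, page_filter=None):
    page = 1
    while True:
        # With a filter, skip ahead until the requested page is reached.
        if page_filter is not None and page != page_filter:
            page += 1
            continue
        products = fetch_page(page)
        if not products:
            return
        yield from products
        if page_filter is not None:
            # Only the requested page was wanted, so stop after one fetch.
            return
        page += 1
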
4 changes: 3 additions & 1 deletion hotprices_au/sites/woolies.py
@@ -197,7 +197,9 @@ def get_canonical(item, today):
     return result
 
 
-def main(quick, save_path):
+def main(quick, save_path, category_filter: str, page_filter: int):
+    if category_filter is not None or page_filter is not None:
+        raise NotImplementedError("Filters not implemented for woolies yet.")
     woolies = WooliesAPI(quick=quick)
     categories = woolies.get_categories()
     # categories = load_cache()
8 changes: 7 additions & 1 deletion main.py
@@ -15,7 +15,7 @@ def main_sync(args):
             f"requested to skip if output file exists."
         )
     else:
-        sites.sites[args.store].main(args.quick, save_path)
+        sites.sites[args.store].main(args.quick, save_path, args.category, args.page)
 
 
 def main_analysis(args):
@@ -52,6 +52,12 @@ def main():
         help="Print relative path where file will be stored, then exit",
     )
     sync_parser.add_argument("--skip-existing", action="store_true", default=False)
+    sync_parser.add_argument("--category", help="Fetch a particular category only.")
+    sync_parser.add_argument(
+        "--page",
+        help="Only fetch one particular page. Useful when also using the --category option.",
+        type=int,
+    )
     sync_parser.add_argument("store", choices=list(sites.sites))
     sync_parser.set_defaults(func=main_sync)
 
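Assuming the sub-parser above is registered as a sync sub-command (the sync_parser name and the main_sync handler suggest it), a debug run that fetches a single page of a single category would look something like the following; the category slug is purely illustrative:

python main.py sync --category fruit-vegetables --page 3 coles

The store argument stays positional and is limited to the keys of sites.sites. For now only the Coles scraper honours the filters: the Woolworths main() raises NotImplementedError when either --category or --page is passed.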
