Skip to content

Commit

Permalink
Fix typing issues for typed Scrapy.
Browse files Browse the repository at this point in the history
  • Loading branch information
wRAR committed Nov 19, 2024
1 parent d1cbd50 commit 75e7bb0
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
11 changes: 3 additions & 8 deletions tests/test_job_posting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,7 @@
from pydantic import ValidationError
from scrapy_poet import DummyResponse
from scrapy_spider_metadata import get_spider_metadata
- from zyte_common_items import (
- JobPosting,
- JobPostingNavigation,
- ProbabilityRequest,
- Request,
- )
+ from zyte_common_items import JobPosting, JobPostingNavigation, ProbabilityRequest

from zyte_spider_templates._geolocations import (
GEOLOCATION_OPTIONS,
Expand Down Expand Up @@ -472,7 +467,7 @@ def test_get_nextpage_request():
url = "https://example.com"

# Minimal Args
- request = Request(url)
+ request = ProbabilityRequest(url=url)
spider = JobPostingSpider(url="https://example.com")
parse_navigation = lambda _: None
spider.parse_navigation = parse_navigation # type: ignore
Expand All @@ -490,7 +485,7 @@ def test_get_parse_navigation_request():
url = "https://example.com"

# Minimal args
- request = Request(url)
+ request = ProbabilityRequest(url=url)
spider = JobPostingSpider(url="https://example.com")
parse_navigation = lambda _: None
spider.parse_navigation = parse_navigation # type: ignore
Expand Down
23 changes: 13 additions & 10 deletions zyte_spider_templates/spiders/job_posting.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

from enum import Enum
- from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Optional, Union
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Optional, cast

import requests
+ import scrapy
from pydantic import BaseModel, ConfigDict, Field
- from scrapy import Request
from scrapy.crawler import Crawler
from scrapy_poet import DummyResponse
from scrapy_spider_metadata import Args
Expand Down Expand Up @@ -154,19 +154,19 @@ def get_start_request(self, url):
else "jobPostingNavigation"
},
}
- return Request(
+ return scrapy.Request(
url=url,
callback=callback,
meta=meta,
)

- def start_requests(self) -> Iterable[Request]:
+ def start_requests(self) -> Iterable[scrapy.Request]:
for url in self.start_urls:
yield self.get_start_request(url)

def parse_navigation(
self, response: DummyResponse, navigation: JobPostingNavigation
- ) -> Iterable[Request]:
+ ) -> Iterable[scrapy.Request]:
job_postings = navigation.items or []
for request in job_postings:
yield self.get_parse_job_posting_request(request)
Expand All @@ -178,7 +178,9 @@ def parse_navigation(
f"are no job posting links found in {navigation.url}"
)
else:
- yield self.get_nextpage_request(navigation.nextPage)
+ yield self.get_nextpage_request(
+ cast(ProbabilityRequest, navigation.nextPage)
+ )

def parse_job_posting(
self, response: DummyResponse, job_posting: JobPosting
Expand All @@ -189,6 +191,7 @@ def parse_job_posting(
if probability is None or probability >= 0.1:
yield job_posting
else:
+ assert self.crawler.stats
self.crawler.stats.inc_value("drop_item/job_posting/low_probability")
self.logger.info(
f"Ignoring item from {response.url} since its probability is "
Expand All @@ -197,11 +200,11 @@ def parse_job_posting(

def get_parse_navigation_request(
self,
- request: Union[ProbabilityRequest, Request],
+ request: ProbabilityRequest,
callback: Optional[Callable] = None,
page_params: Optional[Dict[str, Any]] = None,
page_type: str = "jobPostingNavigation",
- ) -> Request:
+ ) -> scrapy.Request:
callback = callback or self.parse_navigation

return request.to_scrapy(
Expand All @@ -218,7 +221,7 @@ def get_parse_navigation_request(

def get_nextpage_request(
self,
- request: Union[ProbabilityRequest, Request],
+ request: ProbabilityRequest,
callback: Optional[Callable] = None,
page_params: Optional[Dict[str, Any]] = None,
):
Expand All @@ -228,7 +231,7 @@ def get_nextpage_request(

def get_parse_job_posting_request(
self, request: ProbabilityRequest, callback: Optional[Callable] = None
- ) -> Request:
+ ) -> scrapy.Request:
callback = callback or self.parse_job_posting

probability = request.get_probability()
Expand Down

0 comments on commit 75e7bb0

Please sign in to comment.