From 727c506263715f341a64a1e50246842fd46c622c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 6 Nov 2024 14:28:30 +0100
Subject: [PATCH 01/11] WIP

---
 zyte_spider_templates/spiders/serp.py | 73 ++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index dea6922..37b7e4e 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -1,12 +1,15 @@
+from enum import Enum
 from typing import Any, Dict, Iterable, List, Optional, Union
 
 from pydantic import BaseModel, Field, field_validator
 from scrapy import Request
 from scrapy.settings import SETTINGS_PRIORITIES, BaseSettings
+from scrapy_poet import DummyResponse, DynamicDeps
 from scrapy_spider_metadata import Args
 from w3lib.url import add_or_replace_parameter
-from zyte_common_items import Serp
+from zyte_common_items import Product, Serp
 
+from ..documentation import document_enum
 from ..params import MaxRequestsParam
 from ._google_domains import GoogleDomain
 from .base import BaseSpider
@@ -48,6 +51,55 @@ class SerpMaxPagesParam(BaseModel):
     )
 
 
+@document_enum
+class SerpItemType(str, Enum):
+    serp: str = "serp"
+    """
+    Yield the data of result pages, do not follow result links.
+    """
+
+    product: str = "product"
+    """
+    Follow result links and yield product details data from them.
+    """
+
+    # TODO: extend with additional item types.
+
+
+# NOTE: serp is excluded on purposed, since it is not used below.
+# TODO: Add a test to make sure that this is in sync with the enum class above.
+ITEM_TYPE_CLASSES = {
+    SerpItemType.product: Product,
+}
+
+
+class SerpItemTypeParam(BaseModel):
+    item_type: SerpItemType = Field(
+        title="Item type",
+        description="Data type of the output items.",
+        default=SerpItemType.serp,
+        json_schema_extra={
+            "enumMeta": {
+                # TODO: Add a test to make sure this is in sync with the enum class above.
+                # TODO: Try automating the generation of this metadata from the enum type above.
+                SerpItemType.serp: {
+                    "title": "serp",
+                    "description": (
+                        "Yield the data of result pages, do not follow result " "links."
+                    ),
+                },
+                SerpItemType.product: {
+                    "title": "product",
+                    "description": (
+                        "Follow result links and yield product details data "
+                        "from them."
+                    ),
+                },
+            },
+        },
+    )
+
+
 class GoogleDomainParam(BaseModel):
     domain: GoogleDomain = Field(
         title="Domain",
@@ -131,4 +183,21 @@ def parse_serp(self, response, page_number) -> Iterable[Union[Request, Serp]]:
             next_url = add_or_replace_parameter(serp.url, "start", str(next_start))
             yield self.get_serp_request(next_url, page_number=page_number + 1)
 
-        yield serp
+        if self.args.item_type == SerpItemType.serp:
+            yield serp
+            return
+
+        for result in serp.organicResults:
+            yield response.follow(
+                result.url,
+                callback=self.parse_result,
+                meta={
+                    "crawling_logs": {"page_type": self.args.item_type.value},
+                    "inject": [ITEM_TYPE_CLASSES[self.args.item_type]],
+                },
+            )
+
+    def parse_result(
+        self, response: DummyResponse, dynamic: DynamicDeps
+    ) -> Iterable[Any]:
+        yield next(iter(dynamic.values()))

From 9525ca6e9e176478bd04a52ba3c8d5487d6ee28f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 6 Nov 2024 14:32:21 +0100
Subject: [PATCH 02/11] WIP

---
 zyte_spider_templates/spiders/serp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index 37b7e4e..682547b 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -110,6 +110,7 @@ class GoogleDomainParam(BaseModel):
 
 class GoogleSearchSpiderParams(
     MaxRequestsParam,
+    SerpItemTypeParam,
     SerpMaxPagesParam,
     SearchQueriesParam,
     GoogleDomainParam,

From 9a1f471eb8e65ae3d417f8ecbe5de52ca0e4820b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 6 Nov 2024 14:41:06 +0100
Subject: [PATCH 03/11] WIP

---
 zyte_spider_templates/spiders/serp.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index 682547b..c9f1c00 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -51,6 +51,8 @@ class SerpMaxPagesParam(BaseModel):
     )
 
 
+# TODO: Make sure this is covered in the docs the same way as the e-commerce
+# crawl strategy.
 @document_enum
 class SerpItemType(str, Enum):
     serp: str = "serp"
@@ -179,18 +181,22 @@ def start_requests(self) -> Iterable[Request]:
 
     def parse_serp(self, response, page_number) -> Iterable[Union[Request, Serp]]:
         serp = Serp.from_dict(response.raw_api_response["serp"])
 
-        next_start = page_number * self._results_per_page
-        if serp.organicResults and serp.metadata.totalOrganicResults > next_start:
-            next_url = add_or_replace_parameter(serp.url, "start", str(next_start))
-            yield self.get_serp_request(next_url, page_number=page_number + 1)
+        if page_number < self.args.max_pages:  # TODO: Add a test for this
+            next_start = page_number * self._results_per_page
+            if serp.organicResults and serp.metadata.totalOrganicResults > next_start:
+                next_url = add_or_replace_parameter(serp.url, "start", str(next_start))
+                yield self.get_serp_request(next_url, page_number=page_number + 1)
 
         if self.args.item_type == SerpItemType.serp:
            yield serp
            return
 
+        # TODO: Add a test for this
         for result in serp.organicResults:
             yield response.follow(
-                result.url,
+                result[
+                    "url"
+                ],  # TODO: Why does result.url not work? Bug in zyte-common-items?
                 callback=self.parse_result,

From 6507f12329369038e67a51e3ab3359754b765f27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 6 Nov 2024 15:10:26 +0100
Subject: [PATCH 04/11] WIP

---
 docs/reference/index.rst              |  5 +++
 tests/test_serp.py                    | 23 ++++++++++++-
 zyte_spider_templates/spiders/serp.py | 49 ++++++++++++++++++++++-----
 3 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/docs/reference/index.rst b/docs/reference/index.rst
index dd368dd..d0c3c05 100644
--- a/docs/reference/index.rst
+++ b/docs/reference/index.rst
@@ -44,5 +44,10 @@ Parameter mixins
 
 .. autoenum:: zyte_spider_templates.spiders.ecommerce.EcommerceCrawlStrategy
 
+.. autopydantic_model:: zyte_spider_templates.spiders.serp.SerpItemTypeParam
+   :exclude-members: model_computed_fields
+
+.. autoenum:: zyte_spider_templates.spiders.serp.SerpItemType
+
 .. autopydantic_model:: zyte_spider_templates.spiders.serp.SerpMaxPagesParam
    :exclude-members: model_computed_fields
diff --git a/tests/test_serp.py b/tests/test_serp.py
index 92b19d2..dd30a27 100644
--- a/tests/test_serp.py
+++ b/tests/test_serp.py
@@ -5,7 +5,11 @@
 from scrapy_zyte_api.responses import ZyteAPITextResponse
 from w3lib.url import add_or_replace_parameter
 
-from zyte_spider_templates.spiders.serp import GoogleSearchSpider
+from zyte_spider_templates.spiders.serp import (
+    ITEM_TYPE_CLASSES,
+    GoogleSearchSpider,
+    SerpItemType,
+)
 
 from . import get_crawler
 from .utils import assertEqualSpiderMetadata
@@ -445,3 +449,20 @@ def test_parse_serp():
     # The page_number parameter is required.
     with pytest.raises(TypeError):
         spider.parse_serp(response)
+
+
+def test_item_type_mappings():
+    # Ensure that all SerpItemType keys and values match.
+    for entry in SerpItemType:
+        assert entry.name == entry.value
+
+    # Ensure that the ITEM_TYPE_CLASSES dict maps all values from the
+    # corresponding enum except for serp.
+    actual_keys = set(ITEM_TYPE_CLASSES)
+    expected_keys = set(
+        entry.value for entry in SerpItemType if entry != SerpItemType.serp
+    )
+    assert actual_keys == expected_keys
+
+    # Also ensure that no dict value is repeated.
+    assert len(actual_keys) == len(set(ITEM_TYPE_CLASSES.values()))
diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index c9f1c00..7f48b8f 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -7,7 +7,14 @@
 from scrapy_poet import DummyResponse, DynamicDeps
 from scrapy_spider_metadata import Args
 from w3lib.url import add_or_replace_parameter
-from zyte_common_items import Product, Serp
+from zyte_common_items import (  # TODO: Add ForumThread to zyte-common-items; ForumThread,
+    Article,
+    ArticleList,
+    JobPosting,
+    Product,
+    ProductList,
+    Serp,
+)
 
 from ..documentation import document_enum
 from ..params import MaxRequestsParam
@@ -51,27 +58,53 @@ class SerpMaxPagesParam(BaseModel):
     )
 
 
-# TODO: Make sure this is covered in the docs the same way as the e-commerce
-# crawl strategy.
 @document_enum
 class SerpItemType(str, Enum):
-    serp: str = "serp"
+    article: str = "article"
+    """
+    Article data from result URLs.
+    """
+
+    articleList: str = "articleList"
+    """
+    Article list data from result URLs.
+    """
+
+    # forumThread: str = "forumThread"
     """
-    Yield the data of result pages, do not follow result links.
+    Thread data from result URLs.
     """
 
+    jobPosting: str = "jobPosting"
+    """
+    Job posting data from result URLs.
+    """
""" product: str = "product" """ - Follow result links and yield product details data from them. + Product data from result URLs. + """ + + productList: str = "productList" + """ + Product list data from result URLs. """ - # TODO: extend with additional item types. + serp: str = "serp" + """ + Search engine results page data. + """ # NOTE: serp is excluded on purposed, since it is not used below. # TODO: Add a test to make sure that this is in sync with the enum class above. ITEM_TYPE_CLASSES = { + SerpItemType.article: Article, + SerpItemType.articleList: ArticleList, + # SerpItemType.forumThread: ForumThread, + SerpItemType.jobPosting: JobPosting, SerpItemType.product: Product, + SerpItemType.productList: ProductList, } @@ -112,7 +145,7 @@ class GoogleDomainParam(BaseModel): class GoogleSearchSpiderParams( MaxRequestsParam, - SerpItemTypeParam, + SerpItemTypeParam, # TODO: Update the test_metadata expectations SerpMaxPagesParam, SearchQueriesParam, GoogleDomainParam, From 2e9bebab396119dbb795db130cfb9ad3b242cc17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 11 Nov 2024 18:11:51 +0100 Subject: [PATCH 05/11] WIP --- setup.py | 2 +- tests/test_serp.py | 23 +++++++++-- tox.ini | 2 +- zyte_spider_templates/spiders/serp.py | 59 +++++++++------------------ 4 files changed, 41 insertions(+), 45 deletions(-) diff --git a/setup.py b/setup.py index 76788ff..b5597a1 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ "scrapy-poet>=0.24.0", "scrapy-spider-metadata>=0.2.0", "scrapy-zyte-api[provider]>=0.23.0", - "zyte-common-items>=0.23.0", + "zyte-common-items>=0.26.0", ], classifiers=[ "Development Status :: 3 - Alpha", diff --git a/tests/test_serp.py b/tests/test_serp.py index dd30a27..b2623dd 100644 --- a/tests/test_serp.py +++ b/tests/test_serp.py @@ -263,6 +263,25 @@ def test_metadata(): "title": "Max Pages", "type": "integer", }, + "item_type": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "description": ( + "If specified, result URLs are followed to extract " + "the specified item type. Spider output items will be " + "of the specified item type, not search engine " + "results page items." + ), + "enum": [ + "article", + "articleList", + "forumThread", + "jobPosting", + "product", + "productList", + ], + "title": "Item type", + }, "max_requests": { "anyOf": [{"type": "integer"}, {"type": "null"}], "default": 100, @@ -459,9 +478,7 @@ def test_item_type_mappings(): # Ensure that the ITEM_TYPE_CLASSES dict maps all values from the # corresponding enum except for serp. actual_keys = set(ITEM_TYPE_CLASSES) - expected_keys = set( - entry.value for entry in SerpItemType if entry != SerpItemType.serp - ) + expected_keys = set(entry.value for entry in SerpItemType) assert actual_keys == expected_keys # Also ensure that no dict value is repeated. 
diff --git a/tox.ini b/tox.ini
index 3fa9108..55de9d8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -26,7 +26,7 @@ deps =
     scrapy-poet==0.24.0
     scrapy-spider-metadata==0.2.0
     scrapy-zyte-api[provider]==0.23.0
-    zyte-common-items==0.23.0
+    zyte-common-items==0.26.0
 
 [testenv:mypy]
 deps =
diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index 7f48b8f..d61a818 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -7,9 +7,10 @@
 from scrapy_poet import DummyResponse, DynamicDeps
 from scrapy_spider_metadata import Args
 from w3lib.url import add_or_replace_parameter
-from zyte_common_items import (  # TODO: Add ForumThread to zyte-common-items; ForumThread,
+from zyte_common_items import (
     Article,
     ArticleList,
+    ForumThread,
     JobPosting,
     Product,
     ProductList,
@@ -62,46 +63,39 @@ class SerpMaxPagesParam(BaseModel):
 class SerpItemType(str, Enum):
     article: str = "article"
     """
-    Article data from result URLs.
+    Article data.
     """
 
     articleList: str = "articleList"
     """
-    Article list data from result URLs.
+    Article list data.
     """
 
-    # forumThread: str = "forumThread"
+    forumThread: str = "forumThread"
     """
-    Thread data from result URLs.
+    Forum thread data.
     """
 
     jobPosting: str = "jobPosting"
     """
-    Job posting data from result URLs.
+    Job posting data.
     """
 
     product: str = "product"
     """
-    Product data from result URLs.
+    Product data.
     """
 
     productList: str = "productList"
     """
-    Product list data from result URLs.
+    Product list data.
     """
 
-    serp: str = "serp"
-    """
-    Search engine results page data.
-    """
-
-# NOTE: serp is excluded on purposed, since it is not used below.
-# TODO: Add a test to make sure that this is in sync with the enum class above.
 ITEM_TYPE_CLASSES = {
     SerpItemType.article: Article,
     SerpItemType.articleList: ArticleList,
-    # SerpItemType.forumThread: ForumThread,
+    SerpItemType.forumThread: ForumThread,
     SerpItemType.jobPosting: JobPosting,
     SerpItemType.product: Product,
     SerpItemType.productList: ProductList,
 }
@@ -108,32 +102,17 @@ class SerpItemType(str, Enum):
 
 
 class SerpItemTypeParam(BaseModel):
-    item_type: SerpItemType = Field(
+    item_type: Optional[SerpItemType] = Field(
         title="Item type",
-        description="Data type of the output items.",
-        default=SerpItemType.serp,
-        json_schema_extra={
-            "enumMeta": {
-                # TODO: Add a test to make sure this is in sync with the enum class above.
-                # TODO: Try automating the generation of this metadata from the enum type above.
-                SerpItemType.serp: {
-                    "title": "serp",
-                    "description": (
-                        "Yield the data of result pages, do not follow result " "links."
-                    ),
-                },
-                SerpItemType.product: {
-                    "title": "product",
-                    "description": (
-                        "Follow result links and yield product details data "
-                        "from them."
-                    ),
-                },
-            },
-        },
+        description=(
+            "If specified, result URLs are followed to extract the specified "
+            "item type. Spider output items will be of the specified item "
+            "type, not search engine results page items."
+        ),
+        default=None,
     )
 
 
 class GoogleDomainParam(BaseModel):
     domain: GoogleDomain = Field(
         title="Domain",
@@ -145,7 +124,7 @@ class GoogleDomainParam(BaseModel):
 
 class GoogleSearchSpiderParams(
     MaxRequestsParam,
-    SerpItemTypeParam,  # TODO: Update the test_metadata expectations
+    SerpItemTypeParam,
     SerpMaxPagesParam,
     SearchQueriesParam,
     GoogleDomainParam,
@@ -220,7 +199,7 @@ def parse_serp(self, response, page_number) -> Iterable[Union[Request, Serp]]:
             next_url = add_or_replace_parameter(serp.url, "start", str(next_start))
             yield self.get_serp_request(next_url, page_number=page_number + 1)
 
-        if self.args.item_type == SerpItemType.serp:
+        if self.args.item_type is None:
             yield serp
             return

From cf97bcbe48afde2f7d9f8a56bbf9a43f54248f06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3%A1n=20Chaves?=
Date: Tue, 12 Nov 2024 10:36:45 +0100
Subject: [PATCH 06/11] Update test expectations after fixing max_pages

---
 tests/test_serp.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tests/test_serp.py b/tests/test_serp.py
index b2623dd..8c48f08 100644
--- a/tests/test_serp.py
+++ b/tests/test_serp.py
@@ -342,7 +342,9 @@ def test_search_queries():
 
 def test_pagination():
     crawler = get_crawler()
-    spider = GoogleSearchSpider.from_crawler(crawler, search_queries="foo bar")
+    spider = GoogleSearchSpider.from_crawler(
+        crawler, search_queries="foo bar", max_pages=3
+    )
 
     def run_parse_serp(total_results, page=1):
         url = "https://www.google.com/search?q=foo+bar"
@@ -411,6 +413,14 @@ def run_parse_serp(total_results, page=1):
     assert requests[0].url == "https://www.google.com/search?q=foo+bar&start=20"
     assert requests[0].cb_kwargs["page_number"] == 3
 
+    # Do not go over max_pages
+    items, requests = run_parse_serp(
+        total_results=31,
+        page=3,
+    )
+    assert len(items) == 1
+    assert len(requests) == 0
+
 
 def test_get_serp_request():
     crawler = get_crawler()
@@ -427,7 +437,9 @@ def test_get_serp_request():
 
 def test_parse_serp():
     crawler = get_crawler()
-    spider = GoogleSearchSpider.from_crawler(crawler, search_queries="foo bar")
+    spider = GoogleSearchSpider.from_crawler(
+        crawler, search_queries="foo bar", max_pages=43
+    )
     url = "https://www.google.com/search?q=foo+bar"
     response = ZyteAPITextResponse.from_api_response(
         api_response={

From d7a7cb6cc7bde19d3c441cc9a02dd01b9b50cb27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3%A1n=20Chaves?=
Date: Tue, 12 Nov 2024 10:45:04 +0100
Subject: [PATCH 07/11] Remove obsolete TODO

---
 zyte_spider_templates/spiders/serp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index d61a818..5384418 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -193,7 +193,7 @@ def parse_serp(self, response, page_number) -> Iterable[Union[Request, Serp]]:
         serp = Serp.from_dict(response.raw_api_response["serp"])
 
-        if page_number < self.args.max_pages:  # TODO: Add a test for this
+        if page_number < self.args.max_pages:
             next_start = page_number * self._results_per_page
             if serp.organicResults and serp.metadata.totalOrganicResults > next_start:
                 next_url = add_or_replace_parameter(serp.url, "start", str(next_start))

From 7e6d6f195f3b49319660e97f7bf3a0a9b033a379 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3%A1n=20Chaves?=
Date: Tue, 12 Nov 2024 11:26:11 +0100
Subject: [PATCH 08/11] Solve remaining to-do items

---
 setup.py                              |  2 +-
 tests/test_serp.py                    | 53 +++++++++++++++++++++++++++
 tox.ini                               |  2 +-
 zyte_spider_templates/spiders/serp.py |  5 +----
 4 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index b5597a1..4e02cb1 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
         "scrapy-poet>=0.24.0",
         "scrapy-spider-metadata>=0.2.0",
         "scrapy-zyte-api[provider]>=0.23.0",
-        "zyte-common-items>=0.26.0",
+        "zyte-common-items @ git+https://github.com/Gallaecio/zyte-common-items.git@fix-result-inheritance",
     ],
     classifiers=[
         "Development Status :: 3 - Alpha",
diff --git a/tests/test_serp.py b/tests/test_serp.py
index 8c48f08..0a9fad1 100644
--- a/tests/test_serp.py
+++ b/tests/test_serp.py
@@ -4,6 +4,7 @@
 from scrapy_spider_metadata import get_spider_metadata
 from scrapy_zyte_api.responses import ZyteAPITextResponse
 from w3lib.url import add_or_replace_parameter
+from zyte_common_items import Product
 
 from zyte_spider_templates.spiders.serp import (
     ITEM_TYPE_CLASSES,
@@ -482,6 +483,58 @@ def test_parse_serp():
     # The page_number parameter is required.
     with pytest.raises(TypeError):
         spider.parse_serp(response)
+
+
+def test_item_type():
+    crawler = get_crawler()
+    spider = GoogleSearchSpider.from_crawler(
+        crawler, search_queries="foo bar", max_pages=43, item_type="product"
+    )
+    url = "https://www.google.com/search?q=foo+bar"
+    response = ZyteAPITextResponse.from_api_response(
+        api_response={
+            "serp": {
+                "organicResults": [
+                    {
+                        "description": "…",
+                        "name": "…",
+                        "url": f"https://example.com/{rank}",
+                        "rank": rank,
+                    }
+                    for rank in range(1, 11)
+                ],
+                "metadata": {
+                    "dateDownloaded": "2024-10-25T08:59:45Z",
+                    "displayedQuery": "foo bar",
+                    "searchedQuery": "foo bar",
+                    "totalOrganicResults": 99999,
+                },
+                "pageNumber": 1,
+                "url": url,
+            },
+            "url": url,
+        },
+    )
+    items = []
+    requests = []
+    for item_or_request in spider.parse_serp(response, page_number=42):
+        if isinstance(item_or_request, Request):
+            requests.append(item_or_request)
+        else:
+            items.append(item_or_request)
+    assert len(items) == 0
+    assert len(requests) == 11
+
+    assert requests[0].url == add_or_replace_parameter(url, "start", "420")
+    assert requests[0].cb_kwargs["page_number"] == 43
+
+    for rank in range(1, 11):
+        assert requests[rank].url == f"https://example.com/{rank}"
+        assert requests[rank].callback == spider.parse_result
+        assert requests[rank].meta == {
+            "crawling_logs": {"page_type": "product"},
+            "inject": [Product],
+        }
 
 
 def test_item_type_mappings():
     # Ensure that all SerpItemType keys and values match.
     for entry in SerpItemType:
diff --git a/tox.ini b/tox.ini
index 55de9d8..3f0b756 100644
--- a/tox.ini
+++ b/tox.ini
@@ -26,7 +26,7 @@ deps =
     scrapy-poet==0.24.0
     scrapy-spider-metadata==0.2.0
     scrapy-zyte-api[provider]==0.23.0
-    zyte-common-items==0.26.0
+    zyte-common-items @ git+https://github.com/Gallaecio/zyte-common-items.git@fix-result-inheritance
 
 [testenv:mypy]
 deps =
diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index 5384418..51fd3af 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -203,12 +203,9 @@ def parse_serp(self, response, page_number) -> Iterable[Union[Request, Serp]]:
             yield serp
             return
 
-        # TODO: Add a test for this
         for result in serp.organicResults:
             yield response.follow(
-                result[
-                    "url"
-                ],  # TODO: Why does result.url not work? Bug in zyte-common-items?
+                result.url,
                 callback=self.parse_result,
                 meta={
                     "crawling_logs": {"page_type": self.args.item_type.value},

From 5cd60724457b0a9ac935236da5545b2d01fe700beec3da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3%A1n=20Chaves?=
Date: Tue, 12 Nov 2024 13:14:48 +0100
Subject: =?UTF-8?q?zyte-common-items=20=E2=89=A5=200.26.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 setup.py | 2 +-
 tox.ini  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 4e02cb1..5940a73 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
         "scrapy-poet>=0.24.0",
         "scrapy-spider-metadata>=0.2.0",
         "scrapy-zyte-api[provider]>=0.23.0",
-        "zyte-common-items @ git+https://github.com/Gallaecio/zyte-common-items.git@fix-result-inheritance",
+        "zyte-common-items>=0.26.2",
     ],
     classifiers=[
         "Development Status :: 3 - Alpha",
diff --git a/tox.ini b/tox.ini
index 3f0b756..7e77c06 100644
--- a/tox.ini
+++ b/tox.ini
@@ -26,7 +26,7 @@ deps =
     scrapy-poet==0.24.0
     scrapy-spider-metadata==0.2.0
     scrapy-zyte-api[provider]==0.23.0
-    zyte-common-items @ git+https://github.com/Gallaecio/zyte-common-items.git@fix-result-inheritance
+    zyte-common-items==0.26.2
 
 [testenv:mypy]
 deps =

From 93599814be488cd5ce94d3e2e816d90412869d91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3%A1n=20Chaves?=
Date: Wed, 13 Nov 2024 08:02:42 +0100
Subject: =?UTF-8?q?item=5Ftype.title:=20Item=20type=20?=
 =?UTF-8?q?=E2=86=92=20Follow=20and=20Extract?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_serp.py                    | 2 +-
 zyte_spider_templates/spiders/serp.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_serp.py b/tests/test_serp.py
index 0a9fad1..29c9cb2 100644
--- a/tests/test_serp.py
+++ b/tests/test_serp.py
@@ -281,7 +281,7 @@ def test_metadata():
                     "product",
                     "productList",
                 ],
-                "title": "Item type",
+                "title": "Follow and Extract",
             },
             "max_requests": {
                 "anyOf": [{"type": "integer"}, {"type": "null"}],
diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index 51fd3af..8d4232f 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -104,7 +104,7 @@ class SerpItemType(str, Enum):
 
 class SerpItemTypeParam(BaseModel):
     item_type: Optional[SerpItemType] = Field(
-        title="Item type",
+        title="Follow and Extract",
         description=(
             "If specified, result URLs are followed to extract the specified "
             "item type. Spider output items will be of the specified item "

From 953091b00615bea012a5545b2d01fe700beec3da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3%A1n=20Chaves?=
Date: Wed, 13 Nov 2024 08:04:58 +0100
Subject: [PATCH 11/11] Improve item_type.description

---
 tests/test_serp.py                    | 8 ++++----
 zyte_spider_templates/spiders/serp.py | 7 ++++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/test_serp.py b/tests/test_serp.py
index 29c9cb2..9c59cd7 100644
--- a/tests/test_serp.py
+++ b/tests/test_serp.py
@@ -268,10 +268,10 @@ def test_metadata():
                 "anyOf": [{"type": "string"}, {"type": "null"}],
                 "default": None,
                 "description": (
-                    "If specified, result URLs are followed to extract "
-                    "the specified item type. Spider output items will be "
-                    "of the specified item type, not search engine "
-                    "results page items."
+                    "If specified, follow organic search result links, "
+                    "and extract the selected data type from the target "
+                    "pages. Spider output items will be of the specified "
+                    "data type, not search engine results page items."
                 ),
                 "enum": [
                     "article",
diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py
index 8d4232f..d69856e 100644
--- a/zyte_spider_templates/spiders/serp.py
+++ b/zyte_spider_templates/spiders/serp.py
@@ -106,9 +106,10 @@ class SerpItemTypeParam(BaseModel):
     item_type: Optional[SerpItemType] = Field(
         title="Follow and Extract",
         description=(
-            "If specified, result URLs are followed to extract the specified "
-            "item type. Spider output items will be of the specified item "
-            "type, not search engine results page items."
+            "If specified, follow organic search result links, and extract "
+            "the selected data type from the target pages. Spider output "
+            "items will be of the specified data type, not search engine "
+            "results page items."
         ),
         default=None,
     )
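
---

Reviewer note, not part of the patch series: a quick sketch of how the final behavior of this series could be exercised. The spider name "google_search" and the use of Scrapy's -a flag to pass spider arguments are assumptions based on how other zyte-spider-templates spiders are run; the patches themselves do not define the invocation.

    # Follow organic result links and extract Product items from them.
    # Omitting item_type keeps the previous behavior of yielding Serp items.
    scrapy crawl google_search -a search_queries="foo bar" -a max_pages=2 -a item_type=product

With item_type unset, parse_serp yields Serp items as before; with it set, result URLs are followed and parse_result yields whichever item type scrapy-poet injects per ITEM_TYPE_CLASSES.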