diff --git a/CHANGES.rst b/CHANGES.rst index 671fa437..8e6a785d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,27 @@ Changes ======= +To be released +-------------- + +* Cookie support is no longer experimental: + + * If the ``COOKIES_ENABLED`` setting is ``True`` (default), automatic request + parameter mapping now sets ``responseCookies`` to ``True`` and maps request + cookies to ``requestCookies``. + + * The ``ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED`` setting is now deprecated. + When enabled, however, the ``experimental`` name space is still used for + automatic request parameter mapping. + + * If you use ``requestCookies``, ``responseCookies``, or ``cookieManagement`` + within the ``experimental`` name space in request parameters, a deprecation + warning is now logged. + + * The ``responseCookies`` response parameter is now handled the same as + ``experimental.responseCookies``; the latter still works but is deprecated. + + 0.14.1 (2024-01-17) ------------------- @@ -18,6 +39,7 @@ Changes work as expected with :class:`~scrapy.pqueues.DownloaderAwarePriorityQueue`. + 0.14.0 (2024-01-15) ------------------- @@ -29,6 +51,7 @@ Changes * Added support for ``zyte_common_items.JobPosting`` to the scrapy-poet provider. + 0.13.0 (2023-12-13) ------------------- @@ -71,6 +94,7 @@ Changes * Test and CI improvements. + 0.12.2 (2023-10-19) ------------------- @@ -79,6 +103,7 @@ Changes * When logging Zyte API requests, truncation now uses "..." instead of Unicode ellipsis. + 0.12.1 (2023-09-29) ------------------- @@ -106,7 +131,7 @@ Changes Experimental is treated as a namespace, and its parameters are the ones counted, i.e. there is no ``scrapy-zyte-api/request_args/experimental`` stat, but there are stats like - ``scrapy-zyte-api/request_args/experimental.responseCookies``. + ``scrapy-zyte-api/request_args/experimental.foo``. 0.11.1 (2023-08-25) diff --git a/docs/reference/fingerprint-params.rst b/docs/reference/fingerprint-params.rst index 1e8cc144..c94d93cc 100644 --- a/docs/reference/fingerprint-params.rst +++ b/docs/reference/fingerprint-params.rst @@ -24,10 +24,10 @@ fingerprints for Zyte API requests based on the following Zyte API parameters: :http:`request:httpRequestText` values generate the same signature. - Output parameters (:http:`request:browserHtml`, - :http:`request:httpResponseBody`, :http:`request:httpResponseHeaders`, - :http:`request:responseCookies`, :http:`request:screenshot`, and - :ref:`automatic extraction outputs ` like - :http:`request:product`) + :http:`request:httpResponseBody`, :http:`request:responseCookies`, + :http:`request:httpResponseHeaders`, :http:`request:responseCookies`, + :http:`request:screenshot`, and :ref:`automatic extraction outputs + ` like :http:`request:product`) - Rendering option parameters (:http:`request:actions`, :http:`request:device`, :http:`request:javascript`, @@ -42,10 +42,9 @@ The following Zyte API parameters are *not* taken into account for request fingerprinting: - Request header parameters (:http:`request:customHttpRequestHeaders`, - :http:`request:requestHeaders`) + :http:`request:requestHeaders`, :http:`request:requestCookies`) -- Request cookie parameters (:http:`request:cookieManagement`, - :http:`request:requestCookies`) +- :http:`request:cookieManagement` - Session handling parameters (:http:`request:sessionContext`, :http:`request:sessionContextParameters`) diff --git a/docs/reference/request.rst b/docs/reference/request.rst index bd4c9933..4f7847dd 100644 --- a/docs/reference/request.rst +++ b/docs/reference/request.rst @@ -27,15 +27,14 @@ Automatic mapping - :attr:`Request.body ` becomes :http:`request:httpRequestBody`. -- If :ref:`ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED` is ``True``, - :setting:`COOKIES_ENABLED ` is ``True`` (default), - and :attr:`Request.meta ` does not set +- If the :setting:`COOKIES_ENABLED ` is ``True`` + (default), and :attr:`Request.meta ` does not set :reqmeta:`dont_merge_cookies ` to ``True``: - - :http:`request:experimental.responseCookies` becomes ``True``. + - :http:`request:responseCookies` becomes ``True``. - Cookies from the :reqmeta:`cookiejar ` become - :http:`request:experimental.requestCookies`. + :http:`request:requestCookies`. All cookies from the cookie jar are set, regardless of their cookie domain. This is because Zyte API requests may involve requests to @@ -115,20 +114,18 @@ following parameters: "value": "application/json" } ], - "experimental": { - "requestCookies": [ - { - "name": "a", - "value": "b", - "domain": "" - } - ], - "responseCookies": true - }, "httpResponseBody": true, "httpResponseHeaders": true, "httpRequestBody": "eyJmb28iOiAiYmFyIn0=", "httpRequestMethod": "POST", + "requestCookies": [ + { + "name": "a", + "value": "b", + "domain": "" + } + ], + "responseCookies": true, "url": "https://httpbin.org/anything" } diff --git a/docs/reference/response.rst b/docs/reference/response.rst index ce4d78b4..04c7db73 100644 --- a/docs/reference/response.rst +++ b/docs/reference/response.rst @@ -19,12 +19,11 @@ Zyte API response parameters are mapped into :ref:`response class `. - :http:`response:httpResponseHeaders` and - :http:`response:experimental.responseCookies` become - :class:`response.headers + :http:`response:responseCookies` become :class:`response.headers `. -- :http:`response:experimental.responseCookies` is also mapped into the - request :reqmeta:`cookiejar `. +- :http:`response:responseCookies` is also mapped into the request + :reqmeta:`cookiejar `. - :http:`response:browserHtml` and :http:`response:httpResponseBody` are mapped into both diff --git a/docs/reference/settings.rst b/docs/reference/settings.rst index e24c2a16..602cdfe9 100644 --- a/docs/reference/settings.rst +++ b/docs/reference/settings.rst @@ -102,16 +102,6 @@ Default: ``True`` Can be set to ``False`` to disable scrapy-zyte-api. -.. _ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED: - -ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED -===================================== - -Default: ``False`` - -See :ref:`request-automatic`. - - .. _ZYTE_API_FALLBACK_REQUEST_FINGERPRINTER_CLASS: ZYTE_API_FALLBACK_REQUEST_FINGERPRINTER_CLASS @@ -182,11 +172,11 @@ If the cookies to be set during :ref:`request mapping ` exceed this limit, a warning is logged, and only as many cookies as the limit allows are set for the target request. -To silence this warning, set :http:`request:experimental.requestCookies` -manually, e.g. to an empty :class:`dict`. +To silence this warning, set :http:`request:requestCookies` manually, e.g. to +an empty :class:`dict`. -Alternatively, if :http:`request:experimental.requestCookies` starts supporting -more than 100 cookies, update this setting accordingly. +Alternatively, if :http:`request:requestCookies` starts supporting more than +100 cookies, update this setting accordingly. .. _ZYTE_API_MAX_REQUESTS: diff --git a/docs/usage/automap.rst b/docs/usage/automap.rst index 2f81a2fb..839e2570 100644 --- a/docs/usage/automap.rst +++ b/docs/usage/automap.rst @@ -70,9 +70,7 @@ following parameters: { "browserHtml": true, - "experimental": { - "responseCookies": true - }, + "responseCookies": true, "requestHeaders": {"referer": "https://example.com/"}, "url": "https://quotes.toscrape.com" } diff --git a/scrapy_zyte_api/_cookies.py b/scrapy_zyte_api/_cookies.py index 60ae268b..f20518ca 100644 --- a/scrapy_zyte_api/_cookies.py +++ b/scrapy_zyte_api/_cookies.py @@ -30,7 +30,8 @@ def _process_cookies( ): if not cookie_jars: return - response_cookies = api_response.get("experimental", {}).get("responseCookies") + old_response_cookies = api_response.get("experimental", {}).get("responseCookies") + response_cookies = api_response.get("responseCookies", old_response_cookies) if not response_cookies: return cookie_jar = _get_cookie_jar(request, cookie_jars) diff --git a/scrapy_zyte_api/_params.py b/scrapy_zyte_api/_params.py index 5f2f8fbc..47983f5e 100644 --- a/scrapy_zyte_api/_params.py +++ b/scrapy_zyte_api/_params.py @@ -453,64 +453,89 @@ def _set_http_response_headers_from_request( api_params.pop("httpResponseHeaders") -def _set_http_response_cookies_from_request( - *, - api_params: Dict[str, Any], -): - api_params.setdefault("experimental", {}) - api_params["experimental"].setdefault("responseCookies", True) - if api_params["experimental"]["responseCookies"] is False: - del api_params["experimental"]["responseCookies"] +def _handle_experimental_unnamespacing(api_params, request, experimental, field): + experimental_params = api_params.setdefault("experimental", {}) + if not experimental and field in experimental_params: + if field in api_params: + logger.warning( + f"Request {request!r} defines both {field} " + f"({api_params[field]}) and " + f"experimental.{field} " + f"({experimental_params[field]}). " + f"experimental.{field} will be ignored." + ) + del experimental_params[field] + else: + logger.warning( + f"Request {request!r} defines experimental.{field}. " + f"experimental.{field} will be removed, and its value " + f"will be set as {field}." + ) + api_params[field] = experimental_params.pop(field) + elif experimental and field in api_params: + if field in experimental_params: + logger.warning( + f"Request {request!r} defines both {field} " + f"({api_params[field]}) and " + f"experimental.{field} " + f"({experimental_params[field]}). Since the " + f"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting is enabled, " + f"{field} will be removed, and its value will be set " + f"as experimental.{field}, overriding its current " + f"value." + ) + else: + logger.warning( + f"Request {request!r} defines {field}. Since the " + f"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting is enabled, " + f"{field} will be removed, and its value will be set " + f"as experimental.{field}." + ) + experimental_params[field] = api_params.pop(field) + if not experimental_params: + del api_params["experimental"] -def _set_http_request_cookies_from_request( +def _set_http_response_cookies_from_request( *, api_params: Dict[str, Any], + experimental: bool, request: Request, - cookie_jars: Dict[Any, CookieJar], - max_cookies: int, ): - api_params.setdefault("experimental", {}) - if "requestCookies" in api_params["experimental"]: - request_cookies = api_params["experimental"]["requestCookies"] - if request_cookies is False: - del api_params["experimental"]["requestCookies"] - elif not request_cookies and isinstance(request_cookies, list): - logger.warning( - ( - "Request %(request)r is overriding automatic request " - "cookie mapping by explicitly setting " - "experimental.requestCookies to []. If this was your " - "intention, please use False instead of []. Otherwise, " - "stop defining experimental.requestCookies in your " - "request to let automatic mapping work." - ), - { - "request": request, - }, - ) + if "responseCookies" in api_params and api_params["responseCookies"] is False: + del api_params["responseCookies"] return + experimental_params = api_params.get("experimental", {}) + if ( + "responseCookies" in experimental_params + and experimental_params["responseCookies"] is False + ): + del experimental_params["responseCookies"] + if not experimental_params: + del api_params["experimental"] + return + if not experimental: + api_params.setdefault("responseCookies", True) + else: + api_params.setdefault("experimental", {}) + api_params["experimental"].setdefault("responseCookies", True) + + +def _get_output_cookies(request, cookie_jars, max_cookies, field): output_cookies = [] input_cookies = _get_all_cookies(request, cookie_jars) input_cookie_count = len(input_cookies) if input_cookie_count > max_cookies: logger.warning( - ( - "Request %(request)r would get %(count)r cookies, but request " - "cookie automatic mapping is limited to %(max)r cookies " - "(see the ZYTE_API_MAX_COOKIES setting), so only %(max)r " - "cookies have been added to this request. To silence this " - "warning, set the request cookies manually through the " - "experimental.requestCookies Zyte API parameter instead. " - "Alternatively, if Zyte API starts supporting more than " - "%(max)r request cookies, update the ZYTE_API_MAX_COOKIES " - "setting accordingly." - ), - { - "request": request, - "count": input_cookie_count, - "max": max_cookies, - }, + f"Request {request!r} would get {input_cookie_count!r} cookies, " + f"but request cookie automatic mapping is limited to " + f"{max_cookies!r} cookies (see the ZYTE_API_MAX_COOKIES setting), " + f"so only {max_cookies!r} cookies have been added to this " + f"request. To silence this warning, set the request cookies " + f"manually through the {field} Zyte API parameter instead. " + f"Alternatively, if Zyte API starts supporting more than " + f"{max_cookies!r} request cookies, update the ZYTE_API_MAX_COOKIES " + f"setting accordingly." ) input_cookies = input_cookies[:max_cookies] for input_cookie in input_cookies: @@ -522,8 +547,76 @@ def _set_http_request_cookies_from_request( if input_cookie.path_specified: output_cookie["path"] = input_cookie.path output_cookies.append(output_cookie) - if output_cookies: - api_params["experimental"]["requestCookies"] = output_cookies + return output_cookies + + +def _set_http_request_cookies_from_request( + *, + api_params: Dict[str, Any], + request: Request, + cookie_jars: Dict[Any, CookieJar], + max_cookies: int, + experimental: bool, +): + if "requestCookies" in api_params: + request_cookies = api_params["requestCookies"] + if not request_cookies: + del api_params["requestCookies"] + # Note: We do not warn about setting requestCookies to False + # when there is no need (i.e. no input_cookies below) because + # input cookies can change at run time due to cookiejars, so + # False may make sense for some iterations of the code. + if isinstance(request_cookies, list): + logger.warning( + f"Request {request!r} is overriding automatic request " + f"cookie mapping by explicitly setting " + f"requestCookies to []. If this was your intention, " + f"please use False instead of []. Otherwise, stop " + f"defining requestCookies in your request to let " + f"automatic mapping work." + ) + return + + experimental_params = api_params.get("experimental", {}) + if "requestCookies" in experimental_params: + request_cookies = experimental_params["requestCookies"] + if not request_cookies: + del experimental_params["requestCookies"] + if not experimental_params: + del api_params["experimental"] + # Note: We do not warn about setting requestCookies to False + # when there is no need (i.e. no input_cookies below) because + # input cookies can change at run time due to cookiejars, so + # False may make sense for some iterations of the code. + if isinstance(request_cookies, list): + logger.warning( + f"Request {request!r} is overriding automatic request " + f"cookie mapping by explicitly setting " + f"experimental.requestCookies to []. If this was your " + f"intention, please use False instead of []. " + f"Otherwise, stop defining " + f"experimental.requestCookies in your request to let " + f"automatic mapping work." + ) + elif not experimental: + del experimental_params["requestCookies"] + if not experimental_params: + del api_params["experimental"] + api_params.setdefault("requestCookies", request_cookies) + return + + if not experimental: + output_cookies = _get_output_cookies( + request, cookie_jars, max_cookies, "requestCookies" + ) + if output_cookies: + api_params["requestCookies"] = output_cookies + else: + output_cookies = _get_output_cookies( + request, cookie_jars, max_cookies, "experimental.requestCookies" + ) + if output_cookies: + api_params.setdefault("experimental", {})["requestCookies"] = output_cookies def _set_http_request_method_from_request( @@ -580,13 +673,14 @@ def _unset_unneeded_api_params( default_params: Dict[str, Any], request: Request, ): + experimental_had_content = bool(api_params.get("experimental", None)) for param, default_value in _DEFAULT_API_PARAMS.items(): value = api_params.get(param, _Undefined) - if value is _Undefined: - continue - if value != default_value: + if value is _Undefined or value != default_value: continue - if param not in default_params or default_params.get(param) == default_value: + if ( + param not in default_params or default_params.get(param) == default_value + ) and (param != "experimental" or not experimental_had_content): logger.warning( f"Request {request} unnecessarily defines the Zyte API {param!r} " f"parameter with its default value, {default_value!r}. It will " @@ -606,7 +700,25 @@ def _update_api_params_from_request( cookies_enabled: bool, cookie_jars: Optional[Dict[Any, CookieJar]], max_cookies: int, + experimental_cookies: bool, + unreported_deprecated_experimental_fields: Set[str], ): + for field in ("responseCookies", "requestCookies", "cookieManagement"): + if ( + field in unreported_deprecated_experimental_fields + and field in api_params.get("experimental", {}) + ): + unreported_deprecated_experimental_fields.remove(field) + logger.warning( + f"Zyte API parameters for request {request} include " + f"experimental.{field}, which is deprecated. Please, " + f"replace it with {field}, both in request parameters " + f"and in any response parsing logic that might rely " + f"on the old parameter." + ) + _handle_experimental_unnamespacing( + api_params, request, experimental_cookies, field + ) _set_http_response_body_from_request(api_params=api_params, request=request) _set_http_response_headers_from_request( api_params=api_params, @@ -622,16 +734,21 @@ def _update_api_params_from_request( ) _set_http_request_body_from_request(api_params=api_params, request=request) if cookies_enabled: - assert cookie_jars is not None # typing - _set_http_response_cookies_from_request(api_params=api_params) - _set_http_request_cookies_from_request( + _set_http_response_cookies_from_request( api_params=api_params, + experimental=experimental_cookies, request=request, - cookie_jars=cookie_jars, - max_cookies=max_cookies, ) - if not api_params["experimental"]: - del api_params["experimental"] + # cookie_jars can be None when the param parser is used for request + # fingerprinting, in which case request cookies are not relevant. + if cookie_jars is not None: + _set_http_request_cookies_from_request( + api_params=api_params, + request=request, + cookie_jars=cookie_jars, + max_cookies=max_cookies, + experimental=experimental_cookies, + ) _unset_unneeded_api_params( api_params=api_params, request=request, default_params=default_params ) @@ -736,6 +853,8 @@ def _get_automap_params( cookies_enabled: bool, cookie_jars: Optional[Dict[Any, CookieJar]], max_cookies: int, + experimental_cookies: bool, + unreported_deprecated_experimental_fields: Set[str], ): meta_params = request.meta.get("zyte_api_automap", default_enabled) if meta_params is False: @@ -765,6 +884,8 @@ def _get_automap_params( cookies_enabled=cookies_enabled, cookie_jars=cookie_jars, max_cookies=max_cookies, + experimental_cookies=experimental_cookies, + unreported_deprecated_experimental_fields=unreported_deprecated_experimental_fields, ) return params @@ -782,6 +903,8 @@ def _get_api_params( cookies_enabled: bool, cookie_jars: Optional[Dict[Any, CookieJar]], max_cookies: int, + experimental_cookies: bool, + unreported_deprecated_experimental_fields: Set[str], ) -> Optional[dict]: """Returns a dictionary of API parameters that must be sent to Zyte API for the specified request, or None if the request should not be sent through @@ -797,6 +920,8 @@ def _get_api_params( cookies_enabled=cookies_enabled, cookie_jars=cookie_jars, max_cookies=max_cookies, + experimental_cookies=experimental_cookies, + unreported_deprecated_experimental_fields=unreported_deprecated_experimental_fields, ) if api_params is None: return None @@ -805,6 +930,22 @@ def _get_api_params( f"Request {request} combines manually-defined parameters and " f"automatically-mapped parameters." ) + else: + if ( + api_params + and unreported_deprecated_experimental_fields + and "experimental" in api_params + ): + for field in list(unreported_deprecated_experimental_fields): + if field in api_params["experimental"]: + unreported_deprecated_experimental_fields.remove(field) + logger.warning( + f"Zyte API parameters for request {request} include " + f"experimental.{field}, which is deprecated. Please, " + f"replace it with {field}, both in request parameters " + f"and in any response parsing logic that might rely " + f"on the old parameter." + ) if job_id is not None: api_params["jobId"] = job_id @@ -855,22 +996,48 @@ def __init__(self, crawler, cookies_enabled=None): self._job_id = environ.get("SHUB_JOBKEY", None) self._transparent_mode = settings.getbool("ZYTE_API_TRANSPARENT_MODE", False) self._skip_headers = _load_skip_headers(settings) - self._warn_on_cookies = False + self._experimental_cookies = settings.getbool( + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED" + ) + if self._experimental_cookies: + logger.warning( + "The deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting " + "is set to True. Please, remove the deprecated " + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting, and remove " + "the experimental name space from the responseCookies and " + "requestCookies parameters in your code (if any), both when " + "building requests and when parsing responses.", + ) if cookies_enabled is not None: self._cookies_enabled = cookies_enabled - elif settings.getbool("ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED") is True: + else: self._cookies_enabled = settings.getbool("COOKIES_ENABLED") - if not self._cookies_enabled: + if not self._cookies_enabled and self._experimental_cookies: logger.warning( - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, but it " - "will have no effect because COOKIES_ENABLED is False." + "The deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED " + "setting is True, but it will have no effect because the " + "COOKIES_ENABLED setting is False. To silence this " + "warning, remove the deprecated " + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting. To enable " + "automatic cookie mapping, set COOKIES_ENABLED to True. " + "Please, consider removing the deprecated " + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting, and " + "removing the experimental name space from the " + "responseCookies and requestCookies parameters in your " + "code (if any), both when building requests and when " + "parsing responses.", ) - else: - self._cookies_enabled = False - self._warn_on_cookies = settings.getbool("COOKIES_ENABLED") self._max_cookies = settings.getint("ZYTE_API_MAX_COOKIES", 100) self._crawler = crawler self._cookie_jars = None + if not self._experimental_cookies: + self._unreported_deprecated_experimental_fields = { + "requestCookies", + "responseCookies", + "cookieManagement", + } + else: + self._unreported_deprecated_experimental_fields = set() def parse(self, request): dont_merge_cookies = request.meta.get("dont_merge_cookies", False) @@ -887,32 +1054,7 @@ def parse(self, request): cookies_enabled=cookies_enabled, cookie_jars=self._cookie_jars, max_cookies=self._max_cookies, + experimental_cookies=self._experimental_cookies, + unreported_deprecated_experimental_fields=self._unreported_deprecated_experimental_fields, ) - if not dont_merge_cookies and self._warn_on_cookies: - self._handle_warn_on_cookies(request, params) return params - - def _handle_warn_on_cookies(self, request, params): - if params and params.get("experimental", {}).get("requestCookies") is not None: - return - if self._cookie_jars is None: - return - input_cookies = _get_all_cookies(request, self._cookie_jars) - if len(input_cookies) <= 0: - return - logger.warning( - ( - "Cookies are enabled for request %(request)r, and there are " - "cookies in the cookiejar, but " - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is False, so automatic " - "mapping will not map cookies for this or any other request. " - "To silence this warning, disable cookies for all requests " - "that use automatic mapping, either with the " - "COOKIES_ENABLED setting or with the dont_merge_cookies " - "request metadata key." - ), - { - "request": request, - }, - ) - self._warn_on_cookies = False diff --git a/scrapy_zyte_api/_request_fingerprinter.py b/scrapy_zyte_api/_request_fingerprinter.py index 3a023968..7bd04d46 100644 --- a/scrapy_zyte_api/_request_fingerprinter.py +++ b/scrapy_zyte_api/_request_fingerprinter.py @@ -18,6 +18,8 @@ from ._params import _REQUEST_PARAMS, _ParamParser, _uses_browser + _Undefined = object() + class ScrapyZyteAPIRequestFingerprinter: @classmethod def from_crawler(cls, crawler): @@ -36,7 +38,7 @@ def __init__(self, crawler): crawler=crawler, ) self._cache: "WeakKeyDictionary[Request, bytes]" = WeakKeyDictionary() - self._param_parser = _ParamParser(crawler, cookies_enabled=False) + self._param_parser = _ParamParser(crawler) def _normalize_params(self, api_params): api_params["url"] = canonicalize_url( @@ -49,9 +51,19 @@ def _normalize_params(self, api_params): api_params.pop("httpRequestText").encode() ).decode() + if ( + "responseCookies" not in api_params + and "responseCookies" in api_params.get("experimental", {}) + ): + api_params["responseCookies"] = api_params["experimental"].pop( + "responseCookies" + ) + for key, value in _REQUEST_PARAMS.items(): if value["changes_fingerprint"] is False: api_params.pop(key, None) + elif value["default"] == api_params.get(key, _Undefined): + api_params.pop(key) def fingerprint(self, request): if request in self._cache: diff --git a/scrapy_zyte_api/responses.py b/scrapy_zyte_api/responses.py index 1a8c4cef..61ebc956 100644 --- a/scrapy_zyte_api/responses.py +++ b/scrapy_zyte_api/responses.py @@ -90,9 +90,14 @@ def _prepare_headers(cls, api_response: Dict[str, Any]): input_headers: Optional[List[Dict[str, str]]] = api_response.get( "httpResponseHeaders" ) - response_cookies: Optional[List[Dict[str, str]]] = api_response.get( + deprecated_response_cookies: Optional[List[Dict[str, str]]] = api_response.get( "experimental", {} ).get("responseCookies") + response_cookies: Optional[List[Dict[str, str]]] = api_response.get( + "responseCookies", deprecated_response_cookies + ) + # Note: We do not warn about deprecated experimental cookie use because + # _process_cookies is called earlier and already takes care of that. if input_headers: headers_to_remove = copy(cls.REMOVE_HEADERS) if response_cookies: @@ -176,11 +181,10 @@ def _process_response( on which if it can properly decode the HTTP Body or have access to browserHtml. """ - # NOTES: Currently, Zyte API does NOT only allow both 'browserHtml' and + # NOTES: Currently, Zyte API does NOT allow both 'browserHtml' and # 'httpResponseBody' to be present at the same time. The support for both # will be addressed in the future. Reference: # - https://github.com/scrapy-plugins/scrapy-zyte-api/pull/10#issuecomment-1131406460 - # For now, at least one of them should be present. _process_cookies(api_response, request, cookie_jars) diff --git a/tests/test_api_requests.py b/tests/test_api_requests.py index aef54a15..4d38dbdd 100644 --- a/tests/test_api_requests.py +++ b/tests/test_api_requests.py @@ -5,7 +5,7 @@ from functools import partial from http.cookiejar import Cookie from inspect import isclass -from typing import Any, Dict, Type, cast +from typing import Any, Dict, List, Type, cast from unittest import mock from unittest.mock import patch @@ -206,7 +206,7 @@ async def test_exceptions( req = Request("http://example.com", method="POST", meta=meta) with pytest.raises(exception_type): await handler.download_request(req, None) - assert exception_text in caplog.text + _assert_warnings(caplog, [exception_text]) @ensureDeferred @@ -271,8 +271,9 @@ async def parse(self, response): TRANSPARENT_MODE = False SKIP_HEADERS = {b"cookie", b"user-agent"} JOB_ID = None -COOKIES_ENABLED = False +COOKIES_ENABLED = True MAX_COOKIES = 100 +EXPERIMENTAL_COOKIES = False GET_API_PARAMS_KWARGS = { "default_params": DEFAULT_PARAMS, "transparent_mode": TRANSPARENT_MODE, @@ -282,6 +283,7 @@ async def parse(self, response): "job_id": JOB_ID, "cookies_enabled": COOKIES_ENABLED, "max_cookies": MAX_COOKIES, + "experimental_cookies": EXPERIMENTAL_COOKIES, } @@ -291,7 +293,7 @@ async def test_params_parser_input_default(mockserver): for key in GET_API_PARAMS_KWARGS: actual = getattr(handler._param_parser, f"_{key}") expected = GET_API_PARAMS_KWARGS[key] - assert actual == expected, key + assert expected == actual, key @ensureDeferred @@ -314,6 +316,7 @@ async def test_param_parser_input_custom(mockserver): assert parser._max_cookies == 1 assert parser._skip_headers == {b"a"} assert parser._transparent_mode is True + assert parser._experimental_cookies is True @ensureDeferred @@ -350,6 +353,7 @@ async def test_param_parser_output_side_effects(output, uses_zyte_api, mockserve DEFAULT_AUTOMAP_PARAMS: Dict[str, Any] = { "httpResponseBody": True, "httpResponseHeaders": True, + "responseCookies": True, } @@ -444,7 +448,7 @@ def test_transparent_mode_toggling(setting, meta, expected): api_params = func() if api_params is not None: api_params.pop("url") - assert api_params == expected + assert expected == api_params @pytest.mark.parametrize("meta", [None, 0, "", b"", [], ()]) @@ -516,8 +520,13 @@ async def test_default_params_none(mockserver, caplog): async with mockserver.make_handler(settings) as handler: assert handler._param_parser._automap_params == {"e": "f"} assert handler._param_parser._default_params == {"b": "c"} - assert "Parameter 'a' in the ZYTE_API_DEFAULT_PARAMS setting is None" in caplog.text - assert "Parameter 'd' in the ZYTE_API_AUTOMAP_PARAMS setting is None" in caplog.text + _assert_warnings( + caplog, + [ + "Parameter 'a' in the ZYTE_API_DEFAULT_PARAMS setting is None", + "Parameter 'd' in the ZYTE_API_AUTOMAP_PARAMS setting is None", + ], + ) @pytest.mark.parametrize( @@ -551,7 +560,7 @@ async def test_default_params_none(mockserver, caplog): ( "ZYTE_API_AUTOMAP_PARAMS", "zyte_api_automap", - {"httpResponseBody", "httpResponseHeaders"}, + DEFAULT_AUTOMAP_PARAMS.keys(), ), ], ) @@ -581,12 +590,8 @@ def test_default_params_merging( for key in ignore_keys: api_params.pop(key) api_params.pop("url") - assert api_params == expected - if warnings: - for warning in warnings: - assert warning in caplog.text - else: - assert not caplog.records + assert expected == api_params + _assert_warnings(caplog, warnings) @pytest.mark.parametrize( @@ -634,6 +639,36 @@ def test_default_params_immutability(setting_key, meta_key, setting, meta): assert default_params == setting +def _assert_warnings(caplog, warnings): + if warnings: + seen_warnings = {record.getMessage(): False for record in caplog.records} + for warning in warnings: + matched = False + for seen_warning in list(seen_warnings): + if warning in seen_warning: + if seen_warnings[seen_warning] is True: + raise AssertionError( + f"Expected warning {warning!r} matches more than " + f"1 seen warning (all seen warnings: " + f"{list(seen_warnings)!r})" + ) + seen_warnings[seen_warning] = True + matched = True + break + if not matched: + raise AssertionError( + f"Expected warning {warning!r} not found in {list(seen_warnings)!r}" + ) + unexpected_warnings = [ + warning for warning, is_expected in seen_warnings.items() if not is_expected + ] + if unexpected_warnings: + raise AssertionError(f"Got unexpected warnings: {unexpected_warnings}") + else: + assert not caplog.records + caplog.clear() + + def _test_automap( settings, request_kwargs, meta, expected, warnings, caplog, cookie_jar=None ): @@ -676,12 +711,8 @@ def _test_automap( with caplog.at_level("WARNING"): api_params = param_parser.parse(request) api_params.pop("url") - assert api_params == expected - if warnings: - for warning in warnings: - assert warning in caplog.text - else: - assert not caplog.records + assert expected == api_params + _assert_warnings(caplog, warnings) @pytest.mark.parametrize( @@ -689,12 +720,11 @@ def _test_automap( [ # If no other known main output is specified in meta, httpResponseBody # is requested. - ({}, {"httpResponseBody": True, "httpResponseHeaders": True}, []), + ({}, DEFAULT_AUTOMAP_PARAMS, []), ( {"unknownMainOutput": True}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "unknownMainOutput": True, }, [], @@ -704,29 +734,29 @@ def _test_automap( # may stop working for binary responses in the future. ( {"httpResponseBody": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, [], ), - # If other main outputs are specified in meta, httpRequestBody is not - # set. + # If other main outputs are specified in meta, httpResponseBody and + # httpResponseHeaders are not set. ( {"browserHtml": True}, - {"browserHtml": True}, + {"browserHtml": True, "responseCookies": True}, [], ), ( {"screenshot": True}, - {"screenshot": True}, + {"screenshot": True, "responseCookies": True}, [], ), ( {EXTRACT_KEY: True}, - {EXTRACT_KEY: True}, + {EXTRACT_KEY: True, "responseCookies": True}, [], ), ( {"browserHtml": True, "screenshot": True}, - {"browserHtml": True, "screenshot": True}, + {"browserHtml": True, "screenshot": True, "responseCookies": True}, [], ), # If no known main output is specified, and httpResponseBody is @@ -734,12 +764,12 @@ def _test_automap( # is added. ( {"httpResponseBody": False}, - {}, + {"responseCookies": True}, [], ), ( {"httpResponseBody": False, "unknownMainOutput": True}, - {"unknownMainOutput": True}, + {"unknownMainOutput": True, "responseCookies": True}, [], ), # We allow httpResponseBody and browserHtml to be both set to True, in @@ -748,8 +778,7 @@ def _test_automap( {"httpResponseBody": True, "browserHtml": True}, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -770,22 +799,22 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): # not be implicitly set to True, it is passed as such. ( {"httpResponseBody": False, "httpResponseHeaders": True}, - {"httpResponseHeaders": True}, + {"httpResponseHeaders": True, "responseCookies": True}, [], ), ( {"browserHtml": True, "httpResponseHeaders": True}, - {"browserHtml": True, "httpResponseHeaders": True}, + {"browserHtml": True, "httpResponseHeaders": True, "responseCookies": True}, [], ), ( {"screenshot": True, "httpResponseHeaders": True}, - {"screenshot": True, "httpResponseHeaders": True}, + {"screenshot": True, "httpResponseHeaders": True, "responseCookies": True}, [], ), ( {EXTRACT_KEY: True, "httpResponseHeaders": True}, - {EXTRACT_KEY: True, "httpResponseHeaders": True}, + {EXTRACT_KEY: True, "httpResponseHeaders": True, "responseCookies": True}, [], ), ( @@ -794,7 +823,11 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): "httpResponseBody": False, "httpResponseHeaders": True, }, - {"unknownMainOutput": True, "httpResponseHeaders": True}, + { + "unknownMainOutput": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, [], ), # Setting httpResponseHeaders to True where it would be already True @@ -804,12 +837,20 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): # stops being set to True by default in those scenarios. ( {"httpResponseHeaders": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, [], ), ( {"httpResponseBody": True, "httpResponseHeaders": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, [], ), ( @@ -822,6 +863,7 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): "browserHtml": True, "httpResponseBody": True, "httpResponseHeaders": True, + "responseCookies": True, }, [], ), @@ -831,16 +873,21 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): "unknownMainOutput": True, "httpResponseBody": True, "httpResponseHeaders": True, + "responseCookies": True, }, [], ), # If httpResponseHeaders is set to False, httpResponseHeaders is not # defined, even if httpResponseBody is set to True, implicitly or # explicitly. - ({"httpResponseHeaders": False}, {"httpResponseBody": True}, []), + ( + {"httpResponseHeaders": False}, + {"httpResponseBody": True, "responseCookies": True}, + [], + ), ( {"httpResponseBody": True, "httpResponseHeaders": False}, - {"httpResponseBody": True}, + {"httpResponseBody": True, "responseCookies": True}, [], ), ( @@ -849,12 +896,16 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): "browserHtml": True, "httpResponseHeaders": False, }, - {"browserHtml": True, "httpResponseBody": True}, + {"browserHtml": True, "httpResponseBody": True, "responseCookies": True}, [], ), ( {"unknownMainOutput": True, "httpResponseHeaders": False}, - {"unknownMainOutput": True, "httpResponseBody": True}, + { + "unknownMainOutput": True, + "httpResponseBody": True, + "responseCookies": True, + }, [], ), # If httpResponseHeaders is unnecessarily set to False where @@ -863,22 +914,22 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): # logged. ( {"httpResponseBody": False, "httpResponseHeaders": False}, - {}, + {"responseCookies": True}, ["do not need to set httpResponseHeaders to False"], ), ( {"browserHtml": True, "httpResponseHeaders": False}, - {"browserHtml": True}, + {"browserHtml": True, "responseCookies": True}, ["do not need to set httpResponseHeaders to False"], ), ( {"screenshot": True, "httpResponseHeaders": False}, - {"screenshot": True}, + {"screenshot": True, "responseCookies": True}, ["do not need to set httpResponseHeaders to False"], ), ( {EXTRACT_KEY: True, "httpResponseHeaders": False}, - {EXTRACT_KEY: True}, + {EXTRACT_KEY: True, "responseCookies": True}, ["do not need to set httpResponseHeaders to False"], ), ( @@ -887,7 +938,7 @@ def test_automap_main_outputs(meta, expected, warnings, caplog): "httpResponseBody": False, "httpResponseHeaders": False, }, - {"unknownMainOutput": True}, + {"unknownMainOutput": True, "responseCookies": True}, ["do not need to set httpResponseHeaders to False"], ), ], @@ -903,10 +954,7 @@ def test_automap_header_output(meta, expected, warnings, caplog): ( "GET", {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), # Other HTTP methods, regardless of whether they are supported, @@ -917,8 +965,7 @@ def test_automap_header_output(meta, expected, warnings, caplog): method, {}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestMethod": method, }, [], @@ -941,18 +988,17 @@ def test_automap_header_output(meta, expected, warnings, caplog): ( None, {"httpRequestMethod": "GET"}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - ["Use Request.method"], + DEFAULT_AUTOMAP_PARAMS, + [ + "Use Request.method", + "unnecessarily defines the Zyte API 'httpRequestMethod' parameter with its default value", + ], ), ( "POST", {"httpRequestMethod": "POST"}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestMethod": "POST", }, ["Use Request.method"], @@ -963,21 +1009,18 @@ def test_automap_header_output(meta, expected, warnings, caplog): ( "POST", {"httpRequestMethod": "GET"}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [ "Use Request.method", "does not match the Zyte API httpRequestMethod", + "unnecessarily defines the Zyte API 'httpRequestMethod' parameter with its default value", ], ), ( "POST", {"httpRequestMethod": "PUT"}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestMethod": "PUT", }, [ @@ -993,6 +1036,7 @@ def test_automap_header_output(meta, expected, warnings, caplog): { "browserHtml": True, "httpRequestMethod": "POST", + "responseCookies": True, }, [], ), @@ -1002,6 +1046,7 @@ def test_automap_header_output(meta, expected, warnings, caplog): { "screenshot": True, "httpRequestMethod": "POST", + "responseCookies": True, }, [], ), @@ -1011,13 +1056,17 @@ def test_automap_header_output(meta, expected, warnings, caplog): { EXTRACT_KEY: True, "httpRequestMethod": "POST", + "responseCookies": True, }, [], ), ], ) def test_automap_method(method, meta, expected, warnings, caplog): - _test_automap({}, {"method": method}, meta, expected, warnings, caplog) + request_kwargs = {} + if method is not None: + request_kwargs["method"] = method + _test_automap({}, request_kwargs, meta, expected, warnings, caplog) @pytest.mark.parametrize( @@ -1032,8 +1081,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1045,6 +1093,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -1054,6 +1103,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): { "requestHeaders": {"referer": "a"}, "screenshot": True, + "responseCookies": True, }, [], ), @@ -1063,6 +1113,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): { "requestHeaders": {"referer": "a"}, EXTRACT_KEY: True, + "responseCookies": True, }, [], ), @@ -1078,8 +1129,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1091,8 +1141,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, "screenshot": True, }, @@ -1105,8 +1154,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, EXTRACT_KEY: True, }, @@ -1120,8 +1168,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, "screenshot": True, }, @@ -1143,8 +1190,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "unknownMainOutput": True, }, [], @@ -1158,6 +1204,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"httpResponseBody": False}, { "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -1167,6 +1214,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): { "requestHeaders": {"referer": "a"}, "unknownMainOutput": True, + "responseCookies": True, }, [], ), @@ -1174,10 +1222,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): ( {"Referer": "a"}, {"customHttpRequestHeaders": False}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), ( @@ -1185,6 +1230,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True, "requestHeaders": False}, { "browserHtml": True, + "responseCookies": True, }, [], ), @@ -1197,8 +1243,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1211,8 +1256,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1226,8 +1270,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1239,8 +1282,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1254,6 +1296,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"name": "Referer", "value": "a"}, ], "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -1261,10 +1304,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): ( {"Referer": None}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), ( @@ -1272,6 +1312,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, [], ), @@ -1280,8 +1321,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True, "httpResponseBody": True}, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1290,6 +1330,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"screenshot": True}, { "screenshot": True, + "responseCookies": True, }, [], ), @@ -1298,6 +1339,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {EXTRACT_KEY: True}, { EXTRACT_KEY: True, + "responseCookies": True, }, [], ), @@ -1306,8 +1348,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"screenshot": True, "httpResponseBody": True}, { "screenshot": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1316,8 +1357,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {EXTRACT_KEY: True, "httpResponseBody": True}, { EXTRACT_KEY: True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1325,8 +1365,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"Referer": None}, {"unknownMainOutput": True}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "unknownMainOutput": True, }, [], @@ -1336,13 +1375,14 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"unknownMainOutput": True, "httpResponseBody": False}, { "unknownMainOutput": True, + "responseCookies": True, }, [], ), ( {"Referer": None}, {"httpResponseBody": False}, - {}, + {"responseCookies": True}, [], ), # Warn if header parameters are used in meta, even if the values match @@ -1359,8 +1399,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["Use Request.headers instead"], ), @@ -1373,6 +1412,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, ["Use Request.headers instead"], ), @@ -1387,8 +1427,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "b"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["Use Request.headers instead"], ), @@ -1401,6 +1440,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "b"}, + "responseCookies": True, }, ["Use Request.headers instead"], ), @@ -1415,8 +1455,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["Use Request.headers instead"], ), @@ -1429,6 +1468,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, ["Use Request.headers instead"], ), @@ -1446,8 +1486,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "requestHeaders": {"referer": "a"}, }, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1465,6 +1504,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], + "responseCookies": True, }, [], ), @@ -1476,6 +1516,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped"], ), @@ -1485,6 +1526,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped"], ), @@ -1492,10 +1534,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): ( {"user-Agent": ""}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["cannot be mapped"], ), # The Accept, Accept-Encoding and Accept-Language headers, when @@ -1512,6 +1551,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, [], ), @@ -1521,6 +1561,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped"], ) @@ -1542,19 +1583,13 @@ def test_automap_method(method, meta, expected, warnings, caplog): ( {"User-Agent": DEFAULT_USER_AGENT}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), ( {"User-Agent": ""}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["cannot be mapped"], ), ( @@ -1562,6 +1597,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, [], ), @@ -1570,6 +1606,7 @@ def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped"], ), @@ -1594,8 +1631,7 @@ def test_automap_headers(headers, meta, expected, warnings, caplog): }, {}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "customHttpRequestHeaders": [ {"name": "User-Agent", "value": ""}, ], @@ -1617,6 +1653,7 @@ def test_automap_headers(headers, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"userAgent": ""}, + "responseCookies": True, }, [], ), @@ -1642,8 +1679,13 @@ def test_automap_header_settings(settings, headers, meta, expected, warnings, ca "settings,cookies,meta,params,expected,warnings,cookie_jar", [ # Cookies, both for requests and for responses, are enabled based on - # both ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED (default: False) and - # COOKIES_ENABLED (default: True). + # COOKIES_ENABLED (default: True). Disabling cookie mapping at the + # spider level requires setting COOKIES_ENABLED to False. + # + # ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED (deprecated, default: False), + # when enabled, triggers a deprecation warning, and forces the + # experimental name space to be used for automatic cookie parameters if + # COOKIES_ENABLED is also True. *( ( settings, @@ -1654,40 +1696,24 @@ def test_automap_header_settings(settings, headers, meta, expected, warnings, ca "httpResponseBody": True, "httpResponseHeaders": True, }, - setup_warnings - or ( - run_time_warnings - if cast(Dict, settings).get("COOKIES_ENABLED", True) - else [] - ), + warnings, [], ) - for input_cookies, run_time_warnings in ( - ( - REQUEST_INPUT_COOKIES_EMPTY, - [], - ), - ( - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - [ - "there are cookies in the cookiejar, but ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is False", - ], - ), + for input_cookies in ( + REQUEST_INPUT_COOKIES_EMPTY, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, ) - for settings, setup_warnings in ( - ( - {}, - [], - ), + for settings, warnings in ( ( { - "COOKIES_ENABLED": True, + "COOKIES_ENABLED": False, }, [], ), ( { "COOKIES_ENABLED": False, + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": False, }, [], ), @@ -1697,18 +1723,14 @@ def test_automap_header_settings(settings, headers, meta, expected, warnings, ca "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, [ - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, but it will have no effect because COOKIES_ENABLED is False.", + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "will have no effect", ], ), - ( - { - "COOKIES_ENABLED": False, - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": False, - }, - [], - ), ) ), + # When COOKIES_ENABLED is True, responseCookies is set to True, and + # requestCookies is filled automatically if there are cookies. *( ( settings, @@ -1718,10 +1740,8 @@ def test_automap_header_settings(settings, headers, meta, expected, warnings, ca { "httpResponseBody": True, "httpResponseHeaders": True, - "experimental": { - "responseCookies": True, - **cast(Dict, output_cookies), - }, + "responseCookies": True, + **cast(Dict, output_cookies), }, [], [], @@ -1737,83 +1757,57 @@ def test_automap_header_settings(settings, headers, meta, expected, warnings, ca ), ) for settings in ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - { - "COOKIES_ENABLED": True, - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, + {}, + {"COOKIES_ENABLED": True}, ) ), - # Do not warn about request cookies not being mapped if cookies are - # manually set. + # When ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is also True, + # responseCookies and requestCookies are defined within the + # experimental name space, and a deprecation warning is issued. *( ( settings, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + input_cookies, + {}, {}, - { - "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, - } - }, { "httpResponseBody": True, "httpResponseHeaders": True, "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + "responseCookies": True, + **cast(Dict, output_cookies), }, }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], [], - [], + ) + for input_cookies, output_cookies in ( + ( + REQUEST_INPUT_COOKIES_EMPTY, + {}, + ), + ( + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {"requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL}, + ), ) for settings in ( - {}, + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, { "COOKIES_ENABLED": True, + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, ) ), # dont_merge_cookies=True on request metadata disables cookies. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - { - "dont_merge_cookies": True, - }, - {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - { - "dont_merge_cookies": True, - }, - {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], - ), - # Do not warn about request cookies not being mapped if - # dont_merge_cookies=True is set on request metadata. *( ( settings, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + input_cookies, { "dont_merge_cookies": True, }, @@ -1822,305 +1816,901 @@ def test_automap_header_settings(settings, headers, meta, expected, warnings, ca "httpResponseBody": True, "httpResponseHeaders": True, }, + warnings, [], - [ - { - "name": "foo", - "value": "bar", - "domain": "example.com", - } - ], ) - for settings in ( - {}, - { - "COOKIES_ENABLED": True, - }, + for input_cookies in ( + REQUEST_INPUT_COOKIES_EMPTY, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + ) + for settings, warnings in ( + ( + {}, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) + ), + # Cookies can be disabled setting the corresponding Zyte API parameter + # to False. + # + # By default, setting experimental parameters to False has no effect. + # If ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, then only + # experimental parameters are taken into account instead. + *( + ( + settings, + input_cookies, + {}, + input_params, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + **cast(Dict, output_params), + }, + warnings, + [], + ) + for settings, input_cookies, input_params, output_params, warnings in ( + # No cookies, responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "responseCookies": False, + }, + {}, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "responseCookies": False, + } + }, + {}, + [ + "include experimental.responseCookies, which is deprecated", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "responseCookies": False, + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "responseCookies": False, + } + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + # No cookies, requestCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + }, + { + "responseCookies": True, + }, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + } + }, + { + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + }, + { + "experimental": {"responseCookies": True}, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + } + }, + { + "experimental": {"responseCookies": True}, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + # No cookies, requestCookies and responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + [ + "include experimental.requestCookies, which is deprecated", + "include experimental.responseCookies, which is deprecated", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + # Cookies, responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "responseCookies": False, + }, + { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "responseCookies": False, + } + }, + { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + [ + "include experimental.responseCookies, which is deprecated", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "responseCookies": False, + }, + { + "experimental": { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "responseCookies": False, + } + }, + { + "experimental": { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + # Cookies, requestCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + }, + { + "responseCookies": True, + }, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + } + }, + { + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + }, + { + "experimental": { + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + } + }, + { + "experimental": { + "responseCookies": True, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + # Cookies, requestCookies and responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + [ + "include experimental.requestCookies, which is deprecated", + "include experimental.responseCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), ) ), - # Cookies can be disabled setting the corresponding Zyte API parameter - # to False. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - {}, - { - "experimental": { - "responseCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - {}, - { - "experimental": { - "requestCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": {"responseCookies": True}, - }, - [], - [], + # requestCookies, if set manually, prevents automatic mapping. + # + # Setting requestCookies to [] disables automatic mapping, but logs a + # a warning recommending to either use False to achieve the same or + # remove the parameter to let automatic mapping work. + *( + ( + settings, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + input_params, + output_params, + warnings, + [], + ) + for override_cookies, override_warnings in ( + ( + cast(List[Dict[str, str]], []), + ["is overriding automatic request cookie mapping"], + ), + ) + for settings, input_params, output_params, warnings in ( + ( + {}, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, + override_warnings, + ), + ( + {}, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + *override_warnings, + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + }, + }, + [ + *cast(List, override_warnings), + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + *override_warnings, + ], + ), + ) ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - {}, - { - "experimental": { - "responseCookies": False, - "requestCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], + *( + ( + settings, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + input_params, + output_params, + warnings, + [], + ) + for override_cookies in ((REQUEST_OUTPUT_COOKIES_MAXIMAL,),) + for settings, input_params, output_params, warnings in ( + ( + {}, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "requestCookies": override_cookies, + "responseCookies": True, + }, + [], + ), + ( + {}, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "requestCookies": override_cookies, + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "requestCookies": override_cookies, + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "requestCookies": override_cookies, + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], + ), + ) + ), + # Cookies work for browser and automatic extraction requests as well. + *( + ( + settings, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + params, + { + **params, + **cast(Dict, extra_output_params), + }, + warnings, + [], + ) + for params in ( + { + "browserHtml": True, + }, + { + "screenshot": True, + }, + { + EXTRACT_KEY: True, + }, + ) + for settings, extra_output_params, warnings in ( + ( + {}, + { + "responseCookies": True, + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "experimental": { + "responseCookies": True, + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) + ), + # Cookies are mapped correctly, both with minimum and maximum cookie + # parameters. + *( + ( + settings, + input_cookies, + {}, + {}, + output_params, + warnings, + [], + ) + for input_cookies, output_cookies in ( + ( + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + REQUEST_OUTPUT_COOKIES_MINIMAL, + ), + ( + REQUEST_INPUT_COOKIES_MINIMAL_LIST, + REQUEST_OUTPUT_COOKIES_MINIMAL, + ), + ( + REQUEST_INPUT_COOKIES_MAXIMAL, + REQUEST_OUTPUT_COOKIES_MAXIMAL, + ), + ) + for settings, output_params, warnings in ( + ( + {}, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + "requestCookies": output_cookies, + }, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + "requestCookies": output_cookies, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) + ), + # Mapping multiple cookies works. + *( + ( + settings, + input_cookies, + {}, + {}, + output_params, + warnings, + [], + ) + for input_cookies, output_cookies in ( + ( + {"a": "b", "c": "d"}, + [ + {"name": "a", "value": "b", "domain": "example.com"}, + {"name": "c", "value": "d", "domain": "example.com"}, + ], + ), + ) + for settings, output_params, warnings in ( + ( + {}, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + "requestCookies": output_cookies, + }, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + "requestCookies": output_cookies, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) ), + # If (contradictory) values are set for requestCookies or + # responseCookies both outside and inside the experimental namespace, + # the non-experimental value takes priority. This is so even if + # ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, in which case the + # outside value is moved into the experimental namespace, overriding + # its value. ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + REQUEST_INPUT_COOKIES_EMPTY, {}, { + "responseCookies": True, "experimental": { "responseCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, }, }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - {}, - { - "experimental": { - "requestCookies": False, - } - }, { "httpResponseBody": True, "httpResponseHeaders": True, - "experimental": {"responseCookies": True}, + "responseCookies": True, }, - [], + [ + "include experimental.responseCookies, which is deprecated", + "defines both responseCookies (True) and experimental.responseCookies (False)", + ], [], ), ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + REQUEST_INPUT_COOKIES_EMPTY, {}, { + "responseCookies": False, "experimental": { - "responseCookies": False, - "requestCookies": False, - } + "responseCookies": True, + }, }, { "httpResponseBody": True, "httpResponseHeaders": True, }, - [], + [ + "defines both responseCookies (False) and experimental.responseCookies (True)", + "include experimental.responseCookies, which is deprecated", + ], [], ), - # Setting requestCookies to [] disables automatic mapping, but logs a - # a warning recommending to either use False to achieve the same or - # remove the parameter to let automatic mapping work. + *( + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + { + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], + "experimental": { + "requestCookies": [ + {"name": experimental_k, "value": experimental_v}, + ], + }, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], + "responseCookies": True, + }, + [ + "include experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be ignored", + ], + [], + ) + for regular_k, regular_v, experimental_k, experimental_v in ( + ("b", "2", "c", "3"), + ("c", "3", "b", "2"), + ) + ), + # Now with ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED=True ( { "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + REQUEST_INPUT_COOKIES_EMPTY, {}, { + "responseCookies": True, "experimental": { - "requestCookies": [], - } + "responseCookies": False, + }, }, { "httpResponseBody": True, "httpResponseHeaders": True, "experimental": { - "requestCookies": [], "responseCookies": True, }, }, [ - "is overriding automatic request cookie mapping", + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "defines both responseCookies (True) and experimental.responseCookies (False)", ], [], ), - # Cookies work for browser and automatic extraction requests as well. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - {}, - { - "browserHtml": True, - }, - { - "browserHtml": True, - "experimental": { - "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, - }, - }, - [], - [], - ), ( { "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + REQUEST_INPUT_COOKIES_EMPTY, {}, { - "screenshot": True, - }, - { - "screenshot": True, + "responseCookies": False, "experimental": { "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, }, }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - {}, - { - EXTRACT_KEY: True, - }, { - EXTRACT_KEY: True, - "experimental": { - "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, - }, + "httpResponseBody": True, + "httpResponseHeaders": True, }, - [], + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "defines both responseCookies (False) and experimental.responseCookies (True)", + ], [], ), - # Cookies are mapped correctly, both with minimum and maximum cookie - # parameters. *( ( { "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, - input, - {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, {}, + { + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], + "experimental": { + "requestCookies": [ + {"name": experimental_k, "value": experimental_v}, + ], + }, + }, { "httpResponseBody": True, "httpResponseHeaders": True, "experimental": { + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], "responseCookies": True, - "requestCookies": output, }, }, - [], + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], [], ) - for input, output in ( - ( - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - REQUEST_OUTPUT_COOKIES_MINIMAL, - ), - ( - REQUEST_INPUT_COOKIES_MINIMAL_LIST, - REQUEST_OUTPUT_COOKIES_MINIMAL, - ), - ( - REQUEST_INPUT_COOKIES_MAXIMAL, - REQUEST_OUTPUT_COOKIES_MAXIMAL, - ), + for regular_k, regular_v, experimental_k, experimental_v in ( + ("b", "2", "c", "3"), + ("c", "3", "b", "2"), ) ), - # requestCookies, if set manually, prevents automatic mapping. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - {}, - { - "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MAXIMAL, - }, - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": { - "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MAXIMAL, - }, - }, - [], - [], - ), - # Mapping multiple cookies works. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - {"a": "b", "c": "d"}, - {}, - {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": { - "responseCookies": True, - "requestCookies": [ - {"name": "a", "value": "b", "domain": "example.com"}, - {"name": "c", "value": "d", "domain": "example.com"}, - ], - }, - }, - [], - [], - ), ], ) def test_automap_cookies( @@ -2150,7 +2740,6 @@ def test_automap_all_cookies(meta): the target URL domain.""" settings: Dict[str, Any] = { **SETTINGS, - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_TRANSPARENT_MODE": True, } crawler = get_crawler(settings) @@ -2181,7 +2770,7 @@ def test_automap_all_cookies(meta): ) cookie_middleware.process_request(request1, spider=None) api_params = param_parser.parse(request1) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "a", "value": "b", "domain": "a.example"}, # https://github.com/scrapy/scrapy/issues/5841 # {"name": "c", "value": "d", "domain": "b.example"}, @@ -2225,9 +2814,7 @@ def test_automap_all_cookies(meta): cookie_middleware.process_request(request2, spider=None) api_params = param_parser.parse(request2) - assert sort_dict_list( - api_params["experimental"]["requestCookies"] - ) == sort_dict_list( + assert sort_dict_list(api_params["requestCookies"]) == sort_dict_list( [ {"name": "e", "value": "f", "domain": ".c.example"}, {"name": "i", "value": "j", "domain": ".d.example"}, @@ -2258,7 +2845,6 @@ def test_automap_cookie_jar(meta): request4 = Request(url="https://example.com/4", meta={**meta, "cookiejar": "a"}) settings: Dict[str, Any] = { **SETTINGS, - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_TRANSPARENT_MODE": True, } crawler = get_crawler(settings) @@ -2268,20 +2854,18 @@ def test_automap_cookie_jar(meta): cookie_middleware.process_request(request1, spider=None) api_params = param_parser.parse(request1) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "z", "value": "y", "domain": "example.com"} ] cookie_middleware.process_request(request2, spider=None) api_params = param_parser.parse(request2) - assert "requestCookies" not in api_params["experimental"] + assert "requestCookies" not in api_params cookie_middleware.process_request(request3, spider=None) api_params = param_parser.parse(request3) - assert sort_dict_list( - api_params["experimental"]["requestCookies"] - ) == sort_dict_list( + assert sort_dict_list(api_params["requestCookies"]) == sort_dict_list( [ {"name": "x", "value": "w", "domain": "example.com"}, {"name": "z", "value": "y", "domain": "example.com"}, @@ -2290,9 +2874,7 @@ def test_automap_cookie_jar(meta): cookie_middleware.process_request(request4, spider=None) api_params = param_parser.parse(request4) - assert sort_dict_list( - api_params["experimental"]["requestCookies"] - ) == sort_dict_list( + assert sort_dict_list(api_params["requestCookies"]) == sort_dict_list( [ {"name": "x", "value": "w", "domain": "example.com"}, {"name": "z", "value": "y", "domain": "example.com"}, @@ -2310,7 +2892,6 @@ def test_automap_cookie_jar(meta): def test_automap_cookie_limit(meta, caplog): settings: Dict[str, Any] = { **SETTINGS, - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_MAX_COOKIES": 1, "ZYTE_API_TRANSPARENT_MODE": True, } @@ -2330,11 +2911,10 @@ def test_automap_cookie_limit(meta, caplog): cookie_middleware.process_request(request, spider=None) with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "z", "value": "y", "domain": "example.com"} ] - assert not caplog.records - caplog.clear() + _assert_warnings(caplog, []) # Verify that requests with 2 cookies results in only 1 cookie set and a # warning. @@ -2347,13 +2927,16 @@ def test_automap_cookie_limit(meta, caplog): cookie_middleware.process_request(request, spider=None) with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] in [ + assert api_params["requestCookies"] in [ [{"name": "z", "value": "y", "domain": "example.com"}], [{"name": "x", "value": "w", "domain": "example.com"}], ] - assert "would get 2 cookies" in caplog.text - assert "limited to 1 cookies" in caplog.text - caplog.clear() + _assert_warnings( + caplog, + [ + "would get 2 cookies, but request cookie automatic mapping is limited to 1 cookies" + ], + ) # Verify that 1 cookie in the cookie jar and 1 cookie in the request count # as 2 cookies, resulting in only 1 cookie set and a warning. @@ -2372,13 +2955,16 @@ def test_automap_cookie_limit(meta, caplog): cookie_middleware.process_request(request, spider=None) with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] in [ + assert api_params["requestCookies"] in [ [{"name": "z", "value": "y", "domain": "example.com"}], [{"name": "x", "value": "w", "domain": "example.com"}], ] - assert "would get 2 cookies" in caplog.text - assert "limited to 1 cookies" in caplog.text - caplog.clear() + _assert_warnings( + caplog, + [ + "would get 2 cookies, but request cookie automatic mapping is limited to 1 cookies" + ], + ) # Vefify that unrelated-domain cookies count for the limit. pre_request = Request( @@ -2396,13 +2982,16 @@ def test_automap_cookie_limit(meta, caplog): cookie_middleware.process_request(request, spider=None) with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] in [ + assert api_params["requestCookies"] in [ [{"name": "z", "value": "y", "domain": "other.example"}], [{"name": "x", "value": "w", "domain": "example.com"}], ] - assert "would get 2 cookies" in caplog.text - assert "limited to 1 cookies" in caplog.text - caplog.clear() + _assert_warnings( + caplog, + [ + "would get 2 cookies, but request cookie automatic mapping is limited to 1 cookies" + ], + ) class CustomCookieJar(CookieJar): @@ -2445,7 +3034,6 @@ def test_automap_custom_cookie_middleware(): f"{mw_cls.__module__}.{mw_cls.__qualname__}": 700, }, "ZYTE_API_COOKIE_MIDDLEWARE": f"{mw_cls.__module__}.{mw_cls.__qualname__}", - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_TRANSPARENT_MODE": True, } crawler = get_crawler(settings) @@ -2456,7 +3044,7 @@ def test_automap_custom_cookie_middleware(): request = Request(url="https://example.com/1") cookie_middleware.process_request(request, spider=None) api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "z", "value": "y", "domain": "example.com"} ] @@ -2469,8 +3057,7 @@ def test_automap_custom_cookie_middleware(): "a", {}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestBody": "YQ==", }, [], @@ -2481,8 +3068,7 @@ def test_automap_custom_cookie_middleware(): "a", {"httpRequestBody": "Yg=="}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestBody": "Yg==", }, [ @@ -2496,8 +3082,7 @@ def test_automap_custom_cookie_middleware(): "a", {"httpRequestBody": "YQ=="}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestBody": "YQ==", }, ["Use Request.body instead"], @@ -2509,6 +3094,7 @@ def test_automap_custom_cookie_middleware(): { "browserHtml": True, "httpRequestBody": "YQ==", + "responseCookies": True, }, [], ), @@ -2518,6 +3104,7 @@ def test_automap_custom_cookie_middleware(): { "httpRequestBody": "YQ==", "screenshot": True, + "responseCookies": True, }, [], ), @@ -2527,6 +3114,7 @@ def test_automap_custom_cookie_middleware(): { "httpRequestBody": "YQ==", EXTRACT_KEY: True, + "responseCookies": True, }, [], ), @@ -2550,6 +3138,7 @@ def test_automap_body(body, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["unnecessarily defines"], ), @@ -2557,20 +3146,14 @@ def test_automap_body(body, meta, expected, warnings, caplog): { "browserHtml": False, }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["unnecessarily defines"], ), ( { "screenshot": False, }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["unnecessarily defines"], ), ( @@ -2580,6 +3163,7 @@ def test_automap_body(body, meta, expected, warnings, caplog): }, { "screenshot": True, + "responseCookies": True, }, ["do not need to set httpResponseHeaders to False"], ), @@ -2587,10 +3171,7 @@ def test_automap_body(body, meta, expected, warnings, caplog): { EXTRACT_KEY: False, }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["unnecessarily defines"], ), ( @@ -2600,6 +3181,7 @@ def test_automap_body(body, meta, expected, warnings, caplog): }, { EXTRACT_KEY: True, + "responseCookies": True, }, ["do not need to set httpResponseHeaders to False"], ), @@ -2617,16 +3199,14 @@ def test_automap_default_parameter_cleanup(meta, expected, warnings, caplog): {"screenshot": True, "browserHtml": False}, { "screenshot": True, + "responseCookies": True, }, [], ), ( {}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), ], @@ -2648,12 +3228,8 @@ def test_default_params_automap(default_params, meta, expected, warnings, caplog with caplog.at_level("WARNING"): api_params = param_parser.parse(request) api_params.pop("url") - assert api_params == expected - if warnings: - for warning in warnings: - assert warning in caplog.text - else: - assert not caplog.records + assert expected == api_params + _assert_warnings(caplog, warnings) @pytest.mark.parametrize( @@ -2676,3 +3252,98 @@ def test_default_params_false(default_params): param_parser = handler._param_parser api_params = param_parser.parse(request) assert api_params is None + + +@pytest.mark.parametrize( + "field", + [ + "responseCookies", + "requestCookies", + "cookieManagement", + ], +) +def test_field_deprecation_warnings(field, caplog): + input_params = {"experimental": {field: "foo"}} + + # Raw + raw_request = Request( + url="https://example.com", + meta={"zyte_api": input_params}, + ) + crawler = get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + output_params.pop("url") + assert input_params == output_params + _assert_warnings(caplog, [f"experimental.{field}, which is deprecated"]) + with caplog.at_level("WARNING"): + # Only warn once per field. + param_parser.parse(raw_request) + _assert_warnings(caplog, []) + + # Automap + raw_request = Request( + url="https://example.com", + meta={"zyte_api_automap": input_params}, + ) + crawler = get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + output_params.pop("url") + for key, value in input_params["experimental"].items(): + assert output_params[key] == value + _assert_warnings( + caplog, + [ + f"experimental.{field}, which is deprecated", + f"experimental.{field} will be removed, and its value will be set as {field}", + ], + ) + with caplog.at_level("WARNING"): + # Only warn once per field. + param_parser.parse(raw_request) + _assert_warnings( + caplog, + [f"experimental.{field} will be removed, and its value will be set as {field}"], + ) + + +def test_field_deprecation_warnings_false_positives(caplog): + """Make sure that the code tested by test_field_deprecation_warnings does + not trigger for unrelated fields that just happen to share their name space + (experimental).""" + + input_params = {"experimental": {"foo": "bar"}} + + # Raw + raw_request = Request( + url="https://example.com", + meta={"zyte_api": input_params}, + ) + crawler = get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + output_params.pop("url") + assert input_params == output_params + _assert_warnings(caplog, []) + + # Automap + raw_request = Request( + url="https://example.com", + meta={"zyte_api_automap": input_params}, + ) + crawler = get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + output_params.pop("url") + for key, value in input_params.items(): + assert output_params[key] == value + _assert_warnings(caplog, []) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 9011a2a0..51849dee 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -98,8 +98,7 @@ async def test_autothrottle_handling(mw_cls, processor): async def test_cookies(): """Make sure that the downloader middleware does not crash on Zyte API requests with cookies.""" - settings = {"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True} - crawler = get_crawler(settings_dict=settings) + crawler = get_crawler() await crawler.crawl("a") spider = crawler.spider middleware = create_instance( diff --git a/tests/test_request_fingerprinter.py b/tests/test_request_fingerprinter.py index 14dfb736..6e058a93 100644 --- a/tests/test_request_fingerprinter.py +++ b/tests/test_request_fingerprinter.py @@ -81,6 +81,68 @@ def test_headers(): assert fingerprint1 == fingerprint2 +def test_cookies(): + crawler = get_crawler() + fingerprinter = create_instance( + ScrapyZyteAPIRequestFingerprinter, settings=crawler.settings, crawler=crawler + ) + request1 = Request( + "https://example.com", + meta={ + "zyte_api": { + "responseCookies": False, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + "experimental": { + "responseCookies": False, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + }, + } + }, + ) + # Same with responseCookies set to `True`. + request2 = Request( + "https://example.com", + meta={ + "zyte_api": { + "responseCookies": True, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + "experimental": { + "responseCookies": False, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + }, + } + }, + ) + # Same with experimental.responseCookies set to `True`. + request3 = Request( + "https://example.com", + meta={ + "zyte_api": { + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + "experimental": { + "responseCookies": True, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + }, + } + }, + ) + request4 = Request("https://example.com", meta={"zyte_api": True}) + fingerprint1 = fingerprinter.fingerprint(request1) + fingerprint2 = fingerprinter.fingerprint(request2) + fingerprint3 = fingerprinter.fingerprint(request3) + fingerprint4 = fingerprinter.fingerprint(request4) + assert fingerprint1 != fingerprint2 + assert fingerprint1 != fingerprint3 + assert fingerprint1 == fingerprint4 + assert fingerprint2 == fingerprint3 + + @pytest.mark.parametrize( "url,params,fingerprint", ( @@ -230,9 +292,7 @@ def test_only_end_parameters_matter(): "zyte_api": { "httpResponseBody": True, "httpResponseHeaders": True, - "experimental": { - "responseCookies": True, - }, + "responseCookies": True, } }, ) diff --git a/tests/test_responses.py b/tests/test_responses.py index e35a079f..9b68a82c 100644 --- a/tests/test_responses.py +++ b/tests/test_responses.py @@ -67,9 +67,7 @@ def raw_api_response_browser(): {"name": "Content-Length", "value": len(PAGE_CONTENT)}, ], "statusCode": 200, - "experimental": { - "responseCookies": INPUT_COOKIES, - }, + "responseCookies": INPUT_COOKIES, } @@ -83,9 +81,7 @@ def raw_api_response_body(): {"name": "Content-Length", "value": len(PAGE_CONTENT)}, ], "statusCode": 200, - "experimental": { - "responseCookies": INPUT_COOKIES, - }, + "responseCookies": INPUT_COOKIES, } @@ -100,9 +96,7 @@ def raw_api_response_mixed(): {"name": "Content-Length", "value": len(PAGE_CONTENT_2)}, ], "statusCode": 200, - "experimental": { - "responseCookies": INPUT_COOKIES, - }, + "responseCookies": INPUT_COOKIES, } @@ -569,3 +563,48 @@ def test_status_code(base_kwargs_func, kwargs, expected_status_code): response = _process_response(api_response, Request(api_response["url"])) assert response is not None assert response.status == expected_status_code + + +@pytest.mark.parametrize( + "api_response", + [ + { + "url": "https://example.com", + "httpResponseBody": b64encode(PAGE_CONTENT.encode("utf-8")), + "statusCode": 200, + "responseCookies": INPUT_COOKIES, + }, + { + "url": "https://example.com", + "httpResponseBody": b64encode(PAGE_CONTENT.encode("utf-8")), + "statusCode": 200, + "experimental": { + "responseCookies": INPUT_COOKIES, + }, + }, + { + "url": "https://example.com", + "httpResponseBody": b64encode(PAGE_CONTENT.encode("utf-8")), + "statusCode": 200, + "responseCookies": INPUT_COOKIES, + "experimental": { + "responseCookies": [ + { + "name": "foo", + "value": "bar", + }, + ], + }, + }, + ], +) +def test_cookies(api_response, caplog): + with caplog.at_level("WARNING"): + response = _process_response(api_response, Request(api_response["url"])) + assert response is not None + assert response.headers == { + **OUTPUT_COOKIE_HEADERS, + } + # Do not warn about the deprecated experimental.responseCookies response + # parameter, we already warn about it when found among request parameters. + assert not caplog.text