From 405d7047a4016f90085d728bd4e29822c46024d8 Mon Sep 17 00:00:00 2001 From: Nathalie Jonathan Date: Thu, 21 Nov 2024 10:24:22 -0800 Subject: [PATCH] Fixed failing unit test and updated CHANGELOG description Signed-off-by: Nathalie Jonathan --- CHANGELOG.md | 2 +- opensearchpy/helpers/asyncsigner.py | 12 +-- test_opensearchpy/test_async/test_signer.py | 89 +++++++++++++++------ 3 files changed, 65 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06c177127..5266c91fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ### Fixed - Fix `Transport.perform_request`'s arguments `timeout` and `ignore` variable usage ([810](https://github.com/opensearch-project/opensearch-py/pull/810)) - Fix `Index.save` not passing through aliases to the underlying index ([823](https://github.com/opensearch-project/opensearch-py/pull/823)) -- Fix URL bug in Python client (update PR#) +- Fix bug where the URL being sent and being signed is different ([848](https://github.com/opensearch-project/opensearch-py/pull/848)) ### Security ### Dependencies diff --git a/opensearchpy/helpers/asyncsigner.py b/opensearchpy/helpers/asyncsigner.py index ec4364cf6..c045f1384 100644 --- a/opensearchpy/helpers/asyncsigner.py +++ b/opensearchpy/helpers/asyncsigner.py @@ -35,17 +35,7 @@ def __call__( query_string: Optional[str] = None, body: Optional[Union[str, bytes]] = None, ) -> Dict[str, str]: - from urllib.parse import unquote, urlparse - - from ..compat import quote - - parsed_url = urlparse(url) - # Decode the URL encoded path - unquoted_path = unquote(parsed_url.path) - # Create new URL with re-encoded path - signing_url = parsed_url._replace(path=quote(unquoted_path)).geturl() - # Sign the request with the normalized URL - return self._sign_request(method, signing_url, query_string, body) + return self._sign_request(method, url, query_string, body) def _sign_request( self, diff --git a/test_opensearchpy/test_async/test_signer.py b/test_opensearchpy/test_async/test_signer.py index 9c4150647..50c38ab3d 100644 --- a/test_opensearchpy/test_async/test_signer.py +++ b/test_opensearchpy/test_async/test_signer.py @@ -8,6 +8,7 @@ # GitHub history for details. import uuid +from typing import Any, Collection, Dict, Mapping, Optional, Tuple, Union from unittest.mock import Mock import pytest @@ -119,36 +120,72 @@ def mock_session(self) -> Mock: return dummy_session - async def test_aws_signer_async_with_special_chars_in_id(self) -> None: + async def test_aws_signer_async_consitent_url(self) -> None: region = "us-west-2" - from urllib.parse import quote - + from opensearchpy import AsyncOpenSearch + from opensearchpy.connection.http_async import AsyncHttpConnection from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth - auth = AWSV4SignerAsyncAuth(self.mock_session(), region) + # Store URLs for comparison + signed_url = None + sent_url = None + + doc_id = "test:123" + url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{doc_id}" + + # Create a mock signer to capture the signed URL + class MockSigner(AWSV4SignerAsyncAuth): + def _sign_request( + self, + method: str, + url: str, + query_string: Optional[str] = None, + body: Optional[Union[str, bytes]] = None, + ) -> Dict[str, str]: + nonlocal signed_url + signed_url = url + return super()._sign_request(method, url, query_string, body) + + # Create a custom connection class to capture the sent URL + class CapturingConnection(AsyncHttpConnection): + async def perform_request( + self: "CapturingConnection", + method: str, + url: str, + params: Optional[Mapping[str, Any]] = None, + body: Optional[Any] = None, + timeout: Optional[Union[int, float]] = None, + ignore: Collection[int] = (), + headers: Optional[Mapping[str, str]] = None, + ) -> Tuple[int, Mapping[str, str], str]: + nonlocal sent_url + host = self.host + base_url = f"{host}" + sent_url = f"{base_url}{url}" + return 200, {"content-type": "application/json"}, "{}" + + auth = MockSigner(self.mock_session(), region) + auth("GET", url) + + client = AsyncOpenSearch( + hosts=[{"host": "search-domain.region.es.amazonaws.com"}], + http_auth=auth, + use_ssl=True, + verify_certs=True, + connection_class=CapturingConnection, + ) - # ID contains special character - doc_id = "0x4786a3444661f1b9:0x625b2c53dfc9caba" + try: + await client.index("index", {"test": "data"}, id=doc_id) + except Exception as e: + print(f"Expected exception: {e}") - # Test with unencoded ID - url_raw = f"https://search-domain.region.es.amazonaws.com/index/_doc/{doc_id}" - headers_raw = auth("GET", url_raw) + # Verify URLs + assert signed_url is not None, "Signed URL was not captured" + assert sent_url is not None, "Sent URL was not captured" + print(f"Signed URL: {signed_url}") + print(f"Sent URL: {sent_url}") - # Test with properly encoded ID - encoded_id = quote(doc_id, safe="") - url_encoded = ( - f"https://search-domain.region.es.amazonaws.com/index/_doc/{encoded_id}" - ) - headers_encoded = auth("GET", url_encoded) - - # Signatures should be the same since URL encoding happens before signing - assert ( - headers_raw["Authorization"] == headers_encoded["Authorization"] - ), f"Signatures should be identical for raw and encoded ID: {doc_id}" - - # Both should have required headers - for headers in (headers_raw, headers_encoded): - assert "Authorization" in headers - assert "X-Amz-Date" in headers - assert "X-Amz-Security-Token" in headers + # Verify URLs match + assert signed_url == sent_url, "URLs don't match"