Skip to content

Commit

Permalink
Fixed failing unit test and updated CHANGELOG description
Browse files Browse the repository at this point in the history
Signed-off-by: Nathalie Jonathan <[email protected]>
  • Loading branch information
nathaliellenaa committed Nov 21, 2024
1 parent 0ba5a26 commit 405d704
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 38 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Fixed
- Fix `Transport.perform_request`'s arguments `timeout` and `ignore` variable usage ([810](https://github.com/opensearch-project/opensearch-py/pull/810))
- Fix `Index.save` not passing through aliases to the underlying index ([823](https://github.com/opensearch-project/opensearch-py/pull/823))
- Fix URL bug in Python client (update PR#)
- Fix bug where the URL being sent and being signed is different ([848](https://github.com/opensearch-project/opensearch-py/pull/848))
### Security

### Dependencies
Expand Down
12 changes: 1 addition & 11 deletions opensearchpy/helpers/asyncsigner.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,7 @@ def __call__(
query_string: Optional[str] = None,
body: Optional[Union[str, bytes]] = None,
) -> Dict[str, str]:
from urllib.parse import unquote, urlparse

from ..compat import quote

parsed_url = urlparse(url)
# Decode the URL encoded path
unquoted_path = unquote(parsed_url.path)
# Create new URL with re-encoded path
signing_url = parsed_url._replace(path=quote(unquoted_path)).geturl()
# Sign the request with the normalized URL
return self._sign_request(method, signing_url, query_string, body)
return self._sign_request(method, url, query_string, body)

def _sign_request(
self,
Expand Down
89 changes: 63 additions & 26 deletions test_opensearchpy/test_async/test_signer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# GitHub history for details.

import uuid
from typing import Any, Collection, Dict, Mapping, Optional, Tuple, Union
from unittest.mock import Mock

import pytest
Expand Down Expand Up @@ -119,36 +120,72 @@ def mock_session(self) -> Mock:

return dummy_session

async def test_aws_signer_async_with_special_chars_in_id(self) -> None:
async def test_aws_signer_async_consitent_url(self) -> None:
region = "us-west-2"

from urllib.parse import quote

from opensearchpy import AsyncOpenSearch
from opensearchpy.connection.http_async import AsyncHttpConnection
from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth

auth = AWSV4SignerAsyncAuth(self.mock_session(), region)
# Store URLs for comparison
signed_url = None
sent_url = None

doc_id = "test:123"
url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{doc_id}"

# Create a mock signer to capture the signed URL
class MockSigner(AWSV4SignerAsyncAuth):
def _sign_request(
self,
method: str,
url: str,
query_string: Optional[str] = None,
body: Optional[Union[str, bytes]] = None,
) -> Dict[str, str]:
nonlocal signed_url
signed_url = url
return super()._sign_request(method, url, query_string, body)

# Create a custom connection class to capture the sent URL
class CapturingConnection(AsyncHttpConnection):
async def perform_request(
self: "CapturingConnection",
method: str,
url: str,
params: Optional[Mapping[str, Any]] = None,
body: Optional[Any] = None,
timeout: Optional[Union[int, float]] = None,
ignore: Collection[int] = (),
headers: Optional[Mapping[str, str]] = None,
) -> Tuple[int, Mapping[str, str], str]:
nonlocal sent_url
host = self.host
base_url = f"{host}"
sent_url = f"{base_url}{url}"
return 200, {"content-type": "application/json"}, "{}"

auth = MockSigner(self.mock_session(), region)
auth("GET", url)

client = AsyncOpenSearch(
hosts=[{"host": "search-domain.region.es.amazonaws.com"}],
http_auth=auth,
use_ssl=True,
verify_certs=True,
connection_class=CapturingConnection,
)

# ID contains special character
doc_id = "0x4786a3444661f1b9:0x625b2c53dfc9caba"
try:
await client.index("index", {"test": "data"}, id=doc_id)
except Exception as e:
print(f"Expected exception: {e}")

# Test with unencoded ID
url_raw = f"https://search-domain.region.es.amazonaws.com/index/_doc/{doc_id}"
headers_raw = auth("GET", url_raw)
# Verify URLs
assert signed_url is not None, "Signed URL was not captured"
assert sent_url is not None, "Sent URL was not captured"
print(f"Signed URL: {signed_url}")
print(f"Sent URL: {sent_url}")

# Test with properly encoded ID
encoded_id = quote(doc_id, safe="")
url_encoded = (
f"https://search-domain.region.es.amazonaws.com/index/_doc/{encoded_id}"
)
headers_encoded = auth("GET", url_encoded)

# Signatures should be the same since URL encoding happens before signing
assert (
headers_raw["Authorization"] == headers_encoded["Authorization"]
), f"Signatures should be identical for raw and encoded ID: {doc_id}"

# Both should have required headers
for headers in (headers_raw, headers_encoded):
assert "Authorization" in headers
assert "X-Amz-Date" in headers
assert "X-Amz-Security-Token" in headers
# Verify URLs match
assert signed_url == sent_url, "URLs don't match"

0 comments on commit 405d704

Please sign in to comment.