Skip to content

Commit

Permalink
Fix AuthorizationException with AWSV4SignerAsyncAuth when the doc ID …
Browse files Browse the repository at this point in the history
…has special characters. (#848)

* Lifecycle integration tests.

Signed-off-by: dblock <[email protected]>

* Added a test that makes sure the slash is properly encoded.

Signed-off-by: dblock <[email protected]>

* Added more tests for signer and _make_path.

Signed-off-by: Nathalie Jonathan <[email protected]>

* Prevent AIOHttpConnection from encoding the url a second time.

Signed-off-by: Nathalie Jonathan <[email protected]>

---------

Signed-off-by: dblock <[email protected]>
Signed-off-by: Nathalie Jonathan <[email protected]>
Co-authored-by: dblock <[email protected]>
  • Loading branch information
nathaliellenaa and dblock authored Nov 27, 2024
1 parent bf9add4 commit b9e48dc
Show file tree
Hide file tree
Showing 13 changed files with 445 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Fixed
- Fix `Transport.perform_request`'s arguments `timeout` and `ignore` variable usage ([810](https://github.com/opensearch-project/opensearch-py/pull/810))
- Fix `Index.save` not passing through aliases to the underlying index ([823](https://github.com/opensearch-project/opensearch-py/pull/823))
- Fix `AuthorizationException` with AWS OpenSearch when the doc ID contains `:` ([848](https://github.com/opensearch-project/opensearch-py/pull/848))
### Security

### Dependencies
Expand Down
6 changes: 5 additions & 1 deletion DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,11 @@ test_opensearchpy/test_connection.py::TestRequestsConnection::test_no_http_compr
test_opensearchpy/test_async/test_connection.py::TestAIOHttpConnection::test_no_http_compression PASSED [100%]
```

Note that integration tests require docker to be installed and running, and downloads quite a bit of data from over the internet and hence take few minutes to complete.
```
./.ci/run-tests false 2.16.0 test_indices_lifecycle
```

Note that integration tests require docker to be installed and running, and downloads quite a bit of data from the internet and hence take few minutes to complete.

## Linter

Expand Down
4 changes: 3 additions & 1 deletion opensearchpy/connection/http_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import warnings
from typing import Any, Collection, Mapping, Optional, Union

import yarl

from .._async._extra_imports import aiohttp, aiohttp_exceptions # type: ignore
from .._async.compat import get_running_loop
from .._async.http_aiohttp import AIOHttpConnection
Expand Down Expand Up @@ -210,7 +212,7 @@ async def perform_request(
try:
async with self.session.request(
method,
url,
yarl.URL(url, encoded=True),
data=body,
auth=auth,
headers=req_headers,
Expand Down
4 changes: 2 additions & 2 deletions samples/hello/hello.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

def main() -> None:
"""
an example showing how to create an synchronous connection to
An example showing how to create a synchronous connection to
OpenSearch, create an index, index a document and search to
return the document
return the document.
"""
host = "localhost"
port = 9200
Expand Down
4 changes: 2 additions & 2 deletions samples/hello/hello_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

async def main() -> None:
"""
an example showing how to create an asynchronous connection
An example showing how to create an asynchronous connection
to OpenSearch, create an index, index a document and
search to return the document
search to return the document.
"""
# connect to OpenSearch
host = "localhost"
Expand Down
63 changes: 63 additions & 0 deletions samples/hello/unicode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.


import os

from opensearchpy import OpenSearch

# connect to OpenSearch


def main() -> None:
"""
An example showing how to create a synchronous connection to
OpenSearch, create an index, index a document and search to
return the document.
"""
host = "localhost"
port = 9200
auth = (
"admin",
os.getenv("OPENSEARCH_PASSWORD", "admin"),
) # For testing only. Don't store credentials in code.

client = OpenSearch(
hosts=[{"host": host, "port": port}],
http_auth=auth,
use_ssl=True,
verify_certs=False,
ssl_show_warn=False,
)

info = client.info()
print(f"Welcome to {info['version']['distribution']} {info['version']['number']}!")

index_name = "кино"
index_create_result = client.indices.create(index=index_name)
print(index_create_result)

document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = client.index(
index=index_name, body=document, id=id, refresh=True
)
print(doc_insert_result)

doc_delete_result = client.delete(index=index_name, id=id)
print(doc_delete_result)

index_delete_result = client.indices.delete(index=index_name)
print(index_delete_result)


if __name__ == "__main__":
main()
72 changes: 72 additions & 0 deletions samples/hello/unicode_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.


import asyncio
import os

from opensearchpy import AsyncOpenSearch


async def main() -> None:
"""
An example showing how to create an asynchronous connection
to OpenSearch, create an index, index a document and
search to return the document.
"""
# connect to OpenSearch
host = "localhost"
port = 9200
auth = (
"admin",
os.getenv("OPENSEARCH_PASSWORD", "admin"),
) # For testing only. Don't store credentials in code.

client = AsyncOpenSearch(
hosts=[{"host": host, "port": port}],
http_auth=auth,
use_ssl=True,
verify_certs=False,
ssl_show_warn=False,
)

try:
info = await client.info()
print(
f"Welcome to {info['version']['distribution']} {info['version']['number']}!"
)

index_name = "кино"
index_create_result = await client.indices.create(index=index_name)
print(index_create_result)

document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = await client.index(
index=index_name, body=document, id=id, refresh=True
)
print(doc_insert_result)

doc_delete_result = await client.delete(index=index_name, id=id)
print(doc_delete_result)

index_delete_result = await client.indices.delete(index=index_name)
print(index_delete_result)

finally:
await client.close()


if __name__ == "__main__":
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(main())
loop.close()
5 changes: 3 additions & 2 deletions test_opensearchpy/test_async/test_http_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from unittest import mock

import pytest
import yarl
from multidict import CIMultiDict

from opensearchpy._async._extra_imports import aiohttp # type: ignore
Expand Down Expand Up @@ -91,7 +92,7 @@ async def test_basicauth_in_request_session(self, mock_request: Any) -> None:
await c.perform_request("post", "/test")
mock_request.assert_called_with(
"post",
"http://localhost:9200/test",
yarl.URL("http://localhost:9200/test", encoded=True),
data=None,
auth=c._http_auth,
headers={},
Expand Down Expand Up @@ -120,7 +121,7 @@ def auth_fn(*args: Any, **kwargs: Any) -> Any:

mock_request.assert_called_with(
"post",
"http://localhost:9200/test",
yarl.URL("http://localhost:9200/test", encoded=True),
data=None,
auth=None,
headers={
Expand Down
60 changes: 60 additions & 0 deletions test_opensearchpy/test_async/test_server/test_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,70 @@
import pytest
from _pytest.mark.structures import MarkDecorator

from opensearchpy.exceptions import RequestError

pytestmark: MarkDecorator = pytest.mark.asyncio


class TestSpecialCharacters:
async def test_index_with_slash(self, async_client: Any) -> None:
index_name = "movies/shmovies"
with pytest.raises(RequestError) as e:
await async_client.indices.create(index=index_name)
assert (
str(e.value)
== "RequestError(400, 'invalid_index_name_exception', 'Invalid index name [movies/shmovies], must not contain the following characters [ , \", *, \\\\, <, |, ,, >, /, ?]')"
)


class TestUnicode:
async def test_indices_lifecycle_english(self, async_client: Any) -> None:
index_name = "movies"

index_create_result = await async_client.indices.create(index=index_name)
assert index_create_result["acknowledged"] is True
assert index_name == index_create_result["index"]

document = {"name": "Solaris", "director": "Andrei Tartakovsky", "year": "2011"}
id = "solaris@2011"
doc_insert_result = await async_client.index(
index=index_name, body=document, id=id, refresh=True
)
assert "created" == doc_insert_result["result"]
assert index_name == doc_insert_result["_index"]
assert id == doc_insert_result["_id"]

doc_delete_result = await async_client.delete(index=index_name, id=id)
assert "deleted" == doc_delete_result["result"]
assert index_name == doc_delete_result["_index"]
assert id == doc_delete_result["_id"]

index_delete_result = await async_client.indices.delete(index=index_name)
assert index_delete_result["acknowledged"] is True

async def test_indices_lifecycle_russian(self, async_client: Any) -> None:
index_name = "кино"
index_create_result = await async_client.indices.create(index=index_name)
assert index_create_result["acknowledged"] is True
assert index_name == index_create_result["index"]

document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = await async_client.index(
index=index_name, body=document, id=id, refresh=True
)
assert "created" == doc_insert_result["result"]
assert index_name == doc_insert_result["_index"]
assert id == doc_insert_result["_id"]

doc_delete_result = await async_client.delete(index=index_name, id=id)
assert "deleted" == doc_delete_result["result"]
assert index_name == doc_delete_result["_index"]
assert id == doc_delete_result["_id"]

index_delete_result = await async_client.indices.delete(index=index_name)
assert index_delete_result["acknowledged"] is True

async def test_indices_analyze(self, async_client: Any) -> None:
await async_client.indices.analyze(body='{"text": "привет"}')

Expand Down
73 changes: 73 additions & 0 deletions test_opensearchpy/test_async/test_signer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# GitHub history for details.

import uuid
from typing import Any, Collection, Dict, Mapping, Optional, Tuple, Union
from unittest.mock import Mock

import pytest
Expand Down Expand Up @@ -103,3 +104,75 @@ async def test_aws_signer_async_frozen_credentials_as_http_auth(self) -> None:
assert "X-Amz-Date" in headers
assert "X-Amz-Security-Token" in headers
assert len(mock_session.get_frozen_credentials.mock_calls) == 1


class TestAsyncSignerWithSpecialCharacters:
def mock_session(self) -> Mock:
access_key = uuid.uuid4().hex
secret_key = uuid.uuid4().hex
token = uuid.uuid4().hex
dummy_session = Mock()
dummy_session.access_key = access_key
dummy_session.secret_key = secret_key
dummy_session.token = token

del dummy_session.get_frozen_credentials

return dummy_session

async def test_aws_signer_async_consitent_url(self) -> None:
region = "us-west-2"

from opensearchpy import AsyncOpenSearch
from opensearchpy.connection.http_async import AsyncHttpConnection
from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth

# Store URLs for comparison
signed_url = None
sent_url = None

doc_id = "doc_id:with!special*chars%3A"
quoted_doc_id = "doc_id%3Awith%21special*chars%253A"
url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{quoted_doc_id}"

# Create a mock signer class to capture the signed URL
class MockSigner(AWSV4SignerAsyncAuth):
def _sign_request(
self,
method: str,
url: str,
query_string: Optional[str] = None,
body: Optional[Union[str, bytes]] = None,
) -> Dict[str, str]:
nonlocal signed_url
signed_url = url
return {}

# Create a mock connection class to capture the sent URL
class MockConnection(AsyncHttpConnection):
async def perform_request(
self: "MockConnection",
method: str,
url: str,
params: Optional[Mapping[str, Any]] = None,
body: Optional[Any] = None,
timeout: Optional[Union[int, float]] = None,
ignore: Collection[int] = (),
headers: Optional[Mapping[str, str]] = None,
) -> Tuple[int, Mapping[str, str], str]:
nonlocal sent_url
sent_url = f"{self.host}{url}"
return 200, {}, "{}"

auth = MockSigner(self.mock_session(), region)
auth("GET", url)

client = AsyncOpenSearch(
hosts=[{"host": "search-domain.region.es.amazonaws.com"}],
http_auth=auth,
use_ssl=True,
verify_certs=True,
connection_class=MockConnection,
)
await client.index("index", {"test": "data"}, id=doc_id)
assert signed_url == sent_url, "URLs don't match"
Loading

0 comments on commit b9e48dc

Please sign in to comment.