From b9e48dc847a186a7986afae45e27cdc2011f35da Mon Sep 17 00:00:00 2001 From: nathaliellenaa <143617992+nathaliellenaa@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:50:22 -0800 Subject: [PATCH] Fix AuthorizationException with AWSV4SignerAsyncAuth when the doc ID has special characters. (#848) * Lifecycle integration tests. Signed-off-by: dblock * Added a test that makes sure the slash is properly encoded. Signed-off-by: dblock * Added more tests for signer and _make_path. Signed-off-by: Nathalie Jonathan * Prevent AIOHttpConnection from encoding the url a second time. Signed-off-by: Nathalie Jonathan --------- Signed-off-by: dblock Signed-off-by: Nathalie Jonathan Co-authored-by: dblock --- CHANGELOG.md | 1 + DEVELOPER_GUIDE.md | 6 +- opensearchpy/connection/http_async.py | 4 +- samples/hello/hello.py | 4 +- samples/hello/hello_async.py | 4 +- samples/hello/unicode.py | 63 ++++++++++++++++ samples/hello/unicode_async.py | 72 ++++++++++++++++++ .../test_async/test_http_connection.py | 5 +- .../test_async/test_server/test_clients.py | 60 +++++++++++++++ test_opensearchpy/test_async/test_signer.py | 73 +++++++++++++++++++ test_opensearchpy/test_client/test_utils.py | 41 ++++++++++- .../test_requests_http_connection.py | 59 +++++++++++++++ test_opensearchpy/test_server/test_clients.py | 62 ++++++++++++++++ 13 files changed, 445 insertions(+), 9 deletions(-) create mode 100644 samples/hello/unicode.py create mode 100644 samples/hello/unicode_async.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 2691b252..8c14038f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ### Fixed - Fix `Transport.perform_request`'s arguments `timeout` and `ignore` variable usage ([810](https://github.com/opensearch-project/opensearch-py/pull/810)) - Fix `Index.save` not passing through aliases to the underlying index ([823](https://github.com/opensearch-project/opensearch-py/pull/823)) +- Fix `AuthorizationException` with AWS OpenSearch when the doc ID contains `:` ([848](https://github.com/opensearch-project/opensearch-py/pull/848)) ### Security ### Dependencies diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index f544a520..218eb0b1 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -101,7 +101,11 @@ test_opensearchpy/test_connection.py::TestRequestsConnection::test_no_http_compr test_opensearchpy/test_async/test_connection.py::TestAIOHttpConnection::test_no_http_compression PASSED [100%] ``` -Note that integration tests require docker to be installed and running, and downloads quite a bit of data from over the internet and hence take few minutes to complete. +``` +./.ci/run-tests false 2.16.0 test_indices_lifecycle +``` + +Note that integration tests require docker to be installed and running, and downloads quite a bit of data from the internet and hence take few minutes to complete. ## Linter diff --git a/opensearchpy/connection/http_async.py b/opensearchpy/connection/http_async.py index 9add4785..7fb69093 100644 --- a/opensearchpy/connection/http_async.py +++ b/opensearchpy/connection/http_async.py @@ -15,6 +15,8 @@ import warnings from typing import Any, Collection, Mapping, Optional, Union +import yarl + from .._async._extra_imports import aiohttp, aiohttp_exceptions # type: ignore from .._async.compat import get_running_loop from .._async.http_aiohttp import AIOHttpConnection @@ -210,7 +212,7 @@ async def perform_request( try: async with self.session.request( method, - url, + yarl.URL(url, encoded=True), data=body, auth=auth, headers=req_headers, diff --git a/samples/hello/hello.py b/samples/hello/hello.py index 10dc3a35..d4d4ae69 100755 --- a/samples/hello/hello.py +++ b/samples/hello/hello.py @@ -19,9 +19,9 @@ def main() -> None: """ - an example showing how to create an synchronous connection to + An example showing how to create a synchronous connection to OpenSearch, create an index, index a document and search to - return the document + return the document. """ host = "localhost" port = 9200 diff --git a/samples/hello/hello_async.py b/samples/hello/hello_async.py index 983d7773..7f16a531 100755 --- a/samples/hello/hello_async.py +++ b/samples/hello/hello_async.py @@ -18,9 +18,9 @@ async def main() -> None: """ - an example showing how to create an asynchronous connection + An example showing how to create an asynchronous connection to OpenSearch, create an index, index a document and - search to return the document + search to return the document. """ # connect to OpenSearch host = "localhost" diff --git a/samples/hello/unicode.py b/samples/hello/unicode.py new file mode 100644 index 00000000..30a4b41e --- /dev/null +++ b/samples/hello/unicode.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + + +import os + +from opensearchpy import OpenSearch + +# connect to OpenSearch + + +def main() -> None: + """ + An example showing how to create a synchronous connection to + OpenSearch, create an index, index a document and search to + return the document. + """ + host = "localhost" + port = 9200 + auth = ( + "admin", + os.getenv("OPENSEARCH_PASSWORD", "admin"), + ) # For testing only. Don't store credentials in code. + + client = OpenSearch( + hosts=[{"host": host, "port": port}], + http_auth=auth, + use_ssl=True, + verify_certs=False, + ssl_show_warn=False, + ) + + info = client.info() + print(f"Welcome to {info['version']['distribution']} {info['version']['number']}!") + + index_name = "кино" + index_create_result = client.indices.create(index=index_name) + print(index_create_result) + + document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"} + id = "соларис@2011" + doc_insert_result = client.index( + index=index_name, body=document, id=id, refresh=True + ) + print(doc_insert_result) + + doc_delete_result = client.delete(index=index_name, id=id) + print(doc_delete_result) + + index_delete_result = client.indices.delete(index=index_name) + print(index_delete_result) + + +if __name__ == "__main__": + main() diff --git a/samples/hello/unicode_async.py b/samples/hello/unicode_async.py new file mode 100644 index 00000000..688832df --- /dev/null +++ b/samples/hello/unicode_async.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python + +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + + +import asyncio +import os + +from opensearchpy import AsyncOpenSearch + + +async def main() -> None: + """ + An example showing how to create an asynchronous connection + to OpenSearch, create an index, index a document and + search to return the document. + """ + # connect to OpenSearch + host = "localhost" + port = 9200 + auth = ( + "admin", + os.getenv("OPENSEARCH_PASSWORD", "admin"), + ) # For testing only. Don't store credentials in code. + + client = AsyncOpenSearch( + hosts=[{"host": host, "port": port}], + http_auth=auth, + use_ssl=True, + verify_certs=False, + ssl_show_warn=False, + ) + + try: + info = await client.info() + print( + f"Welcome to {info['version']['distribution']} {info['version']['number']}!" + ) + + index_name = "кино" + index_create_result = await client.indices.create(index=index_name) + print(index_create_result) + + document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"} + id = "соларис@2011" + doc_insert_result = await client.index( + index=index_name, body=document, id=id, refresh=True + ) + print(doc_insert_result) + + doc_delete_result = await client.delete(index=index_name, id=id) + print(doc_delete_result) + + index_delete_result = await client.indices.delete(index=index_name) + print(index_delete_result) + + finally: + await client.close() + + +if __name__ == "__main__": + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(main()) + loop.close() diff --git a/test_opensearchpy/test_async/test_http_connection.py b/test_opensearchpy/test_async/test_http_connection.py index 34c729e0..8568a6f8 100644 --- a/test_opensearchpy/test_async/test_http_connection.py +++ b/test_opensearchpy/test_async/test_http_connection.py @@ -29,6 +29,7 @@ from unittest import mock import pytest +import yarl from multidict import CIMultiDict from opensearchpy._async._extra_imports import aiohttp # type: ignore @@ -91,7 +92,7 @@ async def test_basicauth_in_request_session(self, mock_request: Any) -> None: await c.perform_request("post", "/test") mock_request.assert_called_with( "post", - "http://localhost:9200/test", + yarl.URL("http://localhost:9200/test", encoded=True), data=None, auth=c._http_auth, headers={}, @@ -120,7 +121,7 @@ def auth_fn(*args: Any, **kwargs: Any) -> Any: mock_request.assert_called_with( "post", - "http://localhost:9200/test", + yarl.URL("http://localhost:9200/test", encoded=True), data=None, auth=None, headers={ diff --git a/test_opensearchpy/test_async/test_server/test_clients.py b/test_opensearchpy/test_async/test_server/test_clients.py index f663f82f..2ec7f0d0 100644 --- a/test_opensearchpy/test_async/test_server/test_clients.py +++ b/test_opensearchpy/test_async/test_server/test_clients.py @@ -30,10 +30,70 @@ import pytest from _pytest.mark.structures import MarkDecorator +from opensearchpy.exceptions import RequestError + pytestmark: MarkDecorator = pytest.mark.asyncio +class TestSpecialCharacters: + async def test_index_with_slash(self, async_client: Any) -> None: + index_name = "movies/shmovies" + with pytest.raises(RequestError) as e: + await async_client.indices.create(index=index_name) + assert ( + str(e.value) + == "RequestError(400, 'invalid_index_name_exception', 'Invalid index name [movies/shmovies], must not contain the following characters [ , \", *, \\\\, <, |, ,, >, /, ?]')" + ) + + class TestUnicode: + async def test_indices_lifecycle_english(self, async_client: Any) -> None: + index_name = "movies" + + index_create_result = await async_client.indices.create(index=index_name) + assert index_create_result["acknowledged"] is True + assert index_name == index_create_result["index"] + + document = {"name": "Solaris", "director": "Andrei Tartakovsky", "year": "2011"} + id = "solaris@2011" + doc_insert_result = await async_client.index( + index=index_name, body=document, id=id, refresh=True + ) + assert "created" == doc_insert_result["result"] + assert index_name == doc_insert_result["_index"] + assert id == doc_insert_result["_id"] + + doc_delete_result = await async_client.delete(index=index_name, id=id) + assert "deleted" == doc_delete_result["result"] + assert index_name == doc_delete_result["_index"] + assert id == doc_delete_result["_id"] + + index_delete_result = await async_client.indices.delete(index=index_name) + assert index_delete_result["acknowledged"] is True + + async def test_indices_lifecycle_russian(self, async_client: Any) -> None: + index_name = "кино" + index_create_result = await async_client.indices.create(index=index_name) + assert index_create_result["acknowledged"] is True + assert index_name == index_create_result["index"] + + document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"} + id = "соларис@2011" + doc_insert_result = await async_client.index( + index=index_name, body=document, id=id, refresh=True + ) + assert "created" == doc_insert_result["result"] + assert index_name == doc_insert_result["_index"] + assert id == doc_insert_result["_id"] + + doc_delete_result = await async_client.delete(index=index_name, id=id) + assert "deleted" == doc_delete_result["result"] + assert index_name == doc_delete_result["_index"] + assert id == doc_delete_result["_id"] + + index_delete_result = await async_client.indices.delete(index=index_name) + assert index_delete_result["acknowledged"] is True + async def test_indices_analyze(self, async_client: Any) -> None: await async_client.indices.analyze(body='{"text": "привет"}') diff --git a/test_opensearchpy/test_async/test_signer.py b/test_opensearchpy/test_async/test_signer.py index 821d5ae7..473faa76 100644 --- a/test_opensearchpy/test_async/test_signer.py +++ b/test_opensearchpy/test_async/test_signer.py @@ -8,6 +8,7 @@ # GitHub history for details. import uuid +from typing import Any, Collection, Dict, Mapping, Optional, Tuple, Union from unittest.mock import Mock import pytest @@ -103,3 +104,75 @@ async def test_aws_signer_async_frozen_credentials_as_http_auth(self) -> None: assert "X-Amz-Date" in headers assert "X-Amz-Security-Token" in headers assert len(mock_session.get_frozen_credentials.mock_calls) == 1 + + +class TestAsyncSignerWithSpecialCharacters: + def mock_session(self) -> Mock: + access_key = uuid.uuid4().hex + secret_key = uuid.uuid4().hex + token = uuid.uuid4().hex + dummy_session = Mock() + dummy_session.access_key = access_key + dummy_session.secret_key = secret_key + dummy_session.token = token + + del dummy_session.get_frozen_credentials + + return dummy_session + + async def test_aws_signer_async_consitent_url(self) -> None: + region = "us-west-2" + + from opensearchpy import AsyncOpenSearch + from opensearchpy.connection.http_async import AsyncHttpConnection + from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth + + # Store URLs for comparison + signed_url = None + sent_url = None + + doc_id = "doc_id:with!special*chars%3A" + quoted_doc_id = "doc_id%3Awith%21special*chars%253A" + url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{quoted_doc_id}" + + # Create a mock signer class to capture the signed URL + class MockSigner(AWSV4SignerAsyncAuth): + def _sign_request( + self, + method: str, + url: str, + query_string: Optional[str] = None, + body: Optional[Union[str, bytes]] = None, + ) -> Dict[str, str]: + nonlocal signed_url + signed_url = url + return {} + + # Create a mock connection class to capture the sent URL + class MockConnection(AsyncHttpConnection): + async def perform_request( + self: "MockConnection", + method: str, + url: str, + params: Optional[Mapping[str, Any]] = None, + body: Optional[Any] = None, + timeout: Optional[Union[int, float]] = None, + ignore: Collection[int] = (), + headers: Optional[Mapping[str, str]] = None, + ) -> Tuple[int, Mapping[str, str], str]: + nonlocal sent_url + sent_url = f"{self.host}{url}" + return 200, {}, "{}" + + auth = MockSigner(self.mock_session(), region) + auth("GET", url) + + client = AsyncOpenSearch( + hosts=[{"host": "search-domain.region.es.amazonaws.com"}], + http_auth=auth, + use_ssl=True, + verify_certs=True, + connection_class=MockConnection, + ) + await client.index("index", {"test": "data"}, id=doc_id) + assert signed_url == sent_url, "URLs don't match" diff --git a/test_opensearchpy/test_client/test_utils.py b/test_opensearchpy/test_client/test_utils.py index a0cde64d..02013f58 100644 --- a/test_opensearchpy/test_client/test_utils.py +++ b/test_opensearchpy/test_client/test_utils.py @@ -154,9 +154,48 @@ def test_per_call_authentication(self) -> None: class TestMakePath(TestCase): def test_handles_unicode(self) -> None: + from urllib.parse import quote + id = "中文" self.assertEqual( - "/some-index/type/%E4%B8%AD%E6%96%87", _make_path("some-index", "type", id) + _make_path("some-index", "type", quote(id)), + "/some-index/type/%25E4%25B8%25AD%25E6%2596%2587", + ) + + def test_handles_single_arg(self) -> None: + from urllib.parse import quote + + id = "idwith!char" + self.assertEqual( + _make_path("some-index", "type", quote(id)), + "/some-index/type/idwith%2521char", + ) + + def test_handles_multiple_args(self) -> None: + from urllib.parse import quote + + ids = ["id!with@char", "another#id$here"] + quoted_ids = [quote(id) for id in ids] + + self.assertEqual( + _make_path("some-index", "type", quoted_ids), + "/some-index/type/id%2521with%2540char,another%2523id%2524here", + ) + + def test_handles_arrays_of_args(self) -> None: + self.assertEqual( + "/index1,index2/type1,type2/doc1,doc2", + _make_path( + ("index1", "index2"), ["type1", "type2"], tuple(["doc1", "doc2"]) + ), + ) + + from urllib.parse import quote + + ids = [quote("$id!1"), quote("id*@2"), quote("#id3#")] + self.assertEqual( + _make_path("some-index", ids, "type"), + "/some-index/%2524id%25211,id%252A%25402,%2523id3%2523/type", ) diff --git a/test_opensearchpy/test_connection/test_requests_http_connection.py b/test_opensearchpy/test_connection/test_requests_http_connection.py index ff6fc8c2..9e3014da 100644 --- a/test_opensearchpy/test_connection/test_requests_http_connection.py +++ b/test_opensearchpy/test_connection/test_requests_http_connection.py @@ -513,6 +513,65 @@ def test_aws_signer_signs_with_query_string(self, mock_sign: Any) -> None: ("GET", "http://localhost/?key1=value1&key2=value2", None), ) + def test_aws_signer_consitent_url(self) -> None: + region = "us-west-2" + + from typing import Any, Collection, Mapping, Optional, Union + + from opensearchpy import OpenSearch + from opensearchpy.helpers.signer import RequestsAWSV4SignerAuth + + # Store URLs for comparison + signed_url = None + sent_url = None + + doc_id = "doc_id:with!special*chars%3A" + quoted_doc_id = "doc_id%3Awith%21special*chars%253A" + url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{quoted_doc_id}" + + # Create a mock signer class to capture the signed URL + class MockSigner(RequestsAWSV4SignerAuth): + def __call__(self, prepared_request): # type: ignore + nonlocal signed_url + if isinstance(prepared_request, str): + signed_url = prepared_request + else: + signed_url = prepared_request.url + return prepared_request + + # Create a mock connection class to capture the sent URL + class MockConnection(RequestsHttpConnection): + def perform_request( # type: ignore + self, + method: str, + url: str, + params: Optional[Mapping[str, Any]] = None, + body: Optional[bytes] = None, + timeout: Optional[Union[int, float]] = None, + allow_redirects: Optional[bool] = True, + ignore: Collection[int] = (), + headers: Optional[Mapping[str, str]] = None, + ) -> Any: + nonlocal sent_url + sent_url = f"{self.host}{url}" + return 200, {}, "{}" + + auth = MockSigner(self.mock_session(), region) + + client = OpenSearch( + hosts=[{"host": "search-domain.region.es.amazonaws.com"}], + http_auth=auth(url), + use_ssl=True, + verify_certs=True, + connection_class=MockConnection, + ) + client.index("index", {"test": "data"}, id=doc_id) + self.assertEqual( + signed_url, + sent_url, + "URLs don't match", + ) + class TestRequestsConnectionRedirect(TestCase): server1: TestHTTPServer diff --git a/test_opensearchpy/test_server/test_clients.py b/test_opensearchpy/test_server/test_clients.py index 7639a161..2363847b 100644 --- a/test_opensearchpy/test_server/test_clients.py +++ b/test_opensearchpy/test_server/test_clients.py @@ -25,10 +25,72 @@ # under the License. +import pytest + +from opensearchpy.exceptions import RequestError + from . import OpenSearchTestCase +class TestSpecialCharacters(OpenSearchTestCase): + def test_index_with_slash(self) -> None: + index_name = "movies/shmovies" + with pytest.raises(RequestError) as e: + self.client.indices.create(index=index_name) + self.assertEqual( + str(e.value), + "RequestError(400, 'invalid_index_name_exception', 'Invalid index name [movies/shmovies], must not contain the following characters [ , \", *, \\\\, <, |, ,, >, /, ?]')", + ) + + class TestUnicode(OpenSearchTestCase): + def test_indices_lifecycle_english(self) -> None: + index_name = "movies" + + index_create_result = self.client.indices.create(index=index_name) + self.assertTrue(index_create_result["acknowledged"]) + self.assertEqual(index_name, index_create_result["index"]) + + document = {"name": "Solaris", "director": "Andrei Tartakovsky", "year": "2011"} + id = "solaris@2011" + doc_insert_result = self.client.index( + index=index_name, body=document, id=id, refresh=True + ) + self.assertEqual("created", doc_insert_result["result"]) + self.assertEqual(index_name, doc_insert_result["_index"]) + self.assertEqual(id, doc_insert_result["_id"]) + + doc_delete_result = self.client.delete(index=index_name, id=id) + self.assertEqual("deleted", doc_delete_result["result"]) + self.assertEqual(index_name, doc_delete_result["_index"]) + self.assertEqual(id, doc_delete_result["_id"]) + + index_delete_result = self.client.indices.delete(index=index_name) + self.assertTrue(index_delete_result["acknowledged"]) + + def test_indices_lifecycle_russian(self) -> None: + index_name = "кино" + index_create_result = self.client.indices.create(index=index_name) + self.assertTrue(index_create_result["acknowledged"]) + self.assertEqual(index_name, index_create_result["index"]) + + document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"} + id = "соларис@2011" + doc_insert_result = self.client.index( + index=index_name, body=document, id=id, refresh=True + ) + self.assertEqual("created", doc_insert_result["result"]) + self.assertEqual(index_name, doc_insert_result["_index"]) + self.assertEqual(id, doc_insert_result["_id"]) + + doc_delete_result = self.client.delete(index=index_name, id=id) + self.assertEqual("deleted", doc_delete_result["result"]) + self.assertEqual(index_name, doc_delete_result["_index"]) + self.assertEqual(id, doc_delete_result["_id"]) + + index_delete_result = self.client.indices.delete(index=index_name) + self.assertTrue(index_delete_result["acknowledged"]) + def test_indices_analyze(self) -> None: self.client.indices.analyze(body='{"text": "привет"}')