diff --git a/CHANGELOG.md b/CHANGELOG.md
index b54477e5..eedfe893 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Added `AsyncSearch#collapse` ([827](https://github.com/opensearch-project/opensearch-py/pull/827))
 - Added `pool_maxsize` to `AsyncOpenSearch` ([845](https://github.com/opensearch-project/opensearch-py/pull/845))
 - Added `ssl_assert_hostname` to `AsyncOpenSearch` ([843](https://github.com/opensearch-project/opensearch-py/pull/843))
-- Added a sample that uses `search_after` parameter ([859](https://github.com/opensearch-project/opensearch-py/pull/859))
+- Added sync and async samples that use the `search_after` parameter ([859](https://github.com/opensearch-project/opensearch-py/pull/859))
 ### Changed
 ### Deprecated
 ### Removed
diff --git a/samples/aws/search_after.py b/samples/aws/search_after.py
deleted file mode 100644
index b07cefac..00000000
--- a/samples/aws/search_after.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/env python
-
-# SPDX-License-Identifier: Apache-2.0
-#
-# The OpenSearch Contributors require contributions made to
-# this file be licensed under the Apache-2.0 license or a
-# compatible open source license.
-#
-# Modifications Copyright OpenSearch Contributors. See
-# GitHub history for details.
-
-import os
-
-from opensearchpy import OpenSearch
-
-
-def main() -> None:
-    """
-    An example showing how to use search_after to paginate through the search results.
-    It performs a search query on an index, retrieves the first page of results,
-    and then fetches the next page of results using the search_after parameter.
-    """
-
-    host = "localhost"
-    port = 9200
-    auth = (
-        "admin",
-        os.getenv("OPENSEARCH_PASSWORD", "admin"),
-    )  # For testing only. Don't store credentials in code.
-
-    client = OpenSearch(
-        hosts=[{"host": host, "port": port}],
-        http_auth=auth,
-        use_ssl=True,
-        verify_certs=False,
-        ssl_show_warn=False,
-    )
-
-    # create an index
-    client.indices.create(index="movies")
-
-    try:
-        # add 10 documents to the index
-        for i in range(10):
-            client.index(
-                index="movies",
-                id=i,
-                body={
-                    "title": f"The Dark Knight {i}",
-                    "director": "Christopher Nolan",
-                    "year": 2008 + i,
-                },
-            )
-
-        # add additional documents to the index
-        client.index(
-            index="movies",
-            body={
-                "title": "The Godfather",
-                "director": "Francis Ford Coppola",
-                "year": 1972,
-            },
-        )
-
-        client.index(
-            index="movies",
-            body={
-                "title": "The Shawshank Redemption",
-                "director": "Frank Darabont",
-                "year": 1994,
-            },
-        )
-
-        # refresh the index to make the documents searchable
-        client.indices.refresh(index="movies")
-
-        # define the search query with sorting and pagination options
-        search_body = {
-            "query": {"match": {"title": "dark knight"}},
-            "sort": [{"year": {"order": "asc"}}],
-            "size": 2,
-        }
-
-        # perform the search operation on the 'movies' index with the defined query and pagination options
-        response = client.search(index="movies", body=search_body)
-
-        # extract the hits from the response
-        hits = response["hits"]["hits"]
-
-        # get the last sort value from the first page
-        search_after = hits[-1]["sort"]
-
-        # fetch page 2
-        search_body["search_after"] = search_after
-        response = client.search(index="movies", body=search_body)
-        hits_page_2 = response["hits"]["hits"]
-
-        # get the last sort value from page 2
-        search_after = hits_page_2[-1]["sort"]
-
-        # fetch page 3
-        search_body["search_after"] = search_after
-        response = client.search(index="movies", body=search_body)
-
-        hits_page_3 = response["hits"]["hits"]
-        # print the hits from each page
-        print("Page 1:")
-        for hit in hits:
-            print(hit)
-        print("\nPage 2:")
-        for hit in hits_page_2:
-            print(hit)
-        print("\nPage 3:")
-        for hit in hits_page_3:
-            print(hit)
-    finally:
-        # delete the index
-        client.indices.delete(index="movies")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/samples/search/search_after_async.py b/samples/search/search_after_async.py
new file mode 100644
index 00000000..45930f03
--- /dev/null
+++ b/samples/search/search_after_async.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+#
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+
+import asyncio
+import os
+
+from opensearchpy import AsyncOpenSearch
+
+
+async def main() -> None:
+    """
+    This sample uses asyncio and AsyncOpenSearch to asynchronously
+    connect to a local OpenSearch cluster, perform a search query on an index,
+    retrieve the first page of results, and fetch each following page of results
+    using the search_after parameter.
+    """
+
+    # connect to OpenSearch
+    host = "localhost"
+    port = 9200
+    auth = (
+        "admin",
+        os.getenv("OPENSEARCH_PASSWORD", "admin"),
+    )  # For testing only. Don't store credentials in code.
+
+    # the async context manager awaits client.close() on exit,
+    # even if one of the requests below fails
+    async with AsyncOpenSearch(
+        hosts=[{"host": host, "port": port}],
+        http_auth=auth,
+        use_ssl=True,
+        verify_certs=False,
+        ssl_show_warn=False,
+    ) as client:
+        # create an index
+        await client.indices.create(index="movies")
+
+        try:
+            # add a dataset of 110 movies: 15 that match the query and 95 that don't
+            for i in range(15):
+                await client.index(
+                    index="movies",
+                    id=i,
+                    body={
+                        "title": f"The Dark Knight {i}",
+                        "director": "Christopher Nolan",
+                        "year": 2008 + i,
+                    },
+                )
+
+            for i in range(95):
+                await client.index(
+                    index="movies",
+                    id=i + 15,
+                    body={
+                        "title": f"Movie Title {i + 15}",
+                        "director": f"Director {i + 15}",
+                        "year": 1950 + i + 15,
+                    },
+                )
+
+            # refresh the index to make the documents searchable
+            await client.indices.refresh(index="movies")
+
+            # define the search query with sorting and pagination options
+            search_body = {
+                "query": {"match": {"title": "dark knight"}},
+                "sort": [{"year": {"order": "asc"}}],
+                "size": 10,
+            }
+
+            page = 1
+            total_hits = 0
+            while True:
+                # execute the search
+                response = await client.search(index="movies", body=search_body)
+                hits = response["hits"]["hits"]
+
+                # break if no more results
+                if not hits:
+                    break
+
+                print(f"\nPage {page}:")
+
+                for hit in hits:
+                    print(hit)
+                    total_hits += 1
+
+                # get the sort values of the last document for the next page
+                last_sort = hits[-1]["sort"]
+                search_body["search_after"] = last_sort
+                page += 1
+
+            print("\nPagination Summary:")
+            print(f"Total pages: {page - 1}")
+            print(f"Total hits: {total_hits}")
+            print(f"Results per page: {search_body['size']}")
+        finally:
+            # delete the index
+            await client.indices.delete(index="movies")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/samples/search/search_after_sync.py b/samples/search/search_after_sync.py
new file mode 100644
index 00000000..a647d34b
--- /dev/null
+++ b/samples/search/search_after_sync.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+#
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+
+import os
+
+from opensearchpy import OpenSearch
+
+
+def main() -> None:
+    """
+    This sample shows how to use search_after to paginate through the search results.
+    It performs a search query on an index, retrieves the first page of results,
+    and then fetches each following page of results using the search_after parameter.
+    """
+
+    # connect to OpenSearch
+    host = "localhost"
+    port = 9200
+    auth = (
+        "admin",
+        os.getenv("OPENSEARCH_PASSWORD", "admin"),
+    )  # For testing only. Don't store credentials in code.
+
+    # the context manager closes the client even if a request below fails
+    with OpenSearch(
+        hosts=[{"host": host, "port": port}],
+        http_auth=auth,
+        use_ssl=True,
+        verify_certs=False,
+        ssl_show_warn=False,
+    ) as client:
+        # create an index
+        client.indices.create(index="movies")
+
+        try:
+            # add a dataset of 110 movies: 15 that match the query and 95 that don't
+            for i in range(15):
+                client.index(
+                    index="movies",
+                    id=i,
+                    body={
+                        "title": f"The Dark Knight {i}",
+                        "director": "Christopher Nolan",
+                        "year": 2008 + i,
+                    },
+                )
+
+            for i in range(95):
+                client.index(
+                    index="movies",
+                    id=i + 15,
+                    body={
+                        "title": f"Movie Title {i + 15}",
+                        "director": f"Director {i + 15}",
+                        "year": 1950 + i + 15,
+                    },
+                )
+
+            # refresh the index to make the documents searchable
+            client.indices.refresh(index="movies")
+
+            # define the search query with sorting and pagination options
+            search_body = {
+                "query": {"match": {"title": "dark knight"}},
+                "sort": [{"year": {"order": "asc"}}],
+                "size": 10,
+            }
+
+            page = 1
+            total_hits = 0
+            while True:
+                # execute the search
+                response = client.search(index="movies", body=search_body)
+                hits = response["hits"]["hits"]
+
+                # break if no more results
+                if not hits:
+                    break
+
+                print(f"\nPage {page}:")
+
+                for hit in hits:
+                    print(hit)
+                    total_hits += 1
+
+                # get the sort values of the last document for the next page
+                last_sort = hits[-1]["sort"]
+                search_body["search_after"] = last_sort
+                page += 1
+
+            print("\nPagination Summary:")
+            print(f"Total pages: {page - 1}")
+            print(f"Total hits: {total_hits}")
+            print(f"Results per page: {search_body['size']}")
+        finally:
+            # delete the index
+            client.indices.delete(index="movies")
+
+
+if __name__ == "__main__":
+    main()