From 42ddc44e3e182b0ca0edb90d3bc809e8e3f13fd6 Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Tue, 30 Jul 2024 18:50:55 +0000 Subject: [PATCH] Added custom runner for setting concurrent segment search params Signed-off-by: Martin Gaievski --- trec_covid_semantic_search/README.md | 2 + ...rams_no_ml_node_concurrent_seg_search.json | 14 +++++++ ...ms_only_ml_node_concurrent_seg_search.json | 14 +++++++ trec_covid_semantic_search/runners.py | 38 +++++++++++++++++++ .../test_procedures/procedures.json | 10 ++++- trec_covid_semantic_search/workload.py | 6 ++- 6 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json create mode 100644 trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json create mode 100644 trec_covid_semantic_search/runners.py diff --git a/trec_covid_semantic_search/README.md b/trec_covid_semantic_search/README.md index 6374cdf6..6567d4fa 100644 --- a/trec_covid_semantic_search/README.md +++ b/trec_covid_semantic_search/README.md @@ -71,6 +71,8 @@ This workload allows the following parameters to be specified using `--workload- * `num_variable_queries` (default: 0) Number of variable queries will be used for the semantic search task, 0 means fixed query and max value is 50. * `range_gte` (default: 100) Number that defines the lower bound (inclusive) for range query when it's used as elemnts in semantic search query * `range_lte` (default: 10000000) Number that defines the upper bound (inclusive) for range query when it's used as elemnts in semantic search query +* `concurent_segment_search_enabled` (default: `false`) Enables or disables concurrent segment search feature +* `max_slice_count` (default: 0) Set the maximum number of slices for concurrent segment search feature. 0 means we use Lucene meachnism of calculating the number of slices ### Running a benchmark diff --git a/trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json b/trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json new file mode 100644 index 00000000..bc47d9d3 --- /dev/null +++ b/trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json @@ -0,0 +1,14 @@ +{ + "bulk_indexing_clients": 4, + "bulk_size": 200, + "number_of_replicas": 1, + "number_of_shards" :8, + "ingest_percentage":100, + "search_clients": 8, + "warmup_iterations": 20, + "iterations": 100, + "variable_queries": 50, + "k": 100, + "only_run_on_ml_node" : "false", + "concurent_segment_search_enabled": "true" +} diff --git a/trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json b/trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json new file mode 100644 index 00000000..5ea45814 --- /dev/null +++ b/trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json @@ -0,0 +1,14 @@ +{ + "bulk_indexing_clients": 4, + "bulk_size": 200, + "number_of_replicas": 1, + "number_of_shards" :8, + "ingest_percentage":100, + "search_clients": 8, + "warmup_iterations": 20, + "iterations": 100, + "variable_queries": 50, + "k": 100, + "only_run_on_ml_node" : "true", + "concurent_segment_search_enabled": "true" +} diff --git a/trec_covid_semantic_search/runners.py b/trec_covid_semantic_search/runners.py new file mode 100644 index 00000000..37ce4376 --- /dev/null +++ b/trec_covid_semantic_search/runners.py @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. + +from osbenchmark.worker_coordinator.runner import Retry, Runner +from osbenchmark.client import RequestContextHolder + +# This runner class and registration is a temporary workaround while the next version of OSB is pending release +def register(registry): + registry.register_runner( + UpdateConcurrentSegmentSearchSettings.RUNNER_NAME, + Retry(UpdateConcurrentSegmentSearchSettings()), async_runner=True + ) + +request_context_holder = RequestContextHolder() + +class UpdateConcurrentSegmentSearchSettings(Runner): + + RUNNER_NAME = "update-concurrent-segment-search-settings" + + async def __call__(self, opensearch, params): + enable_setting = params.get("enable", "false") + max_slice_count = params.get("max_slice_count", None) + body = { + "persistent": { + "search.concurrent_segment_search.enabled": enable_setting + } + } + if max_slice_count is not None: + body["persistent"]["search.concurrent.max_slice_count"] = max_slice_count + request_context_holder.on_client_request_start() + await opensearch.cluster.put_settings(body=body) + request_context_holder.on_client_request_end() + + def __repr__(self, *args, **kwargs): + return self.RUNNER_NAME diff --git a/trec_covid_semantic_search/test_procedures/procedures.json b/trec_covid_semantic_search/test_procedures/procedures.json index 1a9a58bf..9ca2ddc7 100644 --- a/trec_covid_semantic_search/test_procedures/procedures.json +++ b/trec_covid_semantic_search/test_procedures/procedures.json @@ -22,7 +22,15 @@ } }, { - "operation": "delete-index" + "operation": "delete-index" + }, + { + "name": "set-concurrent-segment-search", + "operation": { + "operation-type": "update-concurrent-segment-search-settings", + "enabled": "{{concurent_segment_search_enabled | default('false')}}", + "max_slice_count": "{{max_slice_count | default(0)}}" + } }, { "operation": "delete-ingest-pipeline" diff --git a/trec_covid_semantic_search/workload.py b/trec_covid_semantic_search/workload.py index 073b8344..20408125 100644 --- a/trec_covid_semantic_search/workload.py +++ b/trec_covid_semantic_search/workload.py @@ -6,6 +6,8 @@ from osbenchmark.workload.loader import Downloader from osbenchmark.workload.loader import Decompressor from osbenchmark.workload.loader import Decompressor +from osbenchmark.worker_coordinator.runner import Retry +from .runners import register as register_runners script_dir = os.path.dirname(os.path.realpath(__file__)) @@ -183,4 +185,6 @@ def register(registry): registry.register_param_source("semantic-search-neural-source", QueryParamSourceNeural) registry.register_param_source("hybrid-query-bm25-neural-search-source", QueryParamSourceHybridBm25Neural) registry.register_param_source("hybrid-query-bm25-knn-search-source", QueryParamSourceHybridBm25Knn) - registry.register_param_source("create-ingest-pipeline", ingest_pipeline_param_source) \ No newline at end of file + # This runner class and registration is a temporary workaround while the next version of OSB is pending release + registry.register_param_source("create-ingest-pipeline", ingest_pipeline_param_source) + register_runners(registry) \ No newline at end of file