Skip to content

Commit

Permalink
Added custom runner for setting concurrent segment search params
Browse files Browse the repository at this point in the history
Signed-off-by: Martin Gaievski <[email protected]>
  • Loading branch information
martin-gaievski committed Jul 30, 2024
1 parent b816836 commit 42ddc44
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 2 deletions.
2 changes: 2 additions & 0 deletions trec_covid_semantic_search/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ This workload allows the following parameters to be specified using `--workload-
* `num_variable_queries` (default: 0) Number of variable queries to use for the semantic search task; 0 means a fixed query is used, and the maximum value is 50.
* `range_gte` (default: 100) Number that defines the lower bound (inclusive) for the range query when it is used as an element of the semantic search query
* `range_lte` (default: 10000000) Number that defines the upper bound (inclusive) for the range query when it is used as an element of the semantic search query
* `concurent_segment_search_enabled` (default: `false`) Enables or disables the concurrent segment search feature
* `max_slice_count` (default: 0) Sets the maximum number of slices for the concurrent segment search feature. 0 means the Lucene mechanism is used to calculate the number of slices

### Running a benchmark

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"bulk_indexing_clients": 4,
"bulk_size": 200,
"number_of_replicas": 1,
"number_of_shards" :8,
"ingest_percentage":100,
"search_clients": 8,
"warmup_iterations": 20,
"iterations": 100,
"variable_queries": 50,
"k": 100,
"only_run_on_ml_node" : "false",
"concurent_segment_search_enabled": "true"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"bulk_indexing_clients": 4,
"bulk_size": 200,
"number_of_replicas": 1,
"number_of_shards" :8,
"ingest_percentage":100,
"search_clients": 8,
"warmup_iterations": 20,
"iterations": 100,
"variable_queries": 50,
"k": 100,
"only_run_on_ml_node" : "true",
"concurent_segment_search_enabled": "true"
}
38 changes: 38 additions & 0 deletions trec_covid_semantic_search/runners.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from osbenchmark.worker_coordinator.runner import Retry, Runner
from osbenchmark.client import RequestContextHolder

# This runner class and registration is a temporary workaround while the next version of OSB is pending release
def register(registry):
    """Register this module's custom runner with the given registry.

    The runner instance is wrapped in ``Retry`` and registered as an async
    runner under ``UpdateConcurrentSegmentSearchSettings.RUNNER_NAME``.
    """
    settings_runner = Retry(UpdateConcurrentSegmentSearchSettings())
    registry.register_runner(
        UpdateConcurrentSegmentSearchSettings.RUNNER_NAME,
        settings_runner,
        async_runner=True,
    )

# Module-level holder shared by the runner below; the runner calls
# on_client_request_start()/on_client_request_end() on it around the
# cluster settings request.
request_context_holder = RequestContextHolder()

class UpdateConcurrentSegmentSearchSettings(Runner):
    """Custom runner that updates the concurrent segment search cluster settings.

    Sends a persistent cluster-settings update containing
    ``search.concurrent_segment_search.enabled`` and, when provided,
    ``search.concurrent.max_slice_count``.

    Operation parameters:
        enabled: value for ``search.concurrent_segment_search.enabled``.
            The legacy key ``enable`` is still honoured as a fallback.
            Defaults to ``"false"`` when neither key is present.
        max_slice_count: optional value for
            ``search.concurrent.max_slice_count``; omitted from the request
            when not supplied.
    """

    RUNNER_NAME = "update-concurrent-segment-search-settings"

    async def __call__(self, opensearch, params):
        # The workload's test procedure passes the flag under "enabled".
        # Reading only "enable" silently ignored the configured value and
        # always sent "false", so look up "enabled" first and keep "enable"
        # as a backward-compatible fallback.
        enable_setting = params.get("enabled", params.get("enable", "false"))
        max_slice_count = params.get("max_slice_count", None)
        body = {
            "persistent": {
                "search.concurrent_segment_search.enabled": enable_setting
            }
        }
        # Only include the slice count when the operation supplies one.
        if max_slice_count is not None:
            body["persistent"]["search.concurrent.max_slice_count"] = max_slice_count
        # Bracket the request so the client request start/end are recorded.
        request_context_holder.on_client_request_start()
        await opensearch.cluster.put_settings(body=body)
        request_context_holder.on_client_request_end()

    def __repr__(self, *args, **kwargs):
        return self.RUNNER_NAME
10 changes: 9 additions & 1 deletion trec_covid_semantic_search/test_procedures/procedures.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@
}
},
{
"operation": "delete-index"
"operation": "delete-index"
},
{
"name": "set-concurrent-segment-search",
"operation": {
"operation-type": "update-concurrent-segment-search-settings",
"enabled": "{{concurent_segment_search_enabled | default('false')}}",
"max_slice_count": "{{max_slice_count | default(0)}}"
}
},
{
"operation": "delete-ingest-pipeline"
Expand Down
6 changes: 5 additions & 1 deletion trec_covid_semantic_search/workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from osbenchmark.workload.loader import Downloader
from osbenchmark.workload.loader import Decompressor
from osbenchmark.workload.loader import Decompressor
from osbenchmark.worker_coordinator.runner import Retry
from .runners import register as register_runners

script_dir = os.path.dirname(os.path.realpath(__file__))

Expand Down Expand Up @@ -183,4 +185,6 @@ def register(registry):
registry.register_param_source("semantic-search-neural-source", QueryParamSourceNeural)
registry.register_param_source("hybrid-query-bm25-neural-search-source", QueryParamSourceHybridBm25Neural)
registry.register_param_source("hybrid-query-bm25-knn-search-source", QueryParamSourceHybridBm25Knn)
registry.register_param_source("create-ingest-pipeline", ingest_pipeline_param_source)
# This runner class and registration is a temporary workaround while the next version of OSB is pending release
registry.register_param_source("create-ingest-pipeline", ingest_pipeline_param_source)
register_runners(registry)

0 comments on commit 42ddc44

Please sign in to comment.