From 42ddc44e3e182b0ca0edb90d3bc809e8e3f13fd6 Mon Sep 17 00:00:00 2001
From: Martin Gaievski <gaievski@amazon.com>
Date: Tue, 30 Jul 2024 18:50:55 +0000
Subject: [PATCH] Added custom runner for setting concurrent segment search
 params

Signed-off-by: Martin Gaievski <gaievski@amazon.com>
---
 trec_covid_semantic_search/README.md          |  2 +
 ...rams_no_ml_node_concurrent_seg_search.json | 14 +++++++
 ...ms_only_ml_node_concurrent_seg_search.json | 14 +++++++
 trec_covid_semantic_search/runners.py         | 38 +++++++++++++++++++
 .../test_procedures/procedures.json           | 10 ++++-
 trec_covid_semantic_search/workload.py        |  6 ++-
 6 files changed, 82 insertions(+), 2 deletions(-)
 create mode 100644 trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json
 create mode 100644 trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json
 create mode 100644 trec_covid_semantic_search/runners.py

diff --git a/trec_covid_semantic_search/README.md b/trec_covid_semantic_search/README.md
index 6374cdf6..6567d4fa 100644
--- a/trec_covid_semantic_search/README.md
+++ b/trec_covid_semantic_search/README.md
@@ -71,6 +71,8 @@ This workload allows the following parameters to be specified using `--workload-
 * `num_variable_queries` (default: 0) Number of variable queries will be used for the semantic search task, 0 means fixed query and max value is 50.
 * `range_gte` (default: 100) Number that defines the lower bound (inclusive) for range query when it's used as elemnts in semantic search query
 * `range_lte` (default: 10000000) Number that defines the upper bound (inclusive) for range query when it's used as elemnts in semantic search query
+* `concurent_segment_search_enabled` (default: `false`) Enables or disables concurrent segment search feature
+* `max_slice_count` (default: 0) Set the maximum number of slices for concurrent segment search feature. 0 means we use Lucene meachnism of calculating the number of slices
 
 ### Running a benchmark
 
diff --git a/trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json b/trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json
new file mode 100644
index 00000000..bc47d9d3
--- /dev/null
+++ b/trec_covid_semantic_search/params/params_no_ml_node_concurrent_seg_search.json
@@ -0,0 +1,14 @@
+{
+    "bulk_indexing_clients": 4,
+    "bulk_size": 200,
+    "number_of_replicas": 1,
+    "number_of_shards" :8,
+    "ingest_percentage":100,
+    "search_clients": 8,
+    "warmup_iterations": 20,
+    "iterations": 100,
+    "variable_queries": 50,
+    "k": 100,
+    "only_run_on_ml_node" : "false",
+    "concurent_segment_search_enabled": "true"
+}
diff --git a/trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json b/trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json
new file mode 100644
index 00000000..5ea45814
--- /dev/null
+++ b/trec_covid_semantic_search/params/params_only_ml_node_concurrent_seg_search.json
@@ -0,0 +1,14 @@
+{
+    "bulk_indexing_clients": 4,
+    "bulk_size": 200,
+    "number_of_replicas": 1,
+    "number_of_shards" :8,
+    "ingest_percentage":100,
+    "search_clients": 8,
+    "warmup_iterations": 20,
+    "iterations": 100,
+    "variable_queries": 50,
+    "k": 100,
+    "only_run_on_ml_node" : "true",
+    "concurent_segment_search_enabled": "true"
+}
diff --git a/trec_covid_semantic_search/runners.py b/trec_covid_semantic_search/runners.py
new file mode 100644
index 00000000..37ce4376
--- /dev/null
+++ b/trec_covid_semantic_search/runners.py
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+from osbenchmark.worker_coordinator.runner import Retry, Runner
+from osbenchmark.client import RequestContextHolder
+
+# This runner class and registration is a temporary workaround while the next version of OSB is pending release
+def register(registry):
+    registry.register_runner(
+        UpdateConcurrentSegmentSearchSettings.RUNNER_NAME,
+                    Retry(UpdateConcurrentSegmentSearchSettings()), async_runner=True
+    )
+
+request_context_holder = RequestContextHolder()
+
+class UpdateConcurrentSegmentSearchSettings(Runner):
+
+    RUNNER_NAME = "update-concurrent-segment-search-settings"
+
+    async def __call__(self, opensearch, params):
+        enable_setting = params.get("enable", "false")
+        max_slice_count = params.get("max_slice_count", None)
+        body = {
+            "persistent": {
+                "search.concurrent_segment_search.enabled": enable_setting
+            }
+        }
+        if max_slice_count is not None:
+            body["persistent"]["search.concurrent.max_slice_count"] = max_slice_count
+        request_context_holder.on_client_request_start()
+        await opensearch.cluster.put_settings(body=body)
+        request_context_holder.on_client_request_end()
+
+    def __repr__(self, *args, **kwargs):
+        return self.RUNNER_NAME
diff --git a/trec_covid_semantic_search/test_procedures/procedures.json b/trec_covid_semantic_search/test_procedures/procedures.json
index 1a9a58bf..9ca2ddc7 100644
--- a/trec_covid_semantic_search/test_procedures/procedures.json
+++ b/trec_covid_semantic_search/test_procedures/procedures.json
@@ -22,7 +22,15 @@
           }
         },
         {
-            "operation": "delete-index"
+          "operation": "delete-index"
+        },
+        {
+          "name": "set-concurrent-segment-search",
+          "operation": {
+            "operation-type": "update-concurrent-segment-search-settings",
+            "enabled": "{{concurent_segment_search_enabled | default('false')}}",
+            "max_slice_count": "{{max_slice_count | default(0)}}"
+          }
         },
         {
           "operation": "delete-ingest-pipeline"
diff --git a/trec_covid_semantic_search/workload.py b/trec_covid_semantic_search/workload.py
index 073b8344..20408125 100644
--- a/trec_covid_semantic_search/workload.py
+++ b/trec_covid_semantic_search/workload.py
@@ -6,6 +6,8 @@
 from osbenchmark.workload.loader import Downloader
 from osbenchmark.workload.loader import Decompressor
 from osbenchmark.workload.loader import Decompressor
+from osbenchmark.worker_coordinator.runner import Retry
+from .runners import register as register_runners
 
 script_dir = os.path.dirname(os.path.realpath(__file__))
 
@@ -183,4 +185,6 @@ def register(registry):
     registry.register_param_source("semantic-search-neural-source", QueryParamSourceNeural)
     registry.register_param_source("hybrid-query-bm25-neural-search-source", QueryParamSourceHybridBm25Neural)
     registry.register_param_source("hybrid-query-bm25-knn-search-source", QueryParamSourceHybridBm25Knn)
-    registry.register_param_source("create-ingest-pipeline", ingest_pipeline_param_source)
\ No newline at end of file
+    # This runner class and registration is a temporary workaround while the next version of OSB is pending release
+    registry.register_param_source("create-ingest-pipeline", ingest_pipeline_param_source)
+    register_runners(registry)
\ No newline at end of file