From 875dd1ddc4391d32e986d63c69fef6a9e3975049 Mon Sep 17 00:00:00 2001
From: Martin Gaievski
Date: Sun, 14 Jul 2024 22:03:52 +0000
Subject: [PATCH] Adding semantic search workload that includes vector and bm25 search

Signed-off-by: Martin Gaievski
---
 trec_covid_semantic_search/README.md | 233 ++++++++++++++++++
 trec_covid_semantic_search/index.json | 46 ++++
 .../operations/default.json | 172 +++++++++++++
 trec_covid_semantic_search/params/params.json | 12 +
 .../test_procedures/procedures.json | 162 ++++++++++++
 trec_covid_semantic_search/workload.json | 30 +++
 trec_covid_semantic_search/workload.py | 183 ++++++++++++++
 .../workload_queries_knn.json | 6 +
 8 files changed, 844 insertions(+)
 create mode 100644 trec_covid_semantic_search/README.md
 create mode 100644 trec_covid_semantic_search/index.json
 create mode 100644 trec_covid_semantic_search/operations/default.json
 create mode 100644 trec_covid_semantic_search/params/params.json
 create mode 100644 trec_covid_semantic_search/test_procedures/procedures.json
 create mode 100644 trec_covid_semantic_search/workload.json
 create mode 100644 trec_covid_semantic_search/workload.py
 create mode 100644 trec_covid_semantic_search/workload_queries_knn.json

diff --git a/trec_covid_semantic_search/README.md b/trec_covid_semantic_search/README.md
new file mode 100644
index 00000000..93fa67e0
--- /dev/null
+++ b/trec_covid_semantic_search/README.md
@@ -0,0 +1,233 @@
+# Semantic Search Workload
+
+This workload benchmarks the performance of semantic search queries in OpenSearch. Ingested documents will have embeddings that are generated during the ingestion process by one of the pre-trained models.
+
+## Datasets
+
+We use a processed version of the trec-covid dataset. Trec-Covid is a dataset collection of documents about COVID-19 information.
+
+- Trec-Covid website: https://ir.nist.gov/covidSubmit/index.html
+- Dataset: https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/trec-covid.zip
+
+We processed the dataset by creating 6 copies of the same document and shuffling the copies so they are ingested in random order. We create a custom artifact for queries by extracting the queries portion from the original `trec-covid` dataset and generating vector embeddings for the query text using a 768-dimension vector, the same dimensions as used for document ingestion.
+
+### Example Document
+
+The following is an example of a document that is being ingested during indexing:
+
+```json
+{
+    "title": "Simultaneous Video-EEG-ECG Monitoring to Identify Neurocardiac Dysfunction in Mouse Models of Epilepsy.",
+    "metadata": {
+        "url": "https://doi.org/10.3791/57300; https://www.ncbi.nlm.nih.gov/pubmed/29443088/",
+        "pubmed_id": "29443088"
+    }
+}
+```
+
+The following is an example of a query:
+
+```json
+{
+    "_id": "1",
+    "query": "what is the origin of COVID-19",
+    "vector_embedding": [
+        -0.06979332,
+        0.05764826,
+        ...
+    ]
+}
+
+```
+
+## Parameters
+
+This workload allows the following parameters to be specified using `--workload-params`:
+
+* `bulk_size` (default: 100)
+* `bulk_indexing_clients` (default: 1): Number of clients that issue bulk indexing requests.
+* `ingest_percentage` (default: 100): A number between 0 and 100 that defines how much of the document corpus should be ingested.
+* `number_of_replicas` (default: 0)
+* `number_of_shards` (default: 1)
+* `query_cache_enabled` (default: false)
+* `requests_cache_enabled` (default: false)
+* `source_enabled` (default: true): A boolean defining whether the `_source` field is stored in the index.
+* `force_merge_max_num_segments` (default: unset): An integer specifying the max amount of segments the force-merge operation should use.
+* `index_settings`: A list of index settings. Index settings defined elsewhere (e.g. `number_of_replicas`) need to be overridden explicitly.
+* `cluster_health` (default: "green"): The minimum required cluster health.
+* `error_level` (default: "non-fatal"): Available for bulk operations only to specify ignore-response-error-level.
+* `target_throughput` (default: default values for each operation): Number of requests per second, `""` for no limit.
+* `search_clients`: Number of clients that issue search requests.
+* `model_name` (default: huggingface/sentence-transformers/all-mpnet-base-v2): OpenSearch-provided pretrained model name.
+* `model_version` (default: 1.0.1): Model version.
+* `model_format` (default: TORCH_SCRIPT): Model format.
+* `dimensions` (default: 768): Vector dimensions, needed to match the model.
+* `engine` (default: `lucene`): The approximate k-NN library to use for indexing and search.
+* `method` (default: `hnsw`): K-NN search algorithm.
+* `space_type` (default: `l2`): The vector space used to calculate the distance between vectors.
+* `k` (default: 10): Number of nearest neighbors to return.
+* `warmup_iterations`: Number of warmup iterations each search client executes.
+* `iterations`: Number of test iterations each search client executes.
+* `num_variable_queries` (default: 0): Number of variable queries to be used for the semantic search task; 0 means a fixed query, and the max value is 50.
+* `range_gte` (default: 100): Number that defines the lower bound (inclusive) for the range query when it's used as an element in a semantic search query.
+* `range_lte` (default: 10000000): Number that defines the upper bound (inclusive) for the range query when it's used as an element in a semantic search query.
+
+### Running a benchmark
+
+Before running a benchmark, ensure that the load generation host is able to access your cluster endpoint and that the
+appropriate dataset is available on the host.
+
+Currently, we support 2 test procedures for the semantic search workload. The default procedure is `create-index-ingest-data-search`, which creates an index, ingests data and runs a base set of search queries.
+
+To run the default workload, invoke the following command.
+
+```
+# OpenSearch Cluster endpoint URL with hostname and port
+export ENDPOINT=
+# Absolute file path of Workload file
+export WORKLOAD_FILE=
+
+opensearch-benchmark execute-test \
+    --workload-path="/opensearch-benchmark-workloads/trec_covid_semantic_search/" \
+    --workload-params="/trec_covid_semantic_search/params/params.json" \
+    --pipeline=benchmark-only \
+    --target-host=$ENDPOINT \
+    --kill-running-processes \
+    --test-procedure="search"
+```
+
+## Current Procedures
+
+### Create index with data
+
+This procedure creates an index, deploys the model locally, creates a pipeline with ingest and search processors, and ingests documents. At the end we run the match_all query that returns all documents in the index.
+Procedure name `create-index-ingest-data-search`.
+This is the default procedure for this workload.
+
+### Run semantic search queries
+
+This search procedure runs semantic search queries: neural, hybrid. It deletes and deploys an ml model, creates a processor, and uses this model to generate search-specific embeddings.
+Procedure name `search`.
+
+#### Sample Output
+
+The output of a sample test run is provided below. Metrics are captured in the result's data store as usual, and this can be configured to be
+either in-memory, or an external OpenSearch cluster.
+
+```
+
+ ____ _____ __ ____ __ __
+ / __ \____ ___ ____ / ___/___ ____ ___________/ /_ / __ )___ ____ _____/ /_ ____ ___ ____ ______/ /__
+ / / / / __ \/ _ \/ __ \\__ \/ _ \/ __ `/ ___/ ___/ __ \ / __ / _ \/ __ \/ ___/ __ \/ __ `__ \/ __ `/ ___/ //_/
+/ /_/ / /_/ / __/ / / /__/ / __/ /_/ / / / /__/ / / / / /_/ / __/ / / / /__/ / / / / / / / / /_/ / / / ,<
+\____/ .___/\___/_/ /_/____/\___/\__,_/_/ \___/_/ /_/ /_____/\___/_/ /_/\___/_/ /_/_/ /_/ /_/\__,_/_/ /_/|_|
+ /_/
+
+[INFO] Executing test with workload [workload], test_procedure [hybrid-query-aggs-light] and provision_config_instance ['external'] with version [2.14.0].
+ +[WARNING] indexing_total_time is 11 ms indicating that the cluster is not in a defined clean state. Recorded index time metrics may be misleading. +[WARNING] refresh_total_time is 27 ms indicating that the cluster is not in a defined clean state. Recorded index time metrics may be misleading. +[WARNING] flush_total_time is 11 ms indicating that the cluster is not in a defined clean state. Recorded index time metrics may be misleading. + +Running delete-ml-model [100% done] +Running register-ml-model [100% done] +Running deploy-ml-model [100% done] +Running create-normalization-processor-no-weights-search-pipeline [100% done] +Running semantic-search-neural [100% done] +Running semantic-search-hybrid-bm25-and-neural-search [100% done] +Running semantic-search-hybrid-bm25-range-and-neural-search [100% done] +------------------------------------------------------ + _______ __ _____ + / ____(_)___ ____ _/ / / ___/_________ ________ + / /_ / / __ \/ __ `/ / \__ \/ ___/ __ \/ ___/ _ \ + / __/ / / / / / /_/ / / ___/ / /__/ /_/ / / / __/ +/_/ /_/_/ /_/\__,_/_/ /____/\___/\____/_/ \___/ +| Cumulative merge time of primary shards | | 1.05333 | min | +| Cumulative merge count of primary shards | | 58 | | +| Min cumulative merge time across primary shards | | 0 | min | +| Median cumulative merge time across primary shards | | 0.02405 | min | +| Max cumulative merge time across primary shards | | 0.740383 | min | +| Cumulative merge throttle time of primary shards | | 0.718733 | min | +| Min cumulative merge throttle time across primary shards | | 0 | min | +| Median cumulative merge throttle time across primary shards | | 0 | min | +| Max cumulative merge throttle time across primary shards | | 0.718733 | min | +| Cumulative refresh time of primary shards | | 15.7122 | min | +| Cumulative refresh count of primary shards | | 877 | | +| Min cumulative refresh time across primary shards | | 0 | min | +| Median cumulative refresh time across primary shards | | 1.64122 | min | 
+| Max cumulative refresh time across primary shards | | 3.16232 | min | +| Cumulative flush time of primary shards | | 27.8492 | min | +| Cumulative flush count of primary shards | | 35 | | +| Min cumulative flush time across primary shards | | 0 | min | +| Median cumulative flush time across primary shards | | 2.21 | min | +| Max cumulative flush time across primary shards | | 5.80563 | min | +| Total Young Gen GC time | | 0.193 | s | +| Total Young Gen GC count | | 10 | | +| Total Old Gen GC time | | 0 | s | +| Total Old Gen GC count | | 0 | | +| Store size | | 30.2634 | GB | +| Translog size | | 0.0721769 | GB | +| Heap used for segments | | 0 | MB | +| Heap used for doc values | | 0 | MB | +| Heap used for terms | | 0 | MB | +| Heap used for norms | | 0 | MB | +| Heap used for points | | 0 | MB | +| Heap used for stored fields | | 0 | MB | +| Segment count | | 225 | | +| Min Throughput | semantic-search-neural | 27.82 | ops/s | +| Mean Throughput | semantic-search-neural | 33.37 | ops/s | +| Median Throughput | semantic-search-neural | 34.08 | ops/s | +| Max Throughput | semantic-search-neural | 35.37 | ops/s | +| 50th percentile latency | semantic-search-neural | 211.371 | ms | +| 90th percentile latency | semantic-search-neural | 230.603 | ms | +| 99th percentile latency | semantic-search-neural | 248.195 | ms | +| 100th percentile latency | semantic-search-neural | 260.313 | ms | +| 50th percentile service time | semantic-search-neural | 211.371 | ms | +| 90th percentile service time | semantic-search-neural | 230.603 | ms | +| 99th percentile service time | semantic-search-neural | 248.195 | ms | +| 100th percentile service time | semantic-search-neural | 260.313 | ms | +| error rate | semantic-search-neural | 0 | % | +| Min Throughput | semantic-search-hybrid-bm25-and-neural-search | 35.62 | ops/s | +| Mean Throughput | semantic-search-hybrid-bm25-and-neural-search | 36.61 | ops/s | +| Median Throughput | semantic-search-hybrid-bm25-and-neural-search | 
36.74 | ops/s | +| Max Throughput | semantic-search-hybrid-bm25-and-neural-search | 36.96 | ops/s | +| 50th percentile latency | semantic-search-hybrid-bm25-and-neural-search | 212.232 | ms | +| 90th percentile latency | semantic-search-hybrid-bm25-and-neural-search | 228.367 | ms | +| 99th percentile latency | semantic-search-hybrid-bm25-and-neural-search | 252.348 | ms | +| 100th percentile latency | semantic-search-hybrid-bm25-and-neural-search | 270.056 | ms | +| 50th percentile service time | semantic-search-hybrid-bm25-and-neural-search | 212.232 | ms | +| 90th percentile service time | semantic-search-hybrid-bm25-and-neural-search | 228.367 | ms | +| 99th percentile service time | semantic-search-hybrid-bm25-and-neural-search | 252.348 | ms | +| 100th percentile service time | semantic-search-hybrid-bm25-and-neural-search | 270.056 | ms | +| error rate | semantic-search-hybrid-bm25-and-neural-search | 0 | % | +| Min Throughput | semantic-search-hybrid-bm25-range-and-neural-search | 34.9 | ops/s | +| Mean Throughput | semantic-search-hybrid-bm25-range-and-neural-search | 36.06 | ops/s | +| Median Throughput | semantic-search-hybrid-bm25-range-and-neural-search | 36.23 | ops/s | +| Max Throughput | semantic-search-hybrid-bm25-range-and-neural-search | 36.62 | ops/s | +| 50th percentile latency | semantic-search-hybrid-bm25-range-and-neural-search | 213.919 | ms | +| 90th percentile latency | semantic-search-hybrid-bm25-range-and-neural-search | 231.788 | ms | +| 99th percentile latency | semantic-search-hybrid-bm25-range-and-neural-search | 248.793 | ms | +| 100th percentile latency | semantic-search-hybrid-bm25-range-and-neural-search | 265.484 | ms | +| 50th percentile service time | semantic-search-hybrid-bm25-range-and-neural-search | 213.919 | ms | +| 90th percentile service time | semantic-search-hybrid-bm25-range-and-neural-search | 231.788 | ms | +| 99th percentile service time | semantic-search-hybrid-bm25-range-and-neural-search | 248.793 | ms | +| 
100th percentile service time | semantic-search-hybrid-bm25-range-and-neural-search | 265.484 | ms | +| error rate | semantic-search-hybrid-bm25-range-and-neural-search | 0 | % | + + +--------------------------------- +[INFO] SUCCESS (took 164 seconds) +--------------------------------- +``` + +## License + +Following license used by original dataset and we're using it too. +``` + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ +``` +Covid-trec [1] is part of the COVID-19 Open Research dataset [2], which is licensed under Apache 2.0. +[1] https://arxiv.org/pdf/2005.04474v1.pdf +[2] https://github.com/allenai/cord19/ diff --git a/trec_covid_semantic_search/index.json b/trec_covid_semantic_search/index.json new file mode 100644 index 00000000..b3a10a2b --- /dev/null +++ b/trec_covid_semantic_search/index.json @@ -0,0 +1,46 @@ +{ + "settings": { + "index.number_of_shards": {{number_of_shards | default(1)}}, + "index.number_of_replicas": {{number_of_replicas | default(0)}}, + "index.queries.cache.enabled": {{query_cache_enabled | default(false) | tojson}}, + "index.requests.cache.enable": {{requests_cache_enabled | default(false) | tojson}}, + "index.merge.policy.max_merged_segment": "100GB", + "index.knn": true, + "default_pipeline": "nlp-ingest-pipeline" + }, + "mappings": { + "dynamic": "true", + "_source": { + "enabled": {{ source_enabled | default(true) | tojson }} + }, + "properties": { + "title": { + "type": "text" + }, + "metadata": { + "type": "nested", + "properties": { + "url": { + "type": "text" + }, + "pubmed_id": { + "type": "integer" + } + } + }, + "passage_embedding": { + "type": "knn_vector", + "dimension": 768, + "method": { + "name": "hnsw", + "space_type": "innerproduct", + "engine": "faiss", + "parameters": { + "ef_construction": 256, + "m": 256 + } + } + } + } + } +} diff --git a/trec_covid_semantic_search/operations/default.json b/trec_covid_semantic_search/operations/default.json new file mode 100644 index 
00000000..9821be46 --- /dev/null +++ b/trec_covid_semantic_search/operations/default.json @@ -0,0 +1,172 @@ +{ + "name": "index", + "operation-type": "bulk", + "bulk-size": {{bulk_size | default(100)}}, + "ingest-percentage": {{ingest_percentage | default(100)}} +}, +{ + "name": "delete-ingest-pipeline", + "operation-type": "delete-pipeline", + "id": "nlp-ingest-pipeline" + }, + { + "name": "create-ingest-pipeline", + "operation-type": "put-pipeline", + "param-source": "create-ingest-pipeline", + "id": "nlp-ingest-pipeline", + "body": { + "description": "An NLP ingest pipeline", + "processors": [ + { + "text_embedding": { + "model_id": "", + "field_map": { + "title": "passage_embedding" + } + } + } + ] + } + }, + { + "name": "index-append", + "operation-type": "bulk", + "bulk-size": {{bulk_size | default(100)}}, + "ingest-percentage": {{ingest_percentage | default(100)}} + }, + { + "name": "default", + "operation-type": "search", + "body": { + "query": { + "match_all": {} + } + } + }, + { + "name": "semantic-search-neural", + "operation-type": "search", + "variable-queries": {{variable_queries | default(0)}}, + "param-source": "semantic-search-neural-source", + "body": { + "_source": { + "excludes": [ + "passage_embedding" + ] + }, + "query": { + "neural": { + "passage_embedding": { + "query_text": "what types of rapid testing for Covid-19 have been developed?", + "model_id": "", + "k": {{k | default(10)}} + } + } + } + } + }, + { + "name": "create-normalization-processor-no-weights-search-pipeline", + "operation-type": "create-search-pipeline", + "id": "nlp-min-max-arithmetic-search-pipeline", + "body": { + "description": "Post processor for hybrid search with min_max normalization and arithmetic_mean combination", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "min_max" + }, + "combination": { + "technique": "arithmetic_mean" + } + } + } + ] + } + }, + { + "name": 
"semantic-search-hybrid-bm25-and-neural-search", + "operation-type": "search", + "request-params": { + "search_pipeline": "nlp-min-max-arithmetic-search-pipeline" + }, + "variable-queries": {{variable_queries | default(0)}}, + "param-source": "hybrid-query-bm25-neural-search-source", + "body": { + "_source": { + "excludes": [ + "passage_embedding" + ] + }, + "query": { + "hybrid": { + "queries": [ + { + "match": { + "title": "" + } + }, + { + "neural": { + "passage_embedding": { + "query_text": "what types of rapid testing for Covid-19 have been developed?", + "model_id": "", + "k": {{k | default(10)}} + } + } + } + ] + } + } + } + }, + { + "name": "semantic-search-hybrid-bm25-range-and-neural-search", + "operation-type": "search", + "request-params": { + "search_pipeline": "nlp-min-max-arithmetic-search-pipeline" + }, + "variable-queries": {{variable_queries | default(0)}}, + "param-source": "hybrid-query-bm25-neural-search-source", + "body": { + "_source": { + "excludes": [ + "passage_embedding" + ] + }, + "query": { + "hybrid": { + "queries": [ + { + "match": { + "title": "" + } + }, + { + "neural": { + "passage_embedding": { + "query_text": "what types of rapid testing for Covid-19 have been developed?", + "model_id": "", + "k": {{k | default(10)}} + } + } + }, + { + "nested": { + "path": "metadata", + "query": { + "range": { + "metadata.pubmed_id": { + "gte": {{range_gte | default(100)}}, + "lte": {{range_lte | default(10000000)}} + } + } + } + } + } + ] + } + } + } + } diff --git a/trec_covid_semantic_search/params/params.json b/trec_covid_semantic_search/params/params.json new file mode 100644 index 00000000..310565b0 --- /dev/null +++ b/trec_covid_semantic_search/params/params.json @@ -0,0 +1,12 @@ +{ + "bulk_indexing_clients": 2, + "bulk_size": 100, + "number_of_replicas": 1, + "number_of_shards" :8, + "ingest_percentage":100, + "search_clients": 8, + "warmup_iterations": 20, + "iterations": 100, + "variable_queries": 50, + "k": 100 +} diff --git 
a/trec_covid_semantic_search/test_procedures/procedures.json b/trec_covid_semantic_search/test_procedures/procedures.json new file mode 100644 index 00000000..8c5b5f6d --- /dev/null +++ b/trec_covid_semantic_search/test_procedures/procedures.json @@ -0,0 +1,162 @@ +{ + "name": "create-index-ingest-data-search", + "description": "Indexes the whole document corpus using OpenSearch default settings. After that several query groups are run.", + "default": true, + "schedule": [ + { + "name": "cluster-settings", + "operation": { + "operation-type": "put-settings", + "body": { + "persistent": { + "plugins": { + "ml_commons": { + "only_run_on_ml_node": "false", + "native_memory_threshold": "100", + "allow_registering_model_via_local_file": "true", + "allow_registering_model_via_url": "true" + } + } + } + } + } + }, + { + "operation": "delete-index" + }, + { + "operation": "delete-ingest-pipeline" + }, + { + "operation": { + "operation-type": "delete-ml-model", + "model-name": "{{ model_name | default('huggingface/sentence-transformers/all-mpnet-base-v2')}}" + } + }, + { + "operation": { + "operation-type": "register-ml-model", + "model-name": "{{ model_name | default('huggingface/sentence-transformers/all-mpnet-base-v2')}}", + "model-version": "{{ model_version | default('1.0.1') }}", + "model-format": "{{ model_format | default('TORCH_SCRIPT') }}", + "model-config-file": "{{ model_config_file | default('') }}" + } + }, + { + "operation": "deploy-ml-model" + }, + { + "operation": "create-ingest-pipeline" + }, + { + "operation": { + "operation-type": "create-index", + "settings": {%- if index_settings is defined %} {{index_settings | tojson}} {%- else %} { + "index.number_of_shards": {{number_of_shards | default(3)}}, + "index.number_of_replicas": {{number_of_replicas | default(0)}}, + "index.store.type": "{{store_type | default('fs')}}" + }{%- endif %} + } + }, + { + "name": "check-cluster-health-before-index-creation", + "operation": { + "operation-type": 
"cluster-health", + "index": "trec-covid", + "request-params": { + "wait_for_status": "{{cluster_health | default('green')}}", + "wait_for_no_relocating_shards": "true" + }, + "retry-until-success": true + } + }, + { + "operation": "index-append", + "warmup-time-period": 60, + "clients": {{bulk_indexing_clients | default(1)}}, + "ignore-response-error-level": "{{error_level | default('non-fatal')}}" + }, + { + "name": "refresh-after-index-created", + "operation": "refresh" + }, + { + "operation": { + "operation-type": "force-merge", + "request-timeout": 7200{%- if force_merge_max_num_segments is defined %}, + "max-num-segments": {{ force_merge_max_num_segments | tojson }} + {%- endif %} + } + }, + { + "name": "refresh-after-force-merge", + "operation": "refresh" + }, + { + "name": "wait-until-merges-finish", + "operation": { + "operation-type": "index-stats", + "index": "_all", + "condition": { + "path": "_all.total.merges.current", + "expected-value": 0 + }, + "retry-until-success": true, + "include-in-reporting": false + } + }, + { + "operation": "default", + "warmup-iterations": {{warmup_iterations | default(500) | tojson}}, + "iterations": {{iterations | default(500) | tojson }}, + "target-throughput": {{ target_throughput | default(100) | tojson}}, + "clients": {{ search_clients | default(1) }} + } + ] +}, +{ + "name": "search", + "description": "Run semantic search work.", + "default": false, + "schedule": [ + { + "operation": { + "operation-type": "delete-ml-model", + "model-name": "{{ model_name | default('huggingface/sentence-transformers/all-mpnet-base-v2')}}" + } + }, + { + "operation": { + "operation-type": "register-ml-model", + "model-name": "{{ model_name | default('huggingface/sentence-transformers/all-mpnet-base-v2')}}", + "model-version": "{{ model_version | default('1.0.1') }}", + "model-format": "{{ model_format | default('TORCH_SCRIPT') }}", + "model-config-file": "{{ model_config_file | default('') }}" + } + }, + { + "operation": 
"deploy-ml-model"
+      },
+      {
+        "operation": "create-normalization-processor-no-weights-search-pipeline"
+      },
+      {
+        "operation": "semantic-search-neural",
+        "warmup-iterations": {{warmup_iterations | default(50) | tojson}},
+        "iterations": {{iterations | default(100) | tojson }},
+        "clients": {{ search_clients | default(1)}}
+      },
+      {
+        "operation": "semantic-search-hybrid-bm25-and-neural-search",
+        "warmup-iterations": {{warmup_iterations | default(50) | tojson}},
+        "iterations": {{iterations | default(100) | tojson }},
+        "clients": {{ search_clients | default(1)}}
+      },
+      {
+        "operation": "semantic-search-hybrid-bm25-range-and-neural-search",
+        "warmup-iterations": {{warmup_iterations | default(50) | tojson}},
+        "iterations": {{iterations | default(100) | tojson }},
+        "clients": {{ search_clients | default(1)}}
+      }
+  ]
+}
\ No newline at end of file
diff --git a/trec_covid_semantic_search/workload.json b/trec_covid_semantic_search/workload.json
new file mode 100644
index 00000000..b8eedc27
--- /dev/null
+++ b/trec_covid_semantic_search/workload.json
@@ -0,0 +1,30 @@
+{% import "benchmark.helpers" as benchmark with context %}
+
+{
+  "version": 2,
+  "description": "Benchmark performance of semantic search queries based on the trec-covid dataset of COVID-19 research documents",
+  "indices": [
+    {
+      "name": "trec-covid",
+      "body": "index.json"
+    }
+  ],
+  "corpora": [
+    {
+      "name": "trec-covid",
+      "base-url": "https://github.com/martin-gaievski/neural-search/releases/download/trec_covid_dataset_1M_v1",
+      "documents": [
+        {
+          "source-file": "documents.json.zip",
+          "document-count": 1027950
+        }
+      ]
+    }
+  ],
+  "operations": [
+    {{ benchmark.collect(parts="operations/*.json") }}
+  ],
+  "test_procedures": [
+    {{ benchmark.collect(parts="test_procedures/*.json") }}
+  ]
+}
diff --git a/trec_covid_semantic_search/workload.py b/trec_covid_semantic_search/workload.py
new file mode 100644
index 00000000..64932c5d
--- /dev/null
+++ b/trec_covid_semantic_search/workload.py
@@ -0,0
+1,183 @@ +import random +import os +import json +from pathlib import Path + +from osbenchmark.workload.loader import Downloader +from osbenchmark.workload.loader import Decompressor +from osbenchmark.workload.loader import Decompressor + +script_dir = os.path.dirname(os.path.realpath(__file__)) + +def ingest_pipeline_param_source(workload, params, **kwargs): + model_id = params['body']['processors'][0]['text_embedding']['model_id'] + if not model_id: + with open('model_id.json') as f: + d = json.loads(f.read()) + model_id = d['model_id'] + params['body']['processors'][0]['text_embedding']['model_id'] = model_id + return params + +class QueryParamSourceNeural: + def __init__(self, workload, params, **kwargs): + if len(workload.indices) == 1: + index = workload.indices[0].name + if len(workload.indices[0].types) == 1: + type = workload.indices[0].types[0].name + else: + type = None + else: + index = "_all" + type = None + + self._params = params + self._params['index'] = index + self._params['type'] = type + self._params['variable-queries'] = params.get("variable-queries", 0) + self.infinite = True + + if self._params['variable-queries'] > 0: + with open(script_dir + os.sep + 'workload_queries_knn.json', 'r') as f: + d = json.loads(f.read()) + source_file = d['source-file'] + base_url = d['base-url'] + compressed_bytes = d['compressed-bytes'] + uncompressed_bytes = d['uncompressed-bytes'] + compressed_path = script_dir + os.sep + source_file + uncompressed_path = script_dir + os.sep + Path(source_file).stem + if not os.path.exists(compressed_path): + downloader = Downloader(False, False) + downloader.download(base_url, None, compressed_path, compressed_bytes) + if not os.path.exists(uncompressed_path): + decompressor = Decompressor() + decompressor.decompress(compressed_path, uncompressed_path, uncompressed_bytes) + + def partition(self, partition_index, total_partitions): + return self + + def params(self): + params = self._params + with open('model_id.json', 'r') 
as f: + d = json.loads(f.read()) + params['body']['query']['neural']['passage_embedding']['model_id'] = d['model_id'] + count = self._params.get("variable-queries", 0) + if count > 0: + script_dir = os.path.dirname(os.path.realpath(__file__)) + with open(script_dir + '/queries.json', 'r') as f: + lines = f.read().splitlines() + line =random.choice(lines) + query_text = json.loads(line)['query'] + params['body']['query']['neural']['passage_embedding']['query_text'] = query_text + + return params + +class QueryParamSourceHybridBm25: + def __init__(self, workload, params, **kwargs): + if len(workload.indices) == 1: + index = workload.indices[0].name + if len(workload.indices[0].types) == 1: + type = workload.indices[0].types[0].name + else: + type = None + else: + index = "_all" + type = None + + self._params = params + self._params['index'] = index + self._params['type'] = type + self._params['variable-queries'] = params.get("variable-queries", 0) + self.infinite = True + + if self._params['variable-queries'] > 0: + with open(script_dir + os.sep + 'workload_queries_knn.json', 'r') as f: + d = json.loads(f.read()) + source_file = d['source-file'] + base_url = d['base-url'] + compressed_bytes = d['compressed-bytes'] + uncompressed_bytes = d['uncompressed-bytes'] + compressed_path = script_dir + os.sep + source_file + uncompressed_path = script_dir + os.sep + Path(source_file).stem + if not os.path.exists(compressed_path): + downloader = Downloader(False, False) + downloader.download(base_url, None, compressed_path, compressed_bytes) + if not os.path.exists(uncompressed_path): + decompressor = Decompressor() + decompressor.decompress(compressed_path, uncompressed_path, uncompressed_bytes) + + def partition(self, partition_index, total_partitions): + return self + + def params(self): + params = self._params + count = self._params.get("variable-queries", 0) + if count > 0: + script_dir = os.path.dirname(os.path.realpath(__file__)) + with open(script_dir + '/queries.json', 
'r') as f: + lines = f.read().splitlines() + line =random.choice(lines) + query_text = json.loads(line)['query'] + match_query = random.choice(query_text.split()).lower() + params['body']['query']['hybrid']['queries'][0]['match']['title'] = match_query + return params + +class QueryParamSourceHybridBm25Neural: + def __init__(self, workload, params, **kwargs): + if len(workload.indices) == 1: + index = workload.indices[0].name + if len(workload.indices[0].types) == 1: + type = workload.indices[0].types[0].name + else: + type = None + else: + index = "_all" + type = None + + self._params = params + self._params['index'] = index + self._params['type'] = type + self._params['variable-queries'] = params.get("variable-queries", 0) + self.infinite = True + + if self._params['variable-queries'] > 0: + with open(script_dir + os.sep + 'workload_queries_knn.json', 'r') as f: + d = json.loads(f.read()) + source_file = d['source-file'] + base_url = d['base-url'] + compressed_bytes = d['compressed-bytes'] + uncompressed_bytes = d['uncompressed-bytes'] + compressed_path = script_dir + os.sep + source_file + uncompressed_path = script_dir + os.sep + Path(source_file).stem + if not os.path.exists(compressed_path): + downloader = Downloader(False, False) + downloader.download(base_url, None, compressed_path, compressed_bytes) + if not os.path.exists(uncompressed_path): + decompressor = Decompressor() + decompressor.decompress(compressed_path, uncompressed_path, uncompressed_bytes) + + def partition(self, partition_index, total_partitions): + return self + + def params(self): + params = self._params + count = self._params.get("variable-queries", 0) + if count > 0: + script_dir = os.path.dirname(os.path.realpath(__file__)) + model_id = '' + with open('model_id.json', 'r') as f: + d = json.loads(f.read()) + model_id = d['model_id'] + with open(script_dir + '/queries.json', 'r') as f: + lines = f.read().splitlines() + line =random.choice(lines) + query_text = json.loads(line)['query'] + 
match_query = random.choice(query_text.split()).lower() + params['body']['query']['hybrid']['queries'][0]['match']['title'] = match_query + params['body']['query']['hybrid']['queries'][1]['neural']['passage_embedding']['model_id'] = model_id + params['body']['query']['hybrid']['queries'][1]['neural']['passage_embedding']['query_text'] = query_text + return params + +def register(registry): + registry.register_param_source("semantic-search-neural-source", QueryParamSourceNeural) + registry.register_param_source("hybrid-query-bm25-neural-search-source", QueryParamSourceHybridBm25Neural) + registry.register_param_source("create-ingest-pipeline", ingest_pipeline_param_source) \ No newline at end of file diff --git a/trec_covid_semantic_search/workload_queries_knn.json b/trec_covid_semantic_search/workload_queries_knn.json new file mode 100644 index 00000000..e9073cfe --- /dev/null +++ b/trec_covid_semantic_search/workload_queries_knn.json @@ -0,0 +1,6 @@ +{ + "base-url": "https://github.com/martin-gaievski/neural-search/releases/download/trec_covid_queries_knn", + "source-file": "queries.json.zip", + "compressed-bytes" : 98855, + "uncompressed-bytes": 260018 +}