From 707b312667b2d9891cfd619c080d62420ebe4e00 Mon Sep 17 00:00:00 2001 From: Varun Jain Date: Sun, 4 Aug 2024 11:17:45 -0700 Subject: [PATCH] Sorting Hybrid Search Signed-off-by: Varun Jain --- .../operations/hybrid_search.json | 327 +++++++++++++++++- .../test_procedures/hybrid_search.json | 2 +- .../multiple-subqueries-search.json | 20 +- .../single-subquery-large-set-search.json | 41 +-- .../single-subquery-medium-set-search.json | 41 +-- .../single-subquery-small-set-search.json | 41 +-- 6 files changed, 381 insertions(+), 91 deletions(-) diff --git a/noaa_semantic_search/operations/hybrid_search.json b/noaa_semantic_search/operations/hybrid_search.json index 214d1111..c70167a3 100644 --- a/noaa_semantic_search/operations/hybrid_search.json +++ b/noaa_semantic_search/operations/hybrid_search.json @@ -52,6 +52,47 @@ } } }, + { + "name": "bool-only-term-range-date-sort", + "operation-type": "search", + "body": { + "size": 100, + "query": { + "bool": { + "should": [ + { + "term": { + "station.country_code": "JA" + } + }, + { + "range": { + "TRANGE": { + "gte": 0, + "lte": 30 + } + } + }, + { + "range": { + "date": { + "gte": "2016-06-04", + "format":"yyyy-MM-dd" + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } + }, { "name": "hybrid-query-only-term-range-date", "operation-type": "search", @@ -89,6 +130,50 @@ } } }, + { + "name": "hybrid-query-only-term-range-date-sort", + "operation-type": "search", + "request-params": { + "search_pipeline": "nlp-min-max-arithmetic-search-pipeline" + }, + "body": { + "size": 100, + "query": { + "hybrid": { + "queries": [ + { + "term": { + "station.country_code": "JA" + } + }, + { + "range": { + "TRANGE": { + "gte": 0, + "lte": 30 + } + } + }, + { + "range": { + "date": { + "gte": "2016-06-04", + "format":"yyyy-MM-dd" + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } + }, { "name": "aggs-query-term-min-hybrid", "operation-type": "search", @@ -307,6 +392,27 @@ } } }, + { + "name": "bool-query-only-range", + "operation-type": "search", + "body": { + "size": 100, + "query": { + "bool": { + "should": [ + { + "range": { + "TRANGE": { + "gte": -100, + "lte": -50 + } + } + } + ] + } + } + } + }, { "name": "hybrid-query-only-range", "operation-type": "search", @@ -331,6 +437,65 @@ } } }, + { + "name": "bool-query-only-range-sort", + "operation-type": "search", + "body": { + "size": 100, + "query": { + "bool": { + "should": [ + { + "range": { + "TRANGE": { + "gte": -100, + "lte": -50 + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } + }, + { + "name": "hybrid-query-only-range-sort", + "operation-type": "search", + "request-params": { + "search_pipeline": "nlp-min-max-arithmetic-search-pipeline" + }, + "body": { + "size": 100, + "query": { + "hybrid": { + "queries": [ + { + "range": { + "TRANGE": { + "gte": -100, + "lte": -50 + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } + }, { "name": "aggs-query-term-min-hybrid-one-subquery", "operation-type": "search", @@ -497,6 +662,55 @@ } } }, + { + "name": "bool-query-only-range-medium-subset", + "operation-type": "search", + "body": { + "size": 100, + "query": { + "bool": { + "should": [ + { + "range": { + "TRANGE": { + "gte": -90, + "lte": -7 + } + } + } + ] + } + } + } + }, + { + "name": "bool-query-only-range-medium-subset-sort", + "operation-type": "search", + "body": { + "size": 100, + "query": { + "bool":{ + "should": [ + { + "range": { + "TRANGE": { + "gte": -90, + "lte": -7 + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } + }, { "name": "hybrid-query-only-range-medium-subset", "operation-type": "search", @@ -520,6 +734,37 @@ } } } + }, + { + "name": "hybrid-query-only-range-medium-subset-sort", + "operation-type": "search", + "request-params": { + "search_pipeline": "nlp-min-max-arithmetic-search-pipeline" + }, + "body": { + "size": 100, + "query": { + "hybrid": { + "queries": [ + { + "range": { + "TRANGE": { + "gte": -90, + "lte": -7 + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } }, { "name": "aggs-query-term-min-hybrid-one-subquery-medium-subset", @@ -687,6 +932,55 @@ } } }, + { + "name": "bool-query-only-range-large-subset", + "operation-type": "search", + "body": { + "size": 100, + "query": { + "bool": { + "should": [ + { + "range": { + "TRANGE": { + "gte": 1, + "lte": 35 + } + } + } + ] + } + } + } + }, + { + "name": "bool-query-only-range-large-subset-sort", + "operation-type": "search", + "body": { + "size": 100, + "query": { + "bool": { + "should": [ + { + "range": { + "TRANGE": { + "gte": 1, + "lte": 35 + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } + }, { "name": "hybrid-query-only-range-large-subset", "operation-type": "search", @@ -711,6 +1005,37 @@ } } }, + { + "name": "hybrid-query-only-range-large-subset-sort", + "operation-type": "search", + "request-params": { + "search_pipeline": "nlp-min-max-arithmetic-search-pipeline" + }, + "body": { + "size": 100, + "query": { + "hybrid": { + "queries": [ + { + "range": { + "TRANGE": { + "gte": 1, + "lte": 35 + } + } + } + ] + } + }, + "sort":[ + { + "AWND":{ + "order":"desc" + } + } + ] + } + }, { "name": "aggs-query-term-min-hybrid-one-subquery-large-subset", "operation-type": "search", @@ -876,4 +1201,4 @@ } } } - } + } \ No newline at end of file diff --git a/noaa_semantic_search/test_procedures/hybrid_search.json b/noaa_semantic_search/test_procedures/hybrid_search.json index ab024bf4..da688451 100644 --- a/noaa_semantic_search/test_procedures/hybrid_search.json +++ b/noaa_semantic_search/test_procedures/hybrid_search.json @@ -9,7 +9,7 @@ ] }, { - "name": "hybrid-query-aggs-full", + "name": "bool-vs-hybrid", "description": "Indexes the whole document corpus using OpenSearch default settings. After that several query groups are run.", "default": false, "schedule": [ diff --git a/noaa_semantic_search/test_procedures/semantic-search-common/multiple-subqueries-search.json b/noaa_semantic_search/test_procedures/semantic-search-common/multiple-subqueries-search.json index cf89de7f..48557c6c 100644 --- a/noaa_semantic_search/test_procedures/semantic-search-common/multiple-subqueries-search.json +++ b/noaa_semantic_search/test_procedures/semantic-search-common/multiple-subqueries-search.json @@ -1,33 +1,19 @@ { - "operation": "hybrid-query-only-term-range-date", + "operation": "bool-only-term-range-date", "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, "iterations": {{ test_iterations | default(100) | tojson }}, "target-throughput": {{ target_throughput | default(2) | tojson }}, "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-min-avg-sum-hybrid", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - }, - { - "operation": "aggs-query-term-min-hybrid", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - }, - { - "operation": "aggs-query-date-histo-geohash-grid-hybrid", + "operation": "hybrid-query-only-term-range-date", "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, "iterations": {{ test_iterations | default(100) | tojson }}, "target-throughput": {{ target_throughput | default(2) | tojson }}, "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-range-numeric-significant-terms-hybrid", + "operation": "bool-only-term-range-date-sort", "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, "iterations": {{ test_iterations | default(100) | tojson }}, "target-throughput": {{ target_throughput | default(2) | tojson }}, diff --git a/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-large-set-search.json b/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-large-set-search.json index ada59c89..53b95575 100644 --- a/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-large-set-search.json +++ b/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-large-set-search.json @@ -1,35 +1,28 @@ { - "operation": "hybrid-query-only-range-large-subset", + "operation": "bool-query-only-range-large-subset", "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, "iterations": {{ test_iterations | default(100) | tojson }}, "target-throughput": {{ target_throughput | default(2) | tojson }}, "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-min-avg-sum-hybrid-one-subquery-large-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - }, - { - "operation": "aggs-query-term-min-hybrid-one-subquery-large-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} + "operation": "hybrid-query-only-range-large-subset", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-date-histo-geohash-grid-hybrid-one-subquery-large-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} + "operation": "bool-query-only-range-large-subset-sort", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-range-numeric-significant-terms-hybrid-one-subquery-large-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - } \ No newline at end of file + "operation": "hybrid-query-only-range-large-subset-sort", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} + } \ No newline at end of file diff --git a/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-medium-set-search.json b/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-medium-set-search.json index 907f086b..9aa7d877 100644 --- a/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-medium-set-search.json +++ b/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-medium-set-search.json @@ -1,35 +1,28 @@ { - "operation": "hybrid-query-only-range-medium-subset", + "operation": "bool-query-only-range-medium-subset", "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, "iterations": {{ test_iterations | default(100) | tojson }}, "target-throughput": {{ target_throughput | default(2) | tojson }}, "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-min-avg-sum-hybrid-one-subquery-medium-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - }, - { - "operation": "aggs-query-term-min-hybrid-one-subquery-medium-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} + "operation": "hybrid-query-only-range-medium-subset", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-date-histo-geohash-grid-hybrid-one-subquery-medium-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} + "operation": "bool-query-only-range-medium-subset-sort", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-range-numeric-significant-terms-hybrid-one-subquery-medium-subset", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - } \ No newline at end of file + "operation": "hybrid-query-only-range-medium-subset-sort", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} + } \ No newline at end of file diff --git a/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-small-set-search.json b/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-small-set-search.json index 337f1d14..6c1c582e 100644 --- a/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-small-set-search.json +++ b/noaa_semantic_search/test_procedures/semantic-search-common/single-subquery-small-set-search.json @@ -1,35 +1,28 @@ { - "operation": "hybrid-query-only-range", + "operation": "bool-query-only-range", "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, "iterations": {{ test_iterations | default(100) | tojson }}, "target-throughput": {{ target_throughput | default(2) | tojson }}, "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-min-avg-sum-hybrid-one-subquery", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - }, - { - "operation": "aggs-query-term-min-hybrid-one-subquery", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} + "operation": "hybrid-query-only-range", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-date-histo-geohash-grid-hybrid-one-subquery", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} + "operation": "bool-query-only-range-sort", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} }, { - "operation": "aggs-query-range-numeric-significant-terms-hybrid-one-subquery", - "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, - "iterations": {{ test_iterations | default(100) | tojson }}, - "target-throughput": {{ target_throughput | default(2) | tojson }}, - "clients": {{ search_clients | default(1) }} - } \ No newline at end of file + "operation": "hybrid-query-only-range-sort", + "warmup-iterations": {{ warmup_iterations | default(25) | tojson }}, + "iterations": {{ test_iterations | default(100) | tojson }}, + "target-throughput": {{ target_throughput | default(2) | tojson }}, + "clients": {{ search_clients | default(1) }} + } \ No newline at end of file