From dce2dd3e6cd4bad0c0d1d1333b5f56978436c169 Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Fri, 15 Nov 2024 16:33:17 -0800 Subject: [PATCH] Switch to approximation/two phase approach everywhere in hybrid query Signed-off-by: Martin Gaievski --- CHANGELOG.md | 1 + .../org/opensearch/neuralsearch/query/HybridQueryScorer.java | 2 +- .../search/collector/HybridTopScoreDocCollector.java | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 595ea7dd4..7c376d037 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Features ### Enhancements ### Bug Fixes +- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998)) ### Infrastructure ### Documentation ### Maintenance diff --git a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java index eb410aa23..81d1b552b 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java +++ b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java @@ -187,7 +187,7 @@ public int docID() { */ public float[] hybridScores() throws IOException { float[] scores = new float[numSubqueries]; - DisiWrapper topList = subScorersPQ.topList(); + DisiWrapper topList = getSubMatches(); for (HybridDisiWrapper disiWrapper = (HybridDisiWrapper) topList; disiWrapper != null; disiWrapper = (HybridDisiWrapper) disiWrapper.next) { // check if this doc has match in the subQuery. If not, add score as 0.0 and continue diff --git a/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java b/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java index 4e72b55bf..4ab10bceb 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java +++ b/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java @@ -108,12 +108,17 @@ public void collect(int doc) throws IOException { } // Increment total hit count which represents unique doc found on the shard totalHits++; + hitsThresholdChecker.incrementHitCount(); for (int i = 0; i < subScoresByQuery.length; i++) { float score = subScoresByQuery[i]; // if score is 0.0 there is no hits for that sub-query if (score == 0) { continue; } + if (hitsThresholdChecker.isThresholdReached() && totalHitsRelation == TotalHits.Relation.EQUAL_TO) { + log.info("reached hits threshold check"); + totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; + } collectedHitsPerSubQuery[i]++; PriorityQueue pq = compoundScores[i]; ScoreDoc currentDoc = new ScoreDoc(doc + docBase, score);