Skip to content

Commit

Permalink
Support expand_nested_docs parameter for nmslib engine
Browse files Browse the repository at this point in the history
Signed-off-by: Heemin Kim <[email protected]>
  • Loading branch information
heemin32 committed Dec 12, 2024
1 parent 2b9a741 commit 90f9873
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.18...2.x)
### Features
- Add Support for Multi Values in innerHit for Nested k-NN Fields in Lucene and FAISS (#2283)[https://github.com/opensearch-project/k-NN/pull/2283]
- Add expand_nested_docs Parameter support to NMSLIB engine (#2331)[https://github.com/opensearch-project/k-NN/pull/2331]
### Enhancements
- Introduced a writing layer in native engines that relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241]
- Allow method parameter override for training based indices (#2290)[https://github.com/opensearch-project/k-NN/pull/2290]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ public enum KNNEngine implements KNNLibrary {
private static final Set<KNNEngine> CUSTOM_SEGMENT_FILE_ENGINES = ImmutableSet.of(KNNEngine.NMSLIB, KNNEngine.FAISS);
private static final Set<KNNEngine> ENGINES_SUPPORTING_FILTERS = ImmutableSet.of(KNNEngine.LUCENE, KNNEngine.FAISS);
public static final Set<KNNEngine> ENGINES_SUPPORTING_RADIAL_SEARCH = ImmutableSet.of(KNNEngine.LUCENE, KNNEngine.FAISS);
public static final Set<KNNEngine> ENGINES_SUPPORTING_MULTI_VECTORS = ImmutableSet.of(KNNEngine.LUCENE, KNNEngine.FAISS);

private static Map<KNNEngine, Integer> MAX_DIMENSIONS_BY_ENGINE = Map.of(
KNNEngine.NMSLIB,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_SEARCH;
import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD;
import static org.opensearch.knn.index.VectorDataType.SUPPORTED_VECTOR_DATA_TYPES;
import static org.opensearch.knn.index.engine.KNNEngine.ENGINES_SUPPORTING_MULTI_VECTORS;

/**
* Creates the Lucene k-NN queries
Expand All @@ -50,7 +49,6 @@ public static Query create(CreateQueryRequest createQueryRequest) {
final Query filterQuery = getFilterQuery(createQueryRequest);
final Map<String, ?> methodParameters = createQueryRequest.getMethodParameters();
final RescoreContext rescoreContext = createQueryRequest.getRescoreContext().orElse(null);
final KNNEngine knnEngine = createQueryRequest.getKnnEngine();
final boolean expandNested = createQueryRequest.isExpandNested();
BitSetProducer parentFilter = null;
if (createQueryRequest.getContext().isPresent()) {
Expand Down Expand Up @@ -110,15 +108,7 @@ public static Query create(CreateQueryRequest createQueryRequest) {
.build();
}

if (createQueryRequest.getRescoreContext().isPresent()) {
return new NativeEngineKnnVectorQuery(knnQuery, QueryUtils.INSTANCE, expandNested);
}

if (ENGINES_SUPPORTING_MULTI_VECTORS.contains(knnEngine) && expandNested) {
return new NativeEngineKnnVectorQuery(knnQuery, QueryUtils.INSTANCE, expandNested);
}

return knnQuery;
return new NativeEngineKnnVectorQuery(knnQuery, QueryUtils.INSTANCE, expandNested);
}

Integer requestEfSearch = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@
* A `DocIdSetIterator` that iterates over all nested document IDs belonging to the same parent document for a given
* set of nested document IDs.
*
* The {@link #docIds} should include only a single nested document ID per parent document. Otherwise, the nested documents
* of that parent document will be iterated multiple times.
*
* It is permissible for {@link #docIds} to contain multiple nested document IDs linked to a single parent document.
* In such cases, this iterator will still iterate over each nested document ID only once.
*/
public class GroupedNestedDocIdSetIterator extends DocIdSetIterator {
private final BitSet parentBitSet;
Expand Down Expand Up @@ -99,9 +98,14 @@ public long cost() {

private long calculateCost() {
long numDocs = 0;
int lastDocId = -1;
for (int docId : docIds) {
for (int i = parentBitSet.prevSetBit(docId) + 1; i < parentBitSet.nextSetBit(docId); i++) {
if (filterBits.get(i)) {
if (docId < lastDocId) {
continue;
}

for (lastDocId = parentBitSet.prevSetBit(docId) + 1; lastDocId < parentBitSet.nextSetBit(docId); lastDocId++) {
if (filterBits.get(lastDocId)) {
numDocs++;
}
}
Expand All @@ -111,12 +115,19 @@ private long calculateCost() {

private void moveToNextIndex() {
currentIndex++;
if (currentIndex >= docIds.size()) {
currentDocId = NO_MORE_DOCS;
while (currentIndex < docIds.size()) {
// Advance currentIndex until the docId at the currentIndex is greater than currentDocId.
// This ensures proper handling when docIds contain multiple entries under the same parent ID
// that have already been iterated.
if (docIds.get(currentIndex) <= currentDocId) {
currentIndex++;
continue;
}
currentDocId = parentBitSet.prevSetBit(docIds.get(currentIndex)) + 1;
currentParentId = parentBitSet.nextSetBit(docIds.get(currentIndex));
assert currentParentId != NO_MORE_DOCS;
return;
}
currentDocId = parentBitSet.prevSetBit(docIds.get(currentIndex)) + 1;
currentParentId = parentBitSet.nextSetBit(docIds.get(currentIndex));
assert currentParentId != NO_MORE_DOCS;
currentDocId = NO_MORE_DOCS;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,33 @@ public void testGroupedNestedDocIdSetIterator_whenAdvanceIsCalled_thenBehaveAsEx
assertEquals(DocIdSetIterator.NO_MORE_DOCS, groupedNestedDocIdSetIterator.docID());
assertEquals(expectedDocIds.size(), groupedNestedDocIdSetIterator.cost());
}

/**
 * Checks the iterator when the supplied doc ID set holds several nested documents that share a parent:
 * the filtered documents 1, 8 and 9 are each reported once, after which the iterator is exhausted and
 * its cost equals the number of expected documents.
 */
public void testGroupedNestedDocIdSetIterator_whenGivenMultipleDocsUnderSameParent_thenBehaveAsExpected() throws Exception {
    // Only nested documents 1, 8 and 9 pass the filter.
    BitSet acceptedDocs = new FixedBitSet(new long[1], 11);
    for (int accepted : new int[] { 1, 8, 9 }) {
        acceptedDocs.set(accepted);
    }

    // Document layout: 0, 1, 2(parent), 3, 4, 5, 6, 7(parent), 8, 9, 10(parent)
    BitSet parents = new FixedBitSet(new long[1], 11);
    for (int parent : new int[] { 2, 7, 10 }) {
        parents.set(parent);
    }

    // Several of the requested nested doc IDs sit under the same parent (0 and 1; 3, 4 and 5; 8 and 9).
    Set<Integer> requestedDocIds = Set.of(0, 1, 3, 4, 5, 8, 9);
    GroupedNestedDocIdSetIterator iterator = new GroupedNestedDocIdSetIterator(parents, requestedDocIds, acceptedDocs);

    // Walk the iterator with a mix of advance() and nextDoc() and check the position after every step.
    Set<Integer> matchingDocIds = Set.of(1, 8, 9);

    iterator.advance(1);
    assertEquals(1, iterator.docID());

    iterator.nextDoc();
    assertEquals(8, iterator.docID());

    iterator.advance(9);
    assertEquals(9, iterator.docID());

    iterator.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.docID());

    assertEquals(matchingDocIds.size(), iterator.cost());
}
}

0 comments on commit 90f9873

Please sign in to comment.