Skip to content

Commit

Permalink
[Backport 2.x] Add support for search using the "fields" parameter wi…
Browse files Browse the repository at this point in the history
…th knn_vector field (#2411)

* Add support for search using the "fields" parameter with knn_vector field (#2314)

Signed-off-by: Ethan Emoto <[email protected]>
Signed-off-by: Ethan Emoto <[email protected]>
(cherry picked from commit 5c63fde)

* Fix NPE in backporting fields parameter ITs

Signed-off-by: Ethan Emoto <[email protected]>

* Mute testKNNWarmupCustomLegacyFieldMapping (#2416)

Signed-off-by: Kunal Kotwani <[email protected]>

---------

Signed-off-by: Ethan Emoto <[email protected]>
Signed-off-by: Kunal Kotwani <[email protected]>
Co-authored-by: Kunal Kotwani <[email protected]>
  • Loading branch information
e-emoto and kotwanikunal authored Jan 23, 2025
1 parent 6ca19c1 commit d69d485
Show file tree
Hide file tree
Showing 5 changed files with 370 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Remove DocsWithFieldSet reference from NativeEngineFieldVectorsWriter (#2408)[https://github.com/opensearch-project/k-NN/pull/2408]
### Bug Fixes
* Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282]
* Fixing the bug where search fails with "fields" parameter for an index with a knn_vector field (#2314)[https://github.com/opensearch-project/k-NN/pull/2314]
* Fix for NPE while merging segments after all the vector fields docs are deleted (#2365)[https://github.com/opensearch-project/k-NN/pull/2365]
* Allow validation for non knn index only after 2.17.0 (#2315)[https://github.com/opensearch-project/k-NN/pull/2315]
* Fixing the bug to prevent updating the index.knn setting after index creation(#2348)[https://github.com/opensearch-project/k-NN/pull/2348]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
package org.opensearch.knn.index.mapper;

import lombok.Getter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.ArraySourceValueFetcher;
import org.opensearch.index.mapper.TextSearchInfo;
import org.opensearch.index.mapper.ValueFetcher;
import org.opensearch.index.query.QueryShardContext;
Expand All @@ -24,6 +27,8 @@
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
import org.opensearch.search.lookup.SearchLookup;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
Expand All @@ -36,6 +41,7 @@
*/
@Getter
public class KNNVectorFieldType extends MappedFieldType {
private static final Logger logger = LogManager.getLogger(KNNVectorFieldType.class);
KNNMappingConfig knnMappingConfig;
VectorDataType vectorDataType;

Expand All @@ -55,7 +61,17 @@ public KNNVectorFieldType(String name, Map<String, String> metadata, VectorDataT

@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
throw new UnsupportedOperationException("KNN Vector do not support fields search");
return new ArraySourceValueFetcher(name(), context) {
@Override
protected Object parseSourceValue(Object value) {
if (value instanceof ArrayList) {
return value;
} else {
logger.warn("Expected type ArrayList for value, but got {} ", value.getClass());
return Collections.emptyList();
}
}
};
}

@Override
Expand Down
298 changes: 297 additions & 1 deletion src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Floats;
import java.util.Locale;

import lombok.SneakyThrows;
import org.apache.http.ParseException;
import org.junit.BeforeClass;
Expand All @@ -39,6 +39,7 @@
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

Expand Down Expand Up @@ -924,6 +925,301 @@ public void testKNNIndex_whenBuildVectorGraphThresholdIsProvidedEndToEnd_thenBui
deleteKNNIndex(indexName);
}

public void testKNNIndexSearchFieldsParameter() throws Exception {
createKnnIndex(INDEX_NAME, createKnnIndexMapping(Arrays.asList("vector1", "vector2", "vector3"), Arrays.asList(2, 3, 5)));
// Add docs with knn_vector fields
for (int i = 1; i <= 20; i++) {
Float[] vector1 = { (float) i, (float) (i + 1) };
Float[] vector2 = { (float) i, (float) (i + 1), (float) (i + 2) };
Float[] vector3 = { (float) i, (float) (i + 1), (float) (i + 2), (float) (i + 3), (float) (i + 4) };
addKnnDoc(
INDEX_NAME,
Integer.toString(i),
Arrays.asList("vector1", "vector2", "vector3"),
Arrays.asList(vector1, vector2, vector3)
);
}
int k = 10; // nearest 10 neighbors

// Create match_all search body, all fields
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "*" })
.startObject("query")
.startObject("match_all")
.endObject()
.endObject()
.endObject();
Response response = searchKNNIndex(INDEX_NAME, builder, k);
String entity = EntityUtils.toString(response.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity, "vector1"));
assertEquals(k, parseSearchResponseFieldsCount(entity, "vector2"));
assertEquals(k, parseSearchResponseFieldsCount(entity, "vector3"));

// Create match_all search body, some fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "vector1", "vector2" })
.startObject("query")
.startObject("match_all")
.endObject()
.endObject()
.endObject();
Response response2 = searchKNNIndex(INDEX_NAME, builder, k);
String entity2 = EntityUtils.toString(response2.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity2, "vector1"));
assertEquals(k, parseSearchResponseFieldsCount(entity2, "vector2"));
assertEquals(0, parseSearchResponseFieldsCount(entity2, "vector3"));

// Create knn search body, all fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "*" })
.startObject("query")
.startObject("knn")
.startObject("vector2")
.field("vector", new float[] { 2.0f, 2.0f, 2.0f })
.field("k", k)
.endObject()
.endObject()
.endObject()
.endObject();
Response response3 = searchKNNIndex(INDEX_NAME, builder, k);
String entity3 = EntityUtils.toString(response3.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity3, "vector1"));
assertEquals(k, parseSearchResponseFieldsCount(entity3, "vector2"));
assertEquals(k, parseSearchResponseFieldsCount(entity3, "vector3"));

// Create knn search body, some fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "vector1", "vector2" })
.startObject("query")
.startObject("knn")
.startObject("vector2")
.field("vector", new float[] { 2.0f, 2.0f, 2.0f })
.field("k", k)
.endObject()
.endObject()
.endObject()
.endObject();
Response response4 = searchKNNIndex(INDEX_NAME, builder, k);
String entity4 = EntityUtils.toString(response4.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity4, "vector1"));
assertEquals(k, parseSearchResponseFieldsCount(entity4, "vector2"));
assertEquals(0, parseSearchResponseFieldsCount(entity4, "vector3"));
}

public void testKNNIndexSearchFieldsParameterWithOtherFields() throws Exception {
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("vector1")
.field("type", "knn_vector")
.field("dimension", "2")
.endObject()
.startObject("vector2")
.field("type", "knn_vector")
.field("dimension", "3")
.endObject()
.startObject("float1")
.field("type", "float")
.endObject()
.startObject("float2")
.field("type", "float")
.endObject()
.endObject()
.endObject();
createKnnIndex(INDEX_NAME, xContentBuilder.toString());
// Add docs with knn_vector and other fields
for (int i = 1; i <= 20; i++) {
Float[] vector1 = { (float) i, (float) (i + 1) };
Float[] vector2 = { (float) i, (float) (i + 1), (float) (i + 2) };
Float[] float1 = { (float) i };
Float[] float2 = { (float) (i + 1) };
addKnnDoc(
INDEX_NAME,
Integer.toString(i),
Arrays.asList("vector1", "vector2", "float1", "float2"),
Arrays.asList(vector1, vector2, float1, float2)
);
}
int k = 10; // nearest 10 neighbors

// Create match_all search body, all fields
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "*" })
.startObject("query")
.startObject("match_all")
.endObject()
.endObject()
.endObject();
Response response = searchKNNIndex(INDEX_NAME, builder, k);
String entity = EntityUtils.toString(response.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity, "vector1"));
assertEquals(k, parseSearchResponseFieldsCount(entity, "vector2"));
assertEquals(k, parseSearchResponseFieldsCount(entity, "float1"));
assertEquals(k, parseSearchResponseFieldsCount(entity, "float2"));

// Create match_all search body, some fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "vector1", "float2" })
.startObject("query")
.startObject("match_all")
.endObject()
.endObject()
.endObject();
Response response2 = searchKNNIndex(INDEX_NAME, builder, k);
String entity2 = EntityUtils.toString(response2.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity2, "vector1"));
assertEquals(0, parseSearchResponseFieldsCount(entity2, "vector2"));
assertEquals(0, parseSearchResponseFieldsCount(entity2, "float1"));
assertEquals(k, parseSearchResponseFieldsCount(entity2, "float2"));

// Create knn search body, all fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "*" })
.startObject("query")
.startObject("knn")
.startObject("vector2")
.field("vector", new float[] { 2.0f, 2.0f, 2.0f })
.field("k", k)
.endObject()
.endObject()
.endObject()
.endObject();
Response response3 = searchKNNIndex(INDEX_NAME, builder, k);
String entity3 = EntityUtils.toString(response3.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity3, "vector1"));
assertEquals(k, parseSearchResponseFieldsCount(entity3, "vector2"));
assertEquals(k, parseSearchResponseFieldsCount(entity3, "float1"));
assertEquals(k, parseSearchResponseFieldsCount(entity3, "float2"));

// Create knn search body, some fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "vector1", "float2" })
.startObject("query")
.startObject("knn")
.startObject("vector2")
.field("vector", new float[] { 2.0f, 2.0f, 2.0f })
.field("k", k)
.endObject()
.endObject()
.endObject()
.endObject();
Response response4 = searchKNNIndex(INDEX_NAME, builder, k);
String entity4 = EntityUtils.toString(response4.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity4, "vector1"));
assertEquals(0, parseSearchResponseFieldsCount(entity4, "vector2"));
assertEquals(0, parseSearchResponseFieldsCount(entity4, "float1"));
assertEquals(k, parseSearchResponseFieldsCount(entity4, "float2"));
}

public void testKNNIndexSearchFieldsParameterDocsWithOnlyOtherFields() throws Exception {
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("vector1")
.field("type", "knn_vector")
.field("dimension", "2")
.endObject()
.startObject("vector2")
.field("type", "knn_vector")
.field("dimension", "3")
.endObject()
.startObject("text1")
.field("type", "text")
.endObject()
.endObject()
.endObject();
createKnnIndex(INDEX_NAME, xContentBuilder.toString());
// Add knn_vector docs
for (int i = 1; i <= 20; i++) {
Float[] vector1 = { (float) i, (float) (i + 1) };
Float[] vector2 = { (float) i, (float) (i + 1), (float) (i + 2) };
addKnnDoc(INDEX_NAME, Integer.toString(i), Arrays.asList("vector1", "vector2"), Arrays.asList(vector1, vector2));
}
// Add non knn_vector docs
for (int i = 21; i <= 40; i++) {
addNonKNNDoc(INDEX_NAME, Integer.toString(i), "text1", "text " + i);
}
int k = 10; // nearest 10 neighbors

// Create match search body, all non vector fields
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "text1" })
.startObject("query")
.startObject("match")
.field("text1", "text")
.endObject()
.endObject()
.endObject();
Response response = searchKNNIndex(INDEX_NAME, builder, k);
String entity = EntityUtils.toString(response.getEntity());
assertEquals(0, parseSearchResponseFieldsCount(entity, "vector1"));
assertEquals(0, parseSearchResponseFieldsCount(entity, "vector2"));
assertEquals(k, parseSearchResponseFieldsCount(entity, "text1"));

// Create match search body, all vector fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "vector1", "vector2" })
.startObject("query")
.startObject("match")
.field("text1", "text")
.endObject()
.endObject()
.endObject();
Response response2 = searchKNNIndex(INDEX_NAME, builder, k);
String entity2 = EntityUtils.toString(response2.getEntity());
assertEquals(0, parseSearchResponseFieldsCount(entity2, "vector1"));
assertEquals(0, parseSearchResponseFieldsCount(entity2, "vector2"));
assertEquals(0, parseSearchResponseFieldsCount(entity2, "text1"));

// Create knn search body, all vector fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "vector1", "vector2" })
.startObject("query")
.startObject("knn")
.startObject("vector2")
.field("vector", new float[] { 2.0f, 2.0f, 2.0f })
.field("k", k)
.endObject()
.endObject()
.endObject()
.endObject();
Response response3 = searchKNNIndex(INDEX_NAME, builder, k);
String entity3 = EntityUtils.toString(response3.getEntity());
assertEquals(k, parseSearchResponseFieldsCount(entity3, "vector1"));
assertEquals(k, parseSearchResponseFieldsCount(entity3, "vector2"));
assertEquals(0, parseSearchResponseFieldsCount(entity3, "text1"));

// Create knn search body, all non vector fields
builder = XContentFactory.jsonBuilder()
.startObject()
.field("fields", new String[] { "text1" })
.startObject("query")
.startObject("knn")
.startObject("vector2")
.field("vector", new float[] { 2.0f, 2.0f, 2.0f })
.field("k", k)
.endObject()
.endObject()
.endObject()
.endObject();
Response response4 = searchKNNIndex(INDEX_NAME, builder, k);
String entity4 = EntityUtils.toString(response4.getEntity());
assertEquals(0, parseSearchResponseFieldsCount(entity4, "vector1"));
assertEquals(0, parseSearchResponseFieldsCount(entity4, "vector2"));
assertEquals(0, parseSearchResponseFieldsCount(entity4, "text1"));
}

private List<KNNResult> getResults(final String indexName, final String fieldName, final float[] vector, final int k)
throws IOException, ParseException {
final Response searchResponseField = searchKNNIndex(indexName, new KNNQueryBuilder(fieldName, vector, k), k);
Expand Down
Loading

0 comments on commit d69d485

Please sign in to comment.