Skip to content

Commit

Permalink
fix: support for non-string types in object fields (datahub-project#11066)
Browse files Browse the repository at this point in the history

Co-authored-by: david-leifker <[email protected]>
Co-authored-by: milindgupta <[email protected]>
Co-authored-by: Harshal Sheth <[email protected]>
Co-authored-by: Hyejin Yoon <[email protected]>
Co-authored-by: milindgupta9 <[email protected]>
  • Loading branch information
6 people authored Sep 17, 2024
1 parent a483172 commit c2977d8
Show file tree
Hide file tree
Showing 8 changed files with 178 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ private void validateTestEntityInfo(final AspectSpec testEntityInfo) {
testEntityInfo.getPegasusSchema().getFullName());

// Assert on Searchable Fields
assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 12);
assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 17);
assertEquals(
"customProperties",
testEntityInfo
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.data.DataMap;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.MapDataSchema;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.Aspect;
import com.linkedin.events.metadata.ChangeType;
Expand Down Expand Up @@ -290,8 +291,39 @@ public void setSearchableValue(
String key = keyValues[0], value = "";
if (keyValues.length > 1) {
value = keyValues[1];
if (((MapDataSchema) fieldSpec.getPegasusSchema())
.getValues()
.getType()
.equals(DataSchema.Type.BOOLEAN)) {
dictDoc.set(
key, JsonNodeFactory.instance.booleanNode(Boolean.parseBoolean(value)));
} else if (((MapDataSchema) fieldSpec.getPegasusSchema())
.getValues()
.getType()
.equals(DataSchema.Type.INT)) {
dictDoc.set(key, JsonNodeFactory.instance.numberNode(Integer.parseInt(value)));
} else if (((MapDataSchema) fieldSpec.getPegasusSchema())
.getValues()
.getType()
.equals(DataSchema.Type.DOUBLE)) {
dictDoc.set(
key, JsonNodeFactory.instance.numberNode(Double.parseDouble(value)));
} else if (((MapDataSchema) fieldSpec.getPegasusSchema())
.getValues()
.getType()
.equals(DataSchema.Type.LONG)) {
dictDoc.set(key, JsonNodeFactory.instance.numberNode(Long.parseLong(value)));
} else if (((MapDataSchema) fieldSpec.getPegasusSchema())
.getValues()
.getType()
.equals(DataSchema.Type.FLOAT)) {
dictDoc.set(key, JsonNodeFactory.instance.numberNode(Float.parseFloat(value)));
} else {
dictDoc.put(key, value);
}
} else {
dictDoc.put(key, value);
}
dictDoc.put(key, value);
});
searchDocument.set(fieldName, dictDoc);
} else if (!fieldValues.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
import com.google.common.collect.ImmutableMap;
import com.linkedin.common.urn.TestEntityUrn;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.template.BooleanMap;
import com.linkedin.data.template.DoubleMap;
import com.linkedin.data.template.FloatMap;
import com.linkedin.data.template.IntegerMap;
import com.linkedin.data.template.LongMap;
import com.linkedin.data.template.StringArray;
import com.linkedin.data.template.StringMap;

Expand Down Expand Up @@ -72,6 +77,13 @@ public static TestEntityInfo getTestEntityInfo(Urn urn) {
"longValue",
"0123456789")));
testEntityInfo.setDoubleField(100.456);
testEntityInfo.setEsObjectFieldBoolean(
new BooleanMap(ImmutableMap.of("key1", true, "key2", false)));
testEntityInfo.setEsObjectFieldLong(new LongMap(ImmutableMap.of("key1", 1L, "key2", 2L)));
testEntityInfo.setEsObjectFieldFloat(new FloatMap(ImmutableMap.of("key1", 1.0f, "key2", 2.0f)));
testEntityInfo.setEsObjectFieldDouble(new DoubleMap(ImmutableMap.of("key1", 1.2, "key2", 2.4)));
testEntityInfo.setEsObjectFieldInteger(
new IntegerMap(ImmutableMap.of("key1", 123, "key2", 456)));
return testEntityInfo;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public void testMappingsBuilder() {
Map<String, Object> result = MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec());
assertEquals(result.size(), 1);
Map<String, Object> properties = (Map<String, Object>) result.get("properties");
assertEquals(properties.size(), 22);
assertEquals(properties.size(), 27);
assertEquals(
properties.get("urn"),
ImmutableMap.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,19 +111,22 @@ public void testQueryBuilderFulltext() {
assertEquals(keywordQuery.value(), "testQuery");
assertEquals(keywordQuery.analyzer(), "keyword");
Map<String, Float> keywordFields = keywordQuery.fields();
assertEquals(keywordFields.size(), 9);
assertEquals(
keywordFields,
Map.of(
"urn", 10.f,
"textArrayField", 1.0f,
"customProperties", 1.0f,
"wordGramField", 1.0f,
"nestedArrayArrayField", 1.0f,
"textFieldOverride", 1.0f,
"nestedArrayStringField", 1.0f,
"keyPart1", 10.0f,
"esObjectField", 1.0f));
assertEquals(keywordFields.size(), 14);

assertEquals(keywordFields.get("urn"), 10);
assertEquals(keywordFields.get("textArrayField"), 1);
assertEquals(keywordFields.get("customProperties"), 1);
assertEquals(keywordFields.get("wordGramField"), 1);
assertEquals(keywordFields.get("nestedArrayArrayField"), 1);
assertEquals(keywordFields.get("textFieldOverride"), 1);
assertEquals(keywordFields.get("nestedArrayStringField"), 1);
assertEquals(keywordFields.get("keyPart1"), 10);
assertEquals(keywordFields.get("esObjectField"), 1);
assertEquals(keywordFields.get("esObjectFieldFloat"), 1);
assertEquals(keywordFields.get("esObjectFieldDouble"), 1);
assertEquals(keywordFields.get("esObjectFieldLong"), 1);
assertEquals(keywordFields.get("esObjectFieldInteger"), 1);
assertEquals(keywordFields.get("esObjectFieldBoolean"), 1);

SimpleQueryStringBuilder urnComponentQuery =
(SimpleQueryStringBuilder) analyzerGroupQuery.should().get(1);
Expand Down Expand Up @@ -174,7 +177,7 @@ public void testQueryBuilderFulltext() {
})
.collect(Collectors.toList());

assertEquals(prefixFieldWeights.size(), 29);
assertEquals(prefixFieldWeights.size(), 39);

List.of(
Pair.of("urn", 100.0f),
Expand Down Expand Up @@ -209,7 +212,7 @@ public void testQueryBuilderStructured() {
assertEquals(keywordQuery.queryString(), "testQuery");
assertNull(keywordQuery.analyzer());
Map<String, Float> keywordFields = keywordQuery.fields();
assertEquals(keywordFields.size(), 22);
assertEquals(keywordFields.size(), 27);
assertEquals(keywordFields.get("keyPart1").floatValue(), 10.0f);
assertFalse(keywordFields.containsKey("keyPart3"));
assertEquals(keywordFields.get("textFieldOverride").floatValue(), 1.0f);
Expand Down Expand Up @@ -376,7 +379,7 @@ public void testGetStandardFields() {
Set<SearchFieldConfig> fieldConfigs =
TEST_CUSTOM_BUILDER.getStandardFields(
mock(EntityRegistry.class), ImmutableList.of(TestEntitySpecBuilder.getSpec()));
assertEquals(fieldConfigs.size(), 22);
assertEquals(fieldConfigs.size(), 27);
assertEquals(
fieldConfigs.stream().map(SearchFieldConfig::fieldName).collect(Collectors.toSet()),
Set.of(
Expand All @@ -401,7 +404,12 @@ public void testGetStandardFields() {
"textFieldOverride.delimited",
"urn",
"wordGramField.wordGrams2",
"customProperties.delimited")); // customProperties.delimited Saas only
"customProperties.delimited",
"esObjectFieldBoolean",
"esObjectFieldInteger",
"esObjectFieldDouble",
"esObjectFieldFloat",
"esObjectFieldLong")); // customProperties.delimited Saas only

assertEquals(
fieldConfigs.stream()
Expand Down Expand Up @@ -487,7 +495,7 @@ public void testGetStandardFields() {
ImmutableList.of(TestEntitySpecBuilder.getSpec(), mockEntitySpec));
// Same 27 from the original entity + newFieldNotInOriginal + 3 word gram fields from the
// textFieldOverride
assertEquals(fieldConfigs.size(), 27);
assertEquals(fieldConfigs.size(), 32);
assertEquals(
fieldConfigs.stream().map(SearchFieldConfig::fieldName).collect(Collectors.toSet()),
Set.of(
Expand Down Expand Up @@ -517,7 +525,12 @@ public void testGetStandardFields() {
"textFieldOverride.wordGrams2",
"textFieldOverride.wordGrams3",
"textFieldOverride.wordGrams4",
"customProperties.delimited"));
"customProperties.delimited",
"esObjectFieldBoolean",
"esObjectFieldInteger",
"esObjectFieldDouble",
"esObjectFieldFloat",
"esObjectFieldLong"));

// Field which only exists in first one: Should be the same
assertEquals(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ public void testSearchRequestHandler() {
highlightBuilder.fields().stream()
.map(HighlightBuilder.Field::name)
.collect(Collectors.toList());
assertEquals(fields.size(), 22);
assertEquals(fields.size(), 32);
List<String> highlightableFields =
ImmutableList.of(
"keyPart1",
Expand All @@ -240,7 +240,12 @@ public void testSearchRequestHandler() {
"nestedArrayArrayField",
"customProperties",
"esObjectField",
"wordGramField");
"wordGramField",
"esObjectFieldLong",
"esObjectFieldBoolean",
"esObjectFieldFloat",
"esObjectFieldDouble",
"esObjectFieldInteger");
highlightableFields.forEach(
field -> {
assertTrue(fields.contains(field), "Missing: " + field);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,52 @@ public void testTransform() throws IOException {
assertEquals(parsedJson.get("feature2").asInt(), 1);
JsonNode browsePathV2 = (JsonNode) parsedJson.get("browsePathV2");
assertEquals(browsePathV2.asText(), "␟levelOne␟levelTwo");
assertEquals(
parsedJson.get("esObjectFieldBoolean").get("key1").getNodeType(),
JsonNodeFactory.instance.booleanNode(true).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldLong").get("key1").getNodeType(),
JsonNodeFactory.instance.numberNode(1L).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldFloat").get("key2").getNodeType(),
JsonNodeFactory.instance.numberNode(2.0f).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldDouble").get("key1").getNodeType(),
JsonNodeFactory.instance.numberNode(1.2).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldInteger").get("key2").getNodeType(),
JsonNodeFactory.instance.numberNode(456).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldBoolean").get("key2").getNodeType(),
JsonNodeFactory.instance.booleanNode(false).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldLong").get("key2").getNodeType(),
JsonNodeFactory.instance.numberNode(2L).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldFloat").get("key1").getNodeType(),
JsonNodeFactory.instance.numberNode(1.0f).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldDouble").get("key2").getNodeType(),
JsonNodeFactory.instance.numberNode(2.4).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldInteger").get("key1").getNodeType(),
JsonNodeFactory.instance.numberNode(123).getNodeType());
assertEquals(parsedJson.get("esObjectField").get("key3").asText(), "");
assertEquals(
parsedJson.get("esObjectFieldBoolean").get("key2").getNodeType(),
JsonNodeFactory.instance.booleanNode(false).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldLong").get("key2").getNodeType(),
JsonNodeFactory.instance.numberNode(2L).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldFloat").get("key1").getNodeType(),
JsonNodeFactory.instance.numberNode(1.0f).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldDouble").get("key2").getNodeType(),
JsonNodeFactory.instance.numberNode(2.4).getNodeType());
assertEquals(
parsedJson.get("esObjectFieldInteger").get("key1").getNodeType(),
JsonNodeFactory.instance.numberNode(123).getNodeType());
}

@Test
Expand Down
48 changes: 47 additions & 1 deletion test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,50 @@ record TestEntityInfo includes CustomProperties {
"fieldType": "BOOLEAN"
}
removed: optional boolean
}

@Searchable = {
"/*": {
"name": "esObjectFieldLong",
"fieldType": "OBJECT",
"queryByDefault": true
}
}
esObjectFieldLong: optional map[string, long]

@Searchable = {
"/*": {
"name": "esObjectFieldBoolean",
"fieldType": "OBJECT",
"queryByDefault": true
}
}
esObjectFieldBoolean: optional map[string, boolean]

@Searchable = {
"/*": {
"name": "esObjectFieldFloat",
"fieldType": "OBJECT",
"queryByDefault": true
}
}
esObjectFieldFloat: optional map[string, float]

@Searchable = {
"/*": {
"name": "esObjectFieldDouble",
"fieldType": "OBJECT",
"queryByDefault": true
}
}
esObjectFieldDouble: optional map[string, double]

@Searchable = {
"/*": {
"name": "esObjectFieldInteger",
"fieldType": "OBJECT",
"queryByDefault": true
}
}
esObjectFieldInteger: optional map[string, int]

}

0 comments on commit c2977d8

Please sign in to comment.