-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: David Zane <[email protected]>
- Loading branch information
Showing
4 changed files
with
561 additions
and
21 deletions.
There are no files selected for viewing
276 changes: 276 additions & 0 deletions
276
...ain/java/org/opensearch/plugin/insights/core/service/categorizer/QueryShapeGenerator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,276 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.plugin.insights.core.service.categorizer; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.function.Function; | ||
import org.opensearch.core.common.io.stream.NamedWriteable; | ||
import org.opensearch.index.query.AbstractGeometryQueryBuilder; | ||
import org.opensearch.index.query.CommonTermsQueryBuilder; | ||
import org.opensearch.index.query.ExistsQueryBuilder; | ||
import org.opensearch.index.query.FieldMaskingSpanQueryBuilder; | ||
import org.opensearch.index.query.FuzzyQueryBuilder; | ||
import org.opensearch.index.query.GeoDistanceQueryBuilder; | ||
import org.opensearch.index.query.GeoPolygonQueryBuilder; | ||
import org.opensearch.index.query.MatchBoolPrefixQueryBuilder; | ||
import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; | ||
import org.opensearch.index.query.MatchPhraseQueryBuilder; | ||
import org.opensearch.index.query.MatchQueryBuilder; | ||
import org.opensearch.index.query.MultiTermQueryBuilder; | ||
import org.opensearch.index.query.PrefixQueryBuilder; | ||
import org.opensearch.index.query.QueryBuilder; | ||
import org.opensearch.index.query.RangeQueryBuilder; | ||
import org.opensearch.index.query.RegexpQueryBuilder; | ||
import org.opensearch.index.query.SpanNearQueryBuilder; | ||
import org.opensearch.index.query.SpanTermQueryBuilder; | ||
import org.opensearch.index.query.TermQueryBuilder; | ||
import org.opensearch.index.query.TermsQueryBuilder; | ||
import org.opensearch.index.query.WildcardQueryBuilder; | ||
import org.opensearch.search.aggregations.AggregationBuilder; | ||
import org.opensearch.search.aggregations.AggregatorFactories; | ||
import org.opensearch.search.aggregations.PipelineAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.histogram.VariableWidthHistogramAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.missing.MissingAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.range.AbstractRangeBuilder; | ||
import org.opensearch.search.aggregations.bucket.range.GeoDistanceAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.range.IpRangeAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.sampler.DiversifiedAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.terms.SignificantTermsAggregationBuilder; | ||
import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.AvgAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.CardinalityAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.ExtendedStatsAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.GeoCentroidAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.MinAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.StatsAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.SumAggregationBuilder; | ||
import org.opensearch.search.aggregations.metrics.ValueCountAggregationBuilder; | ||
import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; | ||
import org.opensearch.search.builder.SearchSourceBuilder; | ||
import org.opensearch.search.sort.FieldSortBuilder; | ||
import org.opensearch.search.sort.SortBuilder; | ||
|
||
/** | ||
* Builds a textual "shape" for a search request by concatenating the shapes of its query, aggregations and sorts. | ||
*/ | ||
public class QueryShapeGenerator { | ||
static final String TWO_SPACE_INDENT = " "; | ||
static final Map<Class<?>, List<Function<Object, String>>> QUERY_FIELD_DATA_MAP = FieldDataMapHelper.getQueryFieldDataMap(); | ||
static final Map<Class<?>, List<Function<Object, String>>> AGG_FIELD_DATA_MAP = FieldDataMapHelper.getAggFieldDataMap(); | ||
static final Map<Class<?>, List<Function<Object, String>>> SORT_FIELD_DATA_MAP = FieldDataMapHelper.getSortFieldDataMap(); | ||
|
||
public static String buildShape(SearchSourceBuilder source, Boolean showFields) { | ||
StringBuilder shape = new StringBuilder(); | ||
shape.append(buildQueryShape(source.query(), showFields)); | ||
shape.append(buildAggregationShape(source.aggregations(), showFields)); | ||
shape.append(buildSortShape(source.sorts(), showFields)); | ||
return shape.toString(); | ||
} | ||
|
||
static String buildQueryShape(QueryBuilder queryBuilder, Boolean showFields) { | ||
if (queryBuilder == null) { | ||
return ""; | ||
} | ||
QueryShapeVisitor shapeVisitor = new QueryShapeVisitor(); | ||
queryBuilder.visit(shapeVisitor); | ||
return shapeVisitor.prettyPrintTree("", showFields); | ||
} | ||
|
||
static String buildAggregationShape(AggregatorFactories.Builder aggregationsBuilder, Boolean showFields) { | ||
if (aggregationsBuilder == null) { | ||
return ""; | ||
} | ||
StringBuilder aggregationShape = recursiveAggregationShapeBuilder( | ||
aggregationsBuilder.getAggregatorFactories(), | ||
aggregationsBuilder.getPipelineAggregatorFactories(), | ||
new StringBuilder(), | ||
0, | ||
showFields | ||
); | ||
return aggregationShape.toString(); | ||
} | ||
|
||
static StringBuilder recursiveAggregationShapeBuilder( | ||
Collection<AggregationBuilder> aggregationBuilders, | ||
Collection<PipelineAggregationBuilder> pipelineAggregations, | ||
StringBuilder outputBuilder, | ||
int indentCount, | ||
Boolean showFields | ||
) { | ||
String baseIndent = TWO_SPACE_INDENT.repeat(indentCount); | ||
|
||
//// Normal Aggregations //// | ||
if (aggregationBuilders.isEmpty() == false) { | ||
outputBuilder.append(baseIndent).append("aggregation:").append("\n"); | ||
} | ||
List<String> aggShapeStrings = new ArrayList<>(); | ||
for (AggregationBuilder aggBuilder : aggregationBuilders) { | ||
StringBuilder stringBuilder = new StringBuilder(); | ||
stringBuilder.append(baseIndent).append(TWO_SPACE_INDENT).append(aggBuilder.getType()); | ||
if (showFields) { | ||
stringBuilder.append(buildFieldDataString(AGG_FIELD_DATA_MAP.get(aggBuilder.getClass()), aggBuilder)); | ||
} | ||
stringBuilder.append("\n"); | ||
|
||
if (aggBuilder.getSubAggregations().isEmpty() == false) { | ||
// Recursive call on sub-aggregations | ||
recursiveAggregationShapeBuilder( | ||
aggBuilder.getSubAggregations(), | ||
aggBuilder.getPipelineAggregations(), | ||
stringBuilder, | ||
indentCount + 2, | ||
showFields | ||
); | ||
} | ||
aggShapeStrings.add(stringBuilder.toString()); | ||
} | ||
|
||
// Sort alphanumerically and append aggregations list | ||
Collections.sort(aggShapeStrings); | ||
for (String shapeString : aggShapeStrings) { | ||
outputBuilder.append(shapeString); | ||
} | ||
|
||
//// Pipeline Aggregation (cannot have sub-aggregations) //// | ||
if (pipelineAggregations.isEmpty() == false) { | ||
outputBuilder.append(baseIndent).append(TWO_SPACE_INDENT).append("pipeline aggregation:").append("\n"); | ||
|
||
List<String> pipelineAggShapeStrings = new ArrayList<>(); | ||
for (PipelineAggregationBuilder pipelineAgg : pipelineAggregations) { | ||
pipelineAggShapeStrings.add(baseIndent + TWO_SPACE_INDENT.repeat(2) + pipelineAgg.getType() + "\n"); | ||
} | ||
|
||
// Sort alphanumerically and append pipeline aggregations list | ||
Collections.sort(pipelineAggShapeStrings); | ||
for (String shapeString : pipelineAggShapeStrings) { | ||
outputBuilder.append(shapeString); | ||
} | ||
} | ||
|
||
return outputBuilder; | ||
} | ||
|
||
static String buildSortShape(List<SortBuilder<?>> sortBuilderList, Boolean showFields) { | ||
if (sortBuilderList == null || sortBuilderList.isEmpty()) { | ||
return ""; | ||
} | ||
StringBuilder sortShape = new StringBuilder(); | ||
sortShape.append("sort:\n"); | ||
|
||
List<String> shapeStrings = new ArrayList<>(); | ||
for (SortBuilder<?> sortBuilder : sortBuilderList) { | ||
StringBuilder stringBuilder = new StringBuilder(); | ||
stringBuilder.append(TWO_SPACE_INDENT).append(sortBuilder.order()); | ||
if (showFields) { | ||
stringBuilder.append(buildFieldDataString(SORT_FIELD_DATA_MAP.get(sortBuilder.getClass()), sortBuilder)); | ||
} | ||
shapeStrings.add(stringBuilder.toString()); | ||
} | ||
|
||
Collections.sort(shapeStrings); | ||
for (String line : shapeStrings) { | ||
sortShape.append(line).append("\n"); | ||
} | ||
return sortShape.toString(); | ||
} | ||
|
||
static String buildFieldDataString(List<Function<Object, String>> methods, NamedWriteable builder) { | ||
List<String> fieldDataList = new ArrayList<>(); | ||
if (methods != null) { | ||
for (Function<Object, String> lambda : methods) { | ||
fieldDataList.add(lambda.apply(builder)); | ||
} | ||
} | ||
return String.format(" [%s]", String.join(", ", fieldDataList)); | ||
} | ||
|
||
/** | ||
* Class to create field data map for query, agg, sort | ||
* Map | ||
*/ | ||
public static class FieldDataMapHelper { | ||
|
||
// Helper method to create map entries | ||
private static <T> Map.Entry<Class<?>, List<Function<Object, String>>> createEntry(Class<T> clazz, Function<T, String> extractor) { | ||
return Map.entry(clazz, List.of(obj -> extractor.apply(clazz.cast(obj)))); | ||
} | ||
|
||
// Method to return the QUERY_FIELD_DATA_MAP | ||
public static Map<Class<?>, List<Function<Object, String>>> getQueryFieldDataMap() { | ||
return Map.ofEntries( | ||
createEntry(AbstractGeometryQueryBuilder.class, AbstractGeometryQueryBuilder::fieldName), | ||
createEntry(CommonTermsQueryBuilder.class, CommonTermsQueryBuilder::fieldName), | ||
createEntry(org.opensearch.index.query.ExistsQueryBuilder.class, ExistsQueryBuilder::fieldName), | ||
createEntry(org.opensearch.index.query.FieldMaskingSpanQueryBuilder.class, FieldMaskingSpanQueryBuilder::fieldName), | ||
createEntry(FuzzyQueryBuilder.class, FuzzyQueryBuilder::fieldName), | ||
createEntry( | ||
org.opensearch.index.query.GeoBoundingBoxQueryBuilder.class, | ||
org.opensearch.index.query.GeoBoundingBoxQueryBuilder::fieldName | ||
), | ||
createEntry(org.opensearch.index.query.GeoDistanceQueryBuilder.class, GeoDistanceQueryBuilder::fieldName), | ||
createEntry(GeoPolygonQueryBuilder.class, GeoPolygonQueryBuilder::fieldName), | ||
createEntry(MatchBoolPrefixQueryBuilder.class, MatchBoolPrefixQueryBuilder::fieldName), | ||
createEntry(MatchQueryBuilder.class, MatchQueryBuilder::fieldName), | ||
createEntry(org.opensearch.index.query.MatchPhraseQueryBuilder.class, MatchPhraseQueryBuilder::fieldName), | ||
createEntry(MatchPhrasePrefixQueryBuilder.class, MatchPhrasePrefixQueryBuilder::fieldName), | ||
createEntry(MultiTermQueryBuilder.class, MultiTermQueryBuilder::fieldName), | ||
createEntry(PrefixQueryBuilder.class, PrefixQueryBuilder::fieldName), | ||
createEntry(RangeQueryBuilder.class, RangeQueryBuilder::fieldName), | ||
createEntry(RegexpQueryBuilder.class, RegexpQueryBuilder::fieldName), | ||
createEntry(SpanNearQueryBuilder.SpanGapQueryBuilder.class, SpanNearQueryBuilder.SpanGapQueryBuilder::fieldName), | ||
createEntry(SpanTermQueryBuilder.class, SpanTermQueryBuilder::fieldName), | ||
createEntry(TermQueryBuilder.class, TermQueryBuilder::fieldName), | ||
createEntry(TermsQueryBuilder.class, TermsQueryBuilder::fieldName), | ||
createEntry(WildcardQueryBuilder.class, WildcardQueryBuilder::fieldName) | ||
); | ||
} | ||
|
||
// Method to return the AGG_FIELD_DATA_MAP | ||
public static Map<Class<?>, List<Function<Object, String>>> getAggFieldDataMap() { | ||
return Map.ofEntries( | ||
createEntry(IpRangeAggregationBuilder.class, IpRangeAggregationBuilder::field), | ||
createEntry(AutoDateHistogramAggregationBuilder.class, AutoDateHistogramAggregationBuilder::field), | ||
createEntry(DateHistogramAggregationBuilder.class, DateHistogramAggregationBuilder::field), | ||
createEntry(HistogramAggregationBuilder.class, HistogramAggregationBuilder::field), | ||
createEntry(VariableWidthHistogramAggregationBuilder.class, VariableWidthHistogramAggregationBuilder::field), | ||
createEntry(MissingAggregationBuilder.class, MissingAggregationBuilder::field), | ||
createEntry(AbstractRangeBuilder.class, AbstractRangeBuilder::field), | ||
createEntry(GeoDistanceAggregationBuilder.class, GeoDistanceAggregationBuilder::field), | ||
createEntry(DiversifiedAggregationBuilder.class, DiversifiedAggregationBuilder::field), | ||
createEntry(RareTermsAggregationBuilder.class, RareTermsAggregationBuilder::field), | ||
createEntry(SignificantTermsAggregationBuilder.class, SignificantTermsAggregationBuilder::field), | ||
createEntry(TermsAggregationBuilder.class, TermsAggregationBuilder::field), | ||
createEntry(AvgAggregationBuilder.class, AvgAggregationBuilder::field), | ||
createEntry(CardinalityAggregationBuilder.class, CardinalityAggregationBuilder::field), | ||
createEntry(ExtendedStatsAggregationBuilder.class, ExtendedStatsAggregationBuilder::field), | ||
createEntry(GeoCentroidAggregationBuilder.class, GeoCentroidAggregationBuilder::field), | ||
createEntry(MaxAggregationBuilder.class, MaxAggregationBuilder::field), | ||
createEntry(MinAggregationBuilder.class, MinAggregationBuilder::field), | ||
createEntry(StatsAggregationBuilder.class, StatsAggregationBuilder::field), | ||
createEntry(SumAggregationBuilder.class, SumAggregationBuilder::field), | ||
createEntry(ValueCountAggregationBuilder.class, ValueCountAggregationBuilder::field), | ||
createEntry(ValuesSourceAggregationBuilder.class, ValuesSourceAggregationBuilder::field) | ||
); | ||
} | ||
|
||
// Method to return the SORT_FIELD_DATA_MAP | ||
public static Map<Class<?>, List<Function<Object, String>>> getSortFieldDataMap() { | ||
return Map.ofEntries(createEntry(FieldSortBuilder.class, FieldSortBuilder::getFieldName)); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.