Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Apply the date histogram rewrite optimization to range aggregation #14463

Merged
merged 1 commit into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Added
- Add fingerprint ingest processor ([#13724](https://github.com/opensearch-project/OpenSearch/pull/13724))
- [Remote Store] Rate limiter for remote store low priority uploads ([#14374](https://github.com/opensearch-project/OpenSearch/pull/14374/))
- Apply the date histogram rewrite optimization to range aggregation ([#13865](https://github.com/opensearch-project/OpenSearch/pull/13865))

### Dependencies
- Update to Apache Lucene 9.11.0 ([#14042](https://github.com/opensearch-project/OpenSearch/pull/14042))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.fasterxml.jackson.core.JsonParseException;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
Expand Down Expand Up @@ -165,7 +166,7 @@ public ScaledFloatFieldMapper build(BuilderContext context) {

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings()));

public static final class ScaledFloatFieldType extends SimpleMappedFieldType {
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {

private final double scalingFactor;
private final Double nullValue;
Expand All @@ -188,6 +189,21 @@ public ScaledFloatFieldType(String name, double scalingFactor) {
this(name, true, false, true, Collections.emptyMap(), scalingFactor, null);
}

/**
 * Encodes a range bound as a single-dimension {@code long} point.
 * <p>
 * The argument is expected to be a {@link Double} (checked with an assertion).
 * Positive infinity is encoded as {@link Long#MAX_VALUE}, negative infinity as
 * {@link Long#MIN_VALUE}; any other value is passed through {@code scale(...)}
 * and rounded to the nearest {@code long} before encoding.
 *
 * @param value the bound to encode; expected to be a {@code Double}
 * @return a new {@code Long.BYTES}-long array holding the encoded point
 */
@Override
public byte[] encodePoint(Number value) {
    assert value instanceof Double;
    final double bound = (Double) value;

    // Resolve the long representation first so the encoding happens in one place.
    final long encodedValue;
    if (bound == Double.POSITIVE_INFINITY) {
        encodedValue = Long.MAX_VALUE;
    } else if (bound == Double.NEGATIVE_INFINITY) {
        encodedValue = Long.MIN_VALUE;
    } else {
        encodedValue = Math.round(scale(value));
    }

    final byte[] encoded = new byte[Long.BYTES];
    LongPoint.encodeDimension(encodedValue, encoded, 0);
    return encoded;
}

/**
 * Returns the scaling factor held by this field type.
 *
 * @return the value of the {@code scalingFactor} field
 */
public double getScalingFactor() {
    return this.scalingFactor;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ setup:
date:
type: date
format: epoch_second
scaled_field:
type: scaled_float
scaling_factor: 100

- do:
cluster.health:
Expand Down Expand Up @@ -528,3 +531,139 @@ setup:
- is_false: aggregations.unsigned_long_range.buckets.2.to

- match: { aggregations.unsigned_long_range.buckets.2.doc_count: 0 }

---
# Runs a profiled range aggregation over a double field and checks both the
# bucket contents and the aggregation debug counters reported by the profiler.
"Double range profiler shows filter rewrite info":
# Skipped for every version up to and including 2.99.99.
- skip:
version: " - 2.99.99"
reason: debug info for filter rewrite added in 3.0.0 (to be backported to 2.15.0)

# Dedicated index: no replicas, periodic refresh disabled (refresh_interval: -1).
- do:
indices.create:
index: test_profile
body:
settings:
number_of_replicas: 0
refresh_interval: -1
mappings:
properties:
ip:
type: ip
double:
type: double
date:
type: date
format: epoch_second

# Index three documents (double = 42, 100, 50) and refresh as part of the bulk call.
- do:
bulk:
index: test_profile
refresh: true
body:
- '{"index": {}}'
- '{"double" : 42}'
- '{"index": {}}'
- '{"double" : 100}'
- '{"index": {}}'
- '{"double" : 50}'

# Profiled search, no hits returned (size: 0); three ranges split at 50 and 150.
- do:
search:
index: test_profile
body:
size: 0
profile: true
aggs:
double_range:
range:
field: double
ranges:
- to: 50
- from: 50
to: 150
- from: 150

- length: { aggregations.double_range.buckets: 3 }

# Expected counts: 42 lands in the first bucket; 50 and 100 land in the second
# (so the value 50 is counted under "from: 50", not "to: 50"); the third is empty.
- match: { aggregations.double_range.buckets.0.key: "*-50.0" }
- is_false: aggregations.double_range.buckets.0.from
- match: { aggregations.double_range.buckets.0.to: 50.0 }
- match: { aggregations.double_range.buckets.0.doc_count: 1 }
- match: { aggregations.double_range.buckets.1.key: "50.0-150.0" }
- match: { aggregations.double_range.buckets.1.from: 50.0 }
- match: { aggregations.double_range.buckets.1.to: 150.0 }
- match: { aggregations.double_range.buckets.1.doc_count: 2 }
- match: { aggregations.double_range.buckets.2.key: "150.0-*" }
- match: { aggregations.double_range.buckets.2.from: 150.0 }
- is_false: aggregations.double_range.buckets.2.to
- match: { aggregations.double_range.buckets.2.doc_count: 0 }

# Profiler debug counters for the first aggregation on the first shard.
# NOTE(review): presumably optimized_segments: 1 / unoptimized_segments: 0 means the
# single segment went through the filter-rewrite path - confirm against the aggregator.
- match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 }
- match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
- match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 }
- match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }

---
# Indexes four scaled_field values into the "test" index and checks how a
# four-range aggregation distributes them.
# NOTE(review): presumably "test" is the index created in setup, where scaled_field
# is a scaled_float - confirm against the setup section.
"Scaled Float Range Aggregation":
# Document 1: scaled_field = 1
- do:
index:
index: test
id: 1
body: { "scaled_field": 1 }

# Document 2: scaled_field = 1.53
- do:
index:
index: test
id: 2
body: { "scaled_field": 1.53 }

# Document 3: scaled_field = -2.1 (the only negative value)
- do:
index:
index: test
id: 3
body: { "scaled_field": -2.1 }

# Document 4: scaled_field = 1.53 (same value as document 2)
- do:
index:
index: test
id: 4
body: { "scaled_field": 1.53 }

# Make the four documents visible to search.
- do:
indices.refresh: { }

# Range aggregation with boundaries at 0, 1 and 1.5; no hits returned (size: 0).
- do:
search:
index: test
body:
size: 0
aggs:
my_range:
range:
field: scaled_field
ranges:
- to: 0
- from: 0
to: 1
- from: 1
to: 1.5
- from: 1.5

- length: { aggregations.my_range.buckets: 4 }

# Expected distribution: -2.1 in "*-0.0"; nothing in "0.0-1.0"; the value 1 in
# "1.0-1.5" (so 1 is counted under "from: 1", not "to: 1"); both 1.53 docs in "1.5-*".
- match: { aggregations.my_range.buckets.0.key: "*-0.0" }
- is_false: aggregations.my_range.buckets.0.from
- match: { aggregations.my_range.buckets.0.to: 0.0 }
- match: { aggregations.my_range.buckets.0.doc_count: 1 }
- match: { aggregations.my_range.buckets.1.key: "0.0-1.0" }
- match: { aggregations.my_range.buckets.1.from: 0.0 }
- match: { aggregations.my_range.buckets.1.to: 1.0 }
- match: { aggregations.my_range.buckets.1.doc_count: 0 }
- match: { aggregations.my_range.buckets.2.key: "1.0-1.5" }
- match: { aggregations.my_range.buckets.2.from: 1.0 }
- match: { aggregations.my_range.buckets.2.to: 1.5 }
- match: { aggregations.my_range.buckets.2.doc_count: 1 }
- match: { aggregations.my_range.buckets.3.key: "1.5-*" }
- match: { aggregations.my_range.buckets.3.from: 1.5 }
- is_false: aggregations.my_range.buckets.3.to
- match: { aggregations.my_range.buckets.3.doc_count: 2 }
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ public DateFieldMapper build(BuilderContext context) {
*
* @opensearch.internal
*/
public static final class DateFieldType extends MappedFieldType {
public static final class DateFieldType extends MappedFieldType implements NumericPointEncoder {
protected final DateFormatter dateTimeFormatter;
protected final DateMathParser dateMathParser;
protected final Resolution resolution;
Expand Down Expand Up @@ -554,6 +554,13 @@ public static long parseToLong(
return resolution.convert(dateParser.parse(BytesRefs.toString(value), now, roundUp, zone));
}

/**
 * Encodes the {@code long} value of the given number as a single-dimension point.
 *
 * @param value the number to encode; only {@link Number#longValue()} is used
 * @return a new {@code Long.BYTES}-long array holding the encoded point
 */
@Override
public byte[] encodePoint(Number value) {
    final long asLong = value.longValue();
    final byte[] encoded = new byte[Long.BYTES];
    LongPoint.encodeDimension(asLong, encoded, 0);
    return encoded;
}

@Override
public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
Expand Down
Loading
Loading