From 444a31998cc597c50cf129a421097b53a71efacb Mon Sep 17 00:00:00 2001 From: Sorabh Date: Wed, 20 Sep 2023 20:32:00 -0700 Subject: [PATCH] Fix NPE in ConcurrentQueryProfile while computing the breakdown map for slices (#10111) * Fix NPE in ConcurrentQueryProfile while computing the breakdown map for slices. There can be cases where one or more slice may not have timing related information for its leaves in contexts map. During creation of slice and query level breakdown map it needs to handle such cases by using default values correctly. Also updating the min/max/avg sliceNodeTime to not include time to create weight and wait times by slice threads. It will reflect the min/max/avg execution time of each slice whereas totalNodeTime will reflect the total query time. Signed-off-by: Sorabh Hamirwasia * Address review comments Signed-off-by: Sorabh Hamirwasia --------- Signed-off-by: Sorabh Hamirwasia --- .../ConcurrentQueryProfileBreakdown.java | 84 +++++++++++--- .../ConcurrentQueryProfileBreakdownTests.java | 103 ++++++++++++++---- 2 files changed, 147 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java index f37d53737d52b..e567fdd2d436c 100644 --- a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java +++ b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java @@ -10,6 +10,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Collector; +import org.opensearch.OpenSearchException; import org.opensearch.search.profile.AbstractProfileBreakdown; import org.opensearch.search.profile.ContextualProfileBreakdown; @@ -73,14 +74,14 @@ public Map toBreakdownMap() { // If there are no leaf contexts, then return the default concurrent query level breakdown, which will include the // create_weight time/count queryNodeTime = createWeightTime; - maxSliceNodeTime = queryNodeTime; - minSliceNodeTime = queryNodeTime; - avgSliceNodeTime = queryNodeTime; + maxSliceNodeTime = 0L; + minSliceNodeTime = 0L; + avgSliceNodeTime = 0L; return buildDefaultQueryBreakdownMap(createWeightTime); } // first create the slice level breakdowns - final Map> sliceLevelBreakdowns = buildSliceLevelBreakdown(createWeightStartTime); + final Map> sliceLevelBreakdowns = buildSliceLevelBreakdown(); return buildQueryBreakdownMap(sliceLevelBreakdowns, createWeightTime, createWeightStartTime); } @@ -124,18 +125,19 @@ private Map buildDefaultQueryBreakdownMap(long createWeightTime) { /** * Computes the slice level breakdownMap. It uses sliceCollectorsToLeaves to figure out all the leaves or segments part of a slice. * Then use the breakdown timing stats for each of these leaves to calculate the breakdown stats at slice level. - * @param createWeightStartTime start time when createWeight is called + * * @return map of collector (or slice) to breakdown map */ - Map> buildSliceLevelBreakdown(long createWeightStartTime) { + Map> buildSliceLevelBreakdown() { final Map> sliceLevelBreakdowns = new HashMap<>(); - long totalSliceNodeTime = 0; + long totalSliceNodeTime = 0L; for (Map.Entry> slice : sliceCollectorsToLeaves.entrySet()) { final Collector sliceCollector = slice.getKey(); // initialize each slice level breakdown final Map currentSliceBreakdown = sliceLevelBreakdowns.computeIfAbsent(sliceCollector, k -> new HashMap<>()); // max slice end time across all timing types long sliceMaxEndTime = Long.MIN_VALUE; + long sliceMinStartTime = Long.MAX_VALUE; for (QueryTimingType timingType : QueryTimingType.values()) { if (timingType.equals(QueryTimingType.CREATE_WEIGHT)) { // do nothing for create weight as that is query level time and not slice level @@ -155,6 +157,12 @@ Map> buildSliceLevelBreakdown(long createWeightStar // leaf, but the leaf level breakdown will not be created in the contexts map. // This is because before updating the contexts map, the query hits earlyTerminationException. // To handle such case, we will ignore the leaf that is not present. + // + // Other than early termination, it can also happen in other cases. For example: there is a must boolean query + // with 2 boolean clauses. While creating scorer for first clause if no docs are found for the field in a leaf + // context then it will return null scorer. Then for 2nd clause weight as well no scorer will be created for this + // leaf context (as it is a must query). Due to this it will end up missing the leaf context in the contexts map + // for second clause weight. continue; } final Map currentSliceLeafBreakdownMap = contexts.get(sliceLeaf).toBreakdownMap(); @@ -182,15 +190,36 @@ Map> buildSliceLevelBreakdown(long createWeightStar ); } // compute sliceMaxEndTime as max of sliceEndTime across all timing types - sliceMaxEndTime = Math.max(sliceMaxEndTime, currentSliceBreakdown.get(timingTypeSliceEndTimeKey)); + sliceMaxEndTime = Math.max(sliceMaxEndTime, currentSliceBreakdown.getOrDefault(timingTypeSliceEndTimeKey, Long.MIN_VALUE)); + sliceMinStartTime = Math.min( + sliceMinStartTime, + currentSliceBreakdown.getOrDefault(timingTypeSliceStartTimeKey, Long.MAX_VALUE) + ); // compute total time for each timing type at slice level using sliceEndTime and sliceStartTime currentSliceBreakdown.put( timingType.toString(), - currentSliceBreakdown.get(timingTypeSliceEndTimeKey) - currentSliceBreakdown.get(timingTypeSliceStartTimeKey) + currentSliceBreakdown.getOrDefault(timingTypeSliceEndTimeKey, 0L) - currentSliceBreakdown.getOrDefault( + timingTypeSliceStartTimeKey, + 0L + ) ); } - // currentSliceNodeTime includes the create weight time as well which will be same for all the slices - long currentSliceNodeTime = sliceMaxEndTime - createWeightStartTime; + // currentSliceNodeTime does not include the create weight time, as that is computed in non-concurrent part + long currentSliceNodeTime; + if (sliceMinStartTime == Long.MAX_VALUE && sliceMaxEndTime == Long.MIN_VALUE) { + currentSliceNodeTime = 0L; + } else if (sliceMinStartTime == Long.MAX_VALUE || sliceMaxEndTime == Long.MIN_VALUE) { + throw new OpenSearchException( + "Unexpected value of sliceMinStartTime [" + + sliceMinStartTime + + "] or sliceMaxEndTime [" + + sliceMaxEndTime + + "] while computing the slice level timing profile breakdowns" + ); + } else { + currentSliceNodeTime = sliceMaxEndTime - sliceMinStartTime; + } + // compute max/min slice times maxSliceNodeTime = Math.max(maxSliceNodeTime, currentSliceNodeTime); minSliceNodeTime = Math.min(minSliceNodeTime, currentSliceNodeTime); @@ -245,8 +274,8 @@ public Map buildQueryBreakdownMap( // for all other timing types, we will compute min/max/avg/total across slices for (Map.Entry> sliceBreakdown : sliceLevelBreakdowns.entrySet()) { - Long sliceBreakdownTypeTime = sliceBreakdown.getValue().get(timingTypeKey); - Long sliceBreakdownTypeCount = sliceBreakdown.getValue().get(timingTypeCountKey); + long sliceBreakdownTypeTime = sliceBreakdown.getValue().getOrDefault(timingTypeKey, 0L); + long sliceBreakdownTypeCount = sliceBreakdown.getValue().getOrDefault(timingTypeCountKey, 0L); // compute max/min/avg TimingType time across slices queryBreakdownMap.compute( maxBreakdownTypeTime, @@ -276,17 +305,38 @@ public Map buildQueryBreakdownMap( ); // query start/end time for a TimingType is min/max of start/end time across slices for that TimingType - queryTimingTypeEndTime = Math.max(queryTimingTypeEndTime, sliceBreakdown.getValue().get(sliceEndTimeForTimingType)); - queryTimingTypeStartTime = Math.min(queryTimingTypeStartTime, sliceBreakdown.getValue().get(sliceStartTimeForTimingType)); + queryTimingTypeEndTime = Math.max( + queryTimingTypeEndTime, + sliceBreakdown.getValue().getOrDefault(sliceEndTimeForTimingType, Long.MIN_VALUE) + ); + queryTimingTypeStartTime = Math.min( + queryTimingTypeStartTime, + sliceBreakdown.getValue().getOrDefault(sliceStartTimeForTimingType, Long.MAX_VALUE) + ); queryTimingTypeCount += sliceBreakdownTypeCount; } + + if (queryTimingTypeStartTime == Long.MAX_VALUE || queryTimingTypeEndTime == Long.MIN_VALUE) { + throw new OpenSearchException( + "Unexpected timing type [" + + timingTypeKey + + "] start [" + + queryTimingTypeStartTime + + "] or end time [" + + queryTimingTypeEndTime + + "] computed across slices for profile results" + ); + } queryBreakdownMap.put(timingTypeKey, queryTimingTypeEndTime - queryTimingTypeStartTime); queryBreakdownMap.put(timingTypeCountKey, queryTimingTypeCount); - queryBreakdownMap.compute(avgBreakdownTypeTime, (key, value) -> (value == null) ? 0 : value / sliceLevelBreakdowns.size()); - queryBreakdownMap.compute(avgBreakdownTypeCount, (key, value) -> (value == null) ? 0 : value / sliceLevelBreakdowns.size()); + queryBreakdownMap.compute(avgBreakdownTypeTime, (key, value) -> (value == null) ? 0L : value / sliceLevelBreakdowns.size()); + queryBreakdownMap.compute(avgBreakdownTypeCount, (key, value) -> (value == null) ? 0L : value / sliceLevelBreakdowns.size()); // compute query end time using max of query end time across all timing types queryEndTime = Math.max(queryEndTime, queryTimingTypeEndTime); } + if (queryEndTime == Long.MIN_VALUE) { + throw new OpenSearchException("Unexpected error while computing the query end time across slices in profile result"); + } queryNodeTime = queryEndTime - createWeightStartTime; return queryBreakdownMap; } diff --git a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java index 065c90b262e11..f29ba3b0cea07 100644 --- a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java +++ b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java @@ -81,9 +81,9 @@ public void testBreakdownMapWithNoLeafContext() throws Exception { ); // verify total/min/max/avg node time is same as weight time assertEquals(createWeightTime, testQueryProfileBreakdown.toNodeTime()); - assertEquals(createWeightTime, testQueryProfileBreakdown.getMaxSliceNodeTime()); - assertEquals(createWeightTime, testQueryProfileBreakdown.getMinSliceNodeTime()); - assertEquals(createWeightTime, testQueryProfileBreakdown.getAvgSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getMaxSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getMinSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getAvgSliceNodeTime()); continue; } assertEquals(0, (long) queryBreakDownMap.get(timingTypeKey)); @@ -103,16 +103,15 @@ public void testBuildSliceLevelBreakdownWithSingleSlice() throws Exception { final LeafReaderContext sliceLeaf = directoryReader.leaves().get(0); final Collector sliceCollector = mock(Collector.class); final long createWeightEarliestStartTime = createWeightTimer.getEarliestTimerStartTime(); - final Map leafProfileBreakdownMap = getLeafBreakdownMap(createWeightEarliestStartTime + 10, 10, 1); + final long createWeightEndTime = createWeightEarliestStartTime + createWeightTimer.getApproximateTiming(); + final Map leafProfileBreakdownMap = getLeafBreakdownMap(createWeightEndTime + 10, 10, 1); final AbstractProfileBreakdown leafProfileBreakdown = new TestQueryProfileBreakdown( QueryTimingType.class, leafProfileBreakdownMap ); testQueryProfileBreakdown.associateCollectorToLeaves(sliceCollector, sliceLeaf); testQueryProfileBreakdown.getContexts().put(sliceLeaf, leafProfileBreakdown); - final Map> sliceBreakdownMap = testQueryProfileBreakdown.buildSliceLevelBreakdown( - createWeightEarliestStartTime - ); + final Map> sliceBreakdownMap = testQueryProfileBreakdown.buildSliceLevelBreakdown(); assertFalse(sliceBreakdownMap == null || sliceBreakdownMap.isEmpty()); assertEquals(1, sliceBreakdownMap.size()); assertTrue(sliceBreakdownMap.containsKey(sliceCollector)); @@ -141,9 +140,9 @@ public void testBuildSliceLevelBreakdownWithSingleSlice() throws Exception { (long) sliceBreakdown.get(timingTypeKey + SLICE_END_TIME_SUFFIX) ); } - assertEquals(20, testQueryProfileBreakdown.getMaxSliceNodeTime()); - assertEquals(20, testQueryProfileBreakdown.getMinSliceNodeTime()); - assertEquals(20, testQueryProfileBreakdown.getAvgSliceNodeTime()); + assertEquals(10, testQueryProfileBreakdown.getMaxSliceNodeTime()); + assertEquals(10, testQueryProfileBreakdown.getMinSliceNodeTime()); + assertEquals(10, testQueryProfileBreakdown.getAvgSliceNodeTime()); directoryReader.close(); directory.close(); } @@ -154,8 +153,9 @@ public void testBuildSliceLevelBreakdownWithMultipleSlices() throws Exception { final Collector sliceCollector_1 = mock(Collector.class); final Collector sliceCollector_2 = mock(Collector.class); final long createWeightEarliestStartTime = createWeightTimer.getEarliestTimerStartTime(); - final Map leafProfileBreakdownMap_1 = getLeafBreakdownMap(createWeightEarliestStartTime + 10, 10, 1); - final Map leafProfileBreakdownMap_2 = getLeafBreakdownMap(createWeightEarliestStartTime + 40, 10, 1); + final long createWeightEndTime = createWeightEarliestStartTime + createWeightTimer.getApproximateTiming(); + final Map leafProfileBreakdownMap_1 = getLeafBreakdownMap(createWeightEndTime + 10, 10, 1); + final Map leafProfileBreakdownMap_2 = getLeafBreakdownMap(createWeightEndTime + 40, 10, 1); final AbstractProfileBreakdown leafProfileBreakdown_1 = new TestQueryProfileBreakdown( QueryTimingType.class, leafProfileBreakdownMap_1 @@ -168,9 +168,7 @@ public void testBuildSliceLevelBreakdownWithMultipleSlices() throws Exception { testQueryProfileBreakdown.associateCollectorToLeaves(sliceCollector_2, directoryReader.leaves().get(1)); testQueryProfileBreakdown.getContexts().put(directoryReader.leaves().get(0), leafProfileBreakdown_1); testQueryProfileBreakdown.getContexts().put(directoryReader.leaves().get(1), leafProfileBreakdown_2); - final Map> sliceBreakdownMap = testQueryProfileBreakdown.buildSliceLevelBreakdown( - createWeightEarliestStartTime - ); + final Map> sliceBreakdownMap = testQueryProfileBreakdown.buildSliceLevelBreakdown(); assertFalse(sliceBreakdownMap == null || sliceBreakdownMap.isEmpty()); assertEquals(2, sliceBreakdownMap.size()); @@ -208,9 +206,9 @@ public void testBuildSliceLevelBreakdownWithMultipleSlices() throws Exception { } } - assertEquals(50, testQueryProfileBreakdown.getMaxSliceNodeTime()); - assertEquals(20, testQueryProfileBreakdown.getMinSliceNodeTime()); - assertEquals(35, testQueryProfileBreakdown.getAvgSliceNodeTime()); + assertEquals(10, testQueryProfileBreakdown.getMaxSliceNodeTime()); + assertEquals(10, testQueryProfileBreakdown.getMinSliceNodeTime()); + assertEquals(10, testQueryProfileBreakdown.getAvgSliceNodeTime()); directoryReader.close(); directory.close(); } @@ -221,8 +219,9 @@ public void testBreakDownMapWithMultipleSlices() throws Exception { final Collector sliceCollector_1 = mock(Collector.class); final Collector sliceCollector_2 = mock(Collector.class); final long createWeightEarliestStartTime = createWeightTimer.getEarliestTimerStartTime(); - final Map leafProfileBreakdownMap_1 = getLeafBreakdownMap(createWeightEarliestStartTime + 10, 10, 1); - final Map leafProfileBreakdownMap_2 = getLeafBreakdownMap(createWeightEarliestStartTime + 40, 20, 1); + final long createWeightEndTime = createWeightEarliestStartTime + createWeightTimer.getApproximateTiming(); + final Map leafProfileBreakdownMap_1 = getLeafBreakdownMap(createWeightEndTime + 10, 10, 1); + final Map leafProfileBreakdownMap_2 = getLeafBreakdownMap(createWeightEndTime + 40, 20, 1); final AbstractProfileBreakdown leafProfileBreakdown_1 = new TestQueryProfileBreakdown( QueryTimingType.class, leafProfileBreakdownMap_1 @@ -269,9 +268,67 @@ public void testBreakDownMapWithMultipleSlices() throws Exception { assertEquals(1, (long) queryBreakDownMap.get(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeCountKey)); } - assertEquals(60, testQueryProfileBreakdown.getMaxSliceNodeTime()); - assertEquals(20, testQueryProfileBreakdown.getMinSliceNodeTime()); - assertEquals(40, testQueryProfileBreakdown.getAvgSliceNodeTime()); + assertEquals(20, testQueryProfileBreakdown.getMaxSliceNodeTime()); + assertEquals(10, testQueryProfileBreakdown.getMinSliceNodeTime()); + assertEquals(15, testQueryProfileBreakdown.getAvgSliceNodeTime()); + directoryReader.close(); + directory.close(); + } + + public void testBreakDownMapWithMultipleSlicesAndOneSliceWithNoLeafContext() throws Exception { + final DirectoryReader directoryReader = getDirectoryReader(2); + final Directory directory = directoryReader.directory(); + final Collector sliceCollector_1 = mock(Collector.class); + final Collector sliceCollector_2 = mock(Collector.class); + final long createWeightEarliestStartTime = createWeightTimer.getEarliestTimerStartTime(); + final long createWeightEndTime = createWeightEarliestStartTime + createWeightTimer.getApproximateTiming(); + final Map leafProfileBreakdownMap_1 = getLeafBreakdownMap(createWeightEndTime + 10, 10, 1); + final AbstractProfileBreakdown leafProfileBreakdown_1 = new TestQueryProfileBreakdown( + QueryTimingType.class, + leafProfileBreakdownMap_1 + ); + testQueryProfileBreakdown.associateCollectorToLeaves(sliceCollector_1, directoryReader.leaves().get(0)); + testQueryProfileBreakdown.associateCollectorToLeaves(sliceCollector_2, directoryReader.leaves().get(1)); + testQueryProfileBreakdown.getContexts().put(directoryReader.leaves().get(0), leafProfileBreakdown_1); + // leaf2 profile breakdown is not present in contexts map + + Map queryBreakDownMap = testQueryProfileBreakdown.toBreakdownMap(); + assertFalse(queryBreakDownMap == null || queryBreakDownMap.isEmpty()); + assertEquals(66, queryBreakDownMap.size()); + + for (QueryTimingType queryTimingType : QueryTimingType.values()) { + String timingTypeKey = queryTimingType.toString(); + String timingTypeCountKey = queryTimingType + TIMING_TYPE_COUNT_SUFFIX; + + if (queryTimingType.equals(QueryTimingType.CREATE_WEIGHT)) { + final long createWeightTime = queryBreakDownMap.get(timingTypeKey); + assertEquals(createWeightTimer.getApproximateTiming(), createWeightTime); + assertEquals(1, (long) queryBreakDownMap.get(timingTypeCountKey)); + // verify there is no min/max/avg for weight type stats + assertFalse( + queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeCountKey) + ); + continue; + } + assertEquals(10, (long) queryBreakDownMap.get(timingTypeKey)); + assertEquals(10, (long) queryBreakDownMap.get(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeKey)); + assertEquals(5, (long) queryBreakDownMap.get(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeKey)); + assertEquals(0, (long) queryBreakDownMap.get(MIN_PREFIX + timingTypeKey)); + assertEquals(1, (long) queryBreakDownMap.get(timingTypeCountKey)); + assertEquals(1, (long) queryBreakDownMap.get(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeCountKey)); + // min of 0 means one of the slice didn't worked on any leaf context + assertEquals(0, (long) queryBreakDownMap.get(MIN_PREFIX + timingTypeCountKey)); + assertEquals(0, (long) queryBreakDownMap.get(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeCountKey)); + } + + assertEquals(10, testQueryProfileBreakdown.getMaxSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getMinSliceNodeTime()); + assertEquals(5, testQueryProfileBreakdown.getAvgSliceNodeTime()); directoryReader.close(); directory.close(); }