
Updating plans when using joins with unnest on the left #15075

Merged: 11 commits (Oct 7, 2023)
64 changes: 50 additions & 14 deletions processing/src/main/java/org/apache/druid/query/JoinDataSource.java
@@ -476,10 +476,25 @@ private Function<SegmentReference, SegmentReference> createSegmentMapFunctionInternal
.orElse(null)
)
);

final Function<SegmentReference, SegmentReference> baseMapFn;
// A join data source is not concrete,
// and isConcrete() of an unnest datasource delegates to its base.
// Hence, in the case of a Join -> Unnest -> Join,
// if we just used isConcrete() on the left,
// the segment map function for the unnest would never get called.
// This requires us to delegate to the segmentMapFunction of the left
// only when it is not a JoinDataSource.
if (left instanceof JoinDataSource) {
Member: This seems worth a comment on what is going on. Is it still ok to do if left is not concrete?

Contributor Author: I'll add a comment as to why we are not using the isConcrete() check and instead using the instanceof check here.

Contributor Author: Comment is added.
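
For context, a minimal sketch (not part of the patch) of why the gate is instanceof rather than isConcrete(). It reuses the constructors that appear in this PR's tests; the variable names and standalone framing are illustrative assumptions:

// Illustrative only: an unnest datasource wrapping a concrete table,
// the shape that sits on the left of the join in this PR.
DataSource left = UnnestDataSource.create(
    new TableDataSource("table1"),
    new ExpressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING, ExprMacroTable.nil()),
    null
);

// isConcrete() delegates to the base table, so it reports true here.
// Gating on it would therefore pick Function.identity(), and the unnest's
// own segment map function would never run; gating on
// `left instanceof JoinDataSource` keeps the delegation for this case.
boolean looksConcrete = left.isConcrete();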

  baseMapFn = Function.identity();
} else {
  baseMapFn = left.createSegmentMapFunction(
      query,
      cpuTimeAccumulator
  );
}
return baseSegment ->
    new HashJoinSegment(
        baseSegment,
        baseMapFn.apply(baseSegment),
        baseFilterToUse,
        GuavaUtils.firstNonNull(clausesToUse, ImmutableList.of()),
        joinFilterPreAnalysis
@@ -501,18 +516,39 @@ private static Triple<DataSource, DimFilter, List<PreJoinableClause>> flattenJoin
DimFilter currentDimFilter = null;
final List<PreJoinableClause> preJoinableClauses = new ArrayList<>();

while (current instanceof JoinDataSource) {
  final JoinDataSource joinDataSource = (JoinDataSource) current;
  current = joinDataSource.getLeft();
  currentDimFilter = validateLeftFilter(current, joinDataSource.getLeftFilter());
  preJoinableClauses.add(
      new PreJoinableClause(
          joinDataSource.getRightPrefix(),
          joinDataSource.getRight(),
          joinDataSource.getJoinType(),
          joinDataSource.getConditionAnalysis()
      )
  );
// There can be queries like
// Join of Unnest of Join of Unnest of Filter,
// so these checks need to be ORed together to reach the base.
// This method is called to get the analysis for the join data source.
// Since the analysis of an UnnestDS or FilteredDS always delegates to its base,
// to obtain the base data source underneath a Join
// we also iterate through the bases of the FilteredDS and UnnestDS on its path,
// the bottom of which can be a concrete data source.
// This also means that adding a new datasource
// will need an instanceof check here.
// Future work should look into whether flattenJoin
// can be refactored to omit these instanceof checks.
while (current instanceof JoinDataSource || current instanceof UnnestDataSource || current instanceof FilteredDataSource) {
Contributor: Can we add test cases for self join with unnest datasource, if we do not have them already?

Contributor Author: Thanks, added a test with self join on an unnest data source.
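
A sketch of what such a self-join test could look like, mirroring the tests added later in this diff, with the right side reusing table1. The test name and the LEFT join type are illustrative assumptions, not the exact test that was added:

@Test
public void testGetAnalysisWithSelfJoinOnUnnest()
{
  // Self join: table1 appears both under the unnest on the left and on the right.
  JoinDataSource selfJoin = JoinDataSource.create(
      UnnestDataSource.create(
          new TableDataSource("table1"),
          new ExpressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING, ExprMacroTable.nil()),
          null
      ),
      new TableDataSource("table1"),
      "j.",
      "x == \"j.x\"",
      JoinType.LEFT,
      null,
      ExprMacroTable.nil(),
      null
  );
  // The flatten loop should resolve the base through the unnest to table1.
  Assert.assertEquals("table1", selfJoin.getAnalysis().getBaseDataSource().getTableNames().iterator().next());
}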

  if (current instanceof JoinDataSource) {
    final JoinDataSource joinDataSource = (JoinDataSource) current;
    current = joinDataSource.getLeft();
    currentDimFilter = validateLeftFilter(current, joinDataSource.getLeftFilter());
    preJoinableClauses.add(
        new PreJoinableClause(
            joinDataSource.getRightPrefix(),
            joinDataSource.getRight(),
            joinDataSource.getJoinType(),
            joinDataSource.getConditionAnalysis()
        )
    );
  } else if (current instanceof UnnestDataSource) {
Member: It doesn't seem intuitive to me that we can flatten away unnest and filtered datasources. Could we add comments explaining why it's ok? Is it still ok if the unnest datasource is wrapping a join datasource? Does it flatten through it? Where do the unnest and filters go in that case?

Contributor Author (@somu-imply, Oct 6, 2023): I'll add comments. The getAnalysis() of an Unnest or a FilteredDS always delegates to its base, so flattening through a Join -> Unnest -> Join kind of scenario to get the base data source makes sense: it goes down to find the base concrete data source. In this PR, the filters on the FilteredDataSource and UnnestDataSource are not pushed down to the left of the join; the unnest filter and the filter on the FilteredDataSource remain on the data source. I have added a unit test of Join -> Unnest -> Join and will add another UT of Join -> Unnest -> Filter -> Join.

Contributor Author: Comment and unit test added.
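
To make that concrete, a hedged sketch of the Join -> Unnest -> Join shape discussed in this thread, built with the same constructors as the tests in this diff; the prefixes, join conditions, and table3 are illustrative assumptions:

// Inner join: table1 LEFT JOIN table2.
JoinDataSource inner = JoinDataSource.create(
    new TableDataSource("table1"),
    new TableDataSource("table2"),
    "j.",
    "x == \"j.x\"",
    JoinType.LEFT,
    null,
    ExprMacroTable.nil(),
    null
);

// Outer join: Unnest(inner) LEFT JOIN table3.
JoinDataSource outer = JoinDataSource.create(
    UnnestDataSource.create(
        inner,
        new ExpressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING, ExprMacroTable.nil()),
        null
    ),
    new TableDataSource("table3"),
    "_j.",
    "y == \"_j.y\"",
    JoinType.LEFT,
    null,
    ExprMacroTable.nil(),
    null
);

// getAnalysis() walks Join -> Unnest -> Join down to the concrete base, table1;
// the unnest and any filters are traversed for analysis, not pushed down.
String base = outer.getAnalysis().getBaseDataSource().getTableNames().iterator().next();  // "table1"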

    final UnnestDataSource unnestDataSource = (UnnestDataSource) current;
    current = unnestDataSource.getBase();
  } else {
    final FilteredDataSource filteredDataSource = (FilteredDataSource) current;
    current = filteredDataSource.getBase();
  }
}

// Join clauses were added in the order we saw them while traversing down, but we need to apply them in the
@@ -29,11 +29,14 @@
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.query.filter.TrueDimFilter;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.JoinType;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.join.NoopJoinableFactory;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.easymock.Mock;
import org.junit.Assert;
import org.junit.Rule;
@@ -433,6 +436,51 @@ public void test_computeJoinDataSourceCacheKey_keyChangesWithPrefix()
Assert.assertFalse(Arrays.equals(cacheKey1, cacheKey2));
}

@Test
public void testGetAnalysisWithUnnestDS()
{
  JoinDataSource dataSource = JoinDataSource.create(
      UnnestDataSource.create(
          new TableDataSource("table1"),
          new ExpressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING, ExprMacroTable.nil()),
          null
      ),
      new TableDataSource("table2"),
      "j.",
      "x == \"j.x\"",
      JoinType.LEFT,
      null,
      ExprMacroTable.nil(),
      null
  );
  DataSourceAnalysis analysis = dataSource.getAnalysis();
  Assert.assertEquals("table1", analysis.getBaseDataSource().getTableNames().iterator().next());
}

@Test
public void testGetAnalysisWithFilteredDS()
{
  JoinDataSource dataSource = JoinDataSource.create(
      UnnestDataSource.create(
          FilteredDataSource.create(
              new TableDataSource("table1"),
              TrueDimFilter.instance()
          ),
          new ExpressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING, ExprMacroTable.nil()),
          null
      ),
      new TableDataSource("table2"),
      "j.",
      "x == \"j.x\"",
      JoinType.LEFT,
      null,
      ExprMacroTable.nil(),
      null
  );
  DataSourceAnalysis analysis = dataSource.getAnalysis();
  Assert.assertEquals("table1", analysis.getBaseDataSource().getTableNames().iterator().next());
}

@Test
public void test_computeJoinDataSourceCacheKey_keyChangesWithBaseFilter()
{
@@ -66,7 +66,7 @@ public static boolean isScanOrMapping(final DruidRel<?> druidRel, final boolean
*/
public static boolean isScanOrProject(final DruidRel<?> druidRel, final boolean canBeJoinOrUnion)
{
  if (druidRel instanceof DruidQueryRel || (canBeJoinOrUnion && (druidRel instanceof DruidJoinQueryRel
  if (druidRel instanceof DruidQueryRel || (canBeJoinOrUnion && (druidRel instanceof DruidJoinQueryRel || druidRel instanceof DruidCorrelateUnnestRel
      || druidRel instanceof DruidUnionDataSourceRel))) {
    final PartialDruidQuery partialQuery = druidRel.getPartialDruidQuery();
    final PartialDruidQuery.Stage stage = partialQuery.stage();