Merge remote-tracking branch 'apache/master' into kinesis-adaptive-memory-management
zachjsh committed Nov 14, 2023
2 parents ff77302 + 5446494 commit 97a7ae3
Showing 367 changed files with 13,501 additions and 4,402 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/standard-its.yml
@@ -77,7 +77,7 @@ jobs:
strategy:
fail-fast: false
matrix:
-testing_group: [query, query-retry, query-error, security, high-availability]
+testing_group: [query, query-retry, query-error, security, high-availability, centralized-table-schema]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
@@ -195,6 +195,6 @@ jobs:
with:
build_jdk: 8
runtime_jdk: 8
-testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties
+testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-table-schema
use_indexer: ${{ matrix.indexer }}
group: other
@@ -22,8 +22,9 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
-import org.apache.druid.client.BrokerInternalQueryConfig;
+import org.apache.druid.client.InternalQueryConfig;
import org.apache.druid.client.TimelineServerView;
+import org.apache.druid.client.coordinator.NoopCoordinatorClient;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
@@ -37,9 +38,9 @@
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.Escalator;
-import org.apache.druid.sql.calcite.planner.PlannerConfig;
-import org.apache.druid.sql.calcite.planner.SegmentMetadataCacheConfig;
-import org.apache.druid.sql.calcite.schema.SegmentMetadataCache;
+import org.apache.druid.sql.calcite.schema.BrokerSegmentMetadataCache;
+import org.apache.druid.sql.calcite.schema.BrokerSegmentMetadataCacheConfig;
+import org.apache.druid.sql.calcite.schema.PhysicalDatasourceMetadataFactory;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.partition.LinearShardSpec;
@@ -71,27 +72,26 @@ public class DruidSchemaInternRowSignatureBenchmark
{
private SegmentMetadataCacheForBenchmark cache;

-private static class SegmentMetadataCacheForBenchmark extends SegmentMetadataCache
+private static class SegmentMetadataCacheForBenchmark extends BrokerSegmentMetadataCache
{
public SegmentMetadataCacheForBenchmark(
final QueryLifecycleFactory queryLifecycleFactory,
final TimelineServerView serverView,
final SegmentManager segmentManager,
final JoinableFactory joinableFactory,
-final PlannerConfig config,
final Escalator escalator,
-final BrokerInternalQueryConfig brokerInternalQueryConfig
+final InternalQueryConfig brokerInternalQueryConfig
)
{
super(
queryLifecycleFactory,
serverView,
-segmentManager,
-joinableFactory,
-SegmentMetadataCacheConfig.create(),
+BrokerSegmentMetadataCacheConfig.create(),
escalator,
brokerInternalQueryConfig,
-new NoopServiceEmitter()
+new NoopServiceEmitter(),
+new PhysicalDatasourceMetadataFactory(joinableFactory, segmentManager),
+new NoopCoordinatorClient()
);
}

@@ -109,7 +109,7 @@ public void addSegment(final DruidServerMetadata server, final DataSegment segme
}

@Override
-protected Sequence<SegmentAnalysis> runSegmentMetadataQuery(Iterable<SegmentId> segments)
+public Sequence<SegmentAnalysis> runSegmentMetadataQuery(Iterable<SegmentId> segments)
{
final int numColumns = 1000;
LinkedHashMap<String, ColumnAnalysis> columnToAnalysisMap = new LinkedHashMap<>();
@@ -178,10 +178,10 @@ public void setup()
EasyMock.mock(TimelineServerView.class),
null,
null,
-EasyMock.mock(PlannerConfig.class),
null,
null
);

DruidServerMetadata serverMetadata = new DruidServerMetadata(
"dummy",
"dummy",
@@ -46,6 +46,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator;
@@ -63,7 +64,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -36,6 +36,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
@@ -48,7 +49,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -45,6 +45,7 @@
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
@@ -57,7 +58,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -38,6 +38,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
@@ -49,7 +50,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
7 changes: 7 additions & 0 deletions codestyle/druid-forbidden-apis.txt
@@ -44,6 +44,13 @@ java.util.LinkedList @ Use ArrayList or ArrayDeque instead
java.util.Random#<init>() @ Use ThreadLocalRandom.current() or the constructor with a seed (the latter in tests only!)
java.lang.Math#random() @ Use ThreadLocalRandom.current()
java.util.regex.Pattern#matches(java.lang.String,java.lang.CharSequence) @ Use String.startsWith(), endsWith(), contains(), or compile and cache a Pattern explicitly
+org.apache.calcite.sql.type.OperandTypes#LITERAL @ LITERAL type checker throws when literals with CAST are passed. Use org.apache.druid.sql.calcite.expression.DefaultOperandTypeChecker instead.
+org.apache.calcite.sql.type.OperandTypes#BOOLEAN_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#ARRAY_BOOLEAN_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#POSITIVE_INTEGER_LITERAL @ Use org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL instead
+org.apache.calcite.sql.type.OperandTypes#UNIT_INTERVAL_NUMERIC_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#NUMERIC_UNIT_INTERVAL_NUMERIC_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#NULLABLE_LITERAL @ Create an instance of org.apache.calcite.sql.type.CastedLiteralOperandTypeChecker that allows nulls and use that instead
org.apache.commons.io.FileUtils#getTempDirectory() @ Use org.junit.rules.TemporaryFolder for tests instead
org.apache.commons.io.FileUtils#deleteDirectory(java.io.File) @ Use org.apache.druid.java.util.common.FileUtils#deleteDirectory()
org.apache.commons.io.FileUtils#forceMkdir(java.io.File) @ Use org.apache.druid.java.util.common.FileUtils.mkdirp instead
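Each entry above pairs a banned signature with a replacement hint, using the `signature @ message` format of the forbidden-apis checker, which fails the build when a match is found. As a minimal sketch of the `java.util.Random` rule near the top of this list (the class name is hypothetical):

```java
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

public class RandomRuleSketch
{
  public static void main(String[] args)
  {
    // Forbidden: the no-arg constructor is banned by the rule above.
    // Random forbidden = new Random();

    // Preferred in production code:
    int n = ThreadLocalRandom.current().nextInt(100);

    // Allowed in tests only: the seeded constructor, for reproducibility.
    Random seeded = new Random(42L);

    System.out.println(n + " " + seeded.nextInt(100));
  }
}
```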
7 changes: 6 additions & 1 deletion dev/style-conventions.md
@@ -36,7 +36,7 @@ While this page might discuss conventions that are also enforced via said mechan
discuss style-related conventions that cannot be (or are extremely difficult to be) enforced through such automated
mechanisms.

-## Message Formatting (Logs and Exceptions)
+## Message formatting for logs and exceptions

The way that log and exception messages get formatted is an important part of a project. Specifically, it is
important that there is consistency in formatting such that someone can easily identify and interpret messages.
@@ -60,3 +60,8 @@ This consistency applies to both log *and* exception messages.
* Good: `log.info("Filter [%s] on column [%s] cannot be applied to type [%s]", "is not null", "null", "INTEGER")`
* After interpolation, clear separation: `"Filter [is not null] on column [null] cannot be applied to type [INTEGER]"`
* With interpolations removed, it is clear what happened, though still hard to figure out which specific thing to adjust: `"Filter on column cannot be applied to type"`
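
To make the bracket convention concrete, here is a minimal sketch using Druid's `Logger` (the class and values are hypothetical, for illustration only):

```java
import org.apache.druid.java.util.common.logger.Logger;

public class FilterLoggingSketch
{
  private static final Logger log = new Logger(FilterLoggingSketch.class);

  public static void main(String[] args)
  {
    // Hypothetical values, for illustration only.
    String filter = "is not null";
    String column = "null";
    String type = "INTEGER";

    // Every interpolated value is wrapped in [%s] brackets, so the rendered
    // message keeps clear value boundaries:
    // "Filter [is not null] on column [null] cannot be applied to type [INTEGER]"
    log.info("Filter [%s] on column [%s] cannot be applied to type [%s]", filter, column, type);
  }
}
```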


+## Documentation style

+For the majority of style considerations, the Apache Druid documentation follows the [Google Developer Documentation Style Guide](https://developers.google.com/style). For more details, see [Contribute to Druid docs](../docs/development/docs-contribute.md#style-guide).
29 changes: 16 additions & 13 deletions distribution/docker/Dockerfile
@@ -17,7 +17,7 @@
# under the License.
#

-ARG JDK_VERSION=11
+ARG JDK_VERSION=17

# The platform is explicitly specified as x64 to build the Druid distribution.
# This is because the distribution cannot be built on arm64 due to a dependency problem with the web-console. See: https://github.com/apache/druid/issues/13012
@@ -49,17 +49,8 @@ RUN --mount=type=cache,target=/root/.m2 VERSION=$(mvn -B -q org.apache.maven.plu
&& tar -zxf ./distribution/target/apache-druid-${VERSION}-bin.tar.gz -C /opt \
&& mv /opt/apache-druid-${VERSION} /opt/druid

-FROM busybox:1.34.1-glibc as busybox

-FROM gcr.io/distroless/java$JDK_VERSION-debian11
-LABEL maintainer="Apache Druid Developers <[email protected]>"

-COPY --from=busybox /bin/busybox /busybox/busybox
-RUN ["/busybox/busybox", "--install", "/bin"]

# Predefined builtin arg, see: https://docs.docker.com/engine/reference/builder/#automatic-platform-args-in-the-global-scope
FROM alpine:3 as bash-static
ARG TARGETARCH

#
# Download bash-static binary to execute scripts that require bash.
# Although bash-static supports multiple platforms, there's no need for us to support them all; amd64 and arm64 are enough.
@@ -73,12 +64,24 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \
echo "Unsupported architecture ($TARGETARCH)" && exit 1; \
fi; \
echo "Downloading bash-static from ${BASH_URL}" \
-&& wget ${BASH_URL} -O /bin/bash \
-&& chmod 755 /bin/bash
+&& wget ${BASH_URL} -O /bin/bash

+FROM busybox:1.35.0-glibc as busybox

+FROM gcr.io/distroless/java$JDK_VERSION-debian12
+LABEL maintainer="Apache Druid Developers <[email protected]>"

+COPY --from=busybox /bin/busybox /busybox/busybox
+RUN ["/busybox/busybox", "--install", "/bin"]


RUN addgroup -S -g 1000 druid \
&& adduser -S -u 1000 -D -H -h /opt/druid -s /bin/sh -g '' -G druid druid


COPY --from=bash-static /bin/bash /bin/bash
+RUN chmod 755 /bin/bash

COPY --chown=druid:druid --from=builder /opt /opt
COPY distribution/docker/druid.sh /druid.sh
COPY distribution/docker/peon.sh /peon.sh
12 changes: 12 additions & 0 deletions docs/api-reference/legacy-metadata-api.md
@@ -116,10 +116,18 @@ Returns a list of all segments for one or more specific datasources enabled in t

Returns a list of all segments for each datasource with the full segment metadata and an extra field `overshadowed`.

+`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&includeRealtimeSegments`

+Returns a list of all published and realtime segments for each datasource with the full segment metadata and extra fields `overshadowed`, `realtime`, and `numRows`. Realtime segments are returned only when `druid.coordinator.centralizedTableSchema.enabled` is set on the Coordinator.

`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&datasources={dataSourceName1}&datasources={dataSourceName2}`

Returns a list of all segments for one or more specific datasources with the full segment metadata and an extra field `overshadowed`.

+`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&includeRealtimeSegments&datasources={dataSourceName1}&datasources={dataSourceName2}`

+Returns a list of all published and realtime segments for the specified datasources with the full segment metadata and extra fields `overshadowed`, `realtime`, and `numRows`. Realtime segments are returned only when `druid.coordinator.centralizedTableSchema.enabled` is set on the Coordinator.

`GET /druid/coordinator/v1/metadata/datasources`

Returns a list of the names of datasources with at least one used segment in the cluster, retrieved from the metadata database. Users should call this API to get the eventual state that the system will be in.
@@ -166,6 +174,10 @@ Returns a list of all segments, overlapping with any of given intervals, for a

Returns a list of all segments, overlapping with any of the given intervals, for a datasource with the full segment metadata as stored in the metadata store. The request body is an array of string ISO 8601 intervals like `[interval1, interval2,...]`&mdash;for example, `["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]`.

+`POST /druid/coordinator/v1/metadata/dataSourceInformation`

+Returns information about the specified datasources, including the datasource schema.
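
A minimal sketch of calling this endpoint from Java, assuming the Coordinator listens on `localhost:8081` and accepts a JSON array of datasource names as the request body (both assumptions, not confirmed by this diff):

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class DataSourceInformationSketch
{
  public static void main(String[] args) throws Exception
  {
    // Assumed request body: a JSON array of datasource names.
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:8081/druid/coordinator/v1/metadata/dataSourceInformation"))
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString("[\"wikipedia\"]"))
        .build();

    HttpResponse<String> response = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString());

    // Prints the HTTP status and the returned datasource information.
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }
}
```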

<a name="coordinator-datasources"></a>

## Datasources