Merge remote-tracking branch 'apache/master' into kinesis-adaptive-memory-management
zachjsh committed Nov 14, 2023
2 parents ff77302 + 5446494 commit 97a7ae3
Showing 367 changed files with 13,501 additions and 4,402 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/standard-its.yml
@@ -77,7 +77,7 @@ jobs:
strategy:
fail-fast: false
matrix:
-testing_group: [query, query-retry, query-error, security, high-availability]
+testing_group: [query, query-retry, query-error, security, high-availability, centralized-table-schema]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
@@ -195,6 +195,6 @@ jobs:
with:
build_jdk: 8
runtime_jdk: 8
-testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties
+testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-table-schema
use_indexer: ${{ matrix.indexer }}
group: other
@@ -22,8 +22,9 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
-import org.apache.druid.client.BrokerInternalQueryConfig;
+import org.apache.druid.client.InternalQueryConfig;
import org.apache.druid.client.TimelineServerView;
+import org.apache.druid.client.coordinator.NoopCoordinatorClient;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
@@ -37,9 +38,9 @@
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.Escalator;
-import org.apache.druid.sql.calcite.planner.PlannerConfig;
-import org.apache.druid.sql.calcite.planner.SegmentMetadataCacheConfig;
-import org.apache.druid.sql.calcite.schema.SegmentMetadataCache;
+import org.apache.druid.sql.calcite.schema.BrokerSegmentMetadataCache;
+import org.apache.druid.sql.calcite.schema.BrokerSegmentMetadataCacheConfig;
+import org.apache.druid.sql.calcite.schema.PhysicalDatasourceMetadataFactory;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.partition.LinearShardSpec;
@@ -71,27 +72,26 @@ public class DruidSchemaInternRowSignatureBenchmark
{
private SegmentMetadataCacheForBenchmark cache;

-private static class SegmentMetadataCacheForBenchmark extends SegmentMetadataCache
+private static class SegmentMetadataCacheForBenchmark extends BrokerSegmentMetadataCache
{
public SegmentMetadataCacheForBenchmark(
final QueryLifecycleFactory queryLifecycleFactory,
final TimelineServerView serverView,
final SegmentManager segmentManager,
final JoinableFactory joinableFactory,
-final PlannerConfig config,
final Escalator escalator,
-final BrokerInternalQueryConfig brokerInternalQueryConfig
+final InternalQueryConfig brokerInternalQueryConfig
)
{
super(
queryLifecycleFactory,
serverView,
-segmentManager,
-joinableFactory,
-SegmentMetadataCacheConfig.create(),
+BrokerSegmentMetadataCacheConfig.create(),
escalator,
brokerInternalQueryConfig,
-new NoopServiceEmitter()
+new NoopServiceEmitter(),
+new PhysicalDatasourceMetadataFactory(joinableFactory, segmentManager),
+new NoopCoordinatorClient()
);
}

@@ -109,7 +109,7 @@ public void addSegment(final DruidServerMetadata server, final DataSegment segme
}

@Override
-protected Sequence<SegmentAnalysis> runSegmentMetadataQuery(Iterable<SegmentId> segments)
+public Sequence<SegmentAnalysis> runSegmentMetadataQuery(Iterable<SegmentId> segments)
{
final int numColumns = 1000;
LinkedHashMap<String, ColumnAnalysis> columnToAnalysisMap = new LinkedHashMap<>();
@@ -178,10 +178,10 @@ public void setup()
EasyMock.mock(TimelineServerView.class),
null,
null,
-EasyMock.mock(PlannerConfig.class),
null,
null
);

DruidServerMetadata serverMetadata = new DruidServerMetadata(
"dummy",
"dummy",
@@ -46,6 +46,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator;
@@ -63,7 +64,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -36,6 +36,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
@@ -48,7 +49,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -45,6 +45,7 @@
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
@@ -57,7 +58,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -38,6 +38,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
@@ -49,7 +50,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
-import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
7 changes: 7 additions & 0 deletions codestyle/druid-forbidden-apis.txt
@@ -44,6 +44,13 @@ java.util.LinkedList @ Use ArrayList or ArrayDeque instead
java.util.Random#<init>() @ Use ThreadLocalRandom.current() or the constructor with a seed (the latter in tests only!)
java.lang.Math#random() @ Use ThreadLocalRandom.current()
java.util.regex.Pattern#matches(java.lang.String,java.lang.CharSequence) @ Use String.startsWith(), endsWith(), contains(), or compile and cache a Pattern explicitly
+org.apache.calcite.sql.type.OperandTypes#LITERAL @ LITERAL type checker throws when literals with CAST are passed. Use org.apache.druid.sql.calcite.expression.DefaultOperandTypeChecker instead.
+org.apache.calcite.sql.type.OperandTypes#BOOLEAN_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#ARRAY_BOOLEAN_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#POSITIVE_INTEGER_LITERAL @ Use org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL instead
+org.apache.calcite.sql.type.OperandTypes#UNIT_INTERVAL_NUMERIC_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#NUMERIC_UNIT_INTERVAL_NUMERIC_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
+org.apache.calcite.sql.type.OperandTypes#NULLABLE_LITERAL @ Create an instance of org.apache.calcite.sql.type.CastedLiteralOperandTypeChecker that allows nulls and use that instead
org.apache.commons.io.FileUtils#getTempDirectory() @ Use org.junit.rules.TemporaryFolder for tests instead
org.apache.commons.io.FileUtils#deleteDirectory(java.io.File) @ Use org.apache.druid.java.util.common.FileUtils#deleteDirectory()
org.apache.commons.io.FileUtils#forceMkdir(java.io.File) @ Use org.apache.druid.java.util.common.FileUtils.mkdirp instead
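Each entry above pairs a banned signature with a replacement hint, using the `signature @ message` format of the forbidden-apis checker, which fails the build when a match is found. As a minimal sketch of the `java.util.Random` rule near the top of this list (the class name is hypothetical):

```java
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

public class RandomRuleSketch
{
  public static void main(String[] args)
  {
    // Forbidden: the no-arg constructor is banned by the rule above.
    // Random forbidden = new Random();

    // Preferred in production code:
    int n = ThreadLocalRandom.current().nextInt(100);

    // Allowed in tests only: the seeded constructor, for reproducibility.
    Random seeded = new Random(42L);

    System.out.println(n + " " + seeded.nextInt(100));
  }
}
```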
7 changes: 6 additions & 1 deletion dev/style-conventions.md
@@ -36,7 +36,7 @@ While this page might discuss conventions that are also enforced via said mechan
discuss style-related conventions that cannot be (or are extremely difficult to be) enforced through such automated
mechanisms.

-## Message Formatting (Logs and Exceptions)
+## Message formatting for logs and exceptions

The way that log and exception messages get formatted is an important part of a project. Specifically, it is
important that there is consistency in formatting such that someone can easily identify and interpret messages.
@@ -60,3 +60,8 @@ This consistency applies to both log *and* exception messages.
* Good: `log.info("Filter [%s] on column [%s] cannot be applied to type [%s]", "is not null", "null", "INTEGER")`
* After interpolation, clear separation: `"Filter [is not null] on column [null] cannot be applied to type [INTEGER]"`
* With interpolations removed, it is clear what happened, though still hard to figure out which specific thing to adjust: `"Filter on column cannot be applied to type"`
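
To make the bracket convention concrete, here is a minimal sketch using Druid's `Logger` (the class and values are hypothetical, for illustration only):

```java
import org.apache.druid.java.util.common.logger.Logger;

public class FilterLoggingSketch
{
  private static final Logger log = new Logger(FilterLoggingSketch.class);

  public static void main(String[] args)
  {
    // Hypothetical values, for illustration only.
    String filter = "is not null";
    String column = "null";
    String type = "INTEGER";

    // Every interpolated value is wrapped in [%s] brackets, so the rendered
    // message keeps clear value boundaries:
    // "Filter [is not null] on column [null] cannot be applied to type [INTEGER]"
    log.info("Filter [%s] on column [%s] cannot be applied to type [%s]", filter, column, type);
  }
}
```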


+## Documentation style

+For the majority of style considerations, the Apache Druid documentation follows the [Google Developer Documentation Style Guide](https://developers.google.com/style). For more details, see [Contribute to Druid docs](../docs/development/docs-contribute.md#style-guide).
29 changes: 16 additions & 13 deletions distribution/docker/Dockerfile
@@ -17,7 +17,7 @@
# under the License.
#

-ARG JDK_VERSION=11
+ARG JDK_VERSION=17

# The platform is explicitly specified as x64 to build the Druid distribution.
# This is because the distribution cannot be built on arm64 due to a dependency problem with the web-console. See: https://github.com/apache/druid/issues/13012
@@ -49,17 +49,8 @@ RUN --mount=type=cache,target=/root/.m2 VERSION=$(mvn -B -q org.apache.maven.plu
&& tar -zxf ./distribution/target/apache-druid-${VERSION}-bin.tar.gz -C /opt \
&& mv /opt/apache-druid-${VERSION} /opt/druid

-FROM busybox:1.34.1-glibc as busybox

-FROM gcr.io/distroless/java$JDK_VERSION-debian11
-LABEL maintainer="Apache Druid Developers <[email protected]>"

-COPY --from=busybox /bin/busybox /busybox/busybox
-RUN ["/busybox/busybox", "--install", "/bin"]

# Predefined builtin arg, see: https://docs.docker.com/engine/reference/builder/#automatic-platform-args-in-the-global-scope
FROM alpine:3 as bash-static
ARG TARGETARCH

#
# Download bash-static binary to execute scripts that require bash.
# Although bash-static supports multiple platforms, there's no need for us to support them all; amd64 and arm64 are enough.
@@ -73,12 +64,24 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \
echo "Unsupported architecture ($TARGETARCH)" && exit 1; \
fi; \
echo "Downloading bash-static from ${BASH_URL}" \
-&& wget ${BASH_URL} -O /bin/bash \
-&& chmod 755 /bin/bash
+&& wget ${BASH_URL} -O /bin/bash

+FROM busybox:1.35.0-glibc as busybox

+FROM gcr.io/distroless/java$JDK_VERSION-debian12
+LABEL maintainer="Apache Druid Developers <[email protected]>"

+COPY --from=busybox /bin/busybox /busybox/busybox
+RUN ["/busybox/busybox", "--install", "/bin"]


RUN addgroup -S -g 1000 druid \
&& adduser -S -u 1000 -D -H -h /opt/druid -s /bin/sh -g '' -G druid druid


COPY --from=bash-static /bin/bash /bin/bash
+RUN chmod 755 /bin/bash

COPY --chown=druid:druid --from=builder /opt /opt
COPY distribution/docker/druid.sh /druid.sh
COPY distribution/docker/peon.sh /peon.sh
12 changes: 12 additions & 0 deletions docs/api-reference/legacy-metadata-api.md
@@ -116,10 +116,18 @@ Returns a list of all segments for one or more specific datasources enabled in t

Returns a list of all segments for each datasource with the full segment metadata and an extra field `overshadowed`.

+`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&includeRealtimeSegments`

+Returns a list of all published and realtime segments for each datasource with the full segment metadata and extra fields `overshadowed`, `realtime`, and `numRows`. Realtime segments are returned only when `druid.coordinator.centralizedTableSchema.enabled` is set on the Coordinator.

`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&datasources={dataSourceName1}&datasources={dataSourceName2}`

Returns a list of all segments for one or more specific datasources with the full segment metadata and an extra field `overshadowed`.

+`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&includeRealtimeSegments&datasources={dataSourceName1}&datasources={dataSourceName2}`

+Returns a list of all published and realtime segments for the specified datasources with the full segment metadata and extra fields `overshadowed`, `realtime`, and `numRows`. Realtime segments are returned only when `druid.coordinator.centralizedTableSchema.enabled` is set on the Coordinator.

`GET /druid/coordinator/v1/metadata/datasources`

Returns a list of the names of datasources with at least one used segment in the cluster, retrieved from the metadata database. Users should call this API to get the eventual state that the system will be in.
@@ -166,6 +174,10 @@ Returns a list of all segments, overlapping with any of given intervals, for a

Returns a list of all segments, overlapping with any of the given intervals, for a datasource with the full segment metadata as stored in the metadata store. The request body is an array of string ISO 8601 intervals like `[interval1, interval2,...]`&mdash;for example, `["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]`.

+`POST /druid/coordinator/v1/metadata/dataSourceInformation`

+Returns information about the specified datasources, including the datasource schema.
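
A minimal sketch of calling this endpoint from Java, assuming the Coordinator listens on `localhost:8081` and accepts a JSON array of datasource names as the request body (both assumptions, not confirmed by this diff):

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class DataSourceInformationSketch
{
  public static void main(String[] args) throws Exception
  {
    // Assumed request body: a JSON array of datasource names.
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:8081/druid/coordinator/v1/metadata/dataSourceInformation"))
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString("[\"wikipedia\"]"))
        .build();

    HttpResponse<String> response = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString());

    // Prints the HTTP status and the returned datasource information.
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }
}
```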

<a name="coordinator-datasources"></a>

## Datasources