
Commit

Merge remote-tracking branch 'upstream/master'
317brian committed Nov 14, 2023
2 parents 2bd0d6a + 5edeac2 commit 03cc774
Showing 240 changed files with 8,853 additions and 2,061 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/standard-its.yml
@@ -77,7 +77,7 @@ jobs:
strategy:
fail-fast: false
matrix:
testing_group: [query, query-retry, query-error, security, high-availability]
testing_group: [query, query-retry, query-error, security, high-availability, centralized-table-schema]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
@@ -195,6 +195,6 @@ jobs:
with:
build_jdk: 8
runtime_jdk: 8
testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties
testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-table-schema
use_indexer: ${{ matrix.indexer }}
group: other
@@ -22,8 +22,9 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import org.apache.druid.client.BrokerInternalQueryConfig;
import org.apache.druid.client.InternalQueryConfig;
import org.apache.druid.client.TimelineServerView;
import org.apache.druid.client.coordinator.NoopCoordinatorClient;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
@@ -37,9 +38,9 @@
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.Escalator;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.SegmentMetadataCacheConfig;
import org.apache.druid.sql.calcite.schema.SegmentMetadataCache;
import org.apache.druid.sql.calcite.schema.BrokerSegmentMetadataCache;
import org.apache.druid.sql.calcite.schema.BrokerSegmentMetadataCacheConfig;
import org.apache.druid.sql.calcite.schema.PhysicalDatasourceMetadataFactory;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.partition.LinearShardSpec;
@@ -71,27 +72,26 @@ public class DruidSchemaInternRowSignatureBenchmark
{
private SegmentMetadataCacheForBenchmark cache;

private static class SegmentMetadataCacheForBenchmark extends SegmentMetadataCache
private static class SegmentMetadataCacheForBenchmark extends BrokerSegmentMetadataCache
{
public SegmentMetadataCacheForBenchmark(
final QueryLifecycleFactory queryLifecycleFactory,
final TimelineServerView serverView,
final SegmentManager segmentManager,
final JoinableFactory joinableFactory,
final PlannerConfig config,
final Escalator escalator,
final BrokerInternalQueryConfig brokerInternalQueryConfig
final InternalQueryConfig brokerInternalQueryConfig
)
{
super(
queryLifecycleFactory,
serverView,
segmentManager,
joinableFactory,
SegmentMetadataCacheConfig.create(),
BrokerSegmentMetadataCacheConfig.create(),
escalator,
brokerInternalQueryConfig,
new NoopServiceEmitter()
new NoopServiceEmitter(),
new PhysicalDatasourceMetadataFactory(joinableFactory, segmentManager),
new NoopCoordinatorClient()
);
}

@@ -109,7 +109,7 @@ public void addSegment(final DruidServerMetadata server, final DataSegment segme
}

@Override
protected Sequence<SegmentAnalysis> runSegmentMetadataQuery(Iterable<SegmentId> segments)
public Sequence<SegmentAnalysis> runSegmentMetadataQuery(Iterable<SegmentId> segments)
{
final int numColumns = 1000;
LinkedHashMap<String, ColumnAnalysis> columnToAnalysisMap = new LinkedHashMap<>();
@@ -178,10 +178,10 @@ public void setup()
EasyMock.mock(TimelineServerView.class),
null,
null,
EasyMock.mock(PlannerConfig.class),
null,
null
);

DruidServerMetadata serverMetadata = new DruidServerMetadata(
"dummy",
"dummy",
@@ -46,6 +46,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator;
@@ -63,7 +64,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -36,6 +36,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
@@ -48,7 +49,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -45,6 +45,7 @@
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
@@ -57,7 +58,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
@@ -38,6 +38,7 @@
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
@@ -49,7 +50,6 @@
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
6 changes: 6 additions & 0 deletions codestyle/druid-forbidden-apis.txt
@@ -45,6 +45,12 @@ java.util.Random#<init>() @ Use ThreadLocalRandom.current() or the constructor w
java.lang.Math#random() @ Use ThreadLocalRandom.current()
java.util.regex.Pattern#matches(java.lang.String,java.lang.CharSequence) @ Use String.startsWith(), endsWith(), contains(), or compile and cache a Pattern explicitly
org.apache.calcite.sql.type.OperandTypes#LITERAL @ LITERAL type checker throws when literals with CAST are passed. Use org.apache.druid.sql.calcite.expression.DefaultOperandTypeChecker instead.
org.apache.calcite.sql.type.OperandTypes#BOOLEAN_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
org.apache.calcite.sql.type.OperandTypes#ARRAY_BOOLEAN_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
org.apache.calcite.sql.type.OperandTypes#POSITIVE_INTEGER_LITERAL @ Use org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL instead
org.apache.calcite.sql.type.OperandTypes#UNIT_INTERVAL_NUMERIC_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
org.apache.calcite.sql.type.OperandTypes#NUMERIC_UNIT_INTERVAL_NUMERIC_LITERAL @ Create a type checker like org.apache.calcite.sql.type.POSITIVE_INTEGER_LITERAL and use that instead
org.apache.calcite.sql.type.OperandTypes#NULLABLE_LITERAL @ Create an instance of org.apache.calcite.sql.type.CastedLiteralOperandTypeChecker that allows nulls and use that instead
org.apache.commons.io.FileUtils#getTempDirectory() @ Use org.junit.rules.TemporaryFolder for tests instead
org.apache.commons.io.FileUtils#deleteDirectory(java.io.File) @ Use org.apache.druid.java.util.common.FileUtils#deleteDirectory()
org.apache.commons.io.FileUtils#forceMkdir(java.io.File) @ Use org.apache.druid.java.util.common.FileUtils.mkdirp instead
12 changes: 12 additions & 0 deletions docs/api-reference/legacy-metadata-api.md
@@ -116,10 +116,18 @@ Returns a list of all segments for one or more specific datasources enabled in t

Returns a list of all segments for each datasource with the full segment metadata and an extra field `overshadowed`.

`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&includeRealtimeSegments`

Returns a list of all published and realtime segments for each datasource with the full segment metadata and the extra fields `overshadowed`, `realtime`, and `numRows`. Realtime segments are returned only when `druid.coordinator.centralizedTableSchema.enabled` is set to true on the Coordinator.
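
The sketch below is a hedged example of calling this endpoint; it is not part of the commit. It assumes a Coordinator at `localhost:8081`, no authentication, and the Python `requests` library, and the per-segment field names in the loop are illustrative, based on the fields described above.

```python
# Minimal sketch: list published and realtime segments from the Coordinator.
# Assumptions: Coordinator at localhost:8081, no auth, `requests` installed.
import requests

url = (
    "http://localhost:8081/druid/coordinator/v1/metadata/segments"
    "?includeOvershadowedStatus&includeRealtimeSegments"
)

response = requests.get(url)
response.raise_for_status()

for segment in response.json():
    # Each entry carries the full segment metadata plus the extra fields
    # `overshadowed`, `realtime`, and `numRows` described above.
    print(segment.get("dataSource"), segment.get("realtime"), segment.get("numRows"))
```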

`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&datasources={dataSourceName1}&datasources={dataSourceName2}`

Returns a list of all segments for one or more specific datasources with the full segment metadata and an extra field `overshadowed`.

`GET /druid/coordinator/v1/metadata/segments?includeOvershadowedStatus&includeRealtimeSegments&datasources={dataSourceName1}&datasources={dataSourceName2}`

Returns a list of all published and realtime segments for the specified datasources with the full segment metadata and the extra fields `overshadowed`, `realtime`, and `numRows`. Realtime segments are returned only when `druid.coordinator.centralizedTableSchema.enabled` is set to true on the Coordinator.

`GET /druid/coordinator/v1/metadata/datasources`

Returns a list of the names of datasources with at least one used segment in the cluster, retrieved from the metadata database. Users should call this API to get the eventual state that the system will be in.
@@ -166,6 +174,10 @@ Returns a list of all segments, overlapping with any of given intervals, for a

Returns a list of all segments overlapping any of the given intervals for a datasource, with the full segment metadata as stored in the metadata store. The request body is an array of ISO 8601 interval strings, like `[interval1, interval2,...]`&mdash;for example, `["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]`.

`POST /druid/coordinator/v1/metadata/dataSourceInformation`

Returns information about the specified datasources, including the datasource schema.
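
A hedged sketch of calling this endpoint follows; it is not part of the commit. The request body format (a JSON array of datasource names) and the Coordinator address are assumptions, and the datasource names are illustrative.

```python
# Minimal sketch: request schema information for specific datasources.
# Assumptions: Coordinator at localhost:8081, no auth, and a request body
# that is a JSON array of datasource names (not confirmed by this commit).
import requests

url = "http://localhost:8081/druid/coordinator/v1/metadata/dataSourceInformation"
datasources = ["wikipedia", "koalas"]  # illustrative names

response = requests.post(url, json=datasources)
response.raise_for_status()
print(response.json())
```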

<a name="coordinator-datasources"></a>

## Datasources
3 changes: 2 additions & 1 deletion docs/configuration/index.md
@@ -340,7 +340,7 @@ Requests that meet the threshold are logged using the request logger type set in
|--------|-----------|-------|
|`druid.request.logging.queryTimeThresholdMs`|Threshold value for the `query/time` metric in milliseconds.|0, i.e., no filtering|
|`druid.request.logging.sqlQueryTimeThresholdMs`|Threshold value for the `sqlQuery/time` metric in milliseconds.|0, i.e., no filtering|
|`druid.request.logging.mutedQueryTypes` | Query requests of these types are not logged. Query types are defined as string objects corresponding to the "queryType" value for the specified query in the Druid's [native JSON query API](http://druid.apache.org/docs/latest/querying/querying). Misspelled query types will be ignored. Example to ignore scan and timeBoundary queries: `["scan", "timeBoundary"]`| []|
|`druid.request.logging.mutedQueryTypes` | Query requests of these types are not logged. Query types are defined as string objects corresponding to the "queryType" value for the specified query in the Druid's [native JSON query API](../querying/querying.md). Misspelled query types will be ignored. Example to ignore scan and timeBoundary queries: `["scan", "timeBoundary"]`| []|
|`druid.request.logging.delegate.type`|Type of delegate request logger to log requests.|none|

#### Composing request logging
@@ -876,6 +876,7 @@ These Coordinator static configurations can be defined in the `coordinator/runti
|`druid.coordinator.loadqueuepeon.repeatDelay`|The start and repeat delay for the `loadqueuepeon`, which manages the load and drop of segments.|`PT0.050S` (50 ms)|
|`druid.coordinator.asOverlord.enabled`|Boolean value for whether this Coordinator service should act like an Overlord as well. This configuration allows users to simplify a Druid cluster by not having to deploy any standalone Overlord services. If set to true, the Overlord console is available at `http://coordinator-host:port/console.html`; be sure to also set `druid.coordinator.asOverlord.overlordService`.|false|
|`druid.coordinator.asOverlord.overlordService`| Required if `druid.coordinator.asOverlord.enabled` is `true`. This must be the same value as `druid.service` on standalone Overlord services and `druid.selectors.indexing.serviceName` on Middle Managers.|NULL|
|`druid.coordinator.centralizedTableSchema.enabled`|Boolean flag for enabling table schema building on the Coordinator.|false|

##### Metadata management

22 changes: 13 additions & 9 deletions docs/design/metadata-storage.md
@@ -44,7 +44,19 @@ See [Metadata storage configuration](../configuration/index.md#metadata-storage)

## Available metadata stores

Druid supports Derby, MySQL, and PostgreSQL for storing metadata.

To avoid issues with upgrades that require schema changes to a large metadata table, consider a metadata store version that supports instant ADD COLUMN semantics.
See the database-specific docs for guidance on versions.

### MySQL

See [mysql-metadata-storage extension documentation](../development/extensions-core/mysql.md).

### PostgreSQL

See [postgresql-metadata-storage](../development/extensions-core/postgresql.md).


### Derby

@@ -59,14 +71,6 @@ druid.metadata.storage.type=derby
druid.metadata.storage.connector.connectURI=jdbc:derby://localhost:1527//opt/var/druid_state/derby;create=true
```

### MySQL

See [mysql-metadata-storage extension documentation](../development/extensions-core/mysql.md).

### PostgreSQL

See [postgresql-metadata-storage](../development/extensions-core/postgresql.md).

## Adding custom DBCP properties

You can add custom properties to customize the database connection pool (DBCP) for connecting to the metadata store.
4 changes: 2 additions & 2 deletions docs/development/extensions-contrib/prometheus.md
@@ -107,8 +107,8 @@ For metrics which are emitted from multiple services with different dimensions,
the service name. For example:

```json
"coordinator-segment/count" : { "dimensions" : ["dataSource"], "type" : "gauge" },
"historical-segment/count" : { "dimensions" : ["dataSource", "tier", "priority"], "type" : "gauge" }
"druid/coordinator-segment/count" : { "dimensions" : ["dataSource"], "type" : "gauge" },
"druid/historical-segment/count" : { "dimensions" : ["dataSource", "tier", "priority"], "type" : "gauge" }
```

For most use cases, the default mapping is sufficient.
6 changes: 3 additions & 3 deletions docs/development/extensions-contrib/statsd.md
@@ -65,7 +65,7 @@ e.g.
For metrics which are emitted from multiple services with different dimensions, the metric name is prefixed with
the service name.
e.g.
`"coordinator-segment/count" : { "dimensions" : ["dataSource"], "type" : "gauge" },
"historical-segment/count" : { "dimensions" : ["dataSource", "tier", "priority"], "type" : "gauge" }`
`"druid/coordinator-segment/count" : { "dimensions" : ["dataSource"], "type" : "gauge" },
"druid/historical-segment/count" : { "dimensions" : ["dataSource", "tier", "priority"], "type" : "gauge" }`

For most use-cases, the default mapping is sufficient.
1 change: 1 addition & 0 deletions docs/development/extensions-core/lookups-cached-global.md
@@ -353,6 +353,7 @@ The JDBC lookups will poll a database to populate its local cache. If the `tsCol
|`tsColumn`| The column in `table` which contains when the key was updated|No|Not used|
|`pollPeriod`|How often to poll the DB|No|0 (only once)|
|`jitterSeconds`| How much jitter to add (in seconds) up to maximum as a delay (actual value will be used as random from 0 to `jitterSeconds`), used to distribute db load more evenly|No|0|
|`loadTimeoutSeconds`| Maximum time (in seconds) allowed to query and populate lookup values. This is most useful during lookup updates: on an update, Druid waits up to `loadTimeoutSeconds` for the new lookup to load and keeps serving from the old lookup until the new one loads successfully. |No|0|
|`maxHeapPercentage`|The maximum percentage of heap size that the lookup should consume. If the lookup grows beyond this size, warning messages will be logged in the respective service logs.|No|10% of JVM heap size|

```json
2 changes: 2 additions & 0 deletions docs/development/extensions-core/mysql.md
@@ -61,6 +61,8 @@ Depending on the MariaDB client library version, the connector supports both `jd

## Setting up MySQL

To avoid issues with upgrades that require schema changes to a large metadata table, consider a MySQL version that supports instant ADD COLUMN semantics. For example, MySQL 8.

1. Install MySQL

Use your favorite package manager to install mysql, e.g.:
2 changes: 2 additions & 0 deletions docs/development/extensions-core/postgresql.md
@@ -27,6 +27,8 @@ To use this Apache Druid extension, [include](../../configuration/extensions.md#

## Setting up PostgreSQL

To avoid issues with upgrades that require schema changes to a large metadata table, consider a PostgreSQL version that supports instant ADD COLUMN semantics.

1. Install PostgreSQL

Use your favorite package manager to install PostgreSQL, e.g.:

1 comment on commit 03cc774

@vercel vercel bot commented on 03cc774 Nov 14, 2023

Successfully deployed to the following URLs:

druid – ./

druid-git-master-317brian.vercel.app
druid-317brian.vercel.app
druid-phi.vercel.app
