Skip to content

Commit

Permalink
Merge branch 'apache:dev' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaochen-zhou authored Dec 14, 2024
2 parents 2d12150 + a139595 commit c634974
Show file tree
Hide file tree
Showing 48 changed files with 672 additions and 312 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 0`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down Expand Up @@ -423,6 +424,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 1`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down Expand Up @@ -453,6 +455,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 2`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down Expand Up @@ -483,6 +486,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 3`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down Expand Up @@ -512,6 +516,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 4`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down Expand Up @@ -541,6 +546,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 5`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down Expand Up @@ -570,6 +576,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 6`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down Expand Up @@ -600,6 +607,7 @@ jobs:
run: |
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 7`
if [ ! -z $sub_modules ]; then
echo $sub_modules
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
else
echo "sub modules is empty, skipping"
Expand Down
35 changes: 16 additions & 19 deletions docs/en/concept/schema-evolution.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,7 @@
# Schema evolution
Schema Evolution means that the schema of a data table can be changed and the data synchronization task can automatically adapt to the changes of the new table structure without any other operations.
Currently, we only support the `add column`, `drop column`, `rename column` and `modify column` operations on tables in CDC sources. This feature is currently supported only by the Zeta engine.

## Supported engines

- Zeta

## Supported schema change event types

- `ADD COLUMN`
- `DROP COLUMN`
- `RENAME COLUMN`
- `MODIFY COLUMN`

## Supported connectors

Expand All @@ -30,7 +21,7 @@ When you use the Oracle-CDC,you can not use the username named `SYS` or `SYSTE
In addition, if your table name starts with `ORA_TEMP_`, it will have the same problem.

## Enable schema evolution
Schema evolution is disabled by default in CDC source. You need to configure `schema-changes.enabled = true`, which is only supported in CDC, to enable it.
Schema evolution is disabled by default in CDC source. You need to configure `debezium.include.schema.changes = true`, which is only supported in CDC, to enable it. When you use Oracle-CDC with schema evolution enabled, you must set `log.mining.strategy` to `redo_log_catalog` in the `debezium` attribute.

## Examples

Expand All @@ -52,8 +43,9 @@ source {
password = "mysqlpw"
table-names = ["shop.products"]
base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
schema-changes.enabled = true
debezium = {
include.schema.changes = true
}
}
}
Expand Down Expand Up @@ -94,8 +86,10 @@ source {
base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB"
source.reader.close.timeout = 120000
connection.pool.size = 1
schema-changes.enabled = true
debezium {
include.schema.changes = true
log.mining.strategy = redo_log_catalog
}
}
}
Expand Down Expand Up @@ -137,8 +131,10 @@ source {
base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB"
source.reader.close.timeout = 120000
connection.pool.size = 1
schema-changes.enabled = true
debezium {
include.schema.changes = true
log.mining.strategy = redo_log_catalog
}
}
}
Expand Down Expand Up @@ -173,8 +169,9 @@ source {
password = "mysqlpw"
table-names = ["shop.products"]
base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
schema-changes.enabled = true
debezium = {
include.schema.changes = true
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions docs/en/connector-v2/formats/cdc-compatible-debezium-json.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ source {
# include schema into kafka message
key.converter.schemas.enable = false
value.converter.schemas.enable = false
# include ddl
include.schema.changes = true
# topic prefix
database.server.name = "mysql_cdc_1"
}
Expand Down
5 changes: 3 additions & 2 deletions docs/en/connector-v2/sink/Paimon.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ source {
password = "mysqlpw"
table-names = ["shop.products"]
base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
schema-changes.enabled = true
debezium = {
include.schema.changes = true
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions docs/en/connector-v2/source/MySQL-CDC.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ When an initial consistent snapshot is made for large databases, your establishe
| inverse-sampling.rate | Integer | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. |
| exactly_once | Boolean | No | false | Enable exactly once semantic. |
| format | Enum | No | DEFAULT | Optional output format for MySQL CDC, valid enumerations are `DEFAULT` and `COMPATIBLE_DEBEZIUM_JSON`. |
| schema-changes.enabled | Boolean | No | false | Schema evolution is disabled by default. Currently, we only support `add column`, `drop column`, `rename column` and `modify column`. |
| debezium | Config | No | - | Pass-through [Debezium's properties](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/mysql.adoc#connector-properties) to Debezium Embedded Engine which is used to capture data changes from MySQL server. |
| debezium | Config | No | - | Pass-through [Debezium's properties](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/mysql.adoc#connector-properties) to Debezium Embedded Engine which is used to capture data changes from MySQL server. Schema evolution is disabled by default. You need to configure `debezium.include.schema.changes = true` to enable it. Currently, we only support `add column`, `drop column`, `rename column` and `modify column`. |
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details |

## Task Example
Expand Down Expand Up @@ -282,8 +281,9 @@ source {
password = "mysqlpw"
table-names = ["shop.products"]
base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
schema-changes.enabled = true
debezium = {
include.schema.changes = true
}
}
}
Expand Down
1 change: 0 additions & 1 deletion docs/en/connector-v2/source/Oracle-CDC.md
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ exit;
| use_select_count | Boolean | No | false | Use select count for table count rather than other methods in full stage. In this scenario, select count directly is used when it is faster to update statistics using sql from analysis table |
| skip_analyze | Boolean | No | false | Skip the analysis of table count in full stage. In this scenario, you schedule analysis table sql to update related table statistics periodically or your table data does not change frequently |
| format | Enum | No | DEFAULT | Optional output format for Oracle CDC, valid enumerations are `DEFAULT` and `COMPATIBLE_DEBEZIUM_JSON`. |
| schema-changes.enabled | Boolean | No | false | Schema evolution is disabled by default. Currently, we only support `add column`, `drop column`, `rename column` and `modify column`. |
| debezium | Config | No | - | Pass-through [Debezium's properties](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/oracle.adoc#connector-properties) to Debezium Embedded Engine which is used to capture data changes from Oracle server. |
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details |

Expand Down
36 changes: 16 additions & 20 deletions docs/zh/concept/schema-evolution.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
# 模式演进
模式演进是指数据表的Schema可以改变,数据同步任务可以自动适应新的表结构的变化而无需其他操作。

## 已支持的引擎

- Zeta

## 已支持的模式变更事件类型

- `ADD COLUMN`
- `DROP COLUMN`
- `RENAME COLUMN`
- `MODIFY COLUMN`
现在我们只支持对CDC源中的表进行“添加列”、“删除列”、“重命名列”和“修改列”的操作。目前这个功能只支持zeta引擎。

## 已支持的连接器

Expand All @@ -30,7 +20,7 @@
另外,如果你的表名以`ORA_TEMP_`开头,也会有相同的问题。

## 启用Schema evolution功能
在CDC源连接器中模式演进默认是关闭的。你需要在CDC连接器中配置`schema-changes.enabled = true`来启用它。
在CDC源连接器中模式演进默认是关闭的。你需要在CDC连接器中配置`debezium.include.schema.changes = true`来启用它。当你使用Oracle-CDC并且启用schema-evolution时,你必须将`debezium`属性中的`log.mining.strategy`指定为`redo_log_catalog`。

## 示例

Expand All @@ -52,8 +42,9 @@ source {
password = "mysqlpw"
table-names = ["shop.products"]
base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
schema-changes.enabled = true
debezium = {
include.schema.changes = true
}
}
}
Expand Down Expand Up @@ -94,8 +85,10 @@ source {
base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB"
source.reader.close.timeout = 120000
connection.pool.size = 1
schema-changes.enabled = true
debezium {
include.schema.changes = true
log.mining.strategy = redo_log_catalog
}
}
}
Expand Down Expand Up @@ -137,8 +130,10 @@ source {
base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB"
source.reader.close.timeout = 120000
connection.pool.size = 1
schema-changes.enabled = true
debezium {
include.schema.changes = true
log.mining.strategy = redo_log_catalog
}
}
}
Expand Down Expand Up @@ -173,8 +168,9 @@ source {
password = "mysqlpw"
table-names = ["shop.products"]
base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
schema-changes.enabled = true
debezium = {
include.schema.changes = true
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions docs/zh/connector-v2/formats/cdc-compatible-debezium-json.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ source {
# include schema into kafka message
key.converter.schemas.enable = false
value.converter.schemas.enable = false
# include ddl
include.schema.changes = true
# topic prefix
database.server.name = "mysql_cdc_1"
}
Expand Down
5 changes: 3 additions & 2 deletions docs/zh/connector-v2/sink/Paimon.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ source {
password = "mysqlpw"
table-names = ["shop.products"]
base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
schema-changes.enabled = true
debezium = {
include.schema.changes = true
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,9 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>${maven-dependency-plugin.version}</version>
<configuration>
<appendOutput>true</appendOutput>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
Expand Down
Loading

0 comments on commit c634974

Please sign in to comment.