risingwavelabs · StrikeW · Aug 28, 2024 · Aug 23, 2024 · Aug 26, 2024 · Aug 26, 2024
diff --git a/e2e_test/source/cdc_inline/auto_schema_change_mysql.slt b/e2e_test/source/cdc_inline/auto_schema_change_mysql.slt
@@ -42,6 +42,7 @@ distribution key id NULL NULL
 table description rw_customers NULL NULL
 
 
+# add column
 system ok
 mysql -e "
   USE mytest;
@@ -64,6 +65,57 @@ primary key id NULL NULL
 distribution key id NULL NULL
 table description rw_customers NULL NULL
 
+# renaming a column upstream will not be replicated, since we do not support column renames
+system ok
+mysql -e "
+  USE mytest;
+  ALTER TABLE customers RENAME COLUMN v1 TO v11;
+  ALTER TABLE customers CHANGE COLUMN v2 v22 decimal(5,2);
+"
+
+sleep 3s
+
+# table schema is unchanged, since we reject rename column
+query TTTT
+describe rw_customers;
+----
+id bigint false NULL
+modified timestamp without time zone false NULL
+custinfo jsonb false NULL
+v1 character varying false NULL
+v2 double precision false NULL
+primary key id NULL NULL
+distribution key id NULL NULL
+table description rw_customers NULL NULL
+
+# revert column rename on upstream
+system ok
+mysql -e "
+  USE mytest;
+  ALTER TABLE customers RENAME COLUMN v11 TO v1;
+  ALTER TABLE customers CHANGE COLUMN v22 v2 double(5,2);
+"
+
+# drop columns
+system ok
+mysql -e "
+  USE mytest;
+  ALTER TABLE customers DROP COLUMN modified;
+  ALTER TABLE customers DROP COLUMN v1;
+  ALTER TABLE customers DROP COLUMN v2;
+"
+
+sleep 3s
+
+# the `modified`, `v1` and `v2` columns should be dropped
+query TTTT
+describe rw_customers;
+----
+id bigint false NULL
+custinfo jsonb false NULL
+primary key id NULL NULL
+distribution key id NULL NULL
+table description rw_customers NULL NULL
 
 statement ok
 drop source mysql_source cascade;
diff --git a/src/connector/src/parser/unified/debezium.rs b/src/connector/src/parser/unified/debezium.rs
@@ -165,13 +165,29 @@ pub fn parse_schema_change(
 ) -> AccessResult<SchemaChangeEnvelope> {
     let mut schema_changes = vec![];
 
-    let upstream_ddl = accessor
+    let upstream_ddl: String = accessor
         .access(&[UPSTREAM_DDL], &DataType::Varchar)?
         .to_owned_datum()
         .unwrap()
         .as_utf8()
         .to_string();
 
+    // Currently only accept ADD COLUMN and DROP COLUMN,
+    // and we assume each schema change message only contains one DDL statement.
+    let allowed_ddl = ["ADD COLUMN", "DROP COLUMN"];
+    let upper_upstream_ddl = upstream_ddl.to_uppercase();
+    let is_allowed = allowed_ddl
+        .iter()
+        .any(|&allowed_ddl| upper_upstream_ddl.contains(allowed_ddl));
+    if !is_allowed {
+        Err(AccessError::Uncategorized {
+            message: format!(
+                "skip unsupported table schema change for upstream DDL: {}",
+                upstream_ddl
+            ),
+        })?;
+    }
+
     if let Some(ScalarRefImpl::List(table_changes)) = accessor
         .access(&[TABLE_CHANGES], &DataType::List(Box::new(DataType::Jsonb)))?
         .to_datum_ref()

diff --git a/src/frontend/src/handler/alter_table_column.rs b/src/frontend/src/handler/alter_table_column.rs
@@ -69,6 +69,7 @@ pub async fn replace_table_with_definition(
     Ok(())
 }
 
+/// Used in auto schema change process
 pub async fn get_new_table_definition_for_cdc_table(
     session: &Arc<SessionImpl>,
     table_name: ObjectName,

diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs
@@ -1015,6 +1015,7 @@ pub(super) async fn handle_create_table_plan(
                     &constraints,
                     connect_properties.clone(),
                     wildcard_idx.is_some(),
+                    None,
                 )
                 .await?;
 
@@ -1123,6 +1124,7 @@ async fn derive_schema_for_cdc_table(
     constraints: &Vec<TableConstraint>,
     connect_properties: WithOptionsSecResolved,
     need_auto_schema_map: bool,
+    original_catalog: Option<Arc<TableCatalog>>,
 ) -> Result<(Vec<ColumnCatalog>, Vec<String>)> {
     // read cdc table schema from external db or parsing the schema from SQL definitions
     if need_auto_schema_map {
@@ -1154,10 +1156,20 @@ async fn derive_schema_for_cdc_table(
             table.pk_names().clone(),
         ))
     } else {
-        Ok((
-            bind_sql_columns(column_defs)?,
-            bind_sql_pk_names(column_defs, constraints)?,
-        ))
+        let columns = bind_sql_columns(column_defs)?;
+        // For a table created by `create table t (*)` the constraints are empty, so we need to
+        // retrieve the primary key names from the original table catalog if available.
+        let pk_names = if let Some(original_catalog) = original_catalog {
+            original_catalog
+                .pk
+                .iter()
+                .map(|x| original_catalog.columns[x.column_index].name().to_string())
+                .collect()
+        } else {
+            bind_sql_pk_names(column_defs, constraints)?
+        };
+
+        Ok((columns, pk_names))
     }
 }
 
@@ -1328,6 +1340,7 @@ pub async fn generate_stream_graph_for_table(
                 &constraints,
                 connect_properties.clone(),
                 false,
+                Some(original_catalog.clone()),
             )
             .await?;