
Commit

Merge branch 'main' into chore/switch-e2e-sql-backend
yezizp2012 authored Aug 28, 2024
2 parents d56ee69 + befb04f commit 1019c79
Showing 51 changed files with 895 additions and 330 deletions.
18 changes: 18 additions & 0 deletions dashboard/lib/api/streaming.ts
@@ -28,12 +28,30 @@ import {
} from "../../proto/gen/catalog"
import {
ListObjectDependenciesResponse_ObjectDependencies as ObjectDependencies,
RelationIdInfos,
TableFragments,
} from "../../proto/gen/meta"
import { ColumnCatalog, Field } from "../../proto/gen/plan_common"
import { UserInfo } from "../../proto/gen/user"
import api from "./api"

// NOTE(kwannoel): This can be optimized further: instead of fetching the entire TableFragments struct,
// we can fetch only the fields we need from TableFragments, in a truncated struct.
export async function getFragmentsByJobId(
jobId: number
): Promise<TableFragments> {
let route = "/fragments/job_id/" + jobId.toString()
let tableFragments: TableFragments = TableFragments.fromJSON(
await api.get(route)
)
return tableFragments
}

export async function getRelationIdInfos(): Promise<RelationIdInfos> {
let fragmentIds: RelationIdInfos = await api.get("/relation_id_infos")
return fragmentIds
}

export async function getFragments(): Promise<TableFragments[]> {
let fragmentList: TableFragments[] = (await api.get("/fragments2")).map(
TableFragments.fromJSON
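A minimal usage sketch of the two new helpers (not part of the diff): `getRelationIdInfos` can resolve which streaming job owns a given fragment, and `getFragmentsByJobId` then fetches only that job's fragments. The `findJobIdForFragment` / `loadFragmentsForFragmentId` names and the import paths are hypothetical; only the exported functions above come from this change.

```typescript
import { RelationIdInfos, TableFragments } from "../../proto/gen/meta"
import { getFragmentsByJobId, getRelationIdInfos } from "./streaming"

// Hypothetical helper: walk relation_id -> fragment_id and return the id of the
// job (relation) that owns the given fragment, or undefined if it is not found.
function findJobIdForFragment(
  infos: RelationIdInfos,
  targetFragmentId: number
): number | undefined {
  for (const relationId in infos.map) {
    const fragmentIdToActorIds = infos.map[relationId].map
    for (const fragmentId in fragmentIdToActorIds) {
      if (parseInt(fragmentId) === targetFragmentId) {
        return parseInt(relationId)
      }
    }
  }
  return undefined
}

// Hypothetical usage: fetch the fragments of the job that owns `fragmentId`.
export async function loadFragmentsForFragmentId(
  fragmentId: number
): Promise<TableFragments | undefined> {
  const infos = await getRelationIdInfos()
  const jobId = findJobIdForFragment(infos, fragmentId)
  return jobId === undefined ? undefined : getFragmentsByJobId(jobId)
}
```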
71 changes: 40 additions & 31 deletions dashboard/pages/fragment_graph.tsx
@@ -45,7 +45,11 @@ import {
fetchEmbeddedBackPressure,
fetchPrometheusBackPressure,
} from "../lib/api/metric"
import { getFragments, getStreamingJobs } from "../lib/api/streaming"
import {
getFragmentsByJobId,
getRelationIdInfos,
getStreamingJobs,
} from "../lib/api/streaming"
import { FragmentBox } from "../lib/layout"
import { TableFragments, TableFragments_Fragment } from "../proto/gen/meta"
import { Dispatcher, MergeNode, StreamNode } from "../proto/gen/stream_plan"
@@ -194,28 +198,33 @@ interface EmbeddedBackPressureInfo {

export default function Streaming() {
const { response: relationList } = useFetch(getStreamingJobs)
const { response: fragmentList } = useFetch(getFragments)
const { response: relationIdInfos } = useFetch(getRelationIdInfos)

const [relationId, setRelationId] = useQueryState("id", parseAsInteger)
const [selectedFragmentId, setSelectedFragmentId] = useState<number>()
const [tableFragments, setTableFragments] = useState<TableFragments>()

const toast = useErrorToast()

useEffect(() => {
if (relationId) {
setTableFragments(undefined)
getFragmentsByJobId(relationId).then((tf) => {
setTableFragments(tf)
})
}
}, [relationId])

const fragmentDependencyCallback = useCallback(() => {
if (fragmentList) {
if (relationId) {
const fragments = fragmentList.find((x) => x.tableId === relationId)
if (fragments) {
const fragmentDep = buildFragmentDependencyAsEdges(fragments)
return {
fragments,
fragmentDep,
fragmentDepDag: dagStratify()(fragmentDep),
}
}
if (tableFragments) {
const fragmentDep = buildFragmentDependencyAsEdges(tableFragments)
return {
fragments: tableFragments,
fragmentDep,
fragmentDepDag: dagStratify()(fragmentDep),
}
}
}, [fragmentList, relationId])
}, [tableFragments])

useEffect(() => {
if (relationList) {
@@ -255,38 +264,38 @@ export default function Streaming() {

const handleSearchFragment = () => {
const searchFragIdInt = parseInt(searchFragId)
if (fragmentList) {
for (const tf of fragmentList) {
for (const fragmentId in tf.fragments) {
if (tf.fragments[fragmentId].fragmentId == searchFragIdInt) {
setRelationId(tf.tableId)
if (relationIdInfos) {
let map = relationIdInfos.map
for (const relationId in map) {
const fragmentIdToRelationId = map[relationId].map
for (const fragmentId in fragmentIdToRelationId) {
if (parseInt(fragmentId) == searchFragIdInt) {
setRelationId(parseInt(relationId))
setSelectedFragmentId(searchFragIdInt)
return
}
}
}
}

toast(new Error(`Fragment ${searchFragIdInt} not found`))
}

const handleSearchActor = () => {
const searchActorIdInt = parseInt(searchActorId)
if (fragmentList) {
for (const tf of fragmentList) {
for (const fragmentId in tf.fragments) {
const fragment = tf.fragments[fragmentId]
for (const actor of fragment.actors) {
if (actor.actorId == searchActorIdInt) {
setRelationId(tf.tableId)
setSelectedFragmentId(fragment.fragmentId)
return
}
if (relationIdInfos) {
let map = relationIdInfos.map
for (const relationId in map) {
const fragmentIdToRelationId = map[relationId].map
for (const fragmentId in fragmentIdToRelationId) {
let actorIds = fragmentIdToRelationId[fragmentId].ids
if (actorIds.includes(searchActorIdInt)) {
setRelationId(parseInt(relationId))
setSelectedFragmentId(parseInt(fragmentId))
return
}
}
}
}

toast(new Error(`Actor ${searchActorIdInt} not found`))
}

52 changes: 52 additions & 0 deletions e2e_test/source/cdc_inline/auto_schema_change_mysql.slt
@@ -42,6 +42,7 @@ distribution key id NULL NULL
table description rw_customers NULL NULL


# add column
system ok
mysql -e "
USE mytest;
@@ -64,6 +65,57 @@ primary key id NULL NULL
distribution key id NULL NULL
table description rw_customers NULL NULL

# renaming a column on the upstream will not be replicated, since we do not support renaming columns
system ok
mysql -e "
USE mytest;
ALTER TABLE customers RENAME COLUMN v1 TO v11;
ALTER TABLE customers CHANGE COLUMN v2 v22 decimal(5,2);
"

sleep 3s

# table schema is unchanged, since we reject renaming columns
query TTTT
describe rw_customers;
----
id bigint false NULL
modified timestamp without time zone false NULL
custinfo jsonb false NULL
v1 character varying false NULL
v2 double precision false NULL
primary key id NULL NULL
distribution key id NULL NULL
table description rw_customers NULL NULL

# revert column rename on upstream
system ok
mysql -e "
USE mytest;
ALTER TABLE customers RENAME COLUMN v11 TO v1;
ALTER TABLE customers CHANGE COLUMN v22 v2 double(5,2);
"

# drop columns
system ok
mysql -e "
USE mytest;
ALTER TABLE customers DROP COLUMN modified;
ALTER TABLE customers DROP COLUMN v1;
ALTER TABLE customers DROP COLUMN v2;
"

sleep 3s

# the dropped columns (modified, v1, v2) should no longer appear
query TTTT
describe rw_customers;
----
id bigint false NULL
custinfo jsonb false NULL
primary key id NULL NULL
distribution key id NULL NULL
table description rw_customers NULL NULL

statement ok
drop source mysql_source cascade;
2 changes: 1 addition & 1 deletion e2e_test/source_inline/kafka/protobuf/basic.slt
@@ -44,7 +44,7 @@ with (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'sr_pb_test',
scan.startup.mode = 'earliest')
FORMAT plain ENCODE protobuf(
FORMAT upsert ENCODE protobuf(
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}',
message = 'test.User'
);
17 changes: 17 additions & 0 deletions proto/meta.proto
@@ -772,3 +772,20 @@ service EventLogService {
rpc ListEventLog(ListEventLogRequest) returns (ListEventLogResponse);
rpc AddEventLog(AddEventLogRequest) returns (AddEventLogResponse);
}

message ActorIds {
repeated uint32 ids = 1;
}

message FragmentIdToActorIdMap {
map<uint32, ActorIds> map = 1;
}

/// Provides all the ids: relation_id, fragment_id, actor_id
/// in a hierarchical format.
/// relation_id -> [fragment_id]
/// fragment_id -> [actor_id]
message RelationIdInfos {
// relation_id -> FragmentIdToActorIdMap
map<uint32, FragmentIdToActorIdMap> map = 1;
}
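To make the nesting concrete, here is a hypothetical payload of `GET /relation_id_infos` as the dashboard consumes it (all ids are made up for illustration): relation 1 owns fragments 10 and 11, fragment 10 runs actors 100 and 101, and fragment 11 runs actor 102. Proto3 JSON encodes the uint32 map keys as strings.

```typescript
// Hypothetical /relation_id_infos response (illustrative ids only):
//   relation 1 -> fragments 10, 11
//   fragment 10 -> actors 100, 101; fragment 11 -> actor 102
const exampleRelationIdInfos = {
  map: {
    "1": {
      map: {
        "10": { ids: [100, 101] },
        "11": { ids: [102] },
      },
    },
  },
}
```

The dashboard's search handlers in `fragment_graph.tsx` walk exactly this structure: for each relation_id, for each fragment_id, they check the `ids` list to locate an actor or fragment.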
1 change: 1 addition & 0 deletions src/connector/Cargo.toml
@@ -93,6 +93,7 @@ opendal = { workspace = true, features = [
"services-gcs",
"services-memory",
"services-s3",
"services-webhdfs",
] }
openssl = "0.10"
parking_lot = { workspace = true }
1 change: 1 addition & 0 deletions src/connector/src/macros.rs
@@ -39,6 +39,7 @@ macro_rules! for_all_classified_sources {
{ Gcs, $crate::source::filesystem::opendal_source::GcsProperties , $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalGcs> },
{ OpendalS3, $crate::source::filesystem::opendal_source::OpendalS3Properties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalS3> },
{ PosixFs, $crate::source::filesystem::opendal_source::PosixFsProperties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalPosixFs> },
{ Azblob, $crate::source::filesystem::opendal_source::AzblobProperties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalAzblob> },
{ Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit},
{ Iceberg, $crate::source::iceberg::IcebergProperties, $crate::source::iceberg::IcebergSplit}
}
6 changes: 4 additions & 2 deletions src/connector/src/parser/additional_columns.rs
@@ -31,8 +31,8 @@ use risingwave_pb::plan_common::{
use crate::error::ConnectorResult;
use crate::source::cdc::MONGODB_CDC_CONNECTOR;
use crate::source::{
GCS_CONNECTOR, KAFKA_CONNECTOR, KINESIS_CONNECTOR, OPENDAL_S3_CONNECTOR, PULSAR_CONNECTOR,
S3_CONNECTOR,
AZBLOB_CONNECTOR, GCS_CONNECTOR, KAFKA_CONNECTOR, KINESIS_CONNECTOR, OPENDAL_S3_CONNECTOR,
POSIX_FS_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR,
};

// Hidden additional columns connectors which do not support `include` syntax.
@@ -57,6 +57,8 @@ pub static COMPATIBLE_ADDITIONAL_COLUMNS: LazyLock<HashMap<&'static str, HashSet
(OPENDAL_S3_CONNECTOR, HashSet::from(["file", "offset"])),
(S3_CONNECTOR, HashSet::from(["file", "offset"])),
(GCS_CONNECTOR, HashSet::from(["file", "offset"])),
(AZBLOB_CONNECTOR, HashSet::from(["file", "offset"])),
(POSIX_FS_CONNECTOR, HashSet::from(["file", "offset"])),
// mongodb-cdc doesn't support cdc backfill table
(
MONGODB_CDC_CONNECTOR,
2 changes: 1 addition & 1 deletion src/connector/src/parser/unified/debezium.rs
@@ -165,7 +165,7 @@ pub fn parse_schema_change(
) -> AccessResult<SchemaChangeEnvelope> {
let mut schema_changes = vec![];

let upstream_ddl = accessor
let upstream_ddl: String = accessor
.access(&[UPSTREAM_DDL], &DataType::Varchar)?
.to_owned_datum()
.unwrap()
1 change: 1 addition & 0 deletions src/connector/src/sink/file_sink/mod.rs
@@ -17,3 +17,4 @@ pub mod fs;
pub mod gcs;
pub mod opendal_sink;
pub mod s3;
pub mod webhdfs;
1 change: 1 addition & 0 deletions src/connector/src/sink/file_sink/opendal_sink.rs
@@ -87,6 +87,7 @@ pub enum EngineType {
S3,
Fs,
Azblob,
Webhdfs,
}

impl<S: OpendalSinkBackend> Sink for FileSink<S> {