From a164ab79b8a627289000274ddbc07da64cae183d Mon Sep 17 00:00:00 2001
From: xxchan
Date: Thu, 15 Jun 2023 06:06:18 +0200
Subject: [PATCH] chore: bump typos version and fix typos (#10342)

---
 .github/workflows/typo.yml                     |  2 +-
 .typos.toml                                    | 19 +++++++++++--------
 Makefile.toml                                  |  2 +-
 integration_tests/datagen/sink/sink.go         |  8 ++++----
 .../tidb-cdc-sink/docker-compose.yml           |  2 +-
 risedev.yml                                    |  2 +-
 src/expr/src/table_function/mod.rs             |  2 +-
 src/expr/src/vector_op/array_length.rs         |  4 ++--
 src/frontend/src/binder/expr/mod.rs            |  2 +-
 src/frontend/src/binder/relation/watermark.rs  |  4 ++--
 .../src/optimizer/plan_node/logical_agg.rs     |  2 +-
 .../optimizer/plan_node/logical_multi_join.rs  |  2 +-
 .../src/scheduler/distributed/stats.rs         |  2 +-
 src/meta/src/rpc/metrics.rs                    |  4 ++--
 src/storage/src/filter_key_extractor.rs        |  2 +-
 src/storage/src/hummock/compactor/mod.rs       |  2 +-
 .../compactor/shared_buffer_compact.rs         |  2 +-
 src/tests/sqlsmith/src/validation.rs           |  2 +-
 18 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/typo.yml b/.github/workflows/typo.yml
index 88e3fc5014372..bcc115bca7509 100644
--- a/.github/workflows/typo.yml
+++ b/.github/workflows/typo.yml
@@ -10,4 +10,4 @@ jobs:
         uses: actions/checkout@v3
 
       - name: Check spelling of the entire repository
-        uses: crate-ci/typos@v1.14.12
+        uses: crate-ci/typos@v1.15.0
diff --git a/.typos.toml b/.typos.toml
index 5261e7d530380..339ad5c42b703 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -1,13 +1,16 @@
 [default.extend-words]
-indexs = "indices"
-Stichting = "Stichting"
-fo = "fo"
-FPR = "FPR"
+indexs = "indices" # Both are valid, just pick one.
+Stichting = "Stichting" # This is Dutch for "Foundation". From DuckDB.
+FPR = "FPR" # False Positive Rate
+inout = "inout" # This is a SQL keyword!
+numer = "numer" # numerator
+nd = "nd" # N-dimentional / 2nd
+steam = "stream" # You played with Steam games too much.
+# Some weird short variable names
 ot = "ot"
-inout = "inout"
-numer = "numer"
-nd = "nd"
-steam = "stream"
+bui = "bui"
+
+[default.extend-identifiers]
 
 [files]
 extend-exclude = [
diff --git a/Makefile.toml b/Makefile.toml
index 9780bda9d2873..0132ba2668466 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -922,7 +922,7 @@ fi
 private = true
 category = "RiseDev - Check"
 description = "Run cargo typos-cli check"
-install_crate = { min_version = "1.14.8", crate_name = "typos-cli", binary = "typos", test_arg = [
+install_crate = { min_version = "1.15.0", crate_name = "typos-cli", binary = "typos", test_arg = [
   "--help",
 ], install_command = "binstall" }
 script = """
diff --git a/integration_tests/datagen/sink/sink.go b/integration_tests/datagen/sink/sink.go
index 4d4116c1f4153..fbce15cf65d78 100644
--- a/integration_tests/datagen/sink/sink.go
+++ b/integration_tests/datagen/sink/sink.go
@@ -8,15 +8,15 @@ type SinkRecord interface {
 	// Convert the event to an INSERT INTO command.
 	ToPostgresSql() string
 
-	// Convert the event to a Kakfa message in JSON format.
+	// Convert the event to a Kafka message in JSON format.
 	// This interface will also be used for Pulsar and Kinesis.
 	ToJson() (topic string, key string, data []byte)
 
-	// Convert the event to a Kakfa message in Protobuf format.
+	// Convert the event to a Kafka message in Protobuf format.
 	// This interface will also be used for Pulsar and Kinesis.
 	ToProtobuf() (topic string, key string, data []byte)
 
-	// Convert the event to a Kakfa message in Avro format.
+	// Convert the event to a Kafka message in Avro format.
 	// This interface will also be used for Pulsar and Kinesis.
 	ToAvro() (topic string, key string, data []byte)
 }
@@ -40,7 +40,7 @@ func (r BaseSinkRecord) ToAvro() (topic string, key string, data []byte) {
 	panic("not implemented")
 }
 
-// Convert the event to a Kakfa message in the given format.
+// Convert the event to a Kafka message in the given format.
 // This interface will also be used for Pulsar and Kinesis.
 func RecordToKafka(r SinkRecord, format string) (topic string, key string, data []byte) {
 	if format == "json" {
diff --git a/integration_tests/tidb-cdc-sink/docker-compose.yml b/integration_tests/tidb-cdc-sink/docker-compose.yml
index e1b45c81a6bbf..4f5d6653c2dfd 100644
--- a/integration_tests/tidb-cdc-sink/docker-compose.yml
+++ b/integration_tests/tidb-cdc-sink/docker-compose.yml
@@ -156,7 +156,7 @@ services:
       - "tikv2"
     restart: on-failure
 
-  #=================== Kakfa ==================
+  #=================== Kafka ==================
 
   # Adapted from https://github.com/confluentinc/demo-scene/blob/master/connect-jdbc/docker-compose.yml
   zookeeper:
diff --git a/risedev.yml b/risedev.yml
index 3ddbdc9a5b2ca..84b7c7dcb0e72 100644
--- a/risedev.yml
+++ b/risedev.yml
@@ -146,7 +146,7 @@ profile:
       - use: meta-node
      - use: compute-node
      - use: frontend
-      # If you want to use google cloud stoage as storage backend, configure bucket name and root path:
+      # If you want to use google cloud storage as storage backend, configure bucket name and root path:
      - use: opendal
        engine: gcs
        bucket: bucket-name
diff --git a/src/expr/src/table_function/mod.rs b/src/expr/src/table_function/mod.rs
index c7f891632b651..0b3f172c63a08 100644
--- a/src/expr/src/table_function/mod.rs
+++ b/src/expr/src/table_function/mod.rs
@@ -54,7 +54,7 @@ pub trait TableFunction: std::fmt::Debug + Sync + Send {
     ///
     /// i.e., for the `i`-th input row, the output rows are `(i, output_1)`, `(i, output_2)`, ...
     ///
-    /// How the output is splited into the `Stream` is arbitrary. It's usually done by a
+    /// How the output is split into the `Stream` is arbitrary. It's usually done by a
     /// `DataChunkBuilder`.
     ///
     /// ## Example
diff --git a/src/expr/src/vector_op/array_length.rs b/src/expr/src/vector_op/array_length.rs
index e4e44179e76a9..81357bef924c9 100644
--- a/src/expr/src/vector_op/array_length.rs
+++ b/src/expr/src/vector_op/array_length.rs
@@ -179,10 +179,10 @@ fn array_length_of_dim(array: ListRef<'_>, d: i32) -> Result<Option<i32>, ExprError> {
 /// [1:0]
 ///
 /// statement error
-/// select array_dims(array[]::int[][]); -- would be `[1:0][1:0]` after multidimension support
+/// select array_dims(array[]::int[][]); -- would be `[1:0][1:0]` after multidimensional support
 ///
 /// statement error
-/// select array_dims(array[array[]::int[]]); -- would be `[1:1][1:0]` after multidimension support
+/// select array_dims(array[array[]::int[]]); -- would be `[1:1][1:0]` after multidimensional support
 /// ```
 #[function("array_dims(list) -> varchar")]
 fn array_dims(array: ListRef<'_>, writer: &mut dyn std::fmt::Write) -> Result<(), ExprError> {
diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs
index 7817c67e99911..fabf64289d371 100644
--- a/src/frontend/src/binder/expr/mod.rs
+++ b/src/frontend/src/binder/expr/mod.rs
@@ -156,7 +156,7 @@ impl Binder {
                 timestamp,
                 time_zone,
             } => self.bind_at_time_zone(*timestamp, time_zone),
-            // special syntaxt for string
+            // special syntax for string
             Expr::Trim {
                 expr,
                 trim_where,
diff --git a/src/frontend/src/binder/relation/watermark.rs b/src/frontend/src/binder/relation/watermark.rs
index a702251f14e6f..1d3177d9c7d6f 100644
--- a/src/frontend/src/binder/relation/watermark.rs
+++ b/src/frontend/src/binder/relation/watermark.rs
@@ -22,8 +22,8 @@ use crate::binder::statement::RewriteExprsRecursive;
 use crate::expr::{ExprImpl, InputRef};
 
 const ERROR_1ST_ARG: &str = "The 1st arg of watermark function should be a table name (incl. source, CTE, view) but not complex structure (subquery, join, another table function). Consider using an intermediate CTE or view as workaround.";
-const ERROR_2ND_ARG_EXPR: &str = "The 2st arg of watermark function should be a column name but not complex expression. Consider using an intermediate CTE or view as workaround.";
-const ERROR_2ND_ARG_TYPE: &str = "The 2st arg of watermark function should be a column of type timestamp with time zone, timestamp or date.";
+const ERROR_2ND_ARG_EXPR: &str = "The 2nd arg of watermark function should be a column name but not complex expression. Consider using an intermediate CTE or view as workaround.";
+const ERROR_2ND_ARG_TYPE: &str = "The 2nd arg of watermark function should be a column of type timestamp with time zone, timestamp or date.";
 
 #[derive(Debug, Clone)]
 #[expect(dead_code)]
diff --git a/src/frontend/src/optimizer/plan_node/logical_agg.rs b/src/frontend/src/optimizer/plan_node/logical_agg.rs
index c1a563310af33..905bfd5e2bd2e 100644
--- a/src/frontend/src/optimizer/plan_node/logical_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_agg.rs
@@ -358,7 +358,7 @@ impl LogicalAggBuilder {
             }
         });
 
-        // order by is disallowed occur with distinct because we can not diectly rewrite agg with
+        // order by is disallowed occur with distinct because we can not directly rewrite agg with
         // order by into 2-phase agg.
         if has_distinct && has_order_by {
             return Err(ErrorCode::InvalidInputSyntax(
diff --git a/src/frontend/src/optimizer/plan_node/logical_multi_join.rs b/src/frontend/src/optimizer/plan_node/logical_multi_join.rs
index a16ba4b010bd8..fea28dbef668b 100644
--- a/src/frontend/src/optimizer/plan_node/logical_multi_join.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_multi_join.rs
@@ -504,7 +504,7 @@ impl LogicalMultiJoin {
     /// 2. Second, for every isolated node will create connection to every other nodes.
     /// 3. Third, select and merge one node for a iteration, and use a bfs policy for which node the
     ///    selected node merged with.
-    ///    i. The select node mentioned above is the node with least numer of relations and the
+    ///    i. The select node mentioned above is the node with least number of relations and the
     ///    lowerst join tree.
     ///    ii. nodes with a join tree higher than the temporal optimal join tree will be pruned.
     pub fn as_bushy_tree_join(&self) -> Result<PlanRef> {
diff --git a/src/frontend/src/scheduler/distributed/stats.rs b/src/frontend/src/scheduler/distributed/stats.rs
index c65ced2722ec5..dd4c46b02d069 100644
--- a/src/frontend/src/scheduler/distributed/stats.rs
+++ b/src/frontend/src/scheduler/distributed/stats.rs
@@ -44,7 +44,7 @@ impl DistributedQueryMetrics {
 
         let completed_query_counter = register_int_counter_with_registry!(
             "distributed_completed_query_counter",
-            "The number of query ended sccessfully in distributed execution mode",
+            "The number of query ended successfully in distributed execution mode",
             &registry
         )
         .unwrap();
diff --git a/src/meta/src/rpc/metrics.rs b/src/meta/src/rpc/metrics.rs
index d7d1ce859ef53..6c0aa6e4de801 100644
--- a/src/meta/src/rpc/metrics.rs
+++ b/src/meta/src/rpc/metrics.rs
@@ -627,7 +627,7 @@ pub async fn start_fragment_info_monitor(
                         .collect();
                     for table_fragments in fragments {
                         for (fragment_id, fragment) in table_fragments.fragments {
-                            let frament_id_str = fragment_id.to_string();
+                            let fragment_id_str = fragment_id.to_string();
                             for actor in fragment.actors {
                                 let actor_id_str = actor.actor_id.to_string();
                                 // Report a dummay gauge metrics with (fragment id, actor id, node
@@ -641,7 +641,7 @@ pub async fn start_fragment_info_monitor(
                                     .actor_info
                                     .with_label_values(&[
                                         &actor_id_str,
-                                        &frament_id_str,
+                                        &fragment_id_str,
                                         address,
                                     ])
                                     .set(1);
diff --git a/src/storage/src/filter_key_extractor.rs b/src/storage/src/filter_key_extractor.rs
index e933d9eb1bcac..6763d64cbc622 100644
--- a/src/storage/src/filter_key_extractor.rs
+++ b/src/storage/src/filter_key_extractor.rs
@@ -200,7 +200,7 @@ impl MultiFilterKeyExtractor {
         self.id_to_filter_key_extractor.len()
     }
 
-    pub fn get_exsting_table_ids(&self) -> HashSet<u32> {
+    pub fn get_existing_table_ids(&self) -> HashSet<u32> {
         self.id_to_filter_key_extractor.keys().cloned().collect()
     }
 }
diff --git a/src/storage/src/hummock/compactor/mod.rs b/src/storage/src/hummock/compactor/mod.rs
index 585fbb937b6ee..e6c688f789c93 100644
--- a/src/storage/src/hummock/compactor/mod.rs
+++ b/src/storage/src/hummock/compactor/mod.rs
@@ -201,7 +201,7 @@ impl Compactor {
         };
 
         if let FilterKeyExtractorImpl::Multi(multi) = &multi_filter_key_extractor {
-            let found_tables = multi.get_exsting_table_ids();
+            let found_tables = multi.get_existing_table_ids();
             let removed_tables = compact_table_ids
                 .iter()
                 .filter(|table_id| !found_tables.contains(table_id))
diff --git a/src/storage/src/hummock/compactor/shared_buffer_compact.rs b/src/storage/src/hummock/compactor/shared_buffer_compact.rs
index 63f74d3771724..a8747794e8ffb 100644
--- a/src/storage/src/hummock/compactor/shared_buffer_compact.rs
+++ b/src/storage/src/hummock/compactor/shared_buffer_compact.rs
@@ -120,7 +120,7 @@ async fn compact_shared_buffer(
         .acquire(existing_table_ids.clone())
         .await?;
     if let FilterKeyExtractorImpl::Multi(multi) = &multi_filter_key_extractor {
-        existing_table_ids = multi.get_exsting_table_ids();
+        existing_table_ids = multi.get_existing_table_ids();
     }
 
     let multi_filter_key_extractor = Arc::new(multi_filter_key_extractor);
diff --git a/src/tests/sqlsmith/src/validation.rs b/src/tests/sqlsmith/src/validation.rs
index 735b1f888d3e1..7c88320b8d4ca 100644
--- a/src/tests/sqlsmith/src/validation.rs
+++ b/src/tests/sqlsmith/src/validation.rs
@@ -45,7 +45,7 @@ fn not_unique_error(db_error: &str) -> bool {
 
 fn is_window_error(db_error: &str) -> bool {
     db_error.contains("Bind error: The size arg of window table function should be an interval literal")
-        || db_error.contains("Bind error: The 2st arg of window table function should be a column name but not complex expression. Consider using an intermediate CTE or view as workaround")
+        || db_error.contains("Bind error: The 2nd arg of window table function should be a column name but not complex expression. Consider using an intermediate CTE or view as workaround")
 }
 
 // Streaming nested-loop join is not supported, as it is expensive.
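
For reference, a minimal sketch of how typos-cli reads the kind of `.typos.toml` entries touched above (illustrative entries only, not taken from this patch): in `[default.extend-words]`, mapping a word to itself whitelists that spelling, while mapping it to a different word registers it as a known misspelling that `typos -w` can correct; `[default.extend-identifiers]` does the same for whole identifiers, and `[files] extend-exclude` skips paths entirely.

[default.extend-words]
# Accept "inout" as a valid word (it is a SQL keyword), so it is never flagged.
inout = "inout"
# Map a known misspelling to its correction so it is reported and auto-fixable.
indexs = "indices"

[default.extend-identifiers]
# Whitelist an exact identifier rather than a word (hypothetical example).
ANDed = "ANDed"

[files]
# Paths to skip entirely (hypothetical example).
extend-exclude = ["src/tests/test_data"]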