diff --git a/.github/workflows/sdk.yml b/.github/workflows/sdk.yml
index 254e525e7c5..d48dad0e178 100644
--- a/.github/workflows/sdk.yml
+++ b/.github/workflows/sdk.yml
@@ -294,7 +294,7 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
python-sdk-mac:
- runs-on: macos-latest
+ runs-on: macos-12
if: github.event_name == 'push'
env:
SQL_PYSDK_ENABLE: ON
diff --git a/cases/function/window/test_window_exclude_current_time.yaml b/cases/function/window/test_window_exclude_current_time.yaml
index ccef8ae1e28..c890a64116c 100644
--- a/cases/function/window/test_window_exclude_current_time.yaml
+++ b/cases/function/window/test_window_exclude_current_time.yaml
@@ -20,6 +20,9 @@ cases:
- columns: [ "c1 string","c3 int","c4 double","c7 timestamp" ]
indexs: [ "index1:c1:c7" ]
rows:
+ - [ "aa",-2, 1.0, 0 ]
+ - [ "aa",-1, 1.0, 0 ]
+ - [ "aa",0, 1.0, 0 ]
- [ "aa",1, 1.0, 1590738990000 ]
- [ "aa",2, 1.0, 1590738990000 ]
- [ "aa",3, 1.0, 1590738992000 ]
@@ -38,6 +41,9 @@ cases:
order: c3
columns: [ "c1 string", "c3 int", "c7 timestamp", "w1_c4_sum double" ]
rows:
+ - [ "aa", -2, 0, 1.0 ]
+ - [ "aa", -1, 0, 1.0 ]
+ - [ "aa", 0, 0, 1.0 ]
- [ "aa", 1, 1590738990000, 1.0 ]
- [ "aa", 2, 1590738990000, 1.0 ]
- [ "aa", 3, 1590738992000, 3.0 ]
@@ -86,15 +92,17 @@ cases:
- columns: [ "c1 string","c3 int","c4 double","c7 timestamp" ]
indexs: [ "index1:c1:c7" ]
rows:
- - [ "aa",1, 1.0, 1590738990000 ]
- - [ "aa",2, 1.0, 1590738990000 ]
- - [ "aa",3, 1.0, 1590738992000 ]
- - [ "aa",4, 1.0, 1590738993000 ]
- - [ "aa",5, 1.0, 1590738994000 ]
- - [ "aa",6, 1.0, 1590738994000 ]
- - [ "aa",7, 1.0, 1590738999000 ]
- - [ "aa",8, 1.0, 1590739001000 ]
- - [ "aa",9, 1.0, 1590739002000 ]
+ - [ "aa",-1, 1.0, 0 ]
+ - [ "aa", 0, 1.0, 0 ]
+ - [ "aa", 1, 1.0, 1590738990000 ]
+ - [ "aa", 2, 1.0, 1590738990000 ]
+ - [ "aa", 3, 1.0, 1590738992000 ]
+ - [ "aa", 4, 1.0, 1590738993000 ]
+ - [ "aa", 5, 1.0, 1590738994000 ]
+ - [ "aa", 6, 1.0, 1590738994000 ]
+ - [ "aa", 7, 1.0, 1590738999000 ]
+ - [ "aa", 8, 1.0, 1590739001000 ]
+ - [ "aa", 9, 1.0, 1590739002000 ]
sql: |
SELECT c1, c3, c7,
sum(c4) OVER w1 as w1_c4_sum
@@ -104,6 +112,8 @@ cases:
order: c3
columns: [ "c1 string", "c3 int", "c7 timestamp", "w1_c4_sum double" ]
rows:
+ - [ "aa",-1, 0, 1.0 ]
+ - [ "aa", 0, 0, 1.0 ]
- [ "aa", 1, 1590738990000, 1.0 ]
- [ "aa", 2, 1590738990000, 1.0 ]
- [ "aa", 3, 1590738992000, 3.0 ]
@@ -119,15 +129,17 @@ cases:
- columns: [ "c1 string","c3 int","c4 double","c7 timestamp" ]
indexs: [ "index1:c1:c7" ]
rows:
- - [ "aa",1, 1.0, 1590738990000 ]
- - [ "aa",2, 1.0, 1590738990000 ]
- - [ "aa",3, 1.0, 1590738992000 ]
- - [ "aa",4, 1.0, 1590738993000 ]
- - [ "aa",5, 1.0, 1590738994000 ]
- - [ "aa",6, 1.0, 1590738994000 ]
- - [ "aa",7, 1.0, 1590738999000 ]
- - [ "aa",8, 1.0, 1590739001000 ]
- - [ "aa",9, 1.0, 1590739002000 ]
+ - [ "aa",-1, 1.0, 0]
+ - [ "aa", 0, 1.0, 0]
+ - [ "aa", 1, 1.0, 1590738990000 ]
+ - [ "aa", 2, 1.0, 1590738990000 ]
+ - [ "aa", 3, 1.0, 1590738992000 ]
+ - [ "aa", 4, 1.0, 1590738993000 ]
+ - [ "aa", 5, 1.0, 1590738994000 ]
+ - [ "aa", 6, 1.0, 1590738994000 ]
+ - [ "aa", 7, 1.0, 1590738999000 ]
+ - [ "aa", 8, 1.0, 1590739001000 ]
+ - [ "aa", 9, 1.0, 1590739002000 ]
sql: |
SELECT c1, c3, c7,
sum(c4) OVER w1 as w1_c4_sum
@@ -137,12 +149,14 @@ cases:
order: c3
columns: [ "c1 string", "c3 int", "c7 timestamp", "w1_c4_sum double" ]
rows:
- - [ "aa", 1, 1590738990000, 1.0 ]
- - [ "aa", 2, 1590738990000, 1.0 ]
- - [ "aa", 3, 1590738992000, 3.0 ]
- - [ "aa", 4, 1590738993000, 4.0 ]
- - [ "aa", 5, 1590738994000, 5.0 ]
- - [ "aa", 6, 1590738994000, 5.0 ]
+ - [ "aa",-1, 0, 1.0 ]
+ - [ "aa", 0, 0, 1.0 ]
+ - [ "aa", 1, 1590738990000, 3.0 ]
+ - [ "aa", 2, 1590738990000, 3.0 ]
+ - [ "aa", 3, 1590738992000, 5.0 ]
+ - [ "aa", 4, 1590738993000, 6.0 ]
+ - [ "aa", 5, 1590738994000, 7.0 ]
+ - [ "aa", 6, 1590738994000, 7.0 ]
- [ "aa", 7, 1590738999000, 7.0 ]
- [ "aa", 8, 1590739001000, 7.0 ]
- [ "aa", 9, 1590739002000, 7.0 ]
diff --git a/cases/function/window/test_window_union.yaml b/cases/function/window/test_window_union.yaml
index d3fdbed82dd..102934ff116 100644
--- a/cases/function/window/test_window_union.yaml
+++ b/cases/function/window/test_window_union.yaml
@@ -733,8 +733,11 @@ cases:
indexs:
- idx:g:ts
data: |
- 1, 100, 111, 21
- 2, 100, 111, 5
+ 0, 0, 111, 19
+ 1, 0, 111, 18
+ 2, 100, 111, 21
+ 3, 100, 111, 5
+ 4, 101, 111, 100
- name: t2
columns:
- id int
@@ -747,6 +750,15 @@ cases:
1, 99, 111, 233
1, 100, 111, 200
1, 101, 111, 17
+ # raw union window (before filter)
+ # 0, 0, 111, 19
+ # 1, 0, 111, 18
+ # 1, 99, 111, 233 (t2)
+ # 1, 100, 111, 200 (t2)
+ # 2, 100, 111, 21
+ # 3, 100, 111, 5
+ # 1, 101, 111, 17 (t2)
+ # 4, 101, 111, 100
sql: |
select
id, count(val) over w as cnt,
@@ -766,8 +778,11 @@ cases:
- l1 int
order: id
data: |
- 1, 2, 233, 21, 233
- 2, 2, 233, 5, 233
+ 0, 1, 19, 19, NULL
+ 1, 1, 18, 18, NULL
+ 2, 4, 233, 18, 233
+ 3, 4, 233, 5, 233
+ 4, 7, 233, 5, 5
- id: 18-5
desc: |
@@ -1230,7 +1245,7 @@ cases:
3, 2, 233, 200, 200
4, 3, 233, 17, 17
- # rows_range union window with exclude current_row, single window
+ # rows_range union window with exclude current_row, single window
- id: 24
desc: |
rows_range union window with exclude_current_row
@@ -1314,6 +1329,9 @@ cases:
2, 100, 111, 5
3, 101, 111, 0
4, 102, 111, 0
+ 5, 0, 114, 7
+ 6, 0, 114, 8
+ 7, 100, 114, 9
- name: t2
columns:
- id int
@@ -1363,6 +1381,9 @@ cases:
2, 1, 233, 233
3, 4, 233, 5
4, 6, 233, 0
+ 5, 0, NULL, NULL
+ 6, 0, NULL, NULL
+ 7, 2, 8, 7
- id: 26
desc: |
rows_range union window with exclude_current_row and instance_not_in_window
@@ -1647,6 +1668,10 @@ cases:
2, 100, 111, 5
3, 101, 111, 0
4, 102, 111, 0
+ 5, 0, 114, 9
+ 6, 0, 114, 17
+ 7, 100, 114, 11
+ 8, 101, 114, 14
- name: t2
columns:
- id int
@@ -1697,3 +1722,7 @@ cases:
2, 1, 233, 233
3, 2, 21, 5
4, 2, 17, 0
+ 5, 0, NULL, NULL
+ 6, 0, NULL, NULL
+ 7, 2, 17, 9
+ 8, 2, 17, 11
diff --git a/cases/function/window/window_attributes.yaml b/cases/function/window/window_attributes.yaml
index f1e54311993..3080dfeab87 100644
--- a/cases/function/window/window_attributes.yaml
+++ b/cases/function/window/window_attributes.yaml
@@ -18,10 +18,12 @@ cases:
indexs:
- idx:g:ts
data: |
- 1, 99000, 111, 21
- 2, 100000, 111, 22
- 3, 101000, 111, 23
- 4, 100000, 114, 56
+ 0, 0, 111, 0
+ 1, 0, 111, 0
+ 2, 99000, 111, 21
+ 3, 100000, 111, 22
+ 4, 101000, 111, 23
+ 5, 100000, 114, 56
sql: |
select
id,
@@ -58,10 +60,12 @@ cases:
- l1 int
order: id
data: |
- 1, 0, NULL, NULL, NULL
- 2, 1, 21, 21, 21
- 3, 2, 22, 21, 22
- 4, 0, NULL, NULL, NULL
+ 0, 0, NULL, NULL, NULL
+ 1, 1, 0, 0, 0
+ 2, 0, NULL, NULL, 0
+ 3, 1, 21, 21, 21
+ 4, 2, 22, 21, 22
+ 5, 0, NULL, NULL, NULL
- id: 1
desc: |
ROWS window with exclude_current_row, '0 PRECEDING EXCLUDE CURRENT_ROW' actually is the same as '0 OPEN PRECEDING'
@@ -89,7 +93,6 @@ cases:
from t1 window w as(
partition by `g` order by `ts`
ROWS between 2 PRECEDING and 0 preceding EXCLUDE CURRENT_ROW);
- # batch_plan: |
expect:
columns:
- id int
@@ -478,3 +481,53 @@ cases:
4, 3, 23, 21, 23
5, 0, NULL, NULL, NULL
6, 1, 56, 56, 56
+ - id: 9
+ desc: |
+ ROWS Window with exclude current_time and exclude current_row
+ inputs:
+ - name: t1
+ columns:
+ - id int
+ - ts timestamp
+ - g int
+ - val int
+ indexs:
+ - idx:g:ts
+ data: |
+ 1, 99000, 111, 21
+ 2, 100000, 111, 22
+ 3, 101000, 111, 23
+ 4, 102000, 111, 44
+ 5, 0, 114, 0
+ 6, 0, 114, 99
+ 7, 100000, 114, 56
+ 8, 102000, 114, 52
+ 9, 104000, 114, 33
+ sql: |
+ select
+ id,
+ count(val) over w as cnt,
+ max(val) over w as mv,
+ min(val) over w as mi,
+ lag(val, 1) over w as l1
+ FROM t1 WINDOW w as(
+ PARTITION by `g` ORDER by `ts`
+ ROWS BETWEEN 3 PRECEDING AND CURRENT ROW EXCLUDE CURRENT_TIME EXCLUDE CURRENT_ROW);
+ expect:
+ columns:
+ - id int
+ - cnt int64
+ - mv int
+ - mi int
+ - l1 int
+ order: id
+ data: |
+ 1, 0, NULL, NULL, NULL
+ 2, 1, 21, 21, 21
+ 3, 2, 22, 21, 22
+ 4, 3, 23, 21, 23
+ 5, 0, NULL, NULL, NULL
+ 6, 0, NULL, NULL, NULL
+ 7, 2, 99, 0, 99
+ 8, 3, 99, 0, 56
+ 9, 3, 99, 52, 52
diff --git a/demo/init.sh b/demo/init.sh
index d4718a5f58c..2fc90d15de5 100755
--- a/demo/init.sh
+++ b/demo/init.sh
@@ -26,7 +26,7 @@ set +e
pkill python3
set -e
-rm -rf /tmp/*
+rm -rf /tmp/openmldb_offline_storage/*
rm -rf /work/openmldb/logs*
rm -rf /work/openmldb/db*
sleep 2
@@ -36,7 +36,7 @@ if [[ "$MODE" = "standalone" ]]; then
cd /work/openmldb && ./bin/stop-standalone.sh && ./bin/start-standalone.sh
sleep 1
else
- cd /work/zookeeper-3.4.14 && ./bin/zkServer.sh restart
+ cd /work/zookeeper-3.4.14 && ./bin/zkServer.sh stop && rm -rf /tmp/zookeeper && ./bin/zkServer.sh start
sleep 1
cd /work/openmldb && ./bin/stop-all.sh && ./bin/start-all.sh
fi
diff --git a/docs/en/reference/arch/online_arch.md b/docs/en/reference/arch/online_arch.md
index 84314d05775..83065bc5bb9 100644
--- a/docs/en/reference/arch/online_arch.md
+++ b/docs/en/reference/arch/online_arch.md
@@ -2,46 +2,46 @@
## 1. Overview
-The main modules of OpenMLDB's online architecture include Apache ZooKeeper, nameserver, and tablets (further including SQL engine and storage engine). The following figure shows the relationship between these modules. Among them, tablets are the core modules of the entire OpenMLDB storage and computing, and also modules that consume more resources; ZooKeeper and nameserver are mainly used for auxiliary functions, such as metadata management and high availability. The function of each module will be described in detail below.
+The main modules of OpenMLDB's online architecture include Apache ZooKeeper, a nameserver cluster, and a tablet cluster. A tablet consists of a SQL engine and a storage engine. The following figure shows the relationships between these modules. Among them, tablets are the core of OpenMLDB's storage and computing and consume more resources than the other modules; ZooKeeper and the nameserver mainly provide auxiliary functions, such as metadata management and high availability. The function of each module is described in detail below.
![image-20220316160612968](images/architecture.png)
## 2. Apache ZooKeeper
-OpenMLDB relies on ZooKeeper for service discovery and metadata storage and management functions. There will be interaction between ZooKeeper and OpenMLDB SDK, tablets, namesever for distributing and updating metadata.
+OpenMLDB relies on ZooKeeper for service discovery and for storing and managing metadata. ZooKeeper interacts with the OpenMLDB SDK/API server, tablets, and the nameserver to distribute and update metadata.
## 3. Nameserver
-Nameserver is mainly used for tablet management and failover. When a tablet node goes down, the nameserver triggers a series of tasks to perform a failover, and when the node recovers, it reloads the data into the node. In order to ensure the high availability of the nameserver itself, the nameserver will deploy multiple instances during deployment at the same time. The namesaver will then use the primary/secondary node deployment mode, with only be one primary node at the same time. Multiple nameservers implement the preemption of the primary node through ZooKeeper. Therefore, if the current primary node unexpectedly goes offline, the secondary node will use ZooKeeper to elect a node to become the primary node again.
+The nameserver is mainly used for tablet management and failover. When a tablet node goes down, the nameserver triggers a series of tasks to perform a failover; when the node recovers, the nameserver reloads the data into it. To ensure the high availability of the nameserver itself, multiple nameserver instances are deployed in a primary/secondary mode, with only one primary node at any time. The primary node is determined among the nameserver instances by preemption through ZooKeeper. If the current primary node unexpectedly goes offline, the remaining secondary nodes elect a new primary node through ZooKeeper.
## 4. Tablets
-Tablet is used by OpenMLDB to execute SQL and data storage, and it is also the core of the entire OpenMLDB function implementation and the bottleneck of resource consumption. From a functional point of view, Tablet includes two modules: SQL engine and storage engine. Tablet is also the smallest configurable granularity of OpenMLDB deployment resources. A tablet cannot be split across multiple physical nodes; however, there can be multiple tablets on a single physical node.
+A tablet is responsible for SQL execution and data storage. It is the core of the entire OpenMLDB implementation and the bottleneck of resource consumption. From a functional point of view, a tablet includes two modules: a SQL engine and a storage engine. A tablet is also the smallest configurable unit of OpenMLDB deployment resources; a tablet cannot be split across multiple physical nodes, but there can be multiple tablets on a single physical node.
### 4.1 SQL Engine
-The SQL engine is responsible for executing SQL query calculations. The execution process after the SQL engine receives the request of the SQL query is shown in the following figure:
+The SQL engine is responsible for executing SQL queries. The execution process after the SQL engine receives a SQL query request is shown in the following figure:
![img](images/sql_engine.png)
-The SQL engine parses SQL into an AST syntax tree through [ZetaSQL](https://github.com/4paradigm/zetasql). Because we have added special SQL syntax for feature engineering extensions such as `LAST JOIN`, `WINDOW UNION`, etc., the open source ZetaSQL has been optimized. After a series of compilation transformation, optimization, and LLVM-based codegen as shown in the figure above, the execution plan is finally generated. Based on the execution plan, the SQL engine obtains the storage layer data through the catalog to perform the final SQL execution operation. In the distributed version, a distributed execution plan is generated, and the execution task is sent to other tablet nodes for execution. Currently, the SQL engine of OpenMLDB adopts the push mode, which distributes tasks to the nodes where the data is located for execution instead of pulling the data back. This has the benefit of reducing data transfers.
+The SQL engine parses SQL into an AST through [ZetaSQL](https://github.com/4paradigm/zetasql). We have extended the open-source ZetaSQL to add special SQL syntax for feature engineering, such as `LAST JOIN` and `WINDOW UNION`. After a series of compilation transformations, optimization, and LLVM-based code generation, as shown in the figure above, the execution plan is finally generated. Based on the execution plan, the SQL engine accesses the storage-layer data through the catalog and performs the final computation. In the cluster version, a distributed execution plan is generated, and the computation tasks are sent to other tablet nodes for execution. Currently, the SQL engine adopts the **push** mode, which distributes tasks to the nodes where the data is located instead of pulling the data back. This has the benefit of reducing data transfers.
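+
+As a rough illustration (the tables, columns, and window bounds below are purely illustrative and not part of this document), the two syntax extensions mentioned above can be sketched as:
+
+```sql
+-- LAST JOIN: for each row of t1, join only the newest matching row of t2 (ordered by t2.ts)
+SELECT t1.id, t2.val FROM t1 LAST JOIN t2 ORDER BY t2.ts ON t1.g = t2.g;
+
+-- WINDOW UNION: rows from t2 also enter t1's window before aggregation
+SELECT id, sum(val) OVER w AS w_sum
+FROM t1 WINDOW w AS (UNION t2 PARTITION BY g ORDER BY ts
+                     ROWS_RANGE BETWEEN 10s PRECEDING AND CURRENT ROW);
+```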
-### 4.2 Stoage Engine
-The Storage engine is responsible for the storage of OpenMLDB data and supports the corresponding high-availability-related functions.
+### 4.2 Storage Engine
+The storage engine is responsible for data storage and supports the corresponding high-availability features.
+
+:::{note}
+OpenMLDB supports two kinds of storage engines: memory-based and disk-based. This article introduces the memory storage engine, which is developed in-house specifically for OpenMLDB. The disk storage engine is based on RocksDB and follows the same mechanism; see [RocksDB](https://github.com/facebook/rocksdb) for details.
+:::
#### Data Distribution
-OpenMLDB Cluster Edition is a distributed database. The data of a table will be partitioned, and multiple copies will be created, and finally distributed in different nodes. Two important concepts are explained here: replicas and partitioning.
+The OpenMLDB cluster version is a distributed database. A table is partitioned, multiple copies are created, and the copies are finally distributed across different nodes. Two important concepts are explained here: replicas and partitions.
-- Replication: In order to ensure high availability and improve the efficiency of distributed queries, the data table will be stored in multiple copies, and these copies are called replicas.
+- Replication: In order to ensure high availability and improve the efficiency of distributed queries, the data table will be stored in multiple copies, which are called replicas.
-- Partition: When a table (or specifically a copy) is stored, it will be further divided into multiple partitions for distributed computing. The number of partitions can be specified when the table is created, but once created, the number of partitions cannot be dynamically modified. A partition is the smallest unit of master-slave synchronization and expansion and contraction of a storage engine. A partition can be flexibly migrated between different tablets. At the same time, different partitions of a table can be calculated in parallel, improving the performance of distributed computing. OpenMLDB will automatically try to balance the number of partitions on each tablet to improve the overall performance of the system. Multiple partitions of a table may be distributed on different tablets. The roles of partitions are divided into primary partitions (leader) and secondary partition (followers). When a calculation request is obtained, the request will be sent to the primary partition corresponding to the data for calculation; the secondary partition is used to ensure high availability.
+- Partition: When a table (or, more precisely, a replica of it) is stored, it is further divided into multiple partitions for distributed computing. The number of partitions can be specified when the table is created (see the sketch below), but once created, it cannot be changed dynamically. A partition is the smallest unit of leader-follower synchronization and of storage-engine scale-out and scale-in. A partition can be flexibly migrated between different tablets. At the same time, different partitions of a table can be computed in parallel, improving the performance of distributed computing. OpenMLDB automatically tries to balance the number of partitions on each tablet to improve the overall performance of the system. Multiple partitions of a table may be distributed on different tablets. Partitions are divided into primary partitions (leaders) and secondary partitions (followers). When a computing request is received, the request is sent to the primary partition that holds the data; the secondary partitions are used to ensure high availability.
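+
+As a minimal sketch (the schema is illustrative and assumes the `PARTITIONNUM` and `REPLICANUM` table options), the number of partitions and replicas can be declared when the table is created:
+
+```sql
+-- 8 partitions for the table, each partition kept as 3 replicas on different tablets
+CREATE TABLE t1 (id INT, g INT, ts TIMESTAMP, val INT, INDEX(KEY=g, TS=ts))
+OPTIONS (PARTITIONNUM=8, REPLICANUM=3);
+```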
The figure below shows the storage layout of a data table on three tablets based on four partitions in the case of two replicas. In actual use, if the load of one or several tablets is too high, data migration can be performed based on partitioning to improve the load balance and overall throughput of the system.
![image-20220317150559595](images/table_partition.png)
-#### Data Persistence and Master-Slave Synchronization
-The online data of the current version of OpenMLDB is all stored in memory. In order to achieve high availability, the data will be persisted to the hard disk in the form of binlog and snapshot.
+#### Data Persistence and Leader-Follower Synchronization
+In the current version, all online data is stored in memory. In order to achieve high availability, the data is persisted to disk in the form of binlogs and snapshots.
![image-20220317152718586](images/binlog_snapshot.png)
-As shown in the figure above, the server will write memory and binlog at the same time after receiving the write request from the SDK. Binlog is used for master-slave synchronization. After the data is written to binlog, a background thread will asynchronously read the data from binlog and synchronize it to the slave node. After receiving the synchronization request from the node, the memory and binlog are also written. Snapshot can be regarded as a mirror of memory data, but for performance reasons, snapshot is not dumped from memory, but generated by merging binlog and the previous snapshot. Expired data will be deleted during the merging process. OpenMLDB will record the master-slave synchronization and the offset merged into the snapshot. If all the data in a binlog file is synchronized to the slave node and merged into the snapshot, the binlog file will be deleted by the background thread.
+As shown in the figure above, the server writes to memory and the binlog at the same time after receiving a write request from the SDK. The binlog is used for leader-follower synchronization: after the data is written to the binlog, a background thread asynchronously reads it from the binlog and sends it to the follower nodes. After receiving the synchronization request, a follower writes to memory and its binlog as well. A snapshot can be regarded as a mirror of the in-memory data, but for performance reasons it is not dumped from memory; instead it is generated by merging the binlog with the previous snapshot, and expired data is deleted during the merge. OpenMLDB records the offsets of leader-follower synchronization and of the data merged into snapshots. If all the data in a binlog file has been synchronized to the followers and merged into a snapshot, the binlog file is deleted by a background thread.
-
-:::{note}
-In the upcoming v0.5.0 version, OpenMLDB will also support disk-based storage engines, and its persistence mechanism will be different from the description in this article.
-:::
diff --git a/docs/en/reference/index.rst b/docs/en/reference/index.rst
index c7d4373fbf4..e8a76710d99 100644
--- a/docs/en/reference/index.rst
+++ b/docs/en/reference/index.rst
@@ -2,8 +2,10 @@
References
=============================
+
.. toctree::
:maxdepth: 1
+
rest_api
arch/index
sql/index
diff --git a/docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md b/docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md
index dcbdd49ac37..c066b616076 100644
--- a/docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md
+++ b/docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md
@@ -614,7 +614,7 @@ Example:
```cpp
-select concat("-", "1", 2, 3, 4, 5.6, 7.8, Timestamp(1590115420000L));
+select concat_ws("-", "1", 2, 3, 4, 5.6, 7.8, Timestamp(1590115420000L));
-- output "1-2-3-4-5.6-7.8-2020-05-22 10:43:40"
```
diff --git a/docs/en/reference/sql/functions_and_operators/operators.md b/docs/en/reference/sql/functions_and_operators/operators.md
index 2262687045e..9794d44d182 100644
--- a/docs/en/reference/sql/functions_and_operators/operators.md
+++ b/docs/en/reference/sql/functions_and_operators/operators.md
@@ -1,13 +1,3 @@
-* [Operator](#operator)
- * [OperatorPrecedence](#operator precedence)
- * [VariousOperations](#various operations)
- * [1. Comparison Operation](#1-ComparisonOperation)
- * [2. Logic Operation](#2-LogicOperation)
- * [3. Arithmetic Operations](#3-ArithmeticOperations)
- * [4. Bit Operation](#4-BitOperation)
- * [5. Type Operations and Functions](#5-TypeOperationsandFunctions)
- * [Assignment Operator](#AssignmentOperator)
-
# Operator
## Operator Precedence
diff --git a/docs/zh/reference/sql/dql/HAVING_CLAUSE.md b/docs/zh/reference/sql/dql/HAVING_CLAUSE.md
index be1aaff389d..473c4e397a3 100644
--- a/docs/zh/reference/sql/dql/HAVING_CLAUSE.md
+++ b/docs/zh/reference/sql/dql/HAVING_CLAUSE.md
@@ -1,6 +1,6 @@
# Having Clause
-Having 子句与 Where 子句作用类似,Having 子句可以让过滤 GroupBy 后的各种数据,Where 子句用于在聚合前过滤记录。
+Having 子句与 Where 子句作用类似。Having 子句过滤 GroupBy 后的各种数据,Where 子句在聚合前进行过滤。
## Syntax
@@ -16,31 +16,27 @@ SELECT select_expr [,select_expr...] FROM ... GROUP BY ... HAVING having_conditi
```
## 边界说明
+在单机版中,所有执行模式均支持`HAVING`。集群版各[执行模式](https://openmldb.ai/docs/zh/main/tutorial/modes.html)的支持情况如下。
-| SELECT语句元素 | 状态 | 说明 |
-| :------------- | ------------- | :----------------------------------------------------------- |
-| HAVING Clause | Online 不支持 | Having 子句与 Where 子句作用类似,Having 子句可以让过滤 GroupBy 后的各种数据,Where 子句用于在聚合前过滤记录。 |
-
-## Example
+| SELECT语句元素 | 离线模式 | 在线预览模式 | 在线请求模式 | 说明 |
+| :--------------------------------------------- | --------- | ------------ | ------------ |:---------------------------------------------------------------------|
+| HAVING Clause | **``✓``** | | | Having 子句与 Where 子句作用类似。Having 子句过滤 GroupBy 后的各种数据,Where 子句在聚合前进行过滤。 |
+
+## Example
### 1. 分组后按聚合结果过滤
```SQL
--- desc: 分组后聚合过滤
SELECT COL1, SUM(COL2), AVG(COL2) FROM t1 group by COL1 HAVING SUM(COL2) > 1000;
```
### 2. 两列分组后按聚合结果过滤
```sql
--- desc: 分组后聚合过滤
SELECT COL1, SUM(COL2), AVG(COL2) FROM t1 group by COL1, COL0 HAVING SUM(COL2) > 1000;
```
### 3. 分组后按分组列过滤
```sql
--- desc: 分组后聚合过滤
-SELECT COL1, SUM(COL2), AVG(COL2) FROM t1 group by COL1 HAVING COL2 > 1000;
+SELECT COL1, SUM(COL2), AVG(COL2) FROM t1 group by COL1 HAVING COL1 ='a';
```
diff --git a/docs/zh/reference/sql/dql/LIMIT_CLAUSE.md b/docs/zh/reference/sql/dql/LIMIT_CLAUSE.md
index 113306c2e7c..1ccbd9fa94f 100644
--- a/docs/zh/reference/sql/dql/LIMIT_CLAUSE.md
+++ b/docs/zh/reference/sql/dql/LIMIT_CLAUSE.md
@@ -1,6 +1,6 @@
# Limit Clause
-Limit 子句用于限制结果条数。OpenMLDB 目前仅支持Limit 接受一个参数,表示返回数据的最大行数;
+Limit子句用于限制返回的结果条数。目前Limit仅支持接受一个参数,表示返回数据的最大行数。
## Syntax
@@ -17,16 +17,15 @@ SELECT ... LIMIT ...
## 边界说明
-| SELECT语句元素 | 边界 | 说明 |
-| :------------- | -------------------- | :----------------------------------------------------------- |
-| LIMIT Clause | 不支持Online Serving | Limit 子句用于限制结果条数。OpenMLDB 目前仅支持Limit 接受一个参数,表示返回数据的最大行数; |
+| SELECT语句元素 | 边界 | 说明 |
+| :------------- |--| :----------------------------------------------------------- |
+| LIMIT Clause | 单机版和集群版的所有执行模式均支持 | Limit 子句用于限制返回的结果条数。目前Limit仅支持接受一个参数,表示返回数据的最大行数。 |
## Example
### SELECT with LIMIT
```SQL
--- desc: SELECT Limit
- SELECT t1.COL1 c1 FROM t1 limit 10;
+SELECT t1.COL1 c1 FROM t1 limit 10;
```
diff --git a/docs/zh/reference/sql/dql/NO_TABLE_SELECT_CLAUSE.md b/docs/zh/reference/sql/dql/NO_TABLE_SELECT_CLAUSE.md
index 50881936a0c..1c06021e668 100644
--- a/docs/zh/reference/sql/dql/NO_TABLE_SELECT_CLAUSE.md
+++ b/docs/zh/reference/sql/dql/NO_TABLE_SELECT_CLAUSE.md
@@ -23,8 +23,8 @@ SELECT const_expr [, const_expr ...];
## 2. SELECT语句元素
| SELECT语句元素 | 状态 | 说明 |
-| :------------- | ------------------- | :----------------------------------------------------------- |
-| 无标SELECT语句 | OnlineServing不支持 | 无表Select语句计算常量表达式操作列表,表达式计算不需要依赖表和列 |
+|:-----------| ------------------- | :----------------------------------------------------------- |
+| 无表SELECT语句 | OnlineServing不支持 | 无表Select语句计算常量表达式操作列表,表达式计算不需要依赖表和列 |
#### Examples
@@ -32,7 +32,7 @@ SELECT const_expr [, const_expr ...];
-- desc: SELECT 常量字面量
SELECT 1, 1L, 1.0f, 2.0, 'Hello';
-- desc: SELECT 常量表达式
-SELECT 1+1, 1L + 1L, 1.0f - 1.0f, 2.0*2.0, 'Hello' LIKE 'He%';
+SELECT 1+1, 1L + 1L, 1.0f - 1.0f, 2.0*2.0, 'Hello' LIKE 'He%';
-- desc: SELECT 函数表达式
SELECT substr("hello world", 3, 6);
-```
\ No newline at end of file
+```
diff --git a/docs/zh/reference/sql/dql/SELECT_INTO_STATEMENT.md b/docs/zh/reference/sql/dql/SELECT_INTO_STATEMENT.md
index f6f58b0f4c1..1b87373b863 100644
--- a/docs/zh/reference/sql/dql/SELECT_INTO_STATEMENT.md
+++ b/docs/zh/reference/sql/dql/SELECT_INTO_STATEMENT.md
@@ -1,5 +1,8 @@
# SELECT INTO
-
+`SELECT INTO OUTFILE`语句将表的查询结果导出为一个文件。
+```{note}
+[LOAD DATA INFILE](../dml/LOAD_DATA_STATEMENT.md) 语句与`SELECT INTO OUTFILE`互补,它用于从指定文件创建表以及加载数据到表中。
+```
## Syntax
```sql
@@ -18,22 +21,24 @@ SelectInfoOptionItem
|'MODE' '=' string_literal
```
-`SELECT INTO OUTFILE`语句用户将表的查询结果导出为一个文件。 [`LOAD DATA INFILE`](../dml/LOAD_DATA_STATEMENT.md) 语句与`SELECT INTO OUTFILE`互补,它用于从指定文件创建表以及加载数据到表中。`SELECT INTO OUTFILE`分为三个部分。
+`SELECT INTO OUTFILE`分为三个部分。
- 第一部分是一个普通的SELECT语句,通过这个SELECT语句来查询所需要的数据;
- 第二部分是`filePath`,定义将查询的记录导出到哪个文件中;
- 第三部分是`SelectIntoOptionList`为可选选项,其可能的取值有:
-| 配置项 | 类型 | 默认值 | 描述 |
-| ---------- | ------- | --------------- | ------------------------------------------------------------ |
-| delimiter | String | , | 列分隔符,默认为`,` |
-| header | Boolean | true | 是否包含表头, 默认为`true` |
-| null_value | String | null | NULL填充值,默认填充`"null"` |
-| format | String | csv | 输出文件格式,默认为`csv`。请补充一下其他的可选格式。 |
-| mode | String | error_if_exists | 输出模式:<br>`error_if_exists`: 表示若文件已经在则报错。<br>`overwrite`: 表示若文件已存在,数据将覆盖原文件内容。<br>`append`:表示若文件已存在,数据将追加到原文件后面。<br>不显示配置时,默认mode为`error_if_exists`。 |
-| quote | String | "" | 输出数据的包围字符串,字符串长度<=1。默认为"",表示输出数据包围字符串为空。当配置包围字符串时,将使用包围字符串包围一个field。例如,我们配置包围字符串为`"#"`,原始数据为{1 1.0, This is a string, with comma}。输出的文本为`#1#, #1.0#, #This is a string, with comma#。`请注意,目前OpenMLDB还不支持quote字符的转义,所以,用户需要谨慎选择quote字符,保证原始字符串内并不包含quote字符。 |
+| 配置项 | 类型 | 默认值 | 描述 |
+| ---------- | ------- | --------------- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| delimiter | String | , | 列分隔符,默认为‘`,`’ |
+| header | Boolean | true | 是否包含表头, 默认为`true` |
+| null_value | String | null | NULL填充值,默认填充`"null"` |
+| format | String | csv | 输出文件格式:<br>`csv`:不显式指明format时,默认为该值<br>`parquet`:集群版还支持导出parquet格式文件,单机版不支持 |
+| mode | String | error_if_exists | 输出模式:<br>`error_if_exists`: 表示若文件已经存在则报错。<br>`overwrite`: 表示若文件已存在,数据将覆盖原文件内容。<br>`append`:表示若文件已存在,数据将追加到原文件后面。<br>不显式配置时,默认为`error_if_exists`。 |
+| quote | String | "" | 输出数据的包围字符串,字符串长度<=1。默认为"",表示输出数据包围字符串为空。当配置包围字符串时,将使用包围字符串包围一个field。例如,我们配置包围字符串为`"#"`,原始数据为{1 1.0, This is a string, with comma}。输出的文本为`#1#, #1.0#, #This is a string, with comma#。` |
- [`LOAD DATA INFILE`](../dml/LOAD_DATA_STATEMENT.md) 语句与`SELECT INTO OUTFILE`互补,它用户从指定文件创建表以及加载数据到表中。
+````{important}
+请注意,目前仅有集群版支持quote字符的转义。所以,如果您使用的是单机版,请谨慎选择quote字符,保证原始字符串内并不包含quote字符。
+````
## SQL语句模版
@@ -46,13 +51,13 @@ SELECT ... INTO OUTFILE 'file_path' OPTIONS (key = value, ...)
- 从表`t1`查询输出到`data.csv`文件中,使用`,`作为列分隔符
```SQL
-SELECT col1, col2, col3 FROM t1 INTO OUTFILE 'data.csv' OPTIONS ( delimit = ',' );
+SELECT col1, col2, col3 FROM t1 INTO OUTFILE 'data.csv' OPTIONS ( delimiter = ',' );
```
- 从表`t1`查询输出到`data.csv`文件中,使用`|`作为列分隔符,NULL值的填充值为`NA`字符串:
```SQL
-SELECT col1, col2, col3 FROM t1 INTO OUTFILE 'data2.csv' OPTIONS ( delimit = '|', null_value='NA');
+SELECT col1, col2, col3 FROM t1 INTO OUTFILE 'data2.csv' OPTIONS ( delimiter = '|', null_value='NA');
```
diff --git a/docs/zh/reference/sql/functions_and_operators/Files/udfs_8h.md b/docs/zh/reference/sql/functions_and_operators/Files/udfs_8h.md
index d7362e6e55b..68656a9ee66 100644
--- a/docs/zh/reference/sql/functions_and_operators/Files/udfs_8h.md
+++ b/docs/zh/reference/sql/functions_and_operators/Files/udfs_8h.md
@@ -656,7 +656,7 @@ Example:
```sql
-select concat("-", "1", 2, 3, 4, 5.6, 7.8, Timestamp(1590115420000L));
+select concat_ws("-", "1", 2, 3, 4, 5.6, 7.8, Timestamp(1590115420000L));
-- output "1-2-3-4-5.6-7.8-2020-05-22 10:43:40"
```
diff --git a/docs/zh/reference/sql/functions_and_operators/operators.md b/docs/zh/reference/sql/functions_and_operators/operators.md
index 80196f4b7f6..a97a1bfb982 100644
--- a/docs/zh/reference/sql/functions_and_operators/operators.md
+++ b/docs/zh/reference/sql/functions_and_operators/operators.md
@@ -1,13 +1,3 @@
-* [运算符](#运算符)
- * [运算符优先级](#运算符优先级)
- * [各类运算](#各类运算)
- * [1. 比较运算](#1-比较运算)
- * [2. 逻辑运算](#2-逻辑运算)
- * [3. 算术运算](#3-算术运算)
- * [4. 位运算](#4-位运算)
- * [5. 类型运算和函数](#5-类型运算和函数)
- * [赋值操作符](#赋值操作符)
-
# 运算符
## 运算符优先级
diff --git a/docs/zh/tutorial/modes.md b/docs/zh/tutorial/modes.md
index af457578f84..64375ad0551 100644
--- a/docs/zh/tutorial/modes.md
+++ b/docs/zh/tutorial/modes.md
@@ -1,6 +1,6 @@
# 集群版使用流程及执行模式
-OpenMLDB 针对线上线下的特征工程全流程,在不同阶段提供了不同的执行模式。尤其在生产环境下所使用的集群版,针对不同阶段做了较为复杂的执行模式的划分。本文集中说明在集群版 OpenMLDB 中,从特征开发到上线的全流程,及其相应的执行模式。
+OpenMLDB 针对线上线下的特征工程全流程,在不同阶段提供了不同的执行模式。尤其在生产环境下所使用的集群版,针对不同阶段详细地划分了不同的执行模式。本文集中说明在集群版 OpenMLDB 中,从特征开发到上线的全流程,及其相应的执行模式。
## 1. OpenMLDB 使用流程概览
@@ -22,23 +22,23 @@ OpenMLDB 针对线上线下的特征工程全流程,在不同阶段提供了
由于离线和线上场景的操作数据对象不同,其底层的存储和计算节点亦不同。因此,OpenMLDB 设置了几种不同的执行模式来支持完成以上步骤。以下表格总结了各个步骤所使用的执行模式,稍后将会详细介绍关于执行模式的重要概念。
-| 步骤 | 执行模式 | 开发工具 | 说明 |
-| ----------------------- | -------- |---------------------------------------|---------------------------------------------------------------------------------------------------------------|
-| 1. 离线数据导入 | 离线模式 | CLI | - `LOAD DATA` command |
-| 2. 离线特征开发 | 离线模式 | CLI | - 支持 OpenMLDB 所有的 SQL 语法<br>- 部分 SQL (如 `SELECT`)非阻塞式异步运行方式 |
-| 3. 特征方案部署 | 离线模式 | CLI | - `DEPLOY` 命令 |
-| 4. 冷启动在线数据导入 | 在线模式 | CLI,导入工具 | - CLI 使用 `LOAD DATA` 命令<br>- 也可使用独立导入工具 `openmldb-import` |
-| 5. 实时数据接入 | 在线模式 | connector, REST APIs, Java/Python SDK | - 第三方数据源调用 OpenMLDB 的相关数据插入 API(connector),引入实时数据<br>- 或使用Java/Python SDK工具,在对请求行的计算完成后,插入主表 |
-| 6. 在线数据预览(可选) | 在线模式 | CLI, Java/Python SDK | - 目前仅支持对列进行 `SELECT` 操作、表达式、以及单行处理函数用于数据预览<br>- 不支持 `LAST JOIN`, `GROUP BY`, `HAVING`, `WINDOW` 等复杂计算 |
-| 7. 实时特征计算 | 请求模式 | REST APIs, Java/Python SDK | - 支持 OpenMLDB 所有的 SQL 语法<br>- REST APIs 以及 Java SDK 支持单行或者批请求<br>- Python SDK 仅支持单行请求 |
+| 步骤 | 执行模式 | 开发工具 | 说明 |
+| ----------------------- |--------|---------------------------------------|---------------------------------------------------------------------------------------------------------------|
+| 1. 离线数据导入 | 离线模式 | CLI | - `LOAD DATA` command |
+| 2. 离线特征开发 | 离线模式 | CLI | - 支持 OpenMLDB 所有的 SQL 语法<br>- 部分 SQL (如 `SELECT`)非阻塞式异步运行方式 |
+| 3. 特征方案部署 | 离线模式 | CLI | - `DEPLOY` 命令 |
+| 4. 冷启动在线数据导入 | 在线预览模式 | CLI,导入工具 | - CLI 使用 `LOAD DATA` 命令<br>- 也可使用独立导入工具 `openmldb-import` |
+| 5. 实时数据接入 | 在线预览模式 | connector, REST APIs, Java/Python SDK | - 第三方数据源调用 OpenMLDB 的相关数据插入 API(connector),引入实时数据<br>- 或使用Java/Python SDK工具,在对请求行的计算完成后,插入主表 |
+| 6. 在线数据预览(可选) | 在线预览模式 | CLI, Java/Python SDK | - 目前仅支持对列进行 `SELECT` 操作、表达式、以及单行处理函数用于数据预览<br>- 不支持 `LAST JOIN`, `GROUP BY`, `HAVING`, `WINDOW` 等复杂计算 |
+| 7. 实时特征计算 | 在线请求模式 | REST APIs, Java/Python SDK | - 支持 OpenMLDB 所有的 SQL 语法<br>- REST APIs 以及 Java SDK 支持单行或者批请求<br>- Python SDK 仅支持单行请求 |
-从以上的总结表格上可以看到,执行模式分为 `离线模式`,`在线模式`,以及`请求模式`。在后面的章节中,我们将对这几种模式展开详细介绍。下图总结示意了全流程开发和对应的执行模式。
+从以上的总结表格上可以看到,执行模式分为 `离线模式`,`在线预览模式`,以及`在线请求模式`。后续我们将对这几种模式展开详细介绍。下图总结示意了全流程开发和对应的执行模式。
![image-20220310170024349](images/modes-flow.png)
### 1.3 单机版执行模式说明
-虽然本文集中讲解集群版,但是有必要也简单介绍单机版的执行模式。单机版的执行模式相对简单,其离线数据和在线数据的存储和计算节点统一,因此单机版并不区分离线模式和在线模式。即我们可以直观的理解为,在 CLI 下,单机版并没有执行模式的概念,任何OpenMLDB支持的 SQL 语法均可以在 CLI 下直接运行。因此,单机版特别适合用于快速试用或进行 SQL 实践。但是,在实时特征计算阶段,单机版和集群版一样,依然运行于在线请求模式下。
+虽然本文集中讲解集群版,但是有必要也简单介绍单机版的执行模式。单机版的执行模式相对简单,其离线数据和在线数据的存储和计算节点统一,因此单机版并不区分离线模式和在线模式。即我们可以直观的理解为,在 CLI 下,单机版并没有执行模式的概念,绝大多数OpenMLDB支持的 SQL 语法均可以在 CLI 下直接运行(对于部分SQL命令的参数,单机版支持的选项与集群版略有不同,详见[OpenMLDB支持的SQL](https://openmldb.ai/docs/zh/main/reference/sql/index.html))。因此,单机版特别适合用于快速试用或进行 SQL 实践。但是,在实时特征计算阶段,单机版和集群版一样,依然运行于在线请求模式下。
:::{note}
如果仅在非生产环境试用 OpenMLDB或进行SQL学习实践,强烈建议使用单机版,可以获得更快捷方便的部署体验
@@ -55,7 +55,7 @@ OpenMLDB 针对线上线下的特征工程全流程,在不同阶段提供了
- 非阻塞式执行的 SQL 由内部的 TaskManager 进行管理,可以通过 `SHOW JOBS`, `SHOW JOB`, `STOP JOB` 命令进行查看和管理。
:::{tip}
-和很多关系型数据库系统不同,`SELECT` 命令在离线模式下为异步执行。因此在离线特征开发阶段,强烈建议使用 `SELECT INTO` 语句进行开发调试,可以将结果导出到文件,方便查看。
+和很多关系型数据库系统不同,`SELECT` 命令在离线模式下默认为异步执行,如需设置为同步执行,见[设置离线模式下命令的同步执行](https://openmldb.ai/docs/zh/main/reference/sql/ddl/SET_STATEMENT.html#id4)。因此在离线特征开发阶段,如果使用异步执行,强烈建议使用 `SELECT INTO` 语句进行开发调试,可以将结果导出到文件,方便查看。
:::
用于特征方案部署的命令`DEPLOY`亦在离线模式下执行。其部署规范对于 SQL 还有一定的限制,详细可以参阅 [OpenMLDB SQL上线规范和要求](https://openmldb.ai/docs/zh/main/reference/sql/deployment_manage/ONLINE_SERVING_REQUIREMENTS.html)。
@@ -65,36 +65,36 @@ OpenMLDB 针对线上线下的特征工程全流程,在不同阶段提供了
- CLI: `SET @@execute_mode='offline'`;CLI 启动以后的默认模式也为离线模式。
- REST APIs, Java/Python SDK:不支持离线模式
-## 3. 在线模式
+## 3. 在线预览模式
-冷启动在线数据导入、实时数据接入、以及在线数据预览在在线模式下执行。在线模式的作用是针对线上数据进行管理和预览。线上数据的存储和计算由 tablet server 支持。
+冷启动在线数据导入、实时数据接入、以及在线数据预览在在线预览模式下执行。在线预览模式的作用是对线上数据进行管理和预览。线上数据的存储和计算由 tablet支持。
-在线模式有以下主要特点:
+在线预览模式有以下主要特点:
- 在线数据导入(`LOAD DATA`)和离线模式下一样,属于非阻塞式的异步执行 SQL,其余均为同步执行。
-- 在线模式目前仅支持简单的 `SELECT ` 列相关操作来查看相关数据,并不支持复杂的 SQL 查询。因此在线模式并不支持 SQL 特征的开发调试,相关开发工作应该在离线模式或者单机版进行。
+- 在线预览模式目前仅支持简单的 `SELECT ` 列相关操作来查看相关数据,并不支持复杂的 SQL 查询。因此在线预览模式并不支持 SQL 特征的开发调试,相关开发工作应该在离线模式或者单机版进行。
-在线模式通过以下形式进行设置:
+在线预览模式通过以下形式进行设置:
- CLI: `SET @@execute_mode='online'`
- REST APIs, Java/Python SDK:默认只支持在线模式下执行,无需进行设置。
-## 4. 请求模式
+## 4. 在线请求模式
-在特征脚本被部署以及接入线上数据以后,实时特征计算服务就已经准备就绪,可以通过请求模式来进行实时特征抽取。请求模式在 REST APIs 以及 SDK 下支持。请求模式是 OpenMLDB 特有的支撑线上实时计算的模式,和常见数据库的 SQL 查询非常不同。
+在部署特征脚本以及接入线上数据以后,实时特征计算服务就已经准备就绪,可以通过在线请求模式来进行实时特征抽取。REST APIs 以及 SDK 支持在线请求模式。在线请求模式是 OpenMLDB 特有的支撑线上实时计算的模式,和常见数据库的 SQL 查询非常不同。
-请求模式需要三个输入:
+在线请求模式需要三个输入:
- SQL 特征脚本,即为特征部署上线过程中所使用的 SQL 脚本,规定了做特征抽取的计算逻辑。
- 在线数据,即为冷启动导入或者实时接入的线上数据。一般为配合 SQL 的窗口计算的最新数据。比如 SQL 脚本的聚合函数会定义一个最近三个月的时间窗口,那么在线存储就需要保留相应的最新三个月数据。
- 实时请求行(a request row),包含了当前正在发生的实时行为,用于实时特征抽取。比如反欺诈场景下的刷卡信息,或者是推荐场景下的搜索关键字等。
-基于上述输入,对于每一个实时请求行,请求模式都会返回一条特征抽取的结果。其计算逻辑为:请求行会依据 SQL 脚本的逻辑(如 `PARTITION BY`, `ORDER BY` 等)被虚拟的插入在线数据表格的正确位置中,然后只针对该行进行特征聚合计算,返回唯一对应的抽取结果。下图直观的解释了在线请求模式的运算过程。
+基于上述输入,对于每一个实时请求行,在线请求模式都会返回一条特征抽取的结果。其计算逻辑为:请求行会依据 SQL 脚本的逻辑(如 `PARTITION BY`, `ORDER BY` 等)被虚拟地插入在线数据表格的正确位置中,然后只针对该行进行特征聚合计算,返回唯一对应的抽取结果。下图直观地解释了在线请求模式的运算过程。
![modes-request](images/modes-request.png)
-请求模式通过以下形式支持:
+在线请求模式通过以下形式支持:
- CLI:不支持
- REST APIs:支持单行或者多行 request rows 的请求,详见:[REST APIs](https://openmldb.ai/docs/zh/main/quickstart/rest_api.html)
diff --git a/extensions/kafka-connect-jdbc/DEVELOP.md b/extensions/kafka-connect-jdbc/DEVELOP.md
new file mode 100644
index 00000000000..90cf747bcf1
--- /dev/null
+++ b/extensions/kafka-connect-jdbc/DEVELOP.md
@@ -0,0 +1,28 @@
+# 开发指南
+kafka jdbc connector for openmldb,可以支持auto.schema,即从openmldb处获取table schema。
+目前仅支持与value JsonConverter搭配使用(OpenMLDB完全不支持key),且需要disable JsonConverter的schema支持,
+connector配置如下:
+```
+value.converter=org.apache.kafka.connect.json.JsonConverter
+value.converter.schemas.enable=false
+```
+而且auto.schema开启时不可以开启auto.create(有配置校验逻辑)。请确保已经建好了OpenMLDB表。
+
+## message convert for auto schema
+
+auto schema开启后,主要逻辑在[BufferedRecords](src/main/java/io/confluent/connect/jdbc/sink/BufferedRecords.java)
+中。核心函数为`public List add(SinkRecord record)`。
+
+如果没有auto schema,add中第一步便是进行validate,`recordValidator.validate(record);`。
+已经设置`value.converter.schemas.enable=false`的情况下,Json Message将直接成为value段,schema==null。
+于是,validate步骤会失败,抛出异常并提示"requires records with a non-null Struct value and non-null Struct schema"。
+
+因此,支持auto schema,需要在validate之前,对Json Value进行格式转换。
+Record中的value实际是需要org.apache.kafka.connect.data.Struct类型,但schemas.enable关闭的JsonConverter只会返回HashMap。
+所以,此处我们将HashMap转为org.apache.kafka.connect.data.Struct,依靠从openmldb处获取的schema。
+
+得到新的value后,重新生成新的SinkRecord,其他member保持不变。
+
+如果想要对某个类型做更多的value转换支持,可以在`convertToLogicalType`或`convertToSchemaType`中加入逻辑。
+
+比如支持多类型json value转为int32类型,就在int32的case中加入对value的类型判断,并支持将该类型转为int32。
diff --git a/extensions/kafka-connect-jdbc/pom.xml b/extensions/kafka-connect-jdbc/pom.xml
index dde83dfc4ee..3beb7f8b844 100644
--- a/extensions/kafka-connect-jdbc/pom.xml
+++ b/extensions/kafka-connect-jdbc/pom.xml
@@ -76,6 +76,10 @@
            <name>Confluent</name>
            <url>http://packages.confluent.io/maven/</url>
+        <repository>
+            <id>ossrh</id>
+            <url>https://s01.oss.sonatype.org/content/repositories/snapshots</url>
+        </repository>
@@ -128,13 +132,13 @@
            <groupId>com.4paradigm.openmldb</groupId>
            <artifactId>openmldb-jdbc</artifactId>
-           <version>${openmldb-jdbc.version}</version>
+           <version>${openmldb-jdbc.version}-SNAPSHOT</version>
            <scope>runtime</scope>
            <groupId>com.4paradigm.openmldb</groupId>
            <artifactId>openmldb-native</artifactId>
-           <version>${openmldb-jdbc.version}-allinone</version>
+           <version>${openmldb-jdbc.version}-allinone-SNAPSHOT</version>
            <scope>runtime</scope>
diff --git a/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/dialect/OpenmldbDatabaseDialect.java b/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/dialect/OpenmldbDatabaseDialect.java
index 0a40221ef3b..ae9e6d7a46a 100644
--- a/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/dialect/OpenmldbDatabaseDialect.java
+++ b/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/dialect/OpenmldbDatabaseDialect.java
@@ -15,10 +15,8 @@
package io.confluent.connect.jdbc.dialect;
-import java.util.Collection;
-import java.util.List;
-
import io.confluent.connect.jdbc.sink.metadata.SinkRecordField;
+import io.confluent.connect.jdbc.util.ColumnDefinition;
import io.confluent.connect.jdbc.util.ColumnId;
import io.confluent.connect.jdbc.util.ExpressionBuilder;
import io.confluent.connect.jdbc.util.IdentifierRules;
@@ -26,11 +24,16 @@
import org.apache.kafka.common.config.AbstractConfig;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.sql.Types;
+import java.util.Collection;
+import java.util.List;
+
/**
* A {@link DatabaseDialect} for OpenMLDB.
*/
@@ -102,8 +105,8 @@ protected String getSqlType(SinkRecordField field) {
@Override
public String buildCreateTableStatement(
- TableId table,
- Collection fields
+ TableId table,
+ Collection fields
) {
final List<String> pkFieldNames = extractPrimaryKeyFieldNames(fields);
if (!pkFieldNames.isEmpty()) {
@@ -114,8 +117,8 @@ public String buildCreateTableStatement(
@Override
protected void writeColumnSpec(
- ExpressionBuilder builder,
- SinkRecordField f
+ ExpressionBuilder builder,
+ SinkRecordField f
) {
builder.appendColumnName(f.name());
builder.append(" ");
@@ -124,11 +127,11 @@ protected void writeColumnSpec(
if (f.defaultValue() != null) {
builder.append(" DEFAULT ");
formatColumnValue(
- builder,
- f.schemaName(),
- f.schemaParameters(),
- f.schemaType(),
- f.defaultValue()
+ builder,
+ f.schemaName(),
+ f.schemaParameters(),
+ f.schemaType(),
+ f.defaultValue()
);
} else if (!isColumnOptional(f)) {
builder.append(" NOT NULL");
@@ -137,8 +140,8 @@ protected void writeColumnSpec(
@Override
public String buildDropTableStatement(
- TableId table,
- DropOptions options
+ TableId table,
+ DropOptions options
) {
// no ifExists, no cascade
ExpressionBuilder builder = expressionBuilder();
@@ -150,25 +153,25 @@ public String buildDropTableStatement(
@Override
public List<String> buildAlterTable(
- TableId table,
- Collection fields
+ TableId table,
+ Collection fields
) {
throw new UnsupportedOperationException("alter is unsupported");
}
@Override
public String buildUpdateStatement(
- TableId table,
- Collection keyColumns,
- Collection nonKeyColumns
+ TableId table,
+ Collection keyColumns,
+ Collection nonKeyColumns
) {
throw new UnsupportedOperationException("update is unsupported");
}
@Override
public final String buildDeleteStatement(
- TableId table,
- Collection keyColumns
+ TableId table,
+ Collection keyColumns
) {
throw new UnsupportedOperationException("delete is unsupported");
}
@@ -177,5 +180,57 @@ public final String buildDeleteStatement(
protected Integer getSqlTypeForSchema(Schema schema) {
return 0;
}
+
+ // set name in schema
+ @Override
+ protected String addFieldToSchema(
+ final ColumnDefinition columnDefn,
+ final SchemaBuilder builder,
+ final String fieldName,
+ final int sqlType,
+ final boolean optional
+ ) {
+ SchemaBuilder schemaBuilder = null;
+ switch (sqlType) {
+ // 16 bit ints
+ case Types.SMALLINT: {
+ // TODO(hw): openmldb doesn't support unsigned, but jdbc metadata returns false,
+ // fix it later. columnDefn.isSignedNumber()
+ schemaBuilder = SchemaBuilder.int16();
+ break;
+ }
+
+ // 32 bit int
+ case Types.INTEGER: {
+ schemaBuilder = SchemaBuilder.int32();
+ break;
+ }
+
+ // 64 bit int
+ case Types.BIGINT: {
+ schemaBuilder = SchemaBuilder.int64();
+ break;
+ }
+ // openmldb jdbc use java float, not double
+ case Types.FLOAT: {
+ schemaBuilder = SchemaBuilder.float32();
+ break;
+ }
+
+ // Double is just double
+ // Date is day + month + year
+ // Time is a time of day -- hour, minute, seconds, nanoseconds
+ // Timestamp is a date + time, openmldb jdbc setTimestamp is compatible
+ default: {
+ }
+ }
+ if (schemaBuilder == null) {
+ log.warn("openmldb schema builder for sqlType {} is null, " +
+ "use GenericDatabaseDialect method", sqlType);
+ return super.addFieldToSchema(columnDefn, builder, fieldName, sqlType, optional);
+ }
+ builder.field(fieldName, optional ? schemaBuilder.optional().build() : schemaBuilder.build());
+ return fieldName;
+ }
}
diff --git a/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/BufferedRecords.java b/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/BufferedRecords.java
index cda5df24fef..43d9655082c 100644
--- a/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/BufferedRecords.java
+++ b/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/BufferedRecords.java
@@ -15,30 +15,40 @@
package io.confluent.connect.jdbc.sink;
+import io.confluent.connect.jdbc.dialect.DatabaseDialect;
+import io.confluent.connect.jdbc.dialect.DatabaseDialect.StatementBinder;
+import io.confluent.connect.jdbc.sink.metadata.FieldsMetadata;
+import io.confluent.connect.jdbc.sink.metadata.SchemaPair;
+import io.confluent.connect.jdbc.util.ColumnDefinition;
+import io.confluent.connect.jdbc.util.ColumnId;
+import io.confluent.connect.jdbc.util.TableDefinition;
+import io.confluent.connect.jdbc.util.TableId;
+import org.apache.kafka.connect.data.Date;
+import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.SchemaBuilder;
+import org.apache.kafka.connect.data.Struct;
+import org.apache.kafka.connect.data.Time;
+import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.sink.SinkRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.sql.BatchUpdateException;
import java.sql.Connection;
import java.sql.PreparedStatement;
+import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
-import java.sql.BatchUpdateException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
-import io.confluent.connect.jdbc.dialect.DatabaseDialect;
-import io.confluent.connect.jdbc.dialect.DatabaseDialect.StatementBinder;
-import io.confluent.connect.jdbc.sink.metadata.FieldsMetadata;
-import io.confluent.connect.jdbc.sink.metadata.SchemaPair;
-import io.confluent.connect.jdbc.util.ColumnId;
-import io.confluent.connect.jdbc.util.TableId;
-
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
@@ -75,9 +85,107 @@ public BufferedRecords(
this.dbStructure = dbStructure;
this.connection = connection;
this.recordValidator = RecordValidator.create(config);
+ if (config.autoSchema) {
+ TableDefinition tableDefn;
+ try {
+ tableDefn = dbStructure.tableDefinition(connection, tableId);
+ if (tableDefn == null) {
+ throw new SQLException("can't get table info from sink db");
+ }
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ SchemaBuilder builder = SchemaBuilder.struct();
+ for (ColumnDefinition colDefn : tableDefn.definitionsForColumns()) {
+ dbDialect.addFieldToSchema(colDefn, builder);
+ }
+ valueSchema = builder.build();
+ }
+ }
+
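+ // Maps a JSON-decoded value onto the field's logical type: Date/Time/Timestamp values
+ // arrive from JsonConverter as epoch-millisecond Longs and are wrapped into java.util.Date.
+ // Returns null when no logical type applies, so the caller falls back to convertToSchemaType.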
+ private Object convertToLogicalType(Field field, Object value) {
+ if (field.schema().name() != null && value != null) {
+ switch (field.schema().name()) {
+ case Date.LOGICAL_NAME:
+ case Time.LOGICAL_NAME:
+ case Timestamp.LOGICAL_NAME:
+ return new java.util.Date((Long) value);
+ default:
+ }
+ }
+ return null;
+ }
+
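+ // Narrows plain JSON numbers to the column's schema type: JsonConverter parses integers as
+ // Long and floating-point values as Double, so Long is narrowed for INT16/INT32 and Double
+ // for FLOAT32; all other types are passed through unchanged.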
+ private Object convertToSchemaType(Field field, Object value) {
+ if (value == null) {
+ log.trace("value is null");
+ return null;
+ }
+ Object result = value;
+ switch (field.schema().type()) {
+ case INT16: {
+ if (value instanceof Long) {
+ result = ((Long) value).shortValue();
+ }
+ break;
+ }
+ case INT32: {
+ if (value instanceof Long) {
+ result = ((Long) value).intValue();
+ }
+ break;
+ }
+ case FLOAT32: {
+ if (value instanceof Double) {
+ result = ((Double) value).floatValue();
+ }
+ break;
+ }
+ case FLOAT64: {
+ if (!(value instanceof Double)) {
+ log.warn("json row type is not double");
+ }
+ break;
+ }
+ default: {
+ log.debug("field {}-type {}, value type {}, stay", field.name(),
+ field.schema().type(), value.getClass().getSimpleName());
+ }
+ }
+ return result;
+ }
+
+ private Object convertToStruct(Schema valueSchema, Object value) {
+ Struct structValue = new Struct(valueSchema);
+ HashMap map = (HashMap) value;
+ for (Field field : valueSchema.fields()) {
+ Object v = map.get(field.name());
+ // convert to the right type with schema(logical type first, if not, schema type)
+ Object newV = convertToLogicalType(field, v);
+ if (newV == null) {
+ newV = convertToSchemaType(field, v);
+ }
+ structValue.put(field.name(), newV);
+ }
+ return structValue;
}
public List<SinkRecord> add(SinkRecord record) throws SQLException, TableAlterOrCreateException {
+ // If auto.schema=true, we will use the insert stmt schema, not the record.valueSchema().
+ // OpenMLDB doesn't support pk mode, so we only handle value schema here. Leave key
+ // schema as it is.
+ if (config.autoSchema && record.valueSchema() == null) {
+ // no value schema, value is a map, must convert to Struct
+ if (!(record.value() instanceof HashMap)) {
+ log.warn("auto schema convertToStruct only support hashmap to struct");
+ }
+ Object structValue = convertToStruct(valueSchema, record.value());
+ record = new SinkRecord(record.topic(), record.kafkaPartition(),
+ record.keySchema(), record.key(),
+ valueSchema, structValue,
+ record.kafkaOffset(), record.timestamp(), record.timestampType(), record.headers());
+ }
+
recordValidator.validate(record);
final List<SinkRecord> flushed = new ArrayList<>();
@@ -156,7 +264,7 @@ public List add(SinkRecord record) throws SQLException, TableAlterOr
);
}
}
-
+
// set deletesInBatch if schema value is not null
if (isNull(record.value()) && config.deleteEnabled) {
deletesInBatch = true;
@@ -197,7 +305,7 @@ private void executeUpdates() throws SQLException {
for (int updateCount : batchStatus) {
if (updateCount == Statement.EXECUTE_FAILED) {
throw new BatchUpdateException(
- "Execution failed for part of the batch update", batchStatus);
+ "Execution failed for part of the batch update", batchStatus);
}
}
}
@@ -208,7 +316,7 @@ private void executeDeletes() throws SQLException {
for (int updateCount : batchStatus) {
if (updateCount == Statement.EXECUTE_FAILED) {
throw new BatchUpdateException(
- "Execution failed for part of the batch delete", batchStatus);
+ "Execution failed for part of the batch delete", batchStatus);
}
}
}
diff --git a/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/JdbcSinkConfig.java b/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/JdbcSinkConfig.java
index f672a6c8a72..f58d2443381 100644
--- a/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/JdbcSinkConfig.java
+++ b/extensions/kafka-connect-jdbc/src/main/java/io/confluent/connect/jdbc/sink/JdbcSinkConfig.java
@@ -107,9 +107,9 @@ public enum PrimaryKeyMode {
private static final String TABLE_NAME_FORMAT_DEFAULT = "${topic}";
private static final String TABLE_NAME_FORMAT_DOC =
"A format string for the destination table name, which may contain '${topic}' as a "
- + "placeholder for the originating topic name.\n"
- + "For example, ``kafka_${topic}`` for the topic 'orders' will map to the table name "
- + "'kafka_orders'.";
+ + "placeholder for the originating topic name.\n"
+ + "For example, ``kafka_${topic}`` for the topic 'orders' will map to the table name "
+ + "'kafka_orders'.";
private static final String TABLE_NAME_FORMAT_DISPLAY = "Table Name Format";
public static final String MAX_RETRIES = "max.retries";
@@ -128,85 +128,85 @@ public enum PrimaryKeyMode {
private static final int BATCH_SIZE_DEFAULT = 3000;
private static final String BATCH_SIZE_DOC =
"Specifies how many records to attempt to batch together for insertion into the destination"
- + " table, when possible.";
+ + " table, when possible.";
private static final String BATCH_SIZE_DISPLAY = "Batch Size";
public static final String DELETE_ENABLED = "delete.enabled";
private static final String DELETE_ENABLED_DEFAULT = "false";
private static final String DELETE_ENABLED_DOC =
"Whether to treat ``null`` record values as deletes. Requires ``pk.mode`` "
- + "to be ``record_key``.";
+ + "to be ``record_key``.";
private static final String DELETE_ENABLED_DISPLAY = "Enable deletes";
public static final String AUTO_CREATE = "auto.create";
private static final String AUTO_CREATE_DEFAULT = "false";
private static final String AUTO_CREATE_DOC =
"Whether to automatically create the destination table based on record schema if it is "
- + "found to be missing by issuing ``CREATE``.";
+ + "found to be missing by issuing ``CREATE``.";
private static final String AUTO_CREATE_DISPLAY = "Auto-Create";
public static final String AUTO_EVOLVE = "auto.evolve";
private static final String AUTO_EVOLVE_DEFAULT = "false";
private static final String AUTO_EVOLVE_DOC =
"Whether to automatically add columns in the table schema when found to be missing relative "
- + "to the record schema by issuing ``ALTER``.";
+ + "to the record schema by issuing ``ALTER``.";
private static final String AUTO_EVOLVE_DISPLAY = "Auto-Evolve";
public static final String INSERT_MODE = "insert.mode";
private static final String INSERT_MODE_DEFAULT = "insert";
private static final String INSERT_MODE_DOC =
"The insertion mode to use. Supported modes are:\n"
- + "``insert``\n"
- + " Use standard SQL ``INSERT`` statements.\n"
- + "``upsert``\n"
- + " Use the appropriate upsert semantics for the target database if it is supported by "
- + "the connector, e.g. ``INSERT OR IGNORE``.\n"
- + "``update``\n"
- + " Use the appropriate update semantics for the target database if it is supported by "
- + "the connector, e.g. ``UPDATE``.";
+ + "``insert``\n"
+ + " Use standard SQL ``INSERT`` statements.\n"
+ + "``upsert``\n"
+ + " Use the appropriate upsert semantics for the target database if it is supported by "
+ + "the connector, e.g. ``INSERT OR IGNORE``.\n"
+ + "``update``\n"
+ + " Use the appropriate update semantics for the target database if it is supported by "
+ + "the connector, e.g. ``UPDATE``.";
private static final String INSERT_MODE_DISPLAY = "Insert Mode";
public static final String PK_FIELDS = "pk.fields";
private static final String PK_FIELDS_DEFAULT = "";
private static final String PK_FIELDS_DOC =
"List of comma-separated primary key field names. The runtime interpretation of this config"
- + " depends on the ``pk.mode``:\n"
- + "``none``\n"
- + " Ignored as no fields are used as primary key in this mode.\n"
- + "``kafka``\n"
- + " Must be a trio representing the Kafka coordinates, defaults to ``"
- + StringUtils.join(DEFAULT_KAFKA_PK_NAMES, ",") + "`` if empty.\n"
- + "``record_key``\n"
- + " If empty, all fields from the key struct will be used, otherwise used to extract the"
- + " desired fields - for primitive key only a single field name must be configured.\n"
- + "``record_value``\n"
- + " If empty, all fields from the value struct will be used, otherwise used to extract "
- + "the desired fields.";
+ + " depends on the ``pk.mode``:\n"
+ + "``none``\n"
+ + " Ignored as no fields are used as primary key in this mode.\n"
+ + "``kafka``\n"
+ + " Must be a trio representing the Kafka coordinates, defaults to ``"
+ + StringUtils.join(DEFAULT_KAFKA_PK_NAMES, ",") + "`` if empty.\n"
+ + "``record_key``\n"
+ + " If empty, all fields from the key struct will be used, otherwise used to extract the"
+ + " desired fields - for primitive key only a single field name must be configured.\n"
+ + "``record_value``\n"
+ + " If empty, all fields from the value struct will be used, otherwise used to extract "
+ + "the desired fields.";
private static final String PK_FIELDS_DISPLAY = "Primary Key Fields";
public static final String PK_MODE = "pk.mode";
private static final String PK_MODE_DEFAULT = "none";
private static final String PK_MODE_DOC =
"The primary key mode, also refer to ``" + PK_FIELDS + "`` documentation for interplay. "
- + "Supported modes are:\n"
- + "``none``\n"
- + " No keys utilized.\n"
- + "``kafka``\n"
- + " Kafka coordinates are used as the PK.\n"
- + "``record_key``\n"
- + " Field(s) from the record key are used, which may be a primitive or a struct.\n"
- + "``record_value``\n"
- + " Field(s) from the record value are used, which must be a struct.";
+ + "Supported modes are:\n"
+ + "``none``\n"
+ + " No keys utilized.\n"
+ + "``kafka``\n"
+ + " Kafka coordinates are used as the PK.\n"
+ + "``record_key``\n"
+ + " Field(s) from the record key are used, which may be a primitive or a struct.\n"
+ + "``record_value``\n"
+ + " Field(s) from the record value are used, which must be a struct.";
private static final String PK_MODE_DISPLAY = "Primary Key Mode";
public static final String FIELDS_WHITELIST = "fields.whitelist";
private static final String FIELDS_WHITELIST_DEFAULT = "";
private static final String FIELDS_WHITELIST_DOC =
"List of comma-separated record value field names. If empty, all fields from the record "
- + "value are utilized, otherwise used to filter to the desired fields.\n"
- + "Note that ``" + PK_FIELDS + "`` is applied independently in the context of which field"
- + "(s) form the primary key columns in the destination database,"
- + " while this configuration is applicable for the other columns.";
+ + "value are utilized, otherwise used to filter to the desired fields.\n"
+ + "Note that ``" + PK_FIELDS + "`` is applied independently in the context of which field"
+ + "(s) form the primary key columns in the destination database,"
+ + " while this configuration is applicable for the other columns.";
private static final String FIELDS_WHITELIST_DISPLAY = "Fields Whitelist";
private static final ConfigDef.Range NON_NEGATIVE_INT_VALIDATOR = ConfigDef.Range.atLeast(0);
@@ -222,16 +222,16 @@ public enum PrimaryKeyMode {
public static final String DIALECT_NAME_DEFAULT = "";
private static final String DIALECT_NAME_DOC =
"The name of the database dialect that should be used for this connector. By default this "
- + "is empty, and the connector automatically determines the dialect based upon the "
- + "JDBC connection URL. Use this if you want to override that behavior and use a "
- + "specific dialect. All properly-packaged dialects in the JDBC connector plugin "
- + "can be used.";
+ + "is empty, and the connector automatically determines the dialect based upon the "
+ + "JDBC connection URL. Use this if you want to override that behavior and use a "
+ + "specific dialect. All properly-packaged dialects in the JDBC connector plugin "
+ + "can be used.";
public static final String DB_TIMEZONE_CONFIG = "db.timezone";
public static final String DB_TIMEZONE_DEFAULT = "UTC";
private static final String DB_TIMEZONE_CONFIG_DOC =
"Name of the JDBC timezone that should be used in the connector when "
- + "inserting time-based values. Defaults to UTC.";
+ + "inserting time-based values. Defaults to UTC.";
private static final String DB_TIMEZONE_CONFIG_DISPLAY = "DB Time Zone";
public static final String QUOTE_SQL_IDENTIFIERS_CONFIG =
@@ -248,11 +248,17 @@ public enum PrimaryKeyMode {
public static final String TABLE_TYPES_DEFAULT = TableType.TABLE.toString();
private static final String TABLE_TYPES_DOC =
"The comma-separated types of database tables to which the sink connector can write. "
- + "By default this is ``" + TableType.TABLE + "``, but any combination of ``"
- + TableType.TABLE + "`` and ``" + TableType.VIEW + "`` is allowed. Not all databases "
- + "support writing to views, and when they do the the sink connector will fail if the "
- + "view definition does not match the records' schemas (regardless of ``"
- + AUTO_EVOLVE + "``).";
+ + "By default this is ``" + TableType.TABLE + "``, but any combination of ``"
+ + TableType.TABLE + "`` and ``" + TableType.VIEW + "`` is allowed. Not all databases "
+ + "support writing to views, and when they do the the sink connector will fail if the "
+ + "view definition does not match the records' schemas (regardless of ``"
+ + AUTO_EVOLVE + "``).";
+
+ public static final String AUTO_SCHEMA_CONFIG = "auto.schema";
+ private static final String AUTO_SCHEMA_DISPLAY = "Auto Schema";
+ public static final String AUTO_SCHEMA_DEFAULT = "false";
+ private static final String AUTO_SCHEMA_DOC =
+ "Whether to automatically get schema from OpenMLDB, only works with JsonConverter";
private static final EnumRecommender QUOTE_METHOD_RECOMMENDER =
EnumRecommender.in(QuoteMethod.values());
@@ -261,169 +267,169 @@ public enum PrimaryKeyMode {
EnumRecommender.in(TableType.values());
public static final ConfigDef CONFIG_DEF = new ConfigDef()
- // Connection
- .define(
- CONNECTION_URL,
- ConfigDef.Type.STRING,
- ConfigDef.NO_DEFAULT_VALUE,
- ConfigDef.Importance.HIGH,
- CONNECTION_URL_DOC,
- CONNECTION_GROUP,
- 1,
- ConfigDef.Width.LONG,
- CONNECTION_URL_DISPLAY
- )
- .define(
- CONNECTION_USER,
- ConfigDef.Type.STRING,
- null,
- ConfigDef.Importance.HIGH,
- CONNECTION_USER_DOC,
- CONNECTION_GROUP,
- 2,
- ConfigDef.Width.MEDIUM,
- CONNECTION_USER_DISPLAY
- )
- .define(
- CONNECTION_PASSWORD,
- ConfigDef.Type.PASSWORD,
- null,
- ConfigDef.Importance.HIGH,
- CONNECTION_PASSWORD_DOC,
- CONNECTION_GROUP,
- 3,
- ConfigDef.Width.MEDIUM,
- CONNECTION_PASSWORD_DISPLAY
- )
- .define(
- DIALECT_NAME_CONFIG,
- ConfigDef.Type.STRING,
- DIALECT_NAME_DEFAULT,
- DatabaseDialectRecommender.INSTANCE,
- ConfigDef.Importance.LOW,
- DIALECT_NAME_DOC,
- CONNECTION_GROUP,
- 4,
- ConfigDef.Width.LONG,
- DIALECT_NAME_DISPLAY,
- DatabaseDialectRecommender.INSTANCE
- )
- .define(
- CONNECTION_ATTEMPTS,
- ConfigDef.Type.INT,
- CONNECTION_ATTEMPTS_DEFAULT,
- ConfigDef.Range.atLeast(1),
- ConfigDef.Importance.LOW,
- CONNECTION_ATTEMPTS_DOC,
- CONNECTION_GROUP,
- 5,
- ConfigDef.Width.SHORT,
- CONNECTION_ATTEMPTS_DISPLAY
- ).define(
- CONNECTION_BACKOFF,
- ConfigDef.Type.LONG,
- CONNECTION_BACKOFF_DEFAULT,
- ConfigDef.Importance.LOW,
- CONNECTION_BACKOFF_DOC,
- CONNECTION_GROUP,
- 6,
- ConfigDef.Width.SHORT,
- CONNECTION_BACKOFF_DISPLAY
- )
- // Writes
- .define(
- INSERT_MODE,
- ConfigDef.Type.STRING,
- INSERT_MODE_DEFAULT,
- EnumValidator.in(InsertMode.values()),
- ConfigDef.Importance.HIGH,
- INSERT_MODE_DOC,
- WRITES_GROUP,
- 1,
- ConfigDef.Width.MEDIUM,
- INSERT_MODE_DISPLAY
- )
- .define(
- BATCH_SIZE,
- ConfigDef.Type.INT,
- BATCH_SIZE_DEFAULT,
- NON_NEGATIVE_INT_VALIDATOR,
- ConfigDef.Importance.MEDIUM,
- BATCH_SIZE_DOC, WRITES_GROUP,
- 2,
- ConfigDef.Width.SHORT,
- BATCH_SIZE_DISPLAY
- )
- .define(
- DELETE_ENABLED,
- ConfigDef.Type.BOOLEAN,
- DELETE_ENABLED_DEFAULT,
- ConfigDef.Importance.MEDIUM,
- DELETE_ENABLED_DOC, WRITES_GROUP,
- 3,
- ConfigDef.Width.SHORT,
- DELETE_ENABLED_DISPLAY,
- DeleteEnabledRecommender.INSTANCE
- )
- .define(
- TABLE_TYPES_CONFIG,
- ConfigDef.Type.LIST,
- TABLE_TYPES_DEFAULT,
- TABLE_TYPES_RECOMMENDER,
- ConfigDef.Importance.LOW,
- TABLE_TYPES_DOC,
- WRITES_GROUP,
- 4,
- ConfigDef.Width.MEDIUM,
- TABLE_TYPES_DISPLAY
- )
- // Data Mapping
- .define(
- TABLE_NAME_FORMAT,
- ConfigDef.Type.STRING,
- TABLE_NAME_FORMAT_DEFAULT,
- new ConfigDef.NonEmptyString(),
- ConfigDef.Importance.MEDIUM,
- TABLE_NAME_FORMAT_DOC,
- DATAMAPPING_GROUP,
- 1,
- ConfigDef.Width.LONG,
- TABLE_NAME_FORMAT_DISPLAY
- )
- .define(
- PK_MODE,
- ConfigDef.Type.STRING,
- PK_MODE_DEFAULT,
- EnumValidator.in(PrimaryKeyMode.values()),
- ConfigDef.Importance.HIGH,
- PK_MODE_DOC,
- DATAMAPPING_GROUP,
- 2,
- ConfigDef.Width.MEDIUM,
- PK_MODE_DISPLAY,
- PrimaryKeyModeRecommender.INSTANCE
- )
- .define(
- PK_FIELDS,
- ConfigDef.Type.LIST,
- PK_FIELDS_DEFAULT,
- ConfigDef.Importance.MEDIUM,
- PK_FIELDS_DOC,
- DATAMAPPING_GROUP,
- 3,
- ConfigDef.Width.LONG, PK_FIELDS_DISPLAY
- )
- .define(
- FIELDS_WHITELIST,
- ConfigDef.Type.LIST,
- FIELDS_WHITELIST_DEFAULT,
- ConfigDef.Importance.MEDIUM,
- FIELDS_WHITELIST_DOC,
- DATAMAPPING_GROUP,
- 4,
- ConfigDef.Width.LONG,
- FIELDS_WHITELIST_DISPLAY
- ).define(
+ // Connection
+ .define(
+ CONNECTION_URL,
+ ConfigDef.Type.STRING,
+ ConfigDef.NO_DEFAULT_VALUE,
+ ConfigDef.Importance.HIGH,
+ CONNECTION_URL_DOC,
+ CONNECTION_GROUP,
+ 1,
+ ConfigDef.Width.LONG,
+ CONNECTION_URL_DISPLAY
+ )
+ .define(
+ CONNECTION_USER,
+ ConfigDef.Type.STRING,
+ null,
+ ConfigDef.Importance.HIGH,
+ CONNECTION_USER_DOC,
+ CONNECTION_GROUP,
+ 2,
+ ConfigDef.Width.MEDIUM,
+ CONNECTION_USER_DISPLAY
+ )
+ .define(
+ CONNECTION_PASSWORD,
+ ConfigDef.Type.PASSWORD,
+ null,
+ ConfigDef.Importance.HIGH,
+ CONNECTION_PASSWORD_DOC,
+ CONNECTION_GROUP,
+ 3,
+ ConfigDef.Width.MEDIUM,
+ CONNECTION_PASSWORD_DISPLAY
+ )
+ .define(
+ DIALECT_NAME_CONFIG,
+ ConfigDef.Type.STRING,
+ DIALECT_NAME_DEFAULT,
+ DatabaseDialectRecommender.INSTANCE,
+ ConfigDef.Importance.LOW,
+ DIALECT_NAME_DOC,
+ CONNECTION_GROUP,
+ 4,
+ ConfigDef.Width.LONG,
+ DIALECT_NAME_DISPLAY,
+ DatabaseDialectRecommender.INSTANCE
+ )
+ .define(
+ CONNECTION_ATTEMPTS,
+ ConfigDef.Type.INT,
+ CONNECTION_ATTEMPTS_DEFAULT,
+ ConfigDef.Range.atLeast(1),
+ ConfigDef.Importance.LOW,
+ CONNECTION_ATTEMPTS_DOC,
+ CONNECTION_GROUP,
+ 5,
+ ConfigDef.Width.SHORT,
+ CONNECTION_ATTEMPTS_DISPLAY
+ ).define(
+ CONNECTION_BACKOFF,
+ ConfigDef.Type.LONG,
+ CONNECTION_BACKOFF_DEFAULT,
+ ConfigDef.Importance.LOW,
+ CONNECTION_BACKOFF_DOC,
+ CONNECTION_GROUP,
+ 6,
+ ConfigDef.Width.SHORT,
+ CONNECTION_BACKOFF_DISPLAY
+ )
+ // Writes
+ .define(
+ INSERT_MODE,
+ ConfigDef.Type.STRING,
+ INSERT_MODE_DEFAULT,
+ EnumValidator.in(InsertMode.values()),
+ ConfigDef.Importance.HIGH,
+ INSERT_MODE_DOC,
+ WRITES_GROUP,
+ 1,
+ ConfigDef.Width.MEDIUM,
+ INSERT_MODE_DISPLAY
+ )
+ .define(
+ BATCH_SIZE,
+ ConfigDef.Type.INT,
+ BATCH_SIZE_DEFAULT,
+ NON_NEGATIVE_INT_VALIDATOR,
+ ConfigDef.Importance.MEDIUM,
+ BATCH_SIZE_DOC, WRITES_GROUP,
+ 2,
+ ConfigDef.Width.SHORT,
+ BATCH_SIZE_DISPLAY
+ )
+ .define(
+ DELETE_ENABLED,
+ ConfigDef.Type.BOOLEAN,
+ DELETE_ENABLED_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ DELETE_ENABLED_DOC, WRITES_GROUP,
+ 3,
+ ConfigDef.Width.SHORT,
+ DELETE_ENABLED_DISPLAY,
+ DeleteEnabledRecommender.INSTANCE
+ )
+ .define(
+ TABLE_TYPES_CONFIG,
+ ConfigDef.Type.LIST,
+ TABLE_TYPES_DEFAULT,
+ TABLE_TYPES_RECOMMENDER,
+ ConfigDef.Importance.LOW,
+ TABLE_TYPES_DOC,
+ WRITES_GROUP,
+ 4,
+ ConfigDef.Width.MEDIUM,
+ TABLE_TYPES_DISPLAY
+ )
+ // Data Mapping
+ .define(
+ TABLE_NAME_FORMAT,
+ ConfigDef.Type.STRING,
+ TABLE_NAME_FORMAT_DEFAULT,
+ new ConfigDef.NonEmptyString(),
+ ConfigDef.Importance.MEDIUM,
+ TABLE_NAME_FORMAT_DOC,
+ DATAMAPPING_GROUP,
+ 1,
+ ConfigDef.Width.LONG,
+ TABLE_NAME_FORMAT_DISPLAY
+ )
+ .define(
+ PK_MODE,
+ ConfigDef.Type.STRING,
+ PK_MODE_DEFAULT,
+ EnumValidator.in(PrimaryKeyMode.values()),
+ ConfigDef.Importance.HIGH,
+ PK_MODE_DOC,
+ DATAMAPPING_GROUP,
+ 2,
+ ConfigDef.Width.MEDIUM,
+ PK_MODE_DISPLAY,
+ PrimaryKeyModeRecommender.INSTANCE
+ )
+ .define(
+ PK_FIELDS,
+ ConfigDef.Type.LIST,
+ PK_FIELDS_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ PK_FIELDS_DOC,
+ DATAMAPPING_GROUP,
+ 3,
+ ConfigDef.Width.LONG, PK_FIELDS_DISPLAY
+ )
+ .define(
+ FIELDS_WHITELIST,
+ ConfigDef.Type.LIST,
+ FIELDS_WHITELIST_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ FIELDS_WHITELIST_DOC,
+ DATAMAPPING_GROUP,
+ 4,
+ ConfigDef.Width.LONG,
+ FIELDS_WHITELIST_DISPLAY
+ ).define(
DB_TIMEZONE_CONFIG,
ConfigDef.Type.STRING,
DB_TIMEZONE_DEFAULT,
@@ -434,64 +440,74 @@ public enum PrimaryKeyMode {
5,
ConfigDef.Width.MEDIUM,
DB_TIMEZONE_CONFIG_DISPLAY
- )
- // DDL
- .define(
- AUTO_CREATE,
- ConfigDef.Type.BOOLEAN,
- AUTO_CREATE_DEFAULT,
- ConfigDef.Importance.MEDIUM,
- AUTO_CREATE_DOC, DDL_GROUP,
- 1,
- ConfigDef.Width.SHORT,
- AUTO_CREATE_DISPLAY
- )
- .define(
- AUTO_EVOLVE,
- ConfigDef.Type.BOOLEAN,
- AUTO_EVOLVE_DEFAULT,
- ConfigDef.Importance.MEDIUM,
- AUTO_EVOLVE_DOC, DDL_GROUP,
- 2,
- ConfigDef.Width.SHORT,
- AUTO_EVOLVE_DISPLAY
- ).define(
- QUOTE_SQL_IDENTIFIERS_CONFIG,
- ConfigDef.Type.STRING,
- QUOTE_SQL_IDENTIFIERS_DEFAULT,
- ConfigDef.Importance.MEDIUM,
- QUOTE_SQL_IDENTIFIERS_DOC,
- DDL_GROUP,
- 3,
- ConfigDef.Width.MEDIUM,
- QUOTE_SQL_IDENTIFIERS_DISPLAY,
- QUOTE_METHOD_RECOMMENDER
- )
- // Retries
- .define(
- MAX_RETRIES,
- ConfigDef.Type.INT,
- MAX_RETRIES_DEFAULT,
- NON_NEGATIVE_INT_VALIDATOR,
- ConfigDef.Importance.MEDIUM,
- MAX_RETRIES_DOC,
- RETRIES_GROUP,
- 1,
- ConfigDef.Width.SHORT,
- MAX_RETRIES_DISPLAY
- )
- .define(
- RETRY_BACKOFF_MS,
- ConfigDef.Type.INT,
- RETRY_BACKOFF_MS_DEFAULT,
- NON_NEGATIVE_INT_VALIDATOR,
- ConfigDef.Importance.MEDIUM,
- RETRY_BACKOFF_MS_DOC,
- RETRIES_GROUP,
- 2,
- ConfigDef.Width.SHORT,
- RETRY_BACKOFF_MS_DISPLAY
- );
+ )
+ // DDL
+ .define(
+ AUTO_CREATE,
+ ConfigDef.Type.BOOLEAN,
+ AUTO_CREATE_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ AUTO_CREATE_DOC, DDL_GROUP,
+ 1,
+ ConfigDef.Width.SHORT,
+ AUTO_CREATE_DISPLAY
+ )
+ .define(
+ AUTO_EVOLVE,
+ ConfigDef.Type.BOOLEAN,
+ AUTO_EVOLVE_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ AUTO_EVOLVE_DOC, DDL_GROUP,
+ 2,
+ ConfigDef.Width.SHORT,
+ AUTO_EVOLVE_DISPLAY
+ ).define(
+ QUOTE_SQL_IDENTIFIERS_CONFIG,
+ ConfigDef.Type.STRING,
+ QUOTE_SQL_IDENTIFIERS_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ QUOTE_SQL_IDENTIFIERS_DOC,
+ DDL_GROUP,
+ 3,
+ ConfigDef.Width.MEDIUM,
+ QUOTE_SQL_IDENTIFIERS_DISPLAY,
+ QUOTE_METHOD_RECOMMENDER
+ )
+ // Retries
+ .define(
+ MAX_RETRIES,
+ ConfigDef.Type.INT,
+ MAX_RETRIES_DEFAULT,
+ NON_NEGATIVE_INT_VALIDATOR,
+ ConfigDef.Importance.MEDIUM,
+ MAX_RETRIES_DOC,
+ RETRIES_GROUP,
+ 1,
+ ConfigDef.Width.SHORT,
+ MAX_RETRIES_DISPLAY
+ )
+ .define(
+ RETRY_BACKOFF_MS,
+ ConfigDef.Type.INT,
+ RETRY_BACKOFF_MS_DEFAULT,
+ NON_NEGATIVE_INT_VALIDATOR,
+ ConfigDef.Importance.MEDIUM,
+ RETRY_BACKOFF_MS_DOC,
+ RETRIES_GROUP,
+ 2,
+ ConfigDef.Width.SHORT,
+ RETRY_BACKOFF_MS_DISPLAY
+ )
+ .define(
+ AUTO_SCHEMA_CONFIG,
+ ConfigDef.Type.BOOLEAN,
+ AUTO_SCHEMA_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ AUTO_SCHEMA_DOC, DDL_GROUP,
+ 1,
+ ConfigDef.Width.SHORT,
+ AUTO_SCHEMA_DISPLAY
+ );
public final String connectorName;
public final String connectionUrl;
@@ -514,6 +530,8 @@ public enum PrimaryKeyMode {
public final TimeZone timeZone;
public final EnumSet<TableType> tableTypes;
+ public final boolean autoSchema;
+
public JdbcSinkConfig(Map<?, ?> props) {
super(CONFIG_DEF, props);
connectorName = ConfigUtils.connectorName(props);
@@ -542,6 +560,13 @@ public JdbcSinkConfig(Map<?, ?> props) {
"Primary key mode must be 'record_key' when delete support is enabled");
}
tableTypes = TableType.parse(getList(TABLE_TYPES_CONFIG));
+
+ autoSchema = getBoolean(AUTO_SCHEMA_CONFIG);
+ if (autoSchema && autoCreate) {
+ throw new ConfigException(
+ "Auto create must be false when auto schema is enabled, " +
+ "cuz we'll get schema from jdbc table");
+ }
}
private String getPasswordValue(String key) {
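For illustration, a minimal sketch of how the new auto.schema option interacts with the validation added above; the literal property keys other than auto.schema (connection.url, auto.create) and the URL format are assumptions, not taken from this diff:

import java.util.HashMap;
import java.util.Map;

public class AutoSchemaConfigSketch {
    public static void main(String[] args) {
        // assumed property keys; only "auto.schema" is confirmed by this patch
        Map<String, String> props = new HashMap<>();
        props.put("connection.url", "jdbc:openmldb:///test_db");  // placeholder URL
        props.put("auto.schema", "true");   // fetch the schema from OpenMLDB (JsonConverter only)
        props.put("auto.create", "false");  // must stay false, otherwise the constructor throws ConfigException
        JdbcSinkConfig config = new JdbcSinkConfig(props);
        System.out.println("auto schema enabled: " + config.autoSchema);
    }
}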
diff --git a/hybridse/images/HybridSE.png b/hybridse/images/HybridSE.png
deleted file mode 100644
index c9ecd8f0409..00000000000
Binary files a/hybridse/images/HybridSE.png and /dev/null differ
diff --git a/hybridse/include/vm/mem_catalog.h b/hybridse/include/vm/mem_catalog.h
index 30e1190116b..b393ed861ec 100644
--- a/hybridse/include/vm/mem_catalog.h
+++ b/hybridse/include/vm/mem_catalog.h
@@ -329,13 +329,13 @@ class WindowRange {
bool out_of_rows, bool before_window, bool exceed_window) const {
switch (frame_type_) {
case Window::WindowFrameType::kFrameRows:
- return out_of_rows ? kExceedWindow : kInWindow;
+ return out_of_rows ? kExceedWindow : (before_window ? kBeforeWindow : kInWindow);
case Window::WindowFrameType::kFrameRowsMergeRowsRange: {
return out_of_rows
? (before_window
? kBeforeWindow
: exceed_window ? kExceedWindow : kInWindow)
- : kInWindow;
+ : before_window ? kBeforeWindow : kInWindow;
}
case Window::WindowFrameType::kFrameRowsRange:
return exceed_window
@@ -373,7 +373,7 @@ class HistoryWindow : public Window {
}
}
- virtual void PopEffectiveData() {
+ virtual void PopEffectiveDataIfAny() {
if (!table_.empty()) {
PopFrontRow();
}
@@ -388,7 +388,7 @@ class HistoryWindow : public Window {
auto cur_size = table_.size();
if (cur_size < window_range_.start_row_) {
// current in the ROWS window
- int64_t sub = (key + window_range_.start_offset_);
+ int64_t sub = key + window_range_.start_offset_;
uint64_t start_ts = sub < 0 ? 0u : static_cast<uint64_t>(sub);
if (0 == window_range_.end_offset_) {
return BufferCurrentTimeBuffer(key, row, start_ts);
@@ -419,12 +419,27 @@ class HistoryWindow : public Window {
// sliding rows data from `current_history_buffer_` into effective window
// by giving the new start_ts and end_ts.
- // Resulting the new effective window data whose bound is [start_ts, end_ts]
+ // Resulting in new effective window data whose bounds are [start_ts, end_ts].
+ // NOTE
+ // - window bounds should be greater than or equal to 0; < 0 is not supported yet
+ // - values greater than int64_max are not handled either
+ // - start_ts_inclusive > end_ts_inclusive is expected for rows window, e.g.
+ // `(rows between .. and current_row exclude current_time)`.
+ // Admittedly confusing design; should be refactored later
+ // TODO(ace): note above
//
// - elements in `current_history_buffer_` that `ele.first <= end_ts` goes out of
// `current_history_buffer_` and pushed into effective window
// - elements in effective window where `ele.first < start_ts` goes out of effective window
- void SlideWindow(uint64_t start_ts_inclusive, uint64_t end_ts_inclusive) {
+ //
+ // `start_ts_inclusive` and `end_ts_inclusive` can be empty, which effectively means the bound is less than 0.
+ // if `start_ts_inclusive` is empty, no rows go out of the effective window
+ // if `end_ts_inclusive` is empty, no rows go out of the history buffer into the effective window
+ void SlideWindow(std::optional<uint64_t> start_ts_inclusive, std::optional<uint64_t> end_ts_inclusive) {
+ if (!end_ts_inclusive.has_value()) {
+ return;
+ }
+
while (!current_history_buffer_.empty() && current_history_buffer_.back().first <= end_ts_inclusive) {
auto& back = current_history_buffer_.back();
@@ -436,7 +451,9 @@ class HistoryWindow : public Window {
// push the row to the start of window
// - pop last elements in window if exceed max window size
// - also pop last elements in window if there ts less than `start_ts`
- bool BufferEffectiveWindow(uint64_t key, const Row& row, uint64_t start_ts) {
+ //
+ // if `start_ts` is empty, no rows are eliminated from the window
+ bool BufferEffectiveWindow(uint64_t key, const Row& row, std::optional<uint64_t> start_ts) {
AddFrontRow(key, row);
auto cur_size = table_.size();
while (window_range_.max_size_ > 0 &&
@@ -445,19 +462,18 @@ class HistoryWindow : public Window {
--cur_size;
}
- // Slide window when window size >= rows_preceding
+ // Slide window if window start bound >= rows/range preceding
while (cur_size > 0) {
const auto& pair = GetBackRow();
- if ((kFrameRows == window_range_.frame_type_ ||
- kFrameRowsMergeRowsRange == window_range_.frame_type_) &&
+ if ((kFrameRows == window_range_.frame_type_ || kFrameRowsMergeRowsRange == window_range_.frame_type_) &&
cur_size <= window_range_.start_row_ + 1) {
+ // note: this is always the current rows window
break;
}
if (kFrameRows == window_range_.frame_type_ ||
pair.first < start_ts) {
PopBackRow();
--cur_size;
-
} else {
break;
}
@@ -470,7 +486,11 @@ class HistoryWindow : public Window {
// slide window first so current row kept in `current_history_buffer_`
// and will go into window in next action
if (exclude_current_time_) {
- SlideWindow(start_ts, key - 1);
+ if (key == 0) {
+ SlideWindow(start_ts, {});
+ } else {
+ SlideWindow(start_ts, key - 1);
+ }
} else {
SlideWindow(start_ts, key);
}
@@ -482,9 +502,13 @@ class HistoryWindow : public Window {
// except `exclude current_row`, the current row is always added to the effective window
// but for next buffer action, previous current row already buffered in `current_history_buffer_`
// so the previous current row need eliminated for this next buf action
- PopEffectiveData();
+ PopEffectiveDataIfAny();
+ if (key == 0) {
+ SlideWindow(start_ts, {});
+ } else {
+ SlideWindow(start_ts, key - 1);
+ }
current_history_buffer_.emplace_front(key, row);
- SlideWindow(start_ts, key - 1);
}
// in queue the current row
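To make the SlideWindow contract above easier to follow, here is a minimal Java sketch of the same idea (a history buffer plus an effective window keyed by timestamp, with optional inclusive bounds); it only mirrors the semantics spelled out in the NOTE and is not the HybridSE implementation:

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Optional;

public class SlidingWindowSketch {
    // newest entries sit at the front of each deque, mirroring emplace_front/AddFrontRow
    private final Deque<Long> history = new ArrayDeque<>();   // rows not yet in the window
    private final Deque<Long> effective = new ArrayDeque<>(); // rows inside [startTs, endTs]

    void buffer(long ts) { history.addFirst(ts); }

    void slide(Optional<Long> startTs, Optional<Long> endTs) {
        if (!endTs.isPresent()) {
            return; // empty end bound: nothing moves into the effective window
        }
        // rows with ts <= endTs leave the history buffer and enter the effective window
        while (!history.isEmpty() && history.peekLast() <= endTs.get()) {
            effective.addFirst(history.pollLast());
        }
        // rows with ts < startTs leave the effective window (empty start: keep everything)
        while (startTs.isPresent() && !effective.isEmpty() && effective.peekLast() < startTs.get()) {
            effective.pollLast();
        }
    }

    public static void main(String[] args) {
        SlidingWindowSketch w = new SlidingWindowSketch();
        w.buffer(0L);
        // exclude_current_time with request key 0: the end bound is empty, nothing slides in
        w.slide(Optional.of(0L), Optional.empty());
        System.out.println(w.effective); // []
        // a later request at key 5 (end bound 4) lets the buffered row enter the window
        w.slide(Optional.of(0L), Optional.of(4L));
        System.out.println(w.effective); // [0]
    }
}

This also shows why the key == 0 special case in the patch passes an empty end bound when exclude_current_time is set: key - 1 would underflow the unsigned timestamp, so the row simply stays in the history buffer.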
diff --git a/hybridse/include/vm/physical_op.h b/hybridse/include/vm/physical_op.h
index 506832c2f3c..e3d5ad10bb7 100644
--- a/hybridse/include/vm/physical_op.h
+++ b/hybridse/include/vm/physical_op.h
@@ -1533,11 +1533,10 @@ class PhysicalRequestAggUnionNode : public PhysicalOpNode {
const bool Valid() { return true; }
static PhysicalRequestAggUnionNode *CastFrom(PhysicalOpNode *node);
- const bool instance_not_in_window() const {
- return instance_not_in_window_;
- }
+ const bool instance_not_in_window() const { return instance_not_in_window_; }
const bool exclude_current_time() const { return exclude_current_time_; }
const bool output_request_row() const { return output_request_row_; }
+ void set_out_request_row(bool flag) { output_request_row_ = flag; }
const RequestWindowOp &window() const { return window_; }
base::Status WithNewChildren(node::NodeManager *nm,
@@ -1555,7 +1554,14 @@ class PhysicalRequestAggUnionNode : public PhysicalOpNode {
private:
const bool instance_not_in_window_;
const bool exclude_current_time_;
- const bool output_request_row_;
+
+ // Exclude the request row from request union results
+ //
+ // The option is different from `output_request_row_` in `PhysicalRequestUnionNode`.
+ // Here it is only `false` when the SQL Window clause has attribute `EXCLUDE CURRENT_ROW`,
+ // whereas in `PhysicalRequestUnionNode`, it concerns the common-column optimization and is not related to
+ // `EXCLUDE CURRENT_ROW`
+ bool output_request_row_;
void AddProducers(PhysicalOpNode *request, PhysicalOpNode *raw, PhysicalOpNode *aggr) {
AddProducer(request);
diff --git a/hybridse/src/codegen/udf_ir_builder_test.cc b/hybridse/src/codegen/udf_ir_builder_test.cc
index 206b7a8a4d8..70ecad1c7fd 100644
--- a/hybridse/src/codegen/udf_ir_builder_test.cc
+++ b/hybridse/src/codegen/udf_ir_builder_test.cc
@@ -115,7 +115,7 @@ TEST_F(UdfIRBuilderTest, dayofweek_date_udf_test) {
Date date(2020, 05, 22);
CheckUdf("dayofweek", 6, date);
}
-TEST_F(UdfIRBuilderTest, dayofyear_date_udf_test) {
+TEST_F(UdfIRBuilderTest, DayofyearDateUdfTest) {
{
Date date(2020, 05, 22);
CheckUdf("dayofyear", 143, date);
@@ -134,31 +134,31 @@ TEST_F(UdfIRBuilderTest, dayofyear_date_udf_test) {
}
{
Date date(2021, 13, 31);
- CheckUdf("dayofyear", 0, date);
+ CheckUdf<Nullable<int32_t>, Date>("dayofyear", nullptr, date);
}
{
Date date(2021, 0, 31);
- CheckUdf("dayofyear", 0, date);
+ CheckUdf<Nullable<int32_t>, Date>("dayofyear", nullptr, date);
}
{
Date date(2021, -1, 31);
- CheckUdf("dayofyear", 0, date);
+ CheckUdf<Nullable<int32_t>, Date>("dayofyear", nullptr, date);
}
{
Date date(2021, 12, 32);
- CheckUdf("dayofyear", 0, date);
+ CheckUdf<Nullable<int32_t>, Date>("dayofyear", nullptr, date);
}
{
Date date(2021, 12, 0);
- CheckUdf("dayofyear", 0, date);
+ CheckUdf<Nullable<int32_t>, Date>("dayofyear", nullptr, date);
}
{
Date date(2021, 12, -10);
- CheckUdf("dayofyear", 0, date);
+ CheckUdf<Nullable<int32_t>, Date>("dayofyear", nullptr, date);
}
{
Date date(2021, 2, 29);
- CheckUdf("dayofyear", 0, date);
+ CheckUdf<Nullable<int32_t>, Date>("dayofyear", nullptr, date);
}
}
TEST_F(UdfIRBuilderTest, weekofyear_date_udf_test) {
@@ -225,7 +225,7 @@ TEST_F(UdfIRBuilderTest, dayofweek_timestamp_udf_test) {
Timestamp time(1590115420000L);
CheckUdf("dayofweek", 6, time);
}
-TEST_F(UdfIRBuilderTest, dayofyear_timestamp_udf_test) {
+TEST_F(UdfIRBuilderTest, DayofyearTimestampUdfTest) {
Timestamp time(1590115420000L);
CheckUdf("dayofyear", 143, time);
}
@@ -272,11 +272,13 @@ TEST_F(UdfIRBuilderTest, dayofweek_int64_udf_test) {
CheckUdf("dayofweek", 1, 1590115420000L + 2 * 86400000L);
CheckUdf("dayofweek", 2, 1590115420000L + 3 * 86400000L);
}
-TEST_F(UdfIRBuilderTest, dayofyear_int64_udf_test) {
+TEST_F(UdfIRBuilderTest, DayofyearInt64UdfTest) {
CheckUdf("dayofyear", 143, 1590115420000L);
CheckUdf("dayofyear", 144, 1590115420000L + 86400000L);
CheckUdf("dayofyear", 145, 1590115420000L + 2 * 86400000L);
CheckUdf("dayofyear", 146, 1590115420000L + 3 * 86400000L);
+
+ CheckUdf<Nullable<int32_t>, int64_t>("dayofyear", nullptr, -1);
}
TEST_F(UdfIRBuilderTest, weekofyear_int64_udf_test) {
CheckUdf("weekofyear", 21, 1590115420000L);
diff --git a/hybridse/src/passes/physical/long_window_optimized.cc b/hybridse/src/passes/physical/long_window_optimized.cc
index 05e87d4420c..0c8ae8b99b8 100644
--- a/hybridse/src/passes/physical/long_window_optimized.cc
+++ b/hybridse/src/passes/physical/long_window_optimized.cc
@@ -212,6 +212,9 @@ bool LongWindowOptimized::OptimizeWithPreAggr(vm::PhysicalAggregationNode* in, i
req_union_op->instance_not_in_window(), req_union_op->exclude_current_time(),
req_union_op->output_request_row(), aggr_op->GetFnDef(),
aggr_op->GetChild(0));
+ if (req_union_op->exclude_current_row_) {
+ request_aggr_union->set_out_request_row(false);
+ }
if (!status.isOK()) {
LOG(ERROR) << "Fail to create PhysicalRequestAggUnionNode: " << status;
return false;
diff --git a/hybridse/src/plan/planner.cc b/hybridse/src/plan/planner.cc
index 68f2eb58413..c3ff6dab70f 100644
--- a/hybridse/src/plan/planner.cc
+++ b/hybridse/src/plan/planner.cc
@@ -458,33 +458,29 @@ bool Planner::IsTable(node::PlanNode *node, node::PlanNode** output) {
}
return false;
}
-/**
- * Validate online serving op with given plan tree
- * - Support Ops:
- * - TABLE
- * - SELECT
- * - JOIN
- * - WINDOW
- * - UnSupport Ops::
- * - CREATE TABLE
- * - INSERT TABLE
- * - GROUP BY
- * - HAVING clause
- * - FILTER
- * -
- * @param node
- * @return
- */
+
+// Validate online serving op with given plan tree
+// - Support Ops:
+// - TABLE
+// - SELECT
+// - JOIN
+// - WINDOW
+// - Unsupported Ops:
+// - CREATE TABLE
+// - INSERT TABLE
+// - GROUP BY
+// - HAVING clause
+// - FILTER
+//
+// - Not implemented
+// - Order By
base::Status Planner::ValidateOnlineServingOp(node::PlanNode *node) {
CHECK_TRUE(nullptr != node, common::kNullInputPointer,
"Fail to validate request table: input node is "
"null")
switch (node->type_) {
- case node::kPlanTypeTable: {
- break;
- }
case node::kPlanTypeProject: {
- auto project_node = dynamic_cast(node);
+ auto project_node = dynamic_cast(node);
for (auto &each : project_node->project_list_vec_) {
node::ProjectListNode *project_list = dynamic_cast(each);
@@ -493,15 +489,15 @@ base::Status Planner::ValidateOnlineServingOp(node::PlanNode *node) {
CHECK_TRUE(!(nullptr == project_list->GetW() && project_list->HasAggProject()), common::kPlanError,
"Aggregate over a table cannot be supported in online serving")
}
+
+ break;
}
+ case node::kPlanTypeTable:
case node::kPlanTypeRename:
case node::kPlanTypeLimit:
case node::kPlanTypeWindow:
case node::kPlanTypeQuery:
case node::kPlanTypeJoin: {
- for (auto *child : node->GetChildren()) {
- CHECK_STATUS(ValidateOnlineServingOp(child));
- }
break;
}
default: {
@@ -509,52 +505,62 @@ base::Status Planner::ValidateOnlineServingOp(node::PlanNode *node) {
break;
}
}
+
+ for (auto *child : node->GetChildren()) {
+ CHECK_STATUS(ValidateOnlineServingOp(child));
+ }
+
return base::Status::OK();
}
-/**
- * Get the limit count of given SQL query
- * @param node
- * @return
- */
+
+// Get the limit count of given SQL query
int Planner::GetPlanTreeLimitCount(node::PlanNode *node) {
if (nullptr == node) {
return 0;
}
+
int limit_cnt = 0;
switch (node->type_) {
case node::kPlanTypeTable: {
return 0;
}
case node::kPlanTypeLimit: {
- auto limit_node = dynamic_cast(node);
+ auto limit_node = dynamic_cast(node);
limit_cnt = limit_node->GetLimitCnt();
+ break;
}
- default: {
- if (node->GetChildrenSize() > 0) {
- int cnt = GetPlanTreeLimitCount(node->GetChildren()[0]);
- if (cnt > 0) {
- if (limit_cnt == 0) {
- limit_cnt = cnt;
- } else {
- limit_cnt = std::min(cnt, limit_cnt);
- }
- }
- }
+ default:
break;
+ }
+
+ if (node->GetChildrenSize() > 0) {
+ int cnt = GetPlanTreeLimitCount(node->GetChildren()[0]);
+ if (cnt > 0) {
+ if (limit_cnt == 0) {
+ limit_cnt = cnt;
+ } else {
+ limit_cnt = std::min(cnt, limit_cnt);
+ }
}
}
+
return limit_cnt;
}
+
+// Unsupported Ops:
+// - GROUP BY
+// - HAVING
+// - WINDOW
+//
+// Not implemented:
+// - Order By
base::Status Planner::ValidateClusterOnlineTrainingOp(node::PlanNode *node) {
if (node == nullptr) {
return base::Status::OK();
}
switch (node->type_) {
- case node::kPlanTypeTable: {
- break;
- }
case node::kPlanTypeProject: {
- auto project_node = dynamic_cast(node);
+ auto project_node = dynamic_cast(node);
for (auto &each : project_node->project_list_vec_) {
node::ProjectListNode *project_list = dynamic_cast(each);
@@ -565,15 +571,14 @@ base::Status Planner::ValidateClusterOnlineTrainingOp(node::PlanNode *node) {
CHECK_TRUE(!project_list->HasAggProject(), common::kPlanError,
"Aggregate over a table cannot be supported in cluster online training")
}
+ break;
}
+ case node::kPlanTypeTable:
case node::kPlanTypeLoadData:
case node::kPlanTypeRename:
case node::kPlanTypeLimit:
case node::kPlanTypeFilter:
case node::kPlanTypeQuery: {
- for (auto *child : node->GetChildren()) {
- CHECK_STATUS(ValidateClusterOnlineTrainingOp(child));
- }
break;
}
default: {
@@ -581,6 +586,11 @@ base::Status Planner::ValidateClusterOnlineTrainingOp(node::PlanNode *node) {
break;
}
}
+
+ for (auto *child : node->GetChildren()) {
+ CHECK_STATUS(ValidateClusterOnlineTrainingOp(child));
+ }
+
return base::Status::OK();
}
/**
diff --git a/hybridse/src/udf/default_udf_library.cc b/hybridse/src/udf/default_udf_library.cc
index 609fc33817f..912e2ec206f 100644
--- a/hybridse/src/udf/default_udf_library.cc
+++ b/hybridse/src/udf/default_udf_library.cc
@@ -670,7 +670,7 @@ void DefaultUdfLibrary::InitStringUdf() {
Example:
@code{.sql}
- select concat("-", "1", 2, 3, 4, 5.6, 7.8, Timestamp(1590115420000L));
+ select concat_ws("-", "1", 2, 3, 4, 5.6, 7.8, Timestamp(1590115420000L));
-- output "1-2-3-4-5.6-7.8-2020-05-22 10:43:40"
@endcode
@since 0.1.0)");
@@ -2019,11 +2019,8 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() {
@since 0.4.0
)");
- RegisterExternal("dayofyear")
- .args(static_cast(v1::dayofyear))
- .args(static_cast(v1::dayofyear))
- .args(static_cast(v1::dayofyear))
- .doc(R"(
+ const std::string dayofyear_doc =
+ R"(
@brief Return the day of year for a timestamp or date. Returns 0 given an invalid date.
Example:
@@ -2041,7 +2038,25 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() {
-- output 0
@endcode
@since 0.1.0
- )");
+ )";
+
+ RegisterExternal("dayofyear")
+ .args(reinterpret_cast(static_cast(v1::dayofyear)))
+ .return_by_arg(true)
+ .returns>()
+ .doc(dayofyear_doc);
+
+ RegisterExternal("dayofyear")
+ .args(reinterpret_cast(static_cast(v1::dayofyear)))
+ .return_by_arg(true)
+ .returns>()
+ .doc(dayofyear_doc);
+
+ RegisterExternal("dayofyear")
+ .args(reinterpret_cast(static_cast(v1::dayofyear)))
+ .return_by_arg(true)
+ .returns>()
+ .doc(dayofyear_doc);
RegisterExternal("weekofyear")
.args(static_cast(v1::weekofyear))
diff --git a/hybridse/src/udf/literal_traits.h b/hybridse/src/udf/literal_traits.h
index 21acc82ee54..f232c14256e 100644
--- a/hybridse/src/udf/literal_traits.h
+++ b/hybridse/src/udf/literal_traits.h
@@ -67,6 +67,15 @@ struct Nullable {
bool is_null() const { return is_null_; }
T* ptr() { return &data_; }
+ // teach gtest print values
+ friend std::ostream& operator<<(std::ostream& os, const Nullable& val) {
+ if (val.is_null_) {
+ return os << "Nullable{null, type=" << DataTypeTrait::to_string() << "}";
+ }
+
+ return os << "Nullable{value=" << val.data_ << ", type=" << DataTypeTrait::to_string() << "}";
+ }
+
T data_;
bool is_null_;
};
@@ -81,6 +90,14 @@ struct Nullable {
bool is_null() const { return is_null_; }
StringRef* ptr() { return &data_; }
+ friend std::ostream& operator<<(std::ostream& os, const Nullable& val) {
+ if (val.is_null_) {
+ return os << "Nullable{null, type=StringRef}";
+ }
+
+ return os << "Nullable{value=" << val.data_.DebugString() << ", type=StringRef}";
+ }
+
StringRef data_;
bool is_null_;
};
diff --git a/hybridse/src/udf/udf.cc b/hybridse/src/udf/udf.cc
index af1f881ed12..e04898836f7 100644
--- a/hybridse/src/udf/udf.cc
+++ b/hybridse/src/udf/udf.cc
@@ -22,6 +22,7 @@
#include
#include "absl/strings/ascii.h"
#include "absl/strings/str_replace.h"
+#include "absl/time/civil_time.h"
#include "base/iterator.h"
#include "boost/date_time.hpp"
#include "boost/date_time/gregorian/parsers.hpp"
@@ -57,11 +58,20 @@ bthread_key_t B_THREAD_LOCAL_MEM_POOL_KEY;
void trivial_fun() {}
-int32_t dayofyear(int64_t ts) {
+void dayofyear(int64_t ts, int32_t* out, bool* is_null) {
+ if (ts < 0) {
+ *is_null = true;
+ *out = 0;
+ return;
+ }
+
time_t time = (ts + TZ_OFFSET) / 1000;
struct tm t;
+ memset(&t, 0, sizeof(struct tm));
gmtime_r(&time, &t);
- return t.tm_yday + 1;
+
+ *out = t.tm_yday + 1;
+ *is_null = false;
}
int32_t dayofmonth(int64_t ts) {
time_t time = (ts + TZ_OFFSET) / 1000;
@@ -99,24 +109,27 @@ int32_t year(int64_t ts) {
return t.tm_year + 1900;
}
-int32_t dayofyear(Timestamp *ts) { return dayofyear(ts->ts_); }
-int32_t dayofyear(Date *date) {
+void dayofyear(Timestamp *ts, int32_t *out, bool *is_null) { dayofyear(ts->ts_, out, is_null); }
+void dayofyear(Date *date, int32_t* out, bool* is_null) {
int32_t day, month, year;
if (!Date::Decode(date->date_, &year, &month, &day)) {
- return 0;
+ *out = 0;
+ *is_null = true;
+ return;
}
- try {
- if (month <= 0 || month > 12) {
- return 0;
- } else if (day <= 0 || day > 31) {
- return 0;
- }
- boost::gregorian::date d(year, month, day);
- return d.day_of_year();
- } catch (...) {
- return 0;
+
+ absl::CivilDay civil_day(year, month, day);
+ if (civil_day.year() != year || civil_day.month() != month || civil_day.day() != day) {
+ // CivilDay normalized the value because the input date is invalid
+ *out = 0;
+ *is_null = true;
+ return;
}
+
+ *out = absl::GetYearDay(civil_day);
+ *is_null = false;
}
+
int32_t dayofmonth(Timestamp *ts) { return dayofmonth(ts->ts_); }
int32_t weekofyear(Timestamp *ts) { return weekofyear(ts->ts_); }
int32_t month(Timestamp *ts) { return month(ts->ts_); }
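The CivilDay round-trip check above can be sketched in Java with java.time for readers more familiar with the JVM side; this is an illustration of the same validate-then-null behavior, not code from the patch:

import java.time.DateTimeException;
import java.time.LocalDate;
import java.util.OptionalInt;

public class DayOfYearSketch {
    static OptionalInt dayOfYear(int year, int month, int day) {
        try {
            // LocalDate.of rejects dates such as 2021-02-29 or month 13 outright,
            // matching the normalization check against absl::CivilDay above
            return OptionalInt.of(LocalDate.of(year, month, day).getDayOfYear());
        } catch (DateTimeException e) {
            return OptionalInt.empty(); // the *is_null = true path
        }
    }

    public static void main(String[] args) {
        System.out.println(dayOfYear(2020, 5, 22)); // OptionalInt[143]
        System.out.println(dayOfYear(2021, 2, 29)); // OptionalInt.empty
    }
}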
diff --git a/hybridse/src/udf/udf.h b/hybridse/src/udf/udf.h
index afd16fda358..7797091a7c8 100644
--- a/hybridse/src/udf/udf.h
+++ b/hybridse/src/udf/udf.h
@@ -209,9 +209,9 @@ int32_t month(Timestamp *ts);
int32_t year(int64_t ts);
int32_t year(Timestamp *ts);
-int32_t dayofyear(int64_t ts);
-int32_t dayofyear(Timestamp *ts);
-int32_t dayofyear(Date *ts);
+void dayofyear(int64_t ts, int32_t* out, bool* is_null);
+void dayofyear(Timestamp *ts, int32_t* out, bool* is_null);
+void dayofyear(Date *ts, int32_t* out, bool* is_null);
int32_t dayofmonth(int64_t ts);
int32_t dayofmonth(Timestamp *ts);
diff --git a/hybridse/src/vm/runner.cc b/hybridse/src/vm/runner.cc
index 6f349c9b0ae..fd6191f96c6 100644
--- a/hybridse/src/vm/runner.cc
+++ b/hybridse/src/vm/runner.cc
@@ -2774,15 +2774,16 @@ std::shared_ptr RequestAggUnionRunner::Run(
}
}
- // build window with start and end offset
std::shared_ptr window;
if (agg_segment) {
window = RequestUnionWindow(request, union_segments, ts_gen, range_gen_.window_range_, output_request_row_,
exclude_current_time_);
} else {
LOG(WARNING) << "Aggr segment is empty. Fall back to normal RequestUnionRunner";
- window = RequestUnionRunner::RequestUnionWindow(request, union_segments, ts_gen, range_gen_.window_range_,
- output_request_row_, exclude_current_time_);
+ // NOTE: normal request union should always `output_request_row`, while the `output_request_row_`
+ // here indicates whether `EXCLUDE CURRENT_ROW` is set
+ window = RequestUnionRunner::RequestUnionWindow(request, union_segments, ts_gen, range_gen_.window_range_, true,
+ exclude_current_time_, !output_request_row_);
}
if (ctx.is_debug()) {
@@ -2794,10 +2795,8 @@ std::shared_ptr RequestAggUnionRunner::Run(
}
std::shared_ptr RequestAggUnionRunner::RequestUnionWindow(
- const Row& request,
- std::vector> union_segments, int64_t ts_gen,
- const WindowRange& window_range, const bool output_request_row,
- const bool exclude_current_time) const {
+ const Row& request, std::vector> union_segments, int64_t ts_gen,
+ const WindowRange& window_range, const bool output_request_row, const bool exclude_current_time) const {
// TODO(zhanghao): for now, we only support AggUnion with 1 base table and 1 agg table
size_t unions_cnt = union_segments.size();
if (unions_cnt != 2) {
@@ -2898,7 +2897,7 @@ std::shared_ptr RequestAggUnionRunner::RequestUnionWindow(
}
};
- auto update_agg_aggregator = [aggregator = aggregator.get(), row_parser = agg_row_parser, this](const Row& row) {
+ auto update_agg_aggregator = [aggregator = aggregator.get(), row_parser = agg_row_parser](const Row& row) {
if (row_parser->IsNull(row, "agg_val")) {
return;
}
@@ -2919,8 +2918,7 @@ std::shared_ptr RequestAggUnionRunner::RequestUnionWindow(
cnt++;
}
- auto window_table =
- std::shared_ptr(new MemTimeTableHandler());
+ auto window_table = std::make_shared();
auto base_it = union_segments[0]->GetIterator();
if (!base_it) {
LOG(WARNING) << "Base window is empty.";
@@ -3083,7 +3081,7 @@ std::shared_ptr ReduceRunner::Run(
LOG(WARNING) << "ReduceRunner input is empty";
return std::shared_ptr();
}
- auto row_handler = std::shared_ptr(new MemRowHandler(iter->GetValue()));
+ std::shared_ptr row_handler = std::make_shared(iter->GetValue());
if (ctx.is_debug()) {
std::ostringstream oss;
@@ -3127,7 +3125,9 @@ std::shared_ptr RequestUnionRunner::RequestUnionWindow(
const Row& request, std::vector> union_segments, int64_t ts_gen,
const WindowRange& window_range, bool output_request_row, bool exclude_current_time, bool exclude_current_row) {
uint64_t start = 0;
- uint64_t end = UINT64_MAX;
+ // an empty end means the end value would be < 0, i.e. there is no effective window range;
+ // this happens when `ts_gen` is 0 and exclude current_time is needed
+ std::optional<uint64_t> end = UINT64_MAX;
uint64_t rows_start_preceding = 0;
uint64_t max_size = 0;
if (ts_gen >= 0) {
@@ -3135,7 +3135,11 @@ std::shared_ptr RequestUnionRunner::RequestUnionWindow(
? 0
: (ts_gen + window_range.start_offset_);
if (exclude_current_time && 0 == window_range.end_offset_) {
- end = (ts_gen - 1) < 0 ? 0 : (ts_gen - 1);
+ if (ts_gen == 0) {
+ end = {};
+ } else {
+ end = ts_gen - 1;
+ }
} else {
end = (ts_gen + window_range.end_offset_) < 0
? 0
@@ -3174,7 +3178,7 @@ std::shared_ptr RequestUnionRunner::RequestUnionWindow(
union_segment_status[i] = IteratorStatus();
continue;
}
- union_segment_iters[i]->Seek(end);
+ union_segment_iters[i]->Seek(end.value_or(0));
if (!union_segment_iters[i]->Valid()) {
union_segment_status[i] = IteratorStatus();
continue;
diff --git a/hybridse/src/vm/runner.h b/hybridse/src/vm/runner.h
index 875c53766c3..0d1ec851b66 100644
--- a/hybridse/src/vm/runner.h
+++ b/hybridse/src/vm/runner.h
@@ -976,7 +976,7 @@ class RequestUnionRunner : public Runner {
std::vector> union_segments,
int64_t request_ts, const WindowRange& window_range,
bool output_request_row, bool exclude_current_time,
- bool exclude_current_row = false);
+ bool exclude_current_row);
void AddWindowUnion(const RequestWindowOp& window, Runner* runner) {
windows_union_gen_.AddWindowUnion(window, runner);
}
@@ -1006,11 +1006,11 @@ class RequestAggUnionRunner : public Runner {
bool InitAggregator();
std::shared_ptr Run(RunnerContext& ctx,
const std::vector>& inputs) override;
- std::shared_ptr RequestUnionWindow(
- const Row& request,
- std::vector> union_segments,
- int64_t request_ts, const WindowRange& window_range,
- const bool output_request_row, const bool exclude_current_time) const;
+ std::shared_ptr RequestUnionWindow(const Row& request,
+ std::vector> union_segments,
+ int64_t request_ts, const WindowRange& window_range,
+ const bool output_request_row,
+ const bool exclude_current_time) const;
void AddWindowUnion(const RequestWindowOp& window, Runner* runner) {
windows_union_gen_.AddWindowUnion(window, runner);
}
@@ -1027,7 +1027,11 @@ class RequestAggUnionRunner : public Runner {
RequestWindowUnionGenerator windows_union_gen_;
RangeGenerator range_gen_;
bool exclude_current_time_;
+
+ // include the request row in the union result.
+ // turns false if the window definition has `EXCLUDE CURRENT_ROW`
bool output_request_row_;
+
const node::FnDefNode* func_ = nullptr;
AggType agg_type_;
const node::ExprNode* agg_col_ = nullptr;
diff --git a/hybridse/src/vm/window_test.cc b/hybridse/src/vm/window_test.cc
index e4c648fec31..ec82795c71f 100644
--- a/hybridse/src/vm/window_test.cc
+++ b/hybridse/src/vm/window_test.cc
@@ -1052,9 +1052,9 @@ void CHECK_REQUEST_UNION_WINDOW(const WindowRange& window_range,
for (uint64_t key : buffered_keys) {
table->AddRow(key, row);
}
- auto union_table = RequestUnionRunner::RequestUnionWindow(
- row, std::vector>({table}), current_key,
- window_range, true, exclude_current_time);
+ auto union_table =
+ RequestUnionRunner::RequestUnionWindow(row, std::vector>({table}), current_key,
+ window_range, true, exclude_current_time, false);
CHECK_TABLE_KEY(union_table, exp_keys);
}
void CHECK_BUFFER_WINDOW(const WindowRange& window_range,
diff --git a/include/base/type.h b/include/base/type.h
index 34315ccb49a..d8e9cf3f5e2 100644
--- a/include/base/type.h
+++ b/include/base/type.h
@@ -21,6 +21,7 @@
#include
#include
#include
+#include
namespace openmldb {
namespace base {
@@ -37,6 +38,8 @@ struct Timestamp {
return *this;
}
int64_t ts_;
+
+ friend std::ostream& operator<<(std::ostream& os, const Timestamp& ts) { return os << ts.ts_; }
};
__attribute__((unused)) static const Timestamp operator+(const Timestamp& a,
@@ -106,6 +109,10 @@ struct Date {
return true;
}
int32_t date_;
+
+ friend std::ostream& operator<<(std::ostream& os, const Date& date) {
+ return os << date.date_;
+ }
};
__attribute__((unused)) static bool operator>(const Date& a, const Date& b) {
diff --git a/java/openmldb-batch/src/main/java/com/_4paradigm/openmldb/batch/utils/VersionCli.java b/java/openmldb-batch/src/main/java/com/_4paradigm/openmldb/batch/utils/VersionCli.java
new file mode 100644
index 00000000000..ff08fd0663c
--- /dev/null
+++ b/java/openmldb-batch/src/main/java/com/_4paradigm/openmldb/batch/utils/VersionCli.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2021 4Paradigm
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com._4paradigm.openmldb.batch.utils;
+
+import org.apache.commons.io.IOUtils;
+
+import java.io.InputStream;
+import java.util.List;
+
+public class VersionCli {
+
+ public static void main(String[] argv) {
+ try {
+ System.out.println(VersionCli.getVersion());
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ public static String getVersion() throws Exception {
+
+ InputStream stream = VersionCli.class.getClassLoader().getResourceAsStream("openmldb_git.properties");
+ if (stream == null) {
+ throw new Exception("Fail to get version from file of openmldb_git.properties");
+ }
+ List<String> gitVersionStrList = IOUtils.readLines(stream, "UTF-8");
+
+ // Only get build version and git commit abbrev
+ String version = "";
+ String gitCommit = "";
+ for (String line : gitVersionStrList) {
+ if (line.startsWith("git.build.version=")) {
+ version = line.split("=")[1];
+ }
+ if (line.startsWith("git.commit.id.abbrev=")) {
+ gitCommit = line.split("=")[1];
+ }
+ }
+
+ return version + "-" + gitCommit;
+ }
+}
diff --git a/java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/api/OpenmldbSession.scala b/java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/api/OpenmldbSession.scala
index 2d5c794b606..27fdb1ea1ed 100755
--- a/java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/api/OpenmldbSession.scala
+++ b/java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/api/OpenmldbSession.scala
@@ -17,7 +17,7 @@
package com._4paradigm.openmldb.batch.api
import com._4paradigm.openmldb.batch.catalog.OpenmldbCatalogService
-import com._4paradigm.openmldb.batch.utils.DataTypeUtil
+import com._4paradigm.openmldb.batch.utils.{DataTypeUtil, VersionCli}
import com._4paradigm.openmldb.batch.utils.HybridseUtil.autoLoad
import com._4paradigm.openmldb.batch.{OpenmldbBatchConfig, SparkPlanner}
import org.apache.commons.io.IOUtils
@@ -205,12 +205,14 @@ class OpenmldbSession {
*/
def version(): String = {
// Read OpenMLDB git properties which is added by maven plugin
- val stream = this.getClass.getClassLoader.getResourceAsStream("openmldb_git.properties")
- if (stream == null) {
- logger.error("OpenMLDB git properties is missing")
- SPARK_VERSION
- } else {
- s"$SPARK_VERSION\n${IOUtils.toString(stream, "UTF-8")}"
+ try {
+ val openmldbBatchVersion = VersionCli.getVersion()
+ s"$SPARK_VERSION\n$openmldbBatchVersion"
+ } catch {
+ case e: Exception => {
+ logger.error("Fail to load OpenMLDB git properties " + e.getMessage)
+ SPARK_VERSION
+ }
}
}
diff --git a/java/openmldb-batch/src/test/java/com/_4paradigm/openmldb/batch/utils/TestVersionCli.java b/java/openmldb-batch/src/test/java/com/_4paradigm/openmldb/batch/utils/TestVersionCli.java
new file mode 100644
index 00000000000..b04103e40c5
--- /dev/null
+++ b/java/openmldb-batch/src/test/java/com/_4paradigm/openmldb/batch/utils/TestVersionCli.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2021 4Paradigm
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com._4paradigm.openmldb.batch.utils;
+
+import org.testng.annotations.Ignore;
+import org.testng.annotations.Test;
+
+public class TestVersionCli {
+
+ // TODO(tobe): Make sure to run this case in CICD
+ @Ignore
+ public void testGetVersion() {
+ try {
+ String version = VersionCli.getVersion();
+ assert !version.isEmpty();
+ } catch (Exception e) {
+ e.printStackTrace();
+ assert false;
+ }
+ }
+
+}
diff --git a/java/openmldb-batchjob/src/main/scala/com/_4paradigm/openmldb/batchjob/util/OpenmldbJobUtil.scala b/java/openmldb-batchjob/src/main/scala/com/_4paradigm/openmldb/batchjob/util/OpenmldbJobUtil.scala
index f6a080097b7..aa2a21153cf 100644
--- a/java/openmldb-batchjob/src/main/scala/com/_4paradigm/openmldb/batchjob/util/OpenmldbJobUtil.scala
+++ b/java/openmldb-batchjob/src/main/scala/com/_4paradigm/openmldb/batchjob/util/OpenmldbJobUtil.scala
@@ -31,9 +31,11 @@ object OpenmldbJobUtil {
def getSqlFromFile(spark: SparkSession, sqlFilePath: String): String = {
val sparkMaster = spark.conf.get("spark.master")
+ val sparkDeployMode = spark.conf.get("spark.submit.deployMode")
- val actualSqlFilePath = if (sparkMaster.equals("local")) {
- SparkFiles.get(sqlFilePath)
+ val actualSqlFilePath = if (sparkMaster.equalsIgnoreCase("yarn") &&
+ sparkDeployMode.equalsIgnoreCase("cluster")) {
+ sqlFilePath.split("/").last
} else {
sqlFilePath
}
diff --git a/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/SdkOption.java b/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/SdkOption.java
index ad52fd50a1f..f2781c52834 100644
--- a/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/SdkOption.java
+++ b/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/SdkOption.java
@@ -18,13 +18,14 @@
import lombok.Data;
-
@Data
public class SdkOption {
-// options for cluster mode
+ // options for cluster mode
private String zkCluster;
private String zkPath;
-// options for standalone mode
+ private String sparkConfPath = "";
+
+ // options for standalone mode
private String host;
private long port;
@@ -32,4 +33,5 @@ public class SdkOption {
private Boolean enableDebug = false;
private long requestTimeout = 60000;
private boolean isClusterMode = true;
+
}
diff --git a/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/impl/SqlClusterExecutor.java b/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/impl/SqlClusterExecutor.java
index 835b8c033c5..714a8d68dbb 100644
--- a/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/impl/SqlClusterExecutor.java
+++ b/java/openmldb-jdbc/src/main/java/com/_4paradigm/openmldb/sdk/impl/SqlClusterExecutor.java
@@ -69,6 +69,7 @@ public SqlClusterExecutor(SdkOption option, String libraryPath) throws SqlExcept
sqlOpt.setZk_path(option.getZkPath());
sqlOpt.setEnable_debug(option.getEnableDebug());
sqlOpt.setRequest_timeout(option.getRequestTimeout());
+ sqlOpt.setSpark_conf_path(option.getSparkConfPath());
this.sqlRouter = sql_router_sdk.NewClusterSQLRouter(sqlOpt);
sqlOpt.delete();
} else {
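A minimal sketch of wiring the new sparkConfPath option through the Java SDK; the lombok-generated setter names and the placeholder paths are assumptions, while the two-argument SqlClusterExecutor constructor comes from the hunk above:

public class SparkConfPathSketch {
    public static void main(String[] args) throws Exception {
        SdkOption option = new SdkOption();                   // @Data generates the setters used below
        option.setZkCluster("127.0.0.1:2181");                // cluster-mode options
        option.setZkPath("/openmldb");
        option.setSparkConfPath("/etc/openmldb/spark.conf");  // new option; defaults to ""
        // the path is forwarded to the native router via sqlOpt.setSpark_conf_path(...)
        SqlClusterExecutor executor = new SqlClusterExecutor(option, "sql_router_lib"); // placeholder library path
    }
}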
diff --git a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/client/TaskManagerClient.java b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/client/TaskManagerClient.java
index 6208b361feb..a81664c6a9b 100644
--- a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/client/TaskManagerClient.java
+++ b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/client/TaskManagerClient.java
@@ -378,10 +378,13 @@ public String getJobLog(int id) throws Exception {
return response.getLog();
}
- public String geTaskmanagertVersion() throws Exception {
- TaskManager.GetTaskmanagerVersionResponse response = taskManagerInterface.GetTaskmanagerVersion(
+ public ArrayList<String> getVersion() throws Exception {
+ TaskManager.GetVersionResponse response = taskManagerInterface.GetVersion(
TaskManager.EmptyMessage.newBuilder().build());
- return response.getVersion();
+ ArrayList<String> versions = new ArrayList<>();
+ versions.add(response.getTaskmanagerVersion());
+ versions.add(response.getBatchVersion());
+ return versions;
}
/**
diff --git a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/config/TaskManagerConfig.java b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/config/TaskManagerConfig.java
index 938df3750ee..ffef86de7c2 100644
--- a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/config/TaskManagerConfig.java
+++ b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/config/TaskManagerConfig.java
@@ -180,11 +180,11 @@ public static void parse() throws IOException, NumberFormatException, ConfigExce
SPARK_DEFAULT_CONF = prop.getProperty("spark.default.conf", "");
if (!SPARK_DEFAULT_CONF.isEmpty()) {
- String[] defaultSparkConfs = TaskManagerConfig.SPARK_DEFAULT_CONF.split(",");
+ String[] defaultSparkConfs = TaskManagerConfig.SPARK_DEFAULT_CONF.split(";");
for (String sparkConfMap: defaultSparkConfs) {
if (!sparkConfMap.isEmpty()) {
String[] kv = sparkConfMap.split("=");
- if (kv.length != 2) {
+ if (kv.length < 2) {
throw new ConfigException("spark.default.conf", String.format("error format of %s", sparkConfMap));
} else if (!kv[0].startsWith("spark")) {
throw new ConfigException("spark.default.conf", String.format("config key should start with 'spark' but get %s", kv[0]));
diff --git a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/TaskManagerInterface.java b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/TaskManagerInterface.java
index 758ae273273..1c070fb6ae9 100644
--- a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/TaskManagerInterface.java
+++ b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/TaskManagerInterface.java
@@ -62,6 +62,6 @@ public interface TaskManagerInterface {
@BrpcMeta(serviceName = "openmldb.taskmanager.TaskManagerServer", methodName = "GetJobLog")
TaskManager.GetJobLogResponse GetJobLog(TaskManager.GetJobLogRequest request);
- @BrpcMeta(serviceName = "openmldb.taskmanager.TaskManagerServer", methodName = "GetTaskmanagerVersion")
- TaskManager.GetTaskmanagerVersionResponse GetTaskmanagerVersion(TaskManager.EmptyMessage request);
+ @BrpcMeta(serviceName = "openmldb.taskmanager.TaskManagerServer", methodName = "GetVersion")
+ TaskManager.GetVersionResponse GetVersion(TaskManager.EmptyMessage request);
}
diff --git a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/impl/TaskManagerImpl.java b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/impl/TaskManagerImpl.java
index 5730a9d927a..7eae5a0ef9f 100644
--- a/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/impl/TaskManagerImpl.java
+++ b/java/openmldb-taskmanager/src/main/java/com/_4paradigm/openmldb/taskmanager/server/impl/TaskManagerImpl.java
@@ -316,9 +316,11 @@ public TaskManager.GetJobLogResponse GetJobLog(TaskManager.GetJobLogRequest requ
}
@Override
- public TaskManager.GetTaskmanagerVersionResponse GetTaskmanagerVersion(TaskManager.EmptyMessage request) {
- String version = VersionUtil.getVersion();
- return TaskManager.GetTaskmanagerVersionResponse.newBuilder().setVersion(version).build();
+ public TaskManager.GetVersionResponse GetVersion(TaskManager.EmptyMessage request) {
+ String taskmanagerVersion = VersionUtil.getTaskManagerVersion();
+ String batchVersion = VersionUtil.getBatchVersion();
+ return TaskManager.GetVersionResponse.newBuilder().setTaskmanagerVersion(taskmanagerVersion)
+ .setBatchVersion(batchVersion).build();
}
@Override
diff --git a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/OpenmldbBatchjobManager.scala b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/OpenmldbBatchjobManager.scala
index da1b77a0cd2..52396332b69 100644
--- a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/OpenmldbBatchjobManager.scala
+++ b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/OpenmldbBatchjobManager.scala
@@ -49,7 +49,7 @@ object OpenmldbBatchjobManager {
val mainClass = "com._4paradigm.openmldb.batchjob.RunBatchSql"
val tempSqlFile = SqlFileUtil.createTempSqlFile(sql)
- val args = List(tempSqlFile.getName)
+ val args = List(tempSqlFile.getAbsolutePath)
val jobInfo = SparkJobManager.submitSparkJob(jobType, mainClass, args, tempSqlFile.getAbsolutePath,
sparkConf.asScala.toMap, defaultDb, blocking=true)
@@ -62,7 +62,7 @@ object OpenmldbBatchjobManager {
val mainClass = "com._4paradigm.openmldb.batchjob.RunBatchAndShow"
val tempSqlFile = SqlFileUtil.createTempSqlFile(sql)
- val args = List(tempSqlFile.getName)
+ val args = List(tempSqlFile.getAbsolutePath)
SparkJobManager.submitSparkJob(jobType, mainClass, args, tempSqlFile.getAbsolutePath, sparkConf.asScala.toMap,
defaultDb)
@@ -73,7 +73,7 @@ object OpenmldbBatchjobManager {
val mainClass = "com._4paradigm.openmldb.batchjob.ImportOnlineData"
val tempSqlFile = SqlFileUtil.createTempSqlFile(sql)
- val args = List(tempSqlFile.getName)
+ val args = List(tempSqlFile.getAbsolutePath)
SparkJobManager.submitSparkJob(jobType, mainClass, args, tempSqlFile.getAbsolutePath, sparkConf.asScala.toMap,
defaultDb)
@@ -84,7 +84,7 @@ object OpenmldbBatchjobManager {
val mainClass = "com._4paradigm.openmldb.batchjob.ImportOfflineData"
val tempSqlFile = SqlFileUtil.createTempSqlFile(sql)
- val args = List(tempSqlFile.getName)
+ val args = List(tempSqlFile.getAbsolutePath)
SparkJobManager.submitSparkJob(jobType, mainClass, args, tempSqlFile.getAbsolutePath, sparkConf.asScala.toMap,
defaultDb)
@@ -95,7 +95,7 @@ object OpenmldbBatchjobManager {
val mainClass = "com._4paradigm.openmldb.batchjob.ExportOfflineData"
val tempSqlFile = SqlFileUtil.createTempSqlFile(sql)
- val args = List(tempSqlFile.getName)
+ val args = List(tempSqlFile.getAbsolutePath)
SparkJobManager.submitSparkJob(jobType, mainClass, args, tempSqlFile.getAbsolutePath, sparkConf.asScala.toMap,
defaultDb)
diff --git a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/spark/SparkJobManager.scala b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/spark/SparkJobManager.scala
index 0e5328694d1..1a4da2800ff 100644
--- a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/spark/SparkJobManager.scala
+++ b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/spark/SparkJobManager.scala
@@ -94,13 +94,14 @@ object SparkJobManager {
launcher.setConf("spark.yarn.maxAppAttempts", TaskManagerConfig.SPARK_YARN_MAXAPPATTEMPTS.toString)
}
- // TODO: Support escape delimiter
// Set default Spark conf by TaskManager configuration file
- val defaultSparkConfs = TaskManagerConfig.SPARK_DEFAULT_CONF.split(",")
+ val defaultSparkConfs = TaskManagerConfig.SPARK_DEFAULT_CONF.split(";")
defaultSparkConfs.map(sparkConf => {
if (sparkConf.nonEmpty) {
val kvList = sparkConf.split("=")
- launcher.setConf(kvList(0), kvList(1))
+ val key = kvList(0)
+ val value = kvList.drop(1).mkString("=")
+ launcher.setConf(key, value)
}
})
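Because Spark values frequently contain '=' themselves, the switch from ',' to ';' as the item separator and the drop(1).mkString("=") rebuild go together; a small Java sketch of the same parsing rule (the property names below are just examples):

import java.util.LinkedHashMap;
import java.util.Map;

public class SparkDefaultConfSketch {
    // Parse "k1=v1;k2=v2" where values may themselves contain '='.
    static Map<String, String> parse(String raw) {
        Map<String, String> out = new LinkedHashMap<>();
        for (String item : raw.split(";")) {
            if (item.isEmpty()) {
                continue;
            }
            String[] kv = item.split("=");
            if (kv.length < 2 || !kv[0].startsWith("spark")) {
                throw new IllegalArgumentException("bad spark.default.conf item: " + item);
            }
            // keep everything after the first '=' as the value, mirroring kvList.drop(1).mkString("=")
            String value = item.substring(item.indexOf('=') + 1);
            out.put(kv[0], value);
        }
        return out;
    }

    public static void main(String[] args) {
        // example only: a value containing '=' survives the parse
        String raw = "spark.executor.memory=2g;spark.driver.extraJavaOptions=-Dfile.encoding=UTF-8";
        System.out.println(parse(raw));
        // {spark.executor.memory=2g, spark.driver.extraJavaOptions=-Dfile.encoding=UTF-8}
    }
}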
diff --git a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/BatchJobUtil.scala b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/BatchJobUtil.scala
index 9f13bd5f8af..d1dfce6231b 100644
--- a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/BatchJobUtil.scala
+++ b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/BatchJobUtil.scala
@@ -16,11 +16,15 @@
package com._4paradigm.openmldb.taskmanager.util
+import org.slf4j.LoggerFactory
+
import java.io.File
import java.io.IOException
object BatchJobUtil {
+ private val logger = LoggerFactory.getLogger(this.getClass)
+
/**
* Get the default batch job jar from the presupposed directories.
*
@@ -60,4 +64,20 @@ object BatchJobUtil {
null
}
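+ // Find the openmldb-batch jar under the given lib directory, skipping javadoc/sources jars; returns null if none is found.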
+ def findOpenmldbBatchJar(libDirectory: String): String = {
+ val libDirectoryFile = new File(libDirectory)
+
+ if (libDirectoryFile != null && libDirectoryFile.listFiles != null) {
+ val fileList = libDirectoryFile.listFiles.filter(_.isFile)
+ for (file <- fileList) {
+ if (file.getName.startsWith("openmldb-batch") && file.getName.endsWith(".jar")
+ && !file.getName.contains("javadoc") && !file.getName.contains("sources")) {
+ return file.getAbsolutePath
+ }
+ }
+ }
+
+ null
+ }
+
}
diff --git a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/VersionUtil.scala b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/VersionUtil.scala
index 5df1797b9bb..a2198bc532b 100644
--- a/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/VersionUtil.scala
+++ b/java/openmldb-taskmanager/src/main/scala/com/_4paradigm/openmldb/taskmanager/util/VersionUtil.scala
@@ -16,12 +16,17 @@
package com._4paradigm.openmldb.taskmanager.util
+import com._4paradigm.openmldb.taskmanager.config.TaskManagerConfig
import org.apache.commons.io.IOUtils
+import org.slf4j.LoggerFactory
+import java.nio.file.Paths
import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`
object VersionUtil {
- def getVersion(): String = {
+ private val logger = LoggerFactory.getLogger(this.getClass)
+
+ def getTaskManagerVersion(): String = {
// Read local git properties file
val stream = this.getClass.getClassLoader.getResourceAsStream("git.properties")
@@ -45,4 +50,22 @@ object VersionUtil {
s"$version-$gitCommit"
}
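+ // Get the version of the openmldb-batch jar under $SPARK_HOME/jars by running its VersionCli in a subprocess.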
+ def getBatchVersion(): String = {
+ val sparkJarsPath = Paths.get(TaskManagerConfig.SPARK_HOME, "jars").toString
+ val batchJarPath = BatchJobUtil.findOpenmldbBatchJar(sparkJarsPath)
+ if (batchJarPath == null) {
+ logger.error("Fail to find batch jar file and the version is unknown")
+ return "unknown"
+ }
+
+ // Use Java command to get version from jar file
+ val ps = Runtime.getRuntime.exec(Array[String]("java", "-cp", batchJarPath,
+ "com._4paradigm.openmldb.batch.utils.VersionCli"))
+ ps.waitFor
+ val inputStream = ps.getInputStream
+ val bytes = new Array[Byte](inputStream.available)
+ inputStream.read(bytes, 0, bytes.length)
+ new String(bytes)
+ }
+
}
diff --git a/java/openmldb-taskmanager/src/test/scala/com/_4paradigm/openmldb/taskmanager/util/TestVersionUtil.scala b/java/openmldb-taskmanager/src/test/scala/com/_4paradigm/openmldb/taskmanager/util/TestVersionUtil.scala
index 324a4899f98..4e7797274d6 100644
--- a/java/openmldb-taskmanager/src/test/scala/com/_4paradigm/openmldb/taskmanager/util/TestVersionUtil.scala
+++ b/java/openmldb-taskmanager/src/test/scala/com/_4paradigm/openmldb/taskmanager/util/TestVersionUtil.scala
@@ -20,8 +20,8 @@ import org.scalatest.FunSuite
class TestVersionUtil extends FunSuite {
- test("Test getVersion") {
- val version = VersionUtil.getVersion()
+ test("Test getTaskManagerVersion") {
+ val version = VersionUtil.getTaskManagerVersion()
assert(version.nonEmpty)
}
diff --git a/python/openmldb/sdk/sdk.py b/python/openmldb/sdk/sdk.py
index 6c23e4e8ea7..ec4e3230e3a 100644
--- a/python/openmldb/sdk/sdk.py
+++ b/python/openmldb/sdk/sdk.py
@@ -32,10 +32,11 @@
class OpenMLDBClusterSdkOptions(object):
- def __init__(self, zk_cluster, zk_path, session_timeout=3000):
+ def __init__(self, zk_cluster, zk_path, session_timeout=3000, spark_conf_path=""):
self.zk_cluster = zk_cluster
self.zk_path = zk_path
self.zk_session_timeout = session_timeout
+ self.spark_conf_path = spark_conf_path
class OpenMLDBStandaloneSdkOptions(object):
diff --git a/release/bin/start.sh b/release/bin/start.sh
index fd0ba82b158..79ccf60a0f7 100755
--- a/release/bin/start.sh
+++ b/release/bin/start.sh
@@ -122,7 +122,7 @@ case $OP in
shift
cd "$CURDIR" || exit 1
sh "$0" stop "${@}"
- sleep 5
+ sleep 10
sh "$0" start "${@}"
;;
*)
diff --git a/release/conf/taskmanager.properties b/release/conf/taskmanager.properties
index 742a66f3324..83da9194561 100644
--- a/release/conf/taskmanager.properties
+++ b/release/conf/taskmanager.properties
@@ -10,4 +10,5 @@ zookeeper.root_path=/openmldb
# Spark Config
spark.home=
spark.master=local
-offline.data.prefix=file:///tmp/openmldb_offline_storage/
\ No newline at end of file
+offline.data.prefix=file:///tmp/openmldb_offline_storage/
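+# Multiple Spark confs are separated by ';' since a single conf value may contain ',' or '='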
+spark.default.conf=spark.driver.extraJavaOptions=-Dfile.encoding=utf-8;spark.executor.extraJavaOptions=-Dfile.encoding=utf-8
\ No newline at end of file
diff --git a/src/apiserver/api_server_test.cc b/src/apiserver/api_server_test.cc
index b9e200ec04a..c3e637d660c 100644
--- a/src/apiserver/api_server_test.cc
+++ b/src/apiserver/api_server_test.cc
@@ -678,7 +678,7 @@ TEST_F(APIServerTest, getDBs) {
ASSERT_TRUE(document.HasMember("dbs"));
auto& exists_dbs = document["dbs"];
ASSERT_TRUE(exists_dbs.IsArray());
- for (int i = 0; i < exists_dbs.Size(); ++i) {
+ for (decltype(exists_dbs.Size()) i = 0; i < exists_dbs.Size(); ++i) {
auto db = exists_dbs[i].GetString();
if (test_dbs.find(db) != test_dbs.end()) {
FAIL() << "can't have test db " << db;
diff --git a/src/base/file_util.h b/src/base/file_util.h
index f04afbcaa40..89c09fae00b 100644
--- a/src/base/file_util.h
+++ b/src/base/file_util.h
@@ -103,6 +103,26 @@ inline static int GetSubDir(const std::string& path,
return 0;
}
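+// Collect the names of the regular files directly under 'path' (non-recursive). Returns 0 on success, -1 on error.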
+inline static int GetSubFiles(const std::string& path, std::vector<std::string>& sub_dir) { // NOLINT
+ if (path.empty()) {
+ return -1;
+ }
+ DIR* dir = opendir(path.c_str());
+ if (dir == NULL) {
+ return -1;
+ }
+ struct dirent* ptr;
+ while ((ptr = readdir(dir)) != NULL) {
+ if (strcmp(ptr->d_name, ".") == 0 || strcmp(ptr->d_name, "..") == 0) {
+ continue;
+ } else if (ptr->d_type == DT_REG) {
+ sub_dir.push_back(ptr->d_name);
+ }
+ }
+ closedir(dir);
+ return 0;
+}
+
inline static int GetFileName(const std::string& path,
std::vector<std::string>& file_vec) { // NOLINT
if (path.empty()) {
@@ -287,6 +307,27 @@ __attribute__((unused)) static bool CopyFile(const std::string& src_file, const
return has_error == false;
}
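+// Recreate 'dest' and hard-link every regular file under 'src' into it.
+// Returns 0 on success, -2 if 'src' does not exist, or the non-zero result of link() on failure.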
+inline static int HardLinkDir(const std::string& src, const std::string& dest) {
+ if (!IsExists(src)) {
+ return -2;
+ }
+
+ if (IsExists(dest)) {
+ RemoveDirRecursive(dest);
+ }
+
+ MkdirRecur(dest);
+ std::vector<std::string> files;
+ GetSubFiles(src, files);
+ for (const auto& file : files) {
+ int ret = link((src + "/" + file).c_str(), (dest + "/" + file).c_str());
+ if (ret) {
+ return ret;
+ }
+ }
+ return 0;
+}
+
} // namespace base
} // namespace openmldb
diff --git a/src/client/ns_client.cc b/src/client/ns_client.cc
index a4b39208359..e0c98a88607 100644
--- a/src/client/ns_client.cc
+++ b/src/client/ns_client.cc
@@ -190,9 +190,9 @@ bool NsClient::MakeSnapshot(const std::string& name, const std::string& db, uint
bool NsClient::ShowOPStatus(::openmldb::nameserver::ShowOPStatusResponse& response, const std::string& name,
uint32_t pid, std::string& msg) {
::openmldb::nameserver::ShowOPStatusRequest request;
+ request.set_db(GetDb());
if (!name.empty()) {
request.set_name(name);
- request.set_db(GetDb());
}
if (pid != INVALID_PID) {
request.set_pid(pid);
@@ -884,7 +884,7 @@ bool NsClient::AddIndex(const std::string& db_name,
return false;
}
-base::Status NsClient::AddMultiIndex(const std::string& table_name,
+base::Status NsClient::AddMultiIndex(const std::string& db, const std::string& table_name,
const std::vector<::openmldb::common::ColumnKey>& column_keys) {
::openmldb::nameserver::AddIndexRequest request;
::openmldb::nameserver::GeneralResponse response;
@@ -896,7 +896,7 @@ base::Status NsClient::AddMultiIndex(const std::string& table_name,
}
}
request.set_name(table_name);
- request.set_db(GetDb());
+ request.set_db(db);
bool ok = client_.SendRequest(&::openmldb::nameserver::NameServer_Stub::AddIndex, &request, &response,
FLAGS_request_timeout_ms, 1);
if (ok && response.code() == 0) {
diff --git a/src/client/ns_client.h b/src/client/ns_client.h
index e7c23caf7e2..320540ebb02 100644
--- a/src/client/ns_client.h
+++ b/src/client/ns_client.h
@@ -217,7 +217,7 @@ class NsClient : public Client {
std::vector* cols,
std::string& msg); // NOLINT
- base::Status AddMultiIndex(const std::string& table_name,
+ base::Status AddMultiIndex(const std::string& db, const std::string& table_name,
const std::vector<::openmldb::common::ColumnKey>& column_keys);
bool DeleteIndex(const std::string& table_name, const std::string& idx_name,
diff --git a/src/client/taskmanager_client.cc b/src/client/taskmanager_client.cc
index 25703a7d498..a6c0a6a20a4 100644
--- a/src/client/taskmanager_client.cc
+++ b/src/client/taskmanager_client.cc
@@ -22,7 +22,7 @@
namespace openmldb::client {
::openmldb::base::Status TaskManagerClient::ShowJobs(bool only_unfinished, int job_timeout,
- std::vector<::openmldb::taskmanager::JobInfo>& job_infos) {
+ std::vector<::openmldb::taskmanager::JobInfo>* job_infos) {
::openmldb::taskmanager::ShowJobsRequest request;
::openmldb::taskmanager::ShowJobsResponse response;
@@ -36,7 +36,7 @@ ::openmldb::base::Status TaskManagerClient::ShowJobs(bool only_unfinished, int j
for (int32_t i = 0; i < response.jobs_size(); i++) {
::openmldb::taskmanager::JobInfo job_info;
job_info.CopyFrom(response.jobs(i));
- job_infos.push_back(job_info);
+ job_infos->push_back(job_info);
}
}
return {response.code(), response.msg()};
@@ -46,7 +46,7 @@ ::openmldb::base::Status TaskManagerClient::ShowJobs(bool only_unfinished, int j
}
::openmldb::base::Status TaskManagerClient::ShowJob(const int id, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
::openmldb::taskmanager::ShowJobRequest request;
::openmldb::taskmanager::ShowJobResponse response;
@@ -58,7 +58,7 @@ ::openmldb::base::Status TaskManagerClient::ShowJob(const int id, int job_timeou
if (ok) {
if (response.code() == 0) {
if (response.has_job()) {
- job_info.CopyFrom(response.job());
+ job_info->CopyFrom(response.job());
}
}
return {response.code(), response.msg()};
@@ -68,7 +68,7 @@ ::openmldb::base::Status TaskManagerClient::ShowJob(const int id, int job_timeou
}
::openmldb::base::Status TaskManagerClient::StopJob(const int id, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
::openmldb::taskmanager::StopJobRequest request;
::openmldb::taskmanager::StopJobResponse response;
@@ -80,7 +80,7 @@ ::openmldb::base::Status TaskManagerClient::StopJob(const int id, int job_timeou
if (ok) {
if (response.code() == 0) {
if (response.has_job()) {
- job_info.CopyFrom(response.job());
+ job_info->CopyFrom(response.job());
}
}
return {response.code(), response.msg()};
@@ -92,7 +92,7 @@ ::openmldb::base::Status TaskManagerClient::StopJob(const int id, int job_timeou
::openmldb::base::Status TaskManagerClient::RunBatchSql(const std::string& sql,
const std::map<std::string, std::string>& config,
const std::string& default_db, int job_timeout,
- std::string& output) {
+ std::string* output) {
::openmldb::taskmanager::RunBatchSqlRequest request;
::openmldb::taskmanager::RunBatchSqlResponse response;
@@ -107,7 +107,7 @@ ::openmldb::base::Status TaskManagerClient::RunBatchSql(const std::string& sql,
if (ok) {
if (response.code() == 0) {
- output = response.output();
+ *output = response.output();
}
return {response.code(), ""};
} else {
@@ -119,7 +119,7 @@ ::openmldb::base::Status TaskManagerClient::RunBatchAndShow(const std::string& s
const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job,
int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
::openmldb::taskmanager::RunBatchAndShowRequest request;
::openmldb::taskmanager::ShowJobResponse response;
@@ -136,7 +136,7 @@ ::openmldb::base::Status TaskManagerClient::RunBatchAndShow(const std::string& s
if (ok) {
if (response.code() == 0) {
if (response.has_job()) {
- job_info.CopyFrom(response.job());
+ job_info->CopyFrom(response.job());
}
}
return {response.code(), response.msg()};
@@ -149,7 +149,7 @@ ::openmldb::base::Status TaskManagerClient::ImportOnlineData(const std::string&
const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job,
int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
::openmldb::taskmanager::ImportOnlineDataRequest request;
::openmldb::taskmanager::ShowJobResponse response;
@@ -166,7 +166,7 @@ ::openmldb::base::Status TaskManagerClient::ImportOnlineData(const std::string&
if (ok) {
if (response.code() == 0) {
if (response.has_job()) {
- job_info.CopyFrom(response.job());
+ job_info->CopyFrom(response.job());
}
}
return {response.code(), response.msg()};
@@ -179,7 +179,7 @@ ::openmldb::base::Status TaskManagerClient::ImportOfflineData(const std::string&
const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job,
int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
::openmldb::taskmanager::ImportOfflineDataRequest request;
::openmldb::taskmanager::ShowJobResponse response;
@@ -196,7 +196,7 @@ ::openmldb::base::Status TaskManagerClient::ImportOfflineData(const std::string&
if (ok) {
if (response.code() == 0) {
if (response.has_job()) {
- job_info.CopyFrom(response.job());
+ job_info->CopyFrom(response.job());
}
}
return {response.code(), response.msg()};
@@ -209,7 +209,7 @@ ::openmldb::base::Status TaskManagerClient::ExportOfflineData(const std::string&
const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job,
int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
::openmldb::taskmanager::ExportOfflineDataRequest request;
::openmldb::taskmanager::ShowJobResponse response;
@@ -226,7 +226,7 @@ ::openmldb::base::Status TaskManagerClient::ExportOfflineData(const std::string&
if (ok) {
if (response.code() == 0) {
if (response.has_job()) {
- job_info.CopyFrom(response.job());
+ job_info->CopyFrom(response.job());
}
}
return {response.code(), response.msg()};
diff --git a/src/client/taskmanager_client.h b/src/client/taskmanager_client.h
index 4252f5d3ba1..d79cbbad8e3 100644
--- a/src/client/taskmanager_client.h
+++ b/src/client/taskmanager_client.h
@@ -45,31 +45,31 @@ class TaskManagerClient : public Client {
int Init() override { return client_.Init(); }
::openmldb::base::Status ShowJobs(bool only_unfinished, int job_timeout,
- std::vector<::openmldb::taskmanager::JobInfo>& job_infos); // NOLINT
+ std::vector<::openmldb::taskmanager::JobInfo>* job_infos);
- ::openmldb::base::Status ShowJob(int id, int job_timeout, ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::base::Status ShowJob(int id, int job_timeout, ::openmldb::taskmanager::JobInfo* job_info);
- ::openmldb::base::Status StopJob(int id, int job_timeout, ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::base::Status StopJob(int id, int job_timeout, ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status RunBatchSql(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, int job_timeout,
- std::string& output); // NOLINT
+ std::string* output);
::openmldb::base::Status RunBatchAndShow(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status ImportOnlineData(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status ImportOfflineData(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status ExportOfflineData(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status DropOfflineTable(const std::string& db, const std::string& table, int job_timeout);
diff --git a/src/cmd/sql_cmd.h b/src/cmd/sql_cmd.h
index 7d0162baa71..92893e5aace 100644
--- a/src/cmd/sql_cmd.h
+++ b/src/cmd/sql_cmd.h
@@ -35,6 +35,7 @@
DEFINE_bool(interactive, true, "Set the interactive");
DEFINE_string(database, "", "Set database");
DECLARE_string(cmd);
+DEFINE_string(spark_conf, "", "The config file of Spark job");
// cluster mode
DECLARE_string(zk_cluster);
@@ -209,6 +210,7 @@ bool InitClusterSDK() {
copt.zk_session_timeout = FLAGS_zk_session_timeout;
copt.zk_log_level = FLAGS_zk_log_level;
copt.zk_log_file = FLAGS_zk_log_file;
+
cs = new ::openmldb::sdk::ClusterSDK(copt);
if (!cs->Init()) {
std::cout << "ERROR: Failed to connect to db" << std::endl;
@@ -220,6 +222,9 @@ bool InitClusterSDK() {
return false;
}
sr->SetInteractive(FLAGS_interactive);
+
+ sr->GetSqlRouterOptions().spark_conf_path = FLAGS_spark_conf;
+
return true;
}
diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc
index 304df3c2963..1171b5dc6a4 100644
--- a/src/cmd/sql_cmd_test.cc
+++ b/src/cmd/sql_cmd_test.cc
@@ -789,6 +789,60 @@ TEST_P(DBSDKTest, DeployLongWindowsEmpty) {
ASSERT_TRUE(ok);
}
+TEST_P(DBSDKTest, DeployLongWindowsWithExcludeCurrentRow) {
+ auto& cli = GetParam();
+ cs = cli->cs;
+ sr = cli->sr;
+
+ ::hybridse::sdk::Status status;
+ sr->ExecuteSQL("SET @@execute_mode='online';", &status);
+ std::string base_table = "t_lw" + GenRand();
+ std::string base_db = "d_lw" + GenRand();
+ bool ok;
+ std::string msg;
+ CreateDBTableForLongWindow(base_db, base_table);
+
+ std::string deploy_sql = "deploy test_aggr options(LONG_WINDOWS='w1:2') select col1, col2,"
+ " sum(i64_col) over w1 as w1_sum_i64_col,"
+ " from " + base_table +
+ " WINDOW w1 AS (PARTITION BY " + base_table + ".col1," + base_table + ".col2 ORDER BY col3"
+ " ROWS_RANGE BETWEEN 5 PRECEDING AND 0 PRECEDING EXCLUDE CURRENT_ROW);";
+ sr->ExecuteSQL(base_db, "use " + base_db + ";", &status);
+ ASSERT_TRUE(status.IsOK()) << status.msg;
+ sr->ExecuteSQL(base_db, deploy_sql, &status);
+ ASSERT_TRUE(status.IsOK()) << status.msg;
+
+ PrepareDataForLongWindow(base_db, base_table);
+ std::string pre_aggr_db = openmldb::nameserver::PRE_AGG_DB;
+ std::string pre_aggr_table = "pre_" + base_db + "_test_aggr_w1_sum_i64_col";
+ std::string result_sql = "select * from " + pre_aggr_table +";";
+ auto rs = sr->ExecuteSQL(pre_aggr_db, result_sql, &status);
+ ASSERT_EQ(5, rs->Size());
+
+ int req_num = 2;
+ for (int i = 0; i < req_num; i++) {
+ std::shared_ptr req;
+ PrepareRequestRowForLongWindow(base_db, "test_aggr", req);
+ auto res = sr->CallProcedure(base_db, "test_aggr", req, &status);
+ ASSERT_TRUE(status.IsOK());
+ ASSERT_EQ(1, res->Size());
+ ASSERT_TRUE(res->Next());
+ ASSERT_EQ("str1", res->GetStringUnsafe(0));
+ ASSERT_EQ("str2", res->GetStringUnsafe(1));
+ int64_t exp = 11 + 10 + 9 + 8 + 7 + 6;
+ ASSERT_EQ(exp, res->GetInt64Unsafe(2));
+ }
+
+ ASSERT_TRUE(cs->GetNsClient()->DropProcedure(base_db, "test_aggr", msg));
+ pre_aggr_table = "pre_" + base_db + "_test_aggr_w1_sum_i64_col";
+ ok = sr->ExecuteDDL(pre_aggr_db, "drop table " + pre_aggr_table + ";", &status);
+ ASSERT_TRUE(ok);
+ ok = sr->ExecuteDDL(base_db, "drop table " + base_table + ";", &status);
+ ASSERT_TRUE(ok);
+ ok = sr->DropDB(base_db, &status);
+ ASSERT_TRUE(ok);
+}
+
TEST_P(DBSDKTest, DeployLongWindowsExecuteSum) {
auto cli = GetParam();
cs = cli->cs;
@@ -2214,7 +2268,7 @@ TEST_P(DBSDKTest, DeployStatsOnlyCollectDeployProcedure) {
env.EnableDeployStats();
absl::SleepFor(absl::Seconds(2));
- for (int i =0; i < 5; ++i) {
+ for (int i = 0; i < 5; ++i) {
env.CallProcedure();
}
diff --git a/src/nameserver/name_server_impl.cc b/src/nameserver/name_server_impl.cc
index f82ecaa369b..a2eef8be8c7 100644
--- a/src/nameserver/name_server_impl.cc
+++ b/src/nameserver/name_server_impl.cc
@@ -2838,13 +2838,16 @@ void NameServerImpl::ShowOPStatus(RpcController* controller, const ShowOPStatusR
std::lock_guard<std::mutex> lock(mu_);
DeleteDoneOP();
for (const auto& op_data : done_op_list_) {
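+ // skip ops that belong to a different database when the request carries one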
+ if (request->has_db() && op_data->op_info_.db() != request->db()) {
+ continue;
+ }
if (request->has_name() && op_data->op_info_.name() != request->name()) {
continue;
}
if (request->has_pid() && op_data->op_info_.pid() != request->pid()) {
continue;
}
- op_map.insert(std::make_pair(op_data->op_info_.op_id(), op_data));
+ op_map.emplace(op_data->op_info_.op_id(), op_data);
}
for (const auto& op_list : task_vec_) {
if (op_list.empty()) {
@@ -2860,7 +2863,7 @@ void NameServerImpl::ShowOPStatus(RpcController* controller, const ShowOPStatusR
if (request->has_pid() && op_data->op_info_.pid() != request->pid()) {
continue;
}
- op_map.insert(std::make_pair(op_data->op_info_.op_id(), op_data));
+ op_map.emplace(op_data->op_info_.op_id(), op_data);
}
}
for (const auto& kv : op_map) {
@@ -9037,7 +9040,7 @@ void NameServerImpl::AddIndex(RpcController* controller, const AddIndexRequest*
std::map> tablet_client_map;
if (!GetTableInfo(name, db, &table_info)) {
base::SetResponseStatus(ReturnCode::kTableIsNotExist, "table is not exist!", response);
- LOG(WARNING) << "table[" << name << "] is not exist!";
+ LOG(WARNING) << "table[" << db << "." << name << "] is not exist!";
return;
}
if (table_info->storage_mode() != ::openmldb::common::kMemory) {
diff --git a/src/proto/taskmanager.proto b/src/proto/taskmanager.proto
index 411aa46363a..a9f53c40ba2 100644
--- a/src/proto/taskmanager.proto
+++ b/src/proto/taskmanager.proto
@@ -141,8 +141,9 @@ message DropFunctionResponse {
optional string msg = 2;
};
-message GetTaskmanagerVersionResponse {
- required string version = 1;
+message GetVersionResponse {
+ required string taskmanager_version = 1;
+ required string batch_version = 2;
};
message EmptyMessage {};
@@ -167,5 +168,5 @@ service TaskManagerServer {
rpc DropFunction(DropFunctionRequest) returns (DropFunctionResponse);
// Other APIs
- rpc GetTaskmanagerVersion(EmptyMessage) returns (GetTaskmanagerVersionResponse);
+ rpc GetVersion(EmptyMessage) returns (GetVersionResponse);
};
diff --git a/src/replica/snapshot_replica_test.cc b/src/replica/snapshot_replica_test.cc
index 9b3adcb9171..ad53f5c9d94 100644
--- a/src/replica/snapshot_replica_test.cc
+++ b/src/replica/snapshot_replica_test.cc
@@ -46,6 +46,7 @@ using ::openmldb::storage::Ticket;
using ::openmldb::tablet::TabletImpl;
DECLARE_string(db_root_path);
+DECLARE_string(ssd_root_path);
DECLARE_string(hdd_root_path);
DECLARE_string(endpoint);
DECLARE_int32(make_snapshot_threshold_offset);
@@ -326,6 +327,226 @@ TEST_P(SnapshotReplicaTest, SendSnapshot) {
sleep(2);
}
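+// Verify that a disk table can still be recovered from the binlog when the snapshot data
+// directory is removed and only the MANIFEST file is left behind.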
+TEST_P(SnapshotReplicaTest, IncompleteSnapshot) {
+ FLAGS_make_snapshot_threshold_offset = 0;
+ uint32_t tid = 2;
+ uint32_t pid = 123;
+ auto storage_mode = GetParam();
+ uint64_t cur_time = ::baidu::common::timer::get_micros() / 1000;
+ {
+ ::openmldb::tablet::TabletImpl* tablet = new ::openmldb::tablet::TabletImpl();
+ MockClosure closure;
+ tablet->Init("");
+ brpc::Server server;
+ if (server.AddService(tablet, brpc::SERVER_OWNS_SERVICE) != 0) {
+ PDLOG(WARNING, "fail to register tablet rpc service");
+ exit(1);
+ }
+ brpc::ServerOptions options;
+ std::string leader_point = "127.0.0.1:18529";
+ if (server.Start(leader_point.c_str(), &options) != 0) {
+ PDLOG(WARNING, "fail to start server %s", leader_point.c_str());
+ exit(1);
+ }
+
+ ::openmldb::client::TabletClient client(leader_point, "");
+ client.Init();
+ std::vector<std::string> endpoints;
+ bool ret =
+ client.CreateTable("table1", tid, pid, 100000, 0, true, endpoints, ::openmldb::type::TTLType::kAbsoluteTime,
+ 16, 0, ::openmldb::type::CompressType::kNoCompress, storage_mode);
+ ASSERT_TRUE(ret);
+ ret = client.Put(tid, pid, "testkey", cur_time, ::openmldb::test::EncodeKV("testkey", "value1"));
+ ASSERT_TRUE(ret);
+
+ uint32_t count = 0;
+ while (count < 10) {
+ count++;
+ std::string key = "test";
+ client.Put(tid, pid, key, cur_time + count, ::openmldb::test::EncodeKV(key, key));
+ }
+ ::openmldb::api::GeneralRequest grq;
+ grq.set_tid(tid);
+ grq.set_pid(pid);
+ grq.set_storage_mode(storage_mode);
+ ::openmldb::api::GeneralResponse grp;
+ grp.set_code(-1);
+ tablet->MakeSnapshot(NULL, &grq, &grp, &closure);
+ sleep(5);
+ }
+
+ {
+ ::openmldb::tablet::TabletImpl* tablet = new ::openmldb::tablet::TabletImpl();
+ MockClosure closure;
+ tablet->Init("");
+ brpc::Server server;
+ if (server.AddService(tablet, brpc::SERVER_OWNS_SERVICE) != 0) {
+ PDLOG(WARNING, "fail to register tablet rpc service");
+ exit(1);
+ }
+ brpc::ServerOptions options;
+ std::string leader_point = "127.0.0.1:18529";
+ if (server.Start(leader_point.c_str(), &options) != 0) {
+ PDLOG(WARNING, "fail to start server %s", leader_point.c_str());
+ exit(1);
+ }
+
+ ::openmldb::client::TabletClient client(leader_point, "");
+ client.Init();
+
+ // load table
+ ::openmldb::api::TableMeta table_meta;
+ table_meta.set_format_version(1);
+ table_meta.set_name("table1");
+ table_meta.set_tid(tid);
+ table_meta.set_pid(pid);
+ table_meta.set_storage_mode(storage_mode);
+ client.LoadTable(table_meta, nullptr);
+ sleep(5);
+
+ ::openmldb::api::ScanRequest sr;
+ sr.set_tid(tid);
+ sr.set_pid(pid);
+ sr.set_pk("testkey");
+ sr.set_st(cur_time + 1);
+ sr.set_et(cur_time - 1);
+ sr.set_limit(10);
+ ::openmldb::api::ScanResponse srp;
+ tablet->Scan(NULL, &sr, &srp, &closure);
+ ASSERT_EQ(1, (int64_t)srp.count());
+ ASSERT_EQ(0, srp.code());
+
+ sr.set_tid(tid);
+ sr.set_pid(pid);
+ sr.set_pk("test");
+ sr.set_st(cur_time + 20);
+ sr.set_et(cur_time - 20);
+ sr.set_limit(20);
+ tablet->Scan(NULL, &sr, &srp, &closure);
+ ASSERT_EQ(10, (int64_t)srp.count());
+ ASSERT_EQ(0, srp.code());
+
+ std::string key = "test2";
+ ASSERT_TRUE(client.Put(tid, pid, key, cur_time, ::openmldb::test::EncodeKV(key, key)));
+
+ sr.set_tid(tid);
+ sr.set_pid(pid);
+ sr.set_pk("test2");
+ sr.set_st(cur_time + 20);
+ sr.set_et(cur_time - 20);
+ sr.set_limit(20);
+ tablet->Scan(NULL, &sr, &srp, &closure);
+ ASSERT_EQ(1, (int64_t)srp.count());
+ ASSERT_EQ(0, srp.code());
+ }
+
+ // remove the snapshot file, only keeping the MANIFEST
+ // i.e., corrupt the snapshot
+ std::string db_root_path;
+ if (storage_mode == common::kSSD) {
+ db_root_path = FLAGS_ssd_root_path;
+ } else if (storage_mode == common::kHDD) {
+ db_root_path = FLAGS_hdd_root_path;
+ } else {
+ db_root_path = FLAGS_db_root_path;
+ }
+ std::string snapshot_path = absl::StrCat(db_root_path, "/", tid, "_", pid, "/snapshot/");
+ std::vector<std::string> sub_dirs;
+ ::openmldb::base::GetSubDir(snapshot_path, sub_dirs);
+ for (const auto& dir : sub_dirs) {
+ auto sub_path = absl::StrCat(snapshot_path, dir);
+ DLOG(INFO) << "remove snapshot path: " << sub_path;
+ ASSERT_TRUE(::openmldb::base::RemoveDir(sub_path));
+ }
+ sleep(2);
+
+ {
+ ::openmldb::tablet::TabletImpl* tablet = new ::openmldb::tablet::TabletImpl();
+ MockClosure closure;
+ tablet->Init("");
+ brpc::Server server;
+ if (server.AddService(tablet, brpc::SERVER_OWNS_SERVICE) != 0) {
+ PDLOG(WARNING, "fail to register tablet rpc service");
+ exit(1);
+ }
+ brpc::ServerOptions options;
+ std::string leader_point = "127.0.0.1:18529";
+ if (server.Start(leader_point.c_str(), &options) != 0) {
+ PDLOG(WARNING, "fail to start server %s", leader_point.c_str());
+ exit(1);
+ }
+
+ ::openmldb::client::TabletClient client(leader_point, "");
+ client.Init();
+
+ // load table
+ ::openmldb::api::TableMeta table_meta;
+ table_meta.set_format_version(1);
+ table_meta.set_name("table1");
+ table_meta.set_tid(tid);
+ table_meta.set_pid(pid);
+ table_meta.set_storage_mode(storage_mode);
+ client.LoadTable(table_meta, nullptr);
+ sleep(5);
+
+ ::openmldb::api::ScanRequest sr;
+ sr.set_tid(tid);
+ sr.set_pid(pid);
+ sr.set_pk("testkey");
+ sr.set_st(cur_time + 1);
+ sr.set_et(cur_time - 1);
+ sr.set_limit(10);
+ ::openmldb::api::ScanResponse srp;
+ tablet->Scan(NULL, &sr, &srp, &closure);
+ ASSERT_EQ(1, (int64_t)srp.count());
+ ASSERT_EQ(0, srp.code());
+
+ sr.set_tid(tid);
+ sr.set_pid(pid);
+ sr.set_pk("test");
+ sr.set_st(cur_time + 20);
+ sr.set_et(cur_time - 20);
+ sr.set_limit(20);
+ tablet->Scan(NULL, &sr, &srp, &closure);
+ ASSERT_EQ(10, (int64_t)srp.count());
+ ASSERT_EQ(0, srp.code());
+
+ sr.set_tid(tid);
+ sr.set_pid(pid);
+ sr.set_pk("test2");
+ sr.set_st(cur_time + 20);
+ sr.set_et(cur_time - 20);
+ sr.set_limit(20);
+ tablet->Scan(NULL, &sr, &srp, &closure);
+ ASSERT_EQ(1, (int64_t)srp.count());
+ ASSERT_EQ(0, srp.code());
+
+ uint32_t count = 0;
+ while (count < 10) {
+ count++;
+ std::string key = "test3";
+ client.Put(tid, pid, key, cur_time + count, ::openmldb::test::EncodeKV(key, key));
+ }
+
+ sr.set_tid(tid);
+ sr.set_pid(pid);
+ sr.set_pk("test3");
+ sr.set_st(cur_time + 20);
+ sr.set_et(cur_time - 20);
+ sr.set_limit(20);
+ tablet->Scan(NULL, &sr, &srp, &closure);
+ ASSERT_EQ(10, (int64_t)srp.count());
+ ASSERT_EQ(0, srp.code());
+
+ ::openmldb::api::DropTableRequest dr;
+ dr.set_tid(tid);
+ dr.set_pid(pid);
+ ::openmldb::api::DropTableResponse drs;
+ tablet->DropTable(NULL, &dr, &drs, &closure);
+ sleep(2);
+ }
+}
+
TEST_P(SnapshotReplicaTest, LeaderAndFollowerTS) {
auto storage_mode = GetParam();
::openmldb::tablet::TabletImpl* tablet = new ::openmldb::tablet::TabletImpl();
diff --git a/src/sdk/mini_cluster.h b/src/sdk/mini_cluster.h
index a4c2e87d101..78100fb0225 100644
--- a/src/sdk/mini_cluster.h
+++ b/src/sdk/mini_cluster.h
@@ -75,10 +75,6 @@ class MiniCluster {
if (ns_client_) {
delete ns_client_;
}
-
- for (auto & t : tablets_) {
- delete t.second;
- }
base::RemoveDirRecursive(db_root_path_);
}
@@ -114,11 +110,12 @@ class MiniCluster {
return false;
}
brpc::ServerOptions options;
- if (ns_.AddService(nameserver, brpc::SERVER_DOESNT_OWN_SERVICE) != 0) {
- LOG(WARNING) << "fail to start ns";
+ if (ns_.AddService(nameserver, brpc::SERVER_OWNS_SERVICE) != 0) {
+ LOG(WARNING) << "fail to add ns";
return false;
}
if (ns_.Start(ns_endpoint.c_str(), &options) != 0) {
+ LOG(WARNING) << "fail to start ns";
return false;
}
sleep(2);
@@ -141,6 +138,7 @@ class MiniCluster {
for (int i = 0; i < tablet_num_; i++) {
tb_servers_[i].Stop(10);
+ tb_servers_[i].Join();
}
}
@@ -184,11 +182,12 @@ class MiniCluster {
return false;
}
brpc::ServerOptions ts_opt;
- if (tb_server->AddService(tablet, brpc::SERVER_DOESNT_OWN_SERVICE) != 0) {
- LOG(WARNING) << "fail to start tablet";
+ if (tb_server->AddService(tablet, brpc::SERVER_OWNS_SERVICE) != 0) {
+ LOG(WARNING) << "fail to add tablet";
return false;
}
if (tb_server->Start(tb_endpoint.c_str(), &ts_opt) != 0) {
+ LOG(WARNING) << "fail to start tablet";
return false;
}
ok = tablet->RegisterZK();
@@ -255,11 +254,12 @@ class StandaloneEnv {
return false;
}
brpc::ServerOptions options;
- if (ns_.AddService(nameserver, brpc::SERVER_DOESNT_OWN_SERVICE) != 0) {
- LOG(WARNING) << "fail to start ns";
+ if (ns_.AddService(nameserver, brpc::SERVER_OWNS_SERVICE) != 0) {
+ LOG(WARNING) << "fail to add ns";
return false;
}
if (ns_.Start(ns_endpoint.c_str(), &options) != 0) {
+ LOG(WARNING) << "fail to start ns";
return false;
}
sleep(2);
@@ -277,7 +277,9 @@ class StandaloneEnv {
void Close() {
ns_.Stop(10);
+ ns_.Join();
tb_server_.Stop(10);
+ tb_server_.Join();
}
::openmldb::client::NsClient* GetNsClient() { return ns_client_; }
@@ -302,11 +304,12 @@ class StandaloneEnv {
return false;
}
brpc::ServerOptions ts_opt;
- if (tb_server->AddService(tablet, brpc::SERVER_DOESNT_OWN_SERVICE) != 0) {
- LOG(WARNING) << "fail to start tablet";
+ if (tb_server->AddService(tablet, brpc::SERVER_OWNS_SERVICE) != 0) {
+ LOG(WARNING) << "fail to add tablet";
return false;
}
if (tb_server->Start(tb_endpoint.c_str(), &ts_opt) != 0) {
+ LOG(WARNING) << "fail to start tablet";
return false;
}
sleep(2);
diff --git a/src/sdk/sql_cluster_router.cc b/src/sdk/sql_cluster_router.cc
index d6ba3cc20cd..7a0597deead 100644
--- a/src/sdk/sql_cluster_router.cc
+++ b/src/sdk/sql_cluster_router.cc
@@ -50,7 +50,6 @@
DECLARE_int32(request_timeout_ms);
DECLARE_string(bucket_size);
-DEFINE_string(spark_conf, "", "The config file of Spark job");
DECLARE_uint32(replica_num);
namespace openmldb {
@@ -910,7 +909,7 @@ bool SQLClusterRouter::DropTable(const std::string& db, const std::string& table
std::shared_ptr<SQLCache> SQLClusterRouter::GetSQLCache(const std::string& db, const std::string& sql,
const ::hybridse::vm::EngineMode engine_mode,
const std::shared_ptr& parameter,
- hybridse::sdk::Status& status) { // NOLINT
+ hybridse::sdk::Status* status) {
::hybridse::codec::Schema parameter_schema_raw;
if (parameter) {
for (int i = 0; i < parameter->GetSchema()->GetColumnCnt(); i++) {
@@ -919,8 +918,8 @@ std::shared_ptr SQLClusterRouter::GetSQLCache(const std::string& db, c
if (!openmldb::schema::SchemaAdapter::ConvertType(parameter->GetSchema()->GetColumnType(i),
&hybridse_type)) {
LOG(WARNING) << "Invalid parameter type ";
- status.msg = "Invalid parameter type";
- status.code = -1;
+ status->msg = "Invalid parameter type";
+ status->code = -1;
return {};
}
column->set_type(hybridse_type);
@@ -951,9 +950,9 @@ std::shared_ptr SQLClusterRouter::GetSQLCache(const std::string& db, c
cache = std::make_shared(schema, parameter_schema, explain.router, explain.limit_cnt);
SetCache(db, sql, engine_mode, cache);
} else {
- status.msg = base_status.GetMsg();
- status.trace = base_status.GetTraces();
- status.code = -1;
+ status->msg = base_status.GetMsg();
+ status->trace = base_status.GetTraces();
+ status->code = -1;
return {};
}
}
@@ -961,15 +960,15 @@ std::shared_ptr SQLClusterRouter::GetSQLCache(const std::string& db, c
}
std::shared_ptr<::openmldb::client::TabletClient> SQLClusterRouter::GetTabletClient(
const std::string& db, const std::string& sql, const ::hybridse::vm::EngineMode engine_mode,
- const std::shared_ptr& row, hybridse::sdk::Status& status) {
+ const std::shared_ptr& row, hybridse::sdk::Status* status) {
return GetTabletClient(db, sql, engine_mode, row, std::shared_ptr(), status);
}
std::shared_ptr<::openmldb::client::TabletClient> SQLClusterRouter::GetTabletClient(
const std::string& db, const std::string& sql, const ::hybridse::vm::EngineMode engine_mode,
const std::shared_ptr& row, const std::shared_ptr& parameter,
- hybridse::sdk::Status& status) {
+ hybridse::sdk::Status* status) {
auto cache = GetSQLCache(db, sql, engine_mode, parameter, status);
- if (0 != status.code) {
+ if (0 != status->code) {
return {};
}
std::shared_ptr<::openmldb::catalog::TabletAccessor> tablet;
@@ -992,8 +991,8 @@ std::shared_ptr<::openmldb::client::TabletClient> SQLClusterRouter::GetTabletCli
tablet = cluster_sdk_->GetTablet();
}
if (!tablet) {
- status.msg = "fail to get tablet";
- status.code = hybridse::common::kRunError;
+ status->msg = "fail to get tablet";
+ status->code = hybridse::common::kRunError;
LOG(WARNING) << "fail to get tablet";
return {};
}
@@ -1007,7 +1006,7 @@ std::shared_ptr<::openmldb::client::TabletClient> SQLClusterRouter::GetTabletCli
if (status == nullptr) {
return {};
}
- auto cache = GetSQLCache(db, sql, hybridse::vm::kBatchMode, parameter, *status);
+ auto cache = GetSQLCache(db, sql, hybridse::vm::kBatchMode, parameter, status);
if (0 != status->code) {
return {};
}
@@ -1106,7 +1105,7 @@ std::shared_ptr SQLClusterRouter::ExecuteSQLRequest(co
auto cntl = std::make_shared<::brpc::Controller>();
cntl->set_timeout_ms(options_.request_timeout);
auto response = std::make_shared<::openmldb::api::QueryResponse>();
- auto client = GetTabletClient(db, sql, hybridse::vm::kRequestMode, row, *status);
+ auto client = GetTabletClient(db, sql, hybridse::vm::kRequestMode, row, status);
if (0 != status->code) {
return {};
}
@@ -1132,7 +1131,7 @@ std::shared_ptr<::hybridse::sdk::ResultSet> SQLClusterRouter::ExecuteSQLParamete
const std::string& db, const std::string& sql, std::shared_ptr parameter,
::hybridse::sdk::Status* status) {
std::vector parameter_types;
- if (parameter && !ExtractDBTypes(parameter->GetSchema(), parameter_types)) {
+ if (parameter && !ExtractDBTypes(parameter->GetSchema(), &parameter_types)) {
status->msg = "convert parameter types error";
status->code = -1;
return {};
@@ -1168,7 +1167,7 @@ std::shared_ptr SQLClusterRouter::ExecuteSQLBatchReque
cntl->set_timeout_ms(options_.request_timeout);
auto response = std::make_shared<::openmldb::api::SQLBatchRequestQueryResponse>();
auto client = GetTabletClient(db, sql, hybridse::vm::kBatchRequestMode, std::shared_ptr(),
- std::shared_ptr(), *status);
+ std::shared_ptr(), status);
if (0 != status->code) {
return nullptr;
}
@@ -1785,7 +1784,7 @@ std::shared_ptr SQLClusterRouter::HandleSQLCmd(const h
}
::openmldb::taskmanager::JobInfo job_info;
- StopJob(job_id, job_info);
+ StopJob(job_id, &job_info);
std::vector<::openmldb::taskmanager::JobInfo> job_infos;
if (job_info.id() > 0) {
@@ -2003,7 +2002,7 @@ bool SQLClusterRouter::CheckSQLSyntax(const std::string& sql) {
}
bool SQLClusterRouter::ExtractDBTypes(const std::shared_ptr& schema,
- std::vector& db_types) { // NOLINT
+ std::vector* db_types) {
if (schema) {
for (int i = 0; i < schema->GetColumnCnt(); i++) {
openmldb::type::DataType casted_type;
@@ -2011,7 +2010,7 @@ bool SQLClusterRouter::ExtractDBTypes(const std::shared_ptrGetColumnType(i);
return false;
}
- db_types.push_back(casted_type);
+ db_types->push_back(casted_type);
}
}
return true;
@@ -2137,7 +2136,7 @@ bool SQLClusterRouter::UpdateOfflineTableInfo(const ::openmldb::nameserver::Tabl
}
::openmldb::base::Status SQLClusterRouter::ShowJobs(const bool only_unfinished,
- std::vector<::openmldb::taskmanager::JobInfo>& job_infos) {
+ std::vector<::openmldb::taskmanager::JobInfo>* job_infos) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2145,7 +2144,7 @@ ::openmldb::base::Status SQLClusterRouter::ShowJobs(const bool only_unfinished,
return taskmanager_client_ptr->ShowJobs(only_unfinished, GetJobTimeout(), job_infos);
}
-::openmldb::base::Status SQLClusterRouter::ShowJob(const int id, ::openmldb::taskmanager::JobInfo& job_info) {
+::openmldb::base::Status SQLClusterRouter::ShowJob(const int id, ::openmldb::taskmanager::JobInfo* job_info) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2153,7 +2152,7 @@ ::openmldb::base::Status SQLClusterRouter::ShowJob(const int id, ::openmldb::tas
return taskmanager_client_ptr->ShowJob(id, GetJobTimeout(), job_info);
}
-::openmldb::base::Status SQLClusterRouter::StopJob(const int id, ::openmldb::taskmanager::JobInfo& job_info) {
+::openmldb::base::Status SQLClusterRouter::StopJob(const int id, ::openmldb::taskmanager::JobInfo* job_info) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2164,7 +2163,7 @@ ::openmldb::base::Status SQLClusterRouter::StopJob(const int id, ::openmldb::tas
::openmldb::base::Status SQLClusterRouter::ExecuteOfflineQueryAsync(const std::string& sql,
const std::map<std::string, std::string>& config,
const std::string& default_db, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2174,7 +2173,7 @@ ::openmldb::base::Status SQLClusterRouter::ExecuteOfflineQueryAsync(const std::s
::openmldb::base::Status SQLClusterRouter::ExecuteOfflineQueryGetOutput(
const std::string& sql, const std::map<std::string, std::string>& config, const std::string& default_db,
- int job_timeout, std::string& output) {
+ int job_timeout, std::string* output) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2186,7 +2185,7 @@ ::openmldb::base::Status SQLClusterRouter::ImportOnlineData(const std::string& s
const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job,
int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2198,7 +2197,7 @@ ::openmldb::base::Status SQLClusterRouter::ImportOfflineData(const std::string&
const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job,
int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2210,7 +2209,7 @@ ::openmldb::base::Status SQLClusterRouter::ExportOfflineData(const std::string&
const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job,
int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info) {
+ ::openmldb::taskmanager::JobInfo* job_info) {
auto taskmanager_client_ptr = cluster_sdk_->GetTaskManagerClient();
if (!taskmanager_client_ptr) {
return {-1, "Fail to get TaskManager client"};
@@ -2442,8 +2441,8 @@ std::shared_ptr SQLClusterRouter::ExecuteSQL(const std
} else {
::openmldb::taskmanager::JobInfo job_info;
std::map<std::string, std::string> config;
- ReadSparkConfFromFile(FLAGS_spark_conf, &config);
- auto base_status = ExportOfflineData(sql, config, db, is_sync_job, offline_job_timeout, job_info);
+ ReadSparkConfFromFile(options_.spark_conf_path, &config);
+ auto base_status = ExportOfflineData(sql, config, db, is_sync_job, offline_job_timeout, &job_info);
if (base_status.OK()) {
*status = {};
if (job_info.id() > 0) {
@@ -2473,15 +2472,15 @@ std::shared_ptr SQLClusterRouter::ExecuteSQL(const std
// Handle in cluster mode
::openmldb::taskmanager::JobInfo job_info;
std::map<std::string, std::string> config;
- ReadSparkConfFromFile(FLAGS_spark_conf, &config);
+ ReadSparkConfFromFile(options_.spark_conf_path, &config);
::openmldb::base::Status base_status;
if (is_online_mode) {
// Handle in online mode
- base_status = ImportOnlineData(sql, config, database, is_sync_job, offline_job_timeout, job_info);
+ base_status = ImportOnlineData(sql, config, database, is_sync_job, offline_job_timeout, &job_info);
} else {
// Handle in offline mode
- base_status = ImportOfflineData(sql, config, database, is_sync_job, offline_job_timeout, job_info);
+ base_status = ImportOfflineData(sql, config, database, is_sync_job, offline_job_timeout, &job_info);
}
if (base_status.OK() && job_info.id() > 0) {
std::stringstream ss;
@@ -2518,12 +2517,12 @@ std::shared_ptr SQLClusterRouter::ExecuteOfflineQuery(
bool is_sync_job, int job_timeout,
::hybridse::sdk::Status* status) {
std::map<std::string, std::string> config;
- ReadSparkConfFromFile(FLAGS_spark_conf, &config);
+ ReadSparkConfFromFile(options_.spark_conf_path, &config);
if (is_sync_job) {
// Run offline sql and wait to get output
std::string output;
- auto base_status = ExecuteOfflineQueryGetOutput(sql, config, db, job_timeout, output);
+ auto base_status = ExecuteOfflineQueryGetOutput(sql, config, db, job_timeout, &output);
if (!base_status.OK()) {
*status = {::hybridse::common::StatusCode::kCmdError, base_status.msg};
return {};
@@ -2535,7 +2534,7 @@ std::shared_ptr SQLClusterRouter::ExecuteOfflineQuery(
} else {
// Run offline sql and return job info immediately
::openmldb::taskmanager::JobInfo job_info;
- auto base_status = ExecuteOfflineQueryAsync(sql, config, db, job_timeout, job_info);
+ auto base_status = ExecuteOfflineQueryAsync(sql, config, db, job_timeout, &job_info);
if (!base_status.OK()) {
*status = {::hybridse::common::StatusCode::kCmdError, base_status.msg};
return {};
@@ -3055,7 +3054,7 @@ hybridse::sdk::Status SQLClusterRouter::HandleIndex(const std::string& db,
return get_index_status;
}
- auto add_index_status = AddNewIndex(table_map, new_index_map);
+ auto add_index_status = AddNewIndex(db, table_map, new_index_map);
if (!add_index_status.IsOK()) {
return add_index_status;
}
@@ -3179,16 +3178,16 @@ hybridse::sdk::Status SQLClusterRouter::GetNewIndex(
return {};
}
-hybridse::sdk::Status SQLClusterRouter::AddNewIndex(
+hybridse::sdk::Status SQLClusterRouter::AddNewIndex(const std::string& db,
const std::map& table_map,
const std::map>& new_index_map) {
auto ns = cluster_sdk_->GetNsClient();
if (cluster_sdk_->IsClusterMode()) {
for (auto& kv : new_index_map) {
- auto status = ns->AddMultiIndex(kv.first, kv.second);
+ auto status = ns->AddMultiIndex(db, kv.first, kv.second);
if (!status.OK()) {
return {::hybridse::common::StatusCode::kCmdError,
- "table " + kv.first + " add index failed. " + status.msg};
+ "table [" + db + "." + kv.first + "] add index failed. " + status.msg};
}
}
} else {
@@ -3721,15 +3720,15 @@ std::shared_ptr SQLClusterRouter::ExecuteShowTableStat
return ResultSetSQL::MakeResultSet(GetTableStatusSchema(), data, status);
}
-void SQLClusterRouter::ReadSparkConfFromFile(std::string conf_file, std::map<std::string, std::string>* config) {
- if (!conf_file.empty()) {
+void SQLClusterRouter::ReadSparkConfFromFile(std::string conf_file_path, std::map<std::string, std::string>* config) {
+ if (!conf_file_path.empty()) {
boost::property_tree::ptree pt;
try {
- boost::property_tree::ini_parser::read_ini(FLAGS_spark_conf, pt);
- LOG(INFO) << "Load Spark conf file: " << conf_file;
+ boost::property_tree::ini_parser::read_ini(conf_file_path, pt);
+ LOG(INFO) << "Load Spark conf file: " << conf_file_path;
} catch (...) {
- LOG(WARNING) << "Fail to load Spark conf file: " << conf_file;
+ LOG(WARNING) << "Fail to load Spark conf file: " << conf_file_path;
return;
}
diff --git a/src/sdk/sql_cluster_router.h b/src/sdk/sql_cluster_router.h
index 7e3bd125da7..b71e9872335 100644
--- a/src/sdk/sql_cluster_router.h
+++ b/src/sdk/sql_cluster_router.h
@@ -224,16 +224,16 @@ class SQLClusterRouter : public SQLRouter {
std::shared_ptr<::openmldb::client::TabletClient> GetTabletClient(const std::string& db, const std::string& sql,
::hybridse::vm::EngineMode engine_mode,
const std::shared_ptr& row,
- hybridse::sdk::Status& status); // NOLINT
+ hybridse::sdk::Status* status);
std::shared_ptr<::openmldb::client::TabletClient> GetTabletClient(
const std::string& db, const std::string& sql, ::hybridse::vm::EngineMode engine_mode,
const std::shared_ptr& row, const std::shared_ptr& parameter_row,
- hybridse::sdk::Status& status); // NOLINT
+ hybridse::sdk::Status* status);
std::shared_ptr<SQLCache> GetSQLCache(const std::string& db, const std::string& sql,
::hybridse::vm::EngineMode engine_mode,
const std::shared_ptr& parameter_row,
- hybridse::sdk::Status& status); // NOLINT
+ hybridse::sdk::Status* status);
std::shared_ptr<::openmldb::client::TabletClient> GetTabletClientForBatchQuery(
const std::string& db, const std::string& sql, const std::shared_ptr& parameter_row,
@@ -258,11 +258,11 @@ class SQLClusterRouter : public SQLRouter {
bool UpdateOfflineTableInfo(const ::openmldb::nameserver::TableInfo& info) override;
::openmldb::base::Status ShowJobs(bool only_unfinished,
- std::vector<::openmldb::taskmanager::JobInfo>& job_infos) override;
+ std::vector<::openmldb::taskmanager::JobInfo>* job_infos) override;
- ::openmldb::base::Status ShowJob(int id, ::openmldb::taskmanager::JobInfo& job_info) override;
+ ::openmldb::base::Status ShowJob(int id, ::openmldb::taskmanager::JobInfo* job_info) override;
- ::openmldb::base::Status StopJob(int id, ::openmldb::taskmanager::JobInfo& job_info) override;
+ ::openmldb::base::Status StopJob(int id, ::openmldb::taskmanager::JobInfo* job_info) override;
std::shared_ptr ExecuteOfflineQuery(const std::string& db, const std::string& sql,
bool is_sync_job, int job_timeout,
@@ -270,15 +270,15 @@ class SQLClusterRouter : public SQLRouter {
::openmldb::base::Status ImportOnlineData(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status ImportOfflineData(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status ExportOfflineData(const std::string& sql, const std::map<std::string, std::string>& config,
const std::string& default_db, bool sync_job, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status CreatePreAggrTable(const std::string& aggr_db, const std::string& aggr_table,
const ::openmldb::base::LongWindowInfo& window_info,
@@ -298,6 +298,10 @@ class SQLClusterRouter : public SQLRouter {
void ReadSparkConfFromFile(std::string conf_file, std::map<std::string, std::string>* config);
+ SQLRouterOptions GetSqlRouterOptions() {
+ return options_;
+ }
+
private:
bool IsSyncJob();
// get job timeout from the session variables, we will use the timeout when sending requests to the taskmanager
@@ -306,12 +310,12 @@ class SQLClusterRouter : public SQLRouter {
::openmldb::base::Status ExecuteOfflineQueryAsync(const std::string& sql,
const std::map<std::string, std::string>& config,
const std::string& default_db, int job_timeout,
- ::openmldb::taskmanager::JobInfo& job_info); // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info);
::openmldb::base::Status ExecuteOfflineQueryGetOutput(const std::string& sql,
const std::map<std::string, std::string>& config,
const std::string& default_db, int job_timeout,
- std::string& output); // NOLINT
+ std::string* output);
void GetTables(::hybridse::vm::PhysicalOpNode* node, std::set* tables);
@@ -347,7 +351,7 @@ class SQLClusterRouter : public SQLRouter {
hybridse::sdk::Status* status);
bool ExtractDBTypes(const std::shared_ptr& schema,
- std::vector& parameter_types); // NOLINT
+ std::vector* parameter_types);
::hybridse::sdk::Status SetVariable(hybridse::node::SetPlanNode* node);
@@ -380,7 +384,7 @@ class SQLClusterRouter : public SQLRouter {
std::map>* new_index_map);
hybridse::sdk::Status AddNewIndex(
- const std::map& table_map,
+ const std::string& db, const std::map& table_map,
const std::map>& new_index_map);
hybridse::sdk::Status HandleCreateFunction(const hybridse::node::CreateFunctionPlanNode* node);
diff --git a/src/sdk/sql_cluster_test.cc b/src/sdk/sql_cluster_test.cc
index f829e15fc5b..cd3f51911ba 100644
--- a/src/sdk/sql_cluster_test.cc
+++ b/src/sdk/sql_cluster_test.cc
@@ -411,7 +411,8 @@ TEST_F(SQLSDKQueryTest, GetTabletClient) {
ASSERT_TRUE(request_row->Build());
auto sql_cluster_router = std::dynamic_pointer_cast(router);
hybridse::sdk::Status sdk_status;
- auto client = sql_cluster_router->GetTabletClient(db, sql, hybridse::vm::kRequestMode, request_row, sdk_status);
+ auto client = sql_cluster_router->GetTabletClient(db, sql, hybridse::vm::kRequestMode,
+ request_row, &sdk_status);
int pid = ::openmldb::base::hash64(pk) % 2;
// only assert leader partition
for (int i = 0; i < 3; i++) {
diff --git a/src/sdk/sql_insert_row.h b/src/sdk/sql_insert_row.h
index 8f8e00fca73..bfa895d868f 100644
--- a/src/sdk/sql_insert_row.h
+++ b/src/sdk/sql_insert_row.h
@@ -35,30 +35,6 @@ namespace openmldb::sdk {
typedef std::shared_ptr>> DefaultValueMap;
-static inline ::hybridse::sdk::DataType ConvertType(::openmldb::type::DataType type) {
- switch (type) {
- case openmldb::type::kBool:
- return ::hybridse::sdk::kTypeBool;
- case openmldb::type::kSmallInt:
- return ::hybridse::sdk::kTypeInt16;
- case openmldb::type::kInt:
- return ::hybridse::sdk::kTypeInt32;
- case openmldb::type::kBigInt:
- return ::hybridse::sdk::kTypeInt64;
- case openmldb::type::kFloat:
- return ::hybridse::sdk::kTypeFloat;
- case openmldb::type::kDouble:
- return ::hybridse::sdk::kTypeDouble;
- case openmldb::type::kTimestamp:
- return ::hybridse::sdk::kTypeTimestamp;
- case openmldb::type::kString:
- case openmldb::type::kVarchar:
- return ::hybridse::sdk::kTypeString;
- default:
- return ::hybridse::sdk::kTypeUnknow;
- }
-}
-
class SQLInsertRow {
public:
SQLInsertRow(std::shared_ptr<::openmldb::nameserver::TableInfo> table_info,
diff --git a/src/sdk/sql_router.h b/src/sdk/sql_router.h
index 6dc6185288a..a6a30b5ecc3 100644
--- a/src/sdk/sql_router.h
+++ b/src/sdk/sql_router.h
@@ -46,6 +46,7 @@ struct SQLRouterOptions : BasicRouterOptions {
std::string zk_cluster;
std::string zk_path;
uint32_t zk_session_timeout = 2000;
+ std::string spark_conf_path;
};
struct StandaloneOptions : BasicRouterOptions {
@@ -171,13 +172,13 @@ class SQLRouter {
virtual bool UpdateOfflineTableInfo(const ::openmldb::nameserver::TableInfo& info) = 0;
virtual ::openmldb::base::Status ShowJobs(const bool only_unfinished,
- std::vector<::openmldb::taskmanager::JobInfo>& job_infos) = 0; // NOLINT
+ std::vector<::openmldb::taskmanager::JobInfo>* job_infos) = 0;
virtual ::openmldb::base::Status ShowJob(const int id,
- ::openmldb::taskmanager::JobInfo& job_info) = 0; // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info) = 0;
virtual ::openmldb::base::Status StopJob(const int id,
- ::openmldb::taskmanager::JobInfo& job_info) = 0; // NOLINT
+ ::openmldb::taskmanager::JobInfo* job_info) = 0;
virtual std::shared_ptr ExecuteOfflineQuery(const std::string& db, const std::string& sql,
bool is_sync_job, int job_timeout,
diff --git a/src/tablet/tablet_impl.cc b/src/tablet/tablet_impl.cc
index 33a7b9e917c..d196626eec8 100644
--- a/src/tablet/tablet_impl.cc
+++ b/src/tablet/tablet_impl.cc
@@ -3121,16 +3121,17 @@ int TabletImpl::LoadDiskTableInternal(uint32_t tid, uint32_t pid, const ::openml
std::string manifest_file = snapshot_path + "MANIFEST";
if (Snapshot::GetLocalManifest(manifest_file, manifest) == 0) {
std::string snapshot_dir = snapshot_path + manifest.name();
- PDLOG(INFO, "rename dir %s to %s. tid %u pid %u", snapshot_dir.c_str(), data_path.c_str(), tid, pid);
- if (!::openmldb::base::Rename(snapshot_dir, data_path)) {
- PDLOG(WARNING, "rename dir failed. tid %u pid %u path %s", tid, pid, snapshot_dir.c_str());
- break;
- }
- if (unlink(manifest_file.c_str()) < 0) {
- PDLOG(WARNING, "remove manifest failed. tid %u pid %u path %s", tid, pid, manifest_file.c_str());
- break;
+ if (::openmldb::base::IsExists(snapshot_dir)) {
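+ // hard-link the snapshot files into the data dir instead of renaming them, so the snapshot itself stays intact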
+ PDLOG(INFO, "hardlink dir %s to %s (tid %u pid %u)", snapshot_dir.c_str(), data_path.c_str(), tid, pid);
+ if (::openmldb::base::HardLinkDir(snapshot_dir, data_path)) {
+ PDLOG(WARNING, "hardlink snapshot dir %s to data dir failed (tid %u pid %u)", snapshot_dir.c_str(),
+ tid, pid);
+ break;
+ }
+ snapshot_offset = manifest.offset();
+ } else {
+ PDLOG(WARNING, "snapshot_dir %s with tid %u pid %u not exists", snapshot_dir.c_str(), tid, pid);
}
- snapshot_offset = manifest.offset();
}
std::string msg;
if (CreateTableInternal(&table_meta, msg) < 0) {
@@ -3178,7 +3179,6 @@ int TabletImpl::LoadDiskTableInternal(uint32_t tid, uint32_t pid, const ::openml
task_pool_.DelayTask(FLAGS_binlog_delete_interval,
boost::bind(&TabletImpl::SchedDelBinlog, this, tid, pid));
PDLOG(INFO, "load table success. tid %u pid %u", tid, pid);
- MakeSnapshotInternal(tid, pid, 0, std::shared_ptr<::openmldb::api::TaskInfo>());
std::string old_data_path = table_path + "/old_data";
if (::openmldb::base::IsExists(old_data_path)) {
if (!::openmldb::base::RemoveDir(old_data_path)) {
@@ -3191,6 +3191,8 @@ int TabletImpl::LoadDiskTableInternal(uint32_t tid, uint32_t pid, const ::openml
task_ptr->set_status(::openmldb::api::TaskStatus::kDone);
return 0;
}
+ PDLOG(INFO, "Recover table with tid %u and pid %u from binlog offset %u to %u", tid, pid, snapshot_offset,
+ latest_offset);
} else {
DeleteTableInternal(tid, pid, std::shared_ptr<::openmldb::api::TaskInfo>());
}
diff --git a/src/test/base_test.cc b/src/test/base_test.cc
index e4c67ed73a2..8ddf1c5bdf7 100644
--- a/src/test/base_test.cc
+++ b/src/test/base_test.cc
@@ -485,11 +485,11 @@ void SQLCaseTest::CheckRows(const hybridse::vm::Schema &schema, const std::strin
std::map> rows_map;
if (order_idx >= 0) {
int32_t row_id = 0;
- for (auto row : rows) {
+ for (auto& row : rows) {
row_view.Reset(row.buf());
std::string key = row_view.GetAsString(order_idx);
LOG(INFO) << "Get Order String: " << row_id++ << " key: " << key;
- rows_map.insert(std::make_pair(key, std::make_pair(row, false)));
+ rows_map.try_emplace(key, row, false);
}
}
int32_t index = 0;