From 8c5f58e8d18d14ce4c8bfab6d9b0c1ed575ba39c Mon Sep 17 00:00:00 2001 From: aceforeverd Date: Thu, 21 Jul 2022 17:37:37 +0800 Subject: [PATCH] fix(#2177): avoid ts overflow in window (#2181) * fix(#2177): avoid ts overflow if `window .. and 0 preceding exclude current_time`, and input key is 0 * fix: sql_sdk_test * fix yaml case desc --- .../test_window_exclude_current_time.yaml | 62 +++++++++------- cases/function/window/test_window_union.yaml | 39 ++++++++-- cases/function/window/window_attributes.yaml | 71 ++++++++++++++++--- hybridse/include/vm/mem_catalog.h | 52 ++++++++++---- hybridse/src/vm/runner.cc | 12 +++- src/test/base_test.cc | 4 +- 6 files changed, 183 insertions(+), 57 deletions(-) diff --git a/cases/function/window/test_window_exclude_current_time.yaml b/cases/function/window/test_window_exclude_current_time.yaml index ccef8ae1e28..c890a64116c 100644 --- a/cases/function/window/test_window_exclude_current_time.yaml +++ b/cases/function/window/test_window_exclude_current_time.yaml @@ -20,6 +20,9 @@ cases: - columns: [ "c1 string","c3 int","c4 double","c7 timestamp" ] indexs: [ "index1:c1:c7" ] rows: + - [ "aa",-2, 1.0, 0 ] + - [ "aa",-1, 1.0, 0 ] + - [ "aa",0, 1.0, 0 ] - [ "aa",1, 1.0, 1590738990000 ] - [ "aa",2, 1.0, 1590738990000 ] - [ "aa",3, 1.0, 1590738992000 ] @@ -38,6 +41,9 @@ cases: order: c3 columns: [ "c1 string", "c3 int", "c7 timestamp", "w1_c4_sum double" ] rows: + - [ "aa", -2, 0, 1.0 ] + - [ "aa", -1, 0, 1.0 ] + - [ "aa", 0, 0, 1.0 ] - [ "aa", 1, 1590738990000, 1.0 ] - [ "aa", 2, 1590738990000, 1.0 ] - [ "aa", 3, 1590738992000, 3.0 ] @@ -86,15 +92,17 @@ cases: - columns: [ "c1 string","c3 int","c4 double","c7 timestamp" ] indexs: [ "index1:c1:c7" ] rows: - - [ "aa",1, 1.0, 1590738990000 ] - - [ "aa",2, 1.0, 1590738990000 ] - - [ "aa",3, 1.0, 1590738992000 ] - - [ "aa",4, 1.0, 1590738993000 ] - - [ "aa",5, 1.0, 1590738994000 ] - - [ "aa",6, 1.0, 1590738994000 ] - - [ "aa",7, 1.0, 1590738999000 ] - - [ "aa",8, 1.0, 1590739001000 ] - - [ "aa",9, 1.0, 1590739002000 ] + - [ "aa",-1, 1.0, 0 ] + - [ "aa", 0, 1.0, 0 ] + - [ "aa", 1, 1.0, 1590738990000 ] + - [ "aa", 2, 1.0, 1590738990000 ] + - [ "aa", 3, 1.0, 1590738992000 ] + - [ "aa", 4, 1.0, 1590738993000 ] + - [ "aa", 5, 1.0, 1590738994000 ] + - [ "aa", 6, 1.0, 1590738994000 ] + - [ "aa", 7, 1.0, 1590738999000 ] + - [ "aa", 8, 1.0, 1590739001000 ] + - [ "aa", 9, 1.0, 1590739002000 ] sql: | SELECT c1, c3, c7, sum(c4) OVER w1 as w1_c4_sum @@ -104,6 +112,8 @@ cases: order: c3 columns: [ "c1 string", "c3 int", "c7 timestamp", "w1_c4_sum double" ] rows: + - [ "aa",-1, 0, 1.0 ] + - [ "aa", 0, 0, 1.0 ] - [ "aa", 1, 1590738990000, 1.0 ] - [ "aa", 2, 1590738990000, 1.0 ] - [ "aa", 3, 1590738992000, 3.0 ] @@ -119,15 +129,17 @@ cases: - columns: [ "c1 string","c3 int","c4 double","c7 timestamp" ] indexs: [ "index1:c1:c7" ] rows: - - [ "aa",1, 1.0, 1590738990000 ] - - [ "aa",2, 1.0, 1590738990000 ] - - [ "aa",3, 1.0, 1590738992000 ] - - [ "aa",4, 1.0, 1590738993000 ] - - [ "aa",5, 1.0, 1590738994000 ] - - [ "aa",6, 1.0, 1590738994000 ] - - [ "aa",7, 1.0, 1590738999000 ] - - [ "aa",8, 1.0, 1590739001000 ] - - [ "aa",9, 1.0, 1590739002000 ] + - [ "aa",-1, 1.0, 0] + - [ "aa", 0, 1.0, 0] + - [ "aa", 1, 1.0, 1590738990000 ] + - [ "aa", 2, 1.0, 1590738990000 ] + - [ "aa", 3, 1.0, 1590738992000 ] + - [ "aa", 4, 1.0, 1590738993000 ] + - [ "aa", 5, 1.0, 1590738994000 ] + - [ "aa", 6, 1.0, 1590738994000 ] + - [ "aa", 7, 1.0, 1590738999000 ] + - [ "aa", 8, 1.0, 1590739001000 ] + - [ "aa", 9, 1.0, 1590739002000 ] sql: | SELECT c1, c3, c7, sum(c4) OVER w1 as w1_c4_sum @@ -137,12 +149,14 @@ cases: order: c3 columns: [ "c1 string", "c3 int", "c7 timestamp", "w1_c4_sum double" ] rows: - - [ "aa", 1, 1590738990000, 1.0 ] - - [ "aa", 2, 1590738990000, 1.0 ] - - [ "aa", 3, 1590738992000, 3.0 ] - - [ "aa", 4, 1590738993000, 4.0 ] - - [ "aa", 5, 1590738994000, 5.0 ] - - [ "aa", 6, 1590738994000, 5.0 ] + - [ "aa",-1, 0, 1.0 ] + - [ "aa", 0, 0, 1.0 ] + - [ "aa", 1, 1590738990000, 3.0 ] + - [ "aa", 2, 1590738990000, 3.0 ] + - [ "aa", 3, 1590738992000, 5.0 ] + - [ "aa", 4, 1590738993000, 6.0 ] + - [ "aa", 5, 1590738994000, 7.0 ] + - [ "aa", 6, 1590738994000, 7.0 ] - [ "aa", 7, 1590738999000, 7.0 ] - [ "aa", 8, 1590739001000, 7.0 ] - [ "aa", 9, 1590739002000, 7.0 ] diff --git a/cases/function/window/test_window_union.yaml b/cases/function/window/test_window_union.yaml index d3fdbed82dd..102934ff116 100644 --- a/cases/function/window/test_window_union.yaml +++ b/cases/function/window/test_window_union.yaml @@ -733,8 +733,11 @@ cases: indexs: - idx:g:ts data: | - 1, 100, 111, 21 - 2, 100, 111, 5 + 0, 0, 111, 19 + 1, 0, 111, 18 + 2, 100, 111, 21 + 3, 100, 111, 5 + 4, 101, 111, 100 - name: t2 columns: - id int @@ -747,6 +750,15 @@ cases: 1, 99, 111, 233 1, 100, 111, 200 1, 101, 111, 17 + # raw union window (before filter) + # 0, 0, 111, 19 + # 1, 0, 111, 18 + # 1, 99, 111, 233 (t2) + # 1, 100, 111, 200 (t2) + # 2, 100, 111, 21 + # 3, 100, 111, 5 + # 1, 101, 111, 17 (t2) + # 4, 101, 111, 100 sql: | select id, count(val) over w as cnt, @@ -766,8 +778,11 @@ cases: - l1 int order: id data: | - 1, 2, 233, 21, 233 - 2, 2, 233, 5, 233 + 0, 1, 19, 19, NULL + 1, 1, 18, 18, NULL + 2, 4, 233, 18, 233 + 3, 4, 233, 5, 233 + 4, 7, 233, 5, 5 - id: 18-5 desc: | @@ -1230,7 +1245,7 @@ cases: 3, 2, 233, 200, 200 4, 3, 233, 17, 17 - # rows_range union window with exclude current_row, single window + # rows_range union window with exclude current_row, single window - id: 24 desc: | rows_range union window with exclude_current_row @@ -1314,6 +1329,9 @@ cases: 2, 100, 111, 5 3, 101, 111, 0 4, 102, 111, 0 + 5, 0, 114, 7 + 6, 0, 114, 8 + 7, 100, 114, 9 - name: t2 columns: - id int @@ -1363,6 +1381,9 @@ cases: 2, 1, 233, 233 3, 4, 233, 5 4, 6, 233, 0 + 5, 0, NULL, NULL + 6, 0, NULL, NULL + 7, 2, 8, 7 - id: 26 desc: | rows_range union window with exclude_current_row and instance_not_in_window @@ -1647,6 +1668,10 @@ cases: 2, 100, 111, 5 3, 101, 111, 0 4, 102, 111, 0 + 5, 0, 114, 9 + 6, 0, 114, 17 + 7, 100, 114, 11 + 8, 101, 114, 14 - name: t2 columns: - id int @@ -1697,3 +1722,7 @@ cases: 2, 1, 233, 233 3, 2, 21, 5 4, 2, 17, 0 + 5, 0, NULL, NULL + 6, 0, NULL, NULL + 7, 2, 17, 9 + 8, 2, 17, 11 diff --git a/cases/function/window/window_attributes.yaml b/cases/function/window/window_attributes.yaml index f1e54311993..3080dfeab87 100644 --- a/cases/function/window/window_attributes.yaml +++ b/cases/function/window/window_attributes.yaml @@ -18,10 +18,12 @@ cases: indexs: - idx:g:ts data: | - 1, 99000, 111, 21 - 2, 100000, 111, 22 - 3, 101000, 111, 23 - 4, 100000, 114, 56 + 0, 0, 111, 0 + 1, 0, 111, 0 + 2, 99000, 111, 21 + 3, 100000, 111, 22 + 4, 101000, 111, 23 + 5, 100000, 114, 56 sql: | select id, @@ -58,10 +60,12 @@ cases: - l1 int order: id data: | - 1, 0, NULL, NULL, NULL - 2, 1, 21, 21, 21 - 3, 2, 22, 21, 22 - 4, 0, NULL, NULL, NULL + 0, 0, NULL, NULL, NULL + 1, 1, 0, 0, 0 + 2, 0, NULL, NULL, 0 + 3, 1, 21, 21, 21 + 4, 2, 22, 21, 22 + 5, 0, NULL, NULL, NULL - id: 1 desc: | ROWS window with exclude_current_row, '0 PRECEDING EXCLUDE CURRENT_ROW' actually is the same as '0 OPEN PRECEDING' @@ -89,7 +93,6 @@ cases: from t1 window w as( partition by `g` order by `ts` ROWS between 2 PRECEDING and 0 preceding EXCLUDE CURRENT_ROW); - # batch_plan: | expect: columns: - id int @@ -478,3 +481,53 @@ cases: 4, 3, 23, 21, 23 5, 0, NULL, NULL, NULL 6, 1, 56, 56, 56 + - id: 9 + desc: | + ROWS Window with exclude current_time and exclude current_row + inputs: + - name: t1 + columns: + - id int + - ts timestamp + - g int + - val int + indexs: + - idx:g:ts + data: | + 1, 99000, 111, 21 + 2, 100000, 111, 22 + 3, 101000, 111, 23 + 4, 102000, 111, 44 + 5, 0, 114, 0 + 6, 0, 114, 99 + 7, 100000, 114, 56 + 8, 102000, 114, 52 + 9, 104000, 114, 33 + sql: | + select + id, + count(val) over w as cnt, + max(val) over w as mv, + min(val) over w as mi, + lag(val, 1) over w as l1 + FROM t1 WINDOW w as( + PARTITION by `g` ORDER by `ts` + ROWS BETWEEN 3 PRECEDING AND CURRENT ROW EXCLUDE CURRENT_TIME EXCLUDE CURRENT_ROW); + expect: + columns: + - id int + - cnt int64 + - mv int + - mi int + - l1 int + order: id + data: | + 1, 0, NULL, NULL, NULL + 2, 1, 21, 21, 21 + 3, 2, 22, 21, 22 + 4, 3, 23, 21, 23 + 5, 0, NULL, NULL, NULL + 6, 0, NULL, NULL, NULL + 7, 2, 99, 0, 99 + 8, 3, 99, 0, 56 + 9, 3, 99, 52, 52 diff --git a/hybridse/include/vm/mem_catalog.h b/hybridse/include/vm/mem_catalog.h index 30e1190116b..b393ed861ec 100644 --- a/hybridse/include/vm/mem_catalog.h +++ b/hybridse/include/vm/mem_catalog.h @@ -329,13 +329,13 @@ class WindowRange { bool out_of_rows, bool before_window, bool exceed_window) const { switch (frame_type_) { case Window::WindowFrameType::kFrameRows: - return out_of_rows ? kExceedWindow : kInWindow; + return out_of_rows ? kExceedWindow : (before_window ? kBeforeWindow : kInWindow); case Window::WindowFrameType::kFrameRowsMergeRowsRange: { return out_of_rows ? (before_window ? kBeforeWindow : exceed_window ? kExceedWindow : kInWindow) - : kInWindow; + : before_window ? kBeforeWindow : kInWindow; } case Window::WindowFrameType::kFrameRowsRange: return exceed_window @@ -373,7 +373,7 @@ class HistoryWindow : public Window { } } - virtual void PopEffectiveData() { + virtual void PopEffectiveDataIfAny() { if (!table_.empty()) { PopFrontRow(); } @@ -388,7 +388,7 @@ class HistoryWindow : public Window { auto cur_size = table_.size(); if (cur_size < window_range_.start_row_) { // current in the ROWS window - int64_t sub = (key + window_range_.start_offset_); + int64_t sub = key + window_range_.start_offset_; uint64_t start_ts = sub < 0 ? 0u : static_cast(sub); if (0 == window_range_.end_offset_) { return BufferCurrentTimeBuffer(key, row, start_ts); @@ -419,12 +419,27 @@ class HistoryWindow : public Window { // sliding rows data from `current_history_buffer_` into effective window // by giving the new start_ts and end_ts. - // Resulting the new effective window data whose bound is [start_ts, end_ts] + // Resulting the new effective window data whose bound is [start_ts, end_ts], + // NOTE + // - window bounds should be greater or equal to 0, < 0 is not supported yet, + // - values greater than int64_max is not considered as well + // - start_ts_inclusive > end_ts_inclusive is expected for rows window, e.g. + // `(rows between .. and current_row exclude current_time)`. + // Absolutely confusing design though, should refactored later + // TODO(ace): note above // // - elements in `current_history_buffer_` that `ele.first <= end_ts` goes out of // `current_history_buffer_` and pushed into effective window // - elements in effective window where `ele.first < start_ts` goes out of effective window - void SlideWindow(uint64_t start_ts_inclusive, uint64_t end_ts_inclusive) { + // + // `start_ts_inclusive` and `end_ts_inclusive` can be empty, which effectively means less than 0. + // if `start_ts_inclusive` is empty, no rows goes out of effective window + // if `end_ts_inclusive` is empty, no rows goes out of history buffer and into effective window + void SlideWindow(std::optional start_ts_inclusive, std::optional end_ts_inclusive) { + if (!end_ts_inclusive.has_value()) { + return; + } + while (!current_history_buffer_.empty() && current_history_buffer_.back().first <= end_ts_inclusive) { auto& back = current_history_buffer_.back(); @@ -436,7 +451,9 @@ class HistoryWindow : public Window { // push the row to the start of window // - pop last elements in window if exceed max window size // - also pop last elements in window if there ts less than `start_ts` - bool BufferEffectiveWindow(uint64_t key, const Row& row, uint64_t start_ts) { + // + // if `start_ts` is empty, no rows eliminated from window + bool BufferEffectiveWindow(uint64_t key, const Row& row, std::optional start_ts) { AddFrontRow(key, row); auto cur_size = table_.size(); while (window_range_.max_size_ > 0 && @@ -445,19 +462,18 @@ class HistoryWindow : public Window { --cur_size; } - // Slide window when window size >= rows_preceding + // Slide window if window start bound >= rows/range preceding while (cur_size > 0) { const auto& pair = GetBackRow(); - if ((kFrameRows == window_range_.frame_type_ || - kFrameRowsMergeRowsRange == window_range_.frame_type_) && + if ((kFrameRows == window_range_.frame_type_ || kFrameRowsMergeRowsRange == window_range_.frame_type_) && cur_size <= window_range_.start_row_ + 1) { + // note it is always current rows window break; } if (kFrameRows == window_range_.frame_type_ || pair.first < start_ts) { PopBackRow(); --cur_size; - } else { break; } @@ -470,7 +486,11 @@ class HistoryWindow : public Window { // slide window first so current row kept in `current_history_buffer_` // and will go into window in next action if (exclude_current_time_) { - SlideWindow(start_ts, key - 1); + if (key == 0) { + SlideWindow(start_ts, {}); + } else { + SlideWindow(start_ts, key - 1); + } } else { SlideWindow(start_ts, key); } @@ -482,9 +502,13 @@ class HistoryWindow : public Window { // except `exclude current_row`, the current row is always added to the effective window // but for next buffer action, previous current row already buffered in `current_history_buffer_` // so the previous current row need eliminated for this next buf action - PopEffectiveData(); + PopEffectiveDataIfAny(); + if (key == 0) { + SlideWindow(start_ts, {}); + } else { + SlideWindow(start_ts, key - 1); + } current_history_buffer_.emplace_front(key, row); - SlideWindow(start_ts, key - 1); } // in queue the current row diff --git a/hybridse/src/vm/runner.cc b/hybridse/src/vm/runner.cc index d685477e781..fd6191f96c6 100644 --- a/hybridse/src/vm/runner.cc +++ b/hybridse/src/vm/runner.cc @@ -3125,7 +3125,9 @@ std::shared_ptr RequestUnionRunner::RequestUnionWindow( const Row& request, std::vector> union_segments, int64_t ts_gen, const WindowRange& window_range, bool output_request_row, bool exclude_current_time, bool exclude_current_row) { uint64_t start = 0; - uint64_t end = UINT64_MAX; + // end is empty means end value < 0, that there is no effective window range + // this happend when `ts_gen` is 0 and exclude current_time needed + std::optional end = UINT64_MAX; uint64_t rows_start_preceding = 0; uint64_t max_size = 0; if (ts_gen >= 0) { @@ -3133,7 +3135,11 @@ std::shared_ptr RequestUnionRunner::RequestUnionWindow( ? 0 : (ts_gen + window_range.start_offset_); if (exclude_current_time && 0 == window_range.end_offset_) { - end = (ts_gen - 1) < 0 ? 0 : (ts_gen - 1); + if (ts_gen == 0) { + end = {}; + } else { + end = ts_gen - 1; + } } else { end = (ts_gen + window_range.end_offset_) < 0 ? 0 @@ -3172,7 +3178,7 @@ std::shared_ptr RequestUnionRunner::RequestUnionWindow( union_segment_status[i] = IteratorStatus(); continue; } - union_segment_iters[i]->Seek(end); + union_segment_iters[i]->Seek(end.value_or(0)); if (!union_segment_iters[i]->Valid()) { union_segment_status[i] = IteratorStatus(); continue; diff --git a/src/test/base_test.cc b/src/test/base_test.cc index e4c67ed73a2..8ddf1c5bdf7 100644 --- a/src/test/base_test.cc +++ b/src/test/base_test.cc @@ -485,11 +485,11 @@ void SQLCaseTest::CheckRows(const hybridse::vm::Schema &schema, const std::strin std::map> rows_map; if (order_idx >= 0) { int32_t row_id = 0; - for (auto row : rows) { + for (auto& row : rows) { row_view.Reset(row.buf()); std::string key = row_view.GetAsString(order_idx); LOG(INFO) << "Get Order String: " << row_id++ << " key: " << key; - rows_map.insert(std::make_pair(key, std::make_pair(row, false))); + rows_map.try_emplace(key, row, false); } } int32_t index = 0;