diff --git a/contrib/azure b/contrib/azure index 92c94d7f37a4..ea3e19a7be08 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf +Subproject commit ea3e19a7be08519134c643177d56c7484dfec884 diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 38ebcc8f6805..1fbfd29a3bd0 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -179,12 +179,19 @@ endif () target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) -# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. -# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. -# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracking. +# jemalloc provides support for two unwind flavors: +# - JEMALLOC_PROF_LIBUNWIND - unw_backtrace() - gnu libunwind (compatible with llvm libunwind) +# - JEMALLOC_PROF_LIBGCC - _Unwind_Backtrace() - the original HP libunwind and the one coming with gcc / g++ / libstdc++. # -# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). -target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) +# But with JEMALLOC_PROF_LIBGCC, jemalloc also calls _Unwind_Backtrace() during +# its bootstrapping, which may lead to a deadlock if dlsym() performs +# allocations (as glibc did prior to 2.34, see [1]). +# +# [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=fada9018199c21c469ff0e731ef75c6020074ac9 +# +# And since ClickHouse's libunwind already supports unw_backtrace(), we can +# safely switch to it to avoid this deadlock.
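As an aside (not part of the patch): a minimal C++ sketch contrasting the two backtrace APIs named in the comment above. The extern declaration of `unw_backtrace()` is an assumption about the linked libunwind (gnu or llvm flavor), and the program assumes linking against it (e.g. `-lunwind`); the deadlock mechanics themselves are as described in the comment.

```cpp
// Sketch only: the two unwind flavors jemalloc can be built against.
#include <unwind.h>    // _Unwind_Backtrace - the JEMALLOC_PROF_LIBGCC path
#include <cstdio>

// The JEMALLOC_PROF_LIBUNWIND path: a single direct call that fills a buffer.
// Declaration mirrors gnu/llvm libunwind; assumed to be provided by -lunwind.
extern "C" int unw_backtrace(void ** buffer, int size);

static _Unwind_Reason_Code trace_frame(_Unwind_Context * ctx, void * arg)
{
    int & count = *static_cast<int *>(arg);
    std::printf("frame %d: %p\n", count++, reinterpret_cast<void *>(_Unwind_GetIP(ctx)));
    return _URC_NO_REASON;
}

int main()
{
    void * frames[64];
    int n = unw_backtrace(frames, 64);       // JEMALLOC_PROF_LIBUNWIND flavor
    std::printf("unw_backtrace captured %d frames\n", n);

    int count = 0;
    _Unwind_Backtrace(trace_frame, &count);  // JEMALLOC_PROF_LIBGCC flavor (callback-driven)
}
```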
+target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1) target_link_libraries (_jemalloc PRIVATE unwind) # for RTLD_NEXT diff --git a/contrib/pocketfft b/contrib/pocketfft index 9efd4da52cf8..f4c1aa8aa9ce 160000 --- a/contrib/pocketfft +++ b/contrib/pocketfft @@ -1 +1 @@ -Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546 +Subproject commit f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3 diff --git a/contrib/rocksdb b/contrib/rocksdb index 078fa5638690..be3662339212 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304 +Subproject commit be366233921293bd07a84dc4ea6991858665f202 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 943e1d8acbdf..3a14407166cd 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -5,20 +5,13 @@ if (NOT ENABLE_ROCKSDB) return() endif() -## this file is extracted from `contrib/rocksdb/CMakeLists.txt` -set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") -list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") - -set(PORTABLE ON) -## always disable jemalloc for rocksdb by default -## because it introduces non-standard jemalloc APIs +# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs option(WITH_JEMALLOC "build with JeMalloc" OFF) -set(USE_SNAPPY OFF) -if (TARGET ch_contrib::snappy) - set(USE_SNAPPY ON) -endif() -option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY}) -## lz4, zlib, zstd is enabled in ClickHouse by default + +option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING + +# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd +option(WITH_SNAPPY "build with SNAPPY" ON) option(WITH_LZ4 "build with lz4" ON) option(WITH_ZLIB "build with zlib" ON) option(WITH_ZSTD "build with zstd" ON) @@ -26,78 +19,46 @@ option(WITH_ZSTD "build with zstd" ON) # third-party/folly is only validated to work on Linux and Windows for now. # So only turn it on there by default. if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") - if(MSVC AND MSVC_VERSION LESS 1910) - # Folly does not compile with MSVC older than VS2017 - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) - else() - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) - endif() + option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) else() option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) endif() -if( NOT DEFINED CMAKE_CXX_STANDARD ) - set(CMAKE_CXX_STANDARD 11) +if(WITH_SNAPPY) + add_definitions(-DSNAPPY) + list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) endif() -if(MSVC) - option(WITH_XPRESS "build with windows built in compression" OFF) - include("${ROCKSDB_SOURCE_DIR}/thirdparty.inc") -else() - if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") - # FreeBSD has jemalloc as default malloc - # but it does not have all the jemalloc files in include/... 
- set(WITH_JEMALLOC ON) - else() - if(WITH_JEMALLOC AND TARGET ch_contrib::jemalloc) - add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) - list(APPEND THIRDPARTY_LIBS ch_contrib::jemalloc) - endif() - endif() - - if(WITH_SNAPPY) - add_definitions(-DSNAPPY) - list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) - endif() - - if(WITH_ZLIB) - add_definitions(-DZLIB) - list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) - endif() - - if(WITH_LZ4) - add_definitions(-DLZ4) - list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) - endif() - - if(WITH_ZSTD) - add_definitions(-DZSTD) - list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) - endif() +if(WITH_ZLIB) + add_definitions(-DZLIB) + list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") - if(POWER9) - set(HAS_POWER9 1) - set(HAS_ALTIVEC 1) - else() - set(HAS_POWER8 1) - set(HAS_ALTIVEC 1) - endif(POWER9) -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") +if(WITH_LZ4) + add_definitions(-DLZ4) + list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) +endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") - set(HAS_ARMV8_CRC 1) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") +if(WITH_ZSTD) + add_definitions(-DZSTD) + list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) +endif() +option(PORTABLE "build a portable binary" ON) -if(ENABLE_AVX2 AND ENABLE_PCLMULQDQ) +if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) add_definitions(-DHAVE_SSE42) add_definitions(-DHAVE_PCLMUL) endif() +if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") + set (HAS_ARMV8_CRC 1) + # the original build descriptions set specific flags for ARM. 
These flags are already subsumed by ClickHouse's general + # ARM flags, see cmake/cpu_features.cmake + # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") +endif() + set (HAVE_THREAD_LOCAL 1) if(HAVE_THREAD_LOCAL) add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) @@ -107,8 +68,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) -elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") - add_definitions(-DOS_SOLARIS) elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") add_definitions(-DOS_FREEBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "Android") @@ -123,12 +82,10 @@ endif() if (OS_LINUX) add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT) -elseif (OS_FREEBSD) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) endif() +set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") include_directories(${ROCKSDB_SOURCE_DIR}) include_directories("${ROCKSDB_SOURCE_DIR}/include") @@ -136,11 +93,11 @@ if(WITH_FOLLY_DISTRIBUTED_MUTEX) include_directories("${ROCKSDB_SOURCE_DIR}/third-party/folly") endif() -# Main library source code - set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/cache.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_key.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc @@ -156,6 +113,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc ${ROCKSDB_SOURCE_DIR}/db/builder.cc ${ROCKSDB_SOURCE_DIR}/db/c.cc ${ROCKSDB_SOURCE_DIR}/db/column_family.cc @@ -229,6 +187,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc + ${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc ${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc ${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc ${ROCKSDB_SOURCE_DIR}/file/file_util.cc @@ -247,6 +206,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc ${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc ${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc + ${ROCKSDB_SOURCE_DIR}/memory/memory_allocator.cc ${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc @@ -322,6 +282,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/unique_id.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc ${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc @@ -333,9 +294,12 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc - ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_handler.cc + 
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_result.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/util/coding.cc ${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc ${ROCKSDB_SOURCE_DIR}/util/comparator.cc @@ -347,6 +311,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc ${ROCKSDB_SOURCE_DIR}/util/random.cc ${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc + ${ROCKSDB_SOURCE_DIR}/util/regex.cc ${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc ${ROCKSDB_SOURCE_DIR}/util/slice.cc ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc @@ -362,18 +327,23 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc ${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc ${ROCKSDB_SOURCE_DIR}/utilities/debug.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_secondary_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc ${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc @@ -393,6 +363,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc + ${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc @@ -411,6 +382,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc @@ -425,7 +397,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc - rocksdb_build_version.cc) + build_version.cc) # generated by hand if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) set_source_files_properties( @@ -462,5 +434,6 @@ endif() add_library(_rocksdb ${SOURCES}) 
add_library(ch_contrib::rocksdb ALIAS _rocksdb) target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) + # SYSTEM is required to overcome some issues target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include") diff --git a/contrib/rocksdb-cmake/rocksdb_build_version.cc b/contrib/rocksdb-cmake/build_version.cc similarity index 100% rename from contrib/rocksdb-cmake/rocksdb_build_version.cc rename to contrib/rocksdb-cmake/build_version.cc diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 2215ac2b37c4..80e5e81a4b18 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -16,6 +16,9 @@ dpkg -i package_folder/clickhouse-client_*.deb ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +# shellcheck disable=SC1091 +source /utils.lib + # install test configs /usr/share/clickhouse-test/config/install.sh @@ -272,3 +275,5 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 1e9eba94c4b6..5f2cb95de75f 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,8 +12,7 @@ MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME)) USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} -# disable for now -RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 +RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1 if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 @@ -310,7 +309,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt @@ -483,3 +482,5 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index c069ccbdd8d5..682da1df837f 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -1,8 +1,5 @@ #!/bin/bash -# core.COMM.PID-TID -sysctl kernel.core_pattern='core.%e.%p-%P' - OK="\tOK\t\\N\t" FAIL="\tFAIL\t\\N\t" @@ -315,12 +312,4 @@ function collect_query_and_trace_logs() done } -function collect_core_dumps() -{ - find . 
-type f -maxdepth 1 -name 'core.*' | while read -r core; do - zstd --threads=0 "$core" - mv "$core.zst" /test_output/ - done -} - # vi: ft=bash diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 833e1a053844..c3bb8ae9ea42 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -1,5 +1,10 @@ #!/bin/bash +# core.COMM.PID-TID +sysctl kernel.core_pattern='core.%e.%p-%P' +# ASAN doesn't work with suid_dumpable=2 +sysctl fs.suid_dumpable=1 + function run_with_retry() { if [[ $- =~ e ]]; then @@ -48,4 +53,12 @@ function timeout_with_logging() { return $exit_code } +function collect_core_dumps() +{ + find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do + zstd --threads=0 "$core" + mv "$core.zst" /test_output/ + done +} + # vi: ft=bash diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 323944591b12..864673945136 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -21,6 +21,9 @@ source /attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib +# shellcheck disable=SC1091 +source /utils.lib + install_packages package_folder # Thread Fuzzer allows to check more permutations of possible thread scheduling diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index c29be2cff586..f2eac12594db 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -5,11 +5,11 @@ sidebar_label: Object Data Type keywords: [object, data type] --- -# Object Data Type +# Object Data Type (deprecated) -:::note -This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) -::: +**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON objects is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864). + +
Stores JavaScript Object Notation (JSON) documents in a single column. diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index b665ad85a097..6e18ace10c77 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -58,6 +58,8 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' KILL QUERY WHERE user='username' SYNC ``` +:::tip If you are killing a query in ClickHouse Cloud or in a self-managed cluster, be sure to use the `ON CLUSTER [cluster-name]` option to ensure the query is killed on all replicas.::: + Read-only users can only stop their own queries. By default, the asynchronous version of queries is used (`ASYNC`), which does not wait for confirmation that queries have stopped. @@ -131,6 +133,7 @@ KILL MUTATION WHERE database = 'default' AND table = 'table' -- Cancel the specific mutation: KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt' ``` +:::tip If you are killing a mutation in ClickHouse Cloud or in a self-managed cluster, be sure to use the `ON CLUSTER [cluster-name]` option to ensure the mutation is killed on all replicas.::: The query is useful when a mutation is stuck and cannot finish (e.g. if some function in the mutation query throws an exception when applied to the data contained in the table). diff --git a/pyproject.toml b/pyproject.toml index 279d077a6957..90f089afa41e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,10 +35,9 @@ disable = ''' broad-except, bare-except, no-else-return, - global-statement + global-statement, ''' [tool.pylint.SIMILARITIES] # due to SQL min-similarity-lines=1000 - diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f7e6b1a1ccc9..cfd07c27765c 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,11 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) +#else +void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) +#endif { const ColumnAggregateFunction & from_concrete = assert_cast(from); @@ -462,7 +466,11 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) +#else +void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) +#endif { insertRangeFrom(from, n, 1); } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index a75b27e835c2..1be7a862438c 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,14 @@ class ColumnAggregateFunction final : public COWHelper(src_); size_t size = src.sizeAt(n); @@ -392,7 +396,11 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#else +int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, nan_direction_hint); } @@ -535,7 +543,11 @@ void ColumnArray::getExtremes(Field & min,
Field & max) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 53eb5166df87..6cd3e2f6c3b4 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,10 +84,18 @@ class ColumnArray final : public COWHelper, ColumnArr void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; +#endif void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -95,7 +103,11 @@ class ColumnArray final : public COWHelper, ColumnArr ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; void getPermutation(PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 934adf07cf4c..5e455709fec5 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -85,7 +85,11 @@ class ColumnCompressed : public COWHelper, Colum bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#else + void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#endif void insertData(const char *, size_t) override { throwMustBeDecompressed(); } void insertDefault() override { throwMustBeDecompressed(); } void popBack(size_t) override { throwMustBeDecompressed(); } @@ -100,7 +104,11 @@ class ColumnCompressed : public COWHelper, Colum void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#else + int 
doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { throwMustBeDecompressed(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index c2c0fa3027c3..b55a1f42037b 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -32,6 +32,8 @@ class ColumnConst final : public COWHelper, ColumnCon ColumnConst(const ColumnConst & src) = default; public: + bool isConst() const override { return true; } + ColumnPtr convertToFullColumn() const; ColumnPtr convertToFullColumnIfConst() const override @@ -121,7 +123,11 @@ class ColumnConst final : public COWHelper, ColumnCon return data->isNullAt(0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#else + void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#endif { s += length; } @@ -145,12 +151,20 @@ class ColumnConst final : public COWHelper, ColumnCon ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn &, size_t) override +#else + void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#else + void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#endif void insertDefault() override { @@ -223,7 +237,11 @@ class ColumnConst final : public COWHelper, ColumnCon return data->allocatedBytes() + sizeof(s); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#else + int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#endif { return data->compareAt(0, 0, *assert_cast(rhs).data, nan_direction_hint); } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index eb9784c14dde..cf413f790a78 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -32,7 +32,11 @@ namespace ErrorCodes } template +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#else +int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#endif { auto & other = static_cast(rhs_); const T & a = data[n]; @@ -331,7 +335,11 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnDecimal & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index c4510ba29228..32efeb643a60 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,9 +55,17 @@ class ColumnDecimal final : public COWHelper, Col void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#else + void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#endif +#if 
!defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -68,7 +76,11 @@ class ColumnDecimal final : public COWHelper, Col void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void popBack(size_t n) override { @@ -92,7 +104,11 @@ class ColumnDecimal final : public COWHelper, Col void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index d56999ce5a27..c735238f5150 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -215,7 +215,11 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +#else +void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) +#endif { const auto & dynamic_src = assert_cast(src_); @@ -265,7 +269,11 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#else +void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#endif { if (start + length > src_.size()) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. 
" @@ -431,7 +439,11 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else +void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { const auto & dynamic_src = assert_cast(src_); @@ -591,7 +603,11 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#else +int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#endif { const auto & left_variant = assert_cast(*variant_column); const auto & right_dynamic = assert_cast(rhs); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 27ad0dd583f2..9abddc7a26d2 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -142,9 +142,16 @@ class ColumnDynamic final : public COWHelper, Colum void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertDefault() override { @@ -213,7 +220,11 @@ class ColumnDynamic final : public COWHelper, Colum return scattered_columns; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override { diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index d7e4eff27279..1c2de203a946 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,11 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) +#else +void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) +#endif { const ColumnFixedString & src = assert_cast(src_); @@ -86,7 +90,11 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); if (n != src_concrete.getN()) @@ -219,7 +227,11 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void 
ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); chassert(this->n == src_concrete.n); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 7b46dc11cd68..6e88136fc50c 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,9 +98,17 @@ class ColumnFixedString final : public COWHelper(rhs_); chassert(this->n == rhs.n); @@ -144,7 +156,11 @@ class ColumnFixedString final : public COWHelper(src); @@ -89,7 +93,11 @@ void ColumnFunction::insertFrom(const IColumn & src, size_t n) ++elements_size; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFunction & src_func = assert_cast(src); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 6fdc6679d3e9..ba924c49a82a 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -94,8 +94,16 @@ class ColumnFunction final : public COWHelper, Col throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; +#endif void insertData(const char *, size_t) override { @@ -137,7 +145,11 @@ class ColumnFunction final : public COWHelper, Col throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override +#endif { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "compareAt is not implemented for {}", getName()); } diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 208326fe629f..eb694a10b0ff 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -159,7 +159,11 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) +#else +void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) +#endif { const auto * low_cardinality_src = typeid_cast(&src); @@ -187,7 +191,11 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto * low_cardinality_src = typeid_cast(&src); @@ -364,7 +372,11 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnLowCardinality::compareAt(size_t n, size_t m, const 
IColumn & rhs, int nan_direction_hint) const +#else +int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index ac3b725b22f0..e99be07cd8d3 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,10 +78,18 @@ class ColumnLowCardinality final : public COWHelperupdateHashFast(hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertFrom(const IColumn & src, size_t n) +#else +void ColumnMap::doInsertFrom(const IColumn & src, size_t n) +#endif { nested->insertFrom(assert_cast(src).getNestedColumn(), n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { nested->insertRangeFrom( assert_cast(src).getNestedColumn(), @@ -210,7 +222,11 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_map = assert_cast(rhs); return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 52165d0d74e2..a54071a2974c 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -66,16 +66,28 @@ class ColumnMap final : public COWHelper, ColumnMap> void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif void getExtremes(Field & min, Field & max) const override; void 
getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 1d12a59fd598..f060e74b3154 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -221,7 +221,11 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnNullable & nullable_col = assert_cast(src); getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length); @@ -258,7 +262,11 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertFrom(const IColumn & src, size_t n) +#else +void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n); @@ -266,7 +274,11 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n) } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length); @@ -402,7 +414,11 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else +int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, null_direction_hint); } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 510a4cacf1eb..a6d0483e527f 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,11 +69,21 @@ class ColumnNullable final : public COWHelper, Col char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertFromNotNullable(const IColumn & src, size_t n); void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length); @@ -90,7 +100,11 @@ class ColumnNullable final : public COWHelper, Col void expand(const Filter & 
mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif #if USE_EMBEDDED_COMPILER diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 90ef974010cb..adcd42b16e99 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,12 +763,20 @@ void ColumnObject::get(size_t n, Field & res) const } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnObject::insertFrom(const IColumn & src, size_t n) +#else +void ColumnObject::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto & src_object = assert_cast(src); diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index e2936b27994f..fadf2e187799 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -209,8 +209,15 @@ class ColumnObject final : public COWHelper, ColumnO void insert(const Field & field) override; bool tryInsert(const Field & field) override; void insertDefault() override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif + void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; @@ -228,7 +235,11 @@ class ColumnObject final : public COWHelper, ColumnO /// Order of rows in ColumnObject is undefined. void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void getExtremes(Field & min, Field & max) const override; /// All other methods throw exception. 
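The `#if !defined(ABORT_ON_LOGICAL_ERROR)` pairs repeated throughout these column diffs all apply one pattern: in checked builds, the virtual insertion/comparison hooks are renamed to `do*()` variants, presumably so that the `IColumn` base class can expose non-virtual `insertFrom()`/`compareAt()` wrappers that run extra sanity checks before delegating (note the `bool isConst() const override { return true; }` added to `ColumnConst`, which such checks could consult). Below is a minimal sketch of that dispatch pattern, not ClickHouse code; the bounds check is hypothetical and stands in for whatever the real wrappers verify.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

#define ABORT_ON_LOGICAL_ERROR  // stand-in for the ClickHouse checked-build macro

class IColumn
{
public:
    virtual ~IColumn() = default;
    virtual size_t size() const = 0;

#if !defined(ABORT_ON_LOGICAL_ERROR)
    /// Release builds: the public method is itself the virtual.
    virtual void insertFrom(const IColumn & src, size_t n) = 0;
#else
    /// Checked builds: the public entry point is non-virtual and can run
    /// sanity checks before delegating to the renamed virtual.
    void insertFrom(const IColumn & src, size_t n)
    {
        assert(n < src.size());  // hypothetical check, for illustration only
        doInsertFrom(src, n);
    }

protected:
    virtual void doInsertFrom(const IColumn & src, size_t n) = 0;
#endif
};

class ColumnUInt64 final : public IColumn
{
public:
    size_t size() const override { return data.size(); }

    /// Each concrete column overrides whichever name is currently virtual,
    /// which is why every ColumnXxx in this diff carries paired declarations.
#if !defined(ABORT_ON_LOGICAL_ERROR)
    void insertFrom(const IColumn & src, size_t n) override
#else
    void doInsertFrom(const IColumn & src, size_t n) override
#endif
    {
        data.push_back(static_cast<const ColumnUInt64 &>(src).data[n]);
    }

    std::vector<unsigned long long> data;
};

int main()
{
    ColumnUInt64 a, b;
    a.data = {1, 2, 3};
    b.insertFrom(a, 2);  // routed through the checked wrapper in this build
    return b.data.back() == 3 ? 0 : 1;
}
```

In release builds the pattern compiles back to a plain virtual call, so the checks would cost nothing outside debug/sanitizer CI.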
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 5190ceb49e53..809586d88108 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,11 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return values->skipSerializedInArena(pos); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; @@ -248,7 +252,11 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnSparse::insertFrom(const IColumn & src, size_t n) +#else +void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) +#endif { if (const auto * src_sparse = typeid_cast(&src)) { @@ -446,7 +454,11 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else +int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { if (const auto * rhs_sparse = typeid_cast(&rhs_)) return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 12b2def7cf14..3e34d1de94a1 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -81,10 +81,18 @@ class ColumnSparse final : public COWHelper, ColumnS char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif void insertDefault() override; void insertManyDefaults(size_t length) override; @@ -98,7 +106,11 @@ class ColumnSparse final : public COWHelper, ColumnS template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index a84aea734860..1eda9714d627 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,11 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void 
ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnString & src_concrete = assert_cast(src); const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]]; @@ -129,7 +133,11 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 39d4684fd890..602ffac65e8c 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -142,7 +142,11 @@ class ColumnString final : public COWHelper, ColumnS return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override +#else + void doInsertFrom(const IColumn & src_, size_t n) override +#endif { const ColumnString & src = assert_cast(src_); const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray. @@ -165,7 +169,11 @@ class ColumnString final : public COWHelper, ColumnS } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertData(const char * pos, size_t length) override { @@ -212,7 +220,11 @@ class ColumnString final : public COWHelper, ColumnS hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -238,7 +250,11 @@ class ColumnString final : public COWHelper, ColumnS offsets.push_back(offsets.back() + 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#endif { const ColumnString & rhs = assert_cast(rhs_); return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index f262a8676b75..9b822d7f570d 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -205,7 +205,11 @@ bool ColumnTuple::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) +#endif { const ColumnTuple & src = assert_cast(src_); @@ -218,7 +222,11 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnTuple & src_tuple = assert_cast(src); @@ -318,7 +326,11 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } +#if 
!defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { column_length += length; const size_t tuple_size = columns.size(); @@ -470,7 +482,11 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 0103f81b2427..38e479791d40 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -65,8 +65,15 @@ class ColumnTuple final : public COWHelper, ColumnTup void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif + void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; @@ -76,14 +83,22 @@ class ColumnTuple final : public COWHelper, ColumnTup void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 0311efd4c833..ec1f8e0a4d5b 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,11 @@ class ColumnUnique final : public COWHelperupdateHashWithValue(n, hash_func); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) 
const override; +#endif void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); } @@ -488,7 +492,11 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { if (is_nullable) { diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index ec47f5dfa746..ee5de4c2dde6 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,17 +595,29 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) +#endif { insertFromImpl(src_, n, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +#else +void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) +#endif { insertRangeFromImpl(src_, start, length, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else +void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { insertManyFromImpl(src_, position, length, nullptr); } @@ -1174,7 +1186,11 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_variant = assert_cast(rhs); Discriminator left_discr = globalDiscriminatorAt(n); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index e5a4498f3404..d91b8e93a7d4 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,9 +180,19 @@ class ColumnVariant final : public COWHelper, Colum void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#else + using IColumn::insertFrom; + using IColumn::insertManyFrom; + using IColumn::insertRangeFrom; + + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#endif /// Methods for insertion from another Variant but with known mapping between global discriminators. 
void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); @@ -213,7 +223,11 @@ class ColumnVariant final : public COWHelper, Colum ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 35d9f5386ed7..19d1b8009619 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -503,7 +503,11 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnVector & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bbd27c91a708..3a0acf5e312a 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,12 +64,20 @@ class ColumnVector final : public COWHelper, Colum return data.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override +#else + void doInsertFrom(const IColumn & src, size_t n) override +#endif { data.push_back(assert_cast(src).getData()[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -142,7 +150,11 @@ class ColumnVector final : public COWHelper, Colum } /// This method implemented in header because it could be possibly devirtualized. 
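The hunks above and below repeat one mechanical pattern, so a short aside may help. When ABORT_ON_LOGICAL_ERROR is defined, the public IColumn entry points (insertFrom, insertRangeFrom, insertManyFrom, compareAt) become non-virtual wrappers that first check that the other column has a compatible dynamic type and only then dispatch to a new protected virtual do* hook; without the macro the public methods stay virtual and nothing changes. A minimal self-contained sketch of the idea, with simplified names rather than the actual ClickHouse classes:

    #include <cassert>
    #include <cstddef>
    #include <typeinfo>

    struct ColumnBase
    {
        virtual ~ColumnBase() = default;

        // Public entry point: a non-virtual wrapper that validates, then dispatches.
        // In the real code the check lives in IColumn::assertTypeEquality() and uses
        // chassert(), so it only fires in builds with ABORT_ON_LOGICAL_ERROR.
        void insertFrom(const ColumnBase & src, size_t n)
        {
            assert(typeid(*this) == typeid(src)); // both sides must be the same concrete column
            doInsertFrom(src, n);
        }

    protected:
        // Subclasses override the hook instead of the public method. Classes with
        // extra insertFrom overloads (e.g. ColumnVariant) add `using` declarations
        // so the base-class wrappers are not hidden by those overloads.
        virtual void doInsertFrom(const ColumnBase & src, size_t n) = 0;
    };

The compareAt hunk that follows is the same transformation applied to comparison.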
+#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#endif { return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], nan_direction_hint); } @@ -228,7 +240,11 @@ class ColumnVector final : public COWHelper, Colum bool tryInsert(const DB::Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 90cccef2b039..552e52cf51c1 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,11 @@ String IColumn::dumpStructure() const return res.str(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void IColumn::insertFrom(const IColumn & src, size_t n) +#else +void IColumn::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index afa301d5c1c5..4b6f34e5aa29 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -1,15 +1,14 @@ #pragma once +#include +#include #include -#include #include +#include #include -#include -#include #include "config.h" - class SipHash; class Collator; @@ -180,18 +179,42 @@ class IColumn : public COW /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertFrom(const IColumn & src, size_t n); +#else + void insertFrom(const IColumn & src, size_t n) + { + assertTypeEquality(src); + doInsertFrom(src, n); + } +#endif /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; +#else + void insertRangeFrom(const IColumn & src, size_t start, size_t length) + { + assertTypeEquality(src); + doInsertRangeFrom(src, start, length); + } +#endif /// Appends one element from other column with the same type multiple times. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) insertFrom(src, position); } +#else + void insertManyFrom(const IColumn & src, size_t position, size_t length) + { + assertTypeEquality(src); + doInsertManyFrom(src, position, length); + } +#endif /// Appends one field multiple times. Can be optimized in inherited classes. virtual void insertMany(const Field & field, size_t length) @@ -322,7 +345,15 @@ class IColumn : public COW * * For non Nullable and non floating point types, nan_direction_hint is ignored. 
*/ +#if !defined(ABORT_ON_LOGICAL_ERROR) [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; +#else + [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const + { + assertTypeEquality(rhs); + return doCompareAt(n, m, rhs, nan_direction_hint); + } +#endif #if USE_EMBEDDED_COMPILER @@ -610,6 +641,8 @@ class IColumn : public COW [[nodiscard]] virtual bool isSparse() const { return false; } + [[nodiscard]] virtual bool isConst() const { return false; } + [[nodiscard]] virtual bool isCollationSupported() const { return false; } virtual ~IColumn() = default; @@ -633,6 +666,29 @@ class IColumn : public COW Equals equals, Sort full_sort, PartialSort partial_sort) const; + +#if defined(ABORT_ON_LOGICAL_ERROR) + virtual void doInsertFrom(const IColumn & src, size_t n); + + virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; + + virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length) + { + for (size_t i = 0; i < length; ++i) + insertFrom(src, position); + } + + virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + +private: + void assertTypeEquality(const IColumn & rhs) const + { + /// For Sparse and Const columns, we can compare only internal types. It is considered normal to e.g. insert from normal vector column to a sparse vector column. + /// This case is specifically handled in ColumnSparse implementation. Similar situation with Const column. + /// For the rest of column types we can compare the types directly. + chassert((isConst() || isSparse()) ? getDataType() == rhs.getDataType() : typeid(*this) == typeid(rhs)); + } +#endif }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 27f420fbc710..c19fb704d9b2 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -26,7 +26,11 @@ class IColumnDummy : public IColumnHelper size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { } @@ -67,12 +71,20 @@ class IColumnDummy : public IColumnHelper { } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn &, size_t) override +#else + void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#else + void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#endif { s += length; } diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index f71f19a5da6a..3398452b7eed 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -85,7 +85,11 @@ class IColumnUnique : public IColumn throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t, size_t) override +#else + void doInsertRangeFrom(const IColumn &, size_t, size_t) override +#endif { throw 
Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertRangeFrom is not supported for ColumnUnique."); } diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 325cf5559cde..645f6ed79f38 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,11 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } +#if !defined(ABORT_ON_LOGICAL_ERROR) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) +#else +static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) +#endif { size_t size = src.size(); dst.insertManyFrom(src, size / 2, size); diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 1f4b0aea8f2a..111280074dd0 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,12 +38,21 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -void abortOnFailedAssertion(const String & description) +void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size) { - LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description); + auto & logger = Poco::Logger::root(); + LOG_FATAL(&logger, "Logical error: '{}'.", description); + if (trace) + LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size)); abort(); } +void abortOnFailedAssertion(const String & description) +{ + StackTrace st; + abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize()); +} + bool terminate_on_any_exception = false; static int terminate_status_code = 128 + SIGABRT; thread_local bool update_error_statistics = true; @@ -58,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - abortOnFailedAssertion(msg); + abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); } #endif diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 87ef7101cdc4..a4774a89f6ac 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -25,8 +25,6 @@ namespace DB class AtomicLogger; -[[noreturn]] void abortOnFailedAssertion(const String & description); - /// This flag can be set for testing purposes - to check that no exceptions are thrown. 
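The Exception.cpp hunk above changes which stack trace gets printed on a failed assertion: when the LOGICAL_ERROR exception already carries frame pointers captured at the throw site, the new overload logs those, so the logged frames describe the original failure rather than the abort handler. A hedged sketch of the two-overload shape, with a toy frame formatter standing in for the real StackTrace API:

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Toy stand-in for StackTrace::toString(void * const *, offset, size).
    static std::string framesToString(void * const * /*frames*/, size_t offset, size_t size)
    {
        return "<" + std::to_string(size - offset) + " frames>";
    }

    [[noreturn]] void abortOnFailedAssertion(
        const std::string & description, void * const * trace, size_t offset, size_t size)
    {
        std::fprintf(stderr, "Logical error: '%s'.\n", description.c_str());
        if (trace) // a caller-provided trace wins over anything collected here
            std::fprintf(stderr, "Stack trace:\n%s\n", framesToString(trace, offset, size).c_str());
        std::abort();
    }

    [[noreturn]] void abortOnFailedAssertion(const std::string & description)
    {
        // No caller-provided trace: the real code collects one via StackTrace here.
        abortOnFailedAssertion(description, nullptr, 0, 0);
    }

Below, the Exception.h hunk continues with the relocated declarations.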
extern bool terminate_on_any_exception; @@ -167,6 +165,8 @@ class Exception : public Poco::Exception mutable std::vector capture_thread_frame_pointers; }; +[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size); +[[noreturn]] void abortOnFailedAssertion(const String & description); std::string getExceptionStackTraceString(const std::exception & e); std::string getExceptionStackTraceString(std::exception_ptr e); diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.cpp b/src/Common/NamedCollections/NamedCollectionsFactory.cpp index 14105a8651d7..2faea1957ba4 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.cpp +++ b/src/Common/NamedCollections/NamedCollectionsFactory.cpp @@ -235,7 +235,7 @@ bool NamedCollectionFactory::loadIfNot(std::lock_guard & lock) loadFromConfig(context->getConfigRef(), lock); loadFromSQL(lock); - if (metadata_storage->supportsPeriodicUpdate()) + if (metadata_storage->isReplicated()) { update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); }); update_task->activate(); @@ -357,6 +357,13 @@ void NamedCollectionFactory::reloadFromSQL() add(std::move(collections), lock); } +bool NamedCollectionFactory::usesReplicatedStorage() +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + return metadata_storage->isReplicated(); +} + void NamedCollectionFactory::updateFunc() { LOG_TRACE(log, "Named collections background updating thread started"); diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.h b/src/Common/NamedCollections/NamedCollectionsFactory.h index 6ee5940e6860..a0721ad8a50f 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.h +++ b/src/Common/NamedCollections/NamedCollectionsFactory.h @@ -34,6 +34,8 @@ class NamedCollectionFactory : boost::noncopyable void updateFromSQL(const ASTAlterNamedCollectionQuery & query); + bool usesReplicatedStorage(); + void loadIfNot(); void shutdown(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 32fdb25abd31..b3671350f929 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -67,7 +67,7 @@ class NamedCollectionsMetadataStorage::INamedCollectionsStorage virtual bool removeIfExists(const std::string & path) = 0; - virtual bool supportsPeriodicUpdate() const = 0; + virtual bool isReplicated() const = 0; virtual bool waitUpdate(size_t /* timeout */) { return false; } }; @@ -89,7 +89,7 @@ class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsSt ~LocalStorage() override = default; - bool supportsPeriodicUpdate() const override { return false; } + bool isReplicated() const override { return false; } std::vector list() const override { @@ -221,7 +221,7 @@ class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectio ~ZooKeeperStorage() override = default; - bool supportsPeriodicUpdate() const override { return true; } + bool isReplicated() const override { return true; } /// Return true if children changed. 
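The rename from supportsPeriodicUpdate() to isReplicated() in the hunks around here is not purely cosmetic: the same property now answers two questions, whether the background update task must be scheduled and, further below, whether an ON CLUSTER clause is redundant for named-collection DDL. A minimal sketch of the hierarchy as the rename leaves it (hypothetical, simplified class shapes):

    struct INamedCollectionsStorage
    {
        virtual ~INamedCollectionsStorage() = default;
        virtual bool isReplicated() const = 0;
    };

    struct LocalStorage : INamedCollectionsStorage
    {
        bool isReplicated() const override { return false; } // plain local metadata files
    };

    struct ZooKeeperStorage : INamedCollectionsStorage
    {
        bool isReplicated() const override { return true; } // metadata shared via [Zoo]Keeper
    };

    // Only replicated storage needs periodic updates, because only there can
    // another replica change the metadata underneath this server.
    inline bool needsUpdateTask(const INamedCollectionsStorage & storage)
    {
        return storage.isReplicated();
    }

The waitUpdate() override that the diff resumes with below exists only on the replicated side.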
bool waitUpdate(size_t timeout) override @@ -465,14 +465,14 @@ void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedColle storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace); } -bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const +bool NamedCollectionsMetadataStorage::isReplicated() const { - return storage->supportsPeriodicUpdate(); + return storage->isReplicated(); } bool NamedCollectionsMetadataStorage::waitUpdate() { - if (!storage->supportsPeriodicUpdate()) + if (!storage->isReplicated()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported"); const auto & config = Context::getGlobalContextInstance()->getConfigRef(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index 3c089fe2fa2b..c3468fbc4681 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -30,7 +30,7 @@ class NamedCollectionsMetadataStorage : private WithContext /// Return true if update was made bool waitUpdate(); - bool supportsPeriodicUpdate() const; + bool isReplicated() const; private: class INamedCollectionsStorage; diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 239e957bdfe8..34f6f0b75359 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -545,7 +545,7 @@ std::string StackTrace::toString() const return toStringCached(frame_pointers, offset, size); } -std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size) +std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size) { __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw)); diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 4ce9a9281f3c..2078828f3d7b 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -59,7 +59,7 @@ class StackTrace const FramePointers & getFramePointers() const { return frame_pointers; } std::string toString() const; - static std::string toString(void ** frame_pointers, size_t offset, size_t size); + static std::string toString(void * const * frame_pointers, size_t offset, size_t size); static void dropCache(); /// @param fatal - if true, will process inline frames (slower) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 457a962c4fff..52fa28a4481b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -346,6 +346,7 @@ class IColumn; \ M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \ M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \ + M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, "Ignore ON CLUSTER clause for replicated named collections management queries.", 0) \ /** Settings for testing hedged requests */ \ M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 5174cf82c2e0..b9b722091039 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -76,6 +76,7 @@ static 
std::initializer_list #include #include #include +#include #include #include #include @@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction qualified_name.database = table_identifier->getDatabaseName(); qualified_name.table = table_identifier->shortName(); } + else if (arg->as<ASTSubquery>()) + { + /// Allow IN subquery. + /// Do not add tables from the subquery into dependencies, + /// because CREATE will succeed anyway. + return; + } else { assert(false); } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 0ed995c6180c..f58b90f11343 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -107,12 +107,24 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name) { + // It is important to call the destructors of not_in_use without + // the mutex locked, to avoid a potential deadlock. DetachedTables not_in_use; - std::lock_guard lock(mutex); - auto table = DatabaseOrdinary::detachTableUnlocked(name); - table_name_to_path.erase(name); - detached_tables.emplace(table->getStorageID().uuid, table); - not_in_use = cleanupDetachedTables(); + StoragePtr table; + { + std::lock_guard lock(mutex); + table = DatabaseOrdinary::detachTableUnlocked(name); + table_name_to_path.erase(name); + detached_tables.emplace(table->getStorageID().uuid, table); + not_in_use = cleanupDetachedTables(); + } + + if (!not_in_use.empty()) + { + not_in_use.clear(); + LOG_DEBUG(log, "Finished removing unused detached tables"); + } + return table; } diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index 79a17fd18443..0e83e2039f6a 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterAlterNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as<ASTAlterNamedCollectionQuery &>(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as<ASTAlterNamedCollectionQuery &>(); current_context->checkAccess(AccessType::ALTER_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().updateFromSQL(query); diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c71441daa8c2..b4920b1729ff 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterCreateNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as<ASTCreateNamedCollectionQuery &>(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as<ASTCreateNamedCollectionQuery &>(); current_context->checkAccess(AccessType::CREATE_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return
executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().createFromSQL(query); diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index 2edaef1b2f2b..6233d21b4399 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterDropNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as<ASTDropNamedCollectionQuery &>(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as<ASTDropNamedCollectionQuery &>(); current_context->checkAccess(AccessType::DROP_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().removeFromSQL(query); diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index 44167fe72424..dd20164925cb 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -15,6 +15,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -38,6 +42,13 @@ static bool isAccessControlQuery(const ASTPtr & query) || query->as(); } +static bool isNamedCollectionQuery(const ASTPtr & query) +{ + return query->as<ASTCreateNamedCollectionQuery>() + || query->as<ASTDropNamedCollectionQuery>() + || query->as<ASTAlterNamedCollectionQuery>(); +} + ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params) { auto * query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query.get()); @@ -50,7 +61,10 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c && context->getUserDefinedSQLObjectsStorage().isReplicated()) || (isAccessControlQuery(query) && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries - && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)) + && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)) + || (isNamedCollectionQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_named_collections_queries + && NamedCollectionFactory::instance().usesReplicatedStorage())) { LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); return query_on_cluster->getRewrittenASTWithoutOnCluster(params); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7e4b1db4c89a..d38001a0feb6 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -874,46 +874,6 @@ static Field applyFunctionForField( return (*col)[0]; } -/// The case when arguments may have types different than in the primary key.
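With the interpreter changes above in place, all three named-collection DDL interpreters funnel their AST through removeOnClusterClauseIfNeeded(), and the hunk just above strips ON CLUSTER only when three conditions hold at once. A compressed sketch of the decision, with hypothetical helper names rather than the real signatures:

    // Sketch only: the real code inspects the AST type, reads the new
    // ignore_on_cluster_for_replicated_named_collections_queries setting and asks
    // NamedCollectionFactory::usesReplicatedStorage().
    struct QueryTraits
    {
        bool is_named_collection_ddl; // CREATE / ALTER / DROP NAMED COLLECTION
        bool has_on_cluster;          // the query carries ON CLUSTER <name>
    };

    bool shouldDropOnClusterClause(
        const QueryTraits & query,
        bool ignore_on_cluster_setting, // user opted in via the new setting
        bool storage_is_replicated)     // [Zoo]Keeper already fans the DDL out
    {
        return query.has_on_cluster
            && query.is_named_collection_ddl
            && ignore_on_cluster_setting
            && storage_is_replicated;
    }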
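The KeyCondition refactoring that the remaining hunks carry out splits the old transformConstantWithValidFunctions() into two reusable steps: extractMonotonicFunctionsChainFromKey() finds the chain of functions sitting between a key column and the analyzed expression, and applyFunctionChainToColumn() then applies that chain. Because the second step now operates on whole columns rather than a single Field, the same machinery can transform every element of an IN set. A toy model of the column-wise chain application, using plain std::function instead of ClickHouse's FunctionBasePtr:

    #include <functional>
    #include <optional>
    #include <vector>

    using Value = std::optional<int>; // nullopt plays the role of NULL
    using Fn = std::function<Value(Value)>;

    // Mirrors how applyFunctionChainToColumn() bails out: if any step maps any
    // element to NULL, the whole rewrite is abandoned.
    bool applyChainToColumn(std::vector<Value> & column, const std::vector<Fn> & chain)
    {
        for (const auto & fn : chain)
            for (auto & value : column)
            {
                value = fn(value);
                if (!value)
                    return false; // an un-transformable element: give up
            }
        return true;
    }

The deleted helpers that follow are the single-Field machinery this column-based approach replaces.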
-static std::pair<Field, DataTypePtr> applyFunctionForFieldOfUnknownType( - const FunctionBasePtr & func, - const DataTypePtr & arg_type, - const Field & arg_value) -{ - ColumnsWithTypeAndName arguments{{ arg_type->createColumnConst(1, arg_value), arg_type, "x" }}; - DataTypePtr return_type = func->getResultType(); - - auto col = func->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - -/// Same as above but for binary operators -static std::pair<Field, DataTypePtr> applyBinaryFunctionForFieldOfUnknownType( - const FunctionOverloadResolverPtr & func, - const DataTypePtr & arg_type, - const Field & arg_value, - const DataTypePtr & arg_type2, - const Field & arg_value2) -{ - ColumnsWithTypeAndName arguments{ - {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {arg_type2->createColumnConst(1, arg_value2), arg_type2, "y"}}; - - FunctionBasePtr func_base = func->build(arguments); - - DataTypePtr return_type = func_base->getResultType(); - - auto col = func_base->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) { /// Fallback for fields without block reference. @@ -940,164 +900,92 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & return {field.columns, field.row_idx, result_idx}; } -/** When table's key has expression with these functions from a column, - * and when a column in a query is compared with a constant, such as: - * CREATE TABLE (x String) ORDER BY toDate(x) - * SELECT ... WHERE x LIKE 'Hello%' - * we want to apply the function to the constant for index analysis, - * but should modify it to pass on un-parsable values. - */ -static std::set date_time_parsing_functions = { - "toDate", - "toDate32", - "toDateTime", - "toDateTime64", - "parseDateTimeBestEffort", - "parseDateTimeBestEffortUS", - "parseDateTime32BestEffort", - "parseDateTime64BestEffort", - "parseDateTime", - "parseDateTimeInJodaSyntax", -}; - -/** The key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, - * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` - * condition also holds, so the index may be used to select only parts satisfying this condition. - * - * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the - * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). - * Instead, we can qualify only functions that do not transform the range (for example rounding), - * which while not strictly monotonic, are monotonic everywhere on the input range. - */ -bool KeyCondition::transformConstantWithValidFunctions( - ContextPtr context, - const String & expr_name, - size_t & out_key_column_num, - DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, - std::function always_monotonic) const +/// Sequentially applies functions to the column and returns `true` +/// if all function arguments are compatible with the function +/// signatures and none of the functions produces `NULL` output. +/// +/// After the function chain has been executed, fills the result column and its type.
+bool applyFunctionChainToColumn( + const ColumnPtr & in_column, + const DataTypePtr & in_data_type, + const std::vector<FunctionBasePtr> & functions, + ColumnPtr & out_column, + DataTypePtr & out_data_type) { - const auto & sample_block = key_expr->getSampleBlock(); + // Remove LowCardinality from the input column and convert it to a regular one + auto result_column = in_column->convertToFullIfNeeded(); + auto result_type = removeLowCardinality(in_data_type); - for (const auto & node : key_expr->getNodes()) + // In case the function sequence is empty, return the full non-LowCardinality column + if (functions.empty()) { - auto it = key_columns.find(node.result_name); - if (it != key_columns.end()) - { - std::stack chain; - - const auto * cur_node = &node; - bool is_valid_chain = true; - - while (is_valid_chain) - { - if (cur_node->result_name == expr_name) - break; - - chain.push(cur_node); - - if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) - { - is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); + out_column = result_column; + out_data_type = result_type; + return true; + } - const ActionsDAG::Node * next_node = nullptr; - for (const auto * arg : cur_node->children) - { - if (arg->column && isColumnConst(*arg->column)) - continue; + // If the first function takes no arguments, the input column cannot be transformed + if (functions[0]->getArgumentTypes().empty()) + { + return false; + } - if (next_node) - is_valid_chain = false; + // Cast the column to the argument type of the first function in the chain + auto in_argument_type = functions[0]->getArgumentTypes()[0]; + if (canBeSafelyCasted(result_type, in_argument_type)) + { + result_column = castColumnAccurate({result_column, result_type, ""}, in_argument_type); + result_type = in_argument_type; + } + // If the column cannot be cast accurately, cast with the OrNull variant; if all + // values were cast successfully (no nulls), unpack the nested column from the + // Nullable. Should any further functions require Nullable input, they will be + // able to cast it.
+ else + { + result_column = castColumnAccurateOrNull({result_column, result_type, ""}, in_argument_type); + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(in_argument_type); + } - next_node = arg; - } + for (const auto & func : functions) + { + if (func->getArgumentTypes().empty()) + return false; - if (!next_node) - is_valid_chain = false; + auto argument_type = func->getArgumentTypes()[0]; + if (!canBeSafelyCasted(result_type, argument_type)) + return false; - cur_node = next_node; - } - else if (cur_node->type == ActionsDAG::ActionType::ALIAS) - cur_node = cur_node->children.front(); - else - is_valid_chain = false; - } + result_column = castColumnAccurate({result_column, result_type, ""}, argument_type); + result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size()); + result_type = func->getResultType(); - if (is_valid_chain) + // Transforming nullable columns to the nested ones, in case no nulls found + if (result_column->isNullable()) + { + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) { - out_type = removeLowCardinality(out_type); - auto const_type = removeLowCardinality(cur_node->result_type); - auto const_column = out_type->createColumnConst(1, out_value); - auto const_value = (*castColumnAccurateOrNull({const_column, out_type, ""}, const_type))[0]; - - if (const_value.isNull()) + if (i != 0) return false; - - while (!chain.empty()) - { - const auto * func = chain.top(); - chain.pop(); - - if (func->type != ActionsDAG::ActionType::FUNCTION) - continue; - - const auto & func_name = func->function_base->getName(); - auto func_base = func->function_base; - const auto & arg_types = func_base->getArgumentTypes(); - if (date_time_parsing_functions.contains(func_name) && !arg_types.empty() && isStringOrFixedString(arg_types[0])) - { - auto func_or_null = FunctionFactory::instance().get(func_name + "OrNull", context); - ColumnsWithTypeAndName arguments; - int i = 0; - for (const auto & type : func->function_base->getArgumentTypes()) - arguments.push_back({nullptr, type, fmt::format("_{}", i++)}); - - func_base = func_or_null->build(arguments); - } - - if (func->children.size() == 1) - { - std::tie(const_value, const_type) - = applyFunctionForFieldOfUnknownType(func_base, const_type, const_value); - } - else if (func->children.size() == 2) - { - const auto * left = func->children[0]; - const auto * right = func->children[1]; - if (left->column && isColumnConst(*left->column)) - { - auto left_arg_type = left->result_type; - auto left_arg_value = (*left->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - left_arg_type, left_arg_value, const_type, const_value); - } - else - { - auto right_arg_type = right->result_type; - auto right_arg_value = (*right->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - const_type, const_value, right_arg_type, right_arg_value); - } - } - - if (const_value.isNull()) - return false; - } - - out_key_column_num = 
it->second; - out_key_column_type = sample_block.getByName(it->first).type; - out_value = const_value; - out_type = const_type; - return true; } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(func->getResultType()); } } + out_column = result_column; + out_data_type = result_type; - return false; + return true; } bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( @@ -1118,13 +1006,13 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, - out_type, + transform_functions, [](const IFunctionBase & func, const IDataType & type) { if (!func.hasInformationAboutMonotonicity()) @@ -1138,6 +1026,27 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( } return true; }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, + out_type, + transform_functions, + transformed_const_column, + transformed_const_type); + + if (!constant_transformed) + return false; + + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; + return true; } /// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` @@ -1173,28 +1082,48 @@ bool KeyCondition::canConstantBeWrappedByFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, + transform_functions, + [](const IFunctionBase & func, const IDataType &) { return func.isDeterministic(); }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, out_type, - [](const IFunctionBase & func, const IDataType &) - { - return func.isDeterministic(); - }); + transform_functions, + transformed_const_column, + transformed_const_type); + + if (!constant_transformed) + return false; + + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; + return true; } bool KeyCondition::tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num) + size_t & out_key_column_num, + bool & is_constant_transformed) { const auto & left_arg = func.getArgumentAt(0); out_key_column_num = 0; std::vector indexes_mapping; + std::vector set_transforming_chains; DataTypes data_types; auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index) @@ -1203,6 +1132,7 @@ bool KeyCondition::tryPrepareSetIndex( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; std::optional key_space_filling_curve_argument_pos; + MonotonicFunctionsChain set_transforming_chain; if (isKeyPossiblyWrappedByMonotonicFunctions( node, index_mapping.key_index, 
key_space_filling_curve_argument_pos, data_type, index_mapping.functions) && !key_space_filling_curve_argument_pos) /// We don't support the analysis of space-filling curves and IN set. @@ -1210,6 +1140,15 @@ bool KeyCondition::tryPrepareSetIndex( indexes_mapping.push_back(index_mapping); data_types.push_back(data_type); out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); + } + // For partition index, checking if set can be transformed to prune any partitions + else if (single_point && canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain)) + { + indexes_mapping.push_back(index_mapping); + data_types.push_back(data_type); + out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); } }; @@ -1275,6 +1214,23 @@ bool KeyCondition::tryPrepareSetIndex( auto set_element_type = set_types[set_element_index]; auto set_column = set_columns[set_element_index]; + if (!set_transforming_chains[indexes_mapping_index].empty()) + { + ColumnPtr transformed_set_column; + DataTypePtr transformed_set_type; + if (!applyFunctionChainToColumn( + set_column, + set_element_type, + set_transforming_chains[indexes_mapping_index], + transformed_set_column, + transformed_set_type)) + return false; + + set_column = transformed_set_column; + set_element_type = transformed_set_type; + is_constant_transformed = true; + } + if (canBeSafelyCasted(set_element_type, key_column_type)) { set_columns[set_element_index] = castColumn({set_column, set_element_type, {}}, key_column_type); @@ -1571,6 +1527,191 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( return false; } +/** When table's key has expression with these functions from a column, + * and when a column in a query is compared with a constant, such as: + * CREATE TABLE (x String) ORDER BY toDate(x) + * SELECT ... WHERE x LIKE 'Hello%' + * we want to apply the function to the constant for index analysis, + * but should modify it to pass on un-parsable values. + */ +static std::set date_time_parsing_functions = { + "toDate", + "toDate32", + "toDateTime", + "toDateTime64", + "parseDateTimeBestEffort", + "parseDateTimeBestEffortUS", + "parseDateTime32BestEffort", + "parseDateTime64BestEffort", + "parseDateTime", + "parseDateTimeInJodaSyntax", +}; + +/** The key functional expression constraint may be inferred from a plain column in the expression. + * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, + * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` + * condition also holds, so the index may be used to select only parts satisfying this condition. + * + * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the + * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). + * Instead, we can qualify only functions that do not transform the range (for example rounding), + * which while not strictly monotonic, are monotonic everywhere on the input range. 
+ */ +bool KeyCondition::extractMonotonicFunctionsChainFromKey( + ContextPtr context, + const String & expr_name, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, + MonotonicFunctionsChain & out_functions_chain, + std::function always_monotonic) const +{ + const auto & sample_block = key_expr->getSampleBlock(); + + for (const auto & node : key_expr->getNodes()) + { + auto it = key_columns.find(node.result_name); + if (it != key_columns.end()) + { + std::stack chain; + + const auto * cur_node = &node; + bool is_valid_chain = true; + + while (is_valid_chain) + { + if (cur_node->result_name == expr_name) + break; + + chain.push(cur_node); + + if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) + { + is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); + + const ActionsDAG::Node * next_node = nullptr; + for (const auto * arg : cur_node->children) + { + if (arg->column && isColumnConst(*arg->column)) + continue; + + if (next_node) + is_valid_chain = false; + + next_node = arg; + } + + if (!next_node) + is_valid_chain = false; + + cur_node = next_node; + } + else if (cur_node->type == ActionsDAG::ActionType::ALIAS) + cur_node = cur_node->children.front(); + else + is_valid_chain = false; + } + + if (is_valid_chain) + { + while (!chain.empty()) + { + const auto * func = chain.top(); + chain.pop(); + + if (func->type != ActionsDAG::ActionType::FUNCTION) + continue; + + auto func_name = func->function_base->getName(); + auto func_base = func->function_base; + + ColumnsWithTypeAndName arguments; + ColumnWithTypeAndName const_arg; + FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST; + + if (date_time_parsing_functions.contains(func_name)) + { + const auto & arg_types = func_base->getArgumentTypes(); + if (!arg_types.empty() && isStringOrFixedString(arg_types[0])) + { + func_name = func_name + "OrNull"; + } + + } + + auto func_builder = FunctionFactory::instance().tryGet(func_name, context); + + if (func->children.size() == 1) + { + arguments.push_back({nullptr, removeLowCardinality(func->children[0]->result_type), ""}); + } + else if (func->children.size() == 2) + { + const auto * left = func->children[0]; + const auto * right = func->children[1]; + if (left->column && isColumnConst(*left->column)) + { + const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""}; + arguments.push_back(const_arg); + arguments.push_back({nullptr, removeLowCardinality(right->result_type), ""}); + kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST; + } + else + { + const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""}; + arguments.push_back({nullptr, removeLowCardinality(left->result_type), ""}); + arguments.push_back(const_arg); + kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; + } + } + + auto out_func = func_builder->build(arguments); + if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST) + out_functions_chain.push_back(out_func); + else + out_functions_chain.push_back(std::make_shared(out_func, const_arg, kind)); + } + + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; + return true; + } + } + } + + return false; +} + +bool KeyCondition::canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain) +{ + // Checking if 
the column name matches any of the key subexpressions + String expr_name = node.getColumnName(); + + if (array_joined_column_names.contains(expr_name)) + return false; + + if (!key_subexpr_names.contains(expr_name)) + { + expr_name = node.getColumnNameWithModuloLegacy(); + + if (!key_subexpr_names.contains(expr_name)) + return false; + } + + return extractMonotonicFunctionsChainFromKey( + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_res_column_type, + out_functions_chain, + [](const IFunctionBase & func, const IDataType &) + { + return func.isDeterministic(); + }); +} static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name) { @@ -1649,7 +1790,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme if (functionIsInOrGlobalInOperator(func_name)) { - if (tryPrepareSetIndex(func, out, key_column_num)) + if (tryPrepareSetIndex(func, out, key_column_num, is_constant_transformed)) { key_arg_pos = 0; is_set_const = true; } diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 6e5956706aa5..9e2218d7a291 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -14,6 +14,7 @@ #include #include +#include "DataTypes/Serializations/ISerialization.h" namespace DB @@ -253,13 +254,12 @@ class KeyCondition DataTypePtr & out_key_column_type, std::vector & out_functions_chain); - bool transformConstantWithValidFunctions( + bool extractMonotonicFunctionsChainFromKey( ContextPtr context, const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, + MonotonicFunctionsChain & out_functions_chain, std::function always_monotonic) const; bool canConstantBeWrappedByMonotonicFunctions( @@ -276,13 +276,25 @@ class KeyCondition Field & out_value, DataTypePtr & out_type); + /// Checks if the node is a subexpression of any key column expression, + /// wrapped by deterministic functions. If so, returns `true` and + /// specifies the key column position / type. Besides that, it produces the + /// chain of functions which should be executed on the set to transform it + /// into key column values. + bool canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain); + /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. bool tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num); + size_t & out_key_column_num, + bool & is_constant_transformed); /// Checks that the index can not be used.
/// diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index bc64ef15cf14..c896a760597a 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -111,7 +113,7 @@ struct DeltaLakeMetadataImpl std::set result_files; NamesAndTypesList current_schema; DataLakePartitionColumns current_partition_columns; - const auto checkpoint_version = getCheckpointIfExists(result_files); + const auto checkpoint_version = getCheckpointIfExists(result_files, current_schema, current_partition_columns); if (checkpoint_version) { @@ -205,9 +207,32 @@ struct DeltaLakeMetadataImpl Poco::Dynamic::Var json = parser.parse(json_str); Poco::JSON::Object::Ptr object = json.extract(); - // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - // object->stringify(oss); - // LOG_TEST(log, "Metadata: {}", oss.str()); + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("metaData")) + { + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + const Poco::JSON::Object::Ptr & fields_object = fields_json.extract(); + + auto current_schema = parseMetadata(fields_object); + if (file_schema.empty()) + { + file_schema = current_schema; + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } + } if (object->has("add")) { @@ -230,7 +255,12 @@ struct DeltaLakeMetadataImpl const auto value = partition_values->getValue(partition_name); auto name_and_type = file_schema.tryGetByName(partition_name); if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toNamesAndTypesDescription()); + } auto field = getFieldValue(value, name_and_type->type); current_partition_columns.emplace_back(*name_and_type, field); @@ -246,50 +276,33 @@ struct DeltaLakeMetadataImpl auto path = object->get("remove").extract()->getValue("path"); result.erase(fs::path(configuration->getPath()) / path); } - if (object->has("metaData")) - { - const auto metadata_object = object->get("metaData").extract(); - const auto schema_object = metadata_object->getValue("schemaString"); - - Poco::JSON::Parser p; - Poco::Dynamic::Var fields_json = parser.parse(schema_object); - Poco::JSON::Object::Ptr fields_object = fields_json.extract(); - - const auto fields = fields_object->get("fields").extract(); - NamesAndTypesList current_schema; - for (size_t i = 0; i < fields->size(); ++i) - { - const auto field = fields->getObject(static_cast(i)); - auto column_name = field->getValue("name"); - auto type = field->getValue("type"); - auto is_nullable = field->getValue("nullable"); - - std::string physical_name; - auto schema_metadata_object = field->get("metadata").extract(); - if 
(schema_metadata_object->has("delta.columnMapping.physicalName")) - physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); - else - physical_name = column_name; - - LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", - column_name, type, is_nullable, physical_name); - - current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); - } + } + } - if (file_schema.empty()) - { - file_schema = current_schema; - } - else if (file_schema != current_schema) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Reading from files with different schema is not possible " - "({} is different from {})", - file_schema.toString(), current_schema.toString()); - } - } + NamesAndTypesList parseMetadata(const Poco::JSON::Object::Ptr & metadata_json) + { + NamesAndTypesList schema; + const auto fields = metadata_json->get("fields").extract(); + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); } + return schema; } DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) @@ -505,7 +518,10 @@ struct DeltaLakeMetadataImpl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result) + size_t getCheckpointIfExists( + std::set & result, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns) { const auto version = readLastCheckpointIfExists(); if (!version) @@ -526,7 +542,8 @@ struct DeltaLakeMetadataImpl auto columns = ParquetSchemaReader(*buf, format_settings).readSchema(); /// Read only columns that we need. 
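parseMetadata(), factored out above, is what both the JSON log path and the checkpoint path below use to turn Delta's schemaString into a ClickHouse schema, preferring the column-mapping physical name when one is present. A trimmed-down, standalone sketch of that flow, assuming the same Poco JSON calls the hunk itself uses:

    #include <Poco/JSON/Array.h>
    #include <Poco/JSON/Object.h>
    #include <Poco/JSON/Parser.h>
    #include <iostream>
    #include <string>

    int main()
    {
        // A minimal Delta `schemaString` with a column-mapping physical name.
        const std::string schema_string = R"({"fields":[{"name":"id","type":"long","nullable":true,"metadata":{"delta.columnMapping.physicalName":"col-1234"}}]})";

        Poco::JSON::Parser parser;
        auto object = parser.parse(schema_string).extract<Poco::JSON::Object::Ptr>();
        auto fields = object->get("fields").extract<Poco::JSON::Array::Ptr>();

        for (size_t i = 0; i < fields->size(); ++i)
        {
            auto field = fields->getObject(static_cast<unsigned>(i));
            auto name = field->getValue<std::string>("name");

            // Prefer the physical name written by Delta column mapping, if any.
            auto metadata = field->get("metadata").extract<Poco::JSON::Object::Ptr>();
            if (!metadata.isNull() && metadata->has("delta.columnMapping.physicalName"))
                name = metadata->getValue<std::string>("delta.columnMapping.physicalName");

            std::cout << name << ": " << field->getValue<std::string>("type") << '\n';
        }
    }

The diff resumes below with the checkpoint reader widening its column filter from `add`/`remove` to `add`/`metaData`, so the schema is available there too.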
- columns.filterColumns(NameSet{"add", "remove"}); + auto filter_column_names = NameSet{"add", "metaData"}; + columns.filterColumns(filter_column_names); Block header; for (const auto & column : columns) header.insert({column.type->createColumn(), column.type, column.name}); @@ -540,9 +557,6 @@ struct DeltaLakeMetadataImpl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr file_schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); - ArrowColumnToCHColumn column_reader( header, "Parquet", format_settings.parquet.allow_missing_columns, @@ -553,29 +567,85 @@ struct DeltaLakeMetadataImpl std::shared_ptr table; THROW_ARROW_NOT_OK(reader->ReadTable(&table)); - Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); - const auto & res_columns = res.getColumns(); + Chunk chunk = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); + auto res_block = header.cloneWithColumns(chunk.detachColumns()); + res_block = Nested::flatten(res_block); - if (res_columns.size() != 2) + const auto * nullable_path_column = assert_cast(res_block.getByName("add.path").column.get()); + const auto & path_column = assert_cast(nullable_path_column->getNestedColumn()); + + const auto * nullable_schema_column = assert_cast(res_block.getByName("metaData.schemaString").column.get()); + const auto & schema_column = assert_cast(nullable_schema_column->getNestedColumn()); + + auto partition_values_column_raw = res_block.getByName("add.partitionValues").column; + const auto & partition_values_column = assert_cast(*partition_values_column_raw); + + for (size_t i = 0; i < path_column.size(); ++i) { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Unexpected number of columns: {} (having: {}, expected: {})", - res_columns.size(), res.dumpStructure(), header.dumpStructure()); + const auto metadata = String(schema_column.getDataAt(i)); + if (!metadata.empty()) + { + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(metadata); + const Poco::JSON::Object::Ptr & object = json.extract(); + + auto current_schema = parseMetadata(object); + if (file_schema.empty()) + { + file_schema = current_schema; + LOG_TEST(log, "Processed schema from checkpoint: {}", file_schema.toString()); + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } + } } - const auto * tuple_column = assert_cast(res_columns[0].get()); - const auto & nullable_column = assert_cast(tuple_column->getColumn(0)); - const auto & path_column = assert_cast(nullable_column.getNestedColumn()); for (size_t i = 0; i < path_column.size(); ++i) { - const auto filename = String(path_column.getDataAt(i)); - if (filename.empty()) + const auto path = String(path_column.getDataAt(i)); + if (path.empty()) continue; - LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + Field map; + partition_values_column.get(i, map); + auto partition_values_map = map.safeGet(); + if (!partition_values_map.empty()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & map_value : partition_values_map) + { + const auto 
diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h
index f1217bc97294..c8603fccb866 100644
--- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h
+++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h
@@ -41,6 +41,7 @@ class IStorageDataLake final : public StorageObjectStorage
         auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true);
         DataLakeMetadataPtr metadata;
         NamesAndTypesList schema_from_metadata;
+        const bool use_schema_from_metadata = columns_.empty();
 
         if (base_configuration->format == "auto")
             base_configuration->format = "Parquet";
@@ -50,8 +51,9 @@ class IStorageDataLake final : public StorageObjectStorage
         try
         {
             metadata = DataLakeMetadata::create(object_storage, base_configuration, context);
-            schema_from_metadata = metadata->getTableSchema();
             configuration->setPaths(metadata->getDataFiles());
+            if (use_schema_from_metadata)
+                schema_from_metadata = metadata->getTableSchema();
         }
         catch (...)
         {
@@ -66,7 +68,7 @@ class IStorageDataLake final : public StorageObjectStorage
 
         return std::make_shared<IStorageDataLake<DataLakeMetadata>>(
             base_configuration, std::move(metadata), configuration, object_storage, context, table_id_,
-            columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_,
+            use_schema_from_metadata ? ColumnsDescription(schema_from_metadata) : columns_,
             constraints_, comment_, format_settings_);
     }
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index 6940f10cb919..a9a7e062076b 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -206,23 +206,25 @@ Chunk StorageObjectStorageSource::generate()
         if (!partition_columns.empty() && chunk_size && chunk.hasColumns())
         {
             auto partition_values = partition_columns.find(filename);
-
-            for (const auto & [name_and_type, value] : partition_values->second)
+            if (partition_values != partition_columns.end())
             {
-                if (!read_from_format_info.source_header.has(name_and_type.name))
-                    continue;
-
-                const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name);
-                auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst();
-
-                /// This column is filled with default value now, remove it.
-                chunk.erase(column_pos);
-
-                /// Add correct values.
- if (chunk.hasColumns()) - chunk.addColumn(column_pos, std::move(partition_column)); - else - chunk.addColumn(std::move(partition_column)); + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; + + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); + + /// Add correct values. + if (column_pos < chunk.getNumColumns()) + chunk.addColumn(column_pos, std::move(partition_column)); + else + chunk.addColumn(std::move(partition_column)); + } } } return chunk; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 546ad90f9e9e..9fb0f082c6cc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,20 +5,21 @@ #include #include +#include #include #include #include #include +#include #include #include #include #include +#include #include +#include #include #include -#include -#include -#include #include @@ -5272,6 +5273,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (shutdown_prepared_called.exchange(true)) return; + LOG_TRACE(log, "Start preparing for shutdown"); + try { auto settings_ptr = getSettings(); @@ -5282,7 +5285,11 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() stopBeingLeader(); if (attach_thread) + { attach_thread->shutdown(); + LOG_TRACE(log, "The attach thread is shutdown"); + } + restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again @@ -5295,6 +5302,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() shutdown_deadline.emplace(std::chrono::system_clock::now()); throw; } + + LOG_TRACE(log, "Finished preparing for shutdown"); } void StorageReplicatedMergeTree::partialShutdown() @@ -5332,6 +5341,8 @@ void StorageReplicatedMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; + LOG_TRACE(log, "Shutdown started"); + flushAndPrepareForShutdown(); if (!shutdown_deadline.has_value()) @@ -5374,6 +5385,7 @@ void StorageReplicatedMergeTree::shutdown(bool) /// Wait for all of them std::lock_guard lock(data_parts_exchange_ptr->rwlock); } + LOG_TRACE(log, "Shutdown finished"); } diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 9f849052071e..adc1074b1fe3 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -35,7 +35,6 @@ void registerStorageFuzzJSON(StorageFactory & factory); void registerStorageS3(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); -void registerStorageAzureQueue(StorageFactory & factory); #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); @@ -45,6 +44,10 @@ void registerStorageIceberg(StorageFactory & factory); #endif #endif +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzureQueue(StorageFactory & factory); +#endif + #if USE_HDFS #if USE_HIVE void registerStorageHive(StorageFactory & factory); diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index 9bc44025826b..f12d27979ce5 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -15,3 +15,4 @@ warn_return_any = True 
no_implicit_reexport = True strict_equality = True extra_checks = True +ignore_missing_imports = True \ No newline at end of file diff --git a/tests/ci/ci.py b/tests/ci/ci.py index af2f4c0a1fc0..b4a3c7ec8493 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -15,7 +15,7 @@ from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string +from ci_utils import GHActions, normalize_string, Shell from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -53,6 +53,7 @@ from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings +from ci_buddy import CIBuddy from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -262,6 +263,8 @@ def run_docker_command( def _pre_action(s3, indata, pr_info): + print("Clear dmesg") + Shell.run("sudo dmesg --clear ||:") CommitStatusData.cleanup() JobReport.cleanup() BuildResult.cleanup() @@ -1118,6 +1121,12 @@ def main() -> int: ### POST action: start elif args.post: + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + print("WARNING: OOM while job execution") + CIBuddy().post_error("Out Of Memory") + job_report = JobReport.load() if JobReport.exist() else None if job_report: ch_helper = ClickHouseHelper() diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py new file mode 100644 index 000000000000..d03f5d819ecc --- /dev/null +++ b/tests/ci/ci_buddy.py @@ -0,0 +1,88 @@ +import json +import os + +import boto3 +import requests +from botocore.exceptions import ClientError + +from pr_info import PRInfo +from ci_utils import Shell + + +class CIBuddy: + _HEADERS = {"Content-Type": "application/json"} + + def __init__(self, dry_run=False): + self.repo = os.getenv("GITHUB_REPOSITORY", "") + self.dry_run = dry_run + res = self._get_webhooks() + self.test_channel = "" + self.dev_ci_channel = "" + if res: + self.test_channel = json.loads(res)["test_channel"] + self.dev_ci_channel = json.loads(res)["ci_channel"] + self.job_name = os.getenv("CHECK_NAME", "unknown") + pr_info = PRInfo() + self.pr_number = pr_info.number + self.head_ref = pr_info.head_ref + self.commit_url = pr_info.commit_html_url + + @staticmethod + def _get_webhooks(): + name = "ci_buddy_web_hooks" + + session = boto3.Session(region_name="us-east-1") # Replace with your region + ssm_client = session.client("ssm") + json_string = None + try: + response = ssm_client.get_parameter( + Name=name, + WithDecryption=True, # Set to True if the parameter is a SecureString + ) + json_string = response["Parameter"]["Value"] + except ClientError as e: + print(f"An error occurred: {e}") + + return json_string + + def post(self, message, dry_run=None): + if dry_run is None: + dry_run = self.dry_run + print(f"Posting slack message, dry_run [{dry_run}]") + if dry_run: + url = self.test_channel + else: + url = self.dev_ci_channel + data = {"text": message} + try: + requests.post(url, headers=self._HEADERS, data=json.dumps(data), timeout=10) + except Exception as e: + print(f"ERROR: Failed to post message, ex {e}") + + def post_error(self, error_description, job_name="", with_instance_info=True): + instance_id, instance_type = "unknown", "unknown" + if with_instance_info: + instance_id = Shell.run("ec2metadata --instance-id") or instance_id + instance_type = Shell.run("ec2metadata --instance-type") or instance_type + if not job_name: + job_name = 
os.getenv("CHECK_NAME", "unknown")
+        line_err = f":red_circle: {error_description} :red_circle:\n\n"
+        line_ghr = f"   *Runner:* `{instance_type}`, `{instance_id}`\n"
+        line_job = f"   *Job:* `{job_name}`\n"
+        line_pr_ = f"   *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>\n"
+        line_br_ = f"   *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n"
+        message = line_err
+        message += line_job
+        if with_instance_info:
+            message += line_ghr
+        if self.pr_number > 0:
+            message += line_pr_
+        else:
+            message += line_br_
+        self.post(message)
+
+
+if __name__ == "__main__":
+    # test
+    buddy = CIBuddy(dry_run=True)
+    buddy.post_error("Out of memory")
diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py
index e7034d0b1044..629f37289a96 100644
--- a/tests/ci/ci_utils.py
+++ b/tests/ci/ci_utils.py
@@ -1,4 +1,5 @@
 import os
+import subprocess
 from contextlib import contextmanager
 from pathlib import Path
 from typing import Any, Iterator, List, Union
@@ -42,3 +43,43 @@ def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None:
         for line in lines:
             print(line)
         print("::endgroup::")
+
+
+class Shell:
+    @classmethod
+    def run_strict(cls, command):
+        subprocess.run(
+            command + " 2>&1",
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=True,
+        )
+
+    @classmethod
+    def run(cls, command):
+        res = ""
+        result = subprocess.run(
+            command,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=False,
+        )
+        if result.returncode == 0:
+            res = result.stdout
+        return res.strip()
+
+    @classmethod
+    def check(cls, command):
+        result = subprocess.run(
+            command + " 2>&1",
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=False,
+        )
+        return result.returncode == 0
diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py
index d8e5a7fa27fa..4440d0d332cd 100644
--- a/tests/ci/functional_test_check.py
+++ b/tests/ci/functional_test_check.py
@@ -104,6 +104,8 @@ def get_run_command(
 
     return (
         f"docker run --volume={builds_path}:/package_folder "
+        # For dmesg and sysctl
+        "--privileged "
         f"{ci_logs_args}"
         f"--volume={repo_path}/tests:/usr/share/clickhouse-test "
         f"{volume_with_broken_test}"
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index af4f60a50c5d..958dde0606fb 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -2501,28 +2501,13 @@ def do_run_tests(jobs, test_suite: TestSuite):
                 )
             ],
         )
-        while not future_seq.ready():
-            sleep(0.1)
-            if server_died.is_set():
-                sleep(5)
-                break
+        future_seq.wait()
 
-        while not future.ready():
-            sleep(0.1)
-            if server_died.is_set():
-                print("== Server died ==")
-                sleep(5)
-                break
-
-        print("future wait DONE")
+        future.wait()
     finally:
-        print("pool.terminate")
         pool.terminate()
-        print("pool.close")
         pool.close()
-        print("pool.join")
         pool.join()
-        print("pool.join DONE")
 
     if not args.run_sequential_tests_in_parallel:
         run_tests_array(
diff --git a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml
index 2d7946d1587b..43d80ee6f693 100644
--- a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml
+++ b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml
@@ -9,4 +9,21 @@
             <key1>value1</key1>
         </collection1>
     </named_collections>
+
+    <remote_servers>
+        <replicated_nc_nodes_cluster>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>node_with_keeper</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node_with_keeper_2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <allow_distributed_ddl_queries>true</allow_distributed_ddl_queries>
+        </replicated_nc_nodes_cluster>
+    </remote_servers>
 </clickhouse>
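The integration test added below in test_named_collections/test.py drives this cluster config through a parametrized expectation: one parametrization runs the ON CLUSTER queries expecting success, the other expects QueryRuntimeException. For reference, a self-contained sketch of that pytest idiom, with a hypothetical might_fail in place of the real queries:

```python
from contextlib import nullcontext as does_not_raise

import pytest


def might_fail(strict: bool) -> None:
    # Stand-in for the ON CLUSTER named-collection queries.
    if strict:
        raise ValueError("rejected")


@pytest.mark.parametrize(
    "strict, expected_raise",
    [(False, does_not_raise()), (True, pytest.raises(ValueError))],
)
def test_might_fail(strict, expected_raise):
    # One test body serves both outcomes: the context manager encodes
    # whether the call should pass silently or raise.
    with expected_raise:
        might_fail(strict)
```

diff --git a/tests/integration/test_named_collections/configs/users.d/users.xml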
b/tests/integration/test_named_collections/configs/users.d/users.xml
index 15da914f666b..7d4f0543ff1a 100644
--- a/tests/integration/test_named_collections/configs/users.d/users.xml
+++ b/tests/integration/test_named_collections/configs/users.d/users.xml
@@ -1,4 +1,9 @@
 <clickhouse>
+    <profiles>
+        <default>
+            <ignore_on_cluster_for_replicated_named_collections_queries>0</ignore_on_cluster_for_replicated_named_collections_queries>
+        </default>
+    </profiles>
diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py
index dbc502236c0e..32846c79d23a 100644
--- a/tests/integration/test_named_collections/test.py
+++ b/tests/integration/test_named_collections/test.py
@@ -3,6 +3,8 @@ import os
 import time
 
 from helpers.cluster import ClickHouseCluster
+from contextlib import nullcontext as does_not_raise
+from helpers.client import QueryRuntimeException
 
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 NAMED_COLLECTIONS_CONFIG = os.path.join(
@@ -761,3 +763,32 @@ def check_dropped(node):
 
     check_dropped(node1)
     check_dropped(node2)
+
+
+@pytest.mark.parametrize(
+    "ignore, expected_raise",
+    [(True, does_not_raise()), (False, pytest.raises(QueryRuntimeException))],
+)
+def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise):
+    node = cluster.instances["node_with_keeper"]
+
+    replace_in_users_config(
+        node,
+        "ignore_on_cluster_for_replicated_named_collections_queries>.",
+        f"ignore_on_cluster_for_replicated_named_collections_queries>{int(ignore)}",
+    )
+    node.query("SYSTEM RELOAD CONFIG")
+
+    with expected_raise:
+        node.query(
+            "DROP NAMED COLLECTION IF EXISTS test_nc ON CLUSTER `replicated_nc_nodes_cluster`"
+        )
+        node.query(
+            f"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE"
+        )
+        node.query(
+            f"ALTER NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` SET key2=3"
+        )
+        node.query(
+            f"DROP NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster`"
+        )
diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py
index affa3f32cbee..375fe58d7417 100644
--- a/tests/integration/test_parallel_replicas_custom_key/test.py
+++ b/tests/integration/test_parallel_replicas_custom_key/test.py
@@ -161,6 +161,9 @@ def test_parallel_replicas_custom_key_replicatedmergetree(
 
     insert_data("test_table_for_rmt", row_num, all_nodes=False)
 
+    for node in nodes:
+        node.query("SYSTEM SYNC REPLICA test_table_for_rmt LIGHTWEIGHT")
+
     expected_result = ""
     for i in range(4):
         expected_result += f"{i}\t250\n"
diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py
index 4cb71895881b..d3dd7cfe52a5 100644
--- a/tests/integration/test_storage_delta/test.py
+++ b/tests/integration/test_storage_delta/test.py
@@ -596,19 +596,116 @@ def test_partition_columns(started_cluster):
     )
     assert result == 1
 
-    # instance.query(
-    #     f"""
-    #        DROP TABLE IF EXISTS {TABLE_NAME};
-    #        CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime)
-    #        ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')"""
-    # )
-    # assert (
-    #     int(
-    #         instance.query(
-    #             f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')"
-    #         )
-    #     )
-    #     == num_rows - 1
-    # )
-    # instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}")
-    # assert False
+    instance.query(
+        f"""
+        DROP TABLE IF EXISTS {TABLE_NAME};
+        CREATE TABLE {TABLE_NAME} (a Nullable(Int32), b Nullable(String), c Nullable(Date32), d Nullable(Int32), e Nullable(Bool))
+
ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/05')" + ) + ) + == 1 + ) + + # Subset of columns should work. + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (b Nullable(String), c Nullable(Date32), d Nullable(Int32)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """test1 2000-01-01 1 +test2 2000-01-02 2 +test3 2000-01-03 3 +test4 2000-01-04 4 +test5 2000-01-05 5 +test6 2000-01-06 6 +test7 2000-01-07 7 +test8 2000-01-08 8 +test9 2000-01-09 9""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + for i in range(num_rows + 1, 2 * num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-{i}", "%Y-%m-%d"), + i, + False, + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_columns).save( + f"/{TABLE_NAME}" + ) + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + ok = False + for file in files: + if file.endswith("last_checkpoint"): + ok = True + assert ok + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows * 2 + + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false +10 test10 2000-01-10 10 false +11 test11 2000-01-11 11 false +12 test12 2000-01-12 12 false +13 test13 2000-01-13 13 false +14 test14 2000-01-14 14 false +15 test15 2000-01-15 15 false +16 test16 2000-01-16 16 false +17 test17 2000-01-17 17 false +18 test18 2000-01-18 18 false""" + == instance.query( + f""" +SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') ORDER BY c + """ + ).strip() + ) + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/15')" + ) + ) + == 1 + ) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3240039ee811..f885a3507acb 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -78,13 +78,13 @@ def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180): def kill_rabbitmq(rabbitmq_id): p = subprocess.Popen(("docker", "stop", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) return p.returncode == 0 def revive_rabbitmq(rabbitmq_id, cookie): p = subprocess.Popen(("docker", "start", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) wait_rabbitmq_to_start(rabbitmq_id, cookie) diff 
--git a/tests/performance/replaceRegexp_fallback.xml b/tests/performance/replaceRegexp_fallback.xml
index 926e66c702f5..509257efeb53 100644
--- a/tests/performance/replaceRegexp_fallback.xml
+++ b/tests/performance/replaceRegexp_fallback.xml
@@ -1,12 +1,12 @@
 <test>
->
->
+
+
 
- >
-    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null</query>
-    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null</query>
+
+    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null</query>
+    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null</query>
 
- >
- >
-    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null</query>
-    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null</query>
+
+
+    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null</query>
+    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null</query>
diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh
index 4f09d1975966..7d6389bb86ed 100755
--- a/tests/queries/0_stateless/01268_procfs_metrics.sh
+++ b/tests/queries/0_stateless/01268_procfs_metrics.sh
@@ -15,7 +15,7 @@ tmp_path=$(mktemp "$CURDIR/01268_procfs_metrics.XXXXXX")
 trap 'rm -f $tmp_path' EXIT
 
 truncate -s1025 "$tmp_path"
-$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars
+$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events --storage_file_read_method=pread -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars
 # NOTE: that OSCPUVirtualTimeMicroseconds is in microseconds, so 1e6 is not enough.
$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM numbers(1e8) FORMAT Null" |& grep -m1 -F -o -e OSCPUVirtualTimeMicroseconds exit 0 diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a9801e3b910f..f5f31c4a5633 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -10,7 +10,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -34,7 +34,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -58,7 +58,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -82,7 +82,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -106,7 +106,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -130,7 +130,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -154,7 +154,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -178,7 +178,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, 
max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 9d74474c1a4a..5918035e9c35 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -28,7 +28,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and diff --git a/tests/queries/0_stateless/01323_too_many_threads_bug.sql b/tests/queries/0_stateless/01323_too_many_threads_bug.sql index c377e2c7570a..5bf282808c33 100644 --- a/tests/queries/0_stateless/01323_too_many_threads_bug.sql +++ b/tests/queries/0_stateless/01323_too_many_threads_bug.sql @@ -14,11 +14,11 @@ set log_queries = 1; select x from table_01323_many_parts limit 10 format Null; system flush logs; -select arrayUniq(thread_ids) <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; select x from table_01323_many_parts order by x limit 10 format Null; system flush logs; -select arrayUniq(thread_ids) <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; drop table if exists table_01323_many_parts; diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh index ce545a273171..d4a4e54acbd2 100755 --- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh - -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02240.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME diff --git a/tests/queries/0_stateless/02350_views_max_insert_threads.sql b/tests/queries/0_stateless/02350_views_max_insert_threads.sql index 25e0fdeadbaa..a4f7e2546ed8 100644 --- a/tests/queries/0_stateless/02350_views_max_insert_threads.sql +++ b/tests/queries/0_stateless/02350_views_max_insert_threads.sql @@ -10,7 +10,7 @@ create materialized view t_mv Engine = Null AS select now() as ts, max(a) from t insert into t select * from numbers_mt(10e6) settings max_threads = 16, max_insert_threads=16, max_block_size=100000; system flush logs; -select arrayUniq(thread_ids)>=16 from system.query_log where +select peak_threads_usage>=16 from system.query_log where event_date >= yesterday() and current_database = currentDatabase() and type = 'QueryFinish' and diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference index d81cc0710eb6..daaac9e30302 100644 --- a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference @@ -1 +1,2 @@ 42 +42 diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql index ecdf4d506353..274940f50a3b 100644 --- a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql @@ -17,3 +17,27 @@ ENGINE = MergeTree ORDER BY conversation; INSERT INTO t2(conversation) VALUES (42); select * from t2; + +drop table t1; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +drop table t2; + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +INSERT INTO t2(conversation) VALUES (42); +select * from t2; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index f0463509b80f..4eb7e74446d9 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -36,19 +36,10 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 3 3 3 33 \N \N \N \N -- aliases defined in the join condition are valid -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -1 42 \N \N \N 0 -2 2 2 2 1 1 -3 3 3 33 1 1 -\N \N 4 42 \N 0 -\N \N \N \N \N 1 -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -1 42 \N \N \N 0 -2 2 2 2 1 1 -3 3 3 33 1 1 -\N \N 4 42 \N 0 -\N \N \N \N \N 0 -\N \N \N \N \N 0 +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 
FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; + -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; 2 2 2 2 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 67918f4302fb..f7813e2a1b4f 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -36,8 +36,9 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- aliases defined in the join condition are valid -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference new file mode 100644 index 000000000000..3a6727b70e88 --- /dev/null +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference @@ -0,0 +1,50 @@ +-- Single partition by function +0 +2 +-- Nested partition by function +1 +2 +1 +1 +-- Nested partition by function, LowCardinality +1 +2 +1 +1 +-- Nested partition by function, Nullable +1 +2 +1 +1 +-- Nested partition by function, LowCardinality + Nullable +1 +2 +1 +1 +-- Non-safe cast +2 +2 +-- Multiple partition columns +1 +1 +1 +2 +-- LowCardinality set +1 +1 +-- Nullable set +1 +1 +-- LowCardinality + Nullable set +1 +1 +-- Not failing with date parsing functions +1 +0 +-- Pruning + not failing with nested date parsing functions +1 +2 +0 +-- Empty transform functions +2 +1 diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql new file mode 100644 index 000000000000..8ffabacaa8c2 --- /dev/null +++ 
b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql @@ -0,0 +1,258 @@ +SELECT '-- Single partition by function'; + +DROP TABLE IF EXISTS 03173_single_function; +CREATE TABLE 03173_single_function ( + dt Date, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY toMonth(dt); + +INSERT INTO 03173_single_function +SELECT toDate('2000-01-01') + 10 * number FROM numbers(50) +UNION ALL +SELECT toDate('2100-01-01') + 10 * number FROM numbers(50); +OPTIMIZE TABLE 03173_single_function FINAL; + +SELECT count() FROM 03173_single_function WHERE dt IN ('2024-01-20', '2024-05-25') SETTINGS log_comment='03173_single_function'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_single_function'; + +DROP TABLE IF EXISTS 03173_single_function; + +SELECT '-- Nested partition by function'; + +DROP TABLE IF EXISTS 03173_nested_function; +CREATE TABLE 03173_nested_function( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nested_function SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function FINAL; + +SELECT count() FROM 03173_nested_function WHERE id IN (10) SETTINGS log_comment='03173_nested_function'; +SELECT count() FROM 03173_nested_function WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr'; + +DROP TABLE IF EXISTS 03173_nested_function; + +SELECT '-- Nested partition by function, LowCardinality'; + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS 03173_nested_function_lc; +CREATE TABLE 03173_nested_function_lc( + id LowCardinality(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nested_function_lc SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_lc FINAL; + +SELECT count() FROM 03173_nested_function_lc WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc'; +SELECT count() FROM 03173_nested_function_lc WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc'; + +DROP TABLE IF EXISTS 03173_nested_function_lc; + +SELECT '-- Nested partition by function, Nullable'; + +DROP TABLE IF EXISTS 03173_nested_function_null; +CREATE TABLE 03173_nested_function_null( + id Nullable(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3 +SETTINGS allow_nullable_key=1; + +INSERT INTO 03173_nested_function_null SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_null FINAL; + +SELECT count() FROM 03173_nested_function_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_null'; +SELECT count() FROM 
03173_nested_function_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_null';
+SYSTEM FLUSH LOGS;
+SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_null';
+SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_null';
+
+DROP TABLE IF EXISTS 03173_nested_function_null;
+
+SELECT '-- Nested partition by function, LowCardinality + Nullable';
+
+DROP TABLE IF EXISTS 03173_nested_function_lc_null;
+
+SET allow_suspicious_low_cardinality_types = 1;
+CREATE TABLE 03173_nested_function_lc_null(
+    id LowCardinality(Nullable(Int32)),
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+PARTITION BY xxHash32(id) % 3
+SETTINGS allow_nullable_key=1;
+
+INSERT INTO 03173_nested_function_lc_null SELECT number FROM numbers(100);
+OPTIMIZE TABLE 03173_nested_function_lc_null FINAL;
+
+SELECT count() FROM 03173_nested_function_lc_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc_null';
+SELECT count() FROM 03173_nested_function_lc_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc_null';
+SYSTEM FLUSH LOGS;
+SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc_null';
+SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc_null';
+
+DROP TABLE IF EXISTS 03173_nested_function_lc_null;
+
+SELECT '-- Non-safe cast';
+
+DROP TABLE IF EXISTS 03173_nonsafe_cast;
+CREATE TABLE 03173_nonsafe_cast(
+    id Int64,
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+PARTITION BY xxHash32(id) % 3;
+
+INSERT INTO 03173_nonsafe_cast SELECT number FROM numbers(100);
+OPTIMIZE TABLE 03173_nonsafe_cast FINAL;
+
+SELECT count() FROM 03173_nonsafe_cast WHERE id IN (SELECT '50' UNION ALL SELECT '99') SETTINGS log_comment='03173_nonsafe_cast';
+SYSTEM FLUSH LOGS;
+SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nonsafe_cast';
+
+DROP TABLE IF EXISTS 03173_nonsafe_cast;
+
+SELECT '-- Multiple partition columns';
+
+DROP TABLE IF EXISTS 03173_multiple_partition_cols;
+CREATE TABLE 03173_multiple_partition_cols (
+    key1 Int32,
+    key2 Int32
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+PARTITION BY (intDiv(key1, 50), xxHash32(key2) % 3);
+
+INSERT INTO 03173_multiple_partition_cols SELECT number, number FROM numbers(100);
+OPTIMIZE TABLE 03173_multiple_partition_cols FINAL;
+
+SELECT count() FROM 03173_multiple_partition_cols WHERE key2 IN (4) SETTINGS log_comment='03173_multiple_columns';
+SELECT count() FROM 03173_multiple_partition_cols WHERE xxHash32(key2) IN (4251411170) SETTINGS log_comment='03173_multiple_columns_subexpr';
+SYSTEM FLUSH LOGS;
+SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns';
+-- Due to xxHash32() in the WHERE condition, MinMax is unable to eliminate any parts,
+-- so partition pruning leaves two parts (for key1 // 50 = 0 and key1 // 50 = 1)
+SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 
'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns_subexpr'; + +-- Preparing base table for filtering by LowCardinality/Nullable sets +DROP TABLE IF EXISTS 03173_base_data_source; +CREATE TABLE 03173_base_data_source( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_base_data_source SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_base_data_source FINAL; + +SELECT '-- LowCardinality set'; + +SET allow_suspicious_low_cardinality_types = 1; +DROP TABLE IF EXISTS 03173_low_cardinality_set; +CREATE TABLE 03173_low_cardinality_set (id LowCardinality(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_low_cardinality_set) SETTINGS log_comment='03173_low_cardinality_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_low_cardinality_set'; + +DROP TABLE IF EXISTS 03173_low_cardinality_set; + +SELECT '-- Nullable set'; + +DROP TABLE IF EXISTS 03173_nullable_set; +CREATE TABLE 03173_nullable_set (id Nullable(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_nullable_set) SETTINGS log_comment='03173_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nullable_set'; + +DROP TABLE IF EXISTS 03173_nullable_set; + +SELECT '-- LowCardinality + Nullable set'; + +DROP TABLE IF EXISTS 03173_lc_nullable_set; +CREATE TABLE 03173_lc_nullable_set (id LowCardinality(Nullable(Int32))) ENGINE=Memory AS SELECT 10 UNION ALL SELECT NULL; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_lc_nullable_set) SETTINGS log_comment='03173_lc_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_lc_nullable_set'; + +DROP TABLE IF EXISTS 03173_lc_nullable_set; + +SELECT '-- Not failing with date parsing functions'; + +DROP TABLE IF EXISTS 03173_date_parsing; +CREATE TABLE 03173_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toDate(id); + +INSERT INTO 03173_date_parsing +SELECT toString(toDate('2023-04-01') + number) +FROM numbers(20); + +SELECT count() FROM 03173_date_parsing WHERE id IN ('2023-04-02', '2023-05-02'); +SELECT count() FROM 03173_date_parsing WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_date_parsing; + +SELECT '-- Pruning + not failing with nested date parsing functions'; + +DROP TABLE IF EXISTS 03173_nested_date_parsing; +CREATE TABLE 03173_nested_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toMonth(toDate(id)); + +INSERT INTO 03173_nested_date_parsing +SELECT toString(toDate('2000-01-01') + 10 * number) FROM numbers(50) +UNION ALL +SELECT toString(toDate('2100-01-01') + 10 * number) FROM numbers(50); + +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_date_parsing'; +SELECT count() FROM 03173_nested_date_parsing 
WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_nested_date_parsing; + +SELECT '-- Empty transform functions'; + +DROP TABLE IF EXISTS 03173_empty_transform; +CREATE TABLE 03173_empty_transform( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_empty_transform SELECT number FROM numbers(6); +OPTIMIZE TABLE 03173_empty_transform FINAL; + +SELECT id FROM 03173_empty_transform WHERE xxHash32(id) % 3 IN (xxHash32(2::Int32) % 3) SETTINGS log_comment='03173_empty_transform'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_empty_transform'; + +DROP TABLE IF EXISTS 03173_empty_transform; diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.reference b/tests/queries/0_stateless/03199_join_with_materialized_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.sql b/tests/queries/0_stateless/03199_join_with_materialized_column.sql new file mode 100644 index 000000000000..8c53c5b3e666 --- /dev/null +++ b/tests/queries/0_stateless/03199_join_with_materialized_column.sql @@ -0,0 +1,6 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS table_with_materialized; +CREATE TABLE table_with_materialized (col String MATERIALIZED 'A') ENGINE = Memory; +SELECT number FROM numbers(1) AS n, table_with_materialized; +DROP TABLE table_with_materialized;
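Aside: every 03173 query above follows the same verification pattern: tag the SELECT under test with a log_comment, SYSTEM FLUSH LOGS, then read ProfileEvents['SelectedParts'] back from system.query_log to see how many data parts survived partition pruning. A minimal sketch of that loop driven from Python, assuming a clickhouse-client binary on PATH and the 03173_single_function table created above:

```python
import subprocess


def ch(query: str) -> str:
    # Run one query through clickhouse-client and return its stdout.
    return subprocess.run(
        ["clickhouse-client", "--query", query],
        capture_output=True, text=True, check=True,
    ).stdout.strip()


# Tag the query under test so it can be located in the query log later.
ch("SELECT count() FROM 03173_single_function WHERE dt IN ('2024-01-20', '2024-05-25') "
   "SETTINGS log_comment='check_pruning'")
ch("SYSTEM FLUSH LOGS")

# SelectedParts counts the parts left after partition pruning; with
# PARTITION BY toMonth(dt) and one January plus one May date, the
# reference output above expects 2.
print(ch(
    "SELECT ProfileEvents['SelectedParts'] FROM system.query_log "
    "WHERE type = 'QueryFinish' AND current_database = currentDatabase() "
    "AND log_comment = 'check_pruning'"
))
```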