Skip to content

Commit

Permalink
Merge pull request ClickHouse#60255 from ClickHouse/dont-load-useless…
Browse files Browse the repository at this point in the history
…-index-parts-in-memory

Do not load useless columns from the index in memory
  • Loading branch information
alexey-milovidov authored Mar 12, 2024
2 parents bd4f948 + cdb9c96 commit bdc884d
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 30 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
# set CPU time limit to 1000 seconds
set (RLIMIT_CPU 1000)

# -fsanitize=memory is too heavy
if (SANITIZE STREQUAL "memory")
# -fsanitize=memory and address are too heavy
if (SANITIZE)
set (RLIMIT_DATA 10000000000) # 10G
endif()

Expand Down
2 changes: 1 addition & 1 deletion docker/test/fuzzer/run-fuzzer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ quit
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
echo "Achtung!" > description.txt
echo "Let op!" > description.txt
echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt
{ rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt
fi
Expand Down
6 changes: 3 additions & 3 deletions src/Core/Field.h
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ class Field

switch (which)
{
case Types::Null: return false;
case Types::Null: return get<Null>() < rhs.get<Null>();
case Types::Bool: [[fallthrough]];
case Types::UInt64: return get<UInt64>() < rhs.get<UInt64>();
case Types::UInt128: return get<UInt128>() < rhs.get<UInt128>();
Expand Down Expand Up @@ -541,7 +541,7 @@ class Field

switch (which)
{
case Types::Null: return true;
case Types::Null: return get<Null>() <= rhs.get<Null>();
case Types::Bool: [[fallthrough]];
case Types::UInt64: return get<UInt64>() <= rhs.get<UInt64>();
case Types::UInt128: return get<UInt128>() <= rhs.get<UInt128>();
Expand Down Expand Up @@ -590,7 +590,7 @@ class Field

switch (which)
{
case Types::Null: return true;
case Types::Null: return get<Null>() == rhs.get<Null>();
case Types::Bool: [[fallthrough]];
case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
case Types::Int64: return get<Int64>() == rhs.get<Int64>();
Expand Down
15 changes: 6 additions & 9 deletions src/Core/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ struct Null
{
enum class Value
{
Null,
PositiveInfinity,
NegativeInfinity,
NegativeInfinity = -1,
Null = 0,
PositiveInfinity = 1,
};

Value value{Value::Null};
Expand All @@ -34,15 +34,12 @@ struct Null
bool isPositiveInfinity() const { return value == Value::PositiveInfinity; }
bool isNegativeInfinity() const { return value == Value::NegativeInfinity; }

bool operator==(const Null & other) const
auto operator<=>(const Null & other) const
{
return value == other.value;
return static_cast<int>(value) <=> static_cast<int>(other.value);
}

bool operator!=(const Null & other) const
{
return !(*this == other);
}
bool operator==(const Null &) const = default;
};

using UInt128 = ::UInt128;
Expand Down
9 changes: 5 additions & 4 deletions src/Processors/QueryPlan/PartsSplitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ bool isSafePrimaryKey(const KeyDescription & primary_key)

int compareValues(const Values & lhs, const Values & rhs)
{
chassert(lhs.size() == rhs.size());
size_t size = std::min(lhs.size(), rhs.size());

for (size_t i = 0; i < lhs.size(); ++i)
for (size_t i = 0; i < size; ++i)
{
if (applyVisitor(FieldVisitorAccurateLess(), lhs[i], rhs[i]))
return -1;
Expand All @@ -124,8 +124,9 @@ class IndexAccess
Values getValue(size_t part_idx, size_t mark) const
{
const auto & index = parts[part_idx].data_part->getIndex();
Values values(index.size());
for (size_t i = 0; i < values.size(); ++i)
size_t size = index.size();
Values values(size);
for (size_t i = 0; i < size; ++i)
{
index[i]->get(mark, values[i]);
if (values[i].isNull())
Expand Down
21 changes: 21 additions & 0 deletions src/Storages/MergeTree/IMergeTreeDataPart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,27 @@ void IMergeTreeDataPart::loadIndex() const
for (size_t j = 0; j < key_size; ++j)
key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {});

/// Cut useless suffix columns, if necessary.
Float64 ratio_to_drop_suffix_columns = storage.getSettings()->primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns;
if (key_size > 1 && ratio_to_drop_suffix_columns > 0 && ratio_to_drop_suffix_columns < 1)
{
chassert(marks_count > 0);
for (size_t j = 0; j < key_size - 1; ++j)
{
size_t num_changes = 0;
for (size_t i = 1; i < marks_count; ++i)
if (0 != loaded_index[j]->compareAt(i, i - 1, *loaded_index[j], 0))
++num_changes;

if (static_cast<Float64>(num_changes) / marks_count >= ratio_to_drop_suffix_columns)
{
key_size = j + 1;
loaded_index.resize(key_size);
break;
}
}
}

for (size_t i = 0; i < key_size; ++i)
{
loaded_index[i]->shrinkToFit();
Expand Down
37 changes: 27 additions & 10 deletions src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
DataTypes key_types;
for (size_t i : key_indices)
{
index_columns->emplace_back(ColumnWithTypeAndName{index[i], primary_key.data_types[i], primary_key.column_names[i]});
if (i < index.size())
index_columns->emplace_back(index[i], primary_key.data_types[i], primary_key.column_names[i]);
else
index_columns->emplace_back(); /// The column of the primary key was not loaded in memory - we'll skip it.

key_types.emplace_back(primary_key.data_types[i]);
}

Expand All @@ -1027,7 +1031,6 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
std::function<void(size_t, size_t, FieldRef &)> create_field_ref;
if (key_condition.hasMonotonicFunctionsChain())
{

create_field_ref = [index_columns](size_t row, size_t column, FieldRef & field)
{
field = {index_columns.get(), row, column};
Expand Down Expand Up @@ -1067,7 +1070,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
{
for (size_t i = 0; i < used_key_size; ++i)
{
create_field_ref(range.begin, i, index_left[i]);
if ((*index_columns)[i].column)
create_field_ref(range.begin, i, index_left[i]);
else
index_left[i] = NEGATIVE_INFINITY;

index_right[i] = POSITIVE_INFINITY;
}
}
Expand All @@ -1078,8 +1085,17 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(

for (size_t i = 0; i < used_key_size; ++i)
{
create_field_ref(range.begin, i, index_left[i]);
create_field_ref(range.end, i, index_right[i]);
if ((*index_columns)[i].column)
{
create_field_ref(range.begin, i, index_left[i]);
create_field_ref(range.end, i, index_right[i]);
}
else
{
/// If the PK column was not loaded in memory - exclude it from the analysis.
index_left[i] = NEGATIVE_INFINITY;
index_right[i] = POSITIVE_INFINITY;
}
}
}
key_condition_maybe_true = key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types);
Expand Down Expand Up @@ -1114,6 +1130,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
bool part_offset_condition_exact_range
= !part_offset_condition || part_offset_condition->alwaysUnknownOrTrue() || part_offset_condition->matchesExactContinuousRange();
const String & part_name = part->isProjectionPart() ? fmt::format("{}.{}", part->name, part->getParentPart()->name) : part->name;

if (!key_condition_exact_range || !part_offset_condition_exact_range)
{
// Do exclusion search, where we drop ranges that do not match
Expand All @@ -1128,10 +1145,10 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
part->index_granularity_info.index_granularity_bytes);

/** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back).
* At each step, take the left segment and check if it fits.
* If fits, split it into smaller ones and put them on the stack. If not, discard it.
* If the segment is already of one mark length, add it to response and discard it.
*/
* At each step, take the left segment and check if it fits.
* If fits, split it into smaller ones and put them on the stack. If not, discard it.
* If the segment is already of one mark length, add it to response and discard it.
*/
std::vector<MarkRange> ranges_stack = { {0, marks_count} };

size_t steps = 0;
Expand All @@ -1141,7 +1158,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
MarkRange range = ranges_stack.back();
ranges_stack.pop_back();

steps++;
++steps;

if (!may_be_true_in_range(range))
continue;
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/MergeTreeSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ struct Settings;
M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \
M(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \
M(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. This can save memory in the presence of a large number of tables.", 0) \
\
M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \
/** Projection settings. */ \
M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \

Expand Down
18 changes: 18 additions & 0 deletions tests/queries/0_stateless/02998_primary_key_skip_columns.reference
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
100000
14954
798
15908
108
120
2334
19
Key size: 2400000
100000
14954
798
15908
108
120
2334
19
Key size: 800000
33 changes: 33 additions & 0 deletions tests/queries/0_stateless/02998_primary_key_skip_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
DROP TABLE IF EXISTS test;

CREATE TABLE test (a UInt64, b UInt64, c UInt64) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 1, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 1;
INSERT INTO test SELECT sipHash64(number, 1), sipHash64(number, 2), sipHash64(number, 3) FROM numbers(100000);

SELECT count() FROM test;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760;
SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137;
SELECT count() FROM test WHERE c > 13239894303140990071 AND c < 16179795840886947236;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137;
SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236;

SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test';

ALTER TABLE test MODIFY SETTING primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 0.9;

DETACH TABLE test;
ATTACH TABLE test;

SELECT count() FROM test;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760;
SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137;
SELECT count() FROM test WHERE c > 13239894303140990071 AND c < 16179795840886947236;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137;
SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236;
SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236;

SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test';

DROP TABLE test;
2 changes: 2 additions & 0 deletions tests/queries/0_stateless/03008_index_small.reference
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
3
3
19 changes: 19 additions & 0 deletions tests/queries/0_stateless/03008_index_small.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
DROP TABLE IF EXISTS test;

CREATE TABLE test (a UInt8, b UInt8) ENGINE = MergeTree ORDER BY (a, b)
SETTINGS index_granularity = 1, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 0.01;

SET optimize_move_to_prewhere = 0;

INSERT INTO test
SELECT number DIV 2, number
FROM numbers(3);

SELECT count() FROM test WHERE b >= 0;

DETACH TABLE test;
ATTACH TABLE test;

SELECT count() FROM test WHERE b >= 0;

DROP TABLE test;

0 comments on commit bdc884d

Please sign in to comment.