Skip to content

Commit

Permalink
Update vendored DuckDB sources to 9d02a50
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Jan 17, 2025
1 parent 9d02a50 commit 44e807a
Show file tree
Hide file tree
Showing 20 changed files with 191 additions and 78 deletions.
5 changes: 3 additions & 2 deletions src/duckdb/extension/parquet/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,12 +778,13 @@ void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t c
FilterPropagateResult prune_result;
// TODO we might not have stats but STILL a bloom filter so move this up
// check the bloom filter if present
if (!column_reader.Type().IsNested() &&
bool is_generated_column = column_reader.FileIdx() >= group.columns.size();
if (!column_reader.Type().IsNested() && !is_generated_column &&
ParquetStatisticsUtils::BloomFilterSupported(column_reader.Type().id()) &&
ParquetStatisticsUtils::BloomFilterExcludes(filter, group.columns[column_reader.FileIdx()].meta_data,
*state.thrift_file_proto, allocator)) {
prune_result = FilterPropagateResult::FILTER_ALWAYS_FALSE;
} else if (column_reader.Type().id() == LogicalTypeId::VARCHAR &&
} else if (column_reader.Type().id() == LogicalTypeId::VARCHAR && !is_generated_column &&
group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.min_value &&
group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.max_value) {

Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/common/arrow/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
InitializeChild(root_holder.nested_children.back()[0], root_holder);
child.children = &root_holder.nested_children_ptr.back()[0];
child.children[0]->name = "entries";
child.children[0]->flags = 0; // Set the 'entries' field to non-nullable
SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options, context);
}

Expand Down
5 changes: 0 additions & 5 deletions src/duckdb/src/common/types/column/column_data_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,7 @@ ColumnDataAllocator::~ColumnDataAllocator() {
for (auto &block : blocks) {
block.handle->SetDestroyBufferUpon(DestroyBufferUpon::UNPIN);
}
const auto data_size = SizeInBytes();
blocks.clear();
if (Allocator::SupportsFlush() &&
data_size > alloc.buffer_manager->GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
Allocator::FlushAll();
}
}

BufferHandle ColumnDataAllocator::Pin(uint32_t block_id) {
Expand Down
4 changes: 0 additions & 4 deletions src/duckdb/src/common/types/row/tuple_data_segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,6 @@ TupleDataSegment::~TupleDataSegment() {
}
pinned_row_handles.clear();
pinned_heap_handles.clear();
if (Allocator::SupportsFlush() && allocator &&
data_size > allocator->GetBufferManager().GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
Allocator::FlushAll();
}
allocator.reset();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,6 @@ void JoinFilterPushdownInfo::PushInFilter(const JoinFilterPushdownFilter &info,

// generate the OR filter
auto in_filter = make_uniq<InFilter>(std::move(in_list));
in_filter->origin_is_hash_join = true;

// we push the OR filter as an OptionalFilter so that we can use it for zonemap pruning only
// the IN-list is expensive to execute otherwise
Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/execution/radix_partitioned_hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,12 @@ idx_t RadixHTConfig::GetRadixBits() const {
}

void RadixHTConfig::SetRadixBitsInternal(const idx_t radix_bits_p, bool external) {
if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
if (sink_radix_bits > radix_bits_p || sink.any_combined) {
return;
}

auto guard = sink.Lock();
if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
if (sink_radix_bits > radix_bits_p || sink.any_combined) {
return;
}

Expand Down
144 changes: 113 additions & 31 deletions src/duckdb/src/function/table/table_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/common/types/value_map.hpp"

namespace duckdb {

Expand Down Expand Up @@ -361,56 +362,137 @@ unique_ptr<GlobalTableFunctionState> DuckIndexScanInitGlobal(ClientContext &cont
return std::move(g_state);
}

void ExtractInFilter(unique_ptr<TableFilter> &filter, BoundColumnRefExpression &bound_ref,
unique_ptr<vector<unique_ptr<Expression>>> &filter_expressions) {
// Special-handling of IN filters.
// They are part of a CONJUNCTION_AND.
if (filter->filter_type != TableFilterType::CONJUNCTION_AND) {
return;
//! Appends one equality comparison per entry of unique_values to expressions.
//! Each generated expression has the form `bound_ref = <value>`: the left side is a fresh
//! copy of bound_ref, the right side is a constant expression wrapping the value.
//! Existing entries of expressions are left untouched.
void ExtractExpressionsFromValues(value_set_t &unique_values, BoundColumnRefExpression &bound_ref,
                                  vector<unique_ptr<Expression>> &expressions) {
	for (const auto &unique_value : unique_values) {
		// Right-hand side: the literal value to match.
		auto constant_expr = make_uniq<BoundConstantExpression>(unique_value);
		// The column reference is copied because every comparison owns its children.
		expressions.push_back(make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_EQUAL, bound_ref.Copy(),
		                                                           std::move(constant_expr)));
	}
}

auto &and_filter = filter->Cast<ConjunctionAndFilter>();
auto &children = and_filter.child_filters;
if (children.empty()) {
return;
void ExtractIn(InFilter &filter, BoundColumnRefExpression &bound_ref, vector<unique_ptr<Expression>> &expressions) {
// Eliminate any duplicates.
value_set_t unique_values;
for (const auto &value : filter.values) {
if (unique_values.find(value) == unique_values.end()) {
unique_values.insert(value);
}
}
if (children[0]->filter_type != TableFilterType::OPTIONAL_FILTER) {
ExtractExpressionsFromValues(unique_values, bound_ref, expressions);
}

void ExtractConjunctionAnd(ConjunctionAndFilter &filter, BoundColumnRefExpression &bound_ref,
vector<unique_ptr<Expression>> &expressions) {
if (filter.child_filters.empty()) {
return;
}

auto &optional_filter = children[0]->Cast<OptionalFilter>();
auto &child = optional_filter.child_filter;
if (child->filter_type != TableFilterType::IN_FILTER) {
// Extract the CONSTANT_COMPARISON and IN_FILTER children.
vector<reference<ConstantFilter>> comparisons;
vector<reference<InFilter>> in_filters;

for (idx_t i = 0; i < filter.child_filters.size(); i++) {
if (filter.child_filters[i]->filter_type == TableFilterType::CONSTANT_COMPARISON) {
auto &comparison = filter.child_filters[i]->Cast<ConstantFilter>();
comparisons.push_back(comparison);
continue;
}

if (filter.child_filters[i]->filter_type == TableFilterType::OPTIONAL_FILTER) {
auto &optional_filter = filter.child_filters[i]->Cast<OptionalFilter>();
if (!optional_filter.child_filter) {
return;
}
if (optional_filter.child_filter->filter_type != TableFilterType::IN_FILTER) {
// No support for other optional filter types yet.
return;
}
auto &in_filter = optional_filter.child_filter->Cast<InFilter>();
in_filters.push_back(in_filter);
continue;
}

// No support for other filter types than CONSTANT_COMPARISON and IN_FILTER in CONJUNCTION_AND yet.
return;
}

auto &in_filter = child->Cast<InFilter>();
if (!in_filter.origin_is_hash_join) {
// No support for other CONJUNCTION_AND cases yet.
if (in_filters.empty()) {
return;
}

// They are all on the same column, so we can split them.
for (const auto &value : in_filter.values) {
auto bound_constant = make_uniq<BoundConstantExpression>(value);
auto filter_expr = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_EQUAL, bound_ref.Copy(),
std::move(bound_constant));
filter_expressions->push_back(std::move(filter_expr));
// Get the combined unique values of the IN filters.
value_set_t unique_values;
for (idx_t filter_idx = 0; filter_idx < in_filters.size(); filter_idx++) {
auto &in_filter = in_filters[filter_idx].get();
for (idx_t value_idx = 0; value_idx < in_filter.values.size(); value_idx++) {
auto &value = in_filter.values[value_idx];
if (unique_values.find(value) != unique_values.end()) {
continue;
}
unique_values.insert(value);
}
}

// Extract all qualifying values.
for (auto value_it = unique_values.begin(); value_it != unique_values.end();) {
bool qualifies = true;
for (idx_t comp_idx = 0; comp_idx < comparisons.size(); comp_idx++) {
if (!comparisons[comp_idx].get().Compare(*value_it)) {
qualifies = false;
value_it = unique_values.erase(value_it);
break;
}
}
if (qualifies) {
value_it++;
}
}

ExtractExpressionsFromValues(unique_values, bound_ref, expressions);
}

//! Dispatches on the type of filter and appends any expressions derived from it to expressions.
//!  - OPTIONAL_FILTER: recurses into the wrapped child filter; does nothing if there is no child.
//!  - IN_FILTER: forwards to ExtractIn.
//!  - CONJUNCTION_AND: forwards to ExtractConjunctionAnd.
//!  - any other filter type: expressions is left unchanged.
void ExtractFilter(TableFilter &filter, BoundColumnRefExpression &bound_ref,
                   vector<unique_ptr<Expression>> &expressions) {
	switch (filter.filter_type) {
	case TableFilterType::OPTIONAL_FILTER: {
		auto &wrapped = filter.Cast<OptionalFilter>().child_filter;
		if (wrapped) {
			// Unwrap the optional filter and handle whatever it contains.
			ExtractFilter(*wrapped, bound_ref, expressions);
		}
		break;
	}
	case TableFilterType::IN_FILTER:
		ExtractIn(filter.Cast<InFilter>(), bound_ref, expressions);
		break;
	case TableFilterType::CONJUNCTION_AND:
		ExtractConjunctionAnd(filter.Cast<ConjunctionAndFilter>(), bound_ref, expressions);
		break;
	default:
		// Unsupported filter type: nothing to extract.
		break;
	}
}

unique_ptr<vector<unique_ptr<Expression>>> ExtractFilters(const ColumnDefinition &col, unique_ptr<TableFilter> &filter,
idx_t storage_idx) {
vector<unique_ptr<Expression>> ExtractFilterExpressions(const ColumnDefinition &col, unique_ptr<TableFilter> &filter,
idx_t storage_idx) {
ColumnBinding binding(0, storage_idx);
auto bound_ref = make_uniq<BoundColumnRefExpression>(col.Name(), col.Type(), binding);

auto filter_expressions = make_uniq<vector<unique_ptr<Expression>>>();
ExtractInFilter(filter, *bound_ref, filter_expressions);
vector<unique_ptr<Expression>> expressions;
ExtractFilter(*filter, *bound_ref, expressions);

if (filter_expressions->empty()) {
// Attempt matching the top-level filter to the index expression.
if (expressions.empty()) {
auto filter_expr = filter->ToExpression(*bound_ref);
filter_expressions->push_back(std::move(filter_expr));
expressions.push_back(std::move(filter_expr));
}
return filter_expressions;
return expressions;
}

bool TryScanIndex(ART &art, const ColumnList &column_list, TableFunctionInitInput &input, TableFilterSet &filter_set,
Expand Down Expand Up @@ -453,8 +535,8 @@ bool TryScanIndex(ART &art, const ColumnList &column_list, TableFunctionInitInpu
return false;
}

auto filter_expressions = ExtractFilters(col, filter->second, storage_index.GetIndex());
for (const auto &filter_expr : *filter_expressions) {
auto expressions = ExtractFilterExpressions(col, filter->second, storage_index.GetIndex());
for (const auto &filter_expr : expressions) {
auto scan_state = art.TryInitializeScan(*index_expr, *filter_expr);
if (!scan_state) {
return false;
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "4-dev4648"
#define DUCKDB_PATCH_VERSION "4-dev4679"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 1
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.1.4-dev4648"
#define DUCKDB_VERSION "v1.1.4-dev4679"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "09cef57892"
#define DUCKDB_SOURCE_ID "0024e5d4be"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class ConstantFilter : public TableFilter {
Value constant;

public:
bool Compare(const Value &value) const;
FilterPropagateResult CheckStatistics(BaseStatistics &stats) override;
string ToString(const string &column_name) override;
bool Equals(const TableFilter &other) const override;
Expand Down
2 changes: 0 additions & 2 deletions src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ class InFilter : public TableFilter {

public:
explicit InFilter(vector<Value> values);
InFilter(vector<Value> values, bool origin_is_hash_join);

vector<Value> values;
bool origin_is_hash_join;

public:
FilterPropagateResult CheckStatistics(BaseStatistics &stats) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ class OptionalFilter : public TableFilter {
public:
explicit OptionalFilter(unique_ptr<TableFilter> filter = nullptr);

//! Optional child filters.
unique_ptr<TableFilter> child_filter;

public:
string ToString(const string &column_name) override;
unique_ptr<TableFilter> Copy() const override;
unique_ptr<Expression> ToExpression(const Expression &column) const override;
FilterPropagateResult CheckStatistics(BaseStatistics &stats) override;
void Serialize(Serializer &serializer) const override;
static unique_ptr<TableFilter> Deserialize(Deserializer &deserializer);

public:
// optional child filters
unique_ptr<TableFilter> child_filter;
};

} // namespace duckdb
4 changes: 3 additions & 1 deletion src/duckdb/src/include/duckdb/storage/storage_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ struct Storage {
static void VerifyBlockAllocSize(const idx_t block_alloc_size);
};

//! The version number of the database storage format
//! The version number default, lower and upper bounds of the database storage format
extern const uint64_t VERSION_NUMBER;
extern const uint64_t VERSION_NUMBER_LOWER;
extern const uint64_t VERSION_NUMBER_UPPER;
string GetDuckDBVersion(idx_t version_number);
optional_idx GetStorageVersion(const char *version_string);
optional_idx GetSerializationVersion(const char *version_string);
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/planner/filter/conjunction_filter.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "duckdb/planner/filter/conjunction_filter.hpp"

#include "duckdb/planner/expression/bound_conjunction_expression.hpp"

namespace duckdb {
Expand Down
21 changes: 21 additions & 0 deletions src/duckdb/src/planner/filter/constant_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "duckdb/storage/statistics/base_statistics.hpp"
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/common/value_operations/value_operations.hpp"
#include "duckdb/common/enum_util.hpp"

namespace duckdb {

Expand All @@ -13,6 +15,25 @@ ConstantFilter::ConstantFilter(ExpressionType comparison_type_p, Value constant_
}
}

//! Evaluates `value <comparison_type> constant` for this filter and returns the result.
//! Handles =, <>, >, >=, < and <=; any other comparison type raises an InternalException.
bool ConstantFilter::Compare(const Value &value) const {
	bool matches = false;
	switch (comparison_type) {
	case ExpressionType::COMPARE_EQUAL:
		matches = ValueOperations::Equals(value, constant);
		break;
	case ExpressionType::COMPARE_NOTEQUAL:
		matches = ValueOperations::NotEquals(value, constant);
		break;
	case ExpressionType::COMPARE_GREATERTHAN:
		matches = ValueOperations::GreaterThan(value, constant);
		break;
	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
		matches = ValueOperations::GreaterThanEquals(value, constant);
		break;
	case ExpressionType::COMPARE_LESSTHAN:
		matches = ValueOperations::LessThan(value, constant);
		break;
	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
		matches = ValueOperations::LessThanEquals(value, constant);
		break;
	default:
		// The filter was built with a comparison type this function does not know how to evaluate.
		throw InternalException("unknown comparison type for ConstantFilter: " + EnumUtil::ToString(comparison_type));
	}
	return matches;
}

FilterPropagateResult ConstantFilter::CheckStatistics(BaseStatistics &stats) {
if (!stats.CanHaveNoNull()) {
// no non-null values are possible: always false
Expand Down
Loading

0 comments on commit 44e807a

Please sign in to comment.