Skip to content

Commit

Permalink
Update vendored DuckDB sources to c474827
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Jan 15, 2025
1 parent c474827 commit 4d88718
Show file tree
Hide file tree
Showing 38 changed files with 664 additions and 83 deletions.
2 changes: 1 addition & 1 deletion src/duckdb/extension/json/buffered_json_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void JSONFileHandle::Reset() {
requested_reads = 0;
actual_reads = 0;
last_read_requested = false;
if (IsOpen() && CanSeek()) {
if (IsOpen() && !file_handle->IsPipe()) {
file_handle->Reset();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ TableFunction GetReadJSONObjectsTableFunction(bool list_parameter, shared_ptr<JS

TableFunctionSet JSONFunctions::GetReadJSONObjectsFunction() {
TableFunctionSet function_set("read_json_objects");
auto function_info =
make_shared_ptr<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::ARRAY, JSONRecordType::RECORDS);
auto function_info = make_shared_ptr<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::AUTO_DETECT,
JSONRecordType::RECORDS);
function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
return function_set;
Expand Down
14 changes: 8 additions & 6 deletions src/duckdb/src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2399,19 +2399,20 @@ const StringUtil::EnumStringLiteral *GetMetricsTypeValues() {
{ static_cast<uint32_t>(MetricsType::OPTIMIZER_JOIN_FILTER_PUSHDOWN), "OPTIMIZER_JOIN_FILTER_PUSHDOWN" },
{ static_cast<uint32_t>(MetricsType::OPTIMIZER_EXTENSION), "OPTIMIZER_EXTENSION" },
{ static_cast<uint32_t>(MetricsType::OPTIMIZER_MATERIALIZED_CTE), "OPTIMIZER_MATERIALIZED_CTE" },
{ static_cast<uint32_t>(MetricsType::OPTIMIZER_SUM_REWRITER), "OPTIMIZER_SUM_REWRITER" }
{ static_cast<uint32_t>(MetricsType::OPTIMIZER_SUM_REWRITER), "OPTIMIZER_SUM_REWRITER" },
{ static_cast<uint32_t>(MetricsType::OPTIMIZER_LATE_MATERIALIZATION), "OPTIMIZER_LATE_MATERIALIZATION" }
};
return values;
}

template<>
const char* EnumUtil::ToChars<MetricsType>(MetricsType value) {
return StringUtil::EnumToString(GetMetricsTypeValues(), 48, "MetricsType", static_cast<uint32_t>(value));
return StringUtil::EnumToString(GetMetricsTypeValues(), 49, "MetricsType", static_cast<uint32_t>(value));
}

template<>
MetricsType EnumUtil::FromString<MetricsType>(const char *value) {
return static_cast<MetricsType>(StringUtil::StringToEnum(GetMetricsTypeValues(), 48, "MetricsType", value));
return static_cast<MetricsType>(StringUtil::StringToEnum(GetMetricsTypeValues(), 49, "MetricsType", value));
}

const StringUtil::EnumStringLiteral *GetNTypeValues() {
Expand Down Expand Up @@ -2584,19 +2585,20 @@ const StringUtil::EnumStringLiteral *GetOptimizerTypeValues() {
{ static_cast<uint32_t>(OptimizerType::JOIN_FILTER_PUSHDOWN), "JOIN_FILTER_PUSHDOWN" },
{ static_cast<uint32_t>(OptimizerType::EXTENSION), "EXTENSION" },
{ static_cast<uint32_t>(OptimizerType::MATERIALIZED_CTE), "MATERIALIZED_CTE" },
{ static_cast<uint32_t>(OptimizerType::SUM_REWRITER), "SUM_REWRITER" }
{ static_cast<uint32_t>(OptimizerType::SUM_REWRITER), "SUM_REWRITER" },
{ static_cast<uint32_t>(OptimizerType::LATE_MATERIALIZATION), "LATE_MATERIALIZATION" }
};
return values;
}

template<>
const char* EnumUtil::ToChars<OptimizerType>(OptimizerType value) {
return StringUtil::EnumToString(GetOptimizerTypeValues(), 27, "OptimizerType", static_cast<uint32_t>(value));
return StringUtil::EnumToString(GetOptimizerTypeValues(), 28, "OptimizerType", static_cast<uint32_t>(value));
}

template<>
OptimizerType EnumUtil::FromString<OptimizerType>(const char *value) {
return static_cast<OptimizerType>(StringUtil::StringToEnum(GetOptimizerTypeValues(), 27, "OptimizerType", value));
return static_cast<OptimizerType>(StringUtil::StringToEnum(GetOptimizerTypeValues(), 28, "OptimizerType", value));
}

const StringUtil::EnumStringLiteral *GetOrderByNullTypeValues() {
Expand Down
6 changes: 6 additions & 0 deletions src/duckdb/src/common/enums/metric_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ profiler_settings_t MetricsUtils::GetOptimizerMetrics() {
MetricsType::OPTIMIZER_EXTENSION,
MetricsType::OPTIMIZER_MATERIALIZED_CTE,
MetricsType::OPTIMIZER_SUM_REWRITER,
MetricsType::OPTIMIZER_LATE_MATERIALIZATION,
};
}

Expand Down Expand Up @@ -109,6 +110,8 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) {
return MetricsType::OPTIMIZER_MATERIALIZED_CTE;
case OptimizerType::SUM_REWRITER:
return MetricsType::OPTIMIZER_SUM_REWRITER;
case OptimizerType::LATE_MATERIALIZATION:
return MetricsType::OPTIMIZER_LATE_MATERIALIZATION;
default:
throw InternalException("OptimizerType %s cannot be converted to a MetricsType", EnumUtil::ToString(type));
};
Expand Down Expand Up @@ -168,6 +171,8 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) {
return OptimizerType::MATERIALIZED_CTE;
case MetricsType::OPTIMIZER_SUM_REWRITER:
return OptimizerType::SUM_REWRITER;
case MetricsType::OPTIMIZER_LATE_MATERIALIZATION:
return OptimizerType::LATE_MATERIALIZATION;
default:
return OptimizerType::INVALID;
};
Expand Down Expand Up @@ -201,6 +206,7 @@ bool MetricsUtils::IsOptimizerMetric(MetricsType type) {
case MetricsType::OPTIMIZER_EXTENSION:
case MetricsType::OPTIMIZER_MATERIALIZED_CTE:
case MetricsType::OPTIMIZER_SUM_REWRITER:
case MetricsType::OPTIMIZER_LATE_MATERIALIZATION:
return true;
default:
return false;
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/common/enums/optimizer_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ static const DefaultOptimizerType internal_optimizer_types[] = {
{"extension", OptimizerType::EXTENSION},
{"materialized_cte", OptimizerType::MATERIALIZED_CTE},
{"sum_rewriter", OptimizerType::SUM_REWRITER},
{"late_materialization", OptimizerType::LATE_MATERIALIZATION},
{nullptr, OptimizerType::INVALID}};

string OptimizerTypeToString(OptimizerType type) {
Expand Down
17 changes: 14 additions & 3 deletions src/duckdb/src/common/local_file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -853,16 +853,27 @@ unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, FileOpenF
bool open_write = flags.OpenForWriting();
if (open_read && open_write) {
desired_access = GENERIC_READ | GENERIC_WRITE;
share_mode = 0;
} else if (open_read) {
desired_access = GENERIC_READ;
share_mode = FILE_SHARE_READ;
} else if (open_write) {
desired_access = GENERIC_WRITE;
share_mode = 0;
} else {
throw InternalException("READ, WRITE or both should be specified when opening a file");
}
switch (flags.Lock()) {
case FileLockType::NO_LOCK:
share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
break;
case FileLockType::READ_LOCK:
share_mode = FILE_SHARE_READ;
break;
case FileLockType::WRITE_LOCK:
share_mode = 0;
break;
default:
throw InternalException("Unknown FileLockType");
}

if (open_write) {
if (flags.CreateFileIfNotExists()) {
creation_disposition = OPEN_ALWAYS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,13 @@ void CSVSniffer::DetectHeader() {
auto &sniffer_state_machine = best_candidate->GetStateMachine();
names = DetectHeaderInternal(buffer_manager->context, best_header_row, sniffer_state_machine, set_columns,
best_sql_types_candidates_per_column_idx, options, *error_handler);
if (single_row_file && sniffer_state_machine.dialect_options.header.GetValue()) {
// This file only contains a header, so let's default to the lowest type of all.
detected_types.clear();
for (idx_t i = 0; i < names.size(); i++) {
detected_types.push_back(LogicalType::BOOLEAN);
}
}
for (idx_t i = max_columns_found; i < names.size(); i++) {
detected_types.push_back(LogicalType::VARCHAR);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ void CSVSniffer::DetectTypes() {
best_format_candidates[format_candidate.first] = format_candidate.second.format;
}
if (chunk_size > 0) {
single_row_file = chunk_size == 1;
for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
auto &cur_vector = data_chunk.data[col_idx];
auto vector_data = FlatVector::GetData<string_t>(cur_vector);
Expand Down
10 changes: 10 additions & 0 deletions src/duckdb/src/function/scalar/strftime_format.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,16 @@ StrpTimeFormat::ParseResult StrpTimeFormat::Parse(const string &format_string, c
return result;
}

bool StrpTimeFormat::TryParse(const string &format_string, const string &text, ParseResult &result) {
StrpTimeFormat format;
format.format_specifier = format_string;
string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
if (!error.empty()) {
throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
}
return format.Parse(text, result);
}

bool StrTimeFormat::Empty() const {
return format_specifier.empty();
}
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/function/table/table_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class DuckIndexScanState : public TableScanGlobalState {

//! The batch index of the next Sink.
//! Also determines the offset of the next chunk. I.e., offset = next_batch_index * STANDARD_VECTOR_SIZE.
idx_t next_batch_index;
atomic<idx_t> next_batch_index;
//! The total scanned row IDs.
unsafe_vector<row_t> row_ids;
//! The column IDs of the to-be-scanned columns.
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "4-dev4516"
#define DUCKDB_PATCH_VERSION "4-dev4570"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 1
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.1.4-dev4516"
#define DUCKDB_VERSION "v1.1.4-dev4570"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "2e533ec9df"
#define DUCKDB_SOURCE_ID "d1740d6cf7"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/include/duckdb/common/enums/metric_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ enum class MetricsType : uint8_t {
OPTIMIZER_EXTENSION,
OPTIMIZER_MATERIALIZED_CTE,
OPTIMIZER_SUM_REWRITER,
OPTIMIZER_LATE_MATERIALIZATION,
};

struct MetricsTypeHashFunction {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ enum class OptimizerType : uint32_t {
JOIN_FILTER_PUSHDOWN,
EXTENSION,
MATERIALIZED_CTE,
SUM_REWRITER
SUM_REWRITER,
LATE_MATERIALIZATION
};

string OptimizerTypeToString(OptimizerType type);
Expand Down
6 changes: 6 additions & 0 deletions src/duckdb/src/include/duckdb/common/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,12 @@ static inline T AlignValue(T n) {
return ((n + (val - 1)) / val) * val;
}

//! Rounds the address `addr` up to the next multiple of `alignment` (a power of 2).
//! An address that is already aligned is returned unchanged.
template<uintptr_t alignment>
inline data_ptr_t AlignValue(data_ptr_t addr) {
	static_assert((alignment & (alignment - 1)) == 0, "'alignment' has to be a power of 2");
	// for a power of 2, (alignment - 1) has exactly the low bits set that must be cleared
	constexpr uintptr_t low_bits = alignment - 1;
	const uintptr_t raw_address = reinterpret_cast<uintptr_t>(addr);
	const uintptr_t aligned_address = (raw_address + low_bits) & ~low_bits;
	return reinterpret_cast<data_ptr_t>(aligned_address);
}

template<class T, T val=8>
constexpr inline T AlignValueFloor(T n) {
return (n / val) * val;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ class CSVSniffer {
unordered_map<idx_t, vector<LogicalType>> &best_sql_types_candidates_per_column_idx,
CSVReaderOptions &options, CSVErrorHandler &error_handler);
vector<string> names;
//! If the file only has a header
bool single_row_file = false;

//! ------------------------------------------------------//
//! ------------------ Type Replacement ----------------- //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ struct StrpTimeFormat : public StrTimeFormat { // NOLINT: work-around bug in cla
return format_specifier != other.format_specifier;
}
DUCKDB_API static ParseResult Parse(const string &format, const string &text);
DUCKDB_API static bool TryParse(const string &format, const string &text, ParseResult &result);

DUCKDB_API bool Parse(string_t str, ParseResult &result, bool strict = false) const;

Expand Down
7 changes: 7 additions & 0 deletions src/duckdb/src/include/duckdb/function/table/table_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ struct TableScanBindData : public TableFunctionData {
auto &other = other_p.Cast<TableScanBindData>();
return &other.table == &table;
}
//! Creates a new TableScanBindData for the same table, carrying over the
//! column ids and the index-scan / create-index flags of this instance.
unique_ptr<FunctionData> Copy() const override {
	auto result = make_uniq<TableScanBindData>(table);
	result->column_ids = column_ids;
	result->is_create_index = is_create_index;
	result->is_index_scan = is_index_scan;
	// explicit move: converts the derived-type pointer into the returned base-type pointer
	return std::move(result);
}
};

//! The table scan function represents a sequential or index scan over one of DuckDB's base tables.
Expand Down
3 changes: 3 additions & 0 deletions src/duckdb/src/include/duckdb/main/client_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ struct ClientConfig {
//! The maximum amount of OR filters we generate dynamically from a hash join
idx_t dynamic_or_filter_threshold = 50;

//! The maximum amount of rows in the LIMIT/SAMPLE for which we trigger late materialization
idx_t late_materialization_max_rows = 50;

//! Whether the "/" division operator defaults to integer division or floating point division
bool integer_division = false;
//! When a scalar subquery returns multiple rows - return a random row instead of returning an error
Expand Down
3 changes: 3 additions & 0 deletions src/duckdb/src/include/duckdb/main/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class CompressionInfo;
class EncryptionUtil;

struct CompressionFunctionSet;
struct DatabaseCacheEntry;
struct DBConfig;

enum class CheckpointAbort : uint8_t {
Expand Down Expand Up @@ -331,6 +332,8 @@ struct DBConfig {
vector<unique_ptr<ExtensionCallback>> extension_callbacks;
//! Encryption Util for OpenSSL
shared_ptr<EncryptionUtil> encryption_util;
//! Reference to the database cache entry (if any)
shared_ptr<DatabaseCacheEntry> db_cache_entry;

public:
DUCKDB_API static DBConfig &GetConfig(ClientContext &context);
Expand Down
2 changes: 0 additions & 2 deletions src/duckdb/src/include/duckdb/main/database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ class DatabaseInstance : public enable_shared_from_this<DatabaseInstance> {
const AttachOptions &options);

void AddExtensionInfo(const string &name, const ExtensionLoadedInfo &info);
void SetDatabaseCacheEntry(shared_ptr<DatabaseCacheEntry> entry);

private:
void Initialize(const char *path, DBConfig *config);
Expand All @@ -95,7 +94,6 @@ class DatabaseInstance : public enable_shared_from_this<DatabaseInstance> {
unordered_map<string, ExtensionInfo> loaded_extensions_info;
ValidChecker db_validity;
unique_ptr<DatabaseFileSystem> db_file_system;
shared_ptr<DatabaseCacheEntry> db_cache_entry;
shared_ptr<LogManager> log_manager;

duckdb_ext_api_v1 (*create_api_v1)();
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/include/duckdb/main/db_instance_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace duckdb {
class DBInstanceCache;

struct DatabaseCacheEntry {
DatabaseCacheEntry();
explicit DatabaseCacheEntry(const shared_ptr<DuckDB> &database);
~DatabaseCacheEntry();

Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/include/duckdb/main/extension_entries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
{"st_zmax", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
{"st_zmflag", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
{"st_zmin", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
{"start_ui", "motherduck", CatalogType::TABLE_FUNCTION_ENTRY},
{"starts_with", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
{"stats", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
{"stddev", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY},
Expand Down
11 changes: 11 additions & 0 deletions src/duckdb/src/include/duckdb/main/settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,17 @@ struct IntegerDivisionSetting {
static Value GetSetting(const ClientContext &context);
};

//! Declaration of the "late_materialization_max_rows" setting.
//! Only local (ClientContext-based) set/reset hooks are declared here.
//! NOTE(review): presumably backed by ClientConfig::late_materialization_max_rows - confirm in the definitions.
struct LateMaterializationMaxRowsSetting {
//! C++ type associated with the setting's value
using RETURN_TYPE = idx_t;
//! Name under which the setting is exposed
static constexpr const char *Name = "late_materialization_max_rows";
//! Human-readable description of the setting
static constexpr const char *Description =
"The maximum amount of rows in the LIMIT/SAMPLE for which we trigger late materialization";
//! Name of the SQL type expected as input
static constexpr const char *InputType = "UBIGINT";
//! Applies `parameter` as the new value for the given client context
static void SetLocal(ClientContext &context, const Value &parameter);
//! Resets the setting for the given client context
static void ResetLocal(ClientContext &context);
//! Returns the current value of the setting for the given client context
static Value GetSetting(const ClientContext &context);
};

struct LockConfigurationSetting {
using RETURN_TYPE = bool;
static constexpr const char *Name = "lock_configuration";
Expand Down
45 changes: 45 additions & 0 deletions src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/optimizer/late_materialization.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/common/constants.hpp"
#include "duckdb/optimizer/remove_unused_columns.hpp"

namespace duckdb {
class LogicalOperator;
class LogicalGet;
class Optimizer;

//! Optimizer pass for late materialization, built on top of BaseColumnPruner.
//! NOTE(review): the implementation is not visible from this header. Judging by the member names and the
//! max_row_count comment, it presumably rewrites LIMIT/SAMPLE plans over a table scan so that full rows are
//! only fetched for the selected row ids - confirm against the implementation file.
class LateMaterialization : public BaseColumnPruner {
public:
explicit LateMaterialization(Optimizer &optimizer);

//! Entry point of the pass: takes ownership of the plan rooted at `op` and returns the resulting plan
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> op);

private:
//! Attempts the rewrite on `op`; presumably returns whether `op` was rewritten - TODO confirm
bool TryLateMaterialization(unique_ptr<LogicalOperator> &op);

//! Helpers that build the two sides of the rewritten plan from the original scan/operator
//! (exact plan shape is defined in the implementation - not visible here)
unique_ptr<LogicalGet> ConstructLHS(LogicalGet &get);
ColumnBinding ConstructRHS(unique_ptr<LogicalOperator> &op);
//! Returns an index for the row-id column of `get`, presumably adding the column if it is missing - TODO confirm
idx_t GetOrInsertRowId(LogicalGet &get);

//! Helpers that re-point table indexes / expression references after the plan has been rewritten
void ReplaceTopLevelTableIndex(LogicalOperator &op, idx_t new_index);
void ReplaceTableReferences(Expression &expr, idx_t new_table_index);
unique_ptr<Expression> GetExpression(LogicalOperator &op, idx_t column_index);
void ReplaceExpressionReferences(LogicalOperator &next_op, unique_ptr<Expression> &expr);
//! NOTE(review): presumably decides whether a limit above max_row_count can still be optimized - confirm
bool OptimizeLargeLimit(LogicalOperator &child);

private:
//! The optimizer this pass belongs to
Optimizer &optimizer;
//! The max row count for which we will consider late materialization
idx_t max_row_count;
};

} // namespace duckdb
Loading

0 comments on commit 4d88718

Please sign in to comment.