diff --git a/src/duckdb/extension/core_functions/scalar/math/numeric.cpp b/src/duckdb/extension/core_functions/scalar/math/numeric.cpp index 799d4cb35..24261bdf8 100644 --- a/src/duckdb/extension/core_functions/scalar/math/numeric.cpp +++ b/src/duckdb/extension/core_functions/scalar/math/numeric.cpp @@ -1605,6 +1605,9 @@ namespace { struct FactorialOperator { template static inline TR Operation(TA left) { + if (left < 0) { + throw OutOfRangeException("factorial of a negative number is undefined"); + } TR ret = 1; for (TA i = 2; i <= left; i++) { if (!TryMultiplyOperator::Operation(ret, TR(i), ret)) { diff --git a/src/duckdb/src/common/allocator/allocator_jemalloc.cpp b/src/duckdb/src/common/allocator/allocator_jemalloc.cpp index b795fa85b..5a5a5b50f 100644 --- a/src/duckdb/src/common/allocator/allocator_jemalloc.cpp +++ b/src/duckdb/src/common/allocator/allocator_jemalloc.cpp @@ -82,7 +82,7 @@ bool Allocator::SupportsFlush() { void Allocator::ThreadFlush(bool allocator_background_threads, idx_t threshold, idx_t thread_count) { if (!allocator_background_threads) { // We flush after exceeding the threshold - if (GetJemallocCTL("thread.peak.read") > threshold) { + if (GetJemallocCTL("thread.peak.read") <= threshold) { return; } diff --git a/src/duckdb/src/function/scalar/list/list_zip.cpp b/src/duckdb/src/function/scalar/list/list_zip.cpp index 2f83b61d6..7401e378f 100644 --- a/src/duckdb/src/function/scalar/list/list_zip.cpp +++ b/src/duckdb/src/function/scalar/list/list_zip.cpp @@ -112,7 +112,11 @@ static void ListZipFunction(DataChunk &args, ExpressionState &state, Vector &res offset += len; } for (idx_t child_idx = 0; child_idx < args_size; child_idx++) { - if (args.data[child_idx].GetType() != LogicalType::SQLNULL) { + if (args.data[child_idx].GetType() == LogicalType::SQLNULL || + ListVector::GetListSize(args.data[child_idx]) == 0) { + struct_entries[child_idx]->SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(*struct_entries[child_idx], true); + } else { struct_entries[child_idx]->Slice(ListVector::GetEntry(args.data[child_idx]), selections[child_idx], result_size); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 88fe2bbc9..673c6a642 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "3-dev373" +#define DUCKDB_PATCH_VERSION "3" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 5 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.5.3-dev373" +#define DUCKDB_VERSION "v1.5.3" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "f1a6e65815" +#define DUCKDB_SOURCE_ID "14eca11bd9" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/main/extension_entries.hpp b/src/duckdb/src/include/duckdb/main/extension_entries.hpp index 5b6397e49..d96910f1e 100644 --- a/src/duckdb/src/include/duckdb/main/extension_entries.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_entries.hpp @@ -236,6 +236,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"iceberg_metadata", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"iceberg_partition_stats", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"iceberg_scan", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, + {"iceberg_schema_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"iceberg_snapshots", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"iceberg_table_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"iceberg_to_ducklake", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, @@ -571,6 +572,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"regr_sxx", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY}, {"regr_sxy", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY}, {"regr_syy", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY}, + {"remove_iceberg_schema_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"remove_iceberg_table_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"repeat", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"replace", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -587,6 +589,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"rtrim", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"sem", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY}, {"set_bit", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"set_iceberg_schema_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"set_iceberg_table_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"setseed", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"shapefile_meta", "spatial", CatalogType::TABLE_FUNCTION_ENTRY}, diff --git a/src/duckdb/src/include/duckdb/storage/data_table.hpp b/src/duckdb/src/include/duckdb/storage/data_table.hpp index e9b899091..ed5de9902 100644 --- a/src/duckdb/src/include/duckdb/storage/data_table.hpp +++ b/src/duckdb/src/include/duckdb/storage/data_table.hpp @@ -255,6 +255,12 @@ class DataTable : public enable_shared_from_this { shared_ptr &GetDataTableInfo(); + //! Direct access to the row group collection. Intended for extensions that need to walk storage internals; + //! prefer the higher-level DataTable API for normal use. + const shared_ptr &GetRowGroupCollection() const { + return row_groups; + } + void BindIndexes(ClientContext &context); bool HasIndexes() const; bool HasUniqueIndexes() const; diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp index b6c7f73df..70fcf1ed9 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp @@ -215,6 +215,10 @@ class RowGroup : public SegmentBase { vector CheckpointDeletes(RowGroupWriter &writer); + //! Direct accessors, fall outside of general use but can be useful to some extensions + ColumnData &GetRawColumnData(const StorageIndex &c) const; + ColumnData &GetRawColumnData(storage_t c) const; + private: optional_ptr GetVersionInfo(); optional_ptr GetVersionInfoIfLoaded() const; diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp index 676e36bf8..aebd74042 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp @@ -163,11 +163,13 @@ class RowGroupCollection { //! Returns the total amount of segments - use sparingly, as this forces all segments to be loaded idx_t GetSegmentCount(); + //! Get a ptr to the raw segment tree. This can be useful for some extensions to have directly exposed. + shared_ptr GetRowGroups() const; + private: optional_ptr> NextUpdateRowGroup(RowGroupSegmentTree &row_groups, row_t *ids, idx_t &pos, idx_t count) const; - shared_ptr GetRowGroups() const; void SetRowGroups(shared_ptr row_groups); private: diff --git a/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp b/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp index 7249aaf08..f24a0981e 100644 --- a/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp @@ -69,6 +69,8 @@ class StandardColumnData : public ColumnData { void Verify(RowGroup &parent) override; void SetValidityData(shared_ptr validity); + //! Direct access to the validity column data. Intended for extensions that need to walk storage internals. + ValidityColumnData &GetValidityData(); protected: //! The validity column data diff --git a/src/duckdb/src/main/connection_manager.cpp b/src/duckdb/src/main/connection_manager.cpp index f97d373aa..4035d018d 100644 --- a/src/duckdb/src/main/connection_manager.cpp +++ b/src/duckdb/src/main/connection_manager.cpp @@ -38,17 +38,16 @@ void ConnectionManager::AssignConnectionId(Connection &connection) { vector> ConnectionManager::GetConnectionList() { lock_guard lock(connections_lock); vector> result; - for (auto &it : connections) { - auto connection = it.second.lock(); + for (auto it = connections.begin(); it != connections.end();) { + auto connection = it->second.lock(); if (!connection) { - connections.erase(it.first); - connection_count = connections.size(); - continue; + it = connections.erase(it); } else { result.push_back(std::move(connection)); + ++it; } } - + connection_count = connections.size(); return result; } diff --git a/src/duckdb/src/main/database_manager.cpp b/src/duckdb/src/main/database_manager.cpp index e4b1da3e2..839ad33c9 100644 --- a/src/duckdb/src/main/database_manager.cpp +++ b/src/duckdb/src/main/database_manager.cpp @@ -147,14 +147,13 @@ shared_ptr DatabaseManager::AttachDatabase(ClientContext &cont if (requires_tracking_attaches) { // Start timing the ATTACH-delay step. - auto profiler = context.client_data->profiler->StartTimer(MetricType::WAITING_TO_ATTACH_LATENCY); - + auto timer = context.client_data->profiler->StartTimer(MetricType::WAITING_TO_ATTACH_LATENCY); + // Start trying to attach. while (InsertDatabasePath(info, options) == InsertDatabasePathResult::ALREADY_EXISTS) { // database with this name and path already exists // first check if it exists within this transaction auto &meta_transaction = MetaTransaction::Get(context); - auto existing_db = meta_transaction.GetReferencedDatabaseOwning(info.name); - if (existing_db) { + if (auto existing_db = meta_transaction.GetReferencedDatabaseOwning(info.name)) { // it does! return it return existing_db; } @@ -171,6 +170,8 @@ shared_ptr DatabaseManager::AttachDatabase(ClientContext &cont throw InterruptException(); } } + // Returning in the loop above will also end the timer, otherwise, do it explicitly here. + timer.EndTimer(); } auto &config = DBConfig::GetConfig(context); GetDatabaseType(context, info, config, options); diff --git a/src/duckdb/src/optimizer/row_group_pruner.cpp b/src/duckdb/src/optimizer/row_group_pruner.cpp index 362e50730..32daada8a 100644 --- a/src/duckdb/src/optimizer/row_group_pruner.cpp +++ b/src/duckdb/src/optimizer/row_group_pruner.cpp @@ -90,10 +90,9 @@ bool RowGroupPruner::TryOptimize(LogicalOperator &op) const { void RowGroupPruner::GetLimitAndOffset(const LogicalLimit &logical_limit, optional_idx &row_limit, optional_idx &row_offset) const { + // UNSET = no LIMIT = unbounded; leave row_limit invalid. if (logical_limit.limit_val.Type() == LimitNodeType::CONSTANT_VALUE) { row_limit = logical_limit.limit_val.GetConstantValue(); - } else if (logical_limit.limit_val.Type() == LimitNodeType::UNSET) { - row_limit = 0; } if (logical_limit.offset_val.Type() == LimitNodeType::CONSTANT_VALUE) { diff --git a/src/duckdb/src/storage/data_table.cpp b/src/duckdb/src/storage/data_table.cpp index a0dac4701..40e1b7859 100644 --- a/src/duckdb/src/storage/data_table.cpp +++ b/src/duckdb/src/storage/data_table.cpp @@ -1821,12 +1821,13 @@ void DataTable::Checkpoint(TableDataWriter &writer, Serializer &serializer) { row_groups->Checkpoint(writer, global_stats); row_groups->SetRowGroupAppendMode(RowGroupAppendMode::SUGGEST_NEW); if (writer.GetRebuildIndexes()) { - ActiveTimer rebuild_indexes_timer; + ActiveTimer timer; auto context = writer.TryGetClientContext(); if (context) { - rebuild_indexes_timer = QueryProfiler::Get(*context).StartTimer(MetricType::CUMULATIVE_VACUUM_TIME); + timer = QueryProfiler::Get(*context).StartTimer(MetricType::CUMULATIVE_VACUUM_TIME); } RebuildIndexes(); + timer.EndTimer(); } // The row group payload data has been written. Now write: // sample diff --git a/src/duckdb/src/storage/storage_manager.cpp b/src/duckdb/src/storage/storage_manager.cpp index 6ea32cb90..d9c1c5bbc 100644 --- a/src/duckdb/src/storage/storage_manager.cpp +++ b/src/duckdb/src/storage/storage_manager.cpp @@ -502,9 +502,8 @@ void SingleFileStorageManager::LoadDatabase(QueryContext context) { auto checkpoint_reader = SingleFileCheckpointReader(*this); checkpoint_reader.LoadFromStorage(); - // End timing the storage load step. + // Reset the timer (also ends it). if (timer) { - timer->EndTimer(); timer = nullptr; } @@ -518,10 +517,7 @@ void SingleFileStorageManager::LoadDatabase(QueryContext context) { wal_path = GetWALPath(); wal = WriteAheadLog::Replay(context, *this, wal_path); - // End timing the WAL replay step. - if (timer) { - timer->EndTimer(); - } + // Timer will go out of scope here, if set. } if (row_group_size > 122880ULL && GetStorageVersion() < 4) { @@ -697,14 +693,15 @@ void SingleFileStorageManager::CreateCheckpoint(QueryContext context, Checkpoint try { // Start timing the checkpoint. auto client_context = context.GetClientContext(); - ActiveTimer profiler; + ActiveTimer timer; if (client_context) { - profiler = client_context->client_data->profiler->StartTimer(MetricType::CHECKPOINT_LATENCY); + timer = client_context->client_data->profiler->StartTimer(MetricType::CHECKPOINT_LATENCY); } // Write the checkpoint. auto checkpointer = CreateCheckpointWriter(context, options); checkpointer->CreateCheckpoint(); + timer.EndTimer(); } catch (std::exception &ex) { ErrorData error(ex); diff --git a/src/duckdb/src/storage/table/row_group.cpp b/src/duckdb/src/storage/table/row_group.cpp index 724c17f4c..9af7d20ee 100644 --- a/src/duckdb/src/storage/table/row_group.cpp +++ b/src/duckdb/src/storage/table/row_group.cpp @@ -133,6 +133,14 @@ ColumnData &RowGroup::GetColumn(storage_t c) const { return c == COLUMN_IDENTIFIER_ROW_ID ? *row_id_column_data : *columns[c]; } +ColumnData &RowGroup::GetRawColumnData(const StorageIndex &c) const { + return GetColumn(c); +} + +ColumnData &RowGroup::GetRawColumnData(storage_t c) const { + return GetColumn(c); +} + void RowGroup::LoadColumn(storage_t c) const { if (c == COLUMN_IDENTIFIER_ROW_ID) { LoadRowIdColumnData(); diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 7a4ea0b31..99d06f048 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -1200,14 +1200,16 @@ class VacuumTask : public BaseCheckpointTask { } void ExecuteTask() override { - ActiveTimer vacuum_task_timer; + ActiveTimer timer; auto context = checkpoint_state.writer.TryGetClientContext(); if (context) { - vacuum_task_timer = QueryProfiler::Get(*context).StartTimer(MetricType::CUMULATIVE_VACUUM_TIME); + timer = QueryProfiler::Get(*context).StartTimer(MetricType::CUMULATIVE_VACUUM_TIME); } + auto &collection = checkpoint_state.collection; const idx_t row_group_size = collection.GetRowGroupSize(); auto &types = collection.GetTypes(); + // create the new set of target row groups (initially empty) vector> new_row_groups; vector append_counts; @@ -1218,7 +1220,6 @@ class VacuumTask : public BaseCheckpointTask { new_row_group->InitializeEmpty(types, ColumnDataType::MAIN_TABLE); new_row_groups.push_back(std::move(new_row_group)); append_counts.push_back(0); - row_group_rows -= current_row_group_rows; } @@ -1301,7 +1302,10 @@ class VacuumTask : public BaseCheckpointTask { "Mismatch in row group count %d vs verify count %d in RowGroupCollection::Checkpoint", merge_rows, total_append_count); } - vacuum_task_timer.EndTimer(); + + // Explicitly end the timer for the vacuum tasks here. + timer.EndTimer(); + // merging is complete - execute checkpoint tasks of the target row groups for (idx_t i = 0; i < target_count; i++) { auto checkpoint_task = collection.GetCheckpointTask(checkpoint_state, segment_idx + i); diff --git a/src/duckdb/src/storage/table/standard_column_data.cpp b/src/duckdb/src/storage/table/standard_column_data.cpp index 7e43eaae8..9728e5b71 100644 --- a/src/duckdb/src/storage/table/standard_column_data.cpp +++ b/src/duckdb/src/storage/table/standard_column_data.cpp @@ -215,6 +215,11 @@ void StandardColumnData::SetValidityData(shared_ptr validity this->validity = std::move(validity_p); } +ValidityColumnData &StandardColumnData::GetValidityData() { + D_ASSERT(validity); + return *validity; +} + struct StandardColumnCheckpointState : public ColumnCheckpointState { StandardColumnCheckpointState(const RowGroup &row_group, ColumnData &column_data, PartialBlockManager &partial_block_manager) diff --git a/src/duckdb/src/transaction/duck_transaction.cpp b/src/duckdb/src/transaction/duck_transaction.cpp index 15c44fa72..70a8d09c8 100644 --- a/src/duckdb/src/transaction/duck_transaction.cpp +++ b/src/duckdb/src/transaction/duck_transaction.cpp @@ -221,9 +221,9 @@ ErrorData DuckTransaction::WriteToWAL(ClientContext &context, AttachedDatabase & commit_state = storage_manager.GenStorageCommitState(*wal); auto &profiler = *context.client_data->profiler; - auto commit_timer = profiler.StartTimer(MetricType::COMMIT_LOCAL_STORAGE_LATENCY); storage->Commit(commit_state.get()); + commit_timer.EndTimer(); auto wal_timer = profiler.StartTimer(MetricType::WRITE_TO_WAL_LATENCY); undo_buffer.WriteToWAL(*wal, commit_state.get()); @@ -233,6 +233,8 @@ ErrorData DuckTransaction::WriteToWAL(ClientContext &context, AttachedDatabase & // hence we need to ensure those optimistically written blocks are persisted storage_manager.GetBlockManager().FileSync(); } + wal_timer.EndTimer(); + } catch (std::exception &ex) { // Call RevertCommit() outside this try-catch as it itself may throw error_data = ErrorData(ex); diff --git a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp index 1dda62f9f..f77cc507f 100644 --- a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +++ b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp @@ -348,17 +348,17 @@ #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" + #include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" - -#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp"