From 63ba078ca8a0b7227661daa95e776bde24f319d7 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Tue, 31 Mar 2026 10:40:54 -0700 Subject: [PATCH 01/11] Changes to move to protobufs for DDL messages rather than JSON --- include/common/ddl_helpers.hh | 81 ++++ include/pg_fdw/pg_ddl_mgr.hh | 21 +- include/pg_log_mgr/committer.hh | 5 +- include/pg_log_mgr/xid_ready.hh | 7 +- include/redis/redis_ddl.hh | 26 +- include/sys_tbl_mgr/server.hh | 96 +++-- python/grpc/.gitignore | 1 + src/pg_fdw/CMakeLists.txt | 1 + src/pg_fdw/pg_ddl_mgr.cc | 395 +++++++++--------- src/pg_fdw/test/test_pg_ddl_mgr.cc | 57 ++- src/pg_log_mgr/CMakeLists.txt | 1 + src/pg_log_mgr/committer.cc | 61 +-- src/pg_log_mgr/pg_log_reader.cc | 165 ++++---- src/proto/ddl.proto | 254 ++++++++++++ src/redis/CMakeLists.txt | 1 + src/redis/redis_ddl.cc | 91 +++-- src/sys_tbl_mgr/CMakeLists.txt | 4 +- src/sys_tbl_mgr/server.cc | 626 ++++++++++++++++------------- 18 files changed, 1178 insertions(+), 715 deletions(-) create mode 100644 include/common/ddl_helpers.hh create mode 100644 src/proto/ddl.proto diff --git a/include/common/ddl_helpers.hh b/include/common/ddl_helpers.hh new file mode 100644 index 000000000..8efb4a75c --- /dev/null +++ b/include/common/ddl_helpers.hh @@ -0,0 +1,81 @@ +#pragma once + +#include +#include +#include + +namespace springtail { + +// Extract the table ID from a DDL operation, if the operation references a specific table. +// Returns nullopt for namespace, user type, and partition operations that don't carry a tid. +inline std::optional get_table_id(const proto::DDLOperation& op) { + switch (op.operation_case()) { + case proto::DDLOperation::kCreateTable: + return op.create_table().tid(); + case proto::DDLOperation::kDropTable: + return op.drop_table().tid(); + case proto::DDLOperation::kRenameTable: + return op.rename_table().tid(); + case proto::DDLOperation::kSetNamespace: + return op.set_namespace().tid(); + case proto::DDLOperation::kSetRlsEnabled: + return op.set_rls_enabled().tid(); + case proto::DDLOperation::kSetRlsForced: + return op.set_rls_forced().tid(); + case proto::DDLOperation::kColumnAdd: + return op.column_add().tid(); + case proto::DDLOperation::kColumnDrop: + return op.column_drop().tid(); + case proto::DDLOperation::kColumnRename: + return op.column_rename().tid(); + case proto::DDLOperation::kColumnNullable: + return op.column_nullable().tid(); + case proto::DDLOperation::kResync: + return op.resync().tid(); + case proto::DDLOperation::kNoChange: + return op.no_change().tid(); + case proto::DDLOperation::kResyncPartitions: + case proto::DDLOperation::kNamespaceCreate: + case proto::DDLOperation::kNamespaceAlter: + case proto::DDLOperation::kNamespaceDrop: + case proto::DDLOperation::kUsertypeCreate: + case proto::DDLOperation::kUsertypeAlter: + case proto::DDLOperation::kUsertypeDrop: + case proto::DDLOperation::kAttachPartition: + case proto::DDLOperation::kDetachPartition: + case proto::DDLOperation::OPERATION_NOT_SET: + return std::nullopt; + } + return std::nullopt; +} + +// Return a human-readable action name for logging purposes. +inline std::string get_action_name(const proto::DDLOperation& op) { + switch (op.operation_case()) { + case proto::DDLOperation::kCreateTable: return "create"; + case proto::DDLOperation::kDropTable: return "drop"; + case proto::DDLOperation::kRenameTable: return "rename"; + case proto::DDLOperation::kSetNamespace: return "set_namespace"; + case proto::DDLOperation::kSetRlsEnabled: return "set_rls_enabled"; + case proto::DDLOperation::kSetRlsForced: return "set_rls_forced"; + case proto::DDLOperation::kColumnAdd: return "col_add"; + case proto::DDLOperation::kColumnDrop: return "col_drop"; + case proto::DDLOperation::kColumnRename: return "col_rename"; + case proto::DDLOperation::kColumnNullable: return "col_nullable"; + case proto::DDLOperation::kResync: return "resync"; + case proto::DDLOperation::kNoChange: return "no_change"; + case proto::DDLOperation::kResyncPartitions: return "resync_partitions"; + case proto::DDLOperation::kNamespaceCreate: return "ns_create"; + case proto::DDLOperation::kNamespaceAlter: return "ns_alter"; + case proto::DDLOperation::kNamespaceDrop: return "ns_drop"; + case proto::DDLOperation::kUsertypeCreate: return "ut_create"; + case proto::DDLOperation::kUsertypeAlter: return "ut_alter"; + case proto::DDLOperation::kUsertypeDrop: return "ut_drop"; + case proto::DDLOperation::kAttachPartition: return "attach_partition"; + case proto::DDLOperation::kDetachPartition: return "detach_partition"; + case proto::DDLOperation::OPERATION_NOT_SET: return "unknown"; + } + return "unknown"; +} + +} // namespace springtail diff --git a/include/pg_fdw/pg_ddl_mgr.hh b/include/pg_fdw/pg_ddl_mgr.hh index d634dc953..13b567803 100644 --- a/include/pg_fdw/pg_ddl_mgr.hh +++ b/include/pg_fdw/pg_ddl_mgr.hh @@ -6,6 +6,7 @@ #include #include +#include #include namespace springtail::pg_fdw { @@ -88,7 +89,7 @@ namespace springtail::pg_fdw { struct DBData { std::mutex pending_ddls_mutex; ///< mutex for applying changes to pending_ddls - std::map pending_ddls; ///< map of xid to pending ddl + std::map> pending_ddls; ///< map of xid to pending ddl operations std::shared_mutex db_mutex; ///< mutex for changes to this data structure std::string db_name{}; ///< database name std::atomic state{redis::db_state_change::DB_STATE_UNKNOWN}; @@ -201,12 +202,12 @@ namespace springtail::pg_fdw { /** * @brief Helper to apply outstanding DDL changes to the FDW tables. * @param db_id The database ID to apply the changes to. - * @param schema_xid The XID at which the DDL changes were applied. - * @param ddls A JSON array of DDL statements to apply. + * @param db_name The database name. + * @param xid_map Map of XIDs to DDL operations to apply. * @return Status of the operation. True if successful, false otherwise. */ bool _update_schemas(uint64_t db_id, const std::string &db_name, - const std::map &xid_map); + const std::map> &xid_map); /** Helper to execute ddl statements for this db */ /** @@ -237,15 +238,15 @@ namespace springtail::pg_fdw { _get_usertypes(uint64_t db_id, uint64_t xid); /** - * @brief Helper to generate sql statement from json. Decodes the ddl json. + * @brief Helper to generate sql statement from a protobuf DDL operation. * @param conn LibPqConnectionPtr connection * @param server_name fdw server name - * @param ddl json object containing ddl + * @param op protobuf DDLOperation * @return std::string sql statement */ - std::string _gen_sql_from_json(LibPqConnectionPtr conn, - const std::string &server_name, - const nlohmann::json &ddl); + std::string _gen_sql_from_ddl(LibPqConnectionPtr conn, + const std::string &server_name, + const proto::DDLOperation &op); /** * @brief Function for creating a replicated database @@ -404,7 +405,7 @@ namespace springtail::pg_fdw { * @param db_id - database id * @param xid_map - map of xids to ddl */ - void _queue_request(uint64_t db_id, const std::map &xid_map); + void _queue_request(uint64_t db_id, const std::map> &xid_map); }; } // springtail::pg_fdw diff --git a/include/pg_log_mgr/committer.hh b/include/pg_log_mgr/committer.hh index 644df1955..caf41fb68 100644 --- a/include/pg_log_mgr/committer.hh +++ b/include/pg_log_mgr/committer.hh @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -88,7 +89,7 @@ namespace springtail::committer { /** * Clear the SysTblMgr::Client cache for any tables with DDL mutations. */ - void _invalidate_systbl_cache(uint64_t db, const nlohmann::json &completed_ddls); + void _invalidate_systbl_cache(uint64_t db, const std::vector &completed_ddls); /** * @brief Expire dropped table dirs @@ -97,7 +98,7 @@ namespace springtail::committer { * @param completed_ddls DDLs processed * @param committed_xid XID at which ddls were processed */ - void _expire_table_drops(uint64_t db_id, const nlohmann::json &completed_ddls, uint64_t committed_xid); + void _expire_table_drops(uint64_t db_id, const std::vector &completed_ddls, uint64_t committed_xid); /** * @brief Expire dropped index paths diff --git a/include/pg_log_mgr/xid_ready.hh b/include/pg_log_mgr/xid_ready.hh index 9503c6702..23173cba6 100644 --- a/include/pg_log_mgr/xid_ready.hh +++ b/include/pg_log_mgr/xid_ready.hh @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -41,7 +42,7 @@ namespace springtail::committer { */ class SwapMsg { public: - SwapMsg(uint64_t xid, nlohmann::json &&ddls, std::vector&& tids) + SwapMsg(uint64_t xid, std::vector &&ddls, std::vector&& tids) : _xid(xid), _ddls(std::move(ddls)), _tids(std::move(tids)) @@ -51,7 +52,7 @@ namespace springtail::committer { return _xid; } - const nlohmann::json &ddls() const { + const std::vector &ddls() const { return _ddls; } @@ -61,7 +62,7 @@ namespace springtail::committer { private: uint64_t _xid; - nlohmann::json _ddls; + std::vector _ddls; std::vector _tids; }; diff --git a/include/redis/redis_ddl.hh b/include/redis/redis_ddl.hh index 12bee0da9..058ae159a 100644 --- a/include/redis/redis_ddl.hh +++ b/include/redis/redis_ddl.hh @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include @@ -30,18 +30,19 @@ namespace springtail { /** * Used by gc::LogParser (GC-1) to record DDL statements against the XID. + * @param db_id The database ID. * @param xid The XID at which this DDL statement needs to be applied. - * @param ddl A JSON representation of the DDL statement. + * @param ddl A protobuf DDLOperation representing the DDL statement. */ - void add_ddl(uint64_t db_id, uint64_t xid, const std::string &ddl); + void add_ddl(uint64_t db_id, uint64_t xid, proto::DDLOperation ddl); /** * Used by gc::Committer (GC-2) to retrieve the set of DDL statements recorded against the * XID we are about to commit. * @param xid The XID we are about to commit. - * @return A JSON array containing the ordered set of DDLs to apply at each FDW. + * @return The ordered set of DDLOperations to apply at each FDW, empty if none. */ - nlohmann::json get_ddls_xid(uint64_t db_id, uint64_t xid); + std::vector get_ddls_xid(uint64_t db_id, uint64_t xid); /** * Used by gc::LogParser (GC-1) to clear DDL statements it recorded against a given XID. @@ -63,9 +64,9 @@ namespace springtail { * commit_ddl(). * @param db_id The ID of the database instance we are updating. * @param xid The XID at which these DDL statements were applied. - * @param ddls A JSON array of DDL statements to apply, retrieved from get_ddls_xid() + * @param ddls The DDL operations to apply, retrieved from get_ddls_xid() */ - void precommit_ddl(uint64_t db_id, uint64_t xid, nlohmann::json ddls); + void precommit_ddl(uint64_t db_id, uint64_t xid, std::vector ddls); /** * Used by gc::Committer (GC-2) to provide the list of DDL statements to the FDWs. @@ -89,12 +90,11 @@ namespace springtail { void abort_ddl(uint64_t db_id, uint64_t xid); /** - * Used by the FDW to retrieve the next set of DDL statements that need to be applied. + * Used by the FDW to retrieve the next set of DDL batches that need to be applied. * @param fdw_id The ID of the FDW we are updating. - * @return A JSON object containing the XID at which the DDLs were applied and an array of - * the DDL statements themselves. + * @return A vector of DDLBatch messages, each containing db_id, xid, and DDL operations. */ - std::vector get_next_ddls(const std::string &fdw_id); + std::vector get_next_ddls(const std::string &fdw_id); /** * Used by the FDW to abort applying a set of DDL statements and place them back on the @@ -177,10 +177,10 @@ namespace springtail { RedisClientPtr _redis; - // In-memory DDL storage: db_id -> (xid -> [ddl_statements]) + // In-memory DDL storage: db_id -> (xid -> [ddl_operations]) // Used for the hot path between GC-1 (PgLogReader) and GC-2 (Committer) // Entries are transient and cleared after precommit_ddl() moves them to Redis - std::unordered_map>> _ddl_cache; + std::unordered_map>> _ddl_cache; mutable std::shared_mutex _ddl_cache_mutex; }; diff --git a/include/sys_tbl_mgr/server.hh b/include/sys_tbl_mgr/server.hh index 6a4a56cdc..62865fba4 100644 --- a/include/sys_tbl_mgr/server.hh +++ b/include/sys_tbl_mgr/server.hh @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -21,67 +22,67 @@ public: /** * @brief Create table API cal */ - std::string + std::vector create_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg); /** * @brief Alter table API cal */ - std::string + std::vector alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg); /** * @brief Drop table API cal */ - std::string + std::vector drop_table(uint64_t db_id, const XidLsn &xid, const PgMsgDropTable &msg); /** * @brief Create namespace API cal */ - std::string + std::vector create_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace &msg); /** * @brief Alter namespace API cal */ - std::string + std::vector alter_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace &msg); /** * @brief Drop namespace API cal */ - std::string + std::vector drop_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace &msg); /** * @brief Create user type API cal */ - std::string + std::vector create_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &msg); /** * @brief Alter user type API cal */ - std::string + std::vector alter_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &msg); /** * @brief Drop user type API cal */ - std::string + std::vector drop_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &msg); /** * @brief Attach partition API cal */ - std::string + std::vector attach_partition(uint64_t db_id, const XidLsn &xid, const PgMsgAttachPartition &msg); /** * @brief Detach partition API cal */ - std::string + std::vector detach_partition(uint64_t db_id, const XidLsn &xid, const PgMsgDetachPartition &msg); /** @@ -165,7 +166,7 @@ public: /** * @brief Swap and sync table API cal */ - std::string + std::vector swap_sync_table(const proto::NamespaceRequest &namespace_req, const proto::TableRequest &create_req, const std::vector &index_reqs, @@ -475,8 +476,17 @@ private: * * @return A vector of ColumnHistory objects containing the history events for the child partition tables. */ - nlohmann::json _generate_partition_updates(const proto::TableRequest& request, - const proto::ColumnHistory& history); + proto::DDLOperation _generate_partition_updates(const proto::TableRequest& request, + const proto::ColumnHistory& history, + const XidLsn& xid); + + /** + * Result of comparing old and new schemas during ALTER TABLE. + */ + struct SchemaUpdateResult { + proto::ColumnHistory history; + proto::DDLOperation operation; + }; /** * Helper function to extract a change entry for a schema by comparing the old and new @@ -484,13 +494,21 @@ private: * @param old_schema The schema before the alteration. * @param new_schema The schema after the alteration. * @param xid The XID/LSN at which the alteration occurred. - * @param ddl The JSON object to which the DDL statement will be added. + * @param tid The table ID. + * @param schema_name The schema/namespace name. + * @param table_name The table name. + * @param rls_enabled Whether RLS is enabled. + * @param rls_forced Whether RLS is forced. */ - proto::ColumnHistory _generate_update( + SchemaUpdateResult _generate_update( const google::protobuf::RepeatedPtrField& old_schema, const google::protobuf::RepeatedPtrField& new_schema, const XidLsn& xid, - nlohmann::json& ddl); + uint64_t tid, + const std::string& schema_name, + const std::string& table_name, + bool rls_enabled, + bool rls_forced); // CACHE FOR NAMESPACES @@ -660,10 +678,10 @@ private: * @param xid XID for visibility * @return DDL JSON object for ATTACH PARTITION */ - nlohmann::json _generate_attach_partition_ddl(uint64_t db_id, - const TableCacheRecordPtr& child_info, - uint64_t parent_table_id, - const XidLsn& xid); + proto::DDLOperation _generate_attach_partition_ddl(uint64_t db_id, + const TableCacheRecordPtr& child_info, + uint64_t parent_table_id, + const XidLsn& xid); /** * Generate ATTACH PARTITION DDLs for all direct children (non-recursive) @@ -679,7 +697,7 @@ private: uint64_t parent_table_id, const XidLsn& xid, uint64_t parent_snapshot_xid, - std::vector& ddl_array_out); + std::vector& ddl_array_out); /** * Performs a create_table() assuming that the correct locks are already held. @@ -694,7 +712,7 @@ private: * @param request Table request containing table metadata, columns, partition info, and snapshot XID * @return JSON array of DDL objects (CREATE + optional ATTACH + optional child ATTACHes) */ - nlohmann::json _create_table(const proto::TableRequest& request); + std::vector _create_table(const proto::TableRequest& request); /** * Performs a drop_table() assuming that the correct locks are already held. @@ -704,7 +722,7 @@ private: * * @return ddl to be applied */ - nlohmann::json _drop_table(const proto::DropTableRequest& request, bool is_resync=false); + proto::DDLOperation _drop_table(const proto::DropTableRequest& request, bool is_resync=false); /** * Performs an update_roots() assuming that the correct locks are already held. @@ -749,27 +767,27 @@ private: /** * Helper for updating the namespace_names table. */ - nlohmann::json _mutate_namespace(uint64_t db, - uint64_t ns_id, - std::optional name, - const XidLsn& xid, - bool exists); + proto::DDLOperation _mutate_namespace(uint64_t db, + uint64_t ns_id, + std::optional name, + const XidLsn& xid, + bool exists); std::optional> _find_cached_index( uint64_t db_id, uint64_t index_id, const XidLsn& xid, std::optional tid); /** * @brief Helper for updating the usertype table. - * @return nlohmann::json DDL json for ddl mgr - */ - nlohmann::json _mutate_usertype(uint64_t db_id, - uint64_t type_id, - const std::string &name, - uint64_t ns_id, - int8_t type, - const std::string &value_json, - const XidLsn xid, - bool active); + * @return base DDLOperation with xid/lsn set (caller sets the oneof action) + */ + proto::DDLOperation _mutate_usertype(uint64_t db_id, + uint64_t type_id, + const std::string &name, + uint64_t ns_id, + int8_t type, + const std::string &value_json, + const XidLsn xid, + bool active); /** * Retrieve the current read XID for a db. diff --git a/python/grpc/.gitignore b/python/grpc/.gitignore index 3567113cf..9422b98dd 100644 --- a/python/grpc/.gitignore +++ b/python/grpc/.gitignore @@ -3,3 +3,4 @@ sys_tbl_mgr write_cache xid_manager pg_copy_table +ddl \ No newline at end of file diff --git a/src/pg_fdw/CMakeLists.txt b/src/pg_fdw/CMakeLists.txt index f99ee0dd3..ca236167b 100644 --- a/src/pg_fdw/CMakeLists.txt +++ b/src/pg_fdw/CMakeLists.txt @@ -101,6 +101,7 @@ target_link_libraries(springtail_pg_ddl PRIVATE springtail_sys_tbl_mgr_client PRIVATE Boost::thread nlohmann_json::nlohmann_json + proto_ddl gRPC::grpc++ protobuf::libprotobuf ) diff --git a/src/pg_fdw/pg_ddl_mgr.cc b/src/pg_fdw/pg_ddl_mgr.cc index c4cd108d0..cdb992e5a 100644 --- a/src/pg_fdw/pg_ddl_mgr.cc +++ b/src/pg_fdw/pg_ddl_mgr.cc @@ -13,6 +13,9 @@ #include #include +#include +#include + namespace { // helper to delete xid history from shm caches @@ -276,7 +279,7 @@ namespace springtail::pg_fdw { } } - void PgDDLMgr::_queue_request(uint64_t db_id, const std::map &xid_map) + void PgDDLMgr::_queue_request(uint64_t db_id, const std::map> &xid_map) { _thread_manager->queue_request(std::make_shared( db_id, [this, db_id, xid_map]() { @@ -1170,14 +1173,14 @@ namespace springtail::pg_fdw { } // check if we should skip applying any of the DDLs based on already-applied XIDs - std::map> db_map; - for (auto &entry : ddls_vec) { - uint64_t db_id = entry.at("db_id").get(); - uint64_t schema_xid = entry.at("xid").get(); - auto ddls = entry.at("ddls"); + std::map>> db_map; + for (auto &batch : ddls_vec) { + uint64_t db_id = batch.db_id(); + uint64_t schema_xid = batch.xid(); auto token = open_telemetry::OpenTelemetry::get_instance()->set_context_variables({{"db_id", std::to_string(db_id)}, {"xid", std::to_string(schema_xid)}}); - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "DDLS: {}", ddls.dump(4)); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "DDLS: batch db_id={}, xid={}, ops={}", + db_id, schema_xid, batch.operations_size()); std::shared_lock lock(_db_mutex); auto it = _db_data.find(db_id); @@ -1187,6 +1190,9 @@ namespace springtail::pg_fdw { std::shared_lock db_lock(db_item.db_mutex); lock.unlock(); + // Convert repeated operations to a vector + std::vector ops(batch.operations().begin(), batch.operations().end()); + uint64_t current_xid = db_item.latest_xid; if (current_xid >= schema_xid) { LOG_WARN("Schema XID has already been applied: db_id={}, current={}, new={}", @@ -1196,11 +1202,11 @@ namespace springtail::pg_fdw { if (db_item.state != redis::db_state_change::DB_STATE_RUNNING) { LOG_INFO("New schema XID will be stored till database is in 'running' state: db_id={}, current={}, new={}", db_id, current_xid, schema_xid); - db_item.pending_ddls[schema_xid] = ddls; + db_item.pending_ddls[schema_xid] = std::move(ops); } else { LOG_INFO("New schema XID will be applied: db_id={}, current={}, new={}", db_id, current_xid, schema_xid); - db_map[db_id][schema_xid] = ddls; + db_map[db_id][schema_xid] = std::move(ops); } } } @@ -1293,7 +1299,7 @@ namespace springtail::pg_fdw { bool PgDDLMgr::_update_schemas(uint64_t db_id, const std::string &db_name, - const std::map &xid_map) + const std::map> &xid_map) { // get the database name for the db_id; XXX should see if we can swtich to OID @@ -1301,10 +1307,10 @@ namespace springtail::pg_fdw { std::vector txn; try { - // generate a DDL statement for each JSON in the transaction - for (auto &[xid, ddls] : xid_map) { - for (const auto &ddl : ddls) { - auto query_string = _gen_sql_from_json(conn, SPRINGTAIL_FDW_SERVER_NAME, ddl); + // generate a DDL statement for each operation in the transaction + for (auto &[xid, ops] : xid_map) { + for (const auto &op : ops) { + auto query_string = _gen_sql_from_ddl(conn, SPRINGTAIL_FDW_SERVER_NAME, op); if (!query_string.empty()) { txn.push_back(query_string); } @@ -1351,121 +1357,100 @@ namespace springtail::pg_fdw { conn->end_transaction(); } - PartitionInfo - _get_partition_info(const nlohmann::json &ddl){ - uint64_t parent_table_id = 0; - std::string parent_namespace_name; - std::string parent_table_name; - std::string partition_key; - std::string partition_bound; - - if (ddl.contains("parent_table_id") && !ddl.at("parent_table_id").is_null()) { - parent_table_id = ddl.at("parent_table_id").get(); - } - - if (ddl.contains("parent_namespace_name") && !ddl.at("parent_namespace_name").is_null()) { - parent_namespace_name = ddl.at("parent_namespace_name").get(); - } - - if (ddl.contains("parent_table_name") && !ddl.at("parent_table_name").is_null()) { - parent_table_name = ddl.at("parent_table_name").get(); - } - - if (ddl.contains("partition_key") && !ddl.at("partition_key").is_null()) { - partition_key = ddl.at("partition_key").get(); - } - - if (ddl.contains("partition_bound") && !ddl.at("partition_bound").is_null()) { - partition_bound = ddl.at("partition_bound").get(); - } - - PartitionInfo partition_info(parent_table_id, - parent_namespace_name, - parent_table_name, - partition_key, - partition_bound); - - return partition_info; - } - std::string - PgDDLMgr::_gen_sql_from_json(LibPqConnectionPtr conn, - const std::string &server_name, - const nlohmann::json &ddl) + PgDDLMgr::_gen_sql_from_ddl(LibPqConnectionPtr conn, + const std::string &server_name, + const proto::DDLOperation &op) { - assert(ddl.is_object()); - assert(ddl.contains("action")); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "DDL operation: {}", get_action_name(op)); - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "DDL JSON: {}", ddl.dump(4)); + switch (op.operation_case()) { - // determine if this is a foreign table or regular table operation - // if partition_key is empty, then it's a foreign table, since it is a leaf table - PartitionInfo partition_info = _get_partition_info(ddl); - bool is_foreign_table_type = partition_info.partition_key().empty(); + case proto::DDLOperation::kCreateTable: { + const auto &ct = op.create_table(); - auto const &action = ddl.at("action"); - if (action == "create") { // create table - // generate the CREATE TABLE statement - std::vector columns; + // if partition_key is absent or empty, it's a foreign (leaf) table + bool is_foreign_table_type = !ct.has_partition_key() || ct.partition_key().empty(); - for (const auto &col : ddl.at("columns")) { + // Build partition info: CreateTableDDL has no parent info, only partition_key + PartitionInfo partition_info(0, "", "", ct.has_partition_key() ? ct.partition_key() : "", ""); + + // Build column list + std::vector columns; + for (const auto &col : ct.columns()) { auto fully_qualified_type_name = fmt::format("{}.{}", - conn->escape_identifier(col.at("type_namespace").get()), - conn->escape_identifier(col.at("type_name").get())); - columns.emplace_back(col.at("name"), - fully_qualified_type_name, - col.at("nullable")); + conn->escape_identifier(col.type_namespace()), + conn->escape_identifier(col.type_name())); + columns.emplace_back(col.name(), fully_qualified_type_name, col.nullable()); } - return PgFdwCommon::_gen_fdw_table_sql(server_name, ddl.at("schema"), ddl.at("table"), ddl.at("tid"), columns, + return PgFdwCommon::_gen_fdw_table_sql(server_name, ct.schema(), ct.table(), ct.tid(), columns, partition_info, is_foreign_table_type, [conn](const std::string &name) { return conn->escape_identifier(name.c_str()); }); } - else if (action == "rename") { // rename table + case proto::DDLOperation::kDropTable: { + const auto &dt = op.drop_table(); + bool is_foreign_table_type = !dt.has_partition_key() || dt.partition_key().empty(); + return fmt::format("DROP {} TABLE IF EXISTS {}.{};", + is_foreign_table_type ? "FOREIGN" : "", + conn->escape_identifier(dt.schema()), + conn->escape_identifier(dt.table())); + } + + case proto::DDLOperation::kRenameTable: { + const auto &rn = op.rename_table(); + bool is_foreign_table_type = !rn.has_partition_key() || rn.partition_key().empty(); std::string rename = fmt::format("ALTER {} TABLE {}.{} RENAME TO {};", is_foreign_table_type ? "FOREIGN" : "", - conn->escape_identifier(ddl.at("old_schema").get()), - conn->escape_identifier(ddl.at("old_table").get()), - conn->escape_identifier(ddl.at("table").get())); + conn->escape_identifier(rn.old_schema()), + conn->escape_identifier(rn.old_table()), + conn->escape_identifier(rn.table())); // XXX it's not clear to me that we need to support a schema change here? - if (ddl.at("schema").get() != ddl.at("old_schema").get()) { + if (rn.schema() != rn.old_schema()) { return rename + fmt::format("ALTER {} TABLE {}.{} SET SCHEMA {};", is_foreign_table_type ? "FOREIGN" : "", - conn->escape_identifier(ddl.at("old_schema").get()), - conn->escape_identifier(ddl.at("table").get()), - conn->escape_identifier(ddl.at("schema").get())); + conn->escape_identifier(rn.old_schema()), + conn->escape_identifier(rn.table()), + conn->escape_identifier(rn.schema())); } else { return rename; } } - else if (action == "drop") { // drop table - return fmt::format("DROP {} TABLE IF EXISTS {}.{};", + case proto::DDLOperation::kSetNamespace: { + const auto &sns = op.set_namespace(); + bool is_foreign_table_type = !sns.has_partition_key() || sns.partition_key().empty(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Set namespace: {}.{} -> {}", sns.old_schema(), sns.table(), sns.schema()); + const auto schema = conn->escape_identifier(sns.schema()); + const auto table = conn->escape_identifier(sns.table()); + const auto old_schema = conn->escape_identifier(sns.old_schema()); + + return fmt::format("ALTER {} TABLE {}.{} SET SCHEMA {};", is_foreign_table_type ? "FOREIGN" : "", - conn->escape_identifier(ddl.at("schema").get()), - conn->escape_identifier(ddl.at("table").get())); + old_schema, table, schema); } + #if ENABLE_SCHEMA_MUTATES // XXX THIS CODE IS CURRENTLY DISABLED - else if (action == "col_add") { // alter table add column - auto &col = ddl.at("column"); + case proto::DDLOperation::kColumnAdd: { + const auto &ca = op.column_add(); + // ColumnAddDDL does not carry partition_key; treat as foreign table + bool is_foreign_table_type = true; + const auto &col = ca.column(); std::string constraints; - std::string null_constraint = col.at("nullable").get() - ? "NULL" - : "NOT NULL"; - if (col.contains("default")) { - constraints = fmt::format("{} {}", null_constraint, - col.at("default").get()); + std::string null_constraint = col.nullable() ? "NULL" : "NOT NULL"; + if (col.has_default_value()) { + constraints = fmt::format("{} {}", null_constraint, col.default_value()); } else { constraints = null_constraint; } - uint32_t type_oid = col.at("type").get(); + uint32_t type_oid = col.type_oid(); std::string type_name; auto it = _type_cache.find(type_oid); if (it != _type_cache.end()) { @@ -1473,124 +1458,110 @@ namespace springtail::pg_fdw { } return fmt::format("ALTER {} TABLE {}.{} ADD COLUMN {} {} {};", is_foreign_table_type ? "FOREIGN" : "", - conn->escape_identifier(ddl.at("schema").get()), - conn->escape_identifier(ddl.at("table").get()), - conn->escape_identifier(col.at("name").get()), + conn->escape_identifier(ca.schema()), + conn->escape_identifier(ca.table()), + conn->escape_identifier(col.name()), type_name, constraints); } - else if (action == "col_drop") { // alter table drop column + case proto::DDLOperation::kColumnDrop: { + const auto &cd = op.column_drop(); + // ColumnDropDDL does not carry partition_key; treat as foreign table + bool is_foreign_table_type = true; return fmt::format("ALTER {} TABLE {}.{} DROP COLUMN {};", is_foreign_table_type ? "FOREIGN" : "", - conn->escape_identifier(ddl.at("schema").get()), - conn->escape_identifier(ddl.at("table").get()), - conn->escape_identifier(ddl.at("column").get())); + conn->escape_identifier(cd.schema()), + conn->escape_identifier(cd.table()), + conn->escape_identifier(cd.column_name())); } #endif /* ENABLE_SCHEMA_MUTATES */ - else if (action == "col_rename") { + case proto::DDLOperation::kColumnRename: { + const auto &cr = op.column_rename(); + // ColumnRenameDDL does not carry partition_key; treat as foreign table + bool is_foreign_table_type = true; return fmt::format("ALTER {} TABLE {}.{} RENAME COLUMN {} TO {};", is_foreign_table_type ? "FOREIGN" : "", - conn->escape_identifier(ddl.at("schema").get()), - conn->escape_identifier(ddl.at("table").get()), - conn->escape_identifier(ddl.at("old_name").get()), - conn->escape_identifier(ddl.at("new_name").get())); + conn->escape_identifier(cr.schema()), + conn->escape_identifier(cr.table()), + conn->escape_identifier(cr.old_name()), + conn->escape_identifier(cr.new_name())); } #if ENABLE_SCHEMA_MUTATES - else if (action == "col_nullable") { - auto &col = ddl.at("column"); + case proto::DDLOperation::kColumnNullable: { + const auto &cn = op.column_nullable(); return fmt::format("ALTER FOREIGN TABLE {}.{} ALTER COLUMN {} {} NOT NULL;", - conn->escape_identifier(ddl.at("schema").get()), - conn->escape_identifier(ddl.at("table").get()), - conn->escape_identifier(col.at("name").get()), - col.at("nullable").get() ? "DROP" : "SET"); + conn->escape_identifier(cn.schema()), + conn->escape_identifier(cn.table()), + conn->escape_identifier(cn.column_name()), + cn.nullable() ? "DROP" : "SET"); } #endif /* ENABLE_SCHEMA_MUTATES */ - else if (action == "create_index") { - // TODO: do something? - LOG_ERROR("CREATE INDEX"); - CHECK(false); - return ""; - } - else if (action == "drop_index") { - // TODO: do something? - LOG_ERROR("DROP INDEX"); - CHECK(false); - return ""; - } - else if (action == "ns_create") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Creating schema with JSON: {}", ddl.dump()); - const auto escaped_schema = conn->escape_identifier(ddl.at("name").get()); + case proto::DDLOperation::kNamespaceCreate: { + const auto &nc = op.namespace_create(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Creating schema: {}", nc.name()); + const auto escaped_schema = conn->escape_identifier(nc.name()); return _get_create_schema_with_grants_query(escaped_schema); } - else if (action == "ns_alter") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Altering schema with JSON: {}", ddl.dump()); - const auto old_schema = conn->escape_identifier(ddl.at("old_name").get()); - const auto new_schema = conn->escape_identifier(ddl.at("name").get()); + case proto::DDLOperation::kNamespaceAlter: { + const auto &na = op.namespace_alter(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Altering schema: {} -> {}", na.old_name(), na.name()); + const auto old_schema = conn->escape_identifier(na.old_name()); + const auto new_schema = conn->escape_identifier(na.name()); return _get_alter_schema_with_grants_query(old_schema, new_schema); } - else if (action == "ns_drop") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Dropping schema with JSON: {}", ddl.dump()); - const auto escaped_schema = conn->escape_identifier(ddl.at("name").get()); + + case proto::DDLOperation::kNamespaceDrop: { + const auto &nd = op.namespace_drop(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Dropping schema: {}", nd.name()); + const auto escaped_schema = conn->escape_identifier(nd.name()); return fmt::format("DROP SCHEMA IF EXISTS {} CASCADE", escaped_schema); } - else if (action == "set_namespace") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Set namespace with JSON: {}", ddl.dump()); - const auto schema = conn->escape_identifier(ddl.at("schema").get()); - const auto table = conn->escape_identifier(ddl.at("table").get()); - const auto old_schema = conn->escape_identifier(ddl.at("old_schema").get()); - return fmt::format("ALTER {} TABLE {}.{} SET SCHEMA {};", - is_foreign_table_type ? "FOREIGN" : "", - old_schema, table, schema); - } - else if (action == "ut_create") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Creating user type with JSON: {}", ddl.dump()); - const auto escaped_schema = conn->escape_identifier(ddl.at("schema").get()); - const auto escaped_name = conn->escape_identifier(ddl.at("name").get()); - const auto value_json_str = ddl.at("value").get(); - const auto type = ddl.at("type").get(); + case proto::DDLOperation::kUsertypeCreate: { + const auto &utc = op.usertype_create(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Creating user type: {}.{}", utc.schema(), utc.name()); + const auto escaped_schema = conn->escape_identifier(utc.schema()); + const auto escaped_name = conn->escape_identifier(utc.name()); + const auto &value_json_str = utc.value_json(); + const auto type = utc.type(); if (type == constant::USER_TYPE_EXTENSION) { return ""; } return _get_create_type_query(escaped_schema, escaped_name, value_json_str, conn); } - else if (action == "ut_drop") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Dropping user type with JSON: {}", ddl.dump()); - const auto escaped_schema = conn->escape_identifier(ddl.at("schema").get()); - const auto escaped_name = conn->escape_identifier(ddl.at("name").get()); - return fmt::format("DROP TYPE IF EXISTS {}.{} CASCADE", escaped_schema, escaped_name); - } else if (action == "ut_alter") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Altering user type with JSON: {}", ddl.dump()); + case proto::DDLOperation::kUsertypeAlter: { + const auto &uta = op.usertype_alter(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Altering user type: {}.{}", uta.schema(), uta.name()); // need to check if it is renamed - const auto old_name = conn->escape_identifier(ddl.at("old_name").get()); - const auto new_name = conn->escape_identifier(ddl.at("name").get()); - const auto escaped_schema = conn->escape_identifier(ddl.at("schema").get()); + const auto old_name = conn->escape_identifier(uta.old_name()); + const auto new_name = conn->escape_identifier(uta.name()); + const auto escaped_schema = conn->escape_identifier(uta.schema()); if (old_name != new_name) { return fmt::format("ALTER TYPE {}.{} RENAME TO {}", escaped_schema, old_name, new_name); } // need to check if schema has changed - const auto old_schema = conn->escape_identifier(ddl.at("old_schema").get()); - const auto new_schema = conn->escape_identifier(ddl.at("schema").get()); + const auto old_schema = conn->escape_identifier(uta.old_schema()); + const auto new_schema = conn->escape_identifier(uta.schema()); if (old_schema != new_schema) { return fmt::format("ALTER TYPE {}.{} SET SCHEMA {};", old_schema, new_name, new_schema); } // need to check if value has changed - const auto value_json_str = ddl.at("value").get(); - const auto old_value_json_str = ddl.at("old_value").get(); + const auto &value_json_str = uta.value_json(); + const auto &old_value_json_str = uta.old_value_json(); if (value_json_str != old_value_json_str) { - return gen_alter_enum_sql(ddl.at("schema").get(), - ddl.at("name").get(), + return gen_alter_enum_sql(uta.schema(), + uta.name(), nlohmann::json::parse(old_value_json_str), nlohmann::json::parse(value_json_str), conn); @@ -1598,50 +1569,80 @@ namespace springtail::pg_fdw { // nothing to actually change in the FDW return {}; - } else if (action == "attach_partition") { - std::string alter = fmt::format("ALTER TABLE {}.{} ATTACH PARTITION {}.{} {};", - conn->escape_identifier(ddl.at("schema")), - conn->escape_identifier(ddl.at("table")), - conn->escape_identifier(ddl.at("partition_schema")), - conn->escape_identifier(ddl.at("partition_name")), - ddl.at("partition_bound").get()); - - return alter; - } else if (action == "detach_partition") { - std::string alter = fmt::format("ALTER TABLE {}.{} DETACH PARTITION {}.{};", - conn->escape_identifier(ddl.at("schema")), - conn->escape_identifier(ddl.at("table")), - conn->escape_identifier(ddl.at("partition_schema")), - conn->escape_identifier(ddl.at("partition_name"))); - - return alter; - } else if (action == "set_rls_enabled") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Setting RLS enabled with JSON: {}", ddl.dump()); - const auto schema = conn->escape_identifier(ddl.at("schema").get()); - const auto table = conn->escape_identifier(ddl.at("table").get()); - bool rls_enabled = ddl.at("rls_enabled").get(); + } + + case proto::DDLOperation::kUsertypeDrop: { + const auto &utd = op.usertype_drop(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Dropping user type: {}.{}", utd.schema(), utd.name()); + const auto escaped_schema = conn->escape_identifier(utd.schema()); + const auto escaped_name = conn->escape_identifier(utd.name()); + return fmt::format("DROP TYPE IF EXISTS {}.{} CASCADE", escaped_schema, escaped_name); + } + + case proto::DDLOperation::kAttachPartition: { + const auto &ap = op.attach_partition(); + return fmt::format("ALTER TABLE {}.{} ATTACH PARTITION {}.{} {};", + conn->escape_identifier(ap.schema()), + conn->escape_identifier(ap.table()), + conn->escape_identifier(ap.partition_schema()), + conn->escape_identifier(ap.partition_name()), + ap.partition_bound()); + } + + case proto::DDLOperation::kDetachPartition: { + const auto &dp = op.detach_partition(); + return fmt::format("ALTER TABLE {}.{} DETACH PARTITION {}.{};", + conn->escape_identifier(dp.schema()), + conn->escape_identifier(dp.table()), + conn->escape_identifier(dp.partition_schema()), + conn->escape_identifier(dp.partition_name())); + } + + case proto::DDLOperation::kSetRlsEnabled: { + const auto &rls = op.set_rls_enabled(); + bool is_foreign_table_type = !rls.has_partition_key() || rls.partition_key().empty(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Setting RLS enabled: {}.{} = {}", + rls.schema(), rls.table(), rls.rls_enabled()); + const auto schema = conn->escape_identifier(rls.schema()); + const auto table = conn->escape_identifier(rls.table()); return fmt::format("ALTER {} TABLE {}.{} {} ROW LEVEL SECURITY;", is_foreign_table_type ? "FOREIGN" : "", schema, table, - rls_enabled ? "ENABLE" : "DISABLE"); - } else if (action == "set_rls_forced") { - LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Setting RLS forced with JSON: {}", ddl.dump()); - const auto schema = conn->escape_identifier(ddl.at("schema").get()); - const auto table = conn->escape_identifier(ddl.at("table").get()); - bool rls_forced = ddl.at("rls_forced").get(); + rls.rls_enabled() ? "ENABLE" : "DISABLE"); + } + + case proto::DDLOperation::kSetRlsForced: { + const auto &rls = op.set_rls_forced(); + bool is_foreign_table_type = !rls.has_partition_key() || rls.partition_key().empty(); + LOG_DEBUG(LOG_FDW, LOG_LEVEL_DEBUG1, "Setting RLS forced: {}.{} = {}", + rls.schema(), rls.table(), rls.rls_forced()); + const auto schema = conn->escape_identifier(rls.schema()); + const auto table = conn->escape_identifier(rls.table()); return fmt::format("ALTER {} TABLE {}.{} {} ROW LEVEL SECURITY;", is_foreign_table_type ? "FOREIGN" : "", schema, table, - rls_forced ? "FORCE" : "NO FORCE"); + rls.rls_forced() ? "FORCE" : "NO FORCE"); } - // can't currently support other kinds of DDL mutations - LOG_ERROR("Bad DDL statement: {}", action.get()); - CHECK(false); + case proto::DDLOperation::kResync: + case proto::DDLOperation::kNoChange: + case proto::DDLOperation::kResyncPartitions: + // No SQL to generate for these operations + return {}; + + case proto::DDLOperation::OPERATION_NOT_SET: + LOG_ERROR("DDL operation not set"); + CHECK(false); + return {}; - return {}; + default: + // can't currently support other kinds of DDL mutations + LOG_ERROR("Unhandled DDL operation: {}", get_action_name(op)); + CHECK(false); + return {}; + } } void diff --git a/src/pg_fdw/test/test_pg_ddl_mgr.cc b/src/pg_fdw/test/test_pg_ddl_mgr.cc index 33e74ba8e..fd7665df9 100644 --- a/src/pg_fdw/test/test_pg_ddl_mgr.cc +++ b/src/pg_fdw/test/test_pg_ddl_mgr.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -158,37 +159,35 @@ namespace { RedisMgr::SubscriberPtr _subscriber; std::string _fdw_key{fmt::format(redis::QUEUE_DDL_FDW, Properties::get_db_instance_id(), _fdw_id_str)}; - nlohmann::json _create_table_ddl(uint64_t xid, const std::string &schema_name, const std::string &table_name, + std::string _create_table_ddl(uint64_t xid, const std::string &schema_name, const std::string &table_name, uint32_t table_oid, std::vector> columns) { - nlohmann::json create_table_json = { - { "db_id", 1 }, - { "xid", xid }, - { "ddls", nlohmann::json::array( - { - { - { "action", "create" }, - { "schema", schema_name }, - { "table", table_name }, - // any number - { "tid", table_oid }, - { "columns", nlohmann::json::array({}) } - } - }) - } - }; - for (auto item: columns) { - std::string name = std::get<0>(item); + proto::DDLBatch batch; + batch.set_db_id(1); + batch.set_xid(xid); + + auto* op = batch.add_operations(); + op->set_xid(xid); + op->set_lsn(0); + auto* ct = op->mutable_create_table(); + ct->set_tid(table_oid); + ct->set_schema(schema_name); + ct->set_table(table_name); + ct->set_rls_enabled(false); + ct->set_rls_forced(false); + + for (const auto& item : columns) { + const std::string& name = std::get<0>(item); uint32_t type_oid = std::get<1>(item); bool nullable = std::get<2>(item); - nlohmann::json column_json = { - {"name", name}, - // any number in numerical order - {"type", type_oid}, - {"nullable", nullable} - }; - create_table_json["/ddls/0/columns"_json_pointer].push_back(column_json); + auto* col = ct->add_columns(); + col->set_name(name); + col->set_type_oid(type_oid); + col->set_nullable(nullable); } - return create_table_json; + + std::string serialized; + batch.SerializeToString(&serialized); + return serialized; } void _verify_table_exists(const std::string &schema_name, const std::string &table_name) { @@ -249,9 +248,9 @@ namespace { {"col1", 25, true}, {"col2", 16, false} }; - nlohmann::json json_ddl = _create_table_ddl(12, "public", "test", 1, columns); + std::string serialized_ddl = _create_table_ddl(12, "public", "test", 1, columns); - _redis_client_data->lpush(_fdw_key, nlohmann::to_string(json_ddl)); + _redis_client_data->lpush(_fdw_key, serialized_ddl); redis_notification.wait_for_notifications(); _verify_table_exists("public", "test"); diff --git a/src/pg_log_mgr/CMakeLists.txt b/src/pg_log_mgr/CMakeLists.txt index f0f03b29f..447b7148a 100644 --- a/src/pg_log_mgr/CMakeLists.txt +++ b/src/pg_log_mgr/CMakeLists.txt @@ -26,6 +26,7 @@ target_link_libraries(springtail_pglog springtail_storage springtail_sys_tbl_mgr PRIVATE nlohmann_json::nlohmann_json + PRIVATE proto_ddl PRIVATE springtail_pgext PRIVATE fmt::fmt) diff --git a/src/pg_log_mgr/committer.cc b/src/pg_log_mgr/committer.cc index 98556a8ba..207d6c6f6 100644 --- a/src/pg_log_mgr/committer.cc +++ b/src/pg_log_mgr/committer.cc @@ -1,4 +1,6 @@ +#include #include +#include #include #include #include @@ -413,9 +415,9 @@ namespace springtail::committer { LOG_DEBUG(LOG_COMMITTER, LOG_LEVEL_DEBUG1, "Processing batch XID: {}@{}", db_id, xid); // get DDL changes for this XID - nlohmann::json completed_ddls = RedisDDL::get_instance()->get_ddls_xid(db_id, xid); + std::vector completed_ddls = RedisDDL::get_instance()->get_ddls_xid(db_id, xid); - if (!completed_ddls.is_null()) { + if (!completed_ddls.empty()) { // pre-commit the DDLs to be applied to the FDWs RedisDDL::get_instance()->precommit_ddl(db_id, xid, completed_ddls); _has_ddl_precommit = true; @@ -435,7 +437,7 @@ namespace springtail::committer { } // Check and notify vacuumer about dropped tables - if (!completed_ddls.is_null()) { + if (!completed_ddls.empty()) { _expire_table_drops(db_id, completed_ddls, xid); } @@ -446,11 +448,11 @@ namespace springtail::committer { if (is_last_xid) { if (batch->table_cache().empty()) { LOG_DEBUG(LOG_COMMITTER, LOG_LEVEL_DEBUG1, "No table mutations in batch for db {} xid: {}", db_id, xid); - xid_mgr::XidMgrServer::get_instance()->commit_xid(db_id, pg_xid, xid, !completed_ddls.is_null(), + xid_mgr::XidMgrServer::get_instance()->commit_xid(db_id, pg_xid, xid, !completed_ddls.empty(), result->timestamp(), result->get_tracker()); } else { // commit the completed XID without xlog update because table data has not been persisted (fsync). - xid_mgr::XidMgrServer::get_instance()->commit_xid_no_xlog(db_id, pg_xid, xid, !completed_ddls.is_null(), true, + xid_mgr::XidMgrServer::get_instance()->commit_xid_no_xlog(db_id, pg_xid, xid, !completed_ddls.empty(), true, result->timestamp(), result->get_tracker()); _table_sync_processor->add(final_xid, std::move(tables_to_sync)); } @@ -463,7 +465,7 @@ namespace springtail::committer { } } else { // for intermediate XIDs, just record the mapping - xid_mgr::XidMgrServer::get_instance()->record_mapping(db_id, pg_xid, xid, !completed_ddls.is_null(), + xid_mgr::XidMgrServer::get_instance()->record_mapping(db_id, pg_xid, xid, !completed_ddls.empty(), result->timestamp(), result->get_tracker(), table_ids); } } else { @@ -471,17 +473,17 @@ namespace springtail::committer { if (batch->table_cache().empty()) { LOG_DEBUG(LOG_COMMITTER, LOG_LEVEL_DEBUG1, "No table mutations in batch for db {} xid: {}", db_id, xid); // don't commit, but record any DDL changes to the history - xid_mgr::XidMgrServer::get_instance()->record_mapping(db_id, pg_xid, xid, !completed_ddls.is_null(), + xid_mgr::XidMgrServer::get_instance()->record_mapping(db_id, pg_xid, xid, !completed_ddls.empty(), result->timestamp(), result->get_tracker(), table_ids); } else { // commit the completed XID without xlog update - xid_mgr::XidMgrServer::get_instance()->commit_xid_no_xlog(db_id, pg_xid, xid, !completed_ddls.is_null(), false, + xid_mgr::XidMgrServer::get_instance()->commit_xid_no_xlog(db_id, pg_xid, xid, !completed_ddls.empty(), false, result->timestamp(), result->get_tracker(), table_ids); _table_sync_processor->add(batch->get_final_xid(), tables_to_sync); } } else { // don't commit, but record any DDL changes to the history - xid_mgr::XidMgrServer::get_instance()->record_mapping(db_id, pg_xid, xid, !completed_ddls.is_null(), + xid_mgr::XidMgrServer::get_instance()->record_mapping(db_id, pg_xid, xid, !completed_ddls.empty(), result->timestamp(), result->get_tracker(), table_ids); } } @@ -584,7 +586,7 @@ namespace springtail::committer { uint64_t db_id) { uint64_t completed_xid = result->swap().xid(); - nlohmann::json ddls = result->swap().ddls(); + const std::vector& ddls = result->swap().ddls(); auto swapped_tids = result->swap().tids(); LOG_DEBUG( @@ -594,8 +596,11 @@ namespace springtail::committer { auto token_3 = open_telemetry::OpenTelemetry::get_instance()->set_context_variables({{"xid", std::to_string(completed_xid)}}); - // Table sync should always have DDL operations - CHECK(!ddls.is_null()) << "TABLE_SYNC should have DDL operations"; + // DDLs may be empty when all tables in the sync were invalidated (e.g. generated columns + // detected during copy), in which case copy_info is nullptr and no DDLs are produced. + if (ddls.empty()) { + LOG_INFO("TABLE_SYNC has no DDL operations for db {} xid {}", db_id, completed_xid); + } // Invalidate schema cache for tables with DDL changes during table sync _invalidate_systbl_cache(db_id, ddls); @@ -669,8 +674,8 @@ namespace springtail::committer { // check if there were DDL mutations as part of this txn, invalidate the schema cache // accordingly - nlohmann::json completed_ddls = RedisDDL::get_instance()->get_ddls_xid(db_id, xid); - if (!completed_ddls.is_null()) { + std::vector completed_ddls = RedisDDL::get_instance()->get_ddls_xid(db_id, xid); + if (!completed_ddls.empty()) { _invalidate_systbl_cache(db_id, completed_ddls); } @@ -793,33 +798,31 @@ namespace springtail::committer { } void - Committer::_expire_table_drops(uint64_t db_id, const nlohmann::json &completed_ddls, uint64_t committed_xid) + Committer::_expire_table_drops(uint64_t db_id, const std::vector &completed_ddls, uint64_t committed_xid) { - for (auto& ddl: completed_ddls) { - if (ddl.contains("tid") && ddl.contains("action")) { - uint64_t tid = ddl["tid"].get(); - auto action = ddl["action"].get(); - if (action == "drop") { - auto dropped_table_dir = TableMgr::get_instance()->get_table_data_dir(db_id, tid, committed_xid - 1); - if (dropped_table_dir.has_value()) { - Vacuumer::get_instance()->expire_snapshot(db_id, dropped_table_dir.value(), committed_xid); - } + for (const auto& ddl: completed_ddls) { + if (ddl.has_drop_table()) { + uint64_t tid = ddl.drop_table().tid(); + auto dropped_table_dir = TableMgr::get_instance()->get_table_data_dir(db_id, tid, committed_xid - 1); + if (dropped_table_dir.has_value()) { + Vacuumer::get_instance()->expire_snapshot(db_id, dropped_table_dir.value(), committed_xid); } } } } void - Committer::_invalidate_systbl_cache(uint64_t db, const nlohmann::json &completed_ddls) + Committer::_invalidate_systbl_cache(uint64_t db, const std::vector &completed_ddls) { auto server = sys_tbl_mgr::Server::get_instance(); - for (auto ddl : completed_ddls) { - if (!ddl.contains("tid")) { + for (const auto& ddl : completed_ddls) { + auto tid_opt = springtail::get_table_id(ddl); + if (!tid_opt.has_value()) { continue; // mutation doesn't reference a specific table } - uint64_t tid = ddl["tid"].get(); - XidLsn ddl_xid(ddl["xid"].get(), ddl["lsn"].get()); + uint64_t tid = tid_opt.value(); + XidLsn ddl_xid(ddl.xid(), ddl.lsn()); server->invalidate_table(db, tid, ddl_xid); // Invalidate the extent schema cache for this table diff --git a/src/pg_log_mgr/pg_log_reader.cc b/src/pg_log_mgr/pg_log_reader.cc index 409760a34..a92bc9e1e 100644 --- a/src/pg_log_mgr/pg_log_reader.cc +++ b/src/pg_log_mgr/pg_log_reader.cc @@ -1,9 +1,11 @@ +#include #include #include #include #include +#include #include #include #include @@ -250,8 +252,13 @@ namespace springtail::pg_log_mgr { entry.table_schema = sync_skip.schema(); if (entry.table_schema == nullptr) { entry.table_schema = TableMgr::get_instance()->get_extent_schema(_db, tid, xidlsn, {PgExtnRegistry::get_instance()->comparator_func}, true, false); + entry.update_schema(_db, tid, xidlsn); + } else { + // Use create_schema when the sync tracker provides the schema directly, + // since the system table cache may not yet reflect the post-resync schema + // (swap_sync_table hasn't run yet). + entry.create_schema(entry.table_schema); } - entry.update_schema(_db, tid, xidlsn); } LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "Create extent with row width: {}", entry.schema->row_size()); @@ -712,52 +719,38 @@ namespace springtail::pg_log_mgr { PgMsgNamespace namespace_msg{table_msg.lsn, table_msg.namespace_id, table_msg.xid, table_msg.namespace_name}; // make sure that the namespace is created - std::string &&ns_ddl_stmt = server->create_namespace(_db, xidlsn, namespace_msg); - nlohmann::json ns_action = nlohmann::json::parse(ns_ddl_stmt).at("action"); - - if (ns_action.get() != "no_change") { - LOG_INFO("Table namespace created: {} {} {}", table_msg.table, table_msg.namespace_name, table_msg.namespace_id); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ns_ddl_stmt); + auto ns_ddl_ops = server->create_namespace(_db, xidlsn, namespace_msg); + for (auto& op : ns_ddl_ops) { + if (!op.has_no_change()) { + LOG_INFO("Table namespace created: {} {} {}", table_msg.table, table_msg.namespace_name, table_msg.namespace_id); + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } } - // Create table - ALWAYS returns JSON array - std::string ddl_stmt = server->create_table(_db, xidlsn, table_msg); - nlohmann::json ddl_array = nlohmann::json::parse(ddl_stmt); - - DCHECK(ddl_array.is_array()); // Always an array now + // Create table - returns vector of DDLOperation + auto ddl_ops = server->create_table(_db, xidlsn, table_msg); - // Array structure: [CREATE TABLE, optional ATTACH PARTITION(s)...] + // Vector structure: [CREATE TABLE, optional ATTACH PARTITION(s)...] // Only the first entry (CREATE TABLE) has a tid that needs to be cached - DCHECK(!ddl_array.empty()); // Should always have at least CREATE TABLE + DCHECK(!ddl_ops.empty()); // Should always have at least CREATE TABLE // Process first entry (CREATE TABLE) and update exists cache - const auto& create_ddl = ddl_array[0]; - DCHECK(create_ddl["action"].get() == "create"); + DCHECK(ddl_ops[0].has_create_table()); - std::string create_ddl_str = nlohmann::to_string(create_ddl); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, create_ddl_str); - - uint32_t tid = create_ddl["tid"].get(); + uint32_t tid = ddl_ops[0].create_table().tid(); _exists_cache->insert(_db, tid, true); LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG2, "Added CREATE TABLE DDL for table {} to FDW queue", tid); - // Process remaining entries (ATTACH PARTITION DDLs) - just add to RedisDDL - for (size_t i = 1; i < ddl_array.size(); i++) { - const auto& attach_ddl = ddl_array[i]; - DCHECK(attach_ddl["action"].get() == "attach_partition"); - - std::string attach_ddl_str = nlohmann::to_string(attach_ddl); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, attach_ddl_str); - - LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG2, - "Added ATTACH PARTITION DDL to FDW queue"); + // Process all entries (CREATE TABLE + optional ATTACH PARTITION DDLs) + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); } - if (ddl_array.size() > 1) { + if (ddl_ops.size() > 1) { LOG_INFO("Processed CREATE TABLE for {} and {} ATTACH PARTITION DDL(s)", - tid, ddl_array.size() - 1); + tid, ddl_ops.size() - 1); } break; } @@ -767,24 +760,23 @@ namespace springtail::pg_log_mgr { LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "ALTER TABLE: xid={}, lsn={}, pg_xid={}, tid={}", xidlsn.xid, xidlsn.lsn, table_msg.xid, table_msg.oid); - std::string &&ddl_stmt = server->alter_table(_db, xidlsn, table_msg); + auto ddl_ops = server->alter_table(_db, xidlsn, table_msg); // check for re-sync - nlohmann::json action = nlohmann::json::parse(ddl_stmt).at("action"); - - if (action.get() == "resync") { - _mark_table_resync(table_msg.oid, xidlsn, pg_xids); - } else if (action.get() == "resync_partitions") { - std::unordered_set table_ids; - table_ids.insert(table_msg.oid); - nlohmann::json table_ids_json = nlohmann::json::parse(ddl_stmt).at("table_ids"); - for (auto table_id : table_ids_json.get>()) { - table_ids.insert(table_id); + for (auto& op : ddl_ops) { + if (op.has_resync()) { + _mark_table_resync(table_msg.oid, xidlsn, pg_xids); + } else if (op.has_resync_partitions()) { + std::unordered_set table_ids; + table_ids.insert(table_msg.oid); + for (auto table_id : op.resync_partitions().table_ids()) { + table_ids.insert(static_cast(table_id)); + } + // Mark the parent table for resync + _mark_table_resync(table_ids, xidlsn, pg_xids); + } else if (!op.has_no_change()) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); } - // Mark the parent table for resync - _mark_table_resync(table_ids, xidlsn, pg_xids); - } else if (action.get() != "no_change") { - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); } break; } @@ -794,55 +786,68 @@ namespace springtail::pg_log_mgr { LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "DROP TABLE: xid={}, pg_xid={}, tid={}", xidlsn.xid, drop_msg.xid, drop_msg.oid); - std::string &&ddl_stmt = server->drop_table(_db, xidlsn, drop_msg); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + auto ddl_ops = server->drop_table(_db, xidlsn, drop_msg); + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } _exists_cache->insert(_db, drop_msg.oid, false); break; } case PgMsgEnum::CREATE_NAMESPACE: { auto &namespace_msg = std::get(change->msg); - std::string &&ddl_stmt = server->create_namespace(_db, xidlsn, namespace_msg); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + auto ddl_ops = server->create_namespace(_db, xidlsn, namespace_msg); + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } break; } case PgMsgEnum::ALTER_NAMESPACE: { auto &namespace_msg = std::get(change->msg); - std::string &&ddl_stmt = server->alter_namespace(_db, xidlsn, namespace_msg); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + auto ddl_ops = server->alter_namespace(_db, xidlsn, namespace_msg); + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } break; } case PgMsgEnum::DROP_NAMESPACE: { auto &namespace_msg = std::get(change->msg); - std::string &&ddl_stmt = server->drop_namespace(_db, xidlsn, namespace_msg); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + auto ddl_ops = server->drop_namespace(_db, xidlsn, namespace_msg); + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } break; } case PgMsgEnum::CREATE_TYPE: { auto &type_msg = std::get(change->msg); - std::string &&ddl_stmt = server->create_usertype(_db, xidlsn, type_msg); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + auto ddl_ops = server->create_usertype(_db, xidlsn, type_msg); + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } break; } case PgMsgEnum::ALTER_TYPE: { auto &type_msg = std::get(change->msg); - std::string &&ddl_stmt = server->alter_usertype(_db, xidlsn, type_msg); - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + auto ddl_ops = server->alter_usertype(_db, xidlsn, type_msg); + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } break; } case PgMsgEnum::DROP_TYPE: { auto &type_msg = std::get(change->msg); - std::string &&ddl_stmt = server->drop_usertype(_db, xidlsn, type_msg); - auto json = nlohmann::json::parse(ddl_stmt); - LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "DROP TYPE: xid={}, pg_xid={}, tid={}, ddl={}", xidlsn.xid, - type_msg.xid, type_msg.oid, json.dump()); - if (json.at("action").get() != "no_change") { - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + auto ddl_ops = server->drop_usertype(_db, xidlsn, type_msg); + for (auto& op : ddl_ops) { + LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "DROP TYPE: xid={}, pg_xid={}, tid={}, action={}", xidlsn.xid, + type_msg.xid, type_msg.oid, get_action_name(op)); + if (!op.has_no_change()) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } } break; } @@ -881,10 +886,12 @@ namespace springtail::pg_log_mgr { auto &attach_partition_msg = std::get(change->msg); LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "ATTACH PARTITION: xid={}, pg_xid={}, tid={}", xidlsn.xid, attach_partition_msg.xid, attach_partition_msg.table_id); - std::string &&ddl_stmt = server->attach_partition(_db, xidlsn, attach_partition_msg); + auto ddl_ops = server->attach_partition(_db, xidlsn, attach_partition_msg); - // Store the DDL statement for the Committer - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + // Store the DDL operations for the Committer + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } break; } @@ -893,10 +900,12 @@ namespace springtail::pg_log_mgr { auto &detach_partition_msg = std::get(change->msg); LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "DETACH PARTITION: xid={}, pg_xid={}, tid={}", xidlsn.xid, detach_partition_msg.xid, detach_partition_msg.table_id); - std::string &&ddl_stmt = server->detach_partition(_db, xidlsn, detach_partition_msg); + auto ddl_ops = server->detach_partition(_db, xidlsn, detach_partition_msg); - // Store the DDL statement for the Committer - RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, ddl_stmt); + // Store the DDL operations for the Committer + for (auto& op : ddl_ops) { + RedisDDL::get_instance()->add_ddl(_db, xidlsn.xid, std::move(op)); + } break; } @@ -1235,7 +1244,7 @@ namespace springtail::pg_log_mgr { // for operations at the SysTblMgr auto server = sys_tbl_mgr::Server::get_instance(); - nlohmann::json ddls = nlohmann::json::array({}); + std::vector ddls; std::vector table_ids; // issue the updates to the system tables @@ -1260,8 +1269,9 @@ namespace springtail::pg_log_mgr { drop_msg.oid = table_info.id(); drop_msg.namespace_name = table_info.namespace_name(); drop_msg.table = table_info.name(); - std::string &&ddl_stmt = server->drop_table(_db_id, XidLsn{xid}, drop_msg); - ddls.emplace_back(ddl_stmt); + auto drop_ddl_ops = server->drop_table(_db_id, XidLsn{xid}, drop_msg); + ddls.insert(ddls.end(), std::make_move_iterator(drop_ddl_ops.begin()), + std::make_move_iterator(drop_ddl_ops.end())); } else { LOG_DEBUG(LOG_PG_LOG_MGR, LOG_LEVEL_DEBUG1, "table_id {}", entry->table_id); @@ -1287,12 +1297,11 @@ namespace springtail::pg_log_mgr { roots->set_xid(xid); // note: this will also invalidate the table's client cache entry - auto ddl_str = server->swap_sync_table(*namespace_req, *create, indexes_vec, *roots); + auto swap_ddl_ops = server->swap_sync_table(*namespace_req, *create, indexes_vec, *roots); // store the ddl mutations for the FDWs - auto ddl = nlohmann::json::parse(ddl_str); - CHECK(ddl.is_array()); - ddls.insert(ddls.end(), ddl.begin(), ddl.end()); + ddls.insert(ddls.end(), std::make_move_iterator(swap_ddl_ops.begin()), + std::make_move_iterator(swap_ddl_ops.end())); table_ids.emplace_back(static_cast(entry->table_id)); } } diff --git a/src/proto/ddl.proto b/src/proto/ddl.proto new file mode 100644 index 000000000..1dd0d3fa2 --- /dev/null +++ b/src/proto/ddl.proto @@ -0,0 +1,254 @@ +syntax = "proto3"; + +package springtail.proto; + +// Column definition used in CREATE TABLE DDL +message DDLColumn { + string name = 1; + uint32 type_oid = 2; + string type_name = 3; + string type_namespace = 4; + bool nullable = 5; + optional string default_value = 6; +} + +// ============================================================================ +// Table operations +// ============================================================================ + +message CreateTableDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + repeated DDLColumn columns = 6; + optional string partition_key = 7; + bool is_resync = 8; +} + +message DropTableDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + optional uint64 parent_table_id = 4; + optional string partition_key = 5; + optional string partition_bound = 6; +} + +message RenameTableDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + string old_table = 6; + string old_schema = 7; + optional string partition_key = 8; +} + +message SetNamespaceDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + string old_schema = 6; + optional string partition_key = 7; + optional string partition_bound = 8; + optional uint64 parent_table_id = 9; +} + +message SetRlsEnabledDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + optional string partition_key = 6; +} + +message SetRlsForcedDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + optional string partition_key = 6; +} + +message ColumnAddDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + DDLColumn column = 6; +} + +message ColumnDropDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + string column_name = 6; +} + +message ColumnRenameDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + string old_name = 6; + string new_name = 7; +} + +message ColumnNullableDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; + string column_name = 6; + bool nullable = 7; +} + +message ResyncDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; +} + +message NoChangeDDL { + uint64 tid = 1; + string schema = 2; + string table = 3; + bool rls_enabled = 4; + bool rls_forced = 5; +} + +message ResyncPartitionsDDL { + uint64 parent_table_id = 1; + repeated uint64 table_ids = 2; +} + +// ============================================================================ +// Namespace operations +// ============================================================================ + +message NamespaceCreateDDL { + uint64 id = 1; + string name = 2; +} + +message NamespaceAlterDDL { + uint64 id = 1; + string name = 2; + string old_name = 3; +} + +message NamespaceDropDDL { + uint64 id = 1; + string name = 2; +} + +// ============================================================================ +// User type operations +// ============================================================================ + +message UserTypeCreateDDL { + uint64 id = 1; + string name = 2; + string value_json = 3; + int32 type = 4; + uint64 namespace_id = 5; + string schema = 6; +} + +message UserTypeAlterDDL { + uint64 id = 1; + string name = 2; + string value_json = 3; + int32 type = 4; + uint64 namespace_id = 5; + string schema = 6; + string old_name = 7; + string old_value_json = 8; + string old_schema = 9; +} + +message UserTypeDropDDL { + uint64 id = 1; + string name = 2; + string schema = 3; +} + +// ============================================================================ +// Partition operations +// ============================================================================ + +message AttachPartitionDDL { + string schema = 1; + string table = 2; + string partition_schema = 3; + string partition_name = 4; + string partition_bound = 5; +} + +message DetachPartitionDDL { + string schema = 1; + string table = 2; + string partition_schema = 3; + string partition_name = 4; +} + +// ============================================================================ +// Top-level DDL operation with oneof discriminator +// ============================================================================ + +message DDLOperation { + uint64 xid = 1; + uint64 lsn = 2; + + oneof operation { + // Table operations (10-29) + CreateTableDDL create_table = 10; + DropTableDDL drop_table = 11; + RenameTableDDL rename_table = 12; + SetNamespaceDDL set_namespace = 13; + SetRlsEnabledDDL set_rls_enabled = 14; + SetRlsForcedDDL set_rls_forced = 15; + ColumnAddDDL column_add = 16; + ColumnDropDDL column_drop = 17; + ColumnRenameDDL column_rename = 18; + ColumnNullableDDL column_nullable = 19; + ResyncDDL resync = 20; + NoChangeDDL no_change = 21; + ResyncPartitionsDDL resync_partitions = 22; + + // Namespace operations (30-39) + NamespaceCreateDDL namespace_create = 30; + NamespaceAlterDDL namespace_alter = 31; + NamespaceDropDDL namespace_drop = 32; + + // User type operations (40-49) + UserTypeCreateDDL usertype_create = 40; + UserTypeAlterDDL usertype_alter = 41; + UserTypeDropDDL usertype_drop = 42; + + // Partition operations (50-59) + AttachPartitionDDL attach_partition = 50; + DetachPartitionDDL detach_partition = 51; + } +} + +// Envelope wrapping a batch of DDL operations for a single transaction +message DDLBatch { + uint64 db_id = 1; + uint64 xid = 2; + repeated DDLOperation operations = 3; +} diff --git a/src/redis/CMakeLists.txt b/src/redis/CMakeLists.txt index c5995d306..7170e3d55 100644 --- a/src/redis/CMakeLists.txt +++ b/src/redis/CMakeLists.txt @@ -12,6 +12,7 @@ target_link_libraries(springtail_redis fmt::fmt redis++::redis++ nlohmann_json::nlohmann_json + proto_ddl ) springtail_unit_test(test_redis diff --git a/src/redis/redis_ddl.cc b/src/redis/redis_ddl.cc index 04a99aa83..2f845f1c7 100644 --- a/src/redis/redis_ddl.cc +++ b/src/redis/redis_ddl.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -17,14 +18,18 @@ namespace { RedisClient& redis, uint64_t db_id, uint64_t xid, - nlohmann::json ddls + const std::vector& ddls ) { - nlohmann::json op; - op["db_id"] = db_id; - op["xid"] = xid; - op["ddls"] = ddls; - std::string value = nlohmann::to_string(op); + proto::DDLBatch batch; + batch.set_db_id(db_id); + batch.set_xid(xid); + for (const auto& ddl : ddls) { + *batch.add_operations() = ddl; + } + + std::string value; + batch.SerializeToString(&value); std::string precommit_key = fmt::format(redis::HASH_DDL_PRECOMMIT, Properties::get_db_instance_id()); @@ -40,16 +45,16 @@ namespace springtail { void RedisDDL::add_ddl(uint64_t db_id, uint64_t xid, - const std::string &ddl) + proto::DDLOperation ddl) { // Store DDL in in-memory cache for fast access by Committer { std::unique_lock lock(_ddl_cache_mutex); - _ddl_cache[db_id][xid].push_back(nlohmann::json::parse(ddl)); + _ddl_cache[db_id][xid].push_back(std::move(ddl)); } } - nlohmann::json + std::vector RedisDDL::get_ddls_xid(uint64_t db_id, uint64_t xid) { @@ -60,16 +65,12 @@ namespace springtail { if (db_it != _ddl_cache.end()) { auto xid_it = db_it->second.find(xid); if (xid_it != db_it->second.end()) { - nlohmann::json ddls; - for (const auto &ddl : xid_it->second) { - ddls.push_back(ddl); - } - return ddls; + return xid_it->second; } } } - // Return null if not found (no DDLs for this XID) - return nlohmann::json(); + // Return empty vector if not found (no DDLs for this XID) + return {}; } @@ -104,9 +105,9 @@ namespace springtail { void RedisDDL::precommit_ddl(uint64_t db_id, uint64_t xid, - nlohmann::json ddls) + std::vector ddls) { - LOG_INFO("RedisDDL::precommit_ddl: db_id={}, xid={}, ddls={}", db_id, xid, ddls.dump()); + LOG_INFO("RedisDDL::precommit_ddl: db_id={}, xid={}, ddls_count={}", db_id, xid, ddls.size()); // Move DDLs to Redis pre-commit phase for crash recovery _precommit(*_redis, db_id, xid, ddls); @@ -161,32 +162,32 @@ namespace springtail { for (const auto &entry : commit_map) { const auto &key = entry.first; - nlohmann::json ddls = nlohmann::json::parse(entry.second); + proto::DDLBatch batch; + batch.ParseFromString(entry.second); // iterate through the DDL statements and see if any // result in the addition or removal of a table/schema // or the renaming of a table and add them to the table set for this db - for (auto ddl: ddls.at("ddls")) { - assert(ddl.is_object()); - assert(ddl.contains("action")); - auto &action = ddl.at("action"); - - // only care about create, drop and rename - if (action == "create") { - auto schema = ddl.at("schema").get(); - auto table = ddl.at("table").get(); - RedisDbTables::add_table(ts, db_id, table, schema); - } else if (action == "drop") { - auto schema = ddl.at("schema").get(); - auto table = ddl.at("table").get(); - RedisDbTables::remove_table(ts, db_id, table, schema); - } else if (action == "rename") { - auto schema = ddl.at("schema").get(); - auto table = ddl.at("table").get(); - auto old_schema = ddl.at("old_schema").get(); - auto old_table = ddl.at("old_table").get(); - RedisDbTables::remove_table(ts, db_id, old_table, old_schema); - RedisDbTables::add_table(ts, db_id, table, schema); + for (const auto& ddl : batch.operations()) { + switch (ddl.operation_case()) { + case proto::DDLOperation::kCreateTable: { + const auto& ct = ddl.create_table(); + RedisDbTables::add_table(ts, db_id, ct.table(), ct.schema()); + break; + } + case proto::DDLOperation::kDropTable: { + const auto& dt = ddl.drop_table(); + RedisDbTables::remove_table(ts, db_id, dt.table(), dt.schema()); + break; + } + case proto::DDLOperation::kRenameTable: { + const auto& rt = ddl.rename_table(); + RedisDbTables::remove_table(ts, db_id, rt.old_table(), rt.old_schema()); + RedisDbTables::add_table(ts, db_id, rt.table(), rt.schema()); + break; + } + default: + break; } } @@ -237,7 +238,7 @@ namespace springtail { } - std::vector + std::vector RedisDDL::get_next_ddls(const std::string &fdw_id) { // retrieve the next set of DDLs to apply for the given FDW; this blocks @@ -249,13 +250,15 @@ namespace springtail { return {}; } - std::vector ddls; + std::vector batches; do { - ddls.push_back(nlohmann::json::parse(*value)); + proto::DDLBatch batch; + batch.ParseFromString(*value); + batches.push_back(std::move(batch)); value = queue.try_pop("active"); } while (value != nullptr); - return ddls; + return batches; } void diff --git a/src/sys_tbl_mgr/CMakeLists.txt b/src/sys_tbl_mgr/CMakeLists.txt index bf1eb5325..22813ef9c 100644 --- a/src/sys_tbl_mgr/CMakeLists.txt +++ b/src/sys_tbl_mgr/CMakeLists.txt @@ -41,7 +41,7 @@ add_library(springtail_sys_tbl_mgr ${SYS_TBL_MGR_SOURCES}) target_include_directories(springtail_sys_tbl_mgr PUBLIC ${CMAKE_BINARY_DIR}/src PRIVATE ${CMAKE_SOURCE_DIR}/include) target_link_libraries(springtail_sys_tbl_mgr PUBLIC springtail_common springtail_grpc springtail_storage springtail_xid_mgr - PRIVATE proto_sys_tbl_mgr + PRIVATE proto_sys_tbl_mgr proto_ddl PRIVATE Boost::thread) # add the library target @@ -57,7 +57,7 @@ add_library(springtail_sch_mem_cache ${SCH_MEM_CACHE_SOURCES}) target_include_directories(springtail_sch_mem_cache PRIVATE ${CMAKE_SOURCE_DIR}/include) target_link_libraries(springtail_sch_mem_cache PUBLIC springtail_common - PRIVATE Boost::thread) + PRIVATE springtail_redis Boost::thread) # file system check utility add_executable(file_system_check test/file_system_check.cc) diff --git a/src/sys_tbl_mgr/server.cc b/src/sys_tbl_mgr/server.cc index 8b71126eb..003b470ee 100644 --- a/src/sys_tbl_mgr/server.cc +++ b/src/sys_tbl_mgr/server.cc @@ -3,9 +3,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -43,7 +45,7 @@ Server::_internal_shutdown() _grpc_server_manager.shutdown(); } -std::string +std::vector Server::create_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) { auto request = RequestHelper::gen_table_request(db_id, xid, msg); @@ -55,13 +57,10 @@ Server::create_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) boost::shared_lock lock(_write_mutex); // perform the CREATE TABLE - auto&& ddl = _create_table(request); - - // serialize the JSON and return - return nlohmann::to_string(ddl); + return _create_table(request); } -std::string +std::vector Server::alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) { auto request = RequestHelper::gen_table_request(db_id, xid, msg); @@ -78,14 +77,16 @@ Server::alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) auto ns_info = _get_namespace_info(db_id, request.table().namespace_name(), xid, false); CHECK(ns_info); - nlohmann::json ddl; - ddl["tid"] = request.table().id(); - ddl["xid"] = request.xid(); - ddl["lsn"] = request.lsn(); - ddl["schema"] = request.table().namespace_name(); - ddl["table"] = request.table().name(); - ddl["rls_enabled"] = request.table().rls_enabled(); - ddl["rls_forced"] = request.table().rls_forced(); + // Common fields for the DDLOperation + const uint64_t tid = request.table().id(); + const auto& schema_name = request.table().namespace_name(); + const auto& table_name = request.table().name(); + const bool rls_enabled = request.table().rls_enabled(); + const bool rls_forced = request.table().rls_forced(); + + proto::DDLOperation op; + op.set_xid(request.xid()); + op.set_lsn(request.lsn()); // retrieve the name of the table at the point of alteration auto table_info = _get_table_info(db_id, request.table().id(), xid); @@ -107,17 +108,6 @@ Server::alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) partition_bound = request.table().partition_bound(); } - // update the partition details - if (partition_key.has_value()) { - ddl["partition_key"] = partition_key.value(); - } - if (partition_bound.has_value()) { - ddl["partition_bound"] = partition_bound.value(); - } - if (parent_table_id.has_value()) { - ddl["parent_table_id"] = parent_table_id.value(); - } - auto new_info = std::make_shared(request.table().id(), request.xid(), request.lsn(), ns_info->id, request.table().name(), @@ -131,12 +121,26 @@ Server::alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) _set_table_info(db_id, new_info); - // set the DDL statement - ddl["action"] = "set_namespace"; - auto old_ns_info = _get_namespace_info(db_id, table_info->namespace_id, xid); CHECK(old_ns_info); - ddl["old_schema"] = old_ns_info->name; + + // set the DDL statement + auto* sns = op.mutable_set_namespace(); + sns->set_tid(tid); + sns->set_schema(schema_name); + sns->set_table(table_name); + sns->set_rls_enabled(rls_enabled); + sns->set_rls_forced(rls_forced); + sns->set_old_schema(old_ns_info->name); + if (partition_key.has_value()) { + sns->set_partition_key(partition_key.value()); + } + if (partition_bound.has_value()) { + sns->set_partition_bound(partition_bound.value()); + } + if (parent_table_id.has_value()) { + sns->set_parent_table_id(parent_table_id.value()); + } } else if (table_info->name != request.table().name()) { // if the name is changed, update the name in the table_names table @@ -144,15 +148,23 @@ Server::alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) _set_table_info(db_id, new_info); // set the DDL statement - ddl["action"] = "rename"; - ddl["old_table"] = table_info->name; + auto* rn = op.mutable_rename_table(); + rn->set_tid(tid); + rn->set_schema(schema_name); + rn->set_table(table_name); + rn->set_rls_enabled(rls_enabled); + rn->set_rls_forced(rls_forced); + rn->set_old_table(table_info->name); + if (partition_key.has_value()) { + rn->set_partition_key(partition_key.value()); + } if (table_info->namespace_id != ns_info->id) { auto old_ns_info = _get_namespace_info(db_id, table_info->namespace_id, xid); CHECK(old_ns_info); - ddl["old_schema"] = old_ns_info->name; + rn->set_old_schema(old_ns_info->name); } else { - ddl["old_schema"] = request.table().namespace_name(); + rn->set_old_schema(schema_name); } _set_primary_index(db_id, ns_info->id, request.table().id(), table_info->name, @@ -166,35 +178,51 @@ Server::alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) _set_table_info(db_id, new_info); // set the DDL statement - ddl["action"] = "set_rls_enabled"; - ddl["rls_enabled"] = request.table().rls_enabled(); + auto* sre = op.mutable_set_rls_enabled(); + sre->set_tid(tid); + sre->set_schema(schema_name); + sre->set_table(table_name); + sre->set_rls_enabled(rls_enabled); + sre->set_rls_forced(rls_forced); + if (partition_key.has_value()) { + sre->set_partition_key(partition_key.value()); + } } else if (table_info->rls_forced != request.table().rls_forced()) { // if the RLS forced flags are changed, update the table_names table _set_table_info(db_id, new_info); // set the DDL statement - ddl["action"] = "set_rls_forced"; - ddl["rls_forced"] = request.table().rls_forced(); + auto* srf = op.mutable_set_rls_forced(); + srf->set_tid(tid); + srf->set_schema(schema_name); + srf->set_table(table_name); + srf->set_rls_enabled(rls_enabled); + srf->set_rls_forced(rls_forced); + if (partition_key.has_value()) { + srf->set_partition_key(partition_key.value()); + } } else { // get the schema prior to this change auto info = _get_schema_info(db_id, request.table().id(), xid, xid); // generate a tuple for the change - // note: _generate_update() sets the necessary elements of the ddl - auto history = _generate_update(info->columns(), request.table().columns(), - xid, ddl); + auto result = _generate_update(info->columns(), request.table().columns(), + xid, tid, schema_name, table_name, + rls_enabled, rls_forced); // If the partition key is not empty, generate the history events for the the child partition tables if (partition_key.value_or("") != "") { - ddl = _generate_partition_updates(request, history); + op = _generate_partition_updates(request, result.history, xid); + } else { + op = std::move(result.operation); } // we won't apply any changes to the system tables in these cases - if (history.update_type() != static_cast(SchemaUpdateType::NO_CHANGE) && - history.update_type() != static_cast(SchemaUpdateType::RESYNC)) { + if (result.history.update_type() != static_cast(SchemaUpdateType::NO_CHANGE) && + result.history.update_type() != static_cast(SchemaUpdateType::RESYNC)) { // write the column change to the schemas table and update the cache _set_schema_info(db_id, request.table().id(), ns_info->id, - request.table().name(), {history}); + request.table().name(), {result.history}); } _set_primary_index(db_id, ns_info->id, request.table().id(), @@ -205,12 +233,12 @@ Server::alter_table(uint64_t db_id, const XidLsn &xid, const PgMsgTable &msg) invalidate_table(db_id, msg.oid, xid); LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Alter table {}@{}:{} action: {}", db_id, xid.xid, - request.table().id(), ddl["action"].get()); + request.table().id(), get_action_name(op)); - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::drop_table(uint64_t db_id, const XidLsn &xid, const PgMsgDropTable &msg) { @@ -226,15 +254,15 @@ Server::drop_table(uint64_t db_id, const XidLsn &xid, const PgMsgDropTable &msg) boost::shared_lock lock(_write_mutex); // perform the DROP TABLE - auto&& ddl = _drop_table(request); + auto op = _drop_table(request); // Automatically invalidate the schema cache from the provided XID invalidate_table(db_id, msg.oid, xid); - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::create_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace &msg) { proto::NamespaceRequest request; @@ -246,9 +274,6 @@ Server::create_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace db_id, request.namespace_id(), request.name(), request.xid(), request.lsn()); - - nlohmann::json ddl; - // acquire a shared lock to ensure no one is doing a finalize boost::shared_lock lock(_write_mutex); @@ -257,16 +282,27 @@ Server::create_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace LOG_INFO("Namespace exists -- db {} namespace_id {} name {} xid {} lsn {}", db_id, request.namespace_id(), request.name(), request.xid(), request.lsn()); - ddl["action"] = "no_change"; + proto::DDLOperation op; + op.set_xid(xid.xid); + op.set_lsn(xid.lsn); + auto* nc = op.mutable_no_change(); + nc->set_tid(0); + nc->set_schema(""); + nc->set_table(""); + nc->set_rls_enabled(false); + nc->set_rls_forced(false); + return {std::move(op)}; } else { // update the namespace_names table - ddl = _mutate_namespace(db_id, request.namespace_id(), request.name(), xid, true); - ddl["action"] = "ns_create"; + auto op = _mutate_namespace(db_id, request.namespace_id(), request.name(), xid, true); + auto* nc = op.mutable_namespace_create(); + nc->set_id(request.namespace_id()); + nc->set_name(request.name()); + return {std::move(op)}; } - return nlohmann::to_string(ddl); } -std::string +std::vector Server::alter_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace &msg) { proto::NamespaceRequest request; @@ -286,15 +322,17 @@ Server::alter_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace CHECK(ns_info); // update the namespace_names table - auto ddl = + auto op = _mutate_namespace(db_id, request.namespace_id(), request.name(), xid, true); - ddl["action"] = "ns_alter"; - ddl["old_name"] = ns_info->name; + auto* na = op.mutable_namespace_alter(); + na->set_id(request.namespace_id()); + na->set_name(request.name()); + na->set_old_name(ns_info->name); - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::drop_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace &msg) { proto::NamespaceRequest request; @@ -309,15 +347,16 @@ Server::drop_namespace(uint64_t db_id, const XidLsn &xid, const PgMsgNamespace & boost::shared_lock lock(_write_mutex); // update the namespace_names table - auto ddl = + auto op = _mutate_namespace(db_id, request.namespace_id(), request.name(), xid, false); - ddl["action"] = "ns_drop"; - ddl["name"] = request.name(); + auto* nd = op.mutable_namespace_drop(); + nd->set_id(request.namespace_id()); + nd->set_name(request.name()); - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::create_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &msg) { proto::UserTypeRequest request; @@ -337,16 +376,21 @@ Server::create_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType & boost::shared_lock lock(_write_mutex); // update the user_types table - auto ddl = _mutate_usertype(db_id, request.type_id(), request.name(), + auto op = _mutate_usertype(db_id, request.type_id(), request.name(), request.namespace_id(), request.type(), request.value_json(), xid, true); - ddl["action"] = "ut_create"; - ddl["schema"] = request.namespace_name(); + auto* uc = op.mutable_usertype_create(); + uc->set_id(request.type_id()); + uc->set_name(request.name()); + uc->set_value_json(request.value_json()); + uc->set_type(request.type()); + uc->set_namespace_id(request.namespace_id()); + uc->set_schema(request.namespace_name()); - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::alter_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &msg) { proto::UserTypeRequest request; @@ -370,26 +414,31 @@ Server::alter_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &m CHECK(user_type_info != nullptr); // update the user defined types table - auto ddl = _mutate_usertype(db_id, request.type_id(), request.name(), + auto op = _mutate_usertype(db_id, request.type_id(), request.name(), request.namespace_id(), request.type(), request.value_json(), xid, true); - ddl["action"] = "ut_alter"; - ddl["schema"] = request.namespace_name(); - ddl["old_name"] = user_type_info->name; - ddl["old_value"] = user_type_info->value_json; + auto* ua = op.mutable_usertype_alter(); + ua->set_id(request.type_id()); + ua->set_name(request.name()); + ua->set_value_json(request.value_json()); + ua->set_type(request.type()); + ua->set_namespace_id(request.namespace_id()); + ua->set_schema(request.namespace_name()); + ua->set_old_name(user_type_info->name); + ua->set_old_value_json(user_type_info->value_json); // need to get old namespace id and check if it has changed if (user_type_info->namespace_id != request.namespace_id()) { auto old_ns_info = _get_namespace_info(db_id, user_type_info->namespace_id, xid); - ddl["old_schema"] = old_ns_info->name; + ua->set_old_schema(old_ns_info->name); } else { - ddl["old_schema"] = request.namespace_name(); + ua->set_old_schema(request.namespace_name()); } - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::drop_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &msg) { proto::UserTypeRequest request; @@ -404,25 +453,34 @@ Server::drop_usertype(uint64_t db_id, const XidLsn &xid, const PgMsgUserType &ms // acquire a shared lock to ensure no one is doing a finalize boost::shared_lock lock(_write_mutex); - nlohmann::json ddl; + proto::DDLOperation op; auto user_type_info = _get_usertype_info(db_id, request.type_id(), xid); if (user_type_info == nullptr) { // drop could for a type we don't support, so ignore it here LOG_WARN("User type {} not found", request.type_id()); - ddl["action"] = "no_change"; + op.set_xid(xid.xid); + op.set_lsn(xid.lsn); + auto* nc = op.mutable_no_change(); + nc->set_tid(0); + nc->set_schema(""); + nc->set_table(""); + nc->set_rls_enabled(false); + nc->set_rls_forced(false); } else { // update the user defined types table - ddl = _mutate_usertype(db_id, request.type_id(), request.name(), + op = _mutate_usertype(db_id, request.type_id(), request.name(), request.namespace_id(), request.type(), request.value_json(), xid, false); - ddl["action"] = "ut_drop"; - ddl["schema"] = request.namespace_name(); + auto* ud = op.mutable_usertype_drop(); + ud->set_id(request.type_id()); + ud->set_name(request.name()); + ud->set_schema(request.namespace_name()); } - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::attach_partition(uint64_t db_id, const XidLsn &xid, const PgMsgAttachPartition &msg) { proto::AttachPartitionRequest request; @@ -485,23 +543,24 @@ Server::attach_partition(uint64_t db_id, const XidLsn &xid, const PgMsgAttachPar partition_name = table_info->name; } - nlohmann::json ddl; + proto::DDLOperation op; + op.set_xid(request.xid()); + op.set_lsn(request.lsn()); - ddl["action"] = "attach_partition"; - ddl["partition_schema"] = partition_schema; - ddl["partition_name"] = partition_name; - ddl["partition_bound"] = partition_bound; - ddl["schema"] = request.namespace_name(); - ddl["table"] = request.table_name(); - ddl["xid"] = request.xid(); - ddl["lsn"] = request.lsn(); + auto* ap = op.mutable_attach_partition(); + ap->set_schema(request.namespace_name()); + ap->set_table(request.table_name()); + ap->set_partition_schema(partition_schema); + ap->set_partition_name(partition_name); + ap->set_partition_bound(partition_bound); - LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Attach partition DDL: {}", ddl.dump()); + LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Attach partition DDL: schema={} table={} partition={}", + request.namespace_name(), request.table_name(), partition_name); - return nlohmann::to_string(ddl); + return {std::move(op)}; } -std::string +std::vector Server::detach_partition(uint64_t db_id, const XidLsn &xid, const PgMsgDetachPartition &msg) { proto::DetachPartitionRequest request; @@ -555,19 +614,20 @@ Server::detach_partition(uint64_t db_id, const XidLsn &xid, const PgMsgDetachPar partition_name = table_info->name; } - nlohmann::json ddl; + proto::DDLOperation op; + op.set_xid(request.xid()); + op.set_lsn(request.lsn()); - ddl["action"] = "detach_partition"; - ddl["partition_schema"] = partition_schema; - ddl["partition_name"] = partition_name; - ddl["schema"] = request.namespace_name(); - ddl["table"] = request.table_name(); - ddl["xid"] = request.xid(); - ddl["lsn"] = request.lsn(); + auto* dp = op.mutable_detach_partition(); + dp->set_schema(request.namespace_name()); + dp->set_table(request.table_name()); + dp->set_partition_schema(partition_schema); + dp->set_partition_name(partition_name); - LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Detach partition DDL: {}", ddl.dump()); + LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Detach partition DDL: schema={} table={} partition={}", + request.namespace_name(), request.table_name(), partition_name); - return nlohmann::to_string(ddl); + return {std::move(op)}; } proto::IndexProcessRequest @@ -1037,14 +1097,14 @@ Server::exists(uint64_t db_id, uint64_t table_id, const XidLsn &xid) return result.value(); } -std::string +std::vector Server::swap_sync_table(const proto::NamespaceRequest &namespace_req, const proto::TableRequest &create_req, const std::vector &index_reqs, const proto::UpdateRootsRequest &roots_req) { LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "got swap_sync_table()"); - nlohmann::json ddls = nlohmann::json::array(); // Initialize to empty array + std::vector ddl_ops; // 1. acquire a shared lock to ensure no one is doing a finalize boost::shared_lock lock(_write_mutex); @@ -1057,10 +1117,12 @@ Server::swap_sync_table(const proto::NamespaceRequest &namespace_req, namespace_req.db_id(), namespace_req.name(), namespace_req.namespace_id(), ns_xid.xid, ns_xid.lsn); - auto&& ns_ddl = _mutate_namespace(namespace_req.db_id(), namespace_req.namespace_id(), + auto ns_op = _mutate_namespace(namespace_req.db_id(), namespace_req.namespace_id(), namespace_req.name(), ns_xid, true); - ns_ddl["action"] = "ns_create"; - ddls.push_back(ns_ddl); + auto* nc = ns_op.mutable_namespace_create(); + nc->set_id(namespace_req.namespace_id()); + nc->set_name(namespace_req.name()); + ddl_ops.push_back(std::move(ns_op)); LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Create namespace name {}, id {}", namespace_req.name(), namespace_req.namespace_id()); } else { @@ -1087,9 +1149,8 @@ Server::swap_sync_table(const proto::NamespaceRequest &namespace_req, drop.xid(), drop.lsn()); is_resync = true; - auto&& drop_ddl = this->_drop_table(drop, is_resync); - drop_ddl["is_resync"] = is_resync; - ddls.push_back(drop_ddl); + auto drop_op = this->_drop_table(drop, is_resync); + ddl_ops.push_back(std::move(drop_op)); } // 5. perform a create table @@ -1097,27 +1158,27 @@ Server::swap_sync_table(const proto::NamespaceRequest &namespace_req, create_req.table().id(), create_req.xid(), create_req.lsn()); assert(create_req.lsn() == constant::RESYNC_CREATE_LSN); - auto&& create_ddl_array = this->_create_table(create_req); + auto create_ddl_ops = this->_create_table(create_req); - DCHECK(create_ddl_array.is_array()); // Always an array now - - if (create_ddl_array.empty()) { - // Empty array - parent chain incomplete, DDL deferred + if (create_ddl_ops.empty()) { + // Empty - parent chain incomplete, DDL deferred LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "DDL deferred during resync for table {} - parent chain incomplete", create_req.table().id()); } else { - // Non-empty array - mark first element (root parent) with is_resync flag - create_ddl_array[0]["is_resync"] = is_resync; + // Non-empty - mark first element (root parent) with is_resync flag if applicable + if (is_resync && create_ddl_ops[0].has_create_table()) { + create_ddl_ops[0].mutable_create_table()->set_is_resync(true); + } - for (const auto& ddl_item : create_ddl_array) { - ddls.push_back(ddl_item); + for (auto& ddl_item : create_ddl_ops) { + ddl_ops.push_back(std::move(ddl_item)); } LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Resync generated {} DDL statement(s) (parent + descendants)", - create_ddl_array.size()); + create_ddl_ops.size()); } for (const proto::IndexRequest& index : index_reqs) { @@ -1136,14 +1197,14 @@ Server::swap_sync_table(const proto::NamespaceRequest &namespace_req, create_req.table().id(), create_req.xid(), create_req.lsn()); this->_update_roots(roots_req); - // 7. serialize the ddl json and return - LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Response: {}", nlohmann::to_string(ddls)); + // 7. return the DDL operations + LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Response: {} DDL operation(s)", ddl_ops.size()); // Auto-invalidate the cache for the swapped table invalidate_table(create_req.db_id(), create_req.table().id(), XidLsn(create_req.xid(), create_req.lsn())); - return nlohmann::to_string(ddls); + return ddl_ops; } std::shared_ptr @@ -1742,7 +1803,7 @@ Server::_get_direct_children(uint64_t db_id, return children; } -nlohmann::json +proto::DDLOperation Server::_generate_attach_partition_ddl(uint64_t db_id, const TableCacheRecordPtr& child_info, uint64_t parent_table_id, @@ -1757,17 +1818,18 @@ Server::_generate_attach_partition_ddl(uint64_t db_id, auto child_ns = _get_namespace_info(db_id, child_info->namespace_id, xid); auto parent_ns = _get_namespace_info(db_id, parent_info->namespace_id, xid); - nlohmann::json ddl; - ddl["action"] = "attach_partition"; - ddl["partition_schema"] = child_ns->name; - ddl["partition_name"] = child_info->name; - ddl["partition_bound"] = child_info->partition_bound.value(); - ddl["schema"] = parent_ns->name; - ddl["table"] = parent_info->name; - ddl["xid"] = xid.xid; - ddl["lsn"] = xid.lsn; + proto::DDLOperation op; + op.set_xid(xid.xid); + op.set_lsn(xid.lsn); - return ddl; + auto* ap = op.mutable_attach_partition(); + ap->set_partition_schema(child_ns->name); + ap->set_partition_name(child_info->name); + ap->set_partition_bound(child_info->partition_bound.value()); + ap->set_schema(parent_ns->name); + ap->set_table(parent_info->name); + + return op; } void @@ -1775,7 +1837,7 @@ Server::_generate_child_attach_ddls(uint64_t db_id, uint64_t parent_table_id, const XidLsn& xid, uint64_t parent_snapshot_xid, - std::vector& ddl_array_out) + std::vector& ddl_array_out) { // Find all children that have this table as their parent auto children = _get_direct_children(db_id, parent_table_id, xid); @@ -1811,13 +1873,13 @@ Server::_generate_child_attach_ddls(uint64_t db_id, continue; } - auto attach_ddl = _generate_attach_partition_ddl( + auto attach_op = _generate_attach_partition_ddl( db_id, child_info, parent_table_id, xid ); - ddl_array_out.push_back(attach_ddl); + ddl_array_out.push_back(std::move(attach_op)); LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Generated ATTACH DDL for child table {} (snapshot_xid={})", @@ -1826,7 +1888,7 @@ Server::_generate_child_attach_ddls(uint64_t db_id, } } -nlohmann::json +std::vector Server::_create_table(const proto::TableRequest& request) { XidLsn xid(request.xid(), request.lsn()); @@ -1836,18 +1898,6 @@ Server::_create_table(const proto::TableRequest& request) XidLsn(request.xid(), request.lsn())); CHECK(ns_info); - // initialize the ddl statement - nlohmann::json ddl; - ddl["action"] = "create"; - ddl["schema"] = request.table().namespace_name(); - ddl["table"] = request.table().name(); - ddl["tid"] = request.table().id(); - ddl["xid"] = request.xid(); - ddl["lsn"] = request.lsn(); - ddl["rls_enabled"] = request.table().rls_enabled(); - ddl["rls_forced"] = request.table().rls_forced(); - ddl["columns"] = nlohmann::json::array(); - // partition info - track for system tables and logic, but don't add to CREATE DDL std::optional parent_table_id = std::nullopt; bool has_parent = false; // Track if direct parent exists @@ -1863,7 +1913,6 @@ Server::_create_table(const proto::TableRequest& request) std::optional partition_key = std::nullopt; if (request.table().has_partition_key() && !request.table().partition_key().empty()) { partition_key = request.table().partition_key(); - ddl["partition_key"] = partition_key.value(); } // partition bound - track for system tables but don't add to CREATE DDL @@ -1897,6 +1946,21 @@ Server::_create_table(const proto::TableRequest& request) // add schemas entries for each column std::vector columns; + + // Build the CreateTableDDL protobuf + proto::DDLOperation create_op; + create_op.set_xid(request.xid()); + create_op.set_lsn(request.lsn()); + auto* ct = create_op.mutable_create_table(); + ct->set_tid(request.table().id()); + ct->set_schema(request.table().namespace_name()); + ct->set_table(request.table().name()); + ct->set_rls_enabled(request.table().rls_enabled()); + ct->set_rls_forced(request.table().rls_forced()); + if (partition_key.has_value()) { + ct->set_partition_key(partition_key.value()); + } + for (const auto& column : request.table().columns()) { proto::ColumnHistory& history = columns.emplace_back(); history.set_xid(xid.xid); @@ -1905,18 +1969,16 @@ Server::_create_table(const proto::TableRequest& request) history.set_update_type(static_cast(SchemaUpdateType::NEW_COLUMN)); *history.mutable_column() = column; - // store the column data into the json - nlohmann::json column_json; - column_json["name"] = column.name(); - column_json["type"] = column.pg_type(); - column_json["type_name"] = column.type_name(); - column_json["type_namespace"] = column.type_namespace(); - column_json["nullable"] = column.is_nullable(); + // store the column data into the protobuf + auto* ddl_col = ct->add_columns(); + ddl_col->set_name(column.name()); + ddl_col->set_type_oid(column.pg_type()); + ddl_col->set_type_name(column.type_name()); + ddl_col->set_type_namespace(column.type_namespace()); + ddl_col->set_nullable(column.is_nullable()); if (column.has_default_value()) { - column_json["default"] = column.default_value(); + ddl_col->set_default_value(column.default_value()); } - - ddl["columns"].push_back(column_json); } _set_schema_info(request.db_id(), request.table().id(), ns_info->id, request.table().name(), @@ -1941,25 +2003,25 @@ Server::_create_table(const proto::TableRequest& request) ranges.size()); } - // Build DDL array - always include CREATE TABLE - std::vector ddl_array; + // Build DDL operation vector - always include CREATE TABLE + std::vector ddl_array; // Step 1: Always generate CREATE TABLE DDL // (partition_bound and parent info not added - they go in ATTACH PARTITION) - ddl_array.push_back(ddl); + ddl_array.push_back(std::move(create_op)); // Step 2: If this is a child partition with an existing parent, generate ATTACH PARTITION DDL if (partition_bound.has_value() && has_parent) { auto child_info = _get_table_info(request.db_id(), request.table().id(), xid); DCHECK(child_info != nullptr && child_info->exists); - auto attach_ddl = _generate_attach_partition_ddl( + auto attach_op = _generate_attach_partition_ddl( request.db_id(), child_info, parent_table_id.value(), xid ); - ddl_array.push_back(attach_ddl); + ddl_array.push_back(std::move(attach_op)); LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "Generated ATTACH PARTITION for child table {}", request.table().id()); @@ -1984,28 +2046,28 @@ Server::_create_table(const proto::TableRequest& request) } } - // ALWAYS return a JSON array - return nlohmann::json(ddl_array); + return ddl_array; } -nlohmann::json +proto::DDLOperation Server::_generate_partition_updates(const proto::TableRequest& request, - const proto::ColumnHistory& history) + const proto::ColumnHistory& history, + const XidLsn& xid) { - nlohmann::json ddl; - std::vector table_ids; + proto::DDLOperation op; + op.set_xid(xid.xid); + op.set_lsn(xid.lsn); + + auto* rp = op.mutable_resync_partitions(); + rp->set_parent_table_id(request.table().id()); for (auto &partition : request.table().partition_data()) { - table_ids.push_back(partition.table_id()); + rp->add_table_ids(partition.table_id()); } - ddl["action"] = "resync_partitions"; - ddl["parent_table_id"] = request.table().id(); - ddl["table_ids"] = table_ids; - - return ddl; + return op; } -nlohmann::json +proto::DDLOperation Server::_drop_table(const proto::DropTableRequest& request, bool is_resync) { // retrieve the id of the namespace @@ -2022,23 +2084,24 @@ Server::_drop_table(const proto::DropTableRequest& request, bool is_resync) CHECK(old_table_info); - // initialize the ddl json - nlohmann::json ddl; - ddl["action"] = "drop"; - ddl["tid"] = request.table_id(); - ddl["xid"] = request.xid(); - ddl["lsn"] = request.lsn(); - ddl["schema"] = request.namespace_name(); - ddl["table"] = request.name(); + // initialize the DDLOperation + proto::DDLOperation op; + op.set_xid(request.xid()); + op.set_lsn(request.lsn()); + + auto* dt = op.mutable_drop_table(); + dt->set_tid(request.table_id()); + dt->set_schema(request.namespace_name()); + dt->set_table(request.name()); if (old_table_info->parent_table_id.has_value()) { - ddl["parent_table_id"] = old_table_info->parent_table_id.value(); + dt->set_parent_table_id(old_table_info->parent_table_id.value()); } if (old_table_info->partition_key.has_value() && !old_table_info->partition_key.value().empty()) { - ddl["partition_key"] = old_table_info->partition_key.value(); + dt->set_partition_key(old_table_info->partition_key.value()); } if (old_table_info->partition_bound.has_value() && !old_table_info->partition_bound.value().empty()) { - ddl["partition_bound"] = old_table_info->partition_bound.value(); + dt->set_partition_bound(old_table_info->partition_bound.value()); } XidLsn xid(request.xid(), request.lsn()); @@ -2105,21 +2168,17 @@ Server::_drop_table(const proto::DropTableRequest& request, bool is_resync) } } - return ddl; + return op; } -nlohmann::json +proto::DDLOperation Server::_mutate_namespace( uint64_t db_id, uint64_t ns_id, std::optional name, const XidLsn& xid, bool exists) { - // construct the DDL to provide to the FDW - nlohmann::json ddl; - ddl["id"] = ns_id; - ddl["xid"] = xid.xid; - ddl["lsn"] = xid.lsn; - if (name) { - ddl["name"] = *name; - } + // construct a base DDLOperation with xid/lsn set; caller sets the oneof action + proto::DDLOperation op; + op.set_xid(xid.xid); + op.set_lsn(xid.lsn); // record the namespace info into the cache { @@ -2139,7 +2198,7 @@ Server::_mutate_namespace( sys_tbl::NamespaceNames::Data::tuple(ns_id, name.value_or(""), xid.xid, xid.lsn, exists, table->get_next_internal_row_id()); table->upsert(tuple, constant::UNKNOWN_EXTENT); - return ddl; + return op; } void @@ -2298,7 +2357,7 @@ Server::_get_modified_partition_details(uint64_t db_id, return result; } -nlohmann::json +proto::DDLOperation Server::_mutate_usertype(uint64_t db_id, uint64_t type_id, const std::string &name, @@ -2306,36 +2365,27 @@ Server::_mutate_usertype(uint64_t db_id, int8_t type, const std::string &value_json, const XidLsn xid, - bool exists) + bool active) { - // construct the DDL to provide to the FDW - nlohmann::json ddl; - ddl["id"] = type_id; - ddl["xid"] = xid.xid; - ddl["lsn"] = xid.lsn; - ddl["name"] = name; - - if (exists) { - // these are not set for drop when exists is false - ddl["value"] = value_json; - ddl["type"] = type; - ddl["namespace_id"] = ns_id; - } + // construct a base DDLOperation with xid/lsn set; caller sets the oneof action + proto::DDLOperation op; + op.set_xid(xid.xid); + op.set_lsn(xid.lsn); // record the user defined type info into the cache { boost::unique_lock lock(_mutex); - auto entry = std::make_shared(type_id, name, ns_id, type, value_json, exists); + auto entry = std::make_shared(type_id, name, ns_id, type, value_json, active); _usertype_id_cache[db_id][type_id][xid] = entry; } // add the type to the user types table auto table = _get_mutable_system_table(db_id, sys_tbl::UserTypes::ID); auto tuple = - sys_tbl::UserTypes::Data::tuple(type_id, ns_id, name, value_json, xid.xid, xid.lsn, type, exists, table->get_next_internal_row_id()); + sys_tbl::UserTypes::Data::tuple(type_id, ns_id, name, value_json, xid.xid, xid.lsn, type, active, table->get_next_internal_row_id()); table->upsert(tuple, constant::UNKNOWN_EXTENT); - return ddl; + return op; } Server::UserTypeCacheRecordPtr @@ -3916,16 +3966,37 @@ Server::_write_index(const XidLsn& xid, } } -proto::ColumnHistory +Server::SchemaUpdateResult Server::_generate_update(const google::protobuf::RepeatedPtrField& old_schema, const google::protobuf::RepeatedPtrField& new_schema, const XidLsn& xid, - nlohmann::json& ddl) + uint64_t tid, + const std::string& schema_name, + const std::string& table_name, + bool rls_enabled, + bool rls_forced) { - proto::ColumnHistory update; + SchemaUpdateResult result; + auto& update = result.history; + auto& op = result.operation; + update.set_xid(xid.xid); update.set_lsn(xid.lsn); + op.set_xid(xid.xid); + op.set_lsn(xid.lsn); + + // Helper lambda to populate a resync DDLOperation + auto set_resync = [&]() { + auto* r = op.mutable_resync(); + r->set_tid(tid); + r->set_schema(schema_name); + r->set_table(table_name); + r->set_rls_enabled(rls_enabled); + r->set_rls_forced(rls_forced); + update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); + }; + // Build maps keyed by column.position() for both old and new schemas std::map oldMap; for (const auto& column : old_schema) { @@ -3944,19 +4015,22 @@ Server::_generate_update(const google::protobuf::RepeatedPtrField(SchemaUpdateType::REMOVE_COLUMN)); update.set_exists(false); - ddl["action"] = "col_drop"; - ddl["column"] = old_col->name(); + auto* cd = op.mutable_column_drop(); + cd->set_tid(tid); + cd->set_schema(schema_name); + cd->set_table(table_name); + cd->set_rls_enabled(rls_enabled); + cd->set_rls_forced(rls_forced); + cd->set_column_name(old_col->name()); #else - ddl["action"] = "resync"; - update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); + set_resync(); #endif - return update; + return result; } } @@ -3967,27 +4041,31 @@ Server::_generate_update(const google::protobuf::RepeatedPtrFieldhas_default_value()) { - ddl["action"] = "resync"; - update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); + set_resync(); } else { update.set_update_type(static_cast(SchemaUpdateType::NEW_COLUMN)); update.set_exists(true); *update.mutable_column() = *new_col; - ddl["action"] = "col_add"; - ddl["column"]["name"] = new_col->name(); - ddl["column"]["type"] = new_col->pg_type(); - ddl["column"]["nullable"] = new_col->is_nullable(); + auto* ca = op.mutable_column_add(); + ca->set_tid(tid); + ca->set_schema(schema_name); + ca->set_table(table_name); + ca->set_rls_enabled(rls_enabled); + ca->set_rls_forced(rls_forced); + auto* col = ca->mutable_column(); + col->set_name(new_col->name()); + col->set_type_oid(new_col->pg_type()); + col->set_nullable(new_col->is_nullable()); if (new_col->has_default_value()) { - ddl["column"]["default"] = new_col->default_value(); + col->set_default_value(new_col->default_value()); } CHECK(false); // XXX new_col->type_name must be added } #else - ddl["action"] = "resync"; - update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); + set_resync(); #endif - return update; + return result; } } @@ -4001,10 +4079,15 @@ Server::_generate_update(const google::protobuf::RepeatedPtrField(SchemaUpdateType::NAME_CHANGE)); update.set_exists(true); - ddl["action"] = "col_rename"; - ddl["old_name"] = old_col->name(); - ddl["new_name"] = new_col->name(); - return update; + auto* cr = op.mutable_column_rename(); + cr->set_tid(tid); + cr->set_schema(schema_name); + cr->set_table(table_name); + cr->set_rls_enabled(rls_enabled); + cr->set_rls_forced(rls_forced); + cr->set_old_name(old_col->name()); + cr->set_new_name(new_col->name()); + return result; } // Check for a change in nullability (from not-null to nullable). @@ -4015,50 +4098,55 @@ Server::_generate_update(const google::protobuf::RepeatedPtrField(SchemaUpdateType::NULLABLE_CHANGE)); update.set_exists(true); - ddl["action"] = "col_nullable"; - ddl["column"]["name"] = new_col->name(); - ddl["column"]["nullable"] = new_col->is_nullable(); + auto* cn = op.mutable_column_nullable(); + cn->set_tid(tid); + cn->set_schema(schema_name); + cn->set_table(table_name); + cn->set_rls_enabled(rls_enabled); + cn->set_rls_forced(rls_forced); + cn->set_column_name(new_col->name()); + cn->set_nullable(new_col->is_nullable()); #else - ddl["action"] = "resync"; - update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); + set_resync(); #endif - return update; + return result; } // Changing from nullable to not-nullable requires a resync if (old_col->is_nullable() && !new_col->is_nullable()) { - - ddl["action"] = "resync"; - update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); - return update; + set_resync(); + return result; } // Check for a change in the data type if (old_col->pg_type() != new_col->pg_type()) { - ddl["action"] = "resync"; - update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); - return update; + set_resync(); + return result; } // Check for a primary key position change if ((old_col->has_pk_position() && !new_col->has_pk_position()) || (!old_col->has_pk_position() && new_col->has_pk_position()) || (old_col->pk_position() != new_col->pk_position())) { - ddl["action"] = "resync"; - update.set_update_type(static_cast(SchemaUpdateType::RESYNC)); - return update; + set_resync(); + return result; } } } // No change detected - ddl["action"] = "no_change"; + auto* nc = op.mutable_no_change(); + nc->set_tid(tid); + nc->set_schema(schema_name); + nc->set_table(table_name); + nc->set_rls_enabled(rls_enabled); + nc->set_rls_forced(rls_forced); update.set_update_type(static_cast(SchemaUpdateType::NO_CHANGE)); LOG_DEBUG(LOG_SCHEMA, LOG_LEVEL_DEBUG1, "No schema change detected for table @{}:{}", xid.xid, xid.lsn); - return update; + return result; } } // namespace springtail::sys_tbl_mgr From d083a4d3ff1917bf0119856951c148b6d7bb5bc7 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Sun, 19 Apr 2026 13:19:53 -0700 Subject: [PATCH 02/11] - Fixes to deal with restarting after killing the server hitting port-in-use error - Updated the integration test workflow to capture the logs when there's a failure during startup - Fixed a compilation error in release --- .github/workflows/rw-run-integration-test.yml | 10 ++++- include/grpc/grpc_server_manager.hh | 1 + src/grpc/grpc_server_manager.cc | 44 ++++++++++++------- src/proxy/client_session.cc | 2 +- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/.github/workflows/rw-run-integration-test.yml b/.github/workflows/rw-run-integration-test.yml index 7f9d8933a..643235e12 100644 --- a/.github/workflows/rw-run-integration-test.yml +++ b/.github/workflows/rw-run-integration-test.yml @@ -221,7 +221,15 @@ jobs: python3 ./springtail.py -f ../../system.json.test --load-redis --build-dir ../../${CI_BUILD_TYPE} python3 ./springtail.py -f ../../system.json.test --install-triggers --build-dir ../../${CI_BUILD_TYPE} python3 ./springtail.py --start -f ../../system.json.test -b ../../${CI_BUILD_TYPE} -s data_init.sql - python3 ./springtail.py --start -f ../../system.json.test -b ../../${CI_BUILD_TYPE} --check + if ! python3 ./springtail.py --start -f ../../system.json.test -b ../../${CI_BUILD_TYPE} --check; then + echo "*** Startup failed, dumping daemon logs" + output=$(python3 springtail.py --dump -f ../../system.json.test -b ../../${CI_BUILD_TYPE}) || true + fname=$(echo $output | cut -f2 -d':' | xargs | head -1) + if [ -f "$fname" ]; then + mv $fname springtail-startup-failure-${_LOG_SHA}.tar.gz + fi + exit 1 + fi # Run integration tests set +e diff --git a/include/grpc/grpc_server_manager.hh b/include/grpc/grpc_server_manager.hh index b52e768c5..0d6383ac2 100644 --- a/include/grpc/grpc_server_manager.hh +++ b/include/grpc/grpc_server_manager.hh @@ -23,6 +23,7 @@ public: void shutdown(); private: + std::unique_ptr _create_builder(const std::string& address); static std::string read_file_contents(const std::string& path); int _worker_thread_count = 0; diff --git a/src/grpc/grpc_server_manager.cc b/src/grpc/grpc_server_manager.cc index b76933d25..8efb2dd6e 100644 --- a/src/grpc/grpc_server_manager.cc +++ b/src/grpc/grpc_server_manager.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -52,18 +53,15 @@ GrpcServerManager::addService(grpc::Service* service) _services.push_back(service); } -void -GrpcServerManager::startup() +std::unique_ptr +GrpcServerManager::_create_builder(const std::string& address) { - grpc::ServerBuilder builder; - std::string address = "0.0.0.0:" + std::to_string(_port); + auto builder = std::make_unique(); - // Configure resource quota grpc::ResourceQuota rq; - LOG_INFO("Setting gRPC server max threads to {}", _worker_thread_count); rq.SetMaxThreads(_worker_thread_count); - builder.SetResourceQuota(rq); - builder.AddChannelArgument(GRPC_ARG_ALLOW_REUSEPORT, 0); + builder->SetResourceQuota(rq); + builder->AddChannelArgument(GRPC_ARG_ALLOW_REUSEPORT, 0); if (_ssl) { grpc::SslServerCredentialsOptions::PemKeyCertPair key_cert_pair = {_server_key, @@ -73,20 +71,36 @@ GrpcServerManager::startup() opts.pem_key_cert_pairs.push_back(key_cert_pair); opts.client_certificate_request = GRPC_SSL_REQUEST_AND_REQUIRE_CLIENT_CERTIFICATE_BUT_DONT_VERIFY; - auto creds = grpc::SslServerCredentials(opts); - builder.AddListeningPort(address, creds); + builder->AddListeningPort(address, grpc::SslServerCredentials(opts)); } else { - builder.AddListeningPort(address, grpc::InsecureServerCredentials()); + builder->AddListeningPort(address, grpc::InsecureServerCredentials()); } - // Register all services with the builder for (auto service : _services) { - builder.RegisterService(service); + builder->RegisterService(service); } - _server = builder.BuildAndStart(); + return builder; +} + +void +GrpcServerManager::startup() +{ + std::string address = "0.0.0.0:" + std::to_string(_port); + LOG_INFO("Setting gRPC server max threads to {}", _worker_thread_count); + + // Retry startup in case the port is briefly unavailable after a force-kill + for (int attempt = 0; attempt < 5; ++attempt) { + auto builder = _create_builder(address); + _server = builder->BuildAndStart(); + if (_server) { + break; + } + LOG_WARN("Failed to start gRPC server on {} (attempt {}/5), retrying in 1s...", address, attempt + 1); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } if (!_server) { - throw std::runtime_error("Failed to start GRPC server"); + throw std::runtime_error("Failed to start GRPC server on " + address + " after 5 attempts"); } LOG_INFO("Server listening on {}", address); } diff --git a/src/proxy/client_session.cc b/src/proxy/client_session.cc index f6bcc2e25..656240a99 100644 --- a/src/proxy/client_session.cc +++ b/src/proxy/client_session.cc @@ -780,7 +780,7 @@ namespace springtail::pg_proxy { // XXX debugging auto qs_deps = msg->qs_dependencies(); - for (const auto& qs : qs_deps) { + for ([[maybe_unused]] const auto& qs : qs_deps) { LOG_DEBUG(LOG_PROXY, LOG_LEVEL_DEBUG3, "[C:{}] Query dependency: {}", _id, qs->to_string()); } From 237cc9c106c8f66a5f2610188e849053682b9da9 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Sun, 19 Apr 2026 14:47:02 -0700 Subject: [PATCH 03/11] make sure that --dump always generates output, even if the system tables are invalid --- python/testing/springtail.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/testing/springtail.py b/python/testing/springtail.py index c5e9dec4e..01cda82bb 100644 --- a/python/testing/springtail.py +++ b/python/testing/springtail.py @@ -635,8 +635,11 @@ def gen_dump_tarball(props : Properties, build_dir : str) -> str: for log in glob.glob(logs): shutil.copy(log, tmp_logs_dir) - # dump the system tables - run_command(os.path.join(build_dir, 'src/storage/dump_system_tables'), ['1'], os.path.join(tmp_logs_dir, 'system_table.dump')); + # dump the system tables (best-effort; may fail if daemons aren't running) + try: + run_command(os.path.join(build_dir, 'src/storage/dump_system_tables'), ['1'], os.path.join(tmp_logs_dir, 'system_table.dump')) + except Exception as e: + logging.warning(f"Failed to dump system tables (continuing without): {e}") # create the tarball run_command('tar', ['-czf', tarball, '-C', tmp_dir, 'logs']) From 703276970aeda4a5603571ad4b9415643e7dba35 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Sun, 19 Apr 2026 21:47:32 -0700 Subject: [PATCH 04/11] More fixes to hopefully get logs from a failed startup --- .github/workflows/rw-run-integration-test.yml | 4 ++-- python/testing/springtail.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/rw-run-integration-test.yml b/.github/workflows/rw-run-integration-test.yml index 996ddf30b..0d8bfb08f 100644 --- a/.github/workflows/rw-run-integration-test.yml +++ b/.github/workflows/rw-run-integration-test.yml @@ -210,8 +210,7 @@ jobs: cd python/testing python3 ./springtail.py -f ../../system.json.test --load-redis --build-dir ../../${CI_BUILD_TYPE} python3 ./springtail.py -f ../../system.json.test --install-triggers --build-dir ../../${CI_BUILD_TYPE} - python3 ./springtail.py --start -f ../../system.json.test -b ../../${CI_BUILD_TYPE} -s data_init.sql - if ! python3 ./springtail.py --start -f ../../system.json.test -b ../../${CI_BUILD_TYPE} --check; then + if ! python3 ./springtail.py --start --startup-timeout 60 -f ../../system.json.test -b ../../${CI_BUILD_TYPE} -s data_init.sql; then echo "*** Startup failed, dumping daemon logs" output=$(python3 springtail.py --dump -f ../../system.json.test -b ../../${CI_BUILD_TYPE}) || true fname=$(echo $output | cut -f2 -d':' | xargs | head -1) @@ -220,6 +219,7 @@ jobs: fi exit 1 fi + python3 ./springtail.py --start -f ../../system.json.test -b ../../${CI_BUILD_TYPE} --check # Run integration tests set +e diff --git a/python/testing/springtail.py b/python/testing/springtail.py index 01cda82bb..8f730ec57 100644 --- a/python/testing/springtail.py +++ b/python/testing/springtail.py @@ -514,12 +514,12 @@ def start_proxy(props : Properties, build_dir : str, restart: bool = False, valg pid_path=props.get_pid_path(), log_path=props.get_log_path()) -def wait_for_running(props : Properties) -> None: +def wait_for_running(props : Properties, timeout : int = 600) -> None: """Wait for the system to be in a running state.""" # Wait for the system to be in a running state for db_config in props.get_db_configs(): id = db_config['id'] - props.wait_for_state('running', id, 'failed') + props.wait_for_state('running', id, 'failed', timeout=timeout) def execute_startup_sql(props : Properties, sql_file : str) -> None: @@ -735,7 +735,8 @@ def start(config_file: str, do_init: bool = True, postgres_only: bool = False, do_fdw_install: bool = True, - valgrind_daemons: List[str] = []) -> None: + valgrind_daemons: List[str] = [], + startup_timeout: int = 600) -> None: """Main function to start the Springtail system.""" # must do init if we are performing cleanup if do_cleanup: @@ -793,7 +794,7 @@ def start(config_file: str, # wait for running state print("\nWaiting for running state...") - wait_for_running(props) + wait_for_running(props, timeout=startup_timeout) # start the fdw daemons (e.g., ddl manager to import schemas) print("\nStarting FDW daemons...") @@ -971,6 +972,7 @@ def parse_arguments() -> argparse.Namespace: parser.add_argument('-x', '--start-xid', type=int, required=False, help='Start the system at a specific XID') parser.add_argument('--no-cleanup', action='store_true', help="Start without reinitializing the system") parser.add_argument('--postgres-only', action='store_true', help="Start postgres only, don't start springtail FDW") + parser.add_argument('--startup-timeout', type=int, required=False, default=600, help='Timeout in seconds for waiting for system to reach running state') parser.add_argument('--start', action=argparse.BooleanOptionalAction, help="Start the Springtail system") parser.add_argument('--status', action=argparse.BooleanOptionalAction, help="Check the status of the Springtail daemons") parser.add_argument('--kill', action=argparse.BooleanOptionalAction, help="Kill the Springtail daemons") @@ -1038,7 +1040,8 @@ def main() -> None: if args.start: start(args.config_file, args.build_dir, args.sql_file, do_cleanup=not args.no_cleanup, do_init=not args.no_cleanup, - postgres_only=args.postgres_only) + postgres_only=args.postgres_only, + startup_timeout=args.startup_timeout) except Exception as e: print(f"Caught error: {e}") From 492adce22fe9e06d1e82ee3a469485a15c67789d Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Sun, 19 Apr 2026 21:49:27 -0700 Subject: [PATCH 05/11] Additional workflow for faster testing --- .github/workflows/integration-test.yml | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/integration-test.yml diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml new file mode 100644 index 000000000..9e03c21bb --- /dev/null +++ b/.github/workflows/integration-test.yml @@ -0,0 +1,36 @@ +name: Integration Tests +on: + workflow_dispatch: + +permissions: + contents: read + packages: write + checks: write + +jobs: + debug-build: + uses: ./.github/workflows/rw-build-container.yml + secrets: inherit + permissions: + contents: read + packages: write + with: + base_image_tag: ${{ vars.IMAGE_TAG_DEBUG }} + build_type: "debug" + + integration-test: + uses: ./.github/workflows/rw-run-integration-test.yml + needs: [ debug-build ] + secrets: inherit + permissions: + contents: read + packages: write + checks: write + with: + image_tag: ${{ needs.debug-build.outputs.output_image_tag }} + config_to_run: "nightly" + system_settings_override: '.github/workflows/nightly.overrides.json' + iterations: "1" + build_type: "debug" + reuse_code: true + timeout_minutes: 120 From 957ede38b09da26a9adf7dde50a83f49300dae25 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Wed, 22 Apr 2026 21:32:03 -0700 Subject: [PATCH 06/11] Fix for missing pg function in pgext library --- include/pg_ext/string.hh | 1 + src/pg_ext/string.cc | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/include/pg_ext/string.hh b/include/pg_ext/string.hh index a2488fc39..3fe565e91 100644 --- a/include/pg_ext/string.hh +++ b/include/pg_ext/string.hh @@ -62,6 +62,7 @@ extern "C" PGEXT_API char *upperstr_with_len(const char *str, int len); extern "C" PGEXT_API int pg_snprintf(char *str, size_t count, const char *fmt, ...); extern "C" PGEXT_API int pg_database_encoding_max_length(void); extern "C" PGEXT_API int pg_mblen(const char *mbstr); +extern "C" PGEXT_API int pg_mblen_cstr(const char *mbstr); extern "C" PGEXT_API int pg_mbstrlen_with_len(const char *mbstr, int len); extern "C" PGEXT_API int pg_mb2wchar_with_len(const char *mbstr, wchar_t *wstr, int len); extern "C" PGEXT_API int pg_wchar2mb_with_len(const wchar_t *wstr, char *mbstr, int len); diff --git a/src/pg_ext/string.cc b/src/pg_ext/string.cc index a2c5ee9c9..ffede6acb 100644 --- a/src/pg_ext/string.cc +++ b/src/pg_ext/string.cc @@ -49,6 +49,12 @@ pg_mblen(const char *mbstr) return len; } +int +pg_mblen_cstr(const char *mbstr) +{ + return pg_mblen(mbstr); +} + char *lowerstr_with_len(const char *str, int len) { if (!str || len <= 0) { auto *empty = (char *)palloc(1); From 9bbad1aae0feb7602fc7f584b185aec56412c5e2 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Wed, 22 Apr 2026 22:06:47 -0700 Subject: [PATCH 07/11] More missing PG functions --- include/pg_ext/string.hh | 6 ++++++ src/pg_ext/string.cc | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/include/pg_ext/string.hh b/include/pg_ext/string.hh index 3fe565e91..5ef5fea7d 100644 --- a/include/pg_ext/string.hh +++ b/include/pg_ext/string.hh @@ -63,6 +63,8 @@ extern "C" PGEXT_API int pg_snprintf(char *str, size_t count, const char *fmt, . extern "C" PGEXT_API int pg_database_encoding_max_length(void); extern "C" PGEXT_API int pg_mblen(const char *mbstr); extern "C" PGEXT_API int pg_mblen_cstr(const char *mbstr); +extern "C" PGEXT_API int pg_mblen_unbounded(const char *mbstr); +extern "C" PGEXT_API int pg_mblen_range(const char *mbstr, const char *end); extern "C" PGEXT_API int pg_mbstrlen_with_len(const char *mbstr, int len); extern "C" PGEXT_API int pg_mb2wchar_with_len(const char *mbstr, wchar_t *wstr, int len); extern "C" PGEXT_API int pg_wchar2mb_with_len(const wchar_t *wstr, char *mbstr, int len); @@ -70,6 +72,10 @@ extern "C" PGEXT_API int t_isalnum(const char *ptr); extern "C" PGEXT_API int t_isspace(const char *p); extern "C" PGEXT_API int t_isdigit(const char *p); extern "C" PGEXT_API int t_isalpha(const char *p); +extern "C" PGEXT_API int t_isalnum_with_len(const char *ptr, int len); +extern "C" PGEXT_API int t_isspace_with_len(const char *p, int len); +extern "C" PGEXT_API int t_isdigit_with_len(const char *p, int len); +extern "C" PGEXT_API int t_isalpha_with_len(const char *p, int len); extern "C" PGEXT_API char *str_tolower(const char *buff, size_t nbytes, Oid collid); extern "C" PGEXT_API char *str_toupper(const char *buff, size_t nbytes, Oid collid); extern "C" PGEXT_API char *pstrdup(const char *in); diff --git a/src/pg_ext/string.cc b/src/pg_ext/string.cc index ffede6acb..e186486c0 100644 --- a/src/pg_ext/string.cc +++ b/src/pg_ext/string.cc @@ -55,6 +55,22 @@ pg_mblen_cstr(const char *mbstr) return pg_mblen(mbstr); } +int +pg_mblen_unbounded(const char *mbstr) +{ + return pg_mblen(mbstr); +} + +int +pg_mblen_range(const char *mbstr, const char *end) +{ + int len = pg_mblen(mbstr); + if (end && mbstr + len > end) { + return end - mbstr; + } + return len; +} + char *lowerstr_with_len(const char *str, int len) { if (!str || len <= 0) { auto *empty = (char *)palloc(1); @@ -229,6 +245,26 @@ int t_isalpha(const char *p) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } +int t_isalnum_with_len(const char *ptr, int len) { + if (!ptr || len <= 0) return 0; + return t_isalnum(ptr); +} + +int t_isspace_with_len(const char *p, int len) { + if (!p || len <= 0) return 0; + return t_isspace(p); +} + +int t_isdigit_with_len(const char *p, int len) { + if (!p || len <= 0) return 0; + return t_isdigit(p); +} + +int t_isalpha_with_len(const char *p, int len) { + if (!p || len <= 0) return 0; + return t_isalpha(p); +} + char *str_tolower(const char *buff, size_t nbytes, Oid collid) { return lowerstr_with_len(buff, nbytes); } From 6df742394472e2d39490482e68918e6f930fef78 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Sun, 26 Apr 2026 10:52:33 -0700 Subject: [PATCH 08/11] Fixes to use the compiled version of postgres in the CI --- docker/Dockerfile.ci | 5 +- .../ansible/roles/custom-pg/defaults/main.yml | 2 +- docker/ansible/roles/custom-pg/tasks/main.yml | 54 ++++++++++--------- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index 900bf1e1c..b70146a0a 100644 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -46,8 +46,9 @@ RUN cd ${CI_BUILD_TYPE} && \ pip3 install --break-system-packages -r python/performance/requirements.txt && \ rm -rf $CI_WORKSPACE /home/dev/${CI_BUILD_TYPE}/src /home/dev/external/vcpkg/buildtrees /home/dev/external/vcpkg/packages /home/dev/external/vcpkg/downloads -# Pull back the libpg_query -RUN cd /home/dev/external && git clone https://github.com/Springtail-inc/libpg_query.git +# Pull back the libpg_query (cmake FetchContent already populates it via the +# external/ symlink during the build above; only clone if it isn't there) +RUN cd /home/dev/external && [ -d libpg_query/.git ] || git clone https://github.com/Springtail-inc/libpg_query.git # Set tini as the container's entrypoint ENTRYPOINT ["/tini", "--"] diff --git a/docker/ansible/roles/custom-pg/defaults/main.yml b/docker/ansible/roles/custom-pg/defaults/main.yml index cc396eb1d..1cb9e0daf 100644 --- a/docker/ansible/roles/custom-pg/defaults/main.yml +++ b/docker/ansible/roles/custom-pg/defaults/main.yml @@ -1,3 +1,3 @@ -custom_pg_package_url: "https://s3.us-east-1.amazonaws.com/public-share.springtail.io/postgres_apt.tgz" +custom_pg_package_url: "https://s3.us-east-1.amazonaws.com/public-share.springtail.io/postgresql-16_16.9-1_93cd1a8d-78c2-4979-8204-fa73ab5521a4.tar.gz" superuser_name: "springtail" superuser_password: "springtail" \ No newline at end of file diff --git a/docker/ansible/roles/custom-pg/tasks/main.yml b/docker/ansible/roles/custom-pg/tasks/main.yml index 912223240..51eb259b1 100644 --- a/docker/ansible/roles/custom-pg/tasks/main.yml +++ b/docker/ansible/roles/custom-pg/tasks/main.yml @@ -20,14 +20,14 @@ path: /tmp/springtail-pg.tar.gz register: pg_tarball -- name: Download Custom PG .deb packages from URL +- name: Download Custom PG source from URL ansible.builtin.get_url: url: "{{ custom_pg_package_url }}" dest: /tmp/springtail-pg.tar.gz mode: '0644' when: not pg_tarball.stat.exists -- name: Install PG runtime dependencies +- name: Install PG build dependencies ansible.builtin.apt: name: - libreadline-dev @@ -40,36 +40,38 @@ - liblz4-dev - libzstd-dev - uuid-dev + - bison + - flex state: present -- name: Install Custom PG from .deb packages +- name: Build and install Custom PG from source ansible.builtin.shell: | set -e cd /tmp - mkdir -p postgres_apt - tar xzf springtail-pg.tar.gz -C postgres_apt - cd postgres_apt - - # Install packages in dependency order, skipping debug symbol (.ddeb) packages - dpkg -i libpq5_*.deb || apt-get install -f -y - dpkg -i libpq-dev_*.deb || apt-get install -f -y - dpkg -i libecpg6_*.deb libecpg-compat3_*.deb libpgtypes3_*.deb libecpg-dev_*.deb || apt-get install -f -y - dpkg -i postgresql-client-16_*.deb || apt-get install -f -y - dpkg -i postgresql-16_*.deb || apt-get install -f -y - dpkg -i postgresql-server-dev-16_*.deb || apt-get install -f -y - - # Optional PL language packages — install if present - dpkg -i postgresql-plpython3-16_*.deb 2>/dev/null || true - dpkg -i postgresql-plperl-16_*.deb 2>/dev/null || true - dpkg -i postgresql-pltcl-16_*.deb 2>/dev/null || true - dpkg -i postgresql-doc-16_*.deb 2>/dev/null || true - - # Fix any remaining dependency issues - apt-get install -f -y - - # Clean up + tar xzf springtail-pg.tar.gz + cd postgresql-16.9 + ./configure \ + --prefix=/usr/lib/postgresql/16 \ + --bindir=/usr/lib/postgresql/16/bin \ + --datadir=/usr/share/postgresql/16 \ + --sysconfdir=/etc/postgresql-common \ + --libdir=/usr/lib/postgresql/16/lib \ + --includedir=/usr/include/postgresql/16 \ + --with-openssl \ + --with-libxml \ + --with-libxslt \ + --with-icu \ + --with-uuid=e2fs \ + --with-lz4 \ + --with-zstd \ + --with-systemd \ + --with-pgport=5432 + make -j$(nproc) world + make install-world + # Link system zoneinfo as PG timezone data (source tarball lacks timezone source files) + ln -sfn /usr/share/zoneinfo /usr/share/postgresql/16/timezone cd /tmp - rm -rf postgres_apt springtail-pg.tar.gz + rm -rf postgresql-16.9 springtail-pg.tar.gz - name: Enable PG16 ansible.builtin.copy: From 5f31b8246a6f36fa2ab3fc68e6d26a24a5c392f4 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Tue, 28 Apr 2026 21:59:51 -0700 Subject: [PATCH 09/11] Fix two DDL/DML race conditions in pg_log_mgr - pg_log_reader: cap valid_until on cached pre-resync schemas synchronously inside _check_sync_commit, before the reader can decode subsequent messages. The committer thread's later record_schema_change call (committer.cc:847) was leaving a window after SyncTracker::clear_tables during which add_mutation fell into the cache path and stamped a stale ExtentSchemaPtr into the TableEntry, causing the producer to misinterpret post-ALTER 6-column messages through a 5-column pg_fields decoder. - committer: register min_index_xid via insert_index_xid before the per-xid commit loop runs, instead of after process_requests is invoked. The previous ordering let last_committed_xid advance past the future idx._xid while min_index_xid was still unregistered, briefly raising the vacuum cutoff and letting the vacuumer hole-punch extents the upcoming index reconciliation needed to follow via prev_offset. Two safety-net DCHECKs are added so future regressions of either invariant fail loudly: - committer::_process_extent: DCHECK_EQ producer/consumer row_size on every write-cache extent so a schema-version mismatch surfaces at the source of the misread, not as an "Invalid operation: 0" downstream. - indexer::_reconcile_index: DCHECK that prev_extent read from disk is non-null while min_index_xid is registered. Repro: PARALLEL_DML_And_DDL.sql integration test. --- src/pg_log_mgr/committer.cc | 67 ++++++++++++++++++++++++--------- src/pg_log_mgr/indexer.cc | 10 +++++ src/pg_log_mgr/pg_log_reader.cc | 11 ++++++ 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/src/pg_log_mgr/committer.cc b/src/pg_log_mgr/committer.cc index 207d6c6f6..1df7005ce 100644 --- a/src/pg_log_mgr/committer.cc +++ b/src/pg_log_mgr/committer.cc @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -405,6 +407,32 @@ namespace springtail::committer { Coordinator::get_instance()->register_thread(daemon_type, "committer"); + // Pre-collect index requests for this batch and register min_index_xid + // BEFORE the per-xid commit loop runs. The vacuumer's cutoff_xid is + // min(min_fdw_xid, last_committed_xid, min_index_xid); without an + // upfront registration, last_committed_xid can advance past final_xid + // while min_index_xid is still unregistered, letting the vacuumer + // hole-punch extents that the upcoming index build / reconciliation + // will need to follow via prev_offset. insert_index_xid is an + // idempotent zadd, and Indexer::build / Indexer::drop will redundantly + // re-register at line 144 / 160 (which is the authoritative path for + // recover_indexes after a restart). + std::list combined_index_requests; + for (auto& result : batch->xid_results()) { + uint64_t xid = result->xact().xid(); + auto index_requests = _index_requests_mgr->get_index_requests(db_id, xid); + if (!index_requests.empty()) { + combined_index_requests.splice(combined_index_requests.end(), index_requests); + } + } + for (const auto& index_request : combined_index_requests) { + const auto& action = index_request.action(); + if (action == "create_index" || action == "drop_index") { + RedisDDL::get_instance()->insert_index_xid(db_id, final_xid); + break; + } + } + // process each XID in the batch // Note: xid_results() is not thread-safe, but safe here since all workers are done for (auto& result : batch->xid_results()) { @@ -512,29 +540,19 @@ namespace springtail::committer { LOG_DEBUG(LOG_COMMITTER, LOG_LEVEL_DEBUG1, "XID completed: {}@{}", db_id, xid); } - // Collect all index requests across all XIDs in the batch - std::list combined_index_requests; - for (auto& result : batch->xid_results()) { - uint64_t xid = result->xact().xid(); - - auto index_requests = _index_requests_mgr->get_index_requests(db_id, xid); - if (!index_requests.empty()) { - // Append to combined list - combined_index_requests.splice(combined_index_requests.end(), index_requests); - } - } - - // Process all index requests once at the final XID + // Process all index requests once at the final XID. The list was + // collected before the commit loop above so that min_index_xid could + // be registered upfront; here we hand the same list to the indexer. if (!combined_index_requests.empty()) { - auto final_xid = _completed_xids[db_id]; + auto completed_final_xid = _completed_xids[db_id]; LOG_DEBUG(LOG_COMMITTER, LOG_LEVEL_DEBUG1, "Processing {} index requests for batch at final_xid {}", - combined_index_requests.size(), final_xid); + combined_index_requests.size(), completed_final_xid); // Handle all index operations at final_xid - _indexer->process_requests(db_id, final_xid, combined_index_requests); + _indexer->process_requests(db_id, completed_final_xid, combined_index_requests); // Check and notify vacuumer about dropped indexes (use final_xid for all) - _expire_index_drops(db_id, combined_index_requests, final_xid); + _expire_index_drops(db_id, combined_index_requests, completed_final_xid); } // clear the table cache for this batch @@ -983,6 +1001,21 @@ namespace springtail::committer { Extent extent(*wc_extent->data); LOG_DEBUG(LOG_COMMITTER, LOG_LEVEL_DEBUG1, "xid={} rows={}", xid.xid, extent.row_count()); + // Defense-in-depth: the producer (pg_log_reader) and the consumer (this + // path) must agree on the pg-log batch schema for this (db, tid). If + // they don't, op_f / wc_fields point at the wrong byte offsets and the + // op switch below silently mis-reads (we've seen "Invalid operation: 0" + // from this when a resync swap raced an in-flight pg_xid). Catch it + // here at the source of the misread instead. + if (extent.header().row_size != wc_schema->row_size()) { + LOG_ERROR("wc-extent row_size mismatch: db={} tid={} xid={}:{} producer_row_size={} consumer_row_size={} consumer_columns=[{}]", + db_id, tid, xid.xid, xid.lsn, + extent.header().row_size, wc_schema->row_size(), + fmt::join(wc_schema->column_order(), ",")); + } + DCHECK_EQ(extent.header().row_size, wc_schema->row_size()) + << "wc-extent row_size mismatch — schema-version mismatch (resync race?)"; + // To add internal_row_id as part of INSERTS. Other mutations will have // internal_row_id as part of wc_extent as-is auto internal_row_id_field = std::make_shared(1); diff --git a/src/pg_log_mgr/indexer.cc b/src/pg_log_mgr/indexer.cc index 0148d1b63..5786fa271 100644 --- a/src/pg_log_mgr/indexer.cc +++ b/src/pg_log_mgr/indexer.cc @@ -646,6 +646,16 @@ namespace springtail::committer { // Get the previous extent and its schema auto [prev_extent, tmp_next_eid] = table->read_extent_from_disk(prev_eid); + // Invariant: while min_index_xid is registered for this + // index, the vacuum cutoff cannot advance past idx._xid, + // so any extent reachable via prev_offset from a + // post-snapshot extent must still be on disk. A null here + // means the registration window reopened (e.g. someone + // moved insert_index_xid back after commit_xid). + DCHECK(prev_extent != nullptr) + << "prev_extent unreadable at offset " << prev_eid + << " for db " << db_id << " table " << idx_state._tid + << " idx " << index_id << " idx_xid " << idx_state._idx._xid; auto prev_xid = prev_extent->header().xid; auto prev_schema = TableMgr::get_instance()->get_extent_schema(db_id, idx_state._tid, XidLsn(prev_xid), {PgExtnRegistry::get_instance()->comparator_func}); auto internal_row_id_f = prev_schema->get_field(constant::INTERNAL_ROW_ID); diff --git a/src/pg_log_mgr/pg_log_reader.cc b/src/pg_log_mgr/pg_log_reader.cc index a92bc9e1e..b86635f56 100644 --- a/src/pg_log_mgr/pg_log_reader.cc +++ b/src/pg_log_mgr/pg_log_reader.cc @@ -1299,6 +1299,17 @@ namespace springtail::pg_log_mgr { // note: this will also invalidate the table's client cache entry auto swap_ddl_ops = server->swap_sync_table(*namespace_req, *create, indexes_vec, *roots); + // Synchronously cap valid_until on any cached pre-resync schema for + // this tid so the very next add_mutation on this thread either MISSes + // and re-fetches from sys_tbl_mgr, or HITs a freshly-cached new + // version. Without this, the committer thread's later call to + // record_schema_change (committer.cc:847) leaves a window after + // SyncTracker::clear_tables (above) but before the cap is applied, + // during which add_mutation falls into the cache path and stamps a + // stale ExtentSchemaPtr into TableEntry::schema. The committer's + // later call becomes idempotent. + TableMgr::get_instance()->record_schema_change(_db_id, entry->table_id, XidLsn{xid}); + // store the ddl mutations for the FDWs ddls.insert(ddls.end(), std::make_move_iterator(swap_ddl_ops.begin()), std::make_move_iterator(swap_ddl_ops.end())); From 4af8ff45bc11c419f501c39fb803228658d32635 Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Wed, 29 Apr 2026 09:04:33 -0700 Subject: [PATCH 10/11] Fix premature min_index_xid removal across DDL batch siblings When a single batch's final_xid carries multiple index DDLs (e.g. a drop_index plus several create_index requests on the same table), every resulting IndexState ends up bucketed under the same idx._xid. The Redis sorted set backing min_index_xid keys entries by xid, so all siblings share one entry. _reconcile_index was calling remove_index_xid per IndexState, so the first sibling to finish (typically the drop, which has no build phase) yanked the pin while the rest were still walking prev_offset chains -- letting the vacuumer punch chain extents the in-flight reconciliation needed and tripping the indexer.cc:655 DCHECK. Move the remove_index_xid call into process_index_reconciliation, keyed off reconcile_xid, so the pin is dropped exactly once after every sibling has finished. Repro: PARALLEL_DML_And_DDL.sql nightly integration run; observed cutoff_xid jumping from 250 to 252 mid-reconciliation when 34189's drop released the pin ahead of 34246/34247/34248's catch-up walks. --- src/pg_log_mgr/indexer.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/pg_log_mgr/indexer.cc b/src/pg_log_mgr/indexer.cc index 5786fa271..25a6ef39c 100644 --- a/src/pg_log_mgr/indexer.cc +++ b/src/pg_log_mgr/indexer.cc @@ -577,6 +577,13 @@ namespace springtail::committer { _reconcile_index(idx_state, end_xid); } + // Drop the min_index_xid pin only after every IndexState bucketed + // under reconcile_xid is done. The Redis sorted set keys this entry + // by xid, so a single drop_index/create_index sharing reconcile_xid + // would otherwise yank the pin while the rest of the list is still + // walking prev_offset chains the vacuumer could now punch. + RedisDDL::get_instance()->remove_index_xid(db_id, reconcile_xid); + // Clean up if entries are empty xid_map.erase(xid_it); // Remove processed xid entry if (xid_map.empty()) { @@ -708,9 +715,6 @@ namespace springtail::committer { LOG_DEBUG(LOG_COMMITTER, LOG_LEVEL_DEBUG1, "Initiating Index commit: {}:{}", db_id, index_id); _commit_build(idx_state._root, idx_state._key, idx_state._idx, end_xid, idx_state._look_aside_root); } - - // Remove XID from the tracker - RedisDDL::get_instance()->remove_index_xid(db_id, idx_state._idx._xid); } void Indexer::_remove_index_key(uint64_t db_id, uint64_t table_id, const Key& key) From bf7b791d3a65e2aef5053701f07188feddac73be Mon Sep 17 00:00:00 2001 From: Craig Soules Date: Wed, 29 Apr 2026 16:50:19 -0700 Subject: [PATCH 11/11] remove the query_benchmark test set from the nightly debug run --- python/testing/test_runner.py | 27 +++++++++++++++++++++----- python/testing/test_runner_config.yaml | 2 ++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/python/testing/test_runner.py b/python/testing/test_runner.py index 3ca4ef295..5a059d2f5 100644 --- a/python/testing/test_runner.py +++ b/python/testing/test_runner.py @@ -23,7 +23,8 @@ def generate_tests(test_folder: str, build_dir: str, test_params: Optional[dict], overlay: Optional[str], - valgrind_daemons: list[str] = []) -> list[TestSet]: + valgrind_daemons: list[str] = [], + exclude_test_sets: list[str] = []) -> list[TestSet]: """ Generates a list of TestSet objects based on the provided configuration and test inputs. @@ -35,6 +36,7 @@ def generate_tests(test_folder: str, build_dir (str): Top level build directory. test_params (dict | None): Optional test parameters for modifying test environment. overlay (str | None): Optional overlay name used for the tests. + exclude_test_sets (list[str]): Test set names to drop from the resolved list. Returns: list[TestSet]: A list of TestSet objects ready for execution. @@ -44,7 +46,8 @@ def generate_tests(test_folder: str, f'Generate test: test_folder: {test_folder}, ' f'test_set: {"ALL" if test_set is None else test_set}, ' f'test_files: {", ".join(test_files) if len(test_files) != 0 else "ALL"}, ' - f'overlay: {overlay}' + f'overlay: {overlay}, ' + f'exclude_test_sets: {exclude_test_sets if exclude_test_sets else "NONE"}' ) test_sets = [] @@ -58,6 +61,13 @@ def generate_tests(test_folder: str, raise ValueError(f"Test set directory {test_set_file_path} does not exists") test_sets.append(t) + for excluded in exclude_test_sets: + excluded_path = os.path.join(test_folder, excluded) + if not os.path.isdir(excluded_path): + logging.error(f"Excluded test set directory {excluded_path} does not exist") + raise ValueError(f"Excluded test set directory {excluded_path} does not exist") + test_sets = [ts for ts in test_sets if ts not in exclude_test_sets] + tests = [] for ts in test_sets: if not os.path.isfile(os.path.join(test_folder, ts, '__config.sql')): @@ -89,7 +99,8 @@ def generate_tests_for_overlay(test_folder: str, test_files: list[str], default_config: dict, overlay: Optional[str], - valgrind_daemons: list[str] = []) -> list[TestSet]: + valgrind_daemons: list[str] = [], + exclude_test_sets: list[str] = []) -> list[TestSet]: """ Generates a list of TestSet objects using overlay-specific configurations. @@ -106,6 +117,7 @@ def generate_tests_for_overlay(test_folder: str, test_files (list[str]): List of test file names to process. If empty, all files will be included. default_config (dict): Default configuration stored in system_json_path. overlay (str | None): Optional overlay name used for the tests. + exclude_test_sets (list[str]): Test set names to drop from the resolved list. Returns: list[TestSet]: A list of TestSet objects configured with the specified overlay. @@ -144,7 +156,7 @@ def generate_tests_for_overlay(test_folder: str, json.dump(merged_config, f, indent=2) # generate tests - return generate_tests(test_folder, test_set, test_files, config_json_path, build_dir, overlay_params, overlay, valgrind_daemons) + return generate_tests(test_folder, test_set, test_files, config_json_path, build_dir, overlay_params, overlay, valgrind_daemons, exclude_test_sets) def try_generate_junit(junit_file: str, test_sets: list[TestSet]) -> None: @@ -249,7 +261,12 @@ def parse_arguments(): logging.error(f'Configuration "{args.config}": test_sets is empty') raise ValueError(f'Configuration "{args.config}" test_sets is empty') - tests += generate_tests_for_overlay(test_folder, build_dir, system_json_path, tmp_config_dir, overlays_config, test_sets, [], default_config, overlay, args.valgrind) + exclude_test_sets = overlay_item.get('exclude_test_sets', []) + if not isinstance(exclude_test_sets, list) or not all(isinstance(x, str) for x in exclude_test_sets): + logging.error(f'Configuration "{args.config}": exclude_test_sets must be a list of strings') + raise ValueError(f'Configuration "{args.config}" exclude_test_sets must be a list of strings') + + tests += generate_tests_for_overlay(test_folder, build_dir, system_json_path, tmp_config_dir, overlays_config, test_sets, [], default_config, overlay, args.valgrind, exclude_test_sets) else: test_sets = [args.test_set] if args.test_set is not None else None tests += generate_tests_for_overlay(test_folder, build_dir, system_json_path, tmp_config_dir, overlays_config, test_sets, args.test_case, default_config, args.overlay, args.valgrind) diff --git a/python/testing/test_runner_config.yaml b/python/testing/test_runner_config.yaml index afe46e2a9..b2407cf42 100644 --- a/python/testing/test_runner_config.yaml +++ b/python/testing/test_runner_config.yaml @@ -43,6 +43,8 @@ configs: nightly: - overlay: ~ test_sets: 'all' + # query_benchmark is intended for the release build, not debug + exclude_test_sets: [ 'query_benchmark' ] - overlay: 'small_log_rotate' test_sets: [ 'recovery' ] - overlay: 'small_log_rotate_with_streaming'