From 2e230213a50388578cca8450cfa2d45f65518429 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Sat, 1 Nov 2025 12:50:51 +0300 Subject: [PATCH 01/25] done --- ydb/public/api/protos/ydb_import.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/public/api/protos/ydb_import.proto b/ydb/public/api/protos/ydb_import.proto index 4fb733d9f4bd..3bf71ee50ab0 100644 --- a/ydb/public/api/protos/ydb_import.proto +++ b/ydb/public/api/protos/ydb_import.proto @@ -122,7 +122,7 @@ message ImportFromS3Response { Ydb.Operations.Operation operation = 1; } -/// File system (FS) +// File system (FS) message ImportFromFsSettings { message Item { /* YDB tables in FS are stored in a directory structure (see ydb_export.proto). From 61a3ac621d735e94f0ae080a4facfee65c520fa4 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Wed, 5 Nov 2025 16:17:32 +0300 Subject: [PATCH 02/25] small fix --- ydb/public/api/protos/ydb_import.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/public/api/protos/ydb_import.proto b/ydb/public/api/protos/ydb_import.proto index 3bf71ee50ab0..4fb733d9f4bd 100644 --- a/ydb/public/api/protos/ydb_import.proto +++ b/ydb/public/api/protos/ydb_import.proto @@ -122,7 +122,7 @@ message ImportFromS3Response { Ydb.Operations.Operation operation = 1; } -// File system (FS) +/// File system (FS) message ImportFromFsSettings { message Item { /* YDB tables in FS are stored in a directory structure (see ydb_export.proto). From f6e29682cf0eb845f4a9118c76b8bb9ffeb5fd65 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Thu, 6 Nov 2025 12:33:54 +0300 Subject: [PATCH 03/25] fix tests --- ydb/core/cms/console/console_configs_subscriber.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ydb/core/cms/console/console_configs_subscriber.cpp b/ydb/core/cms/console/console_configs_subscriber.cpp index 9efd9af599ae..4b8d2336e6a9 100644 --- a/ydb/core/cms/console/console_configs_subscriber.cpp +++ b/ydb/core/cms/console/console_configs_subscriber.cpp @@ -232,6 +232,13 @@ class TConfigsSubscriber : public TActorBootstrapped { notChanged = false; } + if (!notChanged) { + CurrentDynConfig.Clear(); + if (rec.HasConfig()) { + CurrentDynConfig.CopyFrom(rec.GetConfig()); + } + } + if (rec.VolatileConfigsSize() != VolatileYamlConfigs.size()) { notChanged = false; } From 5e2b3ac11319bf4869cf3e7e5be675814f5403ff Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 7 Nov 2025 11:32:46 +0300 Subject: [PATCH 04/25] remove --- ydb/core/cms/console/console_configs_subscriber.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ydb/core/cms/console/console_configs_subscriber.cpp b/ydb/core/cms/console/console_configs_subscriber.cpp index 4b8d2336e6a9..9efd9af599ae 100644 --- a/ydb/core/cms/console/console_configs_subscriber.cpp +++ b/ydb/core/cms/console/console_configs_subscriber.cpp @@ -232,13 +232,6 @@ class TConfigsSubscriber : public TActorBootstrapped { notChanged = false; } - if (!notChanged) { - CurrentDynConfig.Clear(); - if (rec.HasConfig()) { - CurrentDynConfig.CopyFrom(rec.GetConfig()); - } - } - if (rec.VolatileConfigsSize() != VolatileYamlConfigs.size()) { notChanged = false; } From 67713920a2a1cc5580657e780f6c51d709ed4b9e Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Mon, 10 Nov 2025 17:37:29 +0300 Subject: [PATCH 05/25] export --- ydb/core/protos/flat_scheme_op.proto | 3 + ydb/core/protos/fs_settings.proto | 10 ++ ydb/core/protos/ya.make | 1 + .../tx/schemeshard/schemeshard_audit_log.cpp | 3 + .../tx/schemeshard/schemeshard_export.cpp | 4 + .../schemeshard_export__create.cpp | 16 ++ .../schemeshard/schemeshard_export__list.cpp | 2 + .../schemeshard_export_flow_proposals.cpp | 19 ++ .../tx/schemeshard/schemeshard_info_types.h | 1 + .../tx/schemeshard/ut_export/ut_export_fs.cpp | 167 ++++++++++++++++++ ydb/core/tx/schemeshard/ut_export/ya.make | 1 + 11 files changed, 227 insertions(+) create mode 100644 ydb/core/protos/fs_settings.proto create mode 100644 ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 6eaa71c40e15..425c0dcd6be3 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -5,6 +5,7 @@ import "ydb/core/protos/channel_purpose.proto"; import "ydb/core/protos/compaction.proto"; import "ydb/core/protos/filestore_config.proto"; import "ydb/core/protos/follower_group.proto"; +import "ydb/core/protos/fs_settings.proto"; import "ydb/core/protos/index_builder.proto"; import "ydb/core/protos/pqconfig.proto"; import "ydb/core/protos/replication.proto"; @@ -1320,6 +1321,7 @@ message TBackupTask { oneof Settings { TYTSettings YTSettings = 4; NKikimrSchemeOp.TS3Settings S3Settings = 9; + NKikimrSchemeOp.TFSSettings FSSettings = 20; } optional TPathDescription Table = 10; // for further restore @@ -1367,6 +1369,7 @@ message TRestoreTask { oneof Settings { NKikimrSchemeOp.TS3Settings S3Settings = 6; + NKikimrSchemeOp.TFSSettings FSSettings = 9; } optional bool ValidateChecksums = 7; // currently available for s3 diff --git a/ydb/core/protos/fs_settings.proto b/ydb/core/protos/fs_settings.proto new file mode 100644 index 000000000000..14414ce1ddae --- /dev/null +++ b/ydb/core/protos/fs_settings.proto @@ -0,0 +1,10 @@ +package NKikimrSchemeOp; +option java_package = "ru.yandex.kikimr.proto"; + +message TFSSettings { + optional string BasePath = 1; // Base path on the file system (e.g., /mnt/exports) + optional string Path = 2; // Relative path for this specific backup/restore operation +} + + + diff --git a/ydb/core/protos/ya.make b/ydb/core/protos/ya.make index 2eaea64c1951..edb2b7491d78 100644 --- a/ydb/core/protos/ya.make +++ b/ydb/core/protos/ya.make @@ -82,6 +82,7 @@ SRCS( flat_scheme_op.proto flat_tx_scheme.proto follower_group.proto + fs_settings.proto grpc.proto grpc_pq_old.proto grpc_status_proxy.proto diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp index 06540f533841..a4de89f341a0 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp @@ -437,6 +437,9 @@ void AuditLogExportEnd(const TExportInfo& info, TSchemeShard* SS) { proto.MutableExportToS3Settings()->clear_access_key(); proto.MutableExportToS3Settings()->clear_secret_key(); break; + case TExportInfo::EKind::FS: + Y_ABORT_UNLESS(proto.MutableExportToFsSettings()->ParseFromString(info.Settings)); + break; } _AuditLogXxportEnd(info, "EXPORT END", ExportKindSpecificParts(proto), SS); } diff --git a/ydb/core/tx/schemeshard/schemeshard_export.cpp b/ydb/core/tx/schemeshard/schemeshard_export.cpp index bbf417e1160b..613ab498dae4 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export.cpp @@ -140,6 +140,10 @@ void TSchemeShard::FromXxportInfo(NKikimrExport::TExport& exprt, const TExportIn exprt.MutableExportToS3Settings()->clear_access_key(); exprt.MutableExportToS3Settings()->clear_secret_key(); break; + + case TExportInfo::EKind::FS: + Y_ABORT_UNLESS(exprt.MutableExportToFsSettings()->ParseFromString(exportInfo.Settings)); + break; } } diff --git a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp index 8df1b37af75c..84e9e87d7ee6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp @@ -148,6 +148,22 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { } break; + case NKikimrExport::TCreateExportRequest::kExportToFsSettings: + { + const auto& settings = request.GetRequest().GetExportToFsSettings(); + exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::FS, settings, domainPath.Base()->PathId, request.GetPeerName()); + exportInfo->EnableChecksums = AppData()->FeatureFlags.GetEnableChecksumsExport(); + exportInfo->EnablePermissions = AppData()->FeatureFlags.GetEnablePermissionsExport(); + TString explain; + if (!FillItems(*exportInfo, settings, explain)) { + return Reply( + std::move(response), + Ydb::StatusIds::BAD_REQUEST, + TStringBuilder() << "Failed item check: " << explain + ); + } + } + break; default: Y_DEBUG_ABORT("Unknown export kind"); } diff --git a/ydb/core/tx/schemeshard/schemeshard_export__list.cpp b/ydb/core/tx/schemeshard/schemeshard_export__list.cpp index 2507bb6bfbfd..e3c21d95db56 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export__list.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export__list.cpp @@ -21,6 +21,8 @@ struct TSchemeShard::TExport::TTxList: public TSchemeShard::TXxport::TTxList< static bool TryParseKind(const TString& str, TExportInfo::EKind& parsed) { if (str == "export/s3") { parsed = TExportInfo::EKind::S3; + } else if (str == "export/fs") { + parsed = TExportInfo::EKind::FS; } else { // fallback to yt parsed = TExportInfo::EKind::YT; } diff --git a/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp b/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp index eeeb8bf79f21..00e419294151 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export_flow_proposals.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -256,6 +257,24 @@ THolder BackupPropose( } } break; + case TExportInfo::EKind::FS: + { + Ydb::Export::ExportToFsSettings exportSettings; + Y_ABORT_UNLESS(exportSettings.ParseFromString(exportInfo.Settings)); + + task.SetNumberOfRetries(exportSettings.number_of_retries()); + auto& backupSettings = *task.MutableFSSettings(); + backupSettings.SetBasePath(exportSettings.base_path()); + backupSettings.SetPath(exportSettings.items(itemIdx).destination_path()); + + if (const auto compression = exportSettings.compression()) { + Y_ABORT_UNLESS(FillCompression(*task.MutableCompression(), compression)); + } + + task.SetEnableChecksums(exportInfo.EnableChecksums); + task.SetEnablePermissions(exportInfo.EnablePermissions); + } + break; } return propose; diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 4bac42385e60..f1116c9f4e5c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2858,6 +2858,7 @@ struct TExportInfo: public TSimpleRefCount { enum class EKind: ui8 { YT = 0, S3, + FS, }; struct TItem { diff --git a/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp new file mode 100644 index 000000000000..d5b58bd4c8f5 --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp @@ -0,0 +1,167 @@ +#include + +#include + +using namespace NSchemeShardUT_Private; + +Y_UNIT_TEST_SUITE(TSchemeShardExportToFsTests) { + Y_UNIT_TEST(ShouldSucceedCreateExportToFs) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + // Test that schemeshard accepts ExportToFsSettings + TestExport(runtime, ++txId, "/MyRoot", R"( + ExportToFsSettings { + base_path: "/mnt/exports" + items { + source_path: "/MyRoot/Table" + destination_path: "backup/Table" + } + } + )"); + + // Check that export was created + auto response = TestGetExport(runtime, txId, "/MyRoot"); + UNIT_ASSERT(response.GetResponse().GetEntry().HasExportToFsSettings()); + + const auto& settings = response.GetResponse().GetEntry().GetExportToFsSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.base_path(), "/mnt/exports"); + UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 1); + UNIT_ASSERT_VALUES_EQUAL(settings.items(0).source_path(), "/MyRoot/Table"); + UNIT_ASSERT_VALUES_EQUAL(settings.items(0).destination_path(), "backup/Table"); + } + + Y_UNIT_TEST(ShouldAcceptCompressionForFs) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TestExport(runtime, ++txId, "/MyRoot", R"( + ExportToFsSettings { + base_path: "/mnt/exports" + compression: "zstd-3" + items { + source_path: "/MyRoot/Table" + destination_path: "backup/Table" + } + } + )"); + + auto response = TestGetExport(runtime, txId, "/MyRoot"); + UNIT_ASSERT(response.GetResponse().GetEntry().HasExportToFsSettings()); + + const auto& settings = response.GetResponse().GetEntry().GetExportToFsSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.compression(), "zstd-3"); + } + + Y_UNIT_TEST(ShouldFailOnNonExistentPath) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TestExport(runtime, ++txId, "/MyRoot", R"( + ExportToFsSettings { + base_path: "/mnt/exports" + items { + source_path: "/MyRoot/NonExistentTable" + destination_path: "backup/Table" + } + } + )", "", "", Ydb::StatusIds::BAD_REQUEST); + } + + Y_UNIT_TEST(ShouldFailOnDeletedPath) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "TableToDelete" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TestDropTable(runtime, ++txId, "/MyRoot", "TableToDelete"); + env.TestWaitNotification(runtime, txId); + + TestExport(runtime, ++txId, "/MyRoot", R"( + ExportToFsSettings { + base_path: "/mnt/exports" + items { + source_path: "/MyRoot/TableToDelete" + destination_path: "backup/Table" + } + } + )", "", "", Ydb::StatusIds::BAD_REQUEST); + } + + Y_UNIT_TEST(FsExportWithMultipleTables) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table1" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table2" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TestExport(runtime, ++txId, "/MyRoot", R"( + ExportToFsSettings { + base_path: "/mnt/exports" + items { + source_path: "/MyRoot/Table1" + destination_path: "backup/Table1" + } + items { + source_path: "/MyRoot/Table2" + destination_path: "backup/Table2" + } + } + )"); + + auto response = TestGetExport(runtime, txId, "/MyRoot"); + UNIT_ASSERT(response.GetResponse().GetEntry().HasExportToFsSettings()); + + const auto& settings = response.GetResponse().GetEntry().GetExportToFsSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 2); + } +} + diff --git a/ydb/core/tx/schemeshard/ut_export/ya.make b/ydb/core/tx/schemeshard/ut_export/ya.make index df6ca93fb643..de730e88e51a 100644 --- a/ydb/core/tx/schemeshard/ut_export/ya.make +++ b/ydb/core/tx/schemeshard/ut_export/ya.make @@ -28,6 +28,7 @@ IF (NOT OS_WINDOWS) ) SRCS( ut_export.cpp + ut_export_fs.cpp ) ENDIF() From 83ba96aa919d8af767b32bfb6c8615ae3aa9da45 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Tue, 11 Nov 2025 13:26:49 +0300 Subject: [PATCH 06/25] restore (without scheme getter) --- ydb/core/tx/schemeshard/schemeshard__init.cpp | 4 +- .../tx/schemeshard/schemeshard_audit_log.cpp | 12 +- .../tx/schemeshard/schemeshard_import.cpp | 13 +- .../schemeshard_import__create.cpp | 104 +++++++++++-- .../schemeshard_import_flow_proposals.cpp | 51 ++++-- .../schemeshard_import_getters.cpp | 19 +-- .../tx/schemeshard/schemeshard_info_types.cpp | 9 +- .../tx/schemeshard/schemeshard_info_types.h | 82 +++++++++- .../schemeshard/ut_restore/ut_restore_fs.cpp | 146 ++++++++++++++++++ ydb/core/tx/schemeshard/ut_restore/ya.make | 1 + 10 files changed, 385 insertions(+), 56 deletions(-) create mode 100644 ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index c26815a43e20..2a51a04d7057 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -4541,9 +4541,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { rowset.GetValue()); TString peerName = rowset.GetValueOrDefault(); - Ydb::Import::ImportFromS3Settings settings; - Y_ABORT_UNLESS(ParseFromStringNoSizeLimit(settings, rowset.GetValue())); - + TString settings = rowset.GetValue(); TImportInfo::TPtr importInfo = new TImportInfo(id, uid, kind, settings, domainPathId, peerName); if (rowset.HaveValue()) { diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp index a4de89f341a0..397c0b3e7f46 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp @@ -346,6 +346,16 @@ template <> TParts ImportKindSpecificParts(const Ydb::Import::ImportFromFsSettin }; } +TParts ImportKindSpecificParts(const TImportInfo& info) { + switch (info.Kind) { + case TImportInfo::EKind::S3: + return ImportKindSpecificParts(info.GetS3Settings()); + case TImportInfo::EKind::FS: + return ImportKindSpecificParts(info.GetFsSettings()); + } + return {}; +} + } // anonymous namespace template @@ -444,7 +454,7 @@ void AuditLogExportEnd(const TExportInfo& info, TSchemeShard* SS) { _AuditLogXxportEnd(info, "EXPORT END", ExportKindSpecificParts(proto), SS); } void AuditLogImportEnd(const TImportInfo& info, TSchemeShard* SS) { - _AuditLogXxportEnd(info, "IMPORT END", ImportKindSpecificParts(info.Settings), SS); + _AuditLogXxportEnd(info, "IMPORT END", ImportKindSpecificParts(info), SS); } } diff --git a/ydb/core/tx/schemeshard/schemeshard_import.cpp b/ydb/core/tx/schemeshard/schemeshard_import.cpp index 02b48060d451..163ae838aa96 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import.cpp @@ -105,19 +105,26 @@ void TSchemeShard::FromXxportInfo(NKikimrImport::TImport& import, const TImportI } switch (importInfo.Kind) { - case TImportInfo::EKind::S3: - import.MutableImportFromS3Settings()->CopyFrom(importInfo.Settings); + case TImportInfo::EKind::S3: { + Ydb::Import::ImportFromS3Settings settings = importInfo.GetS3Settings(); + import.MutableImportFromS3Settings()->CopyFrom(settings); import.MutableImportFromS3Settings()->clear_access_key(); import.MutableImportFromS3Settings()->clear_secret_key(); break; } + case TImportInfo::EKind::FS: { + Ydb::Import::ImportFromFsSettings settings = importInfo.GetFsSettings(); + import.MutableImportFromFsSettings()->CopyFrom(settings); + break; + } + } } void TSchemeShard::PersistCreateImport(NIceDb::TNiceDb& db, const TImportInfo& importInfo) { db.Table().Key(importInfo.Id).Update( NIceDb::TUpdate(importInfo.Uid), NIceDb::TUpdate(static_cast(importInfo.Kind)), - NIceDb::TUpdate(importInfo.Settings.SerializeAsString()), + NIceDb::TUpdate(importInfo.Settings), NIceDb::TUpdate(importInfo.DomainPathId.OwnerId), NIceDb::TUpdate(importInfo.DomainPathId.LocalPathId), NIceDb::TUpdate(importInfo.Items.size()), diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index 7cf443003295..8e73ec88d11b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -208,6 +208,23 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { } break; + case NKikimrImport::TCreateImportRequest::kImportFromFsSettings: + { + const auto& settings = request.GetRequest().GetImportFromFsSettings(); + + importInfo = new TImportInfo(id, uid, TImportInfo::EKind::FS, settings, domainPath.Base()->PathId, request.GetPeerName()); + + if (request.HasUserSID()) { + importInfo->UserSID = request.GetUserSID(); + } + + TString explain; + if (!FillItems(*importInfo, settings, explain)) { + return Reply(std::move(response), Ydb::StatusIds::BAD_REQUEST, explain); + } + } + break; + default: Y_DEBUG_ABORT("Unknown import kind"); } @@ -267,23 +284,35 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { return true; } - template - bool FillItems(TImportInfo& importInfo, const TSettings& settings, TString& explain) { + // Common helper to validate destination path + bool ValidateAndAddDestinationPath(const TString& dstPath, THashSet& dstPaths, TString& explain) { + if (dstPath) { + if (!dstPaths.insert(NBackup::NormalizeItemPath(dstPath)).second) { + explain = TStringBuilder() << "Duplicate destination_path: " << dstPath; + return false; + } + + if (!ValidateImportDstPath(dstPath, Self, explain)) { + return false; + } + } + return true; + } + + // S3-specific FillItems + bool FillItems(TImportInfo& importInfo, const Ydb::Import::ImportFromS3Settings& settings, TString& explain) { THashSet dstPaths; importInfo.Items.reserve(settings.items().size()); for (ui32 itemIdx : xrange(settings.items().size())) { const TString& dstPath = settings.items(itemIdx).destination_path(); - if (dstPath) { - if (!dstPaths.insert(NBackup::NormalizeItemPath(dstPath)).second) { - explain = TStringBuilder() << "Duplicate destination_path: " << dstPath; - return false; - } + + if (!ValidateAndAddDestinationPath(dstPath, dstPaths, explain)) { + return false; + } - if (!ValidateImportDstPath(dstPath, Self, explain)) { - return false; - } - } else if (settings.source_prefix().empty()) { // Can not take path from schema mapping + if (!dstPath && settings.source_prefix().empty()) { + // Can not take path from schema mapping explain = "No common source prefix and item destination path set"; return false; } @@ -296,6 +325,37 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { return true; } + // FS-specific FillItems + bool FillItems(TImportInfo& importInfo, const Ydb::Import::ImportFromFsSettings& settings, TString& explain) { + THashSet dstPaths; + + importInfo.Items.reserve(settings.items().size()); + for (ui32 itemIdx : xrange(settings.items().size())) { + const TString& dstPath = settings.items(itemIdx).destination_path(); + + if (!ValidateAndAddDestinationPath(dstPath, dstPaths, explain)) { + return false; + } + + if (!dstPath) { + explain = "destination_path is required for FS import items"; + return false; + } + + const TString& srcPath = settings.items(itemIdx).source_path(); + if (!srcPath) { + explain = "source_path is required for FS import items"; + return false; + } + + auto& item = importInfo.Items.emplace_back(dstPath); + // For FS imports, source_path is the full relative path from base_path + item.SrcPath = NBackup::NormalizeItemPath(srcPath); + } + + return true; + } + }; // TTxCreate struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase { @@ -404,9 +464,17 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase LOG_I("TImport::TTxProgress: Get scheme" << ": info# " << importInfo->ToString() << ", item# " << item.ToString(itemIdx)); - - item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetter(Self->SelfId(), importInfo, itemIdx, item.ExportItemIV)); - Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); + + if (importInfo->Kind == TImportInfo::EKind::S3) { + item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetter(Self->SelfId(), importInfo, itemIdx, item.ExportItemIV)); + Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); + } else { + LOG_I("TImport::TTxProgress: Get scheme for FS import is not supported" + << ": info# " << importInfo->ToString() + << ", item# " << item.ToString(itemIdx)); + // item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetterFS(Self->SelfId(), importInfo, itemIdx)); + // Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); + } } void GetSchemaMapping(TImportInfo::TPtr importInfo, const TActorContext& ctx) { @@ -1080,8 +1148,12 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase } if (!importInfo->SchemaMapping->Items.empty()) { - if (importInfo->Settings.has_encryption_settings() != importInfo->SchemaMapping->Items[0].IV.Defined()) { - return CancelAndPersist(db, importInfo, -1, {}, "incorrect schema mapping"); + // Only S3 imports support schema mapping with encryption + if (importInfo->Kind == TImportInfo::EKind::S3) { + auto settings = importInfo->GetS3Settings(); + if (settings.has_encryption_settings() != importInfo->SchemaMapping->Items[0].IV.Defined()) { + return CancelAndPersist(db, importInfo, -1, {}, "incorrect schema mapping"); + } } } diff --git a/ydb/core/tx/schemeshard/schemeshard_import_flow_proposals.cpp b/ydb/core/tx/schemeshard/schemeshard_import_flow_proposals.cpp index fd87f36fa171..5cfce72b4e92 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import_flow_proposals.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import_flow_proposals.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -152,27 +153,29 @@ THolder RestoreTableDataPropose( task.SetTableName(dstPath.LeafName()); *task.MutableTableDescription() = RebuildTableDescription(GetTableDescription(ss, item.DstPathId), *item.Table); - if (importInfo.Settings.has_encryption_settings()) { - auto& taskEncryptionSettings = *task.MutableEncryptionSettings(); - *taskEncryptionSettings.MutableSymmetricKey() = importInfo.Settings.encryption_settings().symmetric_key(); - if (item.ExportItemIV) { - taskEncryptionSettings.SetIV(item.ExportItemIV->GetBinaryString()); - } - } - switch (importInfo.Kind) { case TImportInfo::EKind::S3: { - task.SetNumberOfRetries(importInfo.Settings.number_of_retries()); + auto settings = importInfo.GetS3Settings(); + + if (settings.has_encryption_settings()) { + auto& taskEncryptionSettings = *task.MutableEncryptionSettings(); + *taskEncryptionSettings.MutableSymmetricKey() = settings.encryption_settings().symmetric_key(); + if (item.ExportItemIV) { + taskEncryptionSettings.SetIV(item.ExportItemIV->GetBinaryString()); + } + } + + task.SetNumberOfRetries(settings.number_of_retries()); auto& restoreSettings = *task.MutableS3Settings(); - restoreSettings.SetEndpoint(importInfo.Settings.endpoint()); - restoreSettings.SetBucket(importInfo.Settings.bucket()); - restoreSettings.SetAccessKey(importInfo.Settings.access_key()); - restoreSettings.SetSecretKey(importInfo.Settings.secret_key()); + restoreSettings.SetEndpoint(settings.endpoint()); + restoreSettings.SetBucket(settings.bucket()); + restoreSettings.SetAccessKey(settings.access_key()); + restoreSettings.SetSecretKey(settings.secret_key()); restoreSettings.SetObjectKeyPattern(importInfo.GetItemSrcPrefix(itemIdx)); - restoreSettings.SetUseVirtualAddressing(!importInfo.Settings.disable_virtual_addressing()); + restoreSettings.SetUseVirtualAddressing(!settings.disable_virtual_addressing()); - switch (importInfo.Settings.scheme()) { + switch (settings.scheme()) { case Ydb::Import::ImportFromS3Settings::HTTP: restoreSettings.SetScheme(NKikimrSchemeOp::TS3Settings::HTTP); break; @@ -183,12 +186,26 @@ THolder RestoreTableDataPropose( Y_ABORT("Unknown scheme"); } - if (const auto region = importInfo.Settings.region()) { + if (const auto region = settings.region()) { restoreSettings.SetRegion(region); } if (!item.Metadata.HasVersion() || item.Metadata.GetVersion() > 0) { - task.SetValidateChecksums(!importInfo.Settings.skip_checksum_validation()); + task.SetValidateChecksums(!importInfo.GetSkipChecksumValidation()); + } + } + break; + + case TImportInfo::EKind::FS: + { + auto settings = importInfo.GetFsSettings(); + task.SetNumberOfRetries(settings.number_of_retries()); + auto& restoreSettings = *task.MutableFSSettings(); + restoreSettings.SetBasePath(settings.base_path()); + restoreSettings.SetPath(importInfo.GetItemSrcPrefix(itemIdx)); + + if (!item.Metadata.HasVersion() || item.Metadata.GetVersion() > 0) { + task.SetValidateChecksums(!importInfo.GetSkipChecksumValidation()); } } break; diff --git a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp index ae8362e0aacd..e91ab325bfb9 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp @@ -44,10 +44,11 @@ struct TGetterSettings { static TGetterSettings FromImportInfo(const TImportInfo::TPtr& importInfo, TMaybe iv) { TGetterSettings settings; - settings.ExternalStorageConfig.reset(new NWrappers::NExternalStorage::TS3ExternalStorageConfig(importInfo->Settings)); - settings.Retries = importInfo->Settings.number_of_retries(); - if (importInfo->Settings.has_encryption_settings()) { - settings.Key = NBackup::TEncryptionKey(importInfo->Settings.encryption_settings().symmetric_key().key()); + Y_ABORT_UNLESS(importInfo->Kind == TImportInfo::EKind::S3); + settings.ExternalStorageConfig.reset(new NWrappers::NExternalStorage::TS3ExternalStorageConfig(importInfo->GetS3Settings())); + settings.Retries = importInfo->GetS3Settings().number_of_retries(); + if (importInfo->GetS3Settings().has_encryption_settings()) { + settings.Key = NBackup::TEncryptionKey(importInfo->GetS3Settings().encryption_settings().symmetric_key().key()); } settings.IV = std::move(iv); return settings; @@ -772,8 +773,8 @@ class TSchemeGetter: public TGetterFromS3 { , MetadataKey(MetadataKeyFromSettings(*ImportInfo, itemIdx)) , SchemeKey(SchemeKeyFromSettings(*ImportInfo, itemIdx, "scheme.pb")) , PermissionsKey(PermissionsKeyFromSettings(*ImportInfo, itemIdx)) - , NeedDownloadPermissions(!ImportInfo->Settings.no_acl()) - , NeedValidateChecksums(!ImportInfo->Settings.skip_checksum_validation()) + , NeedDownloadPermissions(!ImportInfo->GetNoAcl()) + , NeedValidateChecksums(!ImportInfo->GetSkipChecksumValidation()) { } @@ -851,15 +852,15 @@ class TSchemeGetter: public TGetterFromS3 { class TSchemaMappingGetter : public TGetterFromS3 { static TString MetadataKeyFromSettings(const TImportInfo& importInfo) { - return TStringBuilder() << importInfo.Settings.source_prefix() << "/metadata.json"; + return TStringBuilder() << importInfo.GetS3Settings().source_prefix() << "/metadata.json"; } static TString SchemaMappingKeyFromSettings(const TImportInfo& importInfo) { - return TStringBuilder() << importInfo.Settings.source_prefix() << "/SchemaMapping/mapping.json"; + return TStringBuilder() << importInfo.GetS3Settings().source_prefix() << "/SchemaMapping/mapping.json"; } static TString SchemaMappingMetadataKeyFromSettings(const TImportInfo& importInfo) { - return TStringBuilder() << importInfo.Settings.source_prefix() << "/SchemaMapping/metadata.json"; + return TStringBuilder() << importInfo.GetS3Settings().source_prefix() << "/SchemaMapping/metadata.json"; } void HandleMetadata(TEvExternalStorage::TEvHeadObjectResponse::TPtr& ev) { diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 56043b0ff0c7..7a9c39846929 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -2960,14 +2960,17 @@ NProtoBuf::Timestamp SecondsToProtoTimeStamp(ui64 sec) { TImportInfo::TFillItemsFromSchemaMappingResult TImportInfo::FillItemsFromSchemaMapping(TSchemeShard* ss) { TFillItemsFromSchemaMappingResult result; + Y_ABORT_UNLESS(Kind == EKind::S3); + auto settings = GetS3Settings(); + TString dstRoot; - if (Settings.destination_path().empty()) { + if (settings.destination_path().empty()) { dstRoot = CanonizePath(ss->RootPathElements); } else { - dstRoot = CanonizePath(Settings.destination_path()); + dstRoot = CanonizePath(settings.destination_path()); } - TString sourcePrefix = NBackup::NormalizeExportPrefix(Settings.source_prefix()); + TString sourcePrefix = NBackup::NormalizeExportPrefix(settings.source_prefix()); if (sourcePrefix) { sourcePrefix.push_back('/'); } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index f1116c9f4e5c..28e335528a24 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3031,6 +3031,7 @@ struct TImportInfo: public TSimpleRefCount { enum class EKind: ui8 { S3 = 0, + FS = 1, }; struct TItem { @@ -3090,7 +3091,7 @@ struct TImportInfo: public TSimpleRefCount { ui64 Id; // TxId from the original TEvCreateImportRequest TString Uid; EKind Kind; - Ydb::Import::ImportFromS3Settings Settings; + TString Settings; // Serialized settings (S3 or FS) TPathId DomainPathId; TMaybe UserSID; TString PeerName; // required for making audit log records @@ -3116,18 +3117,69 @@ struct TImportInfo: public TSimpleRefCount { // Backward compatibility. // But there can be no paths in settings at all. - if (i < ui32(Settings.items_size())) { - return Settings.items(i).source_prefix(); + switch (Kind) { + case EKind::S3: { + Ydb::Import::ImportFromS3Settings settings = GetS3Settings(); + if (i < ui32(settings.items_size())) { + return settings.items(i).source_prefix(); + } + break; + } + case EKind::FS: { + Ydb::Import::ImportFromFsSettings settings = GetFsSettings(); + if (i < ui32(settings.items_size())) { + return settings.items(i).source_path(); + } + break; + } } return {}; } + Ydb::Import::ImportFromS3Settings GetS3Settings() const { + Y_ABORT_UNLESS(Kind == EKind::S3); + Ydb::Import::ImportFromS3Settings settings; + Y_ABORT_UNLESS(settings.ParseFromString(Settings)); + return settings; + } + + Ydb::Import::ImportFromFsSettings GetFsSettings() const { + Y_ABORT_UNLESS(Kind == EKind::FS); + Ydb::Import::ImportFromFsSettings settings; + Y_ABORT_UNLESS(settings.ParseFromString(Settings)); + return settings; + } + + bool GetNoAcl() const { + switch (Kind) { + case EKind::S3: + return GetS3Settings().no_acl(); + case EKind::FS: + return GetFsSettings().no_acl(); + default: + Y_ABORT_UNLESS(false); + } + return false; + } + + bool GetSkipChecksumValidation() const { + switch (Kind) { + case EKind::S3: + return GetS3Settings().skip_checksum_validation(); + case EKind::FS: + return GetFsSettings().skip_checksum_validation(); + default: + Y_ABORT_UNLESS(false); + } + return false; + } + explicit TImportInfo( const ui64 id, const TString& uid, const EKind kind, - const Ydb::Import::ImportFromS3Settings& settings, + const TString& settings, const TPathId domainPathId, const TString& peerName) : Id(id) @@ -3139,6 +3191,28 @@ struct TImportInfo: public TSimpleRefCount { { } + template + explicit TImportInfo( + const ui64 id, + const TString& uid, + const EKind kind, + const TSettingsPB& settingsPb, + const TPathId domainPathId, + const TString& peerName) + : TImportInfo(id, uid, kind, SerializeSettings(settingsPb), domainPathId, peerName) + { + } + +private: + template + static TString SerializeSettings(const TSettingsPB& settingsPb) { + TString result; + Y_ABORT_UNLESS(settingsPb.SerializeToString(&result)); + return result; + } + +public: + TString ToString() const; bool IsFinished() const; diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp new file mode 100644 index 000000000000..8f9832e4d96d --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp @@ -0,0 +1,146 @@ +#include + +#include + +using namespace NSchemeShardUT_Private; + +Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { + Y_UNIT_TEST(ShouldSucceedCreateImportFromFs) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + // Test that schemeshard accepts ImportFromFsSettings + TestImport(runtime, ++txId, "/MyRoot", R"( + ImportFromFsSettings { + base_path: "/mnt/backups" + items { + source_path: "backup/Table" + destination_path: "/MyRoot/RestoredTable" + } + } + )"); + + // Check that import was created + auto response = TestGetImport(runtime, txId, "/MyRoot"); + UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + + const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.base_path(), "/mnt/backups"); + UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 1); + UNIT_ASSERT_VALUES_EQUAL(settings.items(0).source_path(), "backup/Table"); + UNIT_ASSERT_VALUES_EQUAL(settings.items(0).destination_path(), "/MyRoot/RestoredTable"); + } + + Y_UNIT_TEST(ShouldAcceptNoAclForFs) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestImport(runtime, ++txId, "/MyRoot", R"( + ImportFromFsSettings { + base_path: "/mnt/backups" + no_acl: true + items { + source_path: "backup/Table" + destination_path: "/MyRoot/RestoredTable" + } + } + )"); + + auto response = TestGetImport(runtime, txId, "/MyRoot"); + UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + + const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.no_acl(), true); + } + + Y_UNIT_TEST(ShouldAcceptSkipChecksumValidation) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestImport(runtime, ++txId, "/MyRoot", R"( + ImportFromFsSettings { + base_path: "/mnt/backups" + skip_checksum_validation: true + items { + source_path: "backup/Table" + destination_path: "/MyRoot/RestoredTable" + } + } + )"); + + auto response = TestGetImport(runtime, txId, "/MyRoot"); + UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + + const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.skip_checksum_validation(), true); + } + + Y_UNIT_TEST(ShouldFailOnInvalidDestinationPath) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + // Invalid destination path (empty) should fail validation + TestImport(runtime, ++txId, "/MyRoot", R"( + ImportFromFsSettings { + base_path: "/mnt/backups" + items { + source_path: "backup/Table" + destination_path: "" + } + } + )", "", "", Ydb::StatusIds::BAD_REQUEST); + } + + Y_UNIT_TEST(ShouldFailOnDuplicateDestination) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + // Duplicate destination paths should fail validation + TestImport(runtime, ++txId, "/MyRoot", R"( + ImportFromFsSettings { + base_path: "/mnt/backups" + items { + source_path: "backup/Table1" + destination_path: "/MyRoot/SameName" + } + items { + source_path: "backup/Table2" + destination_path: "/MyRoot/SameName" + } + } + )", "", "", Ydb::StatusIds::BAD_REQUEST); + } + + Y_UNIT_TEST(FsImportWithMultipleTables) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestImport(runtime, ++txId, "/MyRoot", R"( + ImportFromFsSettings { + base_path: "/mnt/backups" + items { + source_path: "backup/Table1" + destination_path: "/MyRoot/RestoredTable1" + } + items { + source_path: "backup/Table2" + destination_path: "/MyRoot/RestoredTable2" + } + } + )"); + + auto response = TestGetImport(runtime, txId, "/MyRoot"); + UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + + const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 2); + } +} + + diff --git a/ydb/core/tx/schemeshard/ut_restore/ya.make b/ydb/core/tx/schemeshard/ut_restore/ya.make index 72c4ad464ef0..e4009603e204 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ya.make +++ b/ydb/core/tx/schemeshard/ut_restore/ya.make @@ -27,6 +27,7 @@ PEERDIR( SRCS( ut_restore.cpp + ut_restore_fs.cpp ) YQL_LAST_ABI_VERSION() From 41ba51635c718755fa3c644f28970ce15a1c34e9 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Tue, 11 Nov 2025 15:18:46 +0300 Subject: [PATCH 07/25] scheme getter --- .../schemeshard_import__create.cpp | 9 +- .../schemeshard_import_getters.cpp | 172 ++++++++++++++++++ .../schemeshard/schemeshard_import_getters.h | 2 + 3 files changed, 177 insertions(+), 6 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index 8e73ec88d11b..65d2ce668c1a 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -469,11 +469,8 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetter(Self->SelfId(), importInfo, itemIdx, item.ExportItemIV)); Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); } else { - LOG_I("TImport::TTxProgress: Get scheme for FS import is not supported" - << ": info# " << importInfo->ToString() - << ", item# " << item.ToString(itemIdx)); - // item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetterFS(Self->SelfId(), importInfo, itemIdx)); - // Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); + item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetterFS(Self->SelfId(), importInfo, itemIdx)); + Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); } } @@ -1152,7 +1149,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase if (importInfo->Kind == TImportInfo::EKind::S3) { auto settings = importInfo->GetS3Settings(); if (settings.has_encryption_settings() != importInfo->SchemaMapping->Items[0].IV.Defined()) { - return CancelAndPersist(db, importInfo, -1, {}, "incorrect schema mapping"); + return CancelAndPersist(db, importInfo, -1, {}, "incorrect schema mapping"); } } } diff --git a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp index e91ab325bfb9..27921da9da0d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp @@ -20,6 +20,8 @@ #include +#include +#include #include #include @@ -1403,10 +1405,180 @@ class TListObjectsInS3ExportGetter : public TGetterFromS3 { + + bool ProcessMetadata(const TString& content, TString& error) { + NJson::TJsonValue json; + if (!NJson::ReadJsonTree(content, &json)) { + error = "Failed to parse metadata json"; + return false; + } + + NBackup::TMetadata& metadata = ImportInfo->Items[ItemIdx].Metadata; + TString parseError; + if (!metadata.Deserialize(content, parseError)) { + error = TStringBuilder() << "Failed to parse metadata: " << parseError; + return false; + } + + return true; + } + + bool ProcessScheme(const TString& content, TString& error) { + auto& item = ImportInfo->Items[ItemIdx]; + + Ydb::Table::CreateTableRequest table; + if (table.ParseFromString(content)) { + item.Table = table; + return true; + } + + error = "Failed to parse scheme as table"; + return false; + } + + void ProcessPermissions(const TString& content) { + auto& item = ImportInfo->Items[ItemIdx]; + Ydb::Scheme::ModifyPermissionsRequest permissions; + if (permissions.ParseFromString(content)) { + item.Permissions = permissions; + } + } + + void Reply(bool success, const TString& errorMessage = {}) { + LOG_I("TSchemeGetterFS: Reply" + << ": self# " << SelfId() + << ", importId# " << ImportInfo->Id + << ", itemIdx# " << ItemIdx + << ", success# " << success + << ", error# " << errorMessage); + + Send(ReplyTo, new TEvPrivate::TEvImportSchemeReady(ImportInfo->Id, ItemIdx, success, errorMessage)); + PassAway(); + } + +public: + explicit TSchemeGetterFS(const TActorId& replyTo, TImportInfo::TPtr importInfo, ui32 itemIdx) + : ReplyTo(replyTo) + , ImportInfo(std::move(importInfo)) + , ItemIdx(itemIdx) + { + Y_ABORT_UNLESS(ImportInfo->Kind == TImportInfo::EKind::FS); + } + + void Bootstrap() { + const auto settings = ImportInfo->GetFsSettings(); + const TString basePath = settings.base_path(); + + Y_ABORT_UNLESS(ItemIdx < ImportInfo->Items.size()); + auto& item = ImportInfo->Items[ItemIdx]; + + TString sourcePath = item.SrcPath; + if (sourcePath.empty()) { + Reply(false, "Source path is empty for import item"); + return; + } + + const TString metadataPath = TFSHelper::GetFullPath(basePath, sourcePath + "/metadata.json"); + TString metadataContent; + TString error; + + if (!TFSHelper::ReadFile(metadataPath, metadataContent, error)) { + Reply(false, error); + return; + } + + if (!ProcessMetadata(metadataContent, error)) { + Reply(false, error); + return; + } + + const TString schemeFileName = NYdb::NDump::NFiles::TableScheme().FileName; + + const TString schemePath = TFSHelper::GetFullPath(basePath, sourcePath + "/" + schemeFileName); + TString schemeContent; + + if (!TFSHelper::ReadFile(schemePath, schemeContent, error)) { + Reply(false, error); + return; + } + + if (!ProcessScheme(schemeContent, error)) { + Reply(false, error); + return; + } + + if (!ImportInfo->GetNoAcl()) { + const TString permissionsPath = TFSHelper::GetFullPath(basePath, sourcePath + "/permissions.pb"); + TString permissionsContent; + + if (TFSHelper::ReadFile(permissionsPath, permissionsContent, error)) { + ProcessPermissions(permissionsContent); + } + } + + Reply(true); + } + +private: + const TActorId ReplyTo; + TImportInfo::TPtr ImportInfo; + const ui32 ItemIdx; +}; + IActor* CreateSchemeGetter(const TActorId& replyTo, TImportInfo::TPtr importInfo, ui32 itemIdx, TMaybe iv) { return new TSchemeGetter(replyTo, std::move(importInfo), itemIdx, std::move(iv)); } +IActor* CreateSchemeGetterFS(const TActorId& replyTo, TImportInfo::TPtr importInfo, ui32 itemIdx) { + return new TSchemeGetterFS(replyTo, std::move(importInfo), itemIdx); +} + IActor* CreateSchemaMappingGetter(const TActorId& replyTo, TImportInfo::TPtr importInfo) { return new TSchemaMappingGetter(replyTo, std::move(importInfo)); } diff --git a/ydb/core/tx/schemeshard/schemeshard_import_getters.h b/ydb/core/tx/schemeshard/schemeshard_import_getters.h index 1a5ae6f2cb9b..e8fb686e2cec 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import_getters.h +++ b/ydb/core/tx/schemeshard/schemeshard_import_getters.h @@ -13,5 +13,7 @@ IActor* CreateSchemaMappingGetter(const TActorId& replyTo, TImportInfo::TPtr imp IActor* CreateListObjectsInS3ExportGetter(TEvImport::TEvListObjectsInS3ExportRequest::TPtr&& ev); +IActor* CreateSchemeGetterFS(const TActorId& replyTo, TImportInfo::TPtr importInfo, ui32 itemIdx); + } // NSchemeShard } // NKikimr From 7145d70f0baa7ed865fd51463bd38a14e1c2dbd3 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Tue, 11 Nov 2025 19:32:49 +0300 Subject: [PATCH 08/25] test with files (no metadata) --- .../schemeshard_import_getters.cpp | 23 --- .../schemeshard/ut_restore/ut_restore_fs.cpp | 137 ++++++++++++++++-- 2 files changed, 124 insertions(+), 36 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp index 27921da9da0d..fdc322e1465e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp @@ -1432,22 +1432,6 @@ class TFSHelper { return false; } } - - static bool ValidateChecksum(const TString& content, const TString& expectedChecksum, TString& error) { - if (expectedChecksum.empty()) { - return true; - } - - TString actualChecksum = NBackup::MakeChecksum(content); - if (actualChecksum != expectedChecksum) { - error = TStringBuilder() - << "Checksum mismatch. Expected: " << expectedChecksum - << ", Got: " << actualChecksum; - return false; - } - - return true; - } }; class TSchemeGetterFS: public TActorBootstrapped { @@ -1459,13 +1443,6 @@ class TSchemeGetterFS: public TActorBootstrapped { return false; } - NBackup::TMetadata& metadata = ImportInfo->Items[ItemIdx].Metadata; - TString parseError; - if (!metadata.Deserialize(content, parseError)) { - error = TStringBuilder() << "Failed to parse metadata: " << parseError; - return false; - } - return true; } diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp index 8f9832e4d96d..d84b5351bea6 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp @@ -1,32 +1,124 @@ #include +#include +#include #include +#include + +#include +#include +#include +#include +#include using namespace NSchemeShardUT_Private; +namespace { + +// Helper class to create temporary backup files for tests +class TTempBackupFiles { +public: + explicit TTempBackupFiles() + { + } + + const TString& GetBasePath() const { + return TempDir.Name(); + } + + void CreateTableBackup(const TString& tablePath, const TString& tableName) { + const TString fullPath = TempDir.Name() + "/" + tablePath; + MakePathIfNotExist(fullPath.c_str()); + + // Create metadata.json + CreateMetadataFile(fullPath); + + // Create scheme.pb (table schema) + CreateTableSchemeFile(fullPath, tableName); + + // Create permissions.pb (optional, but include it) + CreatePermissionsFile(fullPath); + } + +private: + void CreateMetadataFile(const TString& dirPath) { + NBackup::TMetadata metadata; + metadata.SetVersion(1); + metadata.SetEnablePermissions(false); + + TString serialized = metadata.Serialize(); + + TFileOutput file(dirPath + "/metadata.json"); + file << serialized; + } + + void CreateTableSchemeFile(const TString& dirPath, const TString& tableName) { + Ydb::Table::CreateTableRequest table; + table.set_path(tableName); + + // Add simple columns + auto* col1 = table.add_columns(); + col1->set_name("key"); + col1->mutable_type()->mutable_optional_type()->mutable_item()->set_type_id(Ydb::Type::UTF8); + + auto* col2 = table.add_columns(); + col2->set_name("value"); + col2->mutable_type()->mutable_optional_type()->mutable_item()->set_type_id(Ydb::Type::UTF8); + + // Set primary key + table.add_primary_key("key"); + + TString serialized; + Y_ABORT_UNLESS(table.SerializeToString(&serialized)); + + TFileOutput file(dirPath + "/" + NYdb::NDump::NFiles::TableScheme().FileName); + file.Write(serialized); + } + + void CreatePermissionsFile(const TString& dirPath) { + Ydb::Scheme::ModifyPermissionsRequest permissions; + + TString serialized; + Y_ABORT_UNLESS(permissions.SerializeToString(&serialized)); + + TFileOutput file(dirPath + "/permissions.pb"); + file.Write(serialized); + } + + TTempDir TempDir; +}; + +} // namespace + Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { Y_UNIT_TEST(ShouldSucceedCreateImportFromFs) { TTestBasicRuntime runtime; TTestEnv env(runtime); ui64 txId = 100; + // Create temporary backup files + TTempBackupFiles backup; + backup.CreateTableBackup("backup/Table", "Table"); + // Test that schemeshard accepts ImportFromFsSettings - TestImport(runtime, ++txId, "/MyRoot", R"( + TString importSettings = Sprintf(R"( ImportFromFsSettings { - base_path: "/mnt/backups" + base_path: "%s" items { source_path: "backup/Table" destination_path: "/MyRoot/RestoredTable" } } - )"); + )", backup.GetBasePath().c_str()); + + TestImport(runtime, ++txId, "/MyRoot", importSettings); // Check that import was created auto response = TestGetImport(runtime, txId, "/MyRoot"); UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); - UNIT_ASSERT_VALUES_EQUAL(settings.base_path(), "/mnt/backups"); + UNIT_ASSERT_VALUES_EQUAL(settings.base_path(), backup.GetBasePath()); UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 1); UNIT_ASSERT_VALUES_EQUAL(settings.items(0).source_path(), "backup/Table"); UNIT_ASSERT_VALUES_EQUAL(settings.items(0).destination_path(), "/MyRoot/RestoredTable"); @@ -37,16 +129,22 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { TTestEnv env(runtime); ui64 txId = 100; - TestImport(runtime, ++txId, "/MyRoot", R"( + // Create temporary backup files + TTempBackupFiles backup; + backup.CreateTableBackup("backup/Table", "Table"); + + TString importSettings = Sprintf(R"( ImportFromFsSettings { - base_path: "/mnt/backups" + base_path: "%s" no_acl: true items { source_path: "backup/Table" destination_path: "/MyRoot/RestoredTable" } } - )"); + )", backup.GetBasePath().c_str()); + + TestImport(runtime, ++txId, "/MyRoot", importSettings); auto response = TestGetImport(runtime, txId, "/MyRoot"); UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); @@ -60,16 +158,22 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { TTestEnv env(runtime); ui64 txId = 100; - TestImport(runtime, ++txId, "/MyRoot", R"( + // Create temporary backup files + TTempBackupFiles backup; + backup.CreateTableBackup("backup/Table", "Table"); + + TString importSettings = Sprintf(R"( ImportFromFsSettings { - base_path: "/mnt/backups" + base_path: "%s" skip_checksum_validation: true items { source_path: "backup/Table" destination_path: "/MyRoot/RestoredTable" } } - )"); + )", backup.GetBasePath().c_str()); + + TestImport(runtime, ++txId, "/MyRoot", importSettings); auto response = TestGetImport(runtime, txId, "/MyRoot"); UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); @@ -121,9 +225,14 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { TTestEnv env(runtime); ui64 txId = 100; - TestImport(runtime, ++txId, "/MyRoot", R"( + // Create temporary backup files + TTempBackupFiles backup; + backup.CreateTableBackup("backup/Table1", "Table1"); + backup.CreateTableBackup("backup/Table2", "Table2"); + + TString importSettings = Sprintf(R"( ImportFromFsSettings { - base_path: "/mnt/backups" + base_path: "%s" items { source_path: "backup/Table1" destination_path: "/MyRoot/RestoredTable1" @@ -133,7 +242,9 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { destination_path: "/MyRoot/RestoredTable2" } } - )"); + )", backup.GetBasePath().c_str()); + + TestImport(runtime, ++txId, "/MyRoot", importSettings); auto response = TestGetImport(runtime, txId, "/MyRoot"); UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); From d6b56c09e326a7158f578aed92fa2517140d4992 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Wed, 12 Nov 2025 15:20:43 +0300 Subject: [PATCH 09/25] tests --- ydb/core/tx/datashard/restore_unit.cpp | 4 + .../schemeshard_import__create.cpp | 4 + .../schemeshard/ut_restore/ut_restore_fs.cpp | 253 +++++++++++++++--- 3 files changed, 224 insertions(+), 37 deletions(-) diff --git a/ydb/core/tx/datashard/restore_unit.cpp b/ydb/core/tx/datashard/restore_unit.cpp index cc3990371c93..91fce89c3edf 100644 --- a/ydb/core/tx/datashard/restore_unit.cpp +++ b/ydb/core/tx/datashard/restore_unit.cpp @@ -48,6 +48,10 @@ class TRestoreUnit : public TBackupRestoreUnitBaseSetAsyncJobResult(new TImportJobProduct(true, TString(), 0, 0)); + break; + default: Abort(op, ctx, TStringBuilder() << "Unknown settings: " << static_cast(settingsKind)); return false; diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index 65d2ce668c1a..d82fb6ea22aa 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -1537,6 +1537,10 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase if (!item.Table) { Y_ABORT("Create Scheme Object: schema objects are empty"); } + if (importInfo->Kind == TImportInfo::EKind::FS) { + item.State = EState::Done; + break; + } item.State = EState::Transferring; AllocateTxId(*importInfo, itemIdx); break; diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp index d84b5351bea6..0842ccce7beb 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp @@ -15,7 +15,6 @@ using namespace NSchemeShardUT_Private; namespace { -// Helper class to create temporary backup files for tests class TTempBackupFiles { public: explicit TTempBackupFiles() @@ -27,46 +26,60 @@ class TTempBackupFiles { } void CreateTableBackup(const TString& tablePath, const TString& tableName) { + CreateTableBackup( + tablePath, + tableName, + { + {"key", Ydb::Type::UTF8}, + {"value", Ydb::Type::UTF8} + }, + {"key"} + ); + } + + void CreateTableBackup(const TString& tablePath, const TString& tableName, + const TVector>& columns, + const TVector& keyColumns) { const TString fullPath = TempDir.Name() + "/" + tablePath; MakePathIfNotExist(fullPath.c_str()); // Create metadata.json CreateMetadataFile(fullPath); - // Create scheme.pb (table schema) - CreateTableSchemeFile(fullPath, tableName); + // Create scheme.pb + CreateTableSchemeFile(fullPath, tableName, columns, keyColumns); - // Create permissions.pb (optional, but include it) + // Create permissions.pb CreatePermissionsFile(fullPath); } private: - void CreateMetadataFile(const TString& dirPath) { + static void CreateMetadataFile(const TString& dirPath) { NBackup::TMetadata metadata; metadata.SetVersion(1); - metadata.SetEnablePermissions(false); + metadata.SetEnablePermissions(true); TString serialized = metadata.Serialize(); TFileOutput file(dirPath + "/metadata.json"); - file << serialized; + file.Write(serialized); } - void CreateTableSchemeFile(const TString& dirPath, const TString& tableName) { + static void CreateTableSchemeFile(const TString& dirPath, const TString& tableName, + const TVector>& columns, + const TVector& keyColumns) { Ydb::Table::CreateTableRequest table; table.set_path(tableName); - // Add simple columns - auto* col1 = table.add_columns(); - col1->set_name("key"); - col1->mutable_type()->mutable_optional_type()->mutable_item()->set_type_id(Ydb::Type::UTF8); + for (const auto& [colName, colType] : columns) { + auto* col = table.add_columns(); + col->set_name(colName); + col->mutable_type()->mutable_optional_type()->mutable_item()->set_type_id(colType); + } - auto* col2 = table.add_columns(); - col2->set_name("value"); - col2->mutable_type()->mutable_optional_type()->mutable_item()->set_type_id(Ydb::Type::UTF8); - - // Set primary key - table.add_primary_key("key"); + for (const auto& keyCol : keyColumns) { + table.add_primary_key(keyCol); + } TString serialized; Y_ABORT_UNLESS(table.SerializeToString(&serialized)); @@ -75,7 +88,19 @@ class TTempBackupFiles { file.Write(serialized); } - void CreatePermissionsFile(const TString& dirPath) { + static void CreateTableSchemeFile(const TString& dirPath, const TString& tableName) { + CreateTableSchemeFile( + dirPath, + tableName, + { + {"key", Ydb::Type::UTF8}, + {"value", Ydb::Type::UTF8} + }, + {"key"} + ); + } + + static void CreatePermissionsFile(const TString& dirPath) { Ydb::Scheme::ModifyPermissionsRequest permissions; TString serialized; @@ -96,11 +121,9 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { TTestEnv env(runtime); ui64 txId = 100; - // Create temporary backup files TTempBackupFiles backup; backup.CreateTableBackup("backup/Table", "Table"); - // Test that schemeshard accepts ImportFromFsSettings TString importSettings = Sprintf(R"( ImportFromFsSettings { base_path: "%s" @@ -112,16 +135,25 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { )", backup.GetBasePath().c_str()); TestImport(runtime, ++txId, "/MyRoot", importSettings); + env.TestWaitNotification(runtime, txId); - // Check that import was created - auto response = TestGetImport(runtime, txId, "/MyRoot"); - UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + auto response = TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::SUCCESS); + const auto& entry = response.GetResponse().GetEntry(); + + UNIT_ASSERT(entry.HasImportFromFsSettings()); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); - const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + const auto& settings = entry.GetImportFromFsSettings(); UNIT_ASSERT_VALUES_EQUAL(settings.base_path(), backup.GetBasePath()); UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 1); UNIT_ASSERT_VALUES_EQUAL(settings.items(0).source_path(), "backup/Table"); UNIT_ASSERT_VALUES_EQUAL(settings.items(0).destination_path(), "/MyRoot/RestoredTable"); + + // Verify that the table was actually created + TestDescribeResult(DescribePath(runtime, "/MyRoot/RestoredTable"), { + NLs::PathExist, + NLs::IsTable + }); } Y_UNIT_TEST(ShouldAcceptNoAclForFs) { @@ -129,7 +161,6 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { TTestEnv env(runtime); ui64 txId = 100; - // Create temporary backup files TTempBackupFiles backup; backup.CreateTableBackup("backup/Table", "Table"); @@ -145,12 +176,21 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { )", backup.GetBasePath().c_str()); TestImport(runtime, ++txId, "/MyRoot", importSettings); + env.TestWaitNotification(runtime, txId); - auto response = TestGetImport(runtime, txId, "/MyRoot"); - UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + auto response = TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::SUCCESS); + const auto& entry = response.GetResponse().GetEntry(); + + UNIT_ASSERT(entry.HasImportFromFsSettings()); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); - const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + const auto& settings = entry.GetImportFromFsSettings(); UNIT_ASSERT_VALUES_EQUAL(settings.no_acl(), true); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/RestoredTable"), { + NLs::PathExist, + NLs::IsTable + }); } Y_UNIT_TEST(ShouldAcceptSkipChecksumValidation) { @@ -158,7 +198,6 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { TTestEnv env(runtime); ui64 txId = 100; - // Create temporary backup files TTempBackupFiles backup; backup.CreateTableBackup("backup/Table", "Table"); @@ -174,12 +213,21 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { )", backup.GetBasePath().c_str()); TestImport(runtime, ++txId, "/MyRoot", importSettings); + env.TestWaitNotification(runtime, txId); - auto response = TestGetImport(runtime, txId, "/MyRoot"); - UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + auto response = TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::SUCCESS); + const auto& entry = response.GetResponse().GetEntry(); + + UNIT_ASSERT(entry.HasImportFromFsSettings()); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); - const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + const auto& settings = entry.GetImportFromFsSettings(); UNIT_ASSERT_VALUES_EQUAL(settings.skip_checksum_validation(), true); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/RestoredTable"), { + NLs::PathExist, + NLs::IsTable + }); } Y_UNIT_TEST(ShouldFailOnInvalidDestinationPath) { @@ -225,7 +273,6 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { TTestEnv env(runtime); ui64 txId = 100; - // Create temporary backup files TTempBackupFiles backup; backup.CreateTableBackup("backup/Table1", "Table1"); backup.CreateTableBackup("backup/Table2", "Table2"); @@ -245,12 +292,144 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { )", backup.GetBasePath().c_str()); TestImport(runtime, ++txId, "/MyRoot", importSettings); + env.TestWaitNotification(runtime, txId); - auto response = TestGetImport(runtime, txId, "/MyRoot"); - UNIT_ASSERT(response.GetResponse().GetEntry().HasImportFromFsSettings()); + auto response = TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::SUCCESS); + const auto& entry = response.GetResponse().GetEntry(); - const auto& settings = response.GetResponse().GetEntry().GetImportFromFsSettings(); + UNIT_ASSERT(entry.HasImportFromFsSettings()); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); + + const auto& settings = entry.GetImportFromFsSettings(); UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 2); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/RestoredTable1"), { + NLs::PathExist, + NLs::IsTable + }); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/RestoredTable2"), { + NLs::PathExist, + NLs::IsTable + }); + } + + Y_UNIT_TEST(ShouldFailOnMissingBackupFiles) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TTempBackupFiles backup; + + TString importSettings = Sprintf(R"( + ImportFromFsSettings { + base_path: "%s" + items { + source_path: "backup/NonExistentTable" + destination_path: "/MyRoot/RestoredTable" + } + } + )", backup.GetBasePath().c_str()); + + TestImport(runtime, ++txId, "/MyRoot", importSettings); + env.TestWaitNotification(runtime, txId); + + auto response = TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::CANCELLED); + const auto& entry = response.GetResponse().GetEntry(); + + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_CANCELLED); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/RestoredTable"), { + NLs::PathNotExist + }); + } + + Y_UNIT_TEST(ShouldValidateTableSchema) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TTempBackupFiles backup; + backup.CreateTableBackup("backup/ComplexTable", "ComplexTable"); + + TString importSettings = Sprintf(R"( + ImportFromFsSettings { + base_path: "%s" + items { + source_path: "backup/ComplexTable" + destination_path: "/MyRoot/ComplexTable" + } + } + )", backup.GetBasePath().c_str()); + + TestImport(runtime, ++txId, "/MyRoot", importSettings); + env.TestWaitNotification(runtime, txId); + + auto response = TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(response.GetResponse().GetEntry().GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); + + auto describe = DescribePath(runtime, "/MyRoot/ComplexTable"); + TestDescribeResult(describe, { + NLs::PathExist, + NLs::IsTable + }); + + const auto& table = describe.GetPathDescription().GetTable(); + UNIT_ASSERT_VALUES_EQUAL(table.ColumnsSize(), 2); + UNIT_ASSERT_VALUES_EQUAL(table.GetColumns(0).GetName(), "key"); + UNIT_ASSERT_VALUES_EQUAL(table.GetColumns(1).GetName(), "value"); + UNIT_ASSERT_VALUES_EQUAL(table.KeyColumnNamesSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(table.GetKeyColumnNames(0), "key"); + } + + Y_UNIT_TEST(ShouldImportTableWithDifferentTypes) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TTempBackupFiles backup; + backup.CreateTableBackup( + "backup/TypedTable", + "TypedTable", + { + {"id", Ydb::Type::UINT64}, + {"name", Ydb::Type::UTF8}, + {"value", Ydb::Type::INT32}, + {"flag", Ydb::Type::BOOL} + }, + {"id"} + ); + + TString importSettings = Sprintf(R"( + ImportFromFsSettings { + base_path: "%s" + items { + source_path: "backup/TypedTable" + destination_path: "/MyRoot/TypedTable" + } + } + )", backup.GetBasePath().c_str()); + + TestImport(runtime, ++txId, "/MyRoot", importSettings); + env.TestWaitNotification(runtime, txId); + + auto response = TestGetImport(runtime, txId, "/MyRoot", Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(response.GetResponse().GetEntry().GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); + + auto describe = DescribePath(runtime, "/MyRoot/TypedTable"); + TestDescribeResult(describe, { + NLs::PathExist, + NLs::IsTable + }); + + const auto& table = describe.GetPathDescription().GetTable(); + UNIT_ASSERT_VALUES_EQUAL(table.ColumnsSize(), 4); + UNIT_ASSERT_VALUES_EQUAL(table.GetColumns(0).GetName(), "id"); + UNIT_ASSERT_VALUES_EQUAL(table.GetColumns(1).GetName(), "name"); + UNIT_ASSERT_VALUES_EQUAL(table.GetColumns(2).GetName(), "value"); + UNIT_ASSERT_VALUES_EQUAL(table.GetColumns(3).GetName(), "flag"); + UNIT_ASSERT_VALUES_EQUAL(table.KeyColumnNamesSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(table.GetKeyColumnNames(0), "id"); } } From 349f0637eb3fb010e7083eb7383c7c0c3613c9d9 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Wed, 12 Nov 2025 18:25:13 +0300 Subject: [PATCH 10/25] small fixes --- .../schemeshard_import__create.cpp | 2 +- .../schemeshard_import_getters.cpp | 36 ++++++++++--------- .../schemeshard/ut_restore/ut_restore_fs.cpp | 13 ------- 3 files changed, 20 insertions(+), 31 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index d82fb6ea22aa..df4ac9f4013e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -1145,7 +1145,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase } if (!importInfo->SchemaMapping->Items.empty()) { - // Only S3 imports support schema mapping with encryption + // TODO(st-shchetinin): Only S3 imports support schema mapping with encryption (add for FS) if (importInfo->Kind == TImportInfo::EKind::S3) { auto settings = importInfo->GetS3Settings(); if (settings.has_encryption_settings() != importInfo->SchemaMapping->Items[0].IV.Defined()) { diff --git a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp index fdc322e1465e..be4e0f5d430e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import_getters.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -450,8 +451,11 @@ class TSchemeGetter: public TGetterFromS3 { LOG_T("Trying to parse metadata" << ": self# " << SelfId() << ", body# " << SubstGlobalCopy(content, "\n", "\\n")); - - item.Metadata = NBackup::TMetadata::Deserialize(content); + try { + item.Metadata = NBackup::TMetadata::Deserialize(content); + } catch (const std::exception& e) { + return Reply(Ydb::StatusIds::BAD_REQUEST, TStringBuilder() << "Failed to parse metadata: " << e.what()); + } if (item.Metadata.HasVersion() && item.Metadata.GetVersion() == 0) { NeedValidateChecksums = false; @@ -1409,12 +1413,9 @@ class TFSHelper { public: static TString GetFullPath(const TString& basePath, const TString& relativePath) { if (basePath.empty()) { - return "/" + relativePath; + return TStringBuilder() << "/" << relativePath; } - if (basePath.EndsWith('/')) { - return basePath + relativePath; - } - return basePath + "/" + relativePath; + return TFsPath(basePath) / relativePath; } static bool ReadFile(const TString& path, TString& content, TString& error) { @@ -1437,13 +1438,13 @@ class TFSHelper { class TSchemeGetterFS: public TActorBootstrapped { bool ProcessMetadata(const TString& content, TString& error) { - NJson::TJsonValue json; - if (!NJson::ReadJsonTree(content, &json)) { - error = "Failed to parse metadata json"; + try { + ImportInfo->Items[ItemIdx].Metadata = NBackup::TMetadata::Deserialize(content); + return true; + } catch (const std::exception& e) { + error = TStringBuilder() << "Failed to parse metadata: " << e.what(); return false; } - - return true; } bool ProcessScheme(const TString& content, TString& error) { @@ -1501,9 +1502,11 @@ class TSchemeGetterFS: public TActorBootstrapped { return; } - const TString metadataPath = TFSHelper::GetFullPath(basePath, sourcePath + "/metadata.json"); - TString metadataContent; + const TFsPath itemPath = TFsPath(basePath) / sourcePath; TString error; + + const TString metadataPath = itemPath / "metadata.json"; + TString metadataContent; if (!TFSHelper::ReadFile(metadataPath, metadataContent, error)) { Reply(false, error); @@ -1516,8 +1519,7 @@ class TSchemeGetterFS: public TActorBootstrapped { } const TString schemeFileName = NYdb::NDump::NFiles::TableScheme().FileName; - - const TString schemePath = TFSHelper::GetFullPath(basePath, sourcePath + "/" + schemeFileName); + const TString schemePath = itemPath / schemeFileName; TString schemeContent; if (!TFSHelper::ReadFile(schemePath, schemeContent, error)) { @@ -1531,7 +1533,7 @@ class TSchemeGetterFS: public TActorBootstrapped { } if (!ImportInfo->GetNoAcl()) { - const TString permissionsPath = TFSHelper::GetFullPath(basePath, sourcePath + "/permissions.pb"); + const TString permissionsPath = itemPath / "permissions.pb"; TString permissionsContent; if (TFSHelper::ReadFile(permissionsPath, permissionsContent, error)) { diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp index 0842ccce7beb..3539a26922d9 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp @@ -88,18 +88,6 @@ class TTempBackupFiles { file.Write(serialized); } - static void CreateTableSchemeFile(const TString& dirPath, const TString& tableName) { - CreateTableSchemeFile( - dirPath, - tableName, - { - {"key", Ydb::Type::UTF8}, - {"value", Ydb::Type::UTF8} - }, - {"key"} - ); - } - static void CreatePermissionsFile(const TString& dirPath) { Ydb::Scheme::ModifyPermissionsRequest permissions; @@ -149,7 +137,6 @@ Y_UNIT_TEST_SUITE(TSchemeShardImportFromFsTests) { UNIT_ASSERT_VALUES_EQUAL(settings.items(0).source_path(), "backup/Table"); UNIT_ASSERT_VALUES_EQUAL(settings.items(0).destination_path(), "/MyRoot/RestoredTable"); - // Verify that the table was actually created TestDescribeResult(DescribePath(runtime, "/MyRoot/RestoredTable"), { NLs::PathExist, NLs::IsTable From 030f69283880e4a18a8d84737a4f53c6419528f4 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Wed, 26 Nov 2025 15:18:30 +0000 Subject: [PATCH 11/25] info types fix --- .../tx/schemeshard/schemeshard_import.cpp | 2 +- .../tx/schemeshard/schemeshard_info_types.h | 89 +++++++------------ .../schemeshard_info_types_helper.h | 41 +++++++++ 3 files changed, 76 insertions(+), 56 deletions(-) create mode 100644 ydb/core/tx/schemeshard/schemeshard_info_types_helper.h diff --git a/ydb/core/tx/schemeshard/schemeshard_import.cpp b/ydb/core/tx/schemeshard/schemeshard_import.cpp index 163ae838aa96..687a416b2407 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import.cpp @@ -124,7 +124,7 @@ void TSchemeShard::PersistCreateImport(NIceDb::TNiceDb& db, const TImportInfo& i db.Table().Key(importInfo.Id).Update( NIceDb::TUpdate(importInfo.Uid), NIceDb::TUpdate(static_cast(importInfo.Kind)), - NIceDb::TUpdate(importInfo.Settings), + NIceDb::TUpdate(importInfo.SettingsSerialized), NIceDb::TUpdate(importInfo.DomainPathId.OwnerId), NIceDb::TUpdate(importInfo.DomainPathId.LocalPathId), NIceDb::TUpdate(importInfo.Items.size()), diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 28e335528a24..fba3de16e8f1 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3,6 +3,7 @@ #include "olap/schema/schema.h" #include "olap/schema/update.h" #include "schemeshard_identificators.h" +#include "schemeshard_info_types_helper.h" #include "schemeshard_path_element.h" #include "schemeshard_schema.h" #include "schemeshard_tx_infly.h" @@ -3091,7 +3092,9 @@ struct TImportInfo: public TSimpleRefCount { ui64 Id; // TxId from the original TEvCreateImportRequest TString Uid; EKind Kind; - TString Settings; // Serialized settings (S3 or FS) + const TString SettingsSerialized; + std::variant Settings; TPathId DomainPathId; TMaybe UserSID; TString PeerName; // required for making audit log records @@ -3110,6 +3113,7 @@ struct TImportInfo: public TSimpleRefCount { TInstant StartTime = TInstant::Zero(); TInstant EndTime = TInstant::Zero(); +public: TString GetItemSrcPrefix(size_t i) const { if (i < Items.size() && Items[i].SrcPrefix) { return Items[i].SrcPrefix; @@ -3117,78 +3121,47 @@ struct TImportInfo: public TSimpleRefCount { // Backward compatibility. // But there can be no paths in settings at all. - switch (Kind) { - case EKind::S3: { - Ydb::Import::ImportFromS3Settings settings = GetS3Settings(); - if (i < ui32(settings.items_size())) { - return settings.items(i).source_prefix(); - } - break; - } - case EKind::FS: { - Ydb::Import::ImportFromFsSettings settings = GetFsSettings(); - if (i < ui32(settings.items_size())) { - return settings.items(i).source_path(); - } - break; - } - } - - return {}; + return std::visit([i](const auto& settings) -> TString { + using T = std::decay_t; + return TItemSourcePathGetter::Get(settings, i); + }, Settings); } Ydb::Import::ImportFromS3Settings GetS3Settings() const { Y_ABORT_UNLESS(Kind == EKind::S3); - Ydb::Import::ImportFromS3Settings settings; - Y_ABORT_UNLESS(settings.ParseFromString(Settings)); - return settings; + return std::get(Settings); } Ydb::Import::ImportFromFsSettings GetFsSettings() const { Y_ABORT_UNLESS(Kind == EKind::FS); - Ydb::Import::ImportFromFsSettings settings; - Y_ABORT_UNLESS(settings.ParseFromString(Settings)); - return settings; - } - - bool GetNoAcl() const { - switch (Kind) { - case EKind::S3: - return GetS3Settings().no_acl(); - case EKind::FS: - return GetFsSettings().no_acl(); - default: - Y_ABORT_UNLESS(false); - } - return false; + return std::get(Settings); } - bool GetSkipChecksumValidation() const { - switch (Kind) { - case EKind::S3: - return GetS3Settings().skip_checksum_validation(); - case EKind::FS: - return GetFsSettings().skip_checksum_validation(); - default: - Y_ABORT_UNLESS(false); - } - return false; - } + // Generate getters for common settings fields + IMPORT_SETTINGS_GETTER(GetNoAcl, no_acl) + IMPORT_SETTINGS_GETTER(GetSkipChecksumValidation, skip_checksum_validation) explicit TImportInfo( const ui64 id, const TString& uid, const EKind kind, - const TString& settings, + const TString& serializedSettings, const TPathId domainPathId, const TString& peerName) : Id(id) , Uid(uid) , Kind(kind) - , Settings(settings) + , SettingsSerialized(serializedSettings) , DomainPathId(domainPathId) , PeerName(peerName) { + // Parse settings from serialized string based on import kind. + switch (kind) { + PARSE_SETTINGS_CASE(S3, Ydb::Import::ImportFromS3Settings) + PARSE_SETTINGS_CASE(FS, Ydb::Import::ImportFromFsSettings) + default: + Y_ABORT("Unknown import kind"); + } } template @@ -3199,16 +3172,22 @@ struct TImportInfo: public TSimpleRefCount { const TSettingsPB& settingsPb, const TPathId domainPathId, const TString& peerName) - : TImportInfo(id, uid, kind, SerializeSettings(settingsPb), domainPathId, peerName) + : Id(id) + , Uid(uid) + , Kind(kind) + , SettingsSerialized(SerializeSettings(settingsPb)) + , Settings(settingsPb) + , DomainPathId(domainPathId) + , PeerName(peerName) { } private: template - static TString SerializeSettings(const TSettingsPB& settingsPb) { - TString result; - Y_ABORT_UNLESS(settingsPb.SerializeToString(&result)); - return result; + static TString SerializeSettings(const TSettingsPB& settings) { + TString serialized; + Y_PROTOBUF_SUPPRESS_NODISCARD settings.SerializeToString(&serialized); + return serialized; } public: diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h new file mode 100644 index 000000000000..6895ad471868 --- /dev/null +++ b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +template +struct TItemSourcePathGetter; + +#define DEFINE_ITEM_SOURCE_PATH_GETTER(SettingsType, Method) \ + template <> \ + struct TItemSourcePathGetter { \ + static TString Get(const SettingsType& settings, size_t i) { \ + if (i < ui32(settings.items_size())) { \ + return settings.items(i).Method(); \ + } \ + return {}; \ + } \ + }; + +DEFINE_ITEM_SOURCE_PATH_GETTER(Ydb::Import::ImportFromS3Settings, source_prefix) +DEFINE_ITEM_SOURCE_PATH_GETTER(Ydb::Import::ImportFromFsSettings, source_path) + +#undef DEFINE_ITEM_SOURCE_PATH_GETTER + +// Macro to generate a getter method that retrieves a field from settings. +#define IMPORT_SETTINGS_GETTER(MethodName, FieldAccessor) \ + bool MethodName() const { \ + return std::visit([](const auto& settings) { \ + return settings.FieldAccessor(); \ + }, Settings); \ + } + +// Macro to generate a case statement for parsing settings from a serialized string. +#define PARSE_SETTINGS_CASE(KindValue, SettingsType) \ + case EKind::KindValue: { \ + SettingsType tmpSettings; \ + Y_ABORT_UNLESS(tmpSettings.ParseFromString(serializedSettings)); \ + Settings = std::move(tmpSettings); \ + break; \ + } + From c62344d460491b5a03fa3fe371907ab793528e47 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Wed, 26 Nov 2025 16:35:40 +0000 Subject: [PATCH 12/25] fix cs build --- ydb/core/tx/columnshard/backup/iscan/iscan.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/backup/iscan/iscan.cpp b/ydb/core/tx/columnshard/backup/iscan/iscan.cpp index 819000fd1190..a29cfb023865 100644 --- a/ydb/core/tx/columnshard/backup/iscan/iscan.cpp +++ b/ydb/core/tx/columnshard/backup/iscan/iscan.cpp @@ -67,6 +67,8 @@ TConclusion> CreateIScanExportUploader(const TAct break; case NKikimrSchemeOp::TBackupTask::SETTINGS_NOT_SET: return TConclusionStatus::Fail("Internal error. It is not possible to have empty settings for backup here"); + default: + return TConclusionStatus::Fail("Internal error. Unsupported type of backup task settings"); } auto createUploader = [subscriberActorId = subscriberActorId, txId = txId, exp]() { @@ -331,4 +333,4 @@ std::unique_ptr CreateExportUploaderActor(const TActorId& subscriberActo return std::make_unique(backupTask, exportFactory, tableColumns, subscriberActorId, txId); } -} // namespace NKikimr::NColumnShard::NBackup \ No newline at end of file +} // namespace NKikimr::NColumnShard::NBackup From 1be20b94bd075bfb757f1261d50280e4e20fab69 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Thu, 27 Nov 2025 15:13:28 +0000 Subject: [PATCH 13/25] io pool register --- ydb/core/tx/schemeshard/schemeshard_import__create.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index df4ac9f4013e..77c3ff477439 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -469,7 +469,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetter(Self->SelfId(), importInfo, itemIdx, item.ExportItemIV)); Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); } else { - item.SchemeGetter = ctx.RegisterWithSameMailbox(CreateSchemeGetterFS(Self->SelfId(), importInfo, itemIdx)); + item.SchemeGetter = ctx.Register(CreateSchemeGetterFS(Self->SelfId(), importInfo, itemIdx), TMailboxType::Simple, AppData()->IOPoolId); Self->RunningImportSchemeGetters.emplace(item.SchemeGetter); } } From 10b57feed94c5b2cea48eefd69e75b2fad318997 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Thu, 27 Nov 2025 16:16:34 +0000 Subject: [PATCH 14/25] fix some issues --- ydb/core/tx/schemeshard/schemeshard_import__create.cpp | 2 +- ydb/core/tx/schemeshard/schemeshard_info_types_helper.h | 4 ++++ ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp | 6 +++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index 77c3ff477439..9562fc1289ac 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -1149,7 +1149,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase if (importInfo->Kind == TImportInfo::EKind::S3) { auto settings = importInfo->GetS3Settings(); if (settings.has_encryption_settings() != importInfo->SchemaMapping->Items[0].IV.Defined()) { - return CancelAndPersist(db, importInfo, -1, {}, "incorrect schema mapping"); + return CancelAndPersist(db, importInfo, -1, {}, "incorrect schema mapping"); } } } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h index 6895ad471868..8692d74784b9 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h @@ -3,6 +3,8 @@ #include #include +namespace NKikimr::NSchemeShard { + template struct TItemSourcePathGetter; @@ -39,3 +41,5 @@ DEFINE_ITEM_SOURCE_PATH_GETTER(Ydb::Import::ImportFromFsSettings, source_path) break; \ } +} // namespace NKikimr::NSchemeShard + diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp index 3539a26922d9..0984db45876d 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore_fs.cpp @@ -1,10 +1,10 @@ +#include +#include + #include #include #include -#include -#include - #include #include #include From 3b557fbfe1c87e824dcb8ca00bd3955bf8231b1b Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Thu, 27 Nov 2025 16:44:42 +0000 Subject: [PATCH 15/25] 1 stage refactoring --- .../tx/schemeshard/schemeshard_info_types.h | 34 ++++++++--- .../schemeshard_info_types_helper.h | 61 ++++++++++--------- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index fba3de16e8f1..fd3e5279477a 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3121,10 +3121,10 @@ struct TImportInfo: public TSimpleRefCount { // Backward compatibility. // But there can be no paths in settings at all. - return std::visit([i](const auto& settings) -> TString { + return VisitSettings([i](const auto& settings) -> TString { using T = std::decay_t; return TItemSourcePathGetter::Get(settings, i); - }, Settings); + }); } Ydb::Import::ImportFromS3Settings GetS3Settings() const { @@ -3137,9 +3137,18 @@ struct TImportInfo: public TSimpleRefCount { return std::get(Settings); } - // Generate getters for common settings fields - IMPORT_SETTINGS_GETTER(GetNoAcl, no_acl) - IMPORT_SETTINGS_GETTER(GetSkipChecksumValidation, skip_checksum_validation) + // Getters for common settings fields + bool GetNoAcl() const { + return VisitSettings([](const auto& settings) { + return settings.no_acl(); + }); + } + + bool GetSkipChecksumValidation() const { + return VisitSettings([](const auto& settings) { + return settings.skip_checksum_validation(); + }); + } explicit TImportInfo( const ui64 id, @@ -3157,8 +3166,14 @@ struct TImportInfo: public TSimpleRefCount { { // Parse settings from serialized string based on import kind. switch (kind) { - PARSE_SETTINGS_CASE(S3, Ydb::Import::ImportFromS3Settings) - PARSE_SETTINGS_CASE(FS, Ydb::Import::ImportFromFsSettings) + case EKind::S3: { + Settings = ParseSettings(serializedSettings); + break; + } + case EKind::FS: { + Settings = ParseSettings(serializedSettings); + break; + } default: Y_ABORT("Unknown import kind"); } @@ -3190,6 +3205,11 @@ struct TImportInfo: public TSimpleRefCount { return serialized; } + template + auto VisitSettings(TFunc&& func) const { + return NImportHelpers::VisitSettings(Settings, std::forward(func)); + } + public: TString ToString() const; diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h index 8692d74784b9..2d064adbfbd1 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h @@ -8,38 +8,39 @@ namespace NKikimr::NSchemeShard { template struct TItemSourcePathGetter; -#define DEFINE_ITEM_SOURCE_PATH_GETTER(SettingsType, Method) \ - template <> \ - struct TItemSourcePathGetter { \ - static TString Get(const SettingsType& settings, size_t i) { \ - if (i < ui32(settings.items_size())) { \ - return settings.items(i).Method(); \ - } \ - return {}; \ - } \ - }; - -DEFINE_ITEM_SOURCE_PATH_GETTER(Ydb::Import::ImportFromS3Settings, source_prefix) -DEFINE_ITEM_SOURCE_PATH_GETTER(Ydb::Import::ImportFromFsSettings, source_path) - -#undef DEFINE_ITEM_SOURCE_PATH_GETTER - -// Macro to generate a getter method that retrieves a field from settings. -#define IMPORT_SETTINGS_GETTER(MethodName, FieldAccessor) \ - bool MethodName() const { \ - return std::visit([](const auto& settings) { \ - return settings.FieldAccessor(); \ - }, Settings); \ +template <> +struct TItemSourcePathGetter { + static TString Get(const Ydb::Import::ImportFromS3Settings& settings, size_t i) { + if (i < ui32(settings.items_size())) { + return settings.items(i).source_prefix(); + } + return {}; } - -// Macro to generate a case statement for parsing settings from a serialized string. -#define PARSE_SETTINGS_CASE(KindValue, SettingsType) \ - case EKind::KindValue: { \ - SettingsType tmpSettings; \ - Y_ABORT_UNLESS(tmpSettings.ParseFromString(serializedSettings)); \ - Settings = std::move(tmpSettings); \ - break; \ +}; + +template <> +struct TItemSourcePathGetter { + static TString Get(const Ydb::Import::ImportFromFsSettings& settings, size_t i) { + if (i < ui32(settings.items_size())) { + return settings.items(i).source_path(); + } + return {}; } +}; + + +template +auto VisitSettings(const TVariant& settings, TFunc&& func) { + return std::visit(std::forward(func), settings); +} + + +template +TSettings ParseSettings(const TString& serializedSettings) { + TSettings tmpSettings; + Y_ABORT_UNLESS(tmpSettings.ParseFromString(serializedSettings)); + return tmpSettings; +} } // namespace NKikimr::NSchemeShard From 95d2d7d051be06dc497fff8c9d10a18a06723fa1 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 28 Nov 2025 09:50:11 +0000 Subject: [PATCH 16/25] remove macros --- .../tx/schemeshard/schemeshard_info_types.h | 37 ++++++++++--------- .../schemeshard_info_types_helper.h | 35 ++++++++---------- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index fd3e5279477a..8a3ae5d0b50c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3113,7 +3113,21 @@ struct TImportInfo: public TSimpleRefCount { TInstant StartTime = TInstant::Zero(); TInstant EndTime = TInstant::Zero(); +private: + template + static TString SerializeSettings(const TSettingsPB& settings) { + TString serialized; + Y_ABORT_UNLESS(settings.SerializeToString(&serialized)); + return serialized; + } + + template + auto Visit(TFunc&& func) const { + return VisitSettings(Settings, std::forward(func)); + } + public: + TString GetItemSrcPrefix(size_t i) const { if (i < Items.size() && Items[i].SrcPrefix) { return Items[i].SrcPrefix; @@ -3121,9 +3135,9 @@ struct TImportInfo: public TSimpleRefCount { // Backward compatibility. // But there can be no paths in settings at all. - return VisitSettings([i](const auto& settings) -> TString { - using T = std::decay_t; - return TItemSourcePathGetter::Get(settings, i); + return Visit([i](const auto& settings) -> TString { + // using T = std::decay_t; + return GetItemSource(settings, i); }); } @@ -3139,13 +3153,13 @@ struct TImportInfo: public TSimpleRefCount { // Getters for common settings fields bool GetNoAcl() const { - return VisitSettings([](const auto& settings) { + return Visit([](const auto& settings) { return settings.no_acl(); }); } bool GetSkipChecksumValidation() const { - return VisitSettings([](const auto& settings) { + return Visit([](const auto& settings) { return settings.skip_checksum_validation(); }); } @@ -3197,19 +3211,6 @@ struct TImportInfo: public TSimpleRefCount { { } -private: - template - static TString SerializeSettings(const TSettingsPB& settings) { - TString serialized; - Y_PROTOBUF_SUPPRESS_NODISCARD settings.SerializeToString(&serialized); - return serialized; - } - - template - auto VisitSettings(TFunc&& func) const { - return NImportHelpers::VisitSettings(Settings, std::forward(func)); - } - public: TString ToString() const; diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h index 2d064adbfbd1..74ccf1be84d3 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types_helper.h @@ -1,40 +1,37 @@ #pragma once +#include #include #include namespace NKikimr::NSchemeShard { -template -struct TItemSourcePathGetter; +template +TString GetItemSource(const TItem& item); template <> -struct TItemSourcePathGetter { - static TString Get(const Ydb::Import::ImportFromS3Settings& settings, size_t i) { - if (i < ui32(settings.items_size())) { - return settings.items(i).source_prefix(); - } - return {}; - } -}; +inline TString GetItemSource(const Ydb::Import::ImportFromS3Settings::Item& item) { + return item.source_prefix(); +} template <> -struct TItemSourcePathGetter { - static TString Get(const Ydb::Import::ImportFromFsSettings& settings, size_t i) { - if (i < ui32(settings.items_size())) { - return settings.items(i).source_path(); - } - return {}; - } -}; +inline TString GetItemSource(const Ydb::Import::ImportFromFsSettings::Item& item) { + return item.source_path(); +} +template +inline TString GetItemSource(const TSettings& settings, size_t i) { + if (i < ui32(settings.items_size())) { + return GetItemSource(settings.items(i)); + } + return {}; +} template auto VisitSettings(const TVariant& settings, TFunc&& func) { return std::visit(std::forward(func), settings); } - template TSettings ParseSettings(const TString& serializedSettings) { TSettings tmpSettings; From 9f41f65ebc2ef0334ea1578072a4e395aabd53fb Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Thu, 4 Dec 2025 10:04:37 +0000 Subject: [PATCH 17/25] done --- ydb/apps/ydbd/export/export.cpp | 7 + ydb/apps/ydbd/export/export.h | 1 + ydb/core/driver_lib/run/export.cpp | 7 + ydb/core/driver_lib/run/export.h | 1 + ydb/core/testlib/basics/appdata.h | 7 + ydb/core/tx/datashard/backup_unit.cpp | 15 + ydb/core/tx/datashard/export_fs.h | 31 ++ ydb/core/tx/datashard/export_fs_uploader.cpp | 479 ++++++++++++++++++ ydb/core/tx/datashard/export_iface.h | 1 + ydb/core/tx/datashard/restore_unit.cpp | 2 + ydb/core/tx/datashard/ut_export/ya.make | 13 - ydb/core/tx/datashard/ya.make | 1 + .../tx/schemeshard/ut_export/ut_export_fs.cpp | 365 ++++++++++--- 13 files changed, 846 insertions(+), 84 deletions(-) create mode 100644 ydb/core/tx/datashard/export_fs.h create mode 100644 ydb/core/tx/datashard/export_fs_uploader.cpp delete mode 100644 ydb/core/tx/datashard/ut_export/ya.make diff --git a/ydb/apps/ydbd/export/export.cpp b/ydb/apps/ydbd/export/export.cpp index 3f85de70e33e..c797b3fdd2dc 100644 --- a/ydb/apps/ydbd/export/export.cpp +++ b/ydb/apps/ydbd/export/export.cpp @@ -1,6 +1,7 @@ #include "export.h" #include +#include NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToYt( const IExport::TTask& task, const IExport::TTableColumns& columns) const @@ -22,6 +23,12 @@ NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToS3( #endif } +NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToFs( + const IExport::TTask& task, const IExport::TTableColumns& columns) const +{ + return new NKikimr::NDataShard::TFsExport(task, columns); +} + void TDataShardExportFactory::Shutdown() { // No cleanup required for TDataShardExportFactory. } diff --git a/ydb/apps/ydbd/export/export.h b/ydb/apps/ydbd/export/export.h index 9d077f16aa5e..e7613aaff639 100644 --- a/ydb/apps/ydbd/export/export.h +++ b/ydb/apps/ydbd/export/export.h @@ -8,5 +8,6 @@ class TDataShardExportFactory : public NKikimr::NDataShard::IExportFactory { public: IExport* CreateExportToYt(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; IExport* CreateExportToS3(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; + IExport* CreateExportToFs(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; void Shutdown() override; }; diff --git a/ydb/core/driver_lib/run/export.cpp b/ydb/core/driver_lib/run/export.cpp index 75d4386b4e17..b3c1b913d1b5 100644 --- a/ydb/core/driver_lib/run/export.cpp +++ b/ydb/core/driver_lib/run/export.cpp @@ -4,6 +4,7 @@ #include #include +#include NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToYt( const IExport::TTask& task, const IExport::TTableColumns& columns) const @@ -29,6 +30,12 @@ NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToS3( #endif } +NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToFs( + const IExport::TTask& task, const IExport::TTableColumns& columns) const +{ + return new NKikimr::NDataShard::TFsExport(task, columns); +} + void TDataShardExportFactory::Shutdown() { #ifndef KIKIMR_DISABLE_YT ShutdownYT(); diff --git a/ydb/core/driver_lib/run/export.h b/ydb/core/driver_lib/run/export.h index 5f422b5f4011..1898fa00ba93 100644 --- a/ydb/core/driver_lib/run/export.h +++ b/ydb/core/driver_lib/run/export.h @@ -8,5 +8,6 @@ class TDataShardExportFactory : public NKikimr::NDataShard::IExportFactory { public: IExport* CreateExportToYt(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; IExport* CreateExportToS3(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; + IExport* CreateExportToFs(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; void Shutdown() override; }; diff --git a/ydb/core/testlib/basics/appdata.h b/ydb/core/testlib/basics/appdata.h index 793af8526dae..6dfe55c08fe6 100644 --- a/ydb/core/testlib/basics/appdata.h +++ b/ydb/core/testlib/basics/appdata.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,12 @@ namespace NKikimr { #endif } + IExport* CreateExportToFs( + const IExport::TTask& task, const IExport::TTableColumns& columns) const override + { + return new NDataShard::TFsExport(task, columns); + } + void Shutdown() override { } }; diff --git a/ydb/core/tx/datashard/backup_unit.cpp b/ydb/core/tx/datashard/backup_unit.cpp index 0350299defa8..da12a1e2bcbc 100644 --- a/ydb/core/tx/datashard/backup_unit.cpp +++ b/ydb/core/tx/datashard/backup_unit.cpp @@ -4,6 +4,7 @@ #include "export_iface.h" #include "export_scan.h" #include "export_s3.h" +#include "export_fs.h" #include @@ -74,6 +75,20 @@ class TBackupUnit : public TBackupRestoreUnitBase Abort(op, ctx, "Exports to S3 are disabled"); return false; } + } else if (backup.HasFSSettings()) { + NBackupRestoreTraits::ECompressionCodec codec; + if (!TryCodecFromTask(backup, codec)) { + Abort(op, ctx, TStringBuilder() << "Unsupported compression codec" + << ": " << backup.GetCompression().GetCodec()); + return false; + } + + if (auto* exportFactory = appData->DataShardExportFactory) { + std::shared_ptr(exportFactory->CreateExportToFs(backup, columns)).swap(exp); + } else { + Abort(op, ctx, "Exports to FS are disabled"); + return false; + } } else { Abort(op, ctx, "Unsupported backup task"); return false; diff --git a/ydb/core/tx/datashard/export_fs.h b/ydb/core/tx/datashard/export_fs.h new file mode 100644 index 000000000000..b61005e7d451 --- /dev/null +++ b/ydb/core/tx/datashard/export_fs.h @@ -0,0 +1,31 @@ +#pragma once + +#include "export_iface.h" + +namespace NKikimr { +namespace NDataShard { + +class TFsExport: public IExport { +public: + explicit TFsExport(const TTask& task, const TTableColumns& columns) + : Task(task) + , Columns(columns) + { + Y_ENSURE(task.HasFSSettings()); + } + + IActor* CreateUploader(const TActorId& dataShard, ui64 txId) const override; + + IBuffer* CreateBuffer() const override; + + void Shutdown() const override {} + +protected: + const TTask Task; + const TTableColumns Columns; +}; + +} // NDataShard +} // NKikimr + + diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp new file mode 100644 index 000000000000..56f77cbe3d84 --- /dev/null +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -0,0 +1,479 @@ +#include "export_common.h" +#include "export_fs.h" +#include "export_scan.h" +#include "backup_restore_traits.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr { +namespace NDataShard { + +using namespace NBackup; + +struct TChangefeedExportDescriptions { + const Ydb::Table::ChangefeedDescription ChangefeedDescription; + const Ydb::Topic::DescribeTopicResult Topic; + TString Name; + TString Prefix; +}; + +class TFsUploader: public TActorBootstrapped { + struct TFsSettings { + TString BasePath; + TString Path; + + TString GetMetadataPath() const { + return TFsPath(BasePath) / Path / "metadata"; + } + + TString GetSchemePath() const { + return TFsPath(BasePath) / Path / "scheme"; + } + + TString GetPermissionsPath() const { + return TFsPath(BasePath) / Path / "permissions"; + } + + TString GetChangefeedPath(const TString& prefix) const { + return TFsPath(BasePath) / Path / (prefix + "_changefeed"); + } + + TString GetTopicPath(const TString& prefix) const { + return TFsPath(BasePath) / Path / (prefix + "_topic"); + } + + TString GetChecksumPath(const TString& objectPath) const { + return objectPath + ".checksum"; + } + + static TFsSettings FromBackupTask(const NKikimrSchemeOp::TBackupTask& task) { + Y_ENSURE(task.HasFSSettings()); + const auto& fsSettings = task.GetFSSettings(); + + TFsSettings result; + result.BasePath = fsSettings.GetBasePath(); + result.Path = fsSettings.GetPath(); + return result; + } + }; + + void WriteToFile(const TString& path, const TString& data) { + EXPORT_LOG_D("WriteToFile" + << ": self# " << SelfId() + << ", path# " << path + << ", size# " << data.size()); + + try { + // Ensure directory exists + TFsPath filePath(path); + TFsPath dirPath = filePath.Parent(); + dirPath.MkDirs(); + + // Write file + TFileOutput file(path); + file.Write(data); + file.Finish(); + + EXPORT_LOG_I("Successfully wrote file" + << ": self# " << SelfId() + << ", path# " << path + << ", size# " << data.size()); + } catch (const std::exception& ex) { + Error = TStringBuilder() << "Failed to write file '" << path << "': " << ex.what(); + EXPORT_LOG_E("WriteToFile error" + << ": self# " << SelfId() + << ", path# " << path + << ", error# " << Error); + throw; + } + } + + void WriteMessage(const google::protobuf::Message& message, const TString& path, TString& checksum) { + TString data; + google::protobuf::TextFormat::PrintToString(message, &data); + + if (EnableChecksums) { + checksum = ComputeChecksum(data); + } + + WriteToFile(path, data); + + if (EnableChecksums) { + WriteChecksum(checksum, Settings.GetChecksumPath(path), path); + } + } + + void WriteChecksum(const TString& checksum, const TString& checksumPath, const TString& objectPath) { + // Format compatible with sha256sum CLI tool + TString checksumData = checksum + " " + TFsPath(objectPath).GetName(); + WriteToFile(checksumPath, checksumData); + } + + void UploadMetadata() { + Y_ENSURE(!MetadataUploaded); + Y_ENSURE(ShardNum == 0); + + EXPORT_LOG_D("UploadMetadata" + << ": self# " << SelfId()); + + try { + if (EnableChecksums) { + MetadataChecksum = ComputeChecksum(Metadata); + } + + WriteToFile(Settings.GetMetadataPath(), Metadata); + + if (EnableChecksums) { + WriteChecksum(MetadataChecksum, Settings.GetChecksumPath(Settings.GetMetadataPath()), Settings.GetMetadataPath()); + } + + MetadataUploaded = true; + } catch (...) { + return Finish(false, Error.GetOrElse("Unknown error during metadata upload")); + } + } + + void UploadPermissions() { + Y_ENSURE(EnablePermissions && !PermissionsUploaded); + Y_ENSURE(ShardNum == 0); + + EXPORT_LOG_D("UploadPermissions" + << ": self# " << SelfId()); + + if (!Permissions) { + return Finish(false, "Cannot infer permissions"); + } + + try { + WriteMessage(Permissions.GetRef(), Settings.GetPermissionsPath(), PermissionsChecksum); + PermissionsUploaded = true; + } catch (...) { + return Finish(false, Error.GetOrElse("Unknown error during permissions upload")); + } + } + + void UploadScheme() { + Y_ENSURE(!SchemeUploaded); + Y_ENSURE(ShardNum == 0); + + EXPORT_LOG_D("UploadScheme" + << ": self# " << SelfId()); + + if (!Scheme) { + return Finish(false, "Cannot infer scheme"); + } + + try { + WriteMessage(Scheme.GetRef(), Settings.GetSchemePath(), SchemeChecksum); + SchemeUploaded = true; + } catch (...) { + return Finish(false, Error.GetOrElse("Unknown error during scheme upload")); + } + } + + void UploadChangefeed() { + Y_ENSURE(!ChangefeedsUploaded); + Y_ENSURE(ShardNum == 0); + + EXPORT_LOG_D("UploadChangefeed" + << ": self# " << SelfId() + << ", index# " << IndexExportedChangefeed + << ", total# " << Changefeeds.size()); + + if (IndexExportedChangefeed == Changefeeds.size()) { + ChangefeedsUploaded = true; + return Finish(); + } + + try { + const auto& desc = Changefeeds[IndexExportedChangefeed]; + WriteMessage(desc.ChangefeedDescription, Settings.GetChangefeedPath(desc.Prefix), ChangefeedChecksum); + UploadTopic(); + } catch (...) { + return Finish(false, Error.GetOrElse("Unknown error during changefeed upload")); + } + } + + void UploadTopic() { + Y_ENSURE(IndexExportedChangefeed < Changefeeds.size()); + Y_ENSURE(ShardNum == 0); + + EXPORT_LOG_D("UploadTopic" + << ": self# " << SelfId() + << ", index# " << IndexExportedChangefeed); + + try { + const auto& desc = Changefeeds[IndexExportedChangefeed]; + WriteMessage(desc.Topic, Settings.GetTopicPath(desc.Prefix), TopicChecksum); + + ++IndexExportedChangefeed; + UploadChangefeed(); + } catch (...) { + return Finish(false, Error.GetOrElse("Unknown error during topic upload")); + } + } + + void Finish(bool success = true, const TString& error = TString()) { + EXPORT_LOG_I("Finish" + << ": self# " << SelfId() + << ", success# " << success + << ", error# " << error); + + if (!success) { + Error = error; + } + + PassAway(); + } + + void PassAway() override { + if (Scanner) { + Send(Scanner, new TEvExportScan::TEvFinish(Error.Empty(), Error.GetOrElse(TString()))); + } + + IActor::PassAway(); + } + +public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::EXPORT_S3_UPLOADER_ACTOR; // Reuse S3 activity type + } + + static constexpr TStringBuf LogPrefix() { + return "fs"sv; + } + + explicit TFsUploader( + const TActorId& dataShard, ui64 txId, + const NKikimrSchemeOp::TBackupTask& task, + TMaybe&& scheme, + TVector changefeeds, + TMaybe&& permissions, + TString&& metadata) + : Settings(TFsSettings::FromBackupTask(task)) + , DataShard(dataShard) + , TxId(txId) + , Scheme(std::move(scheme)) + , Changefeeds(std::move(changefeeds)) + , Metadata(std::move(metadata)) + , Permissions(std::move(permissions)) + , ShardNum(task.GetShardNum()) + , SchemeUploaded(ShardNum == 0 ? false : true) + , ChangefeedsUploaded(ShardNum == 0 ? false : true) + , MetadataUploaded(ShardNum == 0 ? false : true) + , PermissionsUploaded(ShardNum == 0 ? false : true) + , EnableChecksums(task.GetEnableChecksums()) + , EnablePermissions(task.GetEnablePermissions()) + { + Y_UNUSED(TxId); // Reserved for future use + } + + void Bootstrap() { + EXPORT_LOG_D("Bootstrap" + << ": self# " << SelfId() + << ", shardNum# " << ShardNum); + + // For shard 0, upload metadata, permissions, scheme, changefeeds + // For other shards, we would upload data (not implemented yet) + if (ShardNum != 0) { + // For now, just finish successfully for non-zero shards + // Data export will be implemented later + return Finish(); + } + + try { + if (!MetadataUploaded) { + UploadMetadata(); + } + + if (EnablePermissions && !PermissionsUploaded) { + UploadPermissions(); + } + + if (!SchemeUploaded) { + UploadScheme(); + } + + if (!ChangefeedsUploaded) { + UploadChangefeed(); + } else { + Finish(); + } + } catch (...) { + Finish(false, Error ? *Error : "Unknown error during bootstrap"); + } + } + + STATEFN(StateBase) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvExportScan::TEvReady, Handle); + sFunc(TEvents::TEvPoisonPill, PassAway); + } + } + + void Handle(TEvExportScan::TEvReady::TPtr& ev) { + EXPORT_LOG_D("Handle TEvExportScan::TEvReady" + << ": self# " << SelfId() + << ", sender# " << ev->Sender); + + Scanner = ev->Sender; + + // For schema-only export, we're already done + if (Error) { + return PassAway(); + } + + // Data export not implemented yet, so we finish here + Finish(); + } + +private: + TFsSettings Settings; + const TActorId DataShard; + const ui64 TxId; + const TMaybe Scheme; + const TVector Changefeeds; + const TString Metadata; + const TMaybe Permissions; + + const ui32 ShardNum; + bool SchemeUploaded; + bool ChangefeedsUploaded; + bool MetadataUploaded; + bool PermissionsUploaded; + + ui64 IndexExportedChangefeed = 0; + + TActorId Scanner; + + bool EnableChecksums; + bool EnablePermissions; + + TString MetadataChecksum; + TString ChangefeedChecksum; + TString TopicChecksum; + TString SchemeChecksum; + TString PermissionsChecksum; + + TMaybe Error; +}; // TFsUploader + +// Dummy buffer for schema-only export (no data scanning needed) +class TSchemaOnlyBuffer : public NExportScan::IBuffer { +public: + void ColumnsOrder(const TVector&) override { + // No-op for schema-only export + } + + bool Collect(const NTable::IScan::TRow&) override { + // Should never be called for schema-only export + return false; + } + + IEventBase* PrepareEvent(bool, TStats&) override { + // Should never be called for schema-only export + return nullptr; + } + + void Clear() override { + // No-op for schema-only export + } + + bool IsFilled() const override { + // Never filled for schema-only export + return false; + } + + TString GetError() const override { + return {}; + } +}; + +IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { + auto scheme = (Task.GetShardNum() == 0) + ? GenYdbScheme(Columns, Task.GetTable()) + : Nothing(); + + TMetadata metadata; + metadata.SetVersion(Task.GetEnableChecksums() ? 1 : 0); + metadata.SetEnablePermissions(Task.GetEnablePermissions()); + + TVector changefeeds; + const bool enableChangefeedsExport = AppData() && AppData()->FeatureFlags.GetEnableChangefeedsExport(); + if (enableChangefeedsExport) { + const auto& persQueues = Task.GetChangefeedUnderlyingTopics(); + const auto& cdcStreams = Task.GetTable().GetTable().GetCdcStreams(); + Y_ASSERT(persQueues.size() == cdcStreams.size()); + + const int changefeedsCount = cdcStreams.size(); + changefeeds.reserve(changefeedsCount); + + for (int i = 0; i < changefeedsCount; ++i) { + Ydb::Table::ChangefeedDescription changefeed; + const auto& cdcStream = cdcStreams.at(i); + FillChangefeedDescription(changefeed, cdcStream); + + Ydb::Topic::DescribeTopicResult topic; + const auto& pq = persQueues.at(i); + Ydb::StatusIds::StatusCode status; + TString error; + FillTopicDescription(topic, pq.GetPersQueueGroup(), pq.GetSelf(), cdcStream.GetName(), status, error); + // Unnecessary fields + topic.clear_self(); + topic.clear_topic_stats(); + + auto& descr = changefeeds.emplace_back(changefeed, topic); + descr.Name = descr.ChangefeedDescription.name(); + // For filesystem, use actual names (no anonymization for now) + descr.Prefix = descr.Name; + + metadata.AddChangefeed(TChangefeedMetadata{ + .ExportPrefix = descr.Prefix, + .Name = descr.Name, + }); + } + } + + auto permissions = (Task.GetEnablePermissions() && Task.GetShardNum() == 0) + ? GenYdbPermissions(Task.GetTable()) + : Nothing(); + + TFullBackupMetadata::TPtr backup = new TFullBackupMetadata{ + .SnapshotVts = TVirtualTimestamp( + Task.GetSnapshotStep(), + Task.GetSnapshotTxId()) + }; + metadata.AddFullBackup(backup); + + return new TFsUploader( + dataShard, txId, Task, std::move(scheme), std::move(changefeeds), std::move(permissions), metadata.Serialize()); +} + +IExport::IBuffer* TFsExport::CreateBuffer() const { + // For schema-only export, return a dummy buffer + // Data export will be implemented later + return new TSchemaOnlyBuffer(); +} + +} // NDataShard +} // NKikimr + diff --git a/ydb/core/tx/datashard/export_iface.h b/ydb/core/tx/datashard/export_iface.h index 54e45ba39bb9..b34bc0e26d1c 100644 --- a/ydb/core/tx/datashard/export_iface.h +++ b/ydb/core/tx/datashard/export_iface.h @@ -27,6 +27,7 @@ class IExportFactory { virtual IExport* CreateExportToYt(const IExport::TTask& task, const IExport::TTableColumns& columns) const = 0; virtual IExport* CreateExportToS3(const IExport::TTask& task, const IExport::TTableColumns& columns) const = 0; + virtual IExport* CreateExportToFs(const IExport::TTask& task, const IExport::TTableColumns& columns) const = 0; virtual void Shutdown() = 0; }; diff --git a/ydb/core/tx/datashard/restore_unit.cpp b/ydb/core/tx/datashard/restore_unit.cpp index 91fce89c3edf..6d9bc5a66890 100644 --- a/ydb/core/tx/datashard/restore_unit.cpp +++ b/ydb/core/tx/datashard/restore_unit.cpp @@ -49,6 +49,8 @@ class TRestoreUnit : public TBackupRestoreUnitBaseSetAsyncJobResult(new TImportJobProduct(true, TString(), 0, 0)); break; diff --git a/ydb/core/tx/datashard/ut_export/ya.make b/ydb/core/tx/datashard/ut_export/ya.make deleted file mode 100644 index 7e204cc7e216..000000000000 --- a/ydb/core/tx/datashard/ut_export/ya.make +++ /dev/null @@ -1,13 +0,0 @@ -UNITTEST_FOR(ydb/core/tx/datashard) - -PEERDIR( - ydb/core/testlib/default -) - -YQL_LAST_ABI_VERSION() - -SRCS( - export_s3_buffer_ut.cpp -) - -END() diff --git a/ydb/core/tx/datashard/ya.make b/ydb/core/tx/datashard/ya.make index 8e59941d595e..e8e88aede10c 100644 --- a/ydb/core/tx/datashard/ya.make +++ b/ydb/core/tx/datashard/ya.make @@ -153,6 +153,7 @@ SRCS( execution_unit_ctors.h execution_unit_kind.h export_common.cpp + export_fs_uploader.cpp export_iface.cpp export_iface.h export_scan.cpp diff --git a/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp index d5b58bd4c8f5..725cee1ea1a0 100644 --- a/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp +++ b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp @@ -1,46 +1,299 @@ #include #include +#include + +#include +#include +#include +#include +#include + +#include using namespace NSchemeShardUT_Private; -Y_UNIT_TEST_SUITE(TSchemeShardExportToFsTests) { - Y_UNIT_TEST(ShouldSucceedCreateExportToFs) { - TTestBasicRuntime runtime; - TTestEnv env(runtime); +namespace { + + void Run(TTestBasicRuntime& runtime, TTestEnv& env, const TVector& tables, + const TString& request, + Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS) { + ui64 txId = 100; - TestCreateTable(runtime, ++txId, "/MyRoot", R"( - Name: "Table" - Columns { Name: "key" Type: "Utf8" } - Columns { Name: "value" Type: "Utf8" } - KeyColumnNames: ["key"] - )"); + for (const auto& table : tables) { + TestCreateTable(runtime, ++txId, "/MyRoot", table); + env.TestWaitNotification(runtime, txId); + } + + runtime.SetLogPriority(NKikimrServices::DATASHARD_BACKUP, NActors::NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::EXPORT, NActors::NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::DATASHARD_RESTORE, NActors::NLog::PRI_TRACE); + + const auto initialStatus = expectedStatus == Ydb::StatusIds::PRECONDITION_FAILED + ? expectedStatus + : Ydb::StatusIds::SUCCESS; + TestExport(runtime, ++txId, "/MyRoot", request, "", "", initialStatus); env.TestWaitNotification(runtime, txId); - // Test that schemeshard accepts ExportToFsSettings - TestExport(runtime, ++txId, "/MyRoot", R"( - ExportToFsSettings { - base_path: "/mnt/exports" - items { - source_path: "/MyRoot/Table" - destination_path: "backup/Table" - } + if (initialStatus != Ydb::StatusIds::SUCCESS) { + return; + } + + const ui64 exportId = txId; + TestGetExport(runtime, exportId, "/MyRoot", expectedStatus); + + TestForgetExport(runtime, ++txId, "/MyRoot", exportId); + env.TestWaitNotification(runtime, exportId); + + TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::NOT_FOUND); + } + + class TFsExportFixture : public NUnitTest::TBaseFixture { + public: + void RunFs(const TVector& tables, const TString& basePath, const TString& destinationPath, + Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS, + bool checkFsFilesExistence = true) { + + TString requestStr = Sprintf(R"( + ExportToFsSettings { + base_path: "%s" + items { + source_path: "/MyRoot/%s" + destination_path: "%s" + } + } + )", basePath.c_str(), tables[0].Contains("Name:") ? ExtractTableName(tables[0]).c_str() : "Table", destinationPath.c_str()); + + Env(); // Init test env + Runtime().GetAppData().FeatureFlags.SetEnableChecksumsExport(true); + Runtime().GetAppData().FeatureFlags.SetEnablePermissionsExport(true); + + Run(Runtime(), Env(), tables, requestStr, expectedStatus); + + if (expectedStatus == Ydb::StatusIds::SUCCESS && checkFsFilesExistence) { + TFsPath exportPath = TFsPath(basePath) / destinationPath; + + // Check metadata file + TFsPath metadataPath = exportPath / "metadata"; + UNIT_ASSERT_C(metadataPath.Exists(), "Metadata file should exist: " << metadataPath.GetPath()); + + // Check scheme file + TFsPath schemePath = exportPath / "scheme"; + UNIT_ASSERT_C(schemePath.Exists(), "Scheme file should exist: " << schemePath.GetPath()); + + // Check permissions file (if enabled) + if (Runtime().GetAppData().FeatureFlags.GetEnablePermissionsExport()) { + TFsPath permissionsPath = exportPath / "permissions"; + UNIT_ASSERT_C(permissionsPath.Exists(), "Permissions file should exist: " << permissionsPath.GetPath()); + } + + // Check checksums (if enabled) + if (Runtime().GetAppData().FeatureFlags.GetEnableChecksumsExport()) { + TFsPath metadataChecksumPath = exportPath / "metadata.checksum"; + UNIT_ASSERT_C(metadataChecksumPath.Exists(), "Metadata checksum should exist: " << metadataChecksumPath.GetPath()); + + TFsPath schemeChecksumPath = exportPath / "scheme.checksum"; + UNIT_ASSERT_C(schemeChecksumPath.Exists(), "Scheme checksum should exist: " << schemeChecksumPath.GetPath()); + } } - )"); + } - // Check that export was created - auto response = TestGetExport(runtime, txId, "/MyRoot"); - UNIT_ASSERT(response.GetResponse().GetEntry().HasExportToFsSettings()); + void RunFsMultiTable(const TVector& tables, const TString& basePath, const TVector& destinationPaths, + Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS) { + + TStringBuilder items; + for (size_t i = 0; i < tables.size(); ++i) { + TString tableName = ExtractTableName(tables[i]); + TString destPath = i < destinationPaths.size() ? destinationPaths[i] : ("backup/" + tableName); + + items << "items {" + << " source_path: \"/MyRoot/" << tableName << "\"" + << " destination_path: \"" << destPath << "\"" + << " }"; + } + + TString requestStr = Sprintf(R"( + ExportToFsSettings { + base_path: "%s" + %s + } + )", basePath.c_str(), items.c_str()); + + Env(); // Init test env + Runtime().GetAppData().FeatureFlags.SetEnableChecksumsExport(true); + Runtime().GetAppData().FeatureFlags.SetEnablePermissionsExport(true); + + Run(Runtime(), Env(), tables, requestStr, expectedStatus); + + if (expectedStatus == Ydb::StatusIds::SUCCESS) { + for (size_t i = 0; i < destinationPaths.size(); ++i) { + TFsPath exportPath = TFsPath(basePath) / destinationPaths[i]; + TFsPath schemePath = exportPath / "scheme"; + UNIT_ASSERT_C(schemePath.Exists(), "Scheme file should exist for table " << i << ": " << schemePath.GetPath()); + } + } + } + + bool HasFsFile(const TString& basePath, const TString& relativePath) { + TFsPath filePath = TFsPath(basePath) / relativePath; + return filePath.Exists(); + } + + TString GetFsFileContent(const TString& basePath, const TString& relativePath) { + TFsPath filePath = TFsPath(basePath) / relativePath; + if (filePath.Exists()) { + TFileInput file(filePath.GetPath()); + return file.ReadAll(); + } + return {}; + } + + protected: + TTestBasicRuntime& Runtime() { + if (!TestRuntime) { + TestRuntime.ConstructInPlace(); + } + return *TestRuntime; + } + + TTestEnvOptions& EnvOptions() { + if (!TestEnvOptions) { + TestEnvOptions.ConstructInPlace(); + } + return *TestEnvOptions; + } + + TTestEnv& Env() { + if (!TestEnv) { + TestEnv.ConstructInPlace(Runtime(), EnvOptions()); + } + return *TestEnv; + } + + TTempDir& TempDir() { + if (!TestTempDir) { + TestTempDir.ConstructInPlace(); + } + return *TestTempDir; + } + + private: + static TString ExtractTableName(const TString& tableSchema) { + // Extract "Name: "Table"" from schema + size_t pos = tableSchema.find("Name:"); + if (pos == TString::npos) { + return "Table"; + } + pos = tableSchema.find('"', pos); + if (pos == TString::npos) { + return "Table"; + } + size_t endPos = tableSchema.find('"', pos + 1); + if (endPos == TString::npos) { + return "Table"; + } + return tableSchema.substr(pos + 1, endPos - pos - 1); + } + + TMaybe TestRuntime; + TMaybe TestEnvOptions; + TMaybe TestEnv; + TMaybe TestTempDir; + }; + +} // anonymous + +Y_UNIT_TEST_SUITE_F(TExportToFsTests, TFsExportFixture) { + Y_UNIT_TEST(ShouldSucceedOnSingleShardTable) { + TString basePath = TempDir().Path(); - const auto& settings = response.GetResponse().GetEntry().GetExportToFsSettings(); - UNIT_ASSERT_VALUES_EQUAL(settings.base_path(), "/mnt/exports"); - UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 1); - UNIT_ASSERT_VALUES_EQUAL(settings.items(0).source_path(), "/MyRoot/Table"); - UNIT_ASSERT_VALUES_EQUAL(settings.items(0).destination_path(), "backup/Table"); + RunFs({ + R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )", + }, basePath, "backup/Table"); } - Y_UNIT_TEST(ShouldAcceptCompressionForFs) { + Y_UNIT_TEST(ShouldSucceedOnMultiShardTable) { + TString basePath = TempDir().Path(); + + RunFs({ + R"( + Name: "Table" + Columns { Name: "key" Type: "Uint32" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + UniformPartitionsCount: 2 + )", + }, basePath, "backup/MultiShardTable"); + } + + Y_UNIT_TEST(ShouldSucceedOnManyTables) { + TString basePath = TempDir().Path(); + + RunFsMultiTable({ + R"( + Name: "Table1" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )", + R"( + Name: "Table2" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )", + }, basePath, {"backup/Table1", "backup/Table2"}); + } + + Y_UNIT_TEST(ShouldCheckFilesCreatedOnDisk) { + TString basePath = TempDir().Path(); + TString destinationPath = "backup/TestTable"; + + RunFs({ + R"( + Name: "TestTable" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )", + }, basePath, destinationPath); + + // Check all expected files exist + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata"), "metadata"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme"), "scheme"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions"), "permissions"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata.checksum"), "metadata.checksum"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme.checksum"), "scheme.checksum"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions.checksum"), "permissions.checksum"); + + // Check scheme content + TString schemeContent = GetFsFileContent(basePath, destinationPath + "/scheme"); + UNIT_ASSERT_C(!schemeContent.empty(), "Scheme file should not be empty"); + + Ydb::Table::CreateTableRequest schemeProto; + UNIT_ASSERT_C(google::protobuf::TextFormat::ParseFromString(schemeContent, &schemeProto), + "Should parse scheme protobuf"); + + UNIT_ASSERT_VALUES_EQUAL(schemeProto.columns_size(), 2); + UNIT_ASSERT_VALUES_EQUAL(schemeProto.columns(0).name(), "key"); + UNIT_ASSERT_VALUES_EQUAL(schemeProto.columns(1).name(), "value"); + UNIT_ASSERT_VALUES_EQUAL(schemeProto.primary_key_size(), 1); + UNIT_ASSERT_VALUES_EQUAL(schemeProto.primary_key(0), "key"); + + // Check checksum format + TString checksumContent = GetFsFileContent(basePath, destinationPath + "/metadata.checksum"); + UNIT_ASSERT_C(!checksumContent.empty(), "Checksum should not be empty"); + UNIT_ASSERT_C(checksumContent.Contains("metadata"), "Checksum should contain filename"); + UNIT_ASSERT_GE(checksumContent.size(), 64); // sha256 is 64 hex chars + } + + Y_UNIT_TEST(ShouldAcceptCompressionSettings) { TTestBasicRuntime runtime; TTestEnv env(runtime); ui64 txId = 100; @@ -55,7 +308,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExportToFsTests) { TestExport(runtime, ++txId, "/MyRoot", R"( ExportToFsSettings { - base_path: "/mnt/exports" + base_path: "/tmp/ydb_export" compression: "zstd-3" items { source_path: "/MyRoot/Table" @@ -86,7 +339,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExportToFsTests) { TestExport(runtime, ++txId, "/MyRoot", R"( ExportToFsSettings { - base_path: "/mnt/exports" + base_path: "/tmp/ydb_export" items { source_path: "/MyRoot/NonExistentTable" destination_path: "backup/Table" @@ -113,7 +366,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExportToFsTests) { TestExport(runtime, ++txId, "/MyRoot", R"( ExportToFsSettings { - base_path: "/mnt/exports" + base_path: "/tmp/ydb_export" items { source_path: "/MyRoot/TableToDelete" destination_path: "backup/Table" @@ -122,46 +375,16 @@ Y_UNIT_TEST_SUITE(TSchemeShardExportToFsTests) { )", "", "", Ydb::StatusIds::BAD_REQUEST); } - Y_UNIT_TEST(FsExportWithMultipleTables) { - TTestBasicRuntime runtime; - TTestEnv env(runtime); - ui64 txId = 100; - - TestCreateTable(runtime, ++txId, "/MyRoot", R"( - Name: "Table1" - Columns { Name: "key" Type: "Utf8" } - Columns { Name: "value" Type: "Utf8" } - KeyColumnNames: ["key"] - )"); - env.TestWaitNotification(runtime, txId); - - TestCreateTable(runtime, ++txId, "/MyRoot", R"( - Name: "Table2" - Columns { Name: "key" Type: "Uint64" } - Columns { Name: "value" Type: "Uint64" } - KeyColumnNames: ["key"] - )"); - env.TestWaitNotification(runtime, txId); - - TestExport(runtime, ++txId, "/MyRoot", R"( - ExportToFsSettings { - base_path: "/mnt/exports" - items { - source_path: "/MyRoot/Table1" - destination_path: "backup/Table1" - } - items { - source_path: "/MyRoot/Table2" - destination_path: "backup/Table2" - } - } - )"); - - auto response = TestGetExport(runtime, txId, "/MyRoot"); - UNIT_ASSERT(response.GetResponse().GetEntry().HasExportToFsSettings()); + Y_UNIT_TEST(ShouldHandleNestedPaths) { + TString basePath = TempDir().Path(); - const auto& settings = response.GetResponse().GetEntry().GetExportToFsSettings(); - UNIT_ASSERT_VALUES_EQUAL(settings.items_size(), 2); + RunFs({ + R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )", + }, basePath, "deep/nested/directory/structure/backup"); } } - From b83132f355be073fb762cc4db116b7827f853917 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Thu, 4 Dec 2025 12:04:53 +0000 Subject: [PATCH 18/25] tests passed --- ydb/core/tx/datashard/backup_unit.cpp | 24 +++- ydb/core/tx/datashard/export_fs_uploader.cpp | 119 ++++++++++++++++--- ydb/core/tx/datashard/export_scan.cpp | 8 ++ ydb/core/tx/datashard/restore_unit.cpp | 2 - 4 files changed, 132 insertions(+), 21 deletions(-) diff --git a/ydb/core/tx/datashard/backup_unit.cpp b/ydb/core/tx/datashard/backup_unit.cpp index da12a1e2bcbc..26433ebfb48c 100644 --- a/ydb/core/tx/datashard/backup_unit.cpp +++ b/ydb/core/tx/datashard/backup_unit.cpp @@ -33,11 +33,15 @@ class TBackupUnit : public TBackupRestoreUnitBase } bool Run(TOperation::TPtr op, TTransactionContext& txc, const TActorContext& ctx) override { + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run START" << std::endl; TActiveTransaction* tx = dynamic_cast(op.Get()); Y_ENSURE(tx, "cannot cast operation of kind " << op->GetKind()); Y_ENSURE(tx->GetSchemeTx().HasBackup()); const auto& backup = tx->GetSchemeTx().GetBackup(); + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run HasYTSettings=" << backup.HasYTSettings() + << " HasS3Settings=" << backup.HasS3Settings() + << " HasFSSettings=" << backup.HasFSSettings() << std::endl; const ui64 tableId = backup.GetTableId(); Y_ENSURE(DataShard.GetUserTables().contains(tableId)); @@ -76,16 +80,24 @@ class TBackupUnit : public TBackupRestoreUnitBase return false; } } else if (backup.HasFSSettings()) { + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run FSSettings detected" << std::endl; + std::cerr << "FSEXPORT_DEBUG: FSSettings BasePath=" + << " Path=" << std::endl; NBackupRestoreTraits::ECompressionCodec codec; if (!TryCodecFromTask(backup, codec)) { + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run unsupported compression codec" << std::endl; Abort(op, ctx, TStringBuilder() << "Unsupported compression codec" << ": " << backup.GetCompression().GetCodec()); return false; } + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run codec OK" << std::endl; if (auto* exportFactory = appData->DataShardExportFactory) { + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run creating FS export" << std::endl; std::shared_ptr(exportFactory->CreateExportToFs(backup, columns)).swap(exp); + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run FS export created, exp=" << (void*)exp.get() << std::endl; } else { + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run exportFactory is NULL" << std::endl; Abort(op, ctx, "Exports to FS are disabled"); return false; } @@ -94,12 +106,20 @@ class TBackupUnit : public TBackupRestoreUnitBase return false; } + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run creating uploader lambda" << std::endl; auto createUploader = [self = DataShard.SelfId(), txId = op->GetTxId(), exp]() { - return exp->CreateUploader(self, txId); + std::cerr << "FSEXPORT_DEBUG: createUploader lambda called" << std::endl; + auto* uploader = exp->CreateUploader(self, txId); + std::cerr << "FSEXPORT_DEBUG: createUploader lambda returned uploader=" << (void*)uploader << std::endl; + return uploader; }; + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run calling CreateBuffer" << std::endl; THolder buffer{exp->CreateBuffer()}; + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run CreateBuffer returned buffer=" << (void*)buffer.Get() << std::endl; + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run calling CreateExportScan" << std::endl; THolder scan{CreateExportScan(std::move(buffer), createUploader)}; + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run CreateExportScan returned scan=" << (void*)scan.Get() << std::endl; const auto& taskName = appData->DataShardConfig.GetBackupTaskName(); const auto taskPrio = appData->DataShardConfig.GetBackupTaskPriority(); @@ -114,12 +134,14 @@ class TBackupUnit : public TBackupRestoreUnitBase readAheadHi = readAheadHiOverride; } + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run calling QueueScan localTableId=" << localTableId << std::endl; tx->SetScanTask(DataShard.QueueScan(localTableId, scan.Release(), op->GetTxId(), TScanOptions() .SetResourceBroker(taskName, taskPrio) .SetReadAhead(readAheadLo, readAheadHi) .SetReadPrio(TScanOptions::EReadPrio::Low) )); + std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run QueueScan done, returning true" << std::endl; return true; } diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index 56f77cbe3d84..f7da076bd473 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -77,27 +77,35 @@ class TFsUploader: public TActorBootstrapped { }; void WriteToFile(const TString& path, const TString& data) { + std::cerr << "FSEXPORT_DEBUG: WriteToFile START path=" << path << " size=" << data.size() << std::endl; EXPORT_LOG_D("WriteToFile" << ": self# " << SelfId() << ", path# " << path << ", size# " << data.size()); try { - // Ensure directory exists + std::cerr << "FSEXPORT_DEBUG: WriteToFile creating TFsPath" << std::endl; TFsPath filePath(path); TFsPath dirPath = filePath.Parent(); + std::cerr << "FSEXPORT_DEBUG: WriteToFile calling MkDirs for " << dirPath.GetPath() << std::endl; dirPath.MkDirs(); + std::cerr << "FSEXPORT_DEBUG: WriteToFile MkDirs done" << std::endl; // Write file + std::cerr << "FSEXPORT_DEBUG: WriteToFile opening file" << std::endl; TFileOutput file(path); + std::cerr << "FSEXPORT_DEBUG: WriteToFile writing data" << std::endl; file.Write(data); + std::cerr << "FSEXPORT_DEBUG: WriteToFile finishing file" << std::endl; file.Finish(); + std::cerr << "FSEXPORT_DEBUG: WriteToFile SUCCESS" << std::endl; EXPORT_LOG_I("Successfully wrote file" << ": self# " << SelfId() << ", path# " << path << ", size# " << data.size()); } catch (const std::exception& ex) { + std::cerr << "FSEXPORT_DEBUG: WriteToFile EXCEPTION: " << ex.what() << std::endl; Error = TStringBuilder() << "Failed to write file '" << path << "': " << ex.what(); EXPORT_LOG_E("WriteToFile error" << ": self# " << SelfId() @@ -129,6 +137,7 @@ class TFsUploader: public TActorBootstrapped { } void UploadMetadata() { + std::cerr << "FSEXPORT_DEBUG: UploadMetadata START" << std::endl; Y_ENSURE(!MetadataUploaded); Y_ENSURE(ShardNum == 0); @@ -137,22 +146,30 @@ class TFsUploader: public TActorBootstrapped { try { if (EnableChecksums) { + std::cerr << "FSEXPORT_DEBUG: UploadMetadata computing checksum" << std::endl; MetadataChecksum = ComputeChecksum(Metadata); + std::cerr << "FSEXPORT_DEBUG: UploadMetadata checksum=" << MetadataChecksum << std::endl; } + std::cerr << "FSEXPORT_DEBUG: UploadMetadata calling WriteToFile" << std::endl; WriteToFile(Settings.GetMetadataPath(), Metadata); + std::cerr << "FSEXPORT_DEBUG: UploadMetadata WriteToFile done" << std::endl; if (EnableChecksums) { + std::cerr << "FSEXPORT_DEBUG: UploadMetadata writing checksum" << std::endl; WriteChecksum(MetadataChecksum, Settings.GetChecksumPath(Settings.GetMetadataPath()), Settings.GetMetadataPath()); } MetadataUploaded = true; + std::cerr << "FSEXPORT_DEBUG: UploadMetadata SUCCESS" << std::endl; } catch (...) { + std::cerr << "FSEXPORT_DEBUG: UploadMetadata EXCEPTION" << std::endl; return Finish(false, Error.GetOrElse("Unknown error during metadata upload")); } } void UploadPermissions() { + std::cerr << "FSEXPORT_DEBUG: UploadPermissions START" << std::endl; Y_ENSURE(EnablePermissions && !PermissionsUploaded); Y_ENSURE(ShardNum == 0); @@ -160,18 +177,23 @@ class TFsUploader: public TActorBootstrapped { << ": self# " << SelfId()); if (!Permissions) { + std::cerr << "FSEXPORT_DEBUG: UploadPermissions Permissions is empty" << std::endl; return Finish(false, "Cannot infer permissions"); } + std::cerr << "FSEXPORT_DEBUG: UploadPermissions has Permissions, calling WriteMessage" << std::endl; try { WriteMessage(Permissions.GetRef(), Settings.GetPermissionsPath(), PermissionsChecksum); PermissionsUploaded = true; + std::cerr << "FSEXPORT_DEBUG: UploadPermissions SUCCESS" << std::endl; } catch (...) { + std::cerr << "FSEXPORT_DEBUG: UploadPermissions EXCEPTION" << std::endl; return Finish(false, Error.GetOrElse("Unknown error during permissions upload")); } } void UploadScheme() { + std::cerr << "FSEXPORT_DEBUG: UploadScheme START" << std::endl; Y_ENSURE(!SchemeUploaded); Y_ENSURE(ShardNum == 0); @@ -179,13 +201,17 @@ class TFsUploader: public TActorBootstrapped { << ": self# " << SelfId()); if (!Scheme) { + std::cerr << "FSEXPORT_DEBUG: UploadScheme Scheme is empty" << std::endl; return Finish(false, "Cannot infer scheme"); } + std::cerr << "FSEXPORT_DEBUG: UploadScheme has Scheme, calling WriteMessage" << std::endl; try { WriteMessage(Scheme.GetRef(), Settings.GetSchemePath(), SchemeChecksum); SchemeUploaded = true; + std::cerr << "FSEXPORT_DEBUG: UploadScheme SUCCESS" << std::endl; } catch (...) { + std::cerr << "FSEXPORT_DEBUG: UploadScheme EXCEPTION" << std::endl; return Finish(false, Error.GetOrElse("Unknown error during scheme upload")); } } @@ -233,6 +259,7 @@ class TFsUploader: public TActorBootstrapped { } void Finish(bool success = true, const TString& error = TString()) { + std::cerr << "FSEXPORT_DEBUG: Finish called success=" << success << " error=" << error << std::endl; EXPORT_LOG_I("Finish" << ": self# " << SelfId() << ", success# " << success @@ -240,17 +267,26 @@ class TFsUploader: public TActorBootstrapped { if (!success) { Error = error; + std::cerr << "FSEXPORT_DEBUG: Finish setting Error=" << error << std::endl; } + std::cerr << "FSEXPORT_DEBUG: Finish calling PassAway" << std::endl; PassAway(); } void PassAway() override { + std::cerr << "FSEXPORT_DEBUG: PassAway called Scanner=" << (void*)&Scanner << std::endl; if (Scanner) { + std::cerr << "FSEXPORT_DEBUG: PassAway sending TEvFinish to Scanner" << std::endl; Send(Scanner, new TEvExportScan::TEvFinish(Error.Empty(), Error.GetOrElse(TString()))); + std::cerr << "FSEXPORT_DEBUG: PassAway TEvFinish sent" << std::endl; + } else { + std::cerr << "FSEXPORT_DEBUG: PassAway Scanner is NULL" << std::endl; } + std::cerr << "FSEXPORT_DEBUG: PassAway calling IActor::PassAway" << std::endl; IActor::PassAway(); + std::cerr << "FSEXPORT_DEBUG: PassAway done" << std::endl; } public: @@ -284,43 +320,70 @@ class TFsUploader: public TActorBootstrapped { , EnableChecksums(task.GetEnableChecksums()) , EnablePermissions(task.GetEnablePermissions()) { - Y_UNUSED(TxId); // Reserved for future use + Y_UNUSED(TxId); + std::cerr << "FSEXPORT_DEBUG: TFsUploader constructor shardNum=" << ShardNum + << " hasScheme=" << Scheme.Defined() + << " hasPermissions=" << Permissions.Defined() + << " changefeeds.size=" << Changefeeds.size() + << " EnableChecksums=" << EnableChecksums + << " EnablePermissions=" << EnablePermissions << std::endl; } void Bootstrap() { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::Bootstrap START shardNum=" << ShardNum << std::endl; EXPORT_LOG_D("Bootstrap" << ": self# " << SelfId() << ", shardNum# " << ShardNum); - // For shard 0, upload metadata, permissions, scheme, changefeeds - // For other shards, we would upload data (not implemented yet) + Become(&TThis::StateBase); + std::cerr << "FSEXPORT_DEBUG: TFsUploader::Bootstrap END (waiting for TEvReady)" << std::endl; + } + + void DoWork() { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork START shardNum=" << ShardNum << std::endl; + if (ShardNum != 0) { - // For now, just finish successfully for non-zero shards - // Data export will be implemented later + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork shardNum!=0, finishing" << std::endl; return Finish(); } try { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork MetadataUploaded=" << MetadataUploaded << std::endl; if (!MetadataUploaded) { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadMetadata" << std::endl; UploadMetadata(); + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadMetadata done" << std::endl; } + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork EnablePermissions=" << EnablePermissions + << " PermissionsUploaded=" << PermissionsUploaded << std::endl; if (EnablePermissions && !PermissionsUploaded) { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadPermissions" << std::endl; UploadPermissions(); + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadPermissions done" << std::endl; } + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork SchemeUploaded=" << SchemeUploaded << std::endl; if (!SchemeUploaded) { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadScheme" << std::endl; UploadScheme(); + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadScheme done" << std::endl; } + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork ChangefeedsUploaded=" << ChangefeedsUploaded << std::endl; if (!ChangefeedsUploaded) { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadChangefeed" << std::endl; UploadChangefeed(); + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadChangefeed done" << std::endl; } else { + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork no changefeeds, calling Finish" << std::endl; Finish(); } } catch (...) { - Finish(false, Error ? *Error : "Unknown error during bootstrap"); + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork EXCEPTION caught" << std::endl; + Finish(false, Error ? *Error : "Unknown error during work"); } + std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork END" << std::endl; } STATEFN(StateBase) { @@ -331,19 +394,20 @@ class TFsUploader: public TActorBootstrapped { } void Handle(TEvExportScan::TEvReady::TPtr& ev) { + std::cerr << "FSEXPORT_DEBUG: Handle TEvExportScan::TEvReady START sender=" << ev->Sender.ToString() << std::endl; EXPORT_LOG_D("Handle TEvExportScan::TEvReady" << ": self# " << SelfId() << ", sender# " << ev->Sender); Scanner = ev->Sender; + std::cerr << "FSEXPORT_DEBUG: Handle TEvExportScan::TEvReady Scanner set, calling DoWork" << std::endl; - // For schema-only export, we're already done if (Error) { + std::cerr << "FSEXPORT_DEBUG: Handle TEvExportScan::TEvReady Error is set, calling PassAway" << std::endl; return PassAway(); } - // Data export not implemented yet, so we finish here - Finish(); + DoWork(); } private: @@ -377,41 +441,49 @@ class TFsUploader: public TActorBootstrapped { TMaybe Error; }; // TFsUploader -// Dummy buffer for schema-only export (no data scanning needed) class TSchemaOnlyBuffer : public NExportScan::IBuffer { public: + TSchemaOnlyBuffer() { + std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer constructor" << std::endl; + } + void ColumnsOrder(const TVector&) override { - // No-op for schema-only export + std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::ColumnsOrder called" << std::endl; } bool Collect(const NTable::IScan::TRow&) override { - // Should never be called for schema-only export + std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::Collect called - returning false" << std::endl; return false; } IEventBase* PrepareEvent(bool, TStats&) override { - // Should never be called for schema-only export + std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::PrepareEvent called - returning nullptr" << std::endl; return nullptr; } void Clear() override { - // No-op for schema-only export + std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::Clear called" << std::endl; } bool IsFilled() const override { - // Never filled for schema-only export + std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::IsFilled called - returning false" << std::endl; return false; } TString GetError() const override { + std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::GetError called" << std::endl; return {}; } }; IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader START shardNum=" << Task.GetShardNum() << std::endl; + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader dataShard=" << dataShard.ToString() << " txId=" << txId << std::endl; + auto scheme = (Task.GetShardNum() == 0) ? GenYdbScheme(Columns, Task.GetTable()) : Nothing(); + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader GenYdbScheme done, scheme.Defined()=" << scheme.Defined() << std::endl; TMetadata metadata; metadata.SetVersion(Task.GetEnableChecksums() ? 1 : 0); @@ -419,7 +491,9 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { TVector changefeeds; const bool enableChangefeedsExport = AppData() && AppData()->FeatureFlags.GetEnableChangefeedsExport(); + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader enableChangefeedsExport=" << enableChangefeedsExport << std::endl; if (enableChangefeedsExport) { + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader processing changefeeds" << std::endl; const auto& persQueues = Task.GetChangefeedUnderlyingTopics(); const auto& cdcStreams = Task.GetTable().GetTable().GetCdcStreams(); Y_ASSERT(persQueues.size() == cdcStreams.size()); @@ -453,9 +527,11 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { } } + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader before GenYdbPermissions" << std::endl; auto permissions = (Task.GetEnablePermissions() && Task.GetShardNum() == 0) ? GenYdbPermissions(Task.GetTable()) : Nothing(); + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader GenYdbPermissions done, permissions.Defined()=" << permissions.Defined() << std::endl; TFullBackupMetadata::TPtr backup = new TFullBackupMetadata{ .SnapshotVts = TVirtualTimestamp( @@ -463,15 +539,22 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { Task.GetSnapshotTxId()) }; metadata.AddFullBackup(backup); + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader metadata ready, changefeeds.size=" << changefeeds.size() << std::endl; - return new TFsUploader( + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader creating TFsUploader" << std::endl; + auto* uploader = new TFsUploader( dataShard, txId, Task, std::move(scheme), std::move(changefeeds), std::move(permissions), metadata.Serialize()); + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader TFsUploader created=" << (void*)uploader << std::endl; + return uploader; } IExport::IBuffer* TFsExport::CreateBuffer() const { + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateBuffer START" << std::endl; // For schema-only export, return a dummy buffer // Data export will be implemented later - return new TSchemaOnlyBuffer(); + auto* buffer = new TSchemaOnlyBuffer(); + std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateBuffer returning buffer=" << (void*)buffer << std::endl; + return buffer; } } // NDataShard diff --git a/ydb/core/tx/datashard/export_scan.cpp b/ydb/core/tx/datashard/export_scan.cpp index 0275c392c1d4..289c67c6d70f 100644 --- a/ydb/core/tx/datashard/export_scan.cpp +++ b/ydb/core/tx/datashard/export_scan.cpp @@ -73,8 +73,11 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle } void MaybeReady() { + std::cerr << "FSEXPORT_DEBUG: TExportScan::MaybeReady IsReady()=" << IsReady() << std::endl; if (IsReady()) { + std::cerr << "FSEXPORT_DEBUG: TExportScan::MaybeReady sending TEvReady to Uploader=" << Uploader.ToString() << std::endl; Send(Uploader, new TEvExportScan::TEvReady()); + std::cerr << "FSEXPORT_DEBUG: TExportScan::MaybeReady TEvReady sent" << std::endl; } } @@ -137,6 +140,7 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle } void Handle(TEvExportScan::TEvFinish::TPtr& ev) { + std::cerr << "FSEXPORT_DEBUG: TExportScan::Handle TEvFinish START" << std::endl; Y_ENSURE(IsReady()); EXPORT_LOG_D("Handle TEvExportScan::TEvFinish" @@ -145,7 +149,10 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle Success = ev->Get()->Success; Error = ev->Get()->Error; + std::cerr << "FSEXPORT_DEBUG: TExportScan::Handle TEvFinish Success=" << Success + << " Error=" << Error << " calling Driver->Touch(EScan::Final)" << std::endl; Driver->Touch(EScan::Final); + std::cerr << "FSEXPORT_DEBUG: TExportScan::Handle TEvFinish END" << std::endl; } public: @@ -161,6 +168,7 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle , Driver(nullptr) , Success(false) { + std::cerr << "FSEXPORT_DEBUG: TExportScan constructor buffer=" << (void*)Buffer.Get() << std::endl; } void Describe(IOutputStream& o) const override { diff --git a/ydb/core/tx/datashard/restore_unit.cpp b/ydb/core/tx/datashard/restore_unit.cpp index 6d9bc5a66890..91fce89c3edf 100644 --- a/ydb/core/tx/datashard/restore_unit.cpp +++ b/ydb/core/tx/datashard/restore_unit.cpp @@ -49,8 +49,6 @@ class TRestoreUnit : public TBackupRestoreUnitBaseSetAsyncJobResult(new TImportJobProduct(true, TString(), 0, 0)); break; From c685cdea31fdbbc6006954226b293fbba5f47859 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Thu, 4 Dec 2025 23:50:33 +0000 Subject: [PATCH 19/25] logs --- ydb/core/tx/datashard/backup_unit.cpp | 24 +-- ydb/core/tx/datashard/export_fs_uploader.cpp | 137 ++++++------------ ydb/core/tx/datashard/export_scan.cpp | 8 - .../tx/schemeshard/schemeshard_export.cpp | 2 + 4 files changed, 44 insertions(+), 127 deletions(-) diff --git a/ydb/core/tx/datashard/backup_unit.cpp b/ydb/core/tx/datashard/backup_unit.cpp index 26433ebfb48c..da12a1e2bcbc 100644 --- a/ydb/core/tx/datashard/backup_unit.cpp +++ b/ydb/core/tx/datashard/backup_unit.cpp @@ -33,15 +33,11 @@ class TBackupUnit : public TBackupRestoreUnitBase } bool Run(TOperation::TPtr op, TTransactionContext& txc, const TActorContext& ctx) override { - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run START" << std::endl; TActiveTransaction* tx = dynamic_cast(op.Get()); Y_ENSURE(tx, "cannot cast operation of kind " << op->GetKind()); Y_ENSURE(tx->GetSchemeTx().HasBackup()); const auto& backup = tx->GetSchemeTx().GetBackup(); - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run HasYTSettings=" << backup.HasYTSettings() - << " HasS3Settings=" << backup.HasS3Settings() - << " HasFSSettings=" << backup.HasFSSettings() << std::endl; const ui64 tableId = backup.GetTableId(); Y_ENSURE(DataShard.GetUserTables().contains(tableId)); @@ -80,24 +76,16 @@ class TBackupUnit : public TBackupRestoreUnitBase return false; } } else if (backup.HasFSSettings()) { - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run FSSettings detected" << std::endl; - std::cerr << "FSEXPORT_DEBUG: FSSettings BasePath=" - << " Path=" << std::endl; NBackupRestoreTraits::ECompressionCodec codec; if (!TryCodecFromTask(backup, codec)) { - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run unsupported compression codec" << std::endl; Abort(op, ctx, TStringBuilder() << "Unsupported compression codec" << ": " << backup.GetCompression().GetCodec()); return false; } - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run codec OK" << std::endl; if (auto* exportFactory = appData->DataShardExportFactory) { - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run creating FS export" << std::endl; std::shared_ptr(exportFactory->CreateExportToFs(backup, columns)).swap(exp); - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run FS export created, exp=" << (void*)exp.get() << std::endl; } else { - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run exportFactory is NULL" << std::endl; Abort(op, ctx, "Exports to FS are disabled"); return false; } @@ -106,20 +94,12 @@ class TBackupUnit : public TBackupRestoreUnitBase return false; } - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run creating uploader lambda" << std::endl; auto createUploader = [self = DataShard.SelfId(), txId = op->GetTxId(), exp]() { - std::cerr << "FSEXPORT_DEBUG: createUploader lambda called" << std::endl; - auto* uploader = exp->CreateUploader(self, txId); - std::cerr << "FSEXPORT_DEBUG: createUploader lambda returned uploader=" << (void*)uploader << std::endl; - return uploader; + return exp->CreateUploader(self, txId); }; - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run calling CreateBuffer" << std::endl; THolder buffer{exp->CreateBuffer()}; - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run CreateBuffer returned buffer=" << (void*)buffer.Get() << std::endl; - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run calling CreateExportScan" << std::endl; THolder scan{CreateExportScan(std::move(buffer), createUploader)}; - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run CreateExportScan returned scan=" << (void*)scan.Get() << std::endl; const auto& taskName = appData->DataShardConfig.GetBackupTaskName(); const auto taskPrio = appData->DataShardConfig.GetBackupTaskPriority(); @@ -134,14 +114,12 @@ class TBackupUnit : public TBackupRestoreUnitBase readAheadHi = readAheadHiOverride; } - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run calling QueueScan localTableId=" << localTableId << std::endl; tx->SetScanTask(DataShard.QueueScan(localTableId, scan.Release(), op->GetTxId(), TScanOptions() .SetResourceBroker(taskName, taskPrio) .SetReadAhead(readAheadLo, readAheadHi) .SetReadPrio(TScanOptions::EReadPrio::Low) )); - std::cerr << "FSEXPORT_DEBUG: TBackupUnit::Run QueueScan done, returning true" << std::endl; return true; } diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index f7da076bd473..5ab4717e12fc 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -77,35 +77,26 @@ class TFsUploader: public TActorBootstrapped { }; void WriteToFile(const TString& path, const TString& data) { - std::cerr << "FSEXPORT_DEBUG: WriteToFile START path=" << path << " size=" << data.size() << std::endl; EXPORT_LOG_D("WriteToFile" << ": self# " << SelfId() << ", path# " << path << ", size# " << data.size()); try { - std::cerr << "FSEXPORT_DEBUG: WriteToFile creating TFsPath" << std::endl; TFsPath filePath(path); TFsPath dirPath = filePath.Parent(); - std::cerr << "FSEXPORT_DEBUG: WriteToFile calling MkDirs for " << dirPath.GetPath() << std::endl; dirPath.MkDirs(); - std::cerr << "FSEXPORT_DEBUG: WriteToFile MkDirs done" << std::endl; // Write file - std::cerr << "FSEXPORT_DEBUG: WriteToFile opening file" << std::endl; TFileOutput file(path); - std::cerr << "FSEXPORT_DEBUG: WriteToFile writing data" << std::endl; file.Write(data); - std::cerr << "FSEXPORT_DEBUG: WriteToFile finishing file" << std::endl; file.Finish(); - std::cerr << "FSEXPORT_DEBUG: WriteToFile SUCCESS" << std::endl; EXPORT_LOG_I("Successfully wrote file" << ": self# " << SelfId() << ", path# " << path << ", size# " << data.size()); } catch (const std::exception& ex) { - std::cerr << "FSEXPORT_DEBUG: WriteToFile EXCEPTION: " << ex.what() << std::endl; Error = TStringBuilder() << "Failed to write file '" << path << "': " << ex.what(); EXPORT_LOG_E("WriteToFile error" << ": self# " << SelfId() @@ -137,7 +128,6 @@ class TFsUploader: public TActorBootstrapped { } void UploadMetadata() { - std::cerr << "FSEXPORT_DEBUG: UploadMetadata START" << std::endl; Y_ENSURE(!MetadataUploaded); Y_ENSURE(ShardNum == 0); @@ -146,30 +136,24 @@ class TFsUploader: public TActorBootstrapped { try { if (EnableChecksums) { - std::cerr << "FSEXPORT_DEBUG: UploadMetadata computing checksum" << std::endl; MetadataChecksum = ComputeChecksum(Metadata); - std::cerr << "FSEXPORT_DEBUG: UploadMetadata checksum=" << MetadataChecksum << std::endl; } - std::cerr << "FSEXPORT_DEBUG: UploadMetadata calling WriteToFile" << std::endl; WriteToFile(Settings.GetMetadataPath(), Metadata); - std::cerr << "FSEXPORT_DEBUG: UploadMetadata WriteToFile done" << std::endl; if (EnableChecksums) { - std::cerr << "FSEXPORT_DEBUG: UploadMetadata writing checksum" << std::endl; WriteChecksum(MetadataChecksum, Settings.GetChecksumPath(Settings.GetMetadataPath()), Settings.GetMetadataPath()); } MetadataUploaded = true; - std::cerr << "FSEXPORT_DEBUG: UploadMetadata SUCCESS" << std::endl; + EXPORT_LOG_I("Metadata uploaded successfully" + << ": self# " << SelfId()); } catch (...) { - std::cerr << "FSEXPORT_DEBUG: UploadMetadata EXCEPTION" << std::endl; return Finish(false, Error.GetOrElse("Unknown error during metadata upload")); } } void UploadPermissions() { - std::cerr << "FSEXPORT_DEBUG: UploadPermissions START" << std::endl; Y_ENSURE(EnablePermissions && !PermissionsUploaded); Y_ENSURE(ShardNum == 0); @@ -177,23 +161,20 @@ class TFsUploader: public TActorBootstrapped { << ": self# " << SelfId()); if (!Permissions) { - std::cerr << "FSEXPORT_DEBUG: UploadPermissions Permissions is empty" << std::endl; return Finish(false, "Cannot infer permissions"); } - std::cerr << "FSEXPORT_DEBUG: UploadPermissions has Permissions, calling WriteMessage" << std::endl; try { WriteMessage(Permissions.GetRef(), Settings.GetPermissionsPath(), PermissionsChecksum); PermissionsUploaded = true; - std::cerr << "FSEXPORT_DEBUG: UploadPermissions SUCCESS" << std::endl; + EXPORT_LOG_I("Permissions uploaded successfully" + << ": self# " << SelfId()); } catch (...) { - std::cerr << "FSEXPORT_DEBUG: UploadPermissions EXCEPTION" << std::endl; return Finish(false, Error.GetOrElse("Unknown error during permissions upload")); } } void UploadScheme() { - std::cerr << "FSEXPORT_DEBUG: UploadScheme START" << std::endl; Y_ENSURE(!SchemeUploaded); Y_ENSURE(ShardNum == 0); @@ -201,17 +182,15 @@ class TFsUploader: public TActorBootstrapped { << ": self# " << SelfId()); if (!Scheme) { - std::cerr << "FSEXPORT_DEBUG: UploadScheme Scheme is empty" << std::endl; return Finish(false, "Cannot infer scheme"); } - std::cerr << "FSEXPORT_DEBUG: UploadScheme has Scheme, calling WriteMessage" << std::endl; try { WriteMessage(Scheme.GetRef(), Settings.GetSchemePath(), SchemeChecksum); SchemeUploaded = true; - std::cerr << "FSEXPORT_DEBUG: UploadScheme SUCCESS" << std::endl; + EXPORT_LOG_I("Scheme uploaded successfully" + << ": self# " << SelfId()); } catch (...) { - std::cerr << "FSEXPORT_DEBUG: UploadScheme EXCEPTION" << std::endl; return Finish(false, Error.GetOrElse("Unknown error during scheme upload")); } } @@ -259,7 +238,6 @@ class TFsUploader: public TActorBootstrapped { } void Finish(bool success = true, const TString& error = TString()) { - std::cerr << "FSEXPORT_DEBUG: Finish called success=" << success << " error=" << error << std::endl; EXPORT_LOG_I("Finish" << ": self# " << SelfId() << ", success# " << success @@ -267,26 +245,17 @@ class TFsUploader: public TActorBootstrapped { if (!success) { Error = error; - std::cerr << "FSEXPORT_DEBUG: Finish setting Error=" << error << std::endl; } - std::cerr << "FSEXPORT_DEBUG: Finish calling PassAway" << std::endl; PassAway(); } void PassAway() override { - std::cerr << "FSEXPORT_DEBUG: PassAway called Scanner=" << (void*)&Scanner << std::endl; if (Scanner) { - std::cerr << "FSEXPORT_DEBUG: PassAway sending TEvFinish to Scanner" << std::endl; Send(Scanner, new TEvExportScan::TEvFinish(Error.Empty(), Error.GetOrElse(TString()))); - std::cerr << "FSEXPORT_DEBUG: PassAway TEvFinish sent" << std::endl; - } else { - std::cerr << "FSEXPORT_DEBUG: PassAway Scanner is NULL" << std::endl; } - std::cerr << "FSEXPORT_DEBUG: PassAway calling IActor::PassAway" << std::endl; IActor::PassAway(); - std::cerr << "FSEXPORT_DEBUG: PassAway done" << std::endl; } public: @@ -321,94 +290,76 @@ class TFsUploader: public TActorBootstrapped { , EnablePermissions(task.GetEnablePermissions()) { Y_UNUSED(TxId); - std::cerr << "FSEXPORT_DEBUG: TFsUploader constructor shardNum=" << ShardNum - << " hasScheme=" << Scheme.Defined() - << " hasPermissions=" << Permissions.Defined() - << " changefeeds.size=" << Changefeeds.size() - << " EnableChecksums=" << EnableChecksums - << " EnablePermissions=" << EnablePermissions << std::endl; } void Bootstrap() { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::Bootstrap START shardNum=" << ShardNum << std::endl; EXPORT_LOG_D("Bootstrap" << ": self# " << SelfId() << ", shardNum# " << ShardNum); Become(&TThis::StateBase); - std::cerr << "FSEXPORT_DEBUG: TFsUploader::Bootstrap END (waiting for TEvReady)" << std::endl; } void DoWork() { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork START shardNum=" << ShardNum << std::endl; + EXPORT_LOG_D("DoWork started" + << ": self# " << SelfId() + << ", shardNum# " << ShardNum); if (ShardNum != 0) { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork shardNum!=0, finishing" << std::endl; return Finish(); } try { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork MetadataUploaded=" << MetadataUploaded << std::endl; if (!MetadataUploaded) { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadMetadata" << std::endl; UploadMetadata(); - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadMetadata done" << std::endl; } - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork EnablePermissions=" << EnablePermissions - << " PermissionsUploaded=" << PermissionsUploaded << std::endl; if (EnablePermissions && !PermissionsUploaded) { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadPermissions" << std::endl; UploadPermissions(); - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadPermissions done" << std::endl; } - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork SchemeUploaded=" << SchemeUploaded << std::endl; if (!SchemeUploaded) { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadScheme" << std::endl; UploadScheme(); - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadScheme done" << std::endl; } - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork ChangefeedsUploaded=" << ChangefeedsUploaded << std::endl; if (!ChangefeedsUploaded) { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork calling UploadChangefeed" << std::endl; UploadChangefeed(); - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork UploadChangefeed done" << std::endl; } else { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork no changefeeds, calling Finish" << std::endl; Finish(); } } catch (...) { - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork EXCEPTION caught" << std::endl; Finish(false, Error ? *Error : "Unknown error during work"); } - std::cerr << "FSEXPORT_DEBUG: TFsUploader::DoWork END" << std::endl; } STATEFN(StateBase) { switch (ev->GetTypeRewrite()) { hFunc(TEvExportScan::TEvReady, Handle); + hFunc(TEvExportScan::TEvBuffer, HandleBuffer); sFunc(TEvents::TEvPoisonPill, PassAway); } } void Handle(TEvExportScan::TEvReady::TPtr& ev) { - std::cerr << "FSEXPORT_DEBUG: Handle TEvExportScan::TEvReady START sender=" << ev->Sender.ToString() << std::endl; EXPORT_LOG_D("Handle TEvExportScan::TEvReady" << ": self# " << SelfId() << ", sender# " << ev->Sender); Scanner = ev->Sender; - std::cerr << "FSEXPORT_DEBUG: Handle TEvExportScan::TEvReady Scanner set, calling DoWork" << std::endl; if (Error) { - std::cerr << "FSEXPORT_DEBUG: Handle TEvExportScan::TEvReady Error is set, calling PassAway" << std::endl; return PassAway(); } DoWork(); } + + void HandleBuffer(TEvExportScan::TEvBuffer::TPtr&) { + // Schema-only export doesn't process data buffers + // Just ignore them and continue waiting for scan completion + EXPORT_LOG_D("Handle TEvExportScan::TEvBuffer (ignored for schema-only export)" + << ": self# " << SelfId()); + } private: TFsSettings Settings; @@ -443,47 +394,52 @@ class TFsUploader: public TActorBootstrapped { class TSchemaOnlyBuffer : public NExportScan::IBuffer { public: - TSchemaOnlyBuffer() { - std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer constructor" << std::endl; - } + TSchemaOnlyBuffer() = default; void ColumnsOrder(const TVector&) override { - std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::ColumnsOrder called" << std::endl; } bool Collect(const NTable::IScan::TRow&) override { - std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::Collect called - returning false" << std::endl; - return false; + // For schema-only export, we don't actually collect data + // Count rows to stop scanning after first row + ++RowCount; + // Return true to indicate success (false would be interpreted as an error) + return true; } - IEventBase* PrepareEvent(bool, TStats&) override { - std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::PrepareEvent called - returning nullptr" << std::endl; - return nullptr; + IEventBase* PrepareEvent(bool last, TStats& stats) override { + // Schema-only export doesn't need actual data + // Send empty event to satisfy scanner protocol + stats.Rows = 0; + stats.BytesRead = 0; + stats.BytesSent = 0; + + // Send empty buffer - uploader will ignore it + return new TEvExportScan::TEvBuffer(TString(), last); } void Clear() override { - std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::Clear called" << std::endl; + RowCount = 0; } bool IsFilled() const override { - std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::IsFilled called - returning false" << std::endl; - return false; + // For schema-only export, we want to stop scanning immediately + // Return true after any row to minimize scanning overhead + return RowCount > 0; } TString GetError() const override { - std::cerr << "FSEXPORT_DEBUG: TSchemaOnlyBuffer::GetError called" << std::endl; return {}; } + +private: + ui64 RowCount = 0; }; IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader START shardNum=" << Task.GetShardNum() << std::endl; - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader dataShard=" << dataShard.ToString() << " txId=" << txId << std::endl; - auto scheme = (Task.GetShardNum() == 0) ? GenYdbScheme(Columns, Task.GetTable()) : Nothing(); - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader GenYdbScheme done, scheme.Defined()=" << scheme.Defined() << std::endl; TMetadata metadata; metadata.SetVersion(Task.GetEnableChecksums() ? 1 : 0); @@ -491,9 +447,7 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { TVector changefeeds; const bool enableChangefeedsExport = AppData() && AppData()->FeatureFlags.GetEnableChangefeedsExport(); - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader enableChangefeedsExport=" << enableChangefeedsExport << std::endl; if (enableChangefeedsExport) { - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader processing changefeeds" << std::endl; const auto& persQueues = Task.GetChangefeedUnderlyingTopics(); const auto& cdcStreams = Task.GetTable().GetTable().GetCdcStreams(); Y_ASSERT(persQueues.size() == cdcStreams.size()); @@ -527,11 +481,9 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { } } - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader before GenYdbPermissions" << std::endl; auto permissions = (Task.GetEnablePermissions() && Task.GetShardNum() == 0) ? GenYdbPermissions(Task.GetTable()) : Nothing(); - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader GenYdbPermissions done, permissions.Defined()=" << permissions.Defined() << std::endl; TFullBackupMetadata::TPtr backup = new TFullBackupMetadata{ .SnapshotVts = TVirtualTimestamp( @@ -539,22 +491,15 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { Task.GetSnapshotTxId()) }; metadata.AddFullBackup(backup); - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader metadata ready, changefeeds.size=" << changefeeds.size() << std::endl; - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader creating TFsUploader" << std::endl; - auto* uploader = new TFsUploader( + return new TFsUploader( dataShard, txId, Task, std::move(scheme), std::move(changefeeds), std::move(permissions), metadata.Serialize()); - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateUploader TFsUploader created=" << (void*)uploader << std::endl; - return uploader; } IExport::IBuffer* TFsExport::CreateBuffer() const { - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateBuffer START" << std::endl; // For schema-only export, return a dummy buffer // Data export will be implemented later - auto* buffer = new TSchemaOnlyBuffer(); - std::cerr << "FSEXPORT_DEBUG: TFsExport::CreateBuffer returning buffer=" << (void*)buffer << std::endl; - return buffer; + return new TSchemaOnlyBuffer(); } } // NDataShard diff --git a/ydb/core/tx/datashard/export_scan.cpp b/ydb/core/tx/datashard/export_scan.cpp index 289c67c6d70f..0275c392c1d4 100644 --- a/ydb/core/tx/datashard/export_scan.cpp +++ b/ydb/core/tx/datashard/export_scan.cpp @@ -73,11 +73,8 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle } void MaybeReady() { - std::cerr << "FSEXPORT_DEBUG: TExportScan::MaybeReady IsReady()=" << IsReady() << std::endl; if (IsReady()) { - std::cerr << "FSEXPORT_DEBUG: TExportScan::MaybeReady sending TEvReady to Uploader=" << Uploader.ToString() << std::endl; Send(Uploader, new TEvExportScan::TEvReady()); - std::cerr << "FSEXPORT_DEBUG: TExportScan::MaybeReady TEvReady sent" << std::endl; } } @@ -140,7 +137,6 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle } void Handle(TEvExportScan::TEvFinish::TPtr& ev) { - std::cerr << "FSEXPORT_DEBUG: TExportScan::Handle TEvFinish START" << std::endl; Y_ENSURE(IsReady()); EXPORT_LOG_D("Handle TEvExportScan::TEvFinish" @@ -149,10 +145,7 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle Success = ev->Get()->Success; Error = ev->Get()->Error; - std::cerr << "FSEXPORT_DEBUG: TExportScan::Handle TEvFinish Success=" << Success - << " Error=" << Error << " calling Driver->Touch(EScan::Final)" << std::endl; Driver->Touch(EScan::Final); - std::cerr << "FSEXPORT_DEBUG: TExportScan::Handle TEvFinish END" << std::endl; } public: @@ -168,7 +161,6 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle , Driver(nullptr) , Success(false) { - std::cerr << "FSEXPORT_DEBUG: TExportScan constructor buffer=" << (void*)Buffer.Get() << std::endl; } void Describe(IOutputStream& o) const override { diff --git a/ydb/core/tx/schemeshard/schemeshard_export.cpp b/ydb/core/tx/schemeshard/schemeshard_export.cpp index 613ab498dae4..52db1916b750 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export.cpp @@ -224,6 +224,8 @@ void TSchemeShard::PersistExportItemState(NIceDb::TNiceDb& db, const TExportInfo } void TSchemeShard::Handle(TEvExport::TEvCreateExportRequest::TPtr& ev, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + "Handle TEvExport::TEvCreateExportRequest, txId# " << ev->Get()->Record.GetTxId()); Execute(CreateTxCreateExport(ev), ctx); } From acd9b7dd86ba80f0ee4ba9d2786afa8656965286 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 5 Dec 2025 01:03:07 +0000 Subject: [PATCH 20/25] uploader --- ydb/apps/ydbd/export.cpp | 7 + ydb/apps/ydbd/export.h | 1 + ydb/core/tx/datashard/export_fs_uploader.cpp | 583 +++++++++--------- .../tx/schemeshard/ut_export/ut_export_fs.cpp | 30 +- 4 files changed, 310 insertions(+), 311 deletions(-) diff --git a/ydb/apps/ydbd/export.cpp b/ydb/apps/ydbd/export.cpp index de0efb5a834f..218b6dae97dc 100644 --- a/ydb/apps/ydbd/export.cpp +++ b/ydb/apps/ydbd/export.cpp @@ -1,6 +1,7 @@ #include "export.h" #include +#include NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToYt( const IExport::TTask& task, const IExport::TTableColumns& columns) const @@ -22,5 +23,11 @@ NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToS3( #endif } +NKikimr::NDataShard::IExport* TDataShardExportFactory::CreateExportToFs( + const IExport::TTask& task, const IExport::TTableColumns& columns) const +{ + return new NKikimr::NDataShard::TFsExport(task, columns); +} + void TDataShardExportFactory::Shutdown() { } diff --git a/ydb/apps/ydbd/export.h b/ydb/apps/ydbd/export.h index 9d077f16aa5e..e7613aaff639 100644 --- a/ydb/apps/ydbd/export.h +++ b/ydb/apps/ydbd/export.h @@ -8,5 +8,6 @@ class TDataShardExportFactory : public NKikimr::NDataShard::IExportFactory { public: IExport* CreateExportToYt(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; IExport* CreateExportToS3(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; + IExport* CreateExportToFs(const IExport::TTask& task, const IExport::TTableColumns& columns) const override; void Shutdown() override; }; diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index 5ab4717e12fc..3d710e4095bc 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -1,6 +1,6 @@ #include "export_common.h" #include "export_fs.h" -#include "export_scan.h" +#include "export_s3_buffer.h" #include "backup_restore_traits.h" #include @@ -15,12 +15,13 @@ #include #include -#include #include #include #include #include +#include #include +#include #include @@ -28,6 +29,54 @@ namespace NKikimr { namespace NDataShard { using namespace NBackup; +using namespace NBackupRestoreTraits; + +// Settings class for filesystem export +class TFsSettings { +public: + const TString BasePath; // Base path on filesystem (e.g., /mnt/exports) + const TString RelativePath; // Relative path for this export item + const ui32 Shard; + + explicit TFsSettings(const NKikimrSchemeOp::TFSSettings& settings, ui32 shard) + : BasePath(settings.GetBasePath()) + , RelativePath(settings.GetPath()) + , Shard(shard) + { + } + + static TFsSettings FromBackupTask(const NKikimrSchemeOp::TBackupTask& task) { + return TFsSettings(task.GetFSSettings(), task.GetShardNum()); + } + + TString GetFullPath() const { + return TFsPath(BasePath) / RelativePath; + } + + TString GetPermissionsKey() const { + return TFsPath(GetFullPath()) / PermissionsKeySuffix(false); + } + + TString GetMetadataKey() const { + return TFsPath(GetFullPath()) / MetadataKeySuffix(false); + } + + TString GetSchemeKey() const { + return TFsPath(GetFullPath()) / SchemeKeySuffix(false); + } + + TString GetDataKey(EDataFormat format, ECompressionCodec codec) const { + return TFsPath(GetFullPath()) / DataKeySuffix(Shard, format, codec, false); + } + + TString GetChangefeedKey(const TString& changefeedPrefix) const { + return TFsPath(GetFullPath()) / changefeedPrefix / ChangefeedKeySuffix(false); + } + + TString GetTopicKey(const TString& changefeedPrefix) const { + return TFsPath(GetFullPath()) / changefeedPrefix / TopicKeySuffix(false); + } +}; struct TChangefeedExportDescriptions { const Ydb::Table::ChangefeedDescription ChangefeedDescription; @@ -37,236 +86,230 @@ struct TChangefeedExportDescriptions { }; class TFsUploader: public TActorBootstrapped { - struct TFsSettings { - TString BasePath; - TString Path; - - TString GetMetadataPath() const { - return TFsPath(BasePath) / Path / "metadata"; - } - - TString GetSchemePath() const { - return TFsPath(BasePath) / Path / "scheme"; - } - - TString GetPermissionsPath() const { - return TFsPath(BasePath) / Path / "permissions"; - } - - TString GetChangefeedPath(const TString& prefix) const { - return TFsPath(BasePath) / Path / (prefix + "_changefeed"); - } - - TString GetTopicPath(const TString& prefix) const { - return TFsPath(BasePath) / Path / (prefix + "_topic"); - } - - TString GetChecksumPath(const TString& objectPath) const { - return objectPath + ".checksum"; - } - - static TFsSettings FromBackupTask(const NKikimrSchemeOp::TBackupTask& task) { - Y_ENSURE(task.HasFSSettings()); - const auto& fsSettings = task.GetFSSettings(); - - TFsSettings result; - result.BasePath = fsSettings.GetBasePath(); - result.Path = fsSettings.GetPath(); - return result; - } - }; + using TEvBuffer = TEvExportScan::TEvBuffer; - void WriteToFile(const TString& path, const TString& data) { - EXPORT_LOG_D("WriteToFile" - << ": self# " << SelfId() - << ", path# " << path - << ", size# " << data.size()); - + // Write data to a file, creating parent directories if needed + bool WriteFile(const TString& path, const TString& data, TString& error) { try { - TFsPath filePath(path); - TFsPath dirPath = filePath.Parent(); - dirPath.MkDirs(); + TFsPath fsPath(path); + fsPath.Parent().MkDirs(); - // Write file - TFileOutput file(path); - file.Write(data); - file.Finish(); + TFile file(path, CreateAlways | WrOnly); + file.Write(data.data(), data.size()); + file.Close(); - EXPORT_LOG_I("Successfully wrote file" + EXPORT_LOG_D("WriteFile succeeded" << ": self# " << SelfId() << ", path# " << path << ", size# " << data.size()); + + return true; } catch (const std::exception& ex) { - Error = TStringBuilder() << "Failed to write file '" << path << "': " << ex.what(); - EXPORT_LOG_E("WriteToFile error" + error = TStringBuilder() << "Failed to write file " << path << ": " << ex.what(); + EXPORT_LOG_E("WriteFile failed" << ": self# " << SelfId() << ", path# " << path - << ", error# " << Error); - throw; + << ", error# " << error); + return false; } } - - void WriteMessage(const google::protobuf::Message& message, const TString& path, TString& checksum) { + + // Write protobuf message to file + bool WriteMessage(const google::protobuf::Message& message, const TString& path, TString& error) { TString data; google::protobuf::TextFormat::PrintToString(message, &data); - - if (EnableChecksums) { - checksum = ComputeChecksum(data); + return WriteFile(path, data, error); + } + + // Write data with checksum + bool WriteFileWithChecksum(const TString& path, const TString& data, TString& error) { + if (!WriteFile(path, data, error)) { + return false; } - - WriteToFile(path, data); - + if (EnableChecksums) { - WriteChecksum(checksum, Settings.GetChecksumPath(path), path); + TString checksum = ComputeChecksum(data); + // Extract filename for checksum file format + TFsPath fsPath(path); + TString filename = fsPath.GetName(); + checksum += ' ' + filename; + + TString checksumPath = ChecksumKey(path); + if (!WriteFile(checksumPath, checksum, error)) { + return false; + } } + + return true; } - - void WriteChecksum(const TString& checksum, const TString& checksumPath, const TString& objectPath) { - // Format compatible with sha256sum CLI tool - TString checksumData = checksum + " " + TFsPath(objectPath).GetName(); - WriteToFile(checksumPath, checksumData); + + // Write protobuf message with checksum + bool WriteMessageWithChecksum(const google::protobuf::Message& message, const TString& path, TString& error) { + TString data; + google::protobuf::TextFormat::PrintToString(message, &data); + return WriteFileWithChecksum(path, data, error); } - + void UploadMetadata() { - Y_ENSURE(!MetadataUploaded); - Y_ENSURE(ShardNum == 0); - EXPORT_LOG_D("UploadMetadata" << ": self# " << SelfId()); + + TString error; + if (!WriteFileWithChecksum(Settings.GetMetadataKey(), Metadata, error)) { + return Finish(false, error); + } + + MetadataUploaded = true; - try { - if (EnableChecksums) { - MetadataChecksum = ComputeChecksum(Metadata); - } - - WriteToFile(Settings.GetMetadataPath(), Metadata); - - if (EnableChecksums) { - WriteChecksum(MetadataChecksum, Settings.GetChecksumPath(Settings.GetMetadataPath()), Settings.GetMetadataPath()); - } - - MetadataUploaded = true; - EXPORT_LOG_I("Metadata uploaded successfully" - << ": self# " << SelfId()); - } catch (...) { - return Finish(false, Error.GetOrElse("Unknown error during metadata upload")); + if (EnablePermissions) { + UploadPermissions(); + } else { + UploadScheme(); } } - + void UploadPermissions() { - Y_ENSURE(EnablePermissions && !PermissionsUploaded); - Y_ENSURE(ShardNum == 0); - EXPORT_LOG_D("UploadPermissions" << ": self# " << SelfId()); - + if (!Permissions) { return Finish(false, "Cannot infer permissions"); } - - try { - WriteMessage(Permissions.GetRef(), Settings.GetPermissionsPath(), PermissionsChecksum); - PermissionsUploaded = true; - EXPORT_LOG_I("Permissions uploaded successfully" - << ": self# " << SelfId()); - } catch (...) { - return Finish(false, Error.GetOrElse("Unknown error during permissions upload")); + + TString error; + if (!WriteMessageWithChecksum(Permissions.GetRef(), Settings.GetPermissionsKey(), error)) { + return Finish(false, error); } + + PermissionsUploaded = true; + UploadScheme(); } - + void UploadScheme() { - Y_ENSURE(!SchemeUploaded); - Y_ENSURE(ShardNum == 0); - EXPORT_LOG_D("UploadScheme" << ": self# " << SelfId()); - + if (!Scheme) { return Finish(false, "Cannot infer scheme"); } - - try { - WriteMessage(Scheme.GetRef(), Settings.GetSchemePath(), SchemeChecksum); - SchemeUploaded = true; - EXPORT_LOG_I("Scheme uploaded successfully" - << ": self# " << SelfId()); - } catch (...) { - return Finish(false, Error.GetOrElse("Unknown error during scheme upload")); + + TString error; + if (!WriteMessageWithChecksum(Scheme.GetRef(), Settings.GetSchemeKey(), error)) { + return Finish(false, error); } + + SchemeUploaded = true; + UploadChangefeeds(); } - - void UploadChangefeed() { - Y_ENSURE(!ChangefeedsUploaded); - Y_ENSURE(ShardNum == 0); - - EXPORT_LOG_D("UploadChangefeed" + + void UploadChangefeeds() { + EXPORT_LOG_D("UploadChangefeeds" << ": self# " << SelfId() << ", index# " << IndexExportedChangefeed << ", total# " << Changefeeds.size()); - - if (IndexExportedChangefeed == Changefeeds.size()) { - ChangefeedsUploaded = true; - return Finish(); + + while (IndexExportedChangefeed < Changefeeds.size()) { + const auto& desc = Changefeeds[IndexExportedChangefeed]; + + TString error; + + // Write changefeed description + if (!WriteMessageWithChecksum(desc.ChangefeedDescription, Settings.GetChangefeedKey(desc.Prefix), error)) { + return Finish(false, error); + } + + // Write topic description + if (!WriteMessageWithChecksum(desc.Topic, Settings.GetTopicKey(desc.Prefix), error)) { + return Finish(false, error); + } + + ++IndexExportedChangefeed; } + + ChangefeedsUploaded = true; - try { - const auto& desc = Changefeeds[IndexExportedChangefeed]; - WriteMessage(desc.ChangefeedDescription, Settings.GetChangefeedPath(desc.Prefix), ChangefeedChecksum); - UploadTopic(); - } catch (...) { - return Finish(false, Error.GetOrElse("Unknown error during changefeed upload")); + // Scheme upload is done, now wait for scanner to be ready for data export + // For now, we skip data export and finish successfully + if (Scanner) { + // Tell scanner we're done (skip data export for now) + Finish(true); + } else { + // Wait for scanner to be ready + Become(&TThis::StateWaitForScanner); } } - - void UploadTopic() { - Y_ENSURE(IndexExportedChangefeed < Changefeeds.size()); - Y_ENSURE(ShardNum == 0); - - EXPORT_LOG_D("UploadTopic" + + void Handle(TEvExportScan::TEvReady::TPtr& ev) { + EXPORT_LOG_D("Handle TEvExportScan::TEvReady" << ": self# " << SelfId() - << ", index# " << IndexExportedChangefeed); - - try { - const auto& desc = Changefeeds[IndexExportedChangefeed]; - WriteMessage(desc.Topic, Settings.GetTopicPath(desc.Prefix), TopicChecksum); - - ++IndexExportedChangefeed; - UploadChangefeed(); - } catch (...) { - return Finish(false, Error.GetOrElse("Unknown error during topic upload")); + << ", sender# " << ev->Sender); + + Scanner = ev->Sender; + + if (Error) { + return PassAway(); + } + + const bool permissionsDone = !EnablePermissions || PermissionsUploaded; + if (SchemeUploaded && MetadataUploaded && permissionsDone && ChangefeedsUploaded) { + // Scheme export is done, finish successfully + // Data export will be implemented later + Finish(true); } } - + + void Handle(TEvBuffer::TPtr& ev) { + EXPORT_LOG_D("Handle TEvExportScan::TEvBuffer" + << ": self# " << SelfId() + << ", sender# " << ev->Sender + << ", msg# " << ev->Get()->ToString()); + + // For now, we don't handle data - just acknowledge and finish + // Data export will be implemented later + if (ev->Sender == Scanner) { + if (ev->Get()->Last) { + Finish(true); + } else { + // Request more data (but we'll finish when we get the last buffer) + Send(Scanner, new TEvExportScan::TEvFeed()); + } + } + } + void Finish(bool success = true, const TString& error = TString()) { EXPORT_LOG_I("Finish" << ": self# " << SelfId() << ", success# " << success << ", error# " << error); - + if (!success) { Error = error; } - + + if (Scanner) { + Send(Scanner, new TEvExportScan::TEvFinish(success, error)); + } + PassAway(); } - + void PassAway() override { - if (Scanner) { - Send(Scanner, new TEvExportScan::TEvFinish(Error.Empty(), Error.GetOrElse(TString()))); + if (Scanner && Error) { + Send(Scanner, new TEvExportScan::TEvFinish(false, Error.GetOrElse(TString()))); } - + IActor::PassAway(); } public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::EXPORT_S3_UPLOADER_ACTOR; // Reuse S3 activity type + return NKikimrServices::TActivity::EXPORT_S3_UPLOADER_ACTOR; // Reuse existing activity type } - + static constexpr TStringBuf LogPrefix() { return "fs"sv; } - + explicit TFsUploader( const TActorId& dataShard, ui64 txId, const NKikimrSchemeOp::TBackupTask& task, @@ -281,160 +324,78 @@ class TFsUploader: public TActorBootstrapped { , Changefeeds(std::move(changefeeds)) , Metadata(std::move(metadata)) , Permissions(std::move(permissions)) - , ShardNum(task.GetShardNum()) - , SchemeUploaded(ShardNum == 0 ? false : true) - , ChangefeedsUploaded(ShardNum == 0 ? false : true) - , MetadataUploaded(ShardNum == 0 ? false : true) - , PermissionsUploaded(ShardNum == 0 ? false : true) + , Retries(task.GetNumberOfRetries()) + , SchemeUploaded(task.GetShardNum() == 0 ? false : true) + , ChangefeedsUploaded(task.GetShardNum() == 0 ? false : true) + , MetadataUploaded(task.GetShardNum() == 0 ? false : true) + , PermissionsUploaded(task.GetShardNum() == 0 ? false : true) , EnableChecksums(task.GetEnableChecksums()) , EnablePermissions(task.GetEnablePermissions()) { + Y_UNUSED(DataShard); Y_UNUSED(TxId); + Y_UNUSED(Retries); } - + void Bootstrap() { EXPORT_LOG_D("Bootstrap" << ": self# " << SelfId() - << ", shardNum# " << ShardNum); - - Become(&TThis::StateBase); - } - - void DoWork() { - EXPORT_LOG_D("DoWork started" - << ": self# " << SelfId() - << ", shardNum# " << ShardNum); - - if (ShardNum != 0) { - return Finish(); - } - - try { - if (!MetadataUploaded) { - UploadMetadata(); - } - - if (EnablePermissions && !PermissionsUploaded) { - UploadPermissions(); - } - - if (!SchemeUploaded) { - UploadScheme(); - } - - if (!ChangefeedsUploaded) { - UploadChangefeed(); - } else { - Finish(); - } - } catch (...) { - Finish(false, Error ? *Error : "Unknown error during work"); + << ", shardNum# " << Settings.Shard + << ", basePath# " << Settings.BasePath + << ", relativePath# " << Settings.RelativePath); + + // Only shard 0 uploads metadata/scheme/permissions + if (!MetadataUploaded) { + UploadMetadata(); + } else { + // Non-zero shards wait for scanner and then finish + // (data export will be implemented later) + Become(&TThis::StateWaitForScanner); } } - + STATEFN(StateBase) { switch (ev->GetTypeRewrite()) { hFunc(TEvExportScan::TEvReady, Handle); - hFunc(TEvExportScan::TEvBuffer, HandleBuffer); + + sFunc(TEvents::TEvWakeup, Bootstrap); sFunc(TEvents::TEvPoisonPill, PassAway); } } - - void Handle(TEvExportScan::TEvReady::TPtr& ev) { - EXPORT_LOG_D("Handle TEvExportScan::TEvReady" - << ": self# " << SelfId() - << ", sender# " << ev->Sender); - - Scanner = ev->Sender; - - if (Error) { - return PassAway(); + + STATEFN(StateWaitForScanner) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvExportScan::TEvReady, Handle); + hFunc(TEvBuffer, Handle); + + sFunc(TEvents::TEvPoisonPill, PassAway); } - - DoWork(); - } - - void HandleBuffer(TEvExportScan::TEvBuffer::TPtr&) { - // Schema-only export doesn't process data buffers - // Just ignore them and continue waiting for scan completion - EXPORT_LOG_D("Handle TEvExportScan::TEvBuffer (ignored for schema-only export)" - << ": self# " << SelfId()); } private: TFsSettings Settings; + const TActorId DataShard; const ui64 TxId; const TMaybe Scheme; const TVector Changefeeds; const TString Metadata; const TMaybe Permissions; - - const ui32 ShardNum; + + const ui32 Retries; + ui64 IndexExportedChangefeed = 0; + + TActorId Scanner; bool SchemeUploaded; bool ChangefeedsUploaded; bool MetadataUploaded; bool PermissionsUploaded; - - ui64 IndexExportedChangefeed = 0; - - TActorId Scanner; - - bool EnableChecksums; - bool EnablePermissions; - - TString MetadataChecksum; - TString ChangefeedChecksum; - TString TopicChecksum; - TString SchemeChecksum; - TString PermissionsChecksum; - TMaybe Error; -}; // TFsUploader - -class TSchemaOnlyBuffer : public NExportScan::IBuffer { -public: - TSchemaOnlyBuffer() = default; - - void ColumnsOrder(const TVector&) override { - } - - bool Collect(const NTable::IScan::TRow&) override { - // For schema-only export, we don't actually collect data - // Count rows to stop scanning after first row - ++RowCount; - // Return true to indicate success (false would be interpreted as an error) - return true; - } - IEventBase* PrepareEvent(bool last, TStats& stats) override { - // Schema-only export doesn't need actual data - // Send empty event to satisfy scanner protocol - stats.Rows = 0; - stats.BytesRead = 0; - stats.BytesSent = 0; - - // Send empty buffer - uploader will ignore it - return new TEvExportScan::TEvBuffer(TString(), last); - } - - void Clear() override { - RowCount = 0; - } - - bool IsFilled() const override { - // For schema-only export, we want to stop scanning immediately - // Return true after any row to minimize scanning overhead - return RowCount > 0; - } - - TString GetError() const override { - return {}; - } + bool EnableChecksums; + bool EnablePermissions; -private: - ui64 RowCount = 0; -}; +}; // TFsUploader IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { auto scheme = (Task.GetShardNum() == 0) @@ -444,22 +405,21 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { TMetadata metadata; metadata.SetVersion(Task.GetEnableChecksums() ? 1 : 0); metadata.SetEnablePermissions(Task.GetEnablePermissions()); - + TVector changefeeds; - const bool enableChangefeedsExport = AppData() && AppData()->FeatureFlags.GetEnableChangefeedsExport(); - if (enableChangefeedsExport) { + if (AppData()->FeatureFlags.GetEnableChangefeedsExport()) { const auto& persQueues = Task.GetChangefeedUnderlyingTopics(); const auto& cdcStreams = Task.GetTable().GetTable().GetCdcStreams(); Y_ASSERT(persQueues.size() == cdcStreams.size()); - + const int changefeedsCount = cdcStreams.size(); changefeeds.reserve(changefeedsCount); - + for (int i = 0; i < changefeedsCount; ++i) { Ydb::Table::ChangefeedDescription changefeed; const auto& cdcStream = cdcStreams.at(i); FillChangefeedDescription(changefeed, cdcStream); - + Ydb::Topic::DescribeTopicResult topic; const auto& pq = persQueues.at(i); Ydb::StatusIds::StatusCode status; @@ -468,40 +428,71 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { // Unnecessary fields topic.clear_self(); topic.clear_topic_stats(); - + auto& descr = changefeeds.emplace_back(changefeed, topic); descr.Name = descr.ChangefeedDescription.name(); - // For filesystem, use actual names (no anonymization for now) descr.Prefix = descr.Name; - + metadata.AddChangefeed(TChangefeedMetadata{ .ExportPrefix = descr.Prefix, .Name = descr.Name, }); } } - + auto permissions = (Task.GetEnablePermissions() && Task.GetShardNum() == 0) ? GenYdbPermissions(Task.GetTable()) : Nothing(); - + TFullBackupMetadata::TPtr backup = new TFullBackupMetadata{ .SnapshotVts = TVirtualTimestamp( Task.GetSnapshotStep(), Task.GetSnapshotTxId()) }; metadata.AddFullBackup(backup); - + return new TFsUploader( dataShard, txId, Task, std::move(scheme), std::move(changefeeds), std::move(permissions), metadata.Serialize()); } +// CreateBuffer implementation - reuse S3 buffer for now since we need proper CSV serialization +// Data export will be fully implemented later IExport::IBuffer* TFsExport::CreateBuffer() const { - // For schema-only export, return a dummy buffer - // Data export will be implemented later - return new TSchemaOnlyBuffer(); +#ifndef KIKIMR_DISABLE_S3_OPS + using namespace NBackupRestoreTraits; + + const auto& scanSettings = Task.GetScanSettings(); + const ui64 maxRows = scanSettings.GetRowsBatchSize() ? scanSettings.GetRowsBatchSize() : Max(); + const ui64 maxBytes = scanSettings.GetBytesBatchSize(); + + TS3ExportBufferSettings bufferSettings; + bufferSettings + .WithColumns(Columns) + .WithMaxRows(maxRows) + .WithMaxBytes(maxBytes) + .WithMinBytes(0); // No minimum for filesystem + + if (Task.GetEnableChecksums()) { + bufferSettings.WithChecksum(TS3ExportBufferSettings::Sha256Checksum()); + } + + switch (CodecFromTask(Task)) { + case ECompressionCodec::None: + break; + case ECompressionCodec::Zstd: + bufferSettings + .WithCompression(TS3ExportBufferSettings::ZstdCompression(Task.GetCompression().GetLevel())); + break; + case ECompressionCodec::Invalid: + Y_ENSURE(false, "unreachable"); + } + + return CreateS3ExportBuffer(std::move(bufferSettings)); +#else + Y_ENSURE(false, "S3 ops disabled, cannot create export buffer"); + return nullptr; +#endif } } // NDataShard } // NKikimr - diff --git a/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp index 725cee1ea1a0..048e73ea7699 100644 --- a/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp +++ b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp @@ -75,25 +75,25 @@ namespace { TFsPath exportPath = TFsPath(basePath) / destinationPath; // Check metadata file - TFsPath metadataPath = exportPath / "metadata"; + TFsPath metadataPath = exportPath / "metadata.json"; UNIT_ASSERT_C(metadataPath.Exists(), "Metadata file should exist: " << metadataPath.GetPath()); // Check scheme file - TFsPath schemePath = exportPath / "scheme"; + TFsPath schemePath = exportPath / "scheme.pb"; UNIT_ASSERT_C(schemePath.Exists(), "Scheme file should exist: " << schemePath.GetPath()); // Check permissions file (if enabled) if (Runtime().GetAppData().FeatureFlags.GetEnablePermissionsExport()) { - TFsPath permissionsPath = exportPath / "permissions"; + TFsPath permissionsPath = exportPath / "permissions.pb"; UNIT_ASSERT_C(permissionsPath.Exists(), "Permissions file should exist: " << permissionsPath.GetPath()); } // Check checksums (if enabled) if (Runtime().GetAppData().FeatureFlags.GetEnableChecksumsExport()) { - TFsPath metadataChecksumPath = exportPath / "metadata.checksum"; + TFsPath metadataChecksumPath = exportPath / "metadata.json.sha256"; UNIT_ASSERT_C(metadataChecksumPath.Exists(), "Metadata checksum should exist: " << metadataChecksumPath.GetPath()); - TFsPath schemeChecksumPath = exportPath / "scheme.checksum"; + TFsPath schemeChecksumPath = exportPath / "scheme.pb.sha256"; UNIT_ASSERT_C(schemeChecksumPath.Exists(), "Scheme checksum should exist: " << schemeChecksumPath.GetPath()); } } @@ -129,7 +129,7 @@ namespace { if (expectedStatus == Ydb::StatusIds::SUCCESS) { for (size_t i = 0; i < destinationPaths.size(); ++i) { TFsPath exportPath = TFsPath(basePath) / destinationPaths[i]; - TFsPath schemePath = exportPath / "scheme"; + TFsPath schemePath = exportPath / "scheme.pb"; UNIT_ASSERT_C(schemePath.Exists(), "Scheme file should exist for table " << i << ": " << schemePath.GetPath()); } } @@ -265,15 +265,15 @@ Y_UNIT_TEST_SUITE_F(TExportToFsTests, TFsExportFixture) { }, basePath, destinationPath); // Check all expected files exist - UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata"), "metadata"); - UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme"), "scheme"); - UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions"), "permissions"); - UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata.checksum"), "metadata.checksum"); - UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme.checksum"), "scheme.checksum"); - UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions.checksum"), "permissions.checksum"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata.json"), "metadata.json"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme.pb"), "scheme.pb"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions.pb"), "permissions.pb"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata.json.sha256"), "metadata.json.sha256"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme.pb.sha256"), "scheme.pb.sha256"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions.pb.sha256"), "permissions.pb.sha256"); // Check scheme content - TString schemeContent = GetFsFileContent(basePath, destinationPath + "/scheme"); + TString schemeContent = GetFsFileContent(basePath, destinationPath + "/scheme.pb"); UNIT_ASSERT_C(!schemeContent.empty(), "Scheme file should not be empty"); Ydb::Table::CreateTableRequest schemeProto; @@ -287,9 +287,9 @@ Y_UNIT_TEST_SUITE_F(TExportToFsTests, TFsExportFixture) { UNIT_ASSERT_VALUES_EQUAL(schemeProto.primary_key(0), "key"); // Check checksum format - TString checksumContent = GetFsFileContent(basePath, destinationPath + "/metadata.checksum"); + TString checksumContent = GetFsFileContent(basePath, destinationPath + "/metadata.json.sha256"); UNIT_ASSERT_C(!checksumContent.empty(), "Checksum should not be empty"); - UNIT_ASSERT_C(checksumContent.Contains("metadata"), "Checksum should contain filename"); + UNIT_ASSERT_C(checksumContent.Contains("metadata.json"), "Checksum should contain filename"); UNIT_ASSERT_GE(checksumContent.size(), 64); // sha256 is 64 hex chars } From d5bac0cf904775ceb6a2885709ba1f4e1c585833 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 5 Dec 2025 08:40:35 +0000 Subject: [PATCH 21/25] test --- ydb/core/tx/datashard/export_fs_uploader.cpp | 3 -- .../tx/schemeshard/ut_export/ut_export_fs.cpp | 51 +++++++++++++++---- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index 3d710e4095bc..6996bc23776c 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -344,12 +344,9 @@ class TFsUploader: public TActorBootstrapped { << ", basePath# " << Settings.BasePath << ", relativePath# " << Settings.RelativePath); - // Only shard 0 uploads metadata/scheme/permissions if (!MetadataUploaded) { UploadMetadata(); } else { - // Non-zero shards wait for scanner and then finish - // (data export will be implemented later) Become(&TThis::StateWaitForScanner); } } diff --git a/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp index 048e73ea7699..487cb091c30e 100644 --- a/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp +++ b/ydb/core/tx/schemeshard/ut_export/ut_export_fs.cpp @@ -294,34 +294,67 @@ Y_UNIT_TEST_SUITE_F(TExportToFsTests, TFsExportFixture) { } Y_UNIT_TEST(ShouldAcceptCompressionSettings) { - TTestBasicRuntime runtime; - TTestEnv env(runtime); + TString basePath = TempDir().Path(); + TString destinationPath = "backup/Table"; ui64 txId = 100; - TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Env(); + Runtime().GetAppData().FeatureFlags.SetEnableChecksumsExport(true); + Runtime().GetAppData().FeatureFlags.SetEnablePermissionsExport(true); + + TestCreateTable(Runtime(), ++txId, "/MyRoot", R"( Name: "Table" Columns { Name: "key" Type: "Utf8" } Columns { Name: "value" Type: "Utf8" } KeyColumnNames: ["key"] )"); - env.TestWaitNotification(runtime, txId); + Env().TestWaitNotification(Runtime(), txId); - TestExport(runtime, ++txId, "/MyRoot", R"( + TString request = Sprintf(R"( ExportToFsSettings { - base_path: "/tmp/ydb_export" + base_path: "%s" compression: "zstd-3" items { source_path: "/MyRoot/Table" - destination_path: "backup/Table" + destination_path: "%s" } } - )"); + )", basePath.c_str(), destinationPath.c_str()); + + TestExport(Runtime(), ++txId, "/MyRoot", request); + Env().TestWaitNotification(Runtime(), txId); - auto response = TestGetExport(runtime, txId, "/MyRoot"); + auto response = TestGetExport(Runtime(), txId, "/MyRoot"); UNIT_ASSERT(response.GetResponse().GetEntry().HasExportToFsSettings()); const auto& settings = response.GetResponse().GetEntry().GetExportToFsSettings(); UNIT_ASSERT_VALUES_EQUAL(settings.compression(), "zstd-3"); + + // Check that files exist on filesystem + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata.json"), + "metadata.json should exist"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme.pb"), + "scheme.pb should exist"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions.pb"), + "permissions.pb should exist"); + + // Check checksums exist + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/metadata.json.sha256"), + "metadata.json.sha256 should exist"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/scheme.pb.sha256"), + "scheme.pb.sha256 should exist"); + UNIT_ASSERT_C(HasFsFile(basePath, destinationPath + "/permissions.pb.sha256"), + "permissions.pb.sha256 should exist"); + + TString schemeContent = GetFsFileContent(basePath, destinationPath + "/scheme.pb"); + UNIT_ASSERT_C(!schemeContent.empty(), "Scheme file should not be empty"); + + Ydb::Table::CreateTableRequest schemeProto; + UNIT_ASSERT_C(google::protobuf::TextFormat::ParseFromString(schemeContent, &schemeProto), + "Should parse scheme protobuf"); + + UNIT_ASSERT_VALUES_EQUAL(schemeProto.columns_size(), 2); + UNIT_ASSERT_VALUES_EQUAL(schemeProto.primary_key_size(), 1); } Y_UNIT_TEST(ShouldFailOnNonExistentPath) { From dab57192623d0d58c20d251749235d418f7d3990 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 5 Dec 2025 09:13:08 +0000 Subject: [PATCH 22/25] logs and done --- ydb/core/tx/datashard/backup_unit.cpp | 44 +++++- ydb/core/tx/datashard/export_fs_uploader.cpp | 138 ++++++++++++++----- ydb/core/tx/datashard/export_scan.cpp | 75 +++++++++- 3 files changed, 217 insertions(+), 40 deletions(-) diff --git a/ydb/core/tx/datashard/backup_unit.cpp b/ydb/core/tx/datashard/backup_unit.cpp index da12a1e2bcbc..531d3d5cf123 100644 --- a/ydb/core/tx/datashard/backup_unit.cpp +++ b/ydb/core/tx/datashard/backup_unit.cpp @@ -7,6 +7,7 @@ #include "export_fs.h" #include +#include namespace NKikimr { namespace NDataShard { @@ -76,6 +77,13 @@ class TBackupUnit : public TBackupRestoreUnitBase return false; } } else if (backup.HasFSSettings()) { + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::Run - FS export" + << ", tableId# " << tableId + << ", basePath# " << backup.GetFSSettings().GetBasePath() + << ", path# " << backup.GetFSSettings().GetPath() + << ", shardNum# " << backup.GetShardNum()); + NBackupRestoreTraits::ECompressionCodec codec; if (!TryCodecFromTask(backup, codec)) { Abort(op, ctx, TStringBuilder() << "Unsupported compression codec" @@ -85,6 +93,10 @@ class TBackupUnit : public TBackupRestoreUnitBase if (auto* exportFactory = appData->DataShardExportFactory) { std::shared_ptr(exportFactory->CreateExportToFs(backup, columns)).swap(exp); + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::Run - FS export created" + << ", tableId# " << tableId + << ", exportPtr# " << (void*)exp.get()); } else { Abort(op, ctx, "Exports to FS are disabled"); return false; @@ -98,9 +110,19 @@ class TBackupUnit : public TBackupRestoreUnitBase return exp->CreateUploader(self, txId); }; + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::Run - creating buffer and scan" + << ", tableId# " << tableId + << ", txId# " << op->GetTxId()); + THolder buffer{exp->CreateBuffer()}; THolder scan{CreateExportScan(std::move(buffer), createUploader)}; + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::Run - scan created, queueing" + << ", tableId# " << tableId + << ", localTableId# " << localTableId); + const auto& taskName = appData->DataShardConfig.GetBackupTaskName(); const auto taskPrio = appData->DataShardConfig.GetBackupTaskPriority(); @@ -128,13 +150,21 @@ class TBackupUnit : public TBackupRestoreUnitBase return op->HasScanResult(); } - bool ProcessResult(TOperation::TPtr op, const TActorContext&) override { + bool ProcessResult(TOperation::TPtr op, const TActorContext& ctx) override { TActiveTransaction* tx = dynamic_cast(op.Get()); Y_ENSURE(tx, "cannot cast operation of kind " << op->GetKind()); auto* result = CheckedCast(op->ScanResult().Get()); bool done = true; + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::ProcessResult" + << ", txId# " << op->GetTxId() + << ", outcome# " << static_cast(result->Outcome) + << ", error# " << result->Error + << ", bytesRead# " << result->BytesRead + << ", rowsRead# " << result->RowsRead); + switch (result->Outcome) { case EExportOutcome::Success: case EExportOutcome::Error: @@ -143,11 +173,18 @@ class TBackupUnit : public TBackupRestoreUnitBase schemeOp->Error = std::move(result->Error); schemeOp->BytesProcessed = result->BytesRead; schemeOp->RowsProcessed = result->RowsRead; + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::ProcessResult - updated schemeOp" + << ", txId# " << op->GetTxId() + << ", success# " << schemeOp->Success); } else { Y_ENSURE(false, "Cannot find schema tx: " << op->GetTxId()); } break; case EExportOutcome::Aborted: + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::ProcessResult - aborted" + << ", txId# " << op->GetTxId()); done = false; break; } @@ -155,6 +192,11 @@ class TBackupUnit : public TBackupRestoreUnitBase op->SetScanResult(nullptr); tx->SetScanTask(0); + LOG_INFO_S(ctx, NKikimrServices::DATASHARD_BACKUP, + "TBackupUnit::ProcessResult - done" + << ", txId# " << op->GetTxId() + << ", done# " << done); + return done; } diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index 6996bc23776c..75c9611005b5 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -31,7 +31,6 @@ namespace NDataShard { using namespace NBackup; using namespace NBackupRestoreTraits; -// Settings class for filesystem export class TFsSettings { public: const TString BasePath; // Base path on filesystem (e.g., /mnt/exports) @@ -88,7 +87,6 @@ struct TChangefeedExportDescriptions { class TFsUploader: public TActorBootstrapped { using TEvBuffer = TEvExportScan::TEvBuffer; - // Write data to a file, creating parent directories if needed bool WriteFile(const TString& path, const TString& data, TString& error) { try { TFsPath fsPath(path); @@ -114,14 +112,12 @@ class TFsUploader: public TActorBootstrapped { } } - // Write protobuf message to file bool WriteMessage(const google::protobuf::Message& message, const TString& path, TString& error) { TString data; google::protobuf::TextFormat::PrintToString(message, &data); return WriteFile(path, data, error); } - // Write data with checksum bool WriteFileWithChecksum(const TString& path, const TString& data, TString& error) { if (!WriteFile(path, data, error)) { return false; @@ -143,7 +139,6 @@ class TFsUploader: public TActorBootstrapped { return true; } - // Write protobuf message with checksum bool WriteMessageWithChecksum(const google::protobuf::Message& message, const TString& path, TString& error) { TString data; google::protobuf::TextFormat::PrintToString(message, &data); @@ -151,15 +146,23 @@ class TFsUploader: public TActorBootstrapped { } void UploadMetadata() { - EXPORT_LOG_D("UploadMetadata" - << ": self# " << SelfId()); + EXPORT_LOG_I("UploadMetadata started" + << ": self# " << SelfId() + << ", path# " << Settings.GetMetadataKey() + << ", metadataSize# " << Metadata.size()); TString error; if (!WriteFileWithChecksum(Settings.GetMetadataKey(), Metadata, error)) { + EXPORT_LOG_E("UploadMetadata failed" + << ": self# " << SelfId() + << ", error# " << error); return Finish(false, error); } MetadataUploaded = true; + EXPORT_LOG_I("UploadMetadata completed" + << ": self# " << SelfId() + << ", enablePermissions# " << EnablePermissions); if (EnablePermissions) { UploadPermissions(); @@ -169,41 +172,59 @@ class TFsUploader: public TActorBootstrapped { } void UploadPermissions() { - EXPORT_LOG_D("UploadPermissions" - << ": self# " << SelfId()); + EXPORT_LOG_I("UploadPermissions started" + << ": self# " << SelfId() + << ", path# " << Settings.GetPermissionsKey() + << ", hasPermissions# " << Permissions.Defined()); if (!Permissions) { + EXPORT_LOG_E("UploadPermissions failed - no permissions" + << ": self# " << SelfId()); return Finish(false, "Cannot infer permissions"); } TString error; if (!WriteMessageWithChecksum(Permissions.GetRef(), Settings.GetPermissionsKey(), error)) { + EXPORT_LOG_E("UploadPermissions failed" + << ": self# " << SelfId() + << ", error# " << error); return Finish(false, error); } PermissionsUploaded = true; + EXPORT_LOG_I("UploadPermissions completed" + << ": self# " << SelfId()); UploadScheme(); } void UploadScheme() { - EXPORT_LOG_D("UploadScheme" - << ": self# " << SelfId()); + EXPORT_LOG_I("UploadScheme started" + << ": self# " << SelfId() + << ", path# " << Settings.GetSchemeKey() + << ", hasScheme# " << Scheme.Defined()); if (!Scheme) { + EXPORT_LOG_E("UploadScheme failed - no scheme" + << ": self# " << SelfId()); return Finish(false, "Cannot infer scheme"); } TString error; if (!WriteMessageWithChecksum(Scheme.GetRef(), Settings.GetSchemeKey(), error)) { + EXPORT_LOG_E("UploadScheme failed" + << ": self# " << SelfId() + << ", error# " << error); return Finish(false, error); } SchemeUploaded = true; + EXPORT_LOG_I("UploadScheme completed" + << ": self# " << SelfId()); UploadChangefeeds(); } void UploadChangefeeds() { - EXPORT_LOG_D("UploadChangefeeds" + EXPORT_LOG_I("UploadChangefeeds started" << ": self# " << SelfId() << ", index# " << IndexExportedChangefeed << ", total# " << Changefeeds.size()); @@ -211,15 +232,25 @@ class TFsUploader: public TActorBootstrapped { while (IndexExportedChangefeed < Changefeeds.size()) { const auto& desc = Changefeeds[IndexExportedChangefeed]; + EXPORT_LOG_I("UploadChangefeeds processing changefeed" + << ": self# " << SelfId() + << ", index# " << IndexExportedChangefeed + << ", name# " << desc.Name + << ", prefix# " << desc.Prefix); + TString error; - // Write changefeed description if (!WriteMessageWithChecksum(desc.ChangefeedDescription, Settings.GetChangefeedKey(desc.Prefix), error)) { + EXPORT_LOG_E("UploadChangefeeds failed to write changefeed" + << ": self# " << SelfId() + << ", error# " << error); return Finish(false, error); } - // Write topic description if (!WriteMessageWithChecksum(desc.Topic, Settings.GetTopicKey(desc.Prefix), error)) { + EXPORT_LOG_E("UploadChangefeeds failed to write topic" + << ": self# " << SelfId() + << ", error# " << error); return Finish(false, error); } @@ -227,50 +258,74 @@ class TFsUploader: public TActorBootstrapped { } ChangefeedsUploaded = true; + EXPORT_LOG_I("UploadChangefeeds completed" + << ": self# " << SelfId() + << ", scanner# " << Scanner); - // Scheme upload is done, now wait for scanner to be ready for data export - // For now, we skip data export and finish successfully if (Scanner) { + EXPORT_LOG_I("Scanner already ready, finishing export" + << ": self# " << SelfId()); // Tell scanner we're done (skip data export for now) Finish(true); } else { + EXPORT_LOG_I("Waiting for scanner to become ready" + << ": self# " << SelfId()); // Wait for scanner to be ready Become(&TThis::StateWaitForScanner); } } void Handle(TEvExportScan::TEvReady::TPtr& ev) { - EXPORT_LOG_D("Handle TEvExportScan::TEvReady" + EXPORT_LOG_I("Handle TEvExportScan::TEvReady" << ": self# " << SelfId() - << ", sender# " << ev->Sender); + << ", sender# " << ev->Sender + << ", metadataUploaded# " << MetadataUploaded + << ", schemeUploaded# " << SchemeUploaded + << ", permissionsUploaded# " << PermissionsUploaded + << ", changefeedsUploaded# " << ChangefeedsUploaded + << ", error# " << Error.GetOrElse("none")); Scanner = ev->Sender; if (Error) { + EXPORT_LOG_I("Handle TEvReady - has error, passing away" + << ": self# " << SelfId() + << ", error# " << Error.GetOrElse("none")); return PassAway(); } const bool permissionsDone = !EnablePermissions || PermissionsUploaded; + EXPORT_LOG_I("Handle TEvReady - checking completion" + << ": self# " << SelfId() + << ", permissionsDone# " << permissionsDone + << ", enablePermissions# " << EnablePermissions); + if (SchemeUploaded && MetadataUploaded && permissionsDone && ChangefeedsUploaded) { - // Scheme export is done, finish successfully - // Data export will be implemented later + EXPORT_LOG_I("Handle TEvReady - all uploads done, finishing" + << ": self# " << SelfId()); Finish(true); + } else { + EXPORT_LOG_I("Handle TEvReady - waiting for uploads to complete" + << ": self# " << SelfId()); } } void Handle(TEvBuffer::TPtr& ev) { - EXPORT_LOG_D("Handle TEvExportScan::TEvBuffer" + EXPORT_LOG_I("Handle TEvExportScan::TEvBuffer" << ": self# " << SelfId() << ", sender# " << ev->Sender + << ", isScanner# " << (ev->Sender == Scanner) + << ", last# " << ev->Get()->Last << ", msg# " << ev->Get()->ToString()); - // For now, we don't handle data - just acknowledge and finish - // Data export will be implemented later if (ev->Sender == Scanner) { if (ev->Get()->Last) { + EXPORT_LOG_I("Handle TEvBuffer - last buffer received, finishing" + << ": self# " << SelfId()); Finish(true); } else { - // Request more data (but we'll finish when we get the last buffer) + EXPORT_LOG_I("Handle TEvBuffer - requesting more data" + << ": self# " << SelfId()); Send(Scanner, new TEvExportScan::TEvFeed()); } } @@ -338,34 +393,58 @@ class TFsUploader: public TActorBootstrapped { } void Bootstrap() { - EXPORT_LOG_D("Bootstrap" + EXPORT_LOG_I("Bootstrap" << ": self# " << SelfId() << ", shardNum# " << Settings.Shard << ", basePath# " << Settings.BasePath - << ", relativePath# " << Settings.RelativePath); + << ", relativePath# " << Settings.RelativePath + << ", metadataUploaded# " << MetadataUploaded + << ", schemeUploaded# " << SchemeUploaded + << ", permissionsUploaded# " << PermissionsUploaded + << ", changefeedsUploaded# " << ChangefeedsUploaded); if (!MetadataUploaded) { + EXPORT_LOG_I("Starting metadata upload (shard 0 path)" + << ": self# " << SelfId()); UploadMetadata(); } else { + EXPORT_LOG_I("Non-zero shard, waiting for scanner" + << ": self# " << SelfId()); Become(&TThis::StateWaitForScanner); } } STATEFN(StateBase) { + EXPORT_LOG_D("StateBase received event" + << ": self# " << SelfId() + << ", type# " << ev->GetTypeRewrite()); switch (ev->GetTypeRewrite()) { hFunc(TEvExportScan::TEvReady, Handle); sFunc(TEvents::TEvWakeup, Bootstrap); sFunc(TEvents::TEvPoisonPill, PassAway); + default: + EXPORT_LOG_W("StateBase unhandled event" + << ": self# " << SelfId() + << ", type# " << ev->GetTypeRewrite()); + break; } } STATEFN(StateWaitForScanner) { + EXPORT_LOG_D("StateWaitForScanner received event" + << ": self# " << SelfId() + << ", type# " << ev->GetTypeRewrite()); switch (ev->GetTypeRewrite()) { hFunc(TEvExportScan::TEvReady, Handle); hFunc(TEvBuffer, Handle); sFunc(TEvents::TEvPoisonPill, PassAway); + default: + EXPORT_LOG_W("StateWaitForScanner unhandled event" + << ": self# " << SelfId() + << ", type# " << ev->GetTypeRewrite()); + break; } } @@ -452,10 +531,7 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { dataShard, txId, Task, std::move(scheme), std::move(changefeeds), std::move(permissions), metadata.Serialize()); } -// CreateBuffer implementation - reuse S3 buffer for now since we need proper CSV serialization -// Data export will be fully implemented later IExport::IBuffer* TFsExport::CreateBuffer() const { -#ifndef KIKIMR_DISABLE_S3_OPS using namespace NBackupRestoreTraits; const auto& scanSettings = Task.GetScanSettings(); @@ -485,10 +561,6 @@ IExport::IBuffer* TFsExport::CreateBuffer() const { } return CreateS3ExportBuffer(std::move(bufferSettings)); -#else - Y_ENSURE(false, "S3 ops disabled, cannot create export buffer"); - return nullptr; -#endif } } // NDataShard diff --git a/ydb/core/tx/datashard/export_scan.cpp b/ydb/core/tx/datashard/export_scan.cpp index 0275c392c1d4..864e2a8fdfd1 100644 --- a/ydb/core/tx/datashard/export_scan.cpp +++ b/ydb/core/tx/datashard/export_scan.cpp @@ -73,19 +73,42 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle } void MaybeReady() { + EXPORT_LOG_I("MaybeReady" + << ": self# " << SelfId() + << ", uploader# " << Uploader + << ", isRegistered# " << State.Test(ES_REGISTERED) + << ", isInitialized# " << State.Test(ES_INITIALIZED) + << ", isReady# " << IsReady()); if (IsReady()) { + EXPORT_LOG_I("MaybeReady - sending TEvReady to uploader" + << ": self# " << SelfId() + << ", uploader# " << Uploader); Send(Uploader, new TEvExportScan::TEvReady()); } } EScan MaybeSendBuffer() { const bool noMoreData = State.Test(ES_NO_MORE_DATA); + const bool bufferFilled = Buffer->IsFilled(); + const bool uploaderReady = State.Test(ES_UPLOADER_READY); + const bool bufferSent = State.Test(ES_BUFFER_SENT); + + EXPORT_LOG_D("MaybeSendBuffer" + << ": self# " << SelfId() + << ", noMoreData# " << noMoreData + << ", bufferFilled# " << bufferFilled + << ", uploaderReady# " << uploaderReady + << ", bufferSent# " << bufferSent); - if (!noMoreData && !Buffer->IsFilled()) { + if (!noMoreData && !bufferFilled) { return EScan::Feed; } - if (!State.Test(ES_UPLOADER_READY) || State.Test(ES_BUFFER_SENT)) { + if (!uploaderReady || bufferSent) { + EXPORT_LOG_D("MaybeSendBuffer - sleeping" + << ": self# " << SelfId() + << ", uploaderReady# " << uploaderReady + << ", bufferSent# " << bufferSent); Spent->Alter(false); return EScan::Sleep; } @@ -96,14 +119,25 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle if (!ev) { Success = false; Error = Buffer->GetError(); + EXPORT_LOG_E("MaybeSendBuffer - failed to prepare event" + << ": self# " << SelfId() + << ", error# " << Error); return EScan::Final; } + EXPORT_LOG_I("MaybeSendBuffer - sending buffer to uploader" + << ": self# " << SelfId() + << ", uploader# " << Uploader + << ", noMoreData# " << noMoreData + << ", rows# " << stats.Rows + << ", bytesRead# " << stats.BytesRead); Send(Uploader, std::move(ev)); State.Set(ES_BUFFER_SENT); Stats->Aggr(stats); if (noMoreData) { + EXPORT_LOG_I("MaybeSendBuffer - no more data, sleeping" + << ": self# " << SelfId()); Spent->Alter(false); return EScan::Sleep; } @@ -114,7 +148,7 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle void Handle(TEvExportScan::TEvReset::TPtr&) { Y_ENSURE(IsReady()); - EXPORT_LOG_D("Handle TEvExportScan::TEvReset" + EXPORT_LOG_I("Handle TEvExportScan::TEvReset" << ": self# " << SelfId()); Stats.Reset(new TStats); @@ -126,8 +160,9 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle void Handle(TEvExportScan::TEvFeed::TPtr&) { Y_ENSURE(IsReady()); - EXPORT_LOG_D("Handle TEvExportScan::TEvFeed" - << ": self# " << SelfId()); + EXPORT_LOG_I("Handle TEvExportScan::TEvFeed" + << ": self# " << SelfId() + << ", uploader# " << Uploader); State.Set(ES_UPLOADER_READY).Reset(ES_BUFFER_SENT); Spent->Alter(true); @@ -139,12 +174,17 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle void Handle(TEvExportScan::TEvFinish::TPtr& ev) { Y_ENSURE(IsReady()); - EXPORT_LOG_D("Handle TEvExportScan::TEvFinish" + EXPORT_LOG_I("Handle TEvExportScan::TEvFinish" << ": self# " << SelfId() + << ", sender# " << ev->Sender + << ", success# " << ev->Get()->Success + << ", error# " << ev->Get()->Error << ", msg# " << ev->Get()->ToString()); Success = ev->Get()->Success; Error = ev->Get()->Error; + EXPORT_LOG_I("Handle TEvFinish - touching driver with Final" + << ": self# " << SelfId()); Driver->Touch(EScan::Final); } @@ -184,17 +224,27 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle } void Registered(TActorSystem* sys, const TActorId&) override { + EXPORT_LOG_I("Registered - creating uploader" + << ": self# " << SelfId()); Uploader = sys->Register(CreateUploaderFn(), TMailboxType::HTSwap, AppData()->BatchPoolId); + EXPORT_LOG_I("Registered - uploader created" + << ": self# " << SelfId() + << ", uploader# " << Uploader); State.Set(ES_REGISTERED); MaybeReady(); } EScan Seek(TLead& lead, ui64) override { + EXPORT_LOG_I("Seek called" + << ": self# " << SelfId() + << ", uploader# " << Uploader); lead.To(Scheme->Tags(), {}, ESeek::Lower); Buffer->Clear(); State.Set(ES_INITIALIZED); + EXPORT_LOG_I("Seek - set initialized, calling MaybeReady" + << ": self# " << SelfId()); MaybeReady(); Spent->Alter(true); @@ -213,11 +263,19 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle } EScan Exhausted() override { + EXPORT_LOG_I("Exhausted - no more data" + << ": self# " << SelfId() + << ", uploader# " << Uploader); State.Set(ES_NO_MORE_DATA); return MaybeSendBuffer(); } TAutoPtr Finish(EStatus status) override { + EXPORT_LOG_I("Finish called" + << ": self# " << SelfId() + << ", status# " << static_cast(status) + << ", success# " << Success + << ", error# " << Error); auto outcome = EExportOutcome::Success; if (status != EStatus::Done) { outcome = status == EStatus::Exception @@ -227,6 +285,11 @@ class TExportScan: private NActors::IActorCallback, public IActorExceptionHandle outcome = EExportOutcome::Error; } + EXPORT_LOG_I("Finish - outcome determined" + << ": self# " << SelfId() + << ", outcome# " << static_cast(outcome) + << ", bytesRead# " << Stats->BytesRead + << ", rows# " << Stats->Rows); PassAway(); return new TExportScanProduct(outcome, Error, Stats->BytesRead, Stats->Rows); } From 3ccbaed64cfdb720817cf8c1d3a0ccd124d6054e Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 5 Dec 2025 09:39:08 +0000 Subject: [PATCH 23/25] done data export --- ydb/core/tx/datashard/export_fs_uploader.cpp | 144 ++++++++++++++++--- 1 file changed, 124 insertions(+), 20 deletions(-) diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index 75c9611005b5..2ffe74074d72 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -87,7 +87,7 @@ struct TChangefeedExportDescriptions { class TFsUploader: public TActorBootstrapped { using TEvBuffer = TEvExportScan::TEvBuffer; - bool WriteFile(const TString& path, const TString& data, TString& error) { + bool WriteFile(const TString& path, const TStringBuf& data, TString& error) { try { TFsPath fsPath(path); fsPath.Parent().MkDirs(); @@ -112,6 +112,31 @@ class TFsUploader: public TActorBootstrapped { } } + bool AppendFile(const TString& path, const TStringBuf& data, TString& error) { + try { + TFsPath fsPath(path); + fsPath.Parent().MkDirs(); + + TFile file(path, OpenAlways | WrOnly | ForAppend); + file.Write(data.data(), data.size()); + file.Close(); + + EXPORT_LOG_D("AppendFile succeeded" + << ": self# " << SelfId() + << ", path# " << path + << ", size# " << data.size()); + + return true; + } catch (const std::exception& ex) { + error = TStringBuilder() << "Failed to append to file " << path << ": " << ex.what(); + EXPORT_LOG_E("AppendFile failed" + << ": self# " << SelfId() + << ", path# " << path + << ", error# " << error); + return false; + } + } + bool WriteMessage(const google::protobuf::Message& message, const TString& path, TString& error) { TString data; google::protobuf::TextFormat::PrintToString(message, &data); @@ -262,16 +287,25 @@ class TFsUploader: public TActorBootstrapped { << ": self# " << SelfId() << ", scanner# " << Scanner); + StartDataUpload(); + } + + void StartDataUpload() { + EXPORT_LOG_I("StartDataUpload" + << ": self# " << SelfId() + << ", scanner# " << Scanner + << ", dataPath# " << Settings.GetDataKey(EDataFormat::Csv, CompressionCodec)); + + Become(&TThis::StateUploadData); + if (Scanner) { - EXPORT_LOG_I("Scanner already ready, finishing export" + // Scanner is ready, request first data buffer + EXPORT_LOG_I("StartDataUpload - scanner ready, requesting data" << ": self# " << SelfId()); - // Tell scanner we're done (skip data export for now) - Finish(true); + Send(Scanner, new TEvExportScan::TEvFeed()); } else { - EXPORT_LOG_I("Waiting for scanner to become ready" + EXPORT_LOG_I("StartDataUpload - waiting for scanner" << ": self# " << SelfId()); - // Wait for scanner to be ready - Become(&TThis::StateWaitForScanner); } } @@ -301,33 +335,81 @@ class TFsUploader: public TActorBootstrapped { << ", enablePermissions# " << EnablePermissions); if (SchemeUploaded && MetadataUploaded && permissionsDone && ChangefeedsUploaded) { - EXPORT_LOG_I("Handle TEvReady - all uploads done, finishing" + EXPORT_LOG_I("Handle TEvReady - scheme done, starting data upload" << ": self# " << SelfId()); - Finish(true); + StartDataUpload(); } else { EXPORT_LOG_I("Handle TEvReady - waiting for uploads to complete" << ": self# " << SelfId()); } } + void HandleDataReady(TEvExportScan::TEvReady::TPtr& ev) { + EXPORT_LOG_I("HandleDataReady" + << ": self# " << SelfId() + << ", sender# " << ev->Sender); + + Scanner = ev->Sender; + + if (Error) { + return PassAway(); + } + + Send(Scanner, new TEvExportScan::TEvFeed()); + } + void Handle(TEvBuffer::TPtr& ev) { EXPORT_LOG_I("Handle TEvExportScan::TEvBuffer" << ": self# " << SelfId() << ", sender# " << ev->Sender << ", isScanner# " << (ev->Sender == Scanner) << ", last# " << ev->Get()->Last + << ", bufferSize# " << ev->Get()->Buffer.Size() << ", msg# " << ev->Get()->ToString()); - if (ev->Sender == Scanner) { - if (ev->Get()->Last) { - EXPORT_LOG_I("Handle TEvBuffer - last buffer received, finishing" - << ": self# " << SelfId()); - Finish(true); - } else { - EXPORT_LOG_I("Handle TEvBuffer - requesting more data" - << ": self# " << SelfId()); - Send(Scanner, new TEvExportScan::TEvFeed()); + if (ev->Sender != Scanner) { + EXPORT_LOG_W("Handle TEvBuffer - ignoring buffer from unknown sender" + << ": self# " << SelfId() + << ", sender# " << ev->Sender + << ", scanner# " << Scanner); + return; + } + + auto& buffer = ev->Get()->Buffer; + const TString dataPath = Settings.GetDataKey(EDataFormat::Csv, CompressionCodec); + + // Append data to file + if (buffer.Size() > 0) { + TString error; + if (!AppendFile(dataPath, TStringBuf(buffer.Data(), buffer.Size()), error)) { + return Finish(false, error); + } + DataBytesWritten += buffer.Size(); + } + + if (ev->Get()->Last) { + EXPORT_LOG_I("Handle TEvBuffer - last buffer received" + << ": self# " << SelfId() + << ", totalBytesWritten# " << DataBytesWritten + << ", checksum# " << ev->Get()->Checksum); + + if (EnableChecksums && !ev->Get()->Checksum.empty()) { + TString checksumPath = ChecksumKey(dataPath); + TFsPath fsPath(dataPath); + TString checksumContent = ev->Get()->Checksum + ' ' + fsPath.GetName(); + + TString error; + if (!WriteFile(checksumPath, checksumContent, error)) { + return Finish(false, error); + } } + + Finish(true); + } else { + EXPORT_LOG_I("Handle TEvBuffer - requesting more data" + << ": self# " << SelfId() + << ", bytesWrittenSoFar# " << DataBytesWritten); + Send(Scanner, new TEvExportScan::TEvFeed()); } } @@ -371,7 +453,8 @@ class TFsUploader: public TActorBootstrapped { TMaybe&& scheme, TVector changefeeds, TMaybe&& permissions, - TString&& metadata) + TString&& metadata, + ECompressionCodec compressionCodec) : Settings(TFsSettings::FromBackupTask(task)) , DataShard(dataShard) , TxId(txId) @@ -380,6 +463,7 @@ class TFsUploader: public TActorBootstrapped { , Metadata(std::move(metadata)) , Permissions(std::move(permissions)) , Retries(task.GetNumberOfRetries()) + , CompressionCodec(compressionCodec) , SchemeUploaded(task.GetShardNum() == 0 ? false : true) , ChangefeedsUploaded(task.GetShardNum() == 0 ? false : true) , MetadataUploaded(task.GetShardNum() == 0 ? false : true) @@ -448,6 +532,23 @@ class TFsUploader: public TActorBootstrapped { } } + STATEFN(StateUploadData) { + EXPORT_LOG_D("StateUploadData received event" + << ": self# " << SelfId() + << ", type# " << ev->GetTypeRewrite()); + switch (ev->GetTypeRewrite()) { + hFunc(TEvExportScan::TEvReady, HandleDataReady); + hFunc(TEvBuffer, Handle); + + sFunc(TEvents::TEvPoisonPill, PassAway); + default: + EXPORT_LOG_W("StateUploadData unhandled event" + << ": self# " << SelfId() + << ", type# " << ev->GetTypeRewrite()); + break; + } + } + private: TFsSettings Settings; @@ -459,7 +560,9 @@ class TFsUploader: public TActorBootstrapped { const TMaybe Permissions; const ui32 Retries; + const ECompressionCodec CompressionCodec; ui64 IndexExportedChangefeed = 0; + ui64 DataBytesWritten = 0; TActorId Scanner; bool SchemeUploaded; @@ -528,7 +631,8 @@ IActor* TFsExport::CreateUploader(const TActorId& dataShard, ui64 txId) const { metadata.AddFullBackup(backup); return new TFsUploader( - dataShard, txId, Task, std::move(scheme), std::move(changefeeds), std::move(permissions), metadata.Serialize()); + dataShard, txId, Task, std::move(scheme), std::move(changefeeds), std::move(permissions), + metadata.Serialize(), CodecFromTask(Task)); } IExport::IBuffer* TFsExport::CreateBuffer() const { From 50aa73d904907ed19d610b37e407476a2c56cb4c Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 5 Dec 2025 17:25:33 +0000 Subject: [PATCH 24/25] small fixes --- ydb/core/tx/datashard/export_fs_uploader.cpp | 126 +++++-------------- 1 file changed, 33 insertions(+), 93 deletions(-) diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index 2ffe74074d72..a277cc5e9ab5 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -52,27 +52,27 @@ class TFsSettings { return TFsPath(BasePath) / RelativePath; } - TString GetPermissionsKey() const { + TString GetPermissionsPath() const { return TFsPath(GetFullPath()) / PermissionsKeySuffix(false); } - TString GetMetadataKey() const { + TString GetMetadataPath() const { return TFsPath(GetFullPath()) / MetadataKeySuffix(false); } - TString GetSchemeKey() const { + TString GetSchemePath() const { return TFsPath(GetFullPath()) / SchemeKeySuffix(false); } - TString GetDataKey(EDataFormat format, ECompressionCodec codec) const { + TString GetDataPath(EDataFormat format, ECompressionCodec codec) const { return TFsPath(GetFullPath()) / DataKeySuffix(Shard, format, codec, false); } - TString GetChangefeedKey(const TString& changefeedPrefix) const { + TString GetChangefeedPath(const TString& changefeedPrefix) const { return TFsPath(GetFullPath()) / changefeedPrefix / ChangefeedKeySuffix(false); } - TString GetTopicKey(const TString& changefeedPrefix) const { + TString GetTopicPath(const TString& changefeedPrefix) const { return TFsPath(GetFullPath()) / changefeedPrefix / TopicKeySuffix(false); } }; @@ -87,12 +87,16 @@ struct TChangefeedExportDescriptions { class TFsUploader: public TActorBootstrapped { using TEvBuffer = TEvExportScan::TEvBuffer; - bool WriteFile(const TString& path, const TStringBuf& data, TString& error) { + bool WriteFile(const TString& path, const TStringBuf& data, TString& error, bool isAppend = false) { try { TFsPath fsPath(path); fsPath.Parent().MkDirs(); - TFile file(path, CreateAlways | WrOnly); + auto flags = CreateAlways | WrOnly; + if (isAppend) { + flags = OpenAlways | WrOnly | ForAppend; + } + TFile file(path, flags); file.Write(data.data(), data.size()); file.Close(); @@ -104,37 +108,12 @@ class TFsUploader: public TActorBootstrapped { return true; } catch (const std::exception& ex) { error = TStringBuilder() << "Failed to write file " << path << ": " << ex.what(); - EXPORT_LOG_E("WriteFile failed" - << ": self# " << SelfId() - << ", path# " << path - << ", error# " << error); return false; } } bool AppendFile(const TString& path, const TStringBuf& data, TString& error) { - try { - TFsPath fsPath(path); - fsPath.Parent().MkDirs(); - - TFile file(path, OpenAlways | WrOnly | ForAppend); - file.Write(data.data(), data.size()); - file.Close(); - - EXPORT_LOG_D("AppendFile succeeded" - << ": self# " << SelfId() - << ", path# " << path - << ", size# " << data.size()); - - return true; - } catch (const std::exception& ex) { - error = TStringBuilder() << "Failed to append to file " << path << ": " << ex.what(); - EXPORT_LOG_E("AppendFile failed" - << ": self# " << SelfId() - << ", path# " << path - << ", error# " << error); - return false; - } + return WriteFile(path, data, error, true); } bool WriteMessage(const google::protobuf::Message& message, const TString& path, TString& error) { @@ -150,7 +129,6 @@ class TFsUploader: public TActorBootstrapped { if (EnableChecksums) { TString checksum = ComputeChecksum(data); - // Extract filename for checksum file format TFsPath fsPath(path); TString filename = fsPath.GetName(); checksum += ' ' + filename; @@ -173,14 +151,11 @@ class TFsUploader: public TActorBootstrapped { void UploadMetadata() { EXPORT_LOG_I("UploadMetadata started" << ": self# " << SelfId() - << ", path# " << Settings.GetMetadataKey() + << ", path# " << Settings.GetMetadataPath() << ", metadataSize# " << Metadata.size()); TString error; - if (!WriteFileWithChecksum(Settings.GetMetadataKey(), Metadata, error)) { - EXPORT_LOG_E("UploadMetadata failed" - << ": self# " << SelfId() - << ", error# " << error); + if (!WriteFileWithChecksum(Settings.GetMetadataPath(), Metadata, error)) { return Finish(false, error); } @@ -199,20 +174,15 @@ class TFsUploader: public TActorBootstrapped { void UploadPermissions() { EXPORT_LOG_I("UploadPermissions started" << ": self# " << SelfId() - << ", path# " << Settings.GetPermissionsKey() + << ", path# " << Settings.GetPermissionsPath() << ", hasPermissions# " << Permissions.Defined()); if (!Permissions) { - EXPORT_LOG_E("UploadPermissions failed - no permissions" - << ": self# " << SelfId()); return Finish(false, "Cannot infer permissions"); } TString error; - if (!WriteMessageWithChecksum(Permissions.GetRef(), Settings.GetPermissionsKey(), error)) { - EXPORT_LOG_E("UploadPermissions failed" - << ": self# " << SelfId() - << ", error# " << error); + if (!WriteMessageWithChecksum(Permissions.GetRef(), Settings.GetPermissionsPath(), error)) { return Finish(false, error); } @@ -225,20 +195,15 @@ class TFsUploader: public TActorBootstrapped { void UploadScheme() { EXPORT_LOG_I("UploadScheme started" << ": self# " << SelfId() - << ", path# " << Settings.GetSchemeKey() + << ", path# " << Settings.GetSchemePath() << ", hasScheme# " << Scheme.Defined()); if (!Scheme) { - EXPORT_LOG_E("UploadScheme failed - no scheme" - << ": self# " << SelfId()); return Finish(false, "Cannot infer scheme"); } TString error; - if (!WriteMessageWithChecksum(Scheme.GetRef(), Settings.GetSchemeKey(), error)) { - EXPORT_LOG_E("UploadScheme failed" - << ": self# " << SelfId() - << ", error# " << error); + if (!WriteMessageWithChecksum(Scheme.GetRef(), Settings.GetSchemePath(), error)) { return Finish(false, error); } @@ -251,35 +216,23 @@ class TFsUploader: public TActorBootstrapped { void UploadChangefeeds() { EXPORT_LOG_I("UploadChangefeeds started" << ": self# " << SelfId() - << ", index# " << IndexExportedChangefeed << ", total# " << Changefeeds.size()); - while (IndexExportedChangefeed < Changefeeds.size()) { - const auto& desc = Changefeeds[IndexExportedChangefeed]; - + for (const auto& desc : Changefeeds) { EXPORT_LOG_I("UploadChangefeeds processing changefeed" << ": self# " << SelfId() - << ", index# " << IndexExportedChangefeed << ", name# " << desc.Name << ", prefix# " << desc.Prefix); TString error; - if (!WriteMessageWithChecksum(desc.ChangefeedDescription, Settings.GetChangefeedKey(desc.Prefix), error)) { - EXPORT_LOG_E("UploadChangefeeds failed to write changefeed" - << ": self# " << SelfId() - << ", error# " << error); + if (!WriteMessageWithChecksum(desc.ChangefeedDescription, Settings.GetChangefeedPath(desc.Prefix), error)) { return Finish(false, error); } - if (!WriteMessageWithChecksum(desc.Topic, Settings.GetTopicKey(desc.Prefix), error)) { - EXPORT_LOG_E("UploadChangefeeds failed to write topic" - << ": self# " << SelfId() - << ", error# " << error); + if (!WriteMessageWithChecksum(desc.Topic, Settings.GetTopicPath(desc.Prefix), error)) { return Finish(false, error); } - - ++IndexExportedChangefeed; } ChangefeedsUploaded = true; @@ -294,17 +247,16 @@ class TFsUploader: public TActorBootstrapped { EXPORT_LOG_I("StartDataUpload" << ": self# " << SelfId() << ", scanner# " << Scanner - << ", dataPath# " << Settings.GetDataKey(EDataFormat::Csv, CompressionCodec)); + << ", dataPath# " << Settings.GetDataPath(EDataFormat::Csv, CompressionCodec)); Become(&TThis::StateUploadData); if (Scanner) { - // Scanner is ready, request first data buffer - EXPORT_LOG_I("StartDataUpload - scanner ready, requesting data" + EXPORT_LOG_I("StartDataUpload: scanner ready, requesting data" << ": self# " << SelfId()); Send(Scanner, new TEvExportScan::TEvFeed()); } else { - EXPORT_LOG_I("StartDataUpload - waiting for scanner" + EXPORT_LOG_I("StartDataUpload: waiting for scanner" << ": self# " << SelfId()); } } @@ -322,24 +274,14 @@ class TFsUploader: public TActorBootstrapped { Scanner = ev->Sender; if (Error) { - EXPORT_LOG_I("Handle TEvReady - has error, passing away" - << ": self# " << SelfId() - << ", error# " << Error.GetOrElse("none")); return PassAway(); } - const bool permissionsDone = !EnablePermissions || PermissionsUploaded; - EXPORT_LOG_I("Handle TEvReady - checking completion" - << ": self# " << SelfId() - << ", permissionsDone# " << permissionsDone - << ", enablePermissions# " << EnablePermissions); - + const bool permissionsDone = !EnablePermissions || PermissionsUploaded; if (SchemeUploaded && MetadataUploaded && permissionsDone && ChangefeedsUploaded) { - EXPORT_LOG_I("Handle TEvReady - scheme done, starting data upload" - << ": self# " << SelfId()); StartDataUpload(); } else { - EXPORT_LOG_I("Handle TEvReady - waiting for uploads to complete" + EXPORT_LOG_I("Handle TEvReady: waiting for uploads to complete" << ": self# " << SelfId()); } } @@ -368,7 +310,7 @@ class TFsUploader: public TActorBootstrapped { << ", msg# " << ev->Get()->ToString()); if (ev->Sender != Scanner) { - EXPORT_LOG_W("Handle TEvBuffer - ignoring buffer from unknown sender" + EXPORT_LOG_W("Handle TEvBuffer: ignoring buffer from unknown sender" << ": self# " << SelfId() << ", sender# " << ev->Sender << ", scanner# " << Scanner); @@ -376,9 +318,8 @@ class TFsUploader: public TActorBootstrapped { } auto& buffer = ev->Get()->Buffer; - const TString dataPath = Settings.GetDataKey(EDataFormat::Csv, CompressionCodec); + const TString dataPath = Settings.GetDataPath(EDataFormat::Csv, CompressionCodec); - // Append data to file if (buffer.Size() > 0) { TString error; if (!AppendFile(dataPath, TStringBuf(buffer.Data(), buffer.Size()), error)) { @@ -388,7 +329,7 @@ class TFsUploader: public TActorBootstrapped { } if (ev->Get()->Last) { - EXPORT_LOG_I("Handle TEvBuffer - last buffer received" + EXPORT_LOG_I("Handle TEvBuffer: last buffer received" << ": self# " << SelfId() << ", totalBytesWritten# " << DataBytesWritten << ", checksum# " << ev->Get()->Checksum); @@ -406,7 +347,7 @@ class TFsUploader: public TActorBootstrapped { Finish(true); } else { - EXPORT_LOG_I("Handle TEvBuffer - requesting more data" + EXPORT_LOG_I("Handle TEvBuffer: requesting more data" << ": self# " << SelfId() << ", bytesWrittenSoFar# " << DataBytesWritten); Send(Scanner, new TEvExportScan::TEvFeed()); @@ -488,11 +429,11 @@ class TFsUploader: public TActorBootstrapped { << ", changefeedsUploaded# " << ChangefeedsUploaded); if (!MetadataUploaded) { - EXPORT_LOG_I("Starting metadata upload (shard 0 path)" + EXPORT_LOG_I("Starting metadata upload" << ": self# " << SelfId()); UploadMetadata(); } else { - EXPORT_LOG_I("Non-zero shard, waiting for scanner" + EXPORT_LOG_I("Waiting for scanner" << ": self# " << SelfId()); Become(&TThis::StateWaitForScanner); } @@ -561,7 +502,6 @@ class TFsUploader: public TActorBootstrapped { const ui32 Retries; const ECompressionCodec CompressionCodec; - ui64 IndexExportedChangefeed = 0; ui64 DataBytesWritten = 0; TActorId Scanner; From 68a6a1e8ac88da243314bd1add860ae2fbcb9917 Mon Sep 17 00:00:00 2001 From: st-shchetinin Date: Fri, 5 Dec 2025 17:26:44 +0000 Subject: [PATCH 25/25] lock --- ydb/core/tx/datashard/export_fs_uploader.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ydb/core/tx/datashard/export_fs_uploader.cpp b/ydb/core/tx/datashard/export_fs_uploader.cpp index a277cc5e9ab5..38e9e1528039 100644 --- a/ydb/core/tx/datashard/export_fs_uploader.cpp +++ b/ydb/core/tx/datashard/export_fs_uploader.cpp @@ -97,6 +97,7 @@ class TFsUploader: public TActorBootstrapped { flags = OpenAlways | WrOnly | ForAppend; } TFile file(path, flags); + file.Flock(LOCK_EX); file.Write(data.data(), data.size()); file.Close();