From 696e7fe51dc540407545a92c615946a00de16d44 Mon Sep 17 00:00:00 2001 From: Stephane Loeuillet Date: Mon, 13 Apr 2026 18:25:51 +0200 Subject: [PATCH 1/3] fix: address audit findings in force_rebalance code paths Fixes several bugs found during audit of PR #1351 force_rebalance logic: 1. RebalancedFiles not persisted to metadata (CRITICAL) TableMetadata.Save() was not copying the RebalancedFiles map, causing it to be lost on resumable downloads when metadata is re-read from disk. Added RebalancedFiles to the saved fields. 2. Archive file name trimming used wrong disk prefix (CRITICAL) In downloadTableData() compressed format path, partName was computed with TrimPrefix(archiveFile, capturedDisk+"_"). But archive files are named with the ORIGINAL disk prefix (e.g. "default_part1.tar.gz"), not the rebalanced disk name. The trim silently failed, breaking the hardlink-exists-files optimization. Use disk (original) for the trim. 3. Loop variable mutation in downloadDiffParts (CRITICAL) `disk` was mutated to `part.RebalancedDisk`, then used later as `capturedDisk := disk` to update `table.Parts[capturedDisk][idx]`. The idx came from iterating table.Parts[original_disk], so the update could go out of bounds or corrupt the wrong entry when the rebalanced and original disk names differed. Use activeDisk per iteration like we already do in filesystemhelper. 4. Unclear error message for store path construction Split the check into two distinct error messages so it's clear whether the disk is missing from diskMap or the UUID is empty. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/backup/download.go | 15 ++++++++++----- pkg/filesystemhelper/filesystemhelper.go | 13 ++++++++----- pkg/metadata/table_metadata.go | 1 + 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pkg/backup/download.go b/pkg/backup/download.go index e481ad01..2d9b57b3 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -700,7 +700,9 @@ func (b *Backuper) downloadTableData(ctx context.Context, remoteBackup metadata. } if hardlinkExistsFiles { ext := "." + config.ArchiveExtensions[remoteBackup.DataFormat] - partName := strings.TrimPrefix(strings.TrimSuffix(archiveFile, ext), capturedDisk+"_") + // Archive files are named with the original disk prefix (e.g. "default_part1.tar.gz"), + // not the rebalanced disk name, so use `disk` for the trim + partName := strings.TrimPrefix(strings.TrimSuffix(archiveFile, ext), disk+"_") var foundPart *metadata.Part var idx int for i, part := range capturedParts { @@ -967,21 +969,24 @@ func (b *Backuper) downloadDiffParts(ctx context.Context, remoteBackup metadata. for disk, parts := range table.Parts { diskPath, diskExists := b.DiskToPathMap[disk] for i, part := range parts { + // Use per-iteration variable to avoid corrupting the outer loop variable + activeDisk := disk + activeDiskPath := diskPath if !diskExists && part.RebalancedDisk == "" { return 0, errors.Errorf("downloadDiffParts: table: `%s`.`%s`, disk: %s, part.Name: %s, part.RebalancedDisk: `%s` not rebalanced", table.Table, table.Database, disk, part.Name, part.RebalancedDisk) } if part.RebalancedDisk != "" { - diskPath, diskExists = b.DiskToPathMap[part.RebalancedDisk] + activeDiskPath, diskExists = b.DiskToPathMap[part.RebalancedDisk] if !diskExists { return 0, errors.Errorf("downloadDiffParts: table: `%s`.`%s`, disk: %s, part.Name: %s, part.RebalancedDisk: `%s` not rebalanced", table.Table, table.Database, disk, part.Name, part.RebalancedDisk) } - disk = part.RebalancedDisk - if b.shouldDiskNameSkipByNameOrType(disk, disks) { + activeDisk = part.RebalancedDisk + if b.shouldDiskNameSkipByNameOrType(activeDisk, disks) { log.Warn().Str("database", table.Database).Str("table", table.Table).Str("rebalancedDisk", part.RebalancedDisk).Msg("skipped") continue } } - newPath := path.Join(diskPath, "backup", remoteBackup.BackupName, "shadow", dbAndTableDir, disk, part.Name) + newPath := path.Join(activeDiskPath, "backup", remoteBackup.BackupName, "shadow", dbAndTableDir, activeDisk, part.Name) if checkErr := b.checkNewPath(newPath, part); checkErr != nil { return 0, errors.WithMessage(checkErr, "checkNewPath") } diff --git a/pkg/filesystemhelper/filesystemhelper.go b/pkg/filesystemhelper/filesystemhelper.go index f9a30f4c..fa8e5e5c 100644 --- a/pkg/filesystemhelper/filesystemhelper.go +++ b/pkg/filesystemhelper/filesystemhelper.go @@ -153,12 +153,15 @@ func HardlinkBackupPartsToStorage(backupName string, backupTable metadata.TableM // ClickHouse creates store/{uuid}/detached/ on ALL disks of the // storage policy at table load time (MergeTreeData constructor). // Build the path directly so hardlinks stay on the same filesystem. - if rebalancedDiskPath, hasDisk := diskMap[part.RebalancedDisk]; hasDisk && backupTable.UUID != "" { - dstParentDir = filepath.Join(rebalancedDiskPath, "store", backupTable.UUID[:3], backupTable.UUID) - dstParentDirExists = true - } else { - return errors.Errorf("can't build store path for rebalanced disk %s (uuid=%s)", part.RebalancedDisk, backupTable.UUID) + rebalancedDiskPath, hasDisk := diskMap[part.RebalancedDisk] + if !hasDisk { + return errors.Errorf("rebalanced disk %s not found in diskMap", part.RebalancedDisk) } + if backupTable.UUID == "" { + return errors.Errorf("table UUID is empty, can't build store path for rebalanced disk %s", part.RebalancedDisk) + } + dstParentDir = filepath.Join(rebalancedDiskPath, "store", backupTable.UUID[:3], backupTable.UUID) + dstParentDirExists = true } } backupDiskPath := diskMap[activeDisk] diff --git a/pkg/metadata/table_metadata.go b/pkg/metadata/table_metadata.go index b54f30e2..98137f8e 100644 --- a/pkg/metadata/table_metadata.go +++ b/pkg/metadata/table_metadata.go @@ -42,6 +42,7 @@ func (tm *TableMetadata) Save(location string, metadataOnly bool) (uint64, error newTM.Checksums = tm.Checksums newTM.Size = tm.Size newTM.TotalBytes = tm.TotalBytes + newTM.RebalancedFiles = tm.RebalancedFiles newTM.MetadataOnly = false } if err := os.MkdirAll(path.Dir(location), 0750); err != nil { From d815e2dde126f58d01e93c813ff451060aeabc21 Mon Sep 17 00:00:00 2001 From: slach Date: Wed, 6 May 2026 15:45:53 +0500 Subject: [PATCH 2/3] improve checking FIPS compatibility in e2e tests --- pkg/clickhouse/clickhouse.go | 2 +- test/integration/fips_test.go | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pkg/clickhouse/clickhouse.go b/pkg/clickhouse/clickhouse.go index 2f8deeff..20f2349d 100644 --- a/pkg/clickhouse/clickhouse.go +++ b/pkg/clickhouse/clickhouse.go @@ -166,7 +166,7 @@ func (ch *ClickHouse) Connect() error { ch.IsOpen = true break } - if ch.BreakConnectOnError { + if ch.BreakConnectOnError || strings.Contains(err.Error(), "FIPS 140-only") { return errors.WithStack(err) } log.Warn().Msgf("clickhouse connection ping: %s return error: %v, will wait 5 second to reconnect", fmt.Sprintf("tcp://%v:%v", ch.Config.Host, ch.Config.Port), err) diff --git a/test/integration/fips_test.go b/test/integration/fips_test.go index 8be13829..2d3df93f 100644 --- a/test/integration/fips_test.go +++ b/test/integration/fips_test.go @@ -155,14 +155,12 @@ func TestFIPS(t *testing.T) { r.Equal(0, len(inProgressActions), "inProgressActions=%+v", inProgressActions) env.DockerExecNoError(r, "clickhouse", "pkill", "-n", "-f", "clickhouse-backup-fips") } - // P1: Test create_remote + restore_remote in strict GODEBUG=fips140=only mode - // @todo think about FIPS clickhouse-server, which not supported by GODEBUG=fips140=only - // fipsOnlyBackupName := fmt.Sprintf("fips_only_backup_%d", rand.Int()) - //env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml create_remote --tables="+t.Name()+".fips_table "+fipsOnlyBackupName) - //env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml delete local "+fipsOnlyBackupName) - //env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml restore_remote --tables="+t.Name()+".fips_table "+fipsOnlyBackupName) - //env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml delete local "+fipsOnlyBackupName) - //env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml delete remote "+fipsOnlyBackupName) + // P1: Test create_remote doesn't work with clickhouse-server which not compatible with FIPS + fipsOnlyBackupName := fmt.Sprintf("fips_only_backup_%d", rand.Int()) + out, err := env.DockerExecOut("clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml create_remote --tables="+t.Name()+".fips_table "+fipsOnlyBackupName) + r.Error(err) + r.Contains(out, "is not allowed in FIPS 140-only mode") + log.Debug().Msg(out) // https://www.perplexity.ai/search/0920f1e8-59ec-4e14-b779-ba7b2e037196 testTLSCerts("rsa", "4096", "", "ECDHE-RSA-AES128-GCM-SHA256", "ECDHE-RSA-AES256-GCM-SHA384", "AES_128_GCM_SHA256", "AES_256_GCM_SHA384") From 45ed28c976380afdfd0a190be608cf4dfdf7ad44 Mon Sep 17 00:00:00 2001 From: slach Date: Fri, 8 May 2026 19:57:08 +0500 Subject: [PATCH 3/3] fix behavior for properly rebalance, to avoid cross-device link Signed-off-by: slach --- pkg/backup/download.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pkg/backup/download.go b/pkg/backup/download.go index 2d9b57b3..c469e488 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -993,7 +993,11 @@ func (b *Backuper) downloadDiffParts(ctx context.Context, remoteBackup metadata. if !part.Required { continue } - existsPath := path.Join(b.DiskToPathMap[disk], "backup", remoteBackup.RequiredBackup, "shadow", dbAndTableDir, disk, part.Name) + // existsPath must point at the active (rebalanced) disk because that's where + // findDiffFileExist routes the source download for parts with RebalancedDisk set. + // Using the original disk would yield a missing/relative path and produce a + // cross-device hardlink later in makePartHardlinks. + existsPath := path.Join(activeDiskPath, "backup", remoteBackup.RequiredBackup, "shadow", dbAndTableDir, activeDisk, part.Name) _, statErr := os.Stat(existsPath) if statErr != nil && !os.IsNotExist(statErr) { return 0, errors.Wrapf(statErr, "%s stat return error", existsPath) @@ -1015,9 +1019,13 @@ func (b *Backuper) downloadDiffParts(ctx context.Context, remoteBackup metadata. } } partForDownload := part - diskForDownload := disk + // diskForDownload follows the active (rebalanced) disk so findDiffBackupFilesRemote + // computes paths under the same physical disk as existsPath/newPath. + diskForDownload := activeDisk capturedExistsPath := existsPath capturedNewPath := newPath + // capturedDisk is the original map key for table.Parts; do not switch to activeDisk + // or table.Parts[capturedDisk][idx] would index the wrong slice. capturedDisk := disk idx := i downloadDiffGroup.Go(func() error {