From 774178572b74c793c482faf8270ff5dcce270c4b Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 23 Jun 2026 17:59:38 +0800 Subject: [PATCH 1/8] bump sarama and fix logger --- go.mod | 2 +- go.sum | 4 ++-- pkg/logger/log.go | 11 +++++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/go.mod b/go.mod index 18d4f3b7c2..372f022fa3 100644 --- a/go.mod +++ b/go.mod @@ -394,7 +394,7 @@ require ( ) replace ( - github.com/IBM/sarama v1.41.2 => github.com/pingcap/sarama v1.41.2-pingcap-20260508 + github.com/IBM/sarama v1.41.2 => github.com/pingcap/sarama v1.41.2-pingcap-20260622.1 // Downgrade grpc to v1.63.2, as well as other related modules. github.com/apache/arrow-go/v18 => github.com/joechenrh/arrow-go/v18 v18.0.0-20250911101656-62c34c9a3b82 diff --git a/go.sum b/go.sum index 470d2e6320..a3e32540d5 100644 --- a/go.sum +++ b/go.sum @@ -759,8 +759,8 @@ github.com/pingcap/log v1.1.1-0.20250917021125-19901e015dc9 h1:qG9BSvlWFEE5otQGa github.com/pingcap/log v1.1.1-0.20250917021125-19901e015dc9/go.mod h1:ORfBOFp1eteu2odzsyaxI+b8TzJwgjwyQcGhI+9SfEA= github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d h1:5JCgncG9X7tOsqKqbIXpV2VG4mu/hv3RvvZewqFj0U4= github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d/go.mod h1:HMNxmg0/lrn3SPGJ6LTZqP0WwEpcXMu9s/4TWJbzT8w= -github.com/pingcap/sarama v1.41.2-pingcap-20260508 h1:3ZFtYLUGMMZeA6U0iz3EyFnNGPHu3qOuPLj5wXxHmeU= -github.com/pingcap/sarama v1.41.2-pingcap-20260508/go.mod h1:PIL6ZKKKhm19IbQpmpJcFnybAi1yXtgLAitDAeBdNCw= +github.com/pingcap/sarama v1.41.2-pingcap-20260622.1 h1:TrtpL+fs51pUc21CZ8mzIotOlJchN0G9rwfA9VtcyTw= +github.com/pingcap/sarama v1.41.2-pingcap-20260622.1/go.mod h1:PIL6ZKKKhm19IbQpmpJcFnybAi1yXtgLAitDAeBdNCw= github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 h1:T4pXRhBflzDeAhmOQHNPRRogMYxP13V7BkYw3ZsoSfE= github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5/go.mod h1:rlimy0GcTvjiJqvD5mXTRr8O2eNZPBrcUgiWVYp9530= github.com/pingcap/tidb v1.1.0-beta.0.20260604031706-f9faeaf4828f h1:Z+Ez33+LxWbKwM88th19M/v81zwFTjbsKFv1qXQk134= diff --git a/pkg/logger/log.go b/pkg/logger/log.go index d4bce8d2d5..c23c574e7e 100644 --- a/pkg/logger/log.go +++ b/pkg/logger/log.go @@ -257,11 +257,14 @@ func initMySQLLogger() error { // initSaramaLogger hacks logger used in sarama lib func initSaramaLogger(level zapcore.Level) error { - logger, err := zap.NewStdLogAt(log.L().With(zap.String("component", "sarama")), level) - if err != nil { - return errors.Trace(err) + // only available less than info level + if !zapcore.InfoLevel.Enabled(level) { + logger, err := zap.NewStdLogAt(log.L().With(zap.String("component", "sarama")), level) + if err != nil { + return errors.Trace(err) + } + sarama.Logger = logger } - sarama.Logger = logger return nil } From 631bcb17dbde5187c54e8dfb0e3e87ed1f8602a5 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 23 Jun 2026 21:07:28 +0800 Subject: [PATCH 2/8] unify the cloud storage path related method --- cmd/storage-consumer/consumer.go | 53 +---- pkg/logger/log.go | 17 +- pkg/logger/log_test.go | 16 ++ pkg/sink/cloudstorage/path.go | 103 +++++--- pkg/sink/cloudstorage/path_key.go | 275 ++++++++++++++++++---- pkg/sink/cloudstorage/path_key_test.go | 194 +++++++++++++++ pkg/sink/cloudstorage/path_test.go | 41 ++++ pkg/sink/cloudstorage/schema_file.go | 54 +++++ pkg/sink/cloudstorage/schema_file_test.go | 84 +++++++ 9 files changed, 699 insertions(+), 138 deletions(-) create mode 100644 pkg/sink/cloudstorage/schema_file.go create mode 100644 pkg/sink/cloudstorage/schema_file_test.go diff --git a/cmd/storage-consumer/consumer.go b/cmd/storage-consumer/consumer.go index 8e54ae3532..b284af72e2 100644 --- a/cmd/storage-consumer/consumer.go +++ b/cmd/storage-consumer/consumer.go @@ -44,10 +44,9 @@ import ( ) const ( - defaultChangefeedName = "storage-consumer" - defaultLogInterval = 5 * time.Second - fakePartitionNumForSchemaFile = -1 - metadataFileName = "metadata" + defaultChangefeedName = "storage-consumer" + defaultLogInterval = 5 * time.Second + metadataFileName = "metadata" ) type ( @@ -468,7 +467,7 @@ func (c *consumer) parseDMLFilePath(ctx context.Context, path string) error { func (c *consumer) parseSchemaFilePath(ctx context.Context, path string) error { var schemaKey cloudstorage.SchemaPathKey - checksumInFile, err := schemaKey.ParseSchemaFilePath(path) + _, err := schemaKey.ParseSchemaFilePath(path) if err != nil { return errors.Trace(err) } @@ -490,27 +489,13 @@ func (c *consumer) parseSchemaFilePath(ctx context.Context, path string) error { } // Read tableDef from schema file and check checksum. - var tableDef cloudstorage.TableDefinition - schemaContent, err := c.externalStorage.ReadFile(ctx, path) - if err != nil { - return errors.Trace(err) - } - err = json.Unmarshal(schemaContent, &tableDef) - if err != nil { - return errors.Trace(err) - } - checksumInMem, err := tableDef.Sum32(nil) + _, tableDef, err := cloudstorage.ReadTableDefinitionFromSchemaFile(ctx, c.externalStorage, path) if err != nil { + if errors.ErrStorageSinkInvalidFileName.Equal(err) { + log.Panic("checksum mismatch", zap.Error(err), zap.String("path", path)) + } return errors.Trace(err) } - if checksumInMem != checksumInFile || schemaKey.TableVersion != tableDef.TableVersion { - log.Panic("checksum mismatch", - zap.Uint32("checksumInMem", checksumInMem), - zap.Uint32("checksumInFile", checksumInFile), - zap.Uint64("tableversionInMem", schemaKey.TableVersion), - zap.Uint64("tableversionInFile", tableDef.TableVersion), - zap.String("path", path)) - } // Update tableDefMap. c.tableDefMap[key][tableDef.TableVersion] = &tableDef @@ -530,11 +515,7 @@ func (c *consumer) parseSchemaFilePath(ctx context.Context, path string) error { // // the DDL event recorded in schema.json should be executed first, then the DML events // in csv files can be executed. - dmlkey := cloudstorage.DmlPathKey{ - SchemaPathKey: schemaKey, - PartitionNum: fakePartitionNumForSchemaFile, - Date: "", - } + dmlkey := cloudstorage.NewSchemaFileDMLPathKey(schemaKey) if _, ok := c.tableDMLIdxMap[dmlkey]; !ok { c.tableDMLIdxMap[dmlkey] = fileIndexKeyMap{} } else { @@ -606,19 +587,7 @@ func (c *consumer) handleNewFiles( keys = append(keys, k) } sort.Slice(keys, func(i, j int) bool { - if keys[i].TableVersion != keys[j].TableVersion { - return keys[i].TableVersion < keys[j].TableVersion - } - if keys[i].PartitionNum != keys[j].PartitionNum { - return keys[i].PartitionNum < keys[j].PartitionNum - } - if keys[i].Date != keys[j].Date { - return keys[i].Date < keys[j].Date - } - if keys[i].Schema != keys[j].Schema { - return keys[i].Schema < keys[j].Schema - } - return keys[i].Table < keys[j].Table + return cloudstorage.CompareDMLPathKey(keys[i], keys[j]) < 0 }) for order, key := range keys { @@ -637,7 +606,7 @@ func (c *consumer) handleNewFiles( // if the key is a fake dml path key which is mainly used for // sorting schema.json file before the dml files, then execute the ddl query. - if key.PartitionNum == fakePartitionNumForSchemaFile && len(key.Date) == 0 && len(tableDef.Query) > 0 { + if key.IsSchemaFileDMLPathKey() && len(tableDef.Query) > 0 { if key.TableVersion <= ddlWatermark { log.Warn("DDL event replayed with stale table version, ignore it", zap.String("schema", key.Schema), zap.String("table", key.Table), diff --git a/pkg/logger/log.go b/pkg/logger/log.go index c23c574e7e..480e627c60 100644 --- a/pkg/logger/log.go +++ b/pkg/logger/log.go @@ -17,6 +17,7 @@ import ( "bytes" "context" "io" + stdlog "log" "os" "strconv" "strings" @@ -257,14 +258,16 @@ func initMySQLLogger() error { // initSaramaLogger hacks logger used in sarama lib func initSaramaLogger(level zapcore.Level) error { - // only available less than info level - if !zapcore.InfoLevel.Enabled(level) { - logger, err := zap.NewStdLogAt(log.L().With(zap.String("component", "sarama")), level) - if err != nil { - return errors.Trace(err) - } - sarama.Logger = logger + if zapcore.InfoLevel.Enabled(level) { + sarama.Logger = stdlog.New(io.Discard, "[Sarama] ", stdlog.LstdFlags) + return nil + } + + logger, err := zap.NewStdLogAt(log.L().With(zap.String("component", "sarama")), level) + if err != nil { + return errors.Trace(err) } + sarama.Logger = logger return nil } diff --git a/pkg/logger/log_test.go b/pkg/logger/log_test.go index 23b5d36238..c03936a145 100644 --- a/pkg/logger/log_test.go +++ b/pkg/logger/log_test.go @@ -14,8 +14,10 @@ package logger import ( + stdlog "log" "testing" + "github.com/IBM/sarama" "github.com/pingcap/log" "github.com/stretchr/testify/require" "go.uber.org/zap/zapcore" @@ -31,3 +33,17 @@ func TestIsDebugEnabled(t *testing.T) { log.SetLevel(zapcore.DebugLevel) require.True(t, IsDebugEnabled()) } + +func TestInitSaramaLoggerResetsWhenInfoEnabled(t *testing.T) { + originalLogger := sarama.Logger + defer func() { + sarama.Logger = originalLogger + }() + + require.NoError(t, initSaramaLogger(zapcore.DebugLevel)) + debugLogger := sarama.Logger + + require.NoError(t, initSaramaLogger(zapcore.InfoLevel)) + require.NotSame(t, debugLogger, sarama.Logger) + require.IsType(t, stdlog.New(nil, "", 0), sarama.Logger) +} diff --git a/pkg/sink/cloudstorage/path.go b/pkg/sink/cloudstorage/path.go index 6dc7230fa0..3074a070b8 100644 --- a/pkg/sink/cloudstorage/path.go +++ b/pkg/sink/cloudstorage/path.go @@ -448,8 +448,6 @@ func (f *FilePathGenerator) GenerateDataFilePath( } func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date string) (string, error) { - var elems []string - tableVersion, ok := f.versionMap[tbl] if !ok || tableVersion == 0 { return "", errors.ErrInternalCheckFailed.GenWithStackByArgs( @@ -458,38 +456,45 @@ func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date str } if f.config.UseTableIDAsPath { - tablePathPart, err := generateTablePath( + if _, err := generateTablePath( tbl.TableNameWithPhysicTableID.Table, tbl.TableNameWithPhysicTableID.TableID, true, - ) - if err != nil { - return "", err - } - elems = append(elems, tablePathPart) - } else { - elems = append(elems, tbl.TableNameWithPhysicTableID.Schema) - tablePathPart, err := generateTablePath( - tbl.TableNameWithPhysicTableID.Table, - tbl.TableNameWithPhysicTableID.TableID, - false, - ) - if err != nil { + ); err != nil { return "", err } - elems = append(elems, tablePathPart) - } - elems = append(elems, fmt.Sprintf("%d", tableVersion)) - - if f.config.EnablePartitionSeparator && tbl.TableNameWithPhysicTableID.IsPartition && !f.config.UseTableIDAsPath { - elems = append(elems, fmt.Sprintf("%d", tbl.TableNameWithPhysicTableID.TableID)) - } - - if len(date) != 0 { - elems = append(elems, date) + return DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: strconv.FormatInt(tbl.TableNameWithPhysicTableID.TableID, 10), + TableVersion: tableVersion, + }, + UseTableIDAsPath: true, + TableID: tbl.TableNameWithPhysicTableID.TableID, + Date: date, + }.generateDMLDataDirPath(), nil + } + + tablePathPart, err := generateTablePath( + tbl.TableNameWithPhysicTableID.Table, + tbl.TableNameWithPhysicTableID.TableID, + false, + ) + if err != nil { + return "", err } - - return path.Join(elems...), nil + var partitionNum int64 + if f.config.EnablePartitionSeparator && tbl.TableNameWithPhysicTableID.IsPartition { + partitionNum = tbl.TableNameWithPhysicTableID.TableID + } + return DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: tbl.TableNameWithPhysicTableID.Schema, + Table: tablePathPart, + TableVersion: tableVersion, + }, + PartitionNum: partitionNum, + Date: date, + }.generateDMLDataDirPath(), nil } func (f *FilePathGenerator) getFileIdxFromIndexFile( @@ -516,23 +521,47 @@ func (f *FilePathGenerator) getFileIdxFromIndexFile( } func FetchIndexFromFileName(fileName string, extension string) (uint64, error) { + fileIndex, err := ParseFileIndexFromFileName(fileName, extension) + if err != nil { + return 0, err + } + return fileIndex.Idx, nil +} + +// ParseFileIndexFromFileName parses a cloud storage data file name. +func ParseFileIndexFromFileName(fileName string, extension string) (FileIndex, error) { if len(fileName) < minFileNamePrefixLen+len(extension) || !strings.HasPrefix(fileName, "CDC") || !strings.HasSuffix(fileName, extension) { - return 0, errors.ErrStorageSinkInvalidFileName.GenWithStack("filename in storage sink is invalid: %q", fileName) + return FileIndex{}, errors.ErrStorageSinkInvalidFileName.GenWithStack("filename in storage sink is invalid: %q", fileName) } // CDC[_{dispatcherID}_]{num}.fileExtension - pathRE, err := regexp.Compile(`CDC(?:_(\w+)_)?(\d+).\w+`) - if err != nil { - return 0, err + name := strings.TrimSuffix(strings.TrimPrefix(fileName, "CDC"), extension) + dispatcherID := "" + idxStr := name + if strings.HasPrefix(name, "_") { + idxSep := strings.LastIndex(name, "_") + if idxSep <= 1 { + return FileIndex{}, errors.ErrStorageSinkInvalidFileName.GenWithStack("cannot match dml path pattern for %q", fileName) + } + dispatcherID = name[1:idxSep] + idxStr = name[idxSep+1:] } - - matches := pathRE.FindStringSubmatch(fileName) - if len(matches) != 3 { - return 0, errors.ErrStorageSinkInvalidFileName.GenWithStack("cannot match dml path pattern for %q", fileName) + if !isDigits(idxStr) { + return FileIndex{}, errors.ErrStorageSinkInvalidFileName.GenWithStack("cannot match dml path pattern for %q", fileName) } - return strconv.ParseUint(matches[2], 10, 64) + idx, err := strconv.ParseUint(idxStr, 10, 64) + if err != nil { + return FileIndex{}, err + } + return FileIndex{ + FileIndexKey: FileIndexKey{ + DispatcherID: dispatcherID, + EnableTableAcrossNodes: dispatcherID != "", + }, + Idx: idx, + }, nil } var dateSeparatorDayRegexp *regexp.Regexp diff --git a/pkg/sink/cloudstorage/path_key.go b/pkg/sink/cloudstorage/path_key.go index 31f4291819..ecfeb4e371 100644 --- a/pkg/sink/cloudstorage/path_key.go +++ b/pkg/sink/cloudstorage/path_key.go @@ -14,8 +14,9 @@ package cloudstorage import ( + "cmp" "fmt" - "regexp" + "path" "strconv" "strings" @@ -24,6 +25,8 @@ import ( "github.com/pingcap/ticdc/pkg/errors" ) +const schemaFilePartitionNum int64 = -1 + // SchemaPathKey is the key of schema path. type SchemaPathKey struct { // Schema is the first directory level in storage sink paths. @@ -88,13 +91,19 @@ type FileIndexKey struct { type FileIndex struct { FileIndexKey // Idx is the monotonically increasing file sequence number in one - // directory scope (schema/table/version[/partition][/date]). + // directory scope (schema/table/version[/partition][/date] or + // tableID/version[/date]). Idx uint64 } // DmlPathKey is the key of dml path. type DmlPathKey struct { SchemaPathKey + // UseTableIDAsPath controls whether TableID is used as the first path + // element instead of Schema/Table. + UseTableIDAsPath bool + // TableID is set when UseTableIDAsPath is true. + TableID int64 // PartitionNum is an optional path level for partition table output. // It is present only when partition-separator is enabled. PartitionNum int64 @@ -103,80 +112,242 @@ type DmlPathKey struct { Date string } +// NewSchemaFileDMLPathKey returns the synthetic DML path key used to order a +// schema file before data files with the same schema version. +func NewSchemaFileDMLPathKey(schemaKey SchemaPathKey) DmlPathKey { + return DmlPathKey{ + SchemaPathKey: schemaKey, + PartitionNum: schemaFilePartitionNum, + } +} + +// IsSchemaFileDMLPathKey checks whether the key represents a schema file marker. +func (d DmlPathKey) IsSchemaFileDMLPathKey() bool { + return d.PartitionNum == schemaFilePartitionNum && d.Date == "" +} + +// CompareDMLPathKey compares DML path keys in cloud storage replay order. +func CompareDMLPathKey(x, y DmlPathKey) int { + if r := cmp.Compare(x.TableVersion, y.TableVersion); r != 0 { + return r + } + if r := cmp.Compare(x.PartitionNum, y.PartitionNum); r != 0 { + return r + } + if r := cmp.Compare(x.Date, y.Date); r != 0 { + return r + } + if x.UseTableIDAsPath != y.UseTableIDAsPath { + if x.UseTableIDAsPath { + return 1 + } + return -1 + } + if r := cmp.Compare(x.TableID, y.TableID); r != 0 { + return r + } + if r := cmp.Compare(x.Schema, y.Schema); r != 0 { + return r + } + return cmp.Compare(x.Table, y.Table) +} + // GenerateDMLFilePath generates the dml file path. func (d *DmlPathKey) GenerateDMLFilePath( fileIndex *FileIndex, extension string, fileIndexWidth int, ) string { - var elems []string - - elems = append(elems, d.Schema) - elems = append(elems, d.Table) - elems = append(elems, fmt.Sprintf("%d", d.TableVersion)) + fileName := generateDataFileName( + fileIndex.EnableTableAcrossNodes, fileIndex.DispatcherID, + fileIndex.Idx, extension, fileIndexWidth) + return path.Join(d.generateDMLDataDirPath(), fileName) +} - if d.PartitionNum != 0 { - elems = append(elems, fmt.Sprintf("%d", d.PartitionNum)) +func (d DmlPathKey) generateDMLDataDirPath() string { + elems := make([]string, 0, 5) + if d.UseTableIDAsPath { + elems = append(elems, strconv.FormatInt(d.TableID, 10)) + } else { + elems = append(elems, d.Schema, d.Table) + } + elems = append(elems, strconv.FormatUint(d.TableVersion, 10)) + if d.PartitionNum != 0 && !d.UseTableIDAsPath { + elems = append(elems, strconv.FormatInt(d.PartitionNum, 10)) } - if len(d.Date) != 0 { + if d.Date != "" { elems = append(elems, d.Date) } - elems = append(elems, generateDataFileName(fileIndex.EnableTableAcrossNodes, fileIndex.DispatcherID, fileIndex.Idx, extension, fileIndexWidth)) - - return strings.Join(elems, "/") + return path.Join(elems...) } -// ParseIndexFilePath parses the index file path and returns the max file index. -// index file path pattern is as follows: -// {schema}/{table}/{table-version-separator}/{partition-separator}/{date-separator}/meta/, where -// partition-separator and date-separator could be empty. -// DML file name pattern is as follows: CDC_{dispatcherID}.index or CDC.index -func (d *DmlPathKey) ParseIndexFilePath(dateSeparator, path string) (string, error) { - var partitionNum int64 +// ParseDMLFilePath parses a cloud storage data file path. +func (d *DmlPathKey) ParseDMLFilePath( + dateSeparator, filePath, extension string, +) (FileIndex, error) { + parts := strings.Split(filePath, "/") + fileIndex, err := ParseFileIndexFromFileName(parts[len(parts)-1], extension) + if err != nil { + return FileIndex{}, err + } + if err = d.parseDMLDataDir(dateSeparator, parts[:len(parts)-1], filePath); err != nil { + return FileIndex{}, err + } + return fileIndex, nil +} - str := `(\w+)\/(\w+)\/(\d+)\/(\d+)?\/*` +func (d *DmlPathKey) parseDMLDataDir( + dateSeparator string, parts []string, filePath string, +) error { + var ( + key DmlPathKey + versionIdx int + dateIdx int + err error + ) switch dateSeparator { case config.DateSeparatorNone.String(): - str += `(\d{4})*` - case config.DateSeparatorYear.String(): - str += `(\d{4})\/` - case config.DateSeparatorMonth.String(): - str += `(\d{4}-\d{2})\/` - case config.DateSeparatorDay.String(): - str += `(\d{4}-\d{2}-\d{2})\/` + switch len(parts) { + case 2: + key.UseTableIDAsPath = true + key.Schema = parts[0] + key.TableID, err = parseTableIDPathPart(parts[0]) + versionIdx = 1 + case 3: + key.Schema, key.Table, versionIdx = parts[0], parts[1], 2 + case 4: + key.Schema, key.Table, versionIdx = parts[0], parts[1], 2 + key.PartitionNum, err = strconv.ParseInt(parts[3], 10, 64) + if err != nil { + return err + } + default: + return errors.ErrStorageSinkInvalidFileName.GenWithStack( + "cannot match dml path pattern for %s", filePath) + } + case config.DateSeparatorYear.String(), + config.DateSeparatorMonth.String(), + config.DateSeparatorDay.String(): + switch len(parts) { + case 3: + key.UseTableIDAsPath = true + key.Schema = parts[0] + key.TableID, err = parseTableIDPathPart(parts[0]) + versionIdx, dateIdx = 1, 2 + case 4: + key.Schema, key.Table, versionIdx, dateIdx = parts[0], parts[1], 2, 3 + case 5: + key.Schema, key.Table, versionIdx, dateIdx = parts[0], parts[1], 2, 4 + key.PartitionNum, err = strconv.ParseInt(parts[3], 10, 64) + if err != nil { + return err + } + default: + return errors.ErrStorageSinkInvalidFileName.GenWithStack( + "cannot match dml path pattern for %s", filePath) + } + if !matchDateSeparatorValue(dateSeparator, parts[dateIdx]) { + return errors.ErrStorageSinkInvalidFileName.GenWithStack( + "cannot match date separator %s for %s", dateSeparator, parts[dateIdx]) + } + key.Date = parts[dateIdx] + default: + return errors.ErrStorageSinkInvalidDateSeparator.GenWithStackByArgs(dateSeparator) } - str += `meta\/` - // CDC[_{dispatcherID}].index - str += `CDC(?:_(\w+))?.index` - pathRE, err := regexp.Compile(str) if err != nil { - return "", err + return err } - - matches := pathRE.FindStringSubmatch(path) - if len(matches) != 7 { - return "", fmt.Errorf("cannot match dml path pattern for %s", path) + if !key.UseTableIDAsPath && (key.Schema == "" || key.Table == "") { + return errors.ErrStorageSinkInvalidFileName.GenWithStack( + "cannot match dml path pattern for %s", filePath) + } + version, err := strconv.ParseUint(parts[versionIdx], 10, 64) + if err != nil { + return err } + key.TableVersion = version + *d = key + return nil +} - version, err := strconv.ParseUint(matches[3], 10, 64) +func parseTableIDPathPart(part string) (int64, error) { + tableID, err := strconv.ParseInt(part, 10, 64) if err != nil { - return "", err + return 0, err } + if tableID <= 0 { + return 0, errors.ErrStorageSinkInvalidFileName.GenWithStack( + "invalid table id path part %s", part) + } + return tableID, nil +} - if len(matches[4]) > 0 { - partitionNum, err = strconv.ParseInt(matches[4], 10, 64) - if err != nil { - return "", err +func matchDateSeparatorValue(separator string, value string) bool { + switch separator { + case config.DateSeparatorYear.String(): + return len(value) == 4 && isDigits(value) + case config.DateSeparatorMonth.String(): + return len(value) == 7 && + value[4] == '-' && + isDigits(value[:4]) && + isDigits(value[5:]) + case config.DateSeparatorDay.String(): + return len(value) == 10 && + value[4] == '-' && + value[7] == '-' && + isDigits(value[:4]) && + isDigits(value[5:7]) && + isDigits(value[8:]) + default: + return false + } +} + +func isDigits(s string) bool { + if s == "" { + return false + } + for _, c := range s { + if c < '0' || c > '9' { + return false } } + return true +} - *d = DmlPathKey{ - SchemaPathKey: SchemaPathKey{ - Schema: matches[1], - Table: matches[2], - TableVersion: version, - }, - PartitionNum: partitionNum, - Date: matches[5], +// ParseIndexFilePath parses the index file path and returns the max file index. +// index file path pattern is as follows: +// {schema}/{table}/{table-version-separator}/{partition-separator}/{date-separator}/meta/, where +// partition-separator and date-separator could be empty. +// DML file name pattern is as follows: CDC_{dispatcherID}.index or CDC.index +func (d *DmlPathKey) ParseIndexFilePath(dateSeparator, path string) (string, error) { + parts := strings.Split(path, "/") + if len(parts) < 4 || parts[len(parts)-2] != "meta" { + return "", errors.ErrStorageSinkInvalidFileName.GenWithStack( + "cannot match dml path pattern for %s", path) } + dispatcherID, err := parseIndexFileName(parts[len(parts)-1]) + if err != nil { + return "", err + } + if err = d.parseDMLDataDir(dateSeparator, parts[:len(parts)-2], path); err != nil { + return "", err + } + + return dispatcherID, nil +} - return matches[6], nil +func parseIndexFileName(fileName string) (string, error) { + const indexFileExtension = ".index" + if !strings.HasPrefix(fileName, "CDC") || !strings.HasSuffix(fileName, indexFileExtension) { + return "", errors.ErrStorageSinkInvalidFileName.GenWithStack( + "cannot match dml index file name pattern for %q", fileName) + } + dispatcherID := strings.TrimSuffix(strings.TrimPrefix(fileName, "CDC"), indexFileExtension) + if dispatcherID == "" { + return "", nil + } + if !strings.HasPrefix(dispatcherID, "_") || len(dispatcherID) == 1 { + return "", errors.ErrStorageSinkInvalidFileName.GenWithStack( + "cannot match dml index file name pattern for %q", fileName) + } + return dispatcherID[1:], nil } diff --git a/pkg/sink/cloudstorage/path_key_test.go b/pkg/sink/cloudstorage/path_key_test.go index f768f7cf74..979a10e913 100644 --- a/pkg/sink/cloudstorage/path_key_test.go +++ b/pkg/sink/cloudstorage/path_key_test.go @@ -107,3 +107,197 @@ func TestDmlPathKey(t *testing.T) { require.Equal(t, tc.path, fileName) } } + +func TestParseDMLFilePath(t *testing.T) { + t.Parallel() + + dispatcherID := common.NewDispatcherID().String() + testCases := []struct { + name string + dateSeparator string + path string + fileIndexWidth int + dmlkey DmlPathKey + fileIndex FileIndex + }{ + { + name: "no date no partition", + dateSeparator: "none", + path: "schema1/table1/123456/CDC000010.csv", + fileIndexWidth: 6, + dmlkey: DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "schema1", + Table: "table1", + TableVersion: 123456, + }, + }, + fileIndex: FileIndex{Idx: 10}, + }, + { + name: "no date with partition", + dateSeparator: "none", + path: "schema1/table1/123456/55/CDC000010.csv", + fileIndexWidth: 6, + dmlkey: DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "schema1", + Table: "table1", + TableVersion: 123456, + }, + PartitionNum: 55, + }, + fileIndex: FileIndex{Idx: 10}, + }, + { + name: "no date with table id path", + dateSeparator: "none", + path: "12345/123456/CDC000010.csv", + fileIndexWidth: 6, + dmlkey: DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "12345", + TableVersion: 123456, + }, + UseTableIDAsPath: true, + TableID: 12345, + }, + fileIndex: FileIndex{Idx: 10}, + }, + { + name: "day date no partition", + dateSeparator: "day", + path: fmt.Sprintf("schema1/table1/123456/2023-05-09/CDC_%s_00000000000000000010.csv", dispatcherID), + fileIndexWidth: 20, + dmlkey: DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "schema1", + Table: "table1", + TableVersion: 123456, + }, + Date: "2023-05-09", + }, + fileIndex: FileIndex{ + FileIndexKey: FileIndexKey{ + DispatcherID: dispatcherID, + EnableTableAcrossNodes: true, + }, + Idx: 10, + }, + }, + { + name: "day date with table id path", + dateSeparator: "day", + path: fmt.Sprintf("12345/123456/2023-05-09/CDC_%s_00000000000000000010.csv", dispatcherID), + fileIndexWidth: 20, + dmlkey: DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "12345", + TableVersion: 123456, + }, + UseTableIDAsPath: true, + TableID: 12345, + Date: "2023-05-09", + }, + fileIndex: FileIndex{ + FileIndexKey: FileIndexKey{ + DispatcherID: dispatcherID, + EnableTableAcrossNodes: true, + }, + Idx: 10, + }, + }, + { + name: "day date with partition", + dateSeparator: "day", + path: fmt.Sprintf("schema1/table1/123456/55/2023-05-09/CDC_%s_00000000000000000010.csv", dispatcherID), + fileIndexWidth: 20, + dmlkey: DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "schema1", + Table: "table1", + TableVersion: 123456, + }, + PartitionNum: 55, + Date: "2023-05-09", + }, + fileIndex: FileIndex{ + FileIndexKey: FileIndexKey{ + DispatcherID: dispatcherID, + EnableTableAcrossNodes: true, + }, + Idx: 10, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var dmlkey DmlPathKey + fileIndex, err := dmlkey.ParseDMLFilePath(tc.dateSeparator, tc.path, ".csv") + require.NoError(t, err) + require.Equal(t, tc.dmlkey, dmlkey) + require.Equal(t, tc.fileIndex, fileIndex) + require.Equal(t, tc.path, dmlkey.GenerateDMLFilePath(&fileIndex, ".csv", tc.fileIndexWidth)) + }) + } +} + +func TestParseDMLFilePathRejectsInvalidPath(t *testing.T) { + t.Parallel() + + var dmlkey DmlPathKey + _, err := dmlkey.ParseDMLFilePath("none", "schema1//123456/CDC000010.csv", ".csv") + require.Error(t, err) +} + +func TestParseIndexFilePathWithTableIDAsPath(t *testing.T) { + t.Parallel() + + dispatcherID := common.NewDispatcherID().String() + indexPath := fmt.Sprintf("12345/123456/2023-05-09/meta/CDC_%s.index", dispatcherID) + + var dmlkey DmlPathKey + id, err := dmlkey.ParseIndexFilePath("day", indexPath) + require.NoError(t, err) + require.Equal(t, dispatcherID, id) + require.Equal(t, DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "12345", + TableVersion: 123456, + }, + UseTableIDAsPath: true, + TableID: 12345, + Date: "2023-05-09", + }, dmlkey) +} + +func TestSchemaFileDMLPathKeyOrder(t *testing.T) { + t.Parallel() + + schemaKey := SchemaPathKey{ + Schema: "schema1", + Table: "table1", + TableVersion: 123456, + } + schemaDMLKey := NewSchemaFileDMLPathKey(schemaKey) + require.True(t, schemaDMLKey.IsSchemaFileDMLPathKey()) + + dataDMLKey := DmlPathKey{ + SchemaPathKey: schemaKey, + Date: "2023-05-09", + } + require.Less(t, CompareDMLPathKey(schemaDMLKey, dataDMLKey), 0) + require.Greater(t, CompareDMLPathKey(dataDMLKey, schemaDMLKey), 0) + require.Zero(t, CompareDMLPathKey(schemaDMLKey, NewSchemaFileDMLPathKey(schemaKey))) + + tableIDPathKey := DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "12345", + TableVersion: schemaKey.TableVersion, + }, + UseTableIDAsPath: true, + TableID: 12345, + } + require.NotZero(t, CompareDMLPathKey(dataDMLKey, tableIDPathKey)) +} diff --git a/pkg/sink/cloudstorage/path_test.go b/pkg/sink/cloudstorage/path_test.go index 7c1d5fbed6..b39f4320c8 100644 --- a/pkg/sink/cloudstorage/path_test.go +++ b/pkg/sink/cloudstorage/path_test.go @@ -174,6 +174,25 @@ func TestGenerateDataFilePathWithTableIDAsPath(t *testing.T) { path, err := f.GenerateDataFilePath(ctx, table, date) require.NoError(t, err) require.Equal(t, fmt.Sprintf("12345/5/CDC_%s_000001.json", table.DispatcherID.String()), path) + + var dmlkey DmlPathKey + fileIndex, err := dmlkey.ParseDMLFilePath(config.DateSeparatorNone.String(), path, ".json") + require.NoError(t, err) + require.Equal(t, DmlPathKey{ + SchemaPathKey: SchemaPathKey{ + Schema: "12345", + TableVersion: 5, + }, + UseTableIDAsPath: true, + TableID: 12345, + }, dmlkey) + require.Equal(t, FileIndex{ + FileIndexKey: FileIndexKey{ + DispatcherID: table.DispatcherID.String(), + EnableTableAcrossNodes: true, + }, + Idx: 1, + }, fileIndex) } func TestFetchIndexFromFileName(t *testing.T) { @@ -224,6 +243,28 @@ func TestFetchIndexFromFileName(t *testing.T) { } } +func TestParseFileIndexFromFileName(t *testing.T) { + t.Parallel() + + dispatcherID := commonType.NewDispatcherID().String() + fileIndex, err := ParseFileIndexFromFileName( + fmt.Sprintf("CDC_%s_000011.json", dispatcherID), + ".json", + ) + require.NoError(t, err) + require.Equal(t, FileIndex{ + FileIndexKey: FileIndexKey{ + DispatcherID: dispatcherID, + EnableTableAcrossNodes: true, + }, + Idx: 11, + }, fileIndex) + + fileIndex, err = ParseFileIndexFromFileName("CDC000012.json", ".json") + require.NoError(t, err) + require.Equal(t, FileIndex{Idx: 12}, fileIndex) +} + func TestGenerateDataFilePathWithIndexFile(t *testing.T) { t.Parallel() diff --git a/pkg/sink/cloudstorage/schema_file.go b/pkg/sink/cloudstorage/schema_file.go new file mode 100644 index 0000000000..afb47c6afb --- /dev/null +++ b/pkg/sink/cloudstorage/schema_file.go @@ -0,0 +1,54 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package cloudstorage + +import ( + "context" + "encoding/json" + + "github.com/pingcap/ticdc/pkg/errors" + "github.com/pingcap/tidb/pkg/objstore/storeapi" +) + +// ReadTableDefinitionFromSchemaFile reads a schema file and validates that the +// file name checksum and table version match its table definition content. +func ReadTableDefinitionFromSchemaFile( + ctx context.Context, + storage storeapi.Storage, + path string, +) (SchemaPathKey, TableDefinition, error) { + var schemaKey SchemaPathKey + checksum, err := schemaKey.ParseSchemaFilePath(path) + if err != nil { + return schemaKey, TableDefinition{}, err + } + var tableDef TableDefinition + schemaContent, err := storage.ReadFile(ctx, path) + if err != nil { + return schemaKey, tableDef, errors.Trace(err) + } + if err = json.Unmarshal(schemaContent, &tableDef); err != nil { + return schemaKey, tableDef, errors.Trace(err) + } + checksumInMem, err := tableDef.Sum32(nil) + if err != nil { + return schemaKey, tableDef, errors.Trace(err) + } + if checksumInMem != checksum || schemaKey.TableVersion != tableDef.TableVersion { + return schemaKey, tableDef, errors.ErrStorageSinkInvalidFileName.GenWithStack( + "checksum mismatch in schema file %s: checksum in memory %d, checksum in file %d, table version in path %d, table version in file %d", + path, checksumInMem, checksum, schemaKey.TableVersion, tableDef.TableVersion) + } + return schemaKey, tableDef, nil +} diff --git a/pkg/sink/cloudstorage/schema_file_test.go b/pkg/sink/cloudstorage/schema_file_test.go new file mode 100644 index 0000000000..1bca374159 --- /dev/null +++ b/pkg/sink/cloudstorage/schema_file_test.go @@ -0,0 +1,84 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package cloudstorage + +import ( + "context" + "fmt" + "testing" + + "github.com/pingcap/ticdc/pkg/errors" + "github.com/pingcap/ticdc/pkg/util" + "github.com/stretchr/testify/require" +) + +func TestReadTableDefinitionFromSchemaFile(t *testing.T) { + t.Parallel() + + ctx := context.Background() + storage, err := util.GetExternalStorageWithDefaultTimeout( + ctx, fmt.Sprintf("file:///%s", t.TempDir())) + require.NoError(t, err) + defer storage.Close() + + def, _ := generateTableDef() + schemaFilePath, err := def.GenerateSchemaFilePath(false, 0) + require.NoError(t, err) + encodedDef, err := def.MarshalWithQuery() + require.NoError(t, err) + require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedDef)) + + schemaKey, got, err := ReadTableDefinitionFromSchemaFile(ctx, storage, schemaFilePath) + require.NoError(t, err) + require.Equal(t, SchemaPathKey{ + Schema: def.Schema, + Table: def.Table, + TableVersion: def.TableVersion, + }, schemaKey) + require.Equal(t, def.Schema, got.Schema) + require.Equal(t, def.Table, got.Table) + require.Equal(t, def.Version, got.Version) + require.Equal(t, def.TableVersion, got.TableVersion) + require.Equal(t, def.TotalColumns, got.TotalColumns) + require.Len(t, got.Columns, len(def.Columns)) + + expectedChecksum, err := def.Sum32(nil) + require.NoError(t, err) + gotChecksum, err := got.Sum32(nil) + require.NoError(t, err) + require.Equal(t, expectedChecksum, gotChecksum) +} + +func TestReadTableDefinitionFromSchemaFileChecksumMismatch(t *testing.T) { + t.Parallel() + + ctx := context.Background() + storage, err := util.GetExternalStorageWithDefaultTimeout( + ctx, fmt.Sprintf("file:///%s", t.TempDir())) + require.NoError(t, err) + defer storage.Close() + + def, _ := generateTableDef() + schemaFilePath, err := def.GenerateSchemaFilePath(false, 0) + require.NoError(t, err) + + def.TableVersion++ + encodedDef, err := def.MarshalWithQuery() + require.NoError(t, err) + require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedDef)) + + _, _, err = ReadTableDefinitionFromSchemaFile(ctx, storage, schemaFilePath) + require.Error(t, err) + require.True(t, errors.ErrStorageSinkInvalidFileName.Equal(err)) +} From 6098a7a04a096fa6369a08224037902076359907 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 23 Jun 2026 21:15:21 +0800 Subject: [PATCH 3/8] simplify the code --- cmd/storage-consumer/consumer.go | 2 +- pkg/sink/cloudstorage/path.go | 7 ++++--- pkg/sink/cloudstorage/schema_file.go | 5 ++--- pkg/sink/cloudstorage/schema_file_test.go | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmd/storage-consumer/consumer.go b/cmd/storage-consumer/consumer.go index b284af72e2..b13a586819 100644 --- a/cmd/storage-consumer/consumer.go +++ b/cmd/storage-consumer/consumer.go @@ -489,7 +489,7 @@ func (c *consumer) parseSchemaFilePath(ctx context.Context, path string) error { } // Read tableDef from schema file and check checksum. - _, tableDef, err := cloudstorage.ReadTableDefinitionFromSchemaFile(ctx, c.externalStorage, path) + _, tableDef, err := cloudstorage.ParseTableDefinition(ctx, c.externalStorage, path) if err != nil { if errors.ErrStorageSinkInvalidFileName.Equal(err) { log.Panic("checksum mismatch", zap.Error(err), zap.String("path", path)) diff --git a/pkg/sink/cloudstorage/path.go b/pkg/sink/cloudstorage/path.go index 3074a070b8..0b3ec5832e 100644 --- a/pkg/sink/cloudstorage/path.go +++ b/pkg/sink/cloudstorage/path.go @@ -456,16 +456,17 @@ func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date str } if f.config.UseTableIDAsPath { - if _, err := generateTablePath( + tableIDPathPart, err := generateTablePath( tbl.TableNameWithPhysicTableID.Table, tbl.TableNameWithPhysicTableID.TableID, true, - ); err != nil { + ) + if err != nil { return "", err } return DmlPathKey{ SchemaPathKey: SchemaPathKey{ - Schema: strconv.FormatInt(tbl.TableNameWithPhysicTableID.TableID, 10), + Schema: tableIDPathPart, TableVersion: tableVersion, }, UseTableIDAsPath: true, diff --git a/pkg/sink/cloudstorage/schema_file.go b/pkg/sink/cloudstorage/schema_file.go index afb47c6afb..d33537544b 100644 --- a/pkg/sink/cloudstorage/schema_file.go +++ b/pkg/sink/cloudstorage/schema_file.go @@ -21,9 +21,8 @@ import ( "github.com/pingcap/tidb/pkg/objstore/storeapi" ) -// ReadTableDefinitionFromSchemaFile reads a schema file and validates that the -// file name checksum and table version match its table definition content. -func ReadTableDefinitionFromSchemaFile( +// ParseTableDefinition parses a schema file and validates its path metadata. +func ParseTableDefinition( ctx context.Context, storage storeapi.Storage, path string, diff --git a/pkg/sink/cloudstorage/schema_file_test.go b/pkg/sink/cloudstorage/schema_file_test.go index 1bca374159..1f40fbe75d 100644 --- a/pkg/sink/cloudstorage/schema_file_test.go +++ b/pkg/sink/cloudstorage/schema_file_test.go @@ -23,7 +23,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestReadTableDefinitionFromSchemaFile(t *testing.T) { +func TestParseTableDefinition(t *testing.T) { t.Parallel() ctx := context.Background() @@ -39,7 +39,7 @@ func TestReadTableDefinitionFromSchemaFile(t *testing.T) { require.NoError(t, err) require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedDef)) - schemaKey, got, err := ReadTableDefinitionFromSchemaFile(ctx, storage, schemaFilePath) + schemaKey, got, err := ParseTableDefinition(ctx, storage, schemaFilePath) require.NoError(t, err) require.Equal(t, SchemaPathKey{ Schema: def.Schema, @@ -60,7 +60,7 @@ func TestReadTableDefinitionFromSchemaFile(t *testing.T) { require.Equal(t, expectedChecksum, gotChecksum) } -func TestReadTableDefinitionFromSchemaFileChecksumMismatch(t *testing.T) { +func TestParseTableDefinitionChecksumMismatch(t *testing.T) { t.Parallel() ctx := context.Background() @@ -78,7 +78,7 @@ func TestReadTableDefinitionFromSchemaFileChecksumMismatch(t *testing.T) { require.NoError(t, err) require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedDef)) - _, _, err = ReadTableDefinitionFromSchemaFile(ctx, storage, schemaFilePath) + _, _, err = ParseTableDefinition(ctx, storage, schemaFilePath) require.Error(t, err) require.True(t, errors.ErrStorageSinkInvalidFileName.Equal(err)) } From 07030560292da941c1aec47eb471ce096ae098e8 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 23 Jun 2026 23:33:47 +0800 Subject: [PATCH 4/8] rename to schemaFile --- cmd/storage-consumer/consumer.go | 90 +-- downstreamadapter/sink/cloudstorage/sink.go | 52 +- .../sink/cloudstorage/sink_test.go | 32 +- pkg/sink/cloudstorage/path.go | 69 +- pkg/sink/cloudstorage/path_key.go | 4 +- pkg/sink/cloudstorage/path_key_test.go | 2 +- pkg/sink/cloudstorage/path_test.go | 2 +- pkg/sink/cloudstorage/schema_file.go | 366 +++++++++- pkg/sink/cloudstorage/schema_file_parse.go | 53 ++ .../cloudstorage/schema_file_parse_test.go | 84 +++ pkg/sink/cloudstorage/schema_file_test.go | 624 ++++++++++++++++-- pkg/sink/cloudstorage/table_definition.go | 373 ----------- .../cloudstorage/table_definition_test.go | 611 ----------------- 13 files changed, 1178 insertions(+), 1184 deletions(-) create mode 100644 pkg/sink/cloudstorage/schema_file_parse.go create mode 100644 pkg/sink/cloudstorage/schema_file_parse_test.go delete mode 100644 pkg/sink/cloudstorage/table_definition.go delete mode 100644 pkg/sink/cloudstorage/table_definition_test.go diff --git a/cmd/storage-consumer/consumer.go b/cmd/storage-consumer/consumer.go index b13a586819..b4bbda6a34 100644 --- a/cmd/storage-consumer/consumer.go +++ b/cmd/storage-consumer/consumer.go @@ -76,8 +76,8 @@ type consumer struct { // tableDDLWatermark maintains a map of <`schema`.`table`, max executed DDL table version>. // DML files with smaller table versions are considered stale replays and should be ignored. tableDDLWatermark map[string]uint64 - // tableDefMap maintains a map of <`schema`.`table`, tableDef slice sorted by TableVersion> - tableDefMap map[string]map[uint64]*cloudstorage.TableDefinition + // schemaFileMap maintains a map of <`schema`.`table`, schema files by TableVersion> + schemaFileMap map[string]map[uint64]*cloudstorage.SchemaFile tableIDGenerator *fakeTableIDGenerator errCh chan error @@ -163,7 +163,7 @@ func newConsumer(ctx context.Context) (*consumer, error) { tableDMLIdxMap: make(map[cloudstorage.DmlPathKey]fileIndexKeyMap), eventsGroup: make(map[int64]*util.EventsGroup), tableDDLWatermark: make(map[string]uint64), - tableDefMap: make(map[string]map[uint64]*cloudstorage.TableDefinition), + schemaFileMap: make(map[string]map[uint64]*cloudstorage.SchemaFile), tableIDGenerator: &fakeTableIDGenerator{ tableIDs: make(map[string]int64), }, @@ -310,7 +310,7 @@ func (c *consumer) appendRow2Group(dml *event.DMLEvent, enableTableAcrossNodes b func (c *consumer) appendDMLEvents( ctx context.Context, tableID int64, - tableDetail cloudstorage.TableDefinition, + schemaFile cloudstorage.SchemaFile, pathKey cloudstorage.DmlPathKey, fileIdx *cloudstorage.FileIndex, ) error { @@ -322,7 +322,7 @@ func (c *consumer) appendDMLEvents( } var decoder common.Decoder - tableInfo, err := tableDetail.ToTableInfo() + tableInfo, err := schemaFile.ToTableInfo() if err != nil { return errors.Trace(err) } @@ -479,17 +479,17 @@ func (c *consumer) parseSchemaFilePath(ctx context.Context, path string) error { return nil } key := schemaKey.GetKey() - if tableDefs, ok := c.tableDefMap[key]; ok { - if _, ok := tableDefs[schemaKey.TableVersion]; ok { - // Skip if tableDef already exists. + if schemaFiles, ok := c.schemaFileMap[key]; ok { + if _, ok := schemaFiles[schemaKey.TableVersion]; ok { + // Skip if schema file already exists. return nil } } else { - c.tableDefMap[key] = make(map[uint64]*cloudstorage.TableDefinition) + c.schemaFileMap[key] = make(map[uint64]*cloudstorage.SchemaFile) } - // Read tableDef from schema file and check checksum. - _, tableDef, err := cloudstorage.ParseTableDefinition(ctx, c.externalStorage, path) + // Read schema file and check checksum. + _, schemaFile, err := cloudstorage.Parse(ctx, c.externalStorage, path) if err != nil { if errors.ErrStorageSinkInvalidFileName.Equal(err) { log.Panic("checksum mismatch", zap.Error(err), zap.String("path", path)) @@ -497,8 +497,8 @@ func (c *consumer) parseSchemaFilePath(ctx context.Context, path string) error { return errors.Trace(err) } - // Update tableDefMap. - c.tableDefMap[key][tableDef.TableVersion] = &tableDef + // Update schemaFileMap. + c.schemaFileMap[key][schemaFile.TableVersion] = &schemaFile // Fake a dml key for schema.json file, which is useful for putting DDL // in front of the DML files when sorting. @@ -519,38 +519,38 @@ func (c *consumer) parseSchemaFilePath(ctx context.Context, path string) error { if _, ok := c.tableDMLIdxMap[dmlkey]; !ok { c.tableDMLIdxMap[dmlkey] = fileIndexKeyMap{} } else { - // duplicate table schema file found, this should not happen. + // duplicate schema file found, this should not happen. log.Panic("duplicate schema file found", - zap.String("path", path), zap.Any("tableDef", tableDef), + zap.String("path", path), zap.Any("schemaFile", schemaFile), zap.Any("schemaKey", schemaKey), zap.Any("dmlkey", dmlkey)) } return nil } -func (c *consumer) mustGetTableDef(key cloudstorage.SchemaPathKey) cloudstorage.TableDefinition { - var tableDef *cloudstorage.TableDefinition - if tableDefs, ok := c.tableDefMap[key.GetKey()]; ok { - tableDef = tableDefs[key.TableVersion] +func (c *consumer) mustGetSchemaFile(key cloudstorage.SchemaPathKey) cloudstorage.SchemaFile { + var schemaFile *cloudstorage.SchemaFile + if schemaFiles, ok := c.schemaFileMap[key.GetKey()]; ok { + schemaFile = schemaFiles[key.TableVersion] } - if tableDef == nil { - log.Panic("tableDef not found", zap.Any("key", key), zap.Any("tableDefMap", c.tableDefMap)) + if schemaFile == nil { + log.Panic("schema file not found", zap.Any("key", key), zap.Any("schemaFileMap", c.schemaFileMap)) } - return *tableDef + return *schemaFile } -func getRenameTableOldTableKey(tableDef cloudstorage.TableDefinition) (string, bool) { - if tableDef.Type != byte(timodel.ActionRenameTable) { +func getRenameTableOldTableKey(schemaFile cloudstorage.SchemaFile) (string, bool) { + if schemaFile.Type != byte(timodel.ActionRenameTable) { return "", false } - schemaName := tableDef.Schema - stmt, err := parser.New().ParseOneStmt(tableDef.Query, "", "") + schemaName := schemaFile.Schema + stmt, err := parser.New().ParseOneStmt(schemaFile.Query, "", "") if err != nil { - log.Panic("parse statement failed", zap.Any("DDL", tableDef.Query), zap.Error(err)) + log.Panic("parse statement failed", zap.Any("DDL", schemaFile.Query), zap.Error(err)) } // The query in job maybe "RENAME TABLE table1 to table2" renameStmt, ok := stmt.(*ast.RenameTableStmt) if !ok || len(renameStmt.TableToTables) == 0 { - log.Panic("invalid rename table statement", zap.Any("DDL", tableDef.Query)) + log.Panic("invalid rename table statement", zap.Any("DDL", schemaFile.Query)) } oldTable := renameStmt.TableToTables[0].OldTable if oldTable.Schema.O != "" { @@ -560,14 +560,14 @@ func getRenameTableOldTableKey(tableDef cloudstorage.TableDefinition) (string, b return commonType.QuoteSchema(schemaName, tableName), true } -func (c *consumer) updateTableDDLWatermark(tableDef cloudstorage.TableDefinition) string { - key := commonType.QuoteSchema(tableDef.Schema, tableDef.Table) - if c.tableDDLWatermark[key] < tableDef.TableVersion { - c.tableDDLWatermark[key] = tableDef.TableVersion +func (c *consumer) updateTableDDLWatermark(schemaFile cloudstorage.SchemaFile) string { + key := commonType.QuoteSchema(schemaFile.Schema, schemaFile.Table) + if c.tableDDLWatermark[key] < schemaFile.TableVersion { + c.tableDDLWatermark[key] = schemaFile.TableVersion } - if oldTableKey, ok := getRenameTableOldTableKey(tableDef); ok { - if c.tableDDLWatermark[oldTableKey] < tableDef.TableVersion { - c.tableDDLWatermark[oldTableKey] = tableDef.TableVersion + if oldTableKey, ok := getRenameTableOldTableKey(schemaFile); ok { + if c.tableDDLWatermark[oldTableKey] < schemaFile.TableVersion { + c.tableDDLWatermark[oldTableKey] = schemaFile.TableVersion } } return key @@ -591,7 +591,7 @@ func (c *consumer) handleNewFiles( }) for order, key := range keys { - tableDef := c.mustGetTableDef(key.SchemaPathKey) + schemaFile := c.mustGetSchemaFile(key.SchemaPathKey) tableKey := key.GetKey() ddlWatermark := c.tableDDLWatermark[tableKey] log.Info("storage consumer handle file key", @@ -606,12 +606,12 @@ func (c *consumer) handleNewFiles( // if the key is a fake dml path key which is mainly used for // sorting schema.json file before the dml files, then execute the ddl query. - if key.IsSchemaFileDMLPathKey() && len(tableDef.Query) > 0 { + if key.IsSchemaFileDMLPathKey() && len(schemaFile.Query) > 0 { if key.TableVersion <= ddlWatermark { log.Warn("DDL event replayed with stale table version, ignore it", zap.String("schema", key.Schema), zap.String("table", key.Table), zap.Uint64("tableVersion", key.TableVersion), zap.Uint64("ddlWatermark", ddlWatermark), - zap.String("query", tableDef.Query)) + zap.String("query", schemaFile.Query)) continue } @@ -624,26 +624,26 @@ func (c *consumer) handleNewFiles( zap.String("table", key.Table), zap.Uint64("tableVersion", key.TableVersion), zap.Uint64("ddlWatermark", ddlWatermark), - zap.String("query", tableDef.Query)) + zap.String("query", schemaFile.Query)) - ddlEvent, err := tableDef.ToDDLEvent() + ddlEvent, err := schemaFile.ToDDLEvent() if err != nil { return err } if err := c.sink.WriteBlockEvent(ddlEvent); err != nil { return errors.Trace(err) } - watermarkKey := c.updateTableDDLWatermark(tableDef) - // TODO: need to cleanup tableDefMap in the future. + watermarkKey := c.updateTableDDLWatermark(schemaFile) + // TODO: need to cleanup schemaFileMap in the future. log.Info("execute ddl event successfully", - zap.String("query", tableDef.Query), + zap.String("query", schemaFile.Query), zap.String("schema", key.Schema), zap.String("table", key.Table), zap.Uint64("ddlWatermark", c.tableDDLWatermark[tableKey]), zap.String("watermarkKey", watermarkKey)) continue } - // The table schema has already moved to a newer DDL version on downstream. + // The downstream table has already moved to a newer DDL version. // DML files produced with an older table version should be ignored. if key.TableVersion < ddlWatermark { log.Warn("DML files replayed with stale table version, ignore them", @@ -677,7 +677,7 @@ func (c *consumer) handleNewFiles( zap.Bool("enableTableAcrossNodes", indexKey.EnableTableAcrossNodes), zap.Uint64("fileIndex", i), zap.String("path", filePath)) - if err := c.appendDMLEvents(ctx, tableID, tableDef, key, fileIndex); err != nil { + if err := c.appendDMLEvents(ctx, tableID, schemaFile, key, fileIndex); err != nil { return err } } diff --git a/downstreamadapter/sink/cloudstorage/sink.go b/downstreamadapter/sink/cloudstorage/sink.go index 8a64f45396..b448bc7582 100644 --- a/downstreamadapter/sink/cloudstorage/sink.go +++ b/downstreamadapter/sink/cloudstorage/sink.go @@ -63,9 +63,9 @@ type sink struct { lastCheckpointTs atomic.Uint64 lastSendCheckpointTsTime time.Time - tableSchemaStore *commonEvent.TableSchemaStore - cron *cron.Cron - statistics *metrics.Statistics + schemaStore *commonEvent.TableSchemaStore + cron *cron.Cron + statistics *metrics.Statistics isNormal *atomic.Bool cleanupJobs []func() /* only for test */ @@ -244,37 +244,31 @@ func (s *sink) writeDDLEvent(event *commonEvent.DDLEvent) error { } sourceTableInfo := event.MultipleTableInfos[1] - var def cloudstorage.TableDefinition - def.FromTableInfo( + schemaEvent := *event + schemaEvent.TableInfo = event.TableInfo.CloneWithRouting( event.GetTargetExtraSchemaName(), event.GetTargetExtraTableName(), - event.TableInfo, - event.FinishedTs, - s.cfg.OutputColumnID, ) - def.Query = event.Query - def.Type = event.Type - if err := s.writeFile(event, def); err != nil { + var schemaFile cloudstorage.SchemaFile + schemaFile.Build(&schemaEvent, s.cfg.OutputColumnID) + if err := s.writeFile(event, schemaFile); err != nil { return err } - var sourceTableDef cloudstorage.TableDefinition - sourceTableDef.FromTableInfo( + sourceEvent := *event + sourceEvent.TableInfo = sourceTableInfo.CloneWithRouting( event.GetTargetSchemaName(), event.GetTargetTableName(), - sourceTableInfo, - event.FinishedTs, - s.cfg.OutputColumnID, ) - sourceEvent := *event - sourceEvent.TableInfo = sourceTableInfo - if err := s.writeFile(&sourceEvent, sourceTableDef); err != nil { + var sourceSchemaFile cloudstorage.SchemaFile + sourceSchemaFile.Build(&sourceEvent, s.cfg.OutputColumnID) + if err := s.writeFile(&sourceEvent, sourceSchemaFile); err != nil { return err } } else { for _, e := range event.GetEvents() { - var def cloudstorage.TableDefinition - def.FromDDLEvent(e, s.cfg.OutputColumnID) - if err := s.writeFile(e, def); err != nil { + var schemaFile cloudstorage.SchemaFile + schemaFile.Build(e, s.cfg.OutputColumnID) + if err := s.writeFile(e, schemaFile); err != nil { return err } } @@ -291,22 +285,22 @@ func (s *sink) writeDDLEvent(event *commonEvent.DDLEvent) error { return nil } -func (s *sink) writeFile(v *commonEvent.DDLEvent, def cloudstorage.TableDefinition) error { +func (s *sink) writeFile(v *commonEvent.DDLEvent, schemaFile cloudstorage.SchemaFile) error { // skip write database-level event for 'use-table-id-as-path' mode - if s.cfg.UseTableIDAsPath && def.Table == "" { + if s.cfg.UseTableIDAsPath && schemaFile.Table == "" { return nil } - encodedDef, err := def.MarshalWithQuery() + encodedSchemaFile, err := schemaFile.Marshal() if err != nil { return err } - path, err := def.GenerateSchemaFilePath(s.cfg.UseTableIDAsPath, v.GetTableID()) + path, err := schemaFile.GenerateSchemaFilePath(s.cfg.UseTableIDAsPath, v.GetTableID()) if err != nil { return err } return s.statistics.RecordDDLExecution(func() (string, error) { - err = s.storage.WriteFile(s.ctx, path, encodedDef) + err = s.storage.WriteFile(s.ctx, path, encodedSchemaFile) if err != nil { return "", err } @@ -382,8 +376,8 @@ func (s *sink) sendCheckpointTs(ctx context.Context) error { } } -func (s *sink) SetTableSchemaStore(tableSchemaStore *commonEvent.TableSchemaStore) { - s.tableSchemaStore = tableSchemaStore +func (s *sink) SetTableSchemaStore(schemaStore *commonEvent.TableSchemaStore) { + s.schemaStore = schemaStore } func (s *sink) initCron( diff --git a/downstreamadapter/sink/cloudstorage/sink_test.go b/downstreamadapter/sink/cloudstorage/sink_test.go index 69169c0ac9..88cbb88670 100644 --- a/downstreamadapter/sink/cloudstorage/sink_test.go +++ b/downstreamadapter/sink/cloudstorage/sink_test.go @@ -273,7 +273,7 @@ func TestWriteDDLEvent(t *testing.T) { err = cloudStorageSink.WriteBlockEvent(ddlEvent) require.NoError(t, err) - tableSchema, err := os.ReadFile(path.Join(tableDir, "schema_100_4192708364.json")) + schemaContent, err := os.ReadFile(path.Join(tableDir, "schema_100_4192708364.json")) require.NoError(t, err) require.JSONEq(t, `{ "Table": "table1", @@ -295,7 +295,7 @@ func TestWriteDDLEvent(t *testing.T) { } ], "TableColumnsTotal": 2 - }`, string(tableSchema)) + }`, string(schemaContent)) t.Run("flush dml before write ddl", verifyWriteDDLEventFlushDMLBeforeBlock) } @@ -408,9 +408,9 @@ func TestWriteDDLEventWithTableIDAsPath(t *testing.T) { require.NoError(t, err) tableDir := path.Join(parentDir, "20/meta/") - tableSchema, err := os.ReadFile(path.Join(tableDir, "schema_100_4192708364.json")) + schemaContent, err := os.ReadFile(path.Join(tableDir, "schema_100_4192708364.json")) require.NoError(t, err) - require.Contains(t, string(tableSchema), `"Table": "table1"`) + require.Contains(t, string(schemaContent), `"Table": "table1"`) } func TestSkipDatabaseSchemaWithTableIDAsPath(t *testing.T) { @@ -514,7 +514,7 @@ func TestWriteDDLEventWithInvalidExchangePartitionEvent(t *testing.T) { } } -func readSchemaDefinitionForTest(t *testing.T, parentDir, schema, table string) pkgcloudstorage.TableDefinition { +func readSchemaFileForTest(t *testing.T, parentDir, schema, table string) pkgcloudstorage.SchemaFile { t.Helper() files, err := os.ReadDir(filepath.Join(parentDir, schema, table, "meta")) @@ -524,9 +524,9 @@ func readSchemaDefinitionForTest(t *testing.T, parentDir, schema, table string) content, err := os.ReadFile(filepath.Join(parentDir, schema, table, "meta", files[0].Name())) require.NoError(t, err) - var def pkgcloudstorage.TableDefinition - require.NoError(t, json.Unmarshal(content, &def)) - return def + var schemaFile pkgcloudstorage.SchemaFile + require.NoError(t, json.Unmarshal(content, &schemaFile)) + return schemaFile } func TestWriteExchangePartitionDDLEventUsesTargetNames(t *testing.T) { @@ -605,15 +605,15 @@ func TestWriteExchangePartitionDDLEventUsesTargetNames(t *testing.T) { err = cloudStorageSink.WriteBlockEvent(routedEvent) require.NoError(t, err) - exchangeDef := readSchemaDefinitionForTest(t, parentDir, "target_db", "exchange_table_routed") - require.Equal(t, "target_db", exchangeDef.Schema) - require.Equal(t, "exchange_table_routed", exchangeDef.Table) - require.Equal(t, "partition_value", exchangeDef.Columns[1].Name) + exchangeSchemaFile := readSchemaFileForTest(t, parentDir, "target_db", "exchange_table_routed") + require.Equal(t, "target_db", exchangeSchemaFile.Schema) + require.Equal(t, "exchange_table_routed", exchangeSchemaFile.Table) + require.Equal(t, "partition_value", exchangeSchemaFile.Columns[1].Name) - partitionedDef := readSchemaDefinitionForTest(t, parentDir, "target_db", "partitioned_routed") - require.Equal(t, "target_db", partitionedDef.Schema) - require.Equal(t, "partitioned_routed", partitionedDef.Table) - require.Equal(t, "exchange_value", partitionedDef.Columns[1].Name) + partitionedSchemaFile := readSchemaFileForTest(t, parentDir, "target_db", "partitioned_routed") + require.Equal(t, "target_db", partitionedSchemaFile.Schema) + require.Equal(t, "partitioned_routed", partitionedSchemaFile.Table) + require.Equal(t, "exchange_value", partitionedSchemaFile.Columns[1].Name) _, err = os.Stat(filepath.Join(parentDir, "source_db")) require.ErrorIs(t, err, os.ErrNotExist) diff --git a/pkg/sink/cloudstorage/path.go b/pkg/sink/cloudstorage/path.go index 0b3ec5832e..37f122fc0d 100644 --- a/pkg/sink/cloudstorage/path.go +++ b/pkg/sink/cloudstorage/path.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/log" commonType "github.com/pingcap/ticdc/pkg/common" appcontext "github.com/pingcap/ticdc/pkg/common/context" + commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/config" "github.com/pingcap/ticdc/pkg/errors" "github.com/pingcap/ticdc/pkg/hash" @@ -50,9 +51,9 @@ const ( // The database schema is stored in the following path: // /meta/schema_{tableVersion}_{checksum}.json dbSchemaPrefix = "%s/meta/" - // The table schema is stored in the following path: + // The table-level schema file is stored in the following path: // //meta/schema_{tableVersion}_{checksum}.json - tableSchemaPrefix = "%s/%s/meta/" + tableMetaPrefix = "%s/%s/meta/" // When use-table-id-as-path, schema is omitted: /meta/... tableIDPrefix = "%s/meta/" ) @@ -64,8 +65,8 @@ func IsSchemaFile(path string) bool { return schemaRE.MatchString(path) } -// mustParseSchemaName parses the version from the schema file name. -func mustParseSchemaName(path string) (uint64, uint32) { +// mustParseSchemaFileName parses the version from the schema file name. +func mustParseSchemaFileName(path string) (uint64, uint32) { reportErr := func(reason string, fields ...zap.Field) { fields = append([]zap.Field{ zap.String("schemaPath", path), @@ -118,8 +119,8 @@ func generateSchemaFilePath( // Generate db schema file path. dir = fmt.Sprintf(dbSchemaPrefix, schema) } else { - // Generate table schema file path. - dir = fmt.Sprintf(tableSchemaPrefix, schema, table) + // Generate table-level schema file path. + dir = fmt.Sprintf(tableMetaPrefix, schema, table) } } name := fmt.Sprintf(schemaFileNameFormat, tableVersion, checksum) @@ -160,7 +161,7 @@ type VersionedTableName struct { // tables, we need to use the physical table ID instead of the // logical table ID.(Especially when the table is a partitioned table). TableNameWithPhysicTableID commonType.TableName - // TableInfoVersion is the table schema version carried with incoming DML. + // TableInfoVersion is the schema file version carried with incoming DML. // Source: // 1. DDL finishedTs for schema-changing DDLs. // 2. Checkpoint/startTs during dispatcher recover/move. @@ -231,30 +232,30 @@ func (f *FilePathGenerator) CheckOrWriteSchema( keyspace := f.changefeedID.Keyspace() changefeed := f.changefeedID.Name() - var def TableDefinition - def.FromTableInfo( - tableInfo.GetTargetSchemaName(), - tableInfo.GetTargetTableName(), - tableInfo, - table.TableInfoVersion, - f.config.OutputColumnID, - ) - if !def.IsTableSchema() { + event := &commonEvent.DDLEvent{ + SchemaName: tableInfo.GetTargetSchemaName(), + TableName: tableInfo.GetTargetTableName(), + TableInfo: tableInfo, + FinishedTs: table.TableInfoVersion, + } + var schemaFile SchemaFile + schemaFile.Build(event, f.config.OutputColumnID) + if !schemaFile.isTableLevel() { // only check schema for table - log.Error("invalid table schema", + log.Error("invalid schema file", zap.String("keyspace", keyspace), zap.String("changefeedID", changefeed), zap.Any("versionedTableName", table), zap.Any("tableInfo", tableInfo)) - return false, errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid table schema in FilePathGenerator") + return false, errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid schema file in FilePathGenerator") } // Case 1: point check if the schema file exists. - tblSchemaFile, err := def.GenerateSchemaFilePath(f.config.UseTableIDAsPath, table.TableNameWithPhysicTableID.TableID) + schemaFilePath, err := schemaFile.GenerateSchemaFilePath(f.config.UseTableIDAsPath, table.TableNameWithPhysicTableID.TableID) if err != nil { return false, err } - exist, err := f.storage.FileExists(ctx, tblSchemaFile) + exist, err := f.storage.FileExists(ctx, schemaFilePath) if err != nil { return false, err } @@ -263,10 +264,10 @@ func (f *FilePathGenerator) CheckOrWriteSchema( return false, nil } // walk the table meta path to find the last schema file - _, checksum := mustParseSchemaName(tblSchemaFile) - schemaFileCnt := 0 + _, checksum := mustParseSchemaFileName(schemaFilePath) + schemaFileCount := 0 lastVersion := uint64(0) - tablePathPart, err := generateTablePath(def.Table, table.TableNameWithPhysicTableID.TableID, f.config.UseTableIDAsPath) + tablePathPart, err := generateTablePath(schemaFile.Table, table.TableNameWithPhysicTableID.TableID, f.config.UseTableIDAsPath) if err != nil { return false, err } @@ -274,7 +275,7 @@ func (f *FilePathGenerator) CheckOrWriteSchema( if f.config.UseTableIDAsPath { subDir = fmt.Sprintf(tableIDPrefix, tablePathPart) } else { - subDir = fmt.Sprintf(tableSchemaPrefix, def.Schema, tablePathPart) + subDir = fmt.Sprintf(tableMetaPrefix, schemaFile.Schema, tablePathPart) } checksumSuffix := fmt.Sprintf("%010d.json", checksum) hasNewerSchemaVersion := false @@ -282,11 +283,11 @@ func (f *FilePathGenerator) CheckOrWriteSchema( SubDir: subDir, /* use subDir to prevent walk the whole storage */ ObjPrefix: "schema_", }, func(path string, _ int64) error { - schemaFileCnt++ + schemaFileCount++ if !strings.HasSuffix(path, checksumSuffix) { return nil } - version, parsedChecksum := mustParseSchemaName(path) + version, parsedChecksum := mustParseSchemaFileName(path) if parsedChecksum != checksum { log.Error("invalid schema file name", zap.String("keyspace", keyspace), @@ -312,14 +313,14 @@ func (f *FilePathGenerator) CheckOrWriteSchema( } // Case 2: the table meta path is not empty. - if schemaFileCnt != 0 && lastVersion != 0 { - log.Info("table schema file with exact version not found, using latest available", + if schemaFileCount != 0 && lastVersion != 0 { + log.Info("schema file with exact version not found, using latest available", zap.String("keyspace", keyspace), zap.String("changefeedID", changefeed), zap.Any("versionedTableName", table), zap.Uint64("tableVersion", lastVersion), zap.Uint32("checksum", checksum)) - // record the last version of the table schema file. + // record the last version of the schema file. // we don't need to write schema file to external storage again. f.versionMap[table] = lastVersion return false, nil @@ -328,19 +329,19 @@ func (f *FilePathGenerator) CheckOrWriteSchema( // Case 3: the table meta path is empty, which happens when: // a. the table is existed before changefeed started. We need to write schema file to external storage. // b. the schema file is deleted by the consumer. We write schema file to external storage too. - if schemaFileCnt != 0 && lastVersion == 0 { - log.Warn("no table schema file found in an non-empty meta path", + if schemaFileCount != 0 && lastVersion == 0 { + log.Warn("no schema file found in a non-empty meta path", zap.String("keyspace", keyspace), zap.String("changefeedID", changefeed), zap.Any("versionedTableName", table), zap.Uint32("checksum", checksum)) } - encodedDetail, err := def.MarshalWithQuery() + encodedSchemaFile, err := schemaFile.Marshal() if err != nil { return false, err } f.versionMap[table] = table.TableInfoVersion - return false, f.storage.WriteFile(ctx, tblSchemaFile, encodedDetail) + return false, f.storage.WriteFile(ctx, schemaFilePath, encodedSchemaFile) } // SetClock is used for unit test @@ -451,7 +452,7 @@ func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date str tableVersion, ok := f.versionMap[tbl] if !ok || tableVersion == 0 { return "", errors.ErrInternalCheckFailed.GenWithStackByArgs( - "table schema version is not initialized", + "schema file version is not initialized", ) } diff --git a/pkg/sink/cloudstorage/path_key.go b/pkg/sink/cloudstorage/path_key.go index ecfeb4e371..b900f8b101 100644 --- a/pkg/sink/cloudstorage/path_key.go +++ b/pkg/sink/cloudstorage/path_key.go @@ -32,7 +32,7 @@ type SchemaPathKey struct { // Schema is the first directory level in storage sink paths. // Example: /
//... Schema string - // Table is the second directory level for table schema/data paths. + // Table is the second directory level for table-level schema file and data paths. // For database-level schema files, this field is empty and the path is // /meta/schema_{tableVersion}_{checksum}.json. Table string @@ -69,7 +69,7 @@ func (s *SchemaPathKey) ParseSchemaFilePath(path string) (uint32, error) { } schemaFileName := matches[len(matches)-1] - version, checksum := mustParseSchemaName(schemaFileName) + version, checksum := mustParseSchemaFileName(schemaFileName) *s = SchemaPathKey{ Schema: schema, diff --git a/pkg/sink/cloudstorage/path_key_test.go b/pkg/sink/cloudstorage/path_key_test.go index 979a10e913..a195b793bb 100644 --- a/pkg/sink/cloudstorage/path_key_test.go +++ b/pkg/sink/cloudstorage/path_key_test.go @@ -39,7 +39,7 @@ func TestSchemaPathKey(t *testing.T) { }, checksum: 2, }, - // Test for table schema path: /
/meta/schema_{tableVersion}_{checksum}.json + // Test for table-level schema file path: /
/meta/schema_{tableVersion}_{checksum}.json { path: "test_schema/test_table/meta/schema_11_22.json", schemakey: SchemaPathKey{ diff --git a/pkg/sink/cloudstorage/path_test.go b/pkg/sink/cloudstorage/path_test.go index b39f4320c8..6c9456a365 100644 --- a/pkg/sink/cloudstorage/path_test.go +++ b/pkg/sink/cloudstorage/path_test.go @@ -402,7 +402,7 @@ func TestIsSchemaFile(t *testing.T) { "schema2/meta/schema_123_0123456789.json", true, }, { - "valid table schema /
/meta/", + "valid table-level schema file /
/meta/", "schema1/table1/meta/schema_123_0123456789.json", true, }, {"valid special prefix", "meta/meta/schema_123_0123456789.json", true}, diff --git a/pkg/sink/cloudstorage/schema_file.go b/pkg/sink/cloudstorage/schema_file.go index d33537544b..8e8d4dc8a6 100644 --- a/pkg/sink/cloudstorage/schema_file.go +++ b/pkg/sink/cloudstorage/schema_file.go @@ -1,53 +1,367 @@ -// Copyright 2026 PingCAP, Inc. +// Copyright 2022 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // See the License for the specific language governing permissions and // limitations under the License. - package cloudstorage import ( - "context" "encoding/json" + "sort" + "strconv" + "strings" + "github.com/pingcap/log" + "github.com/pingcap/ticdc/pkg/common" + commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/tidb/pkg/objstore/storeapi" + "github.com/pingcap/ticdc/pkg/hash" + "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/charset" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/parser/types" + "go.uber.org/zap" +) + +const ( + defaultSchemaFileVersion = 1 + marshalPrefix = "" + marshalIndent = " " ) -// ParseTableDefinition parses a schema file and validates its path metadata. -func ParseTableDefinition( - ctx context.Context, - storage storeapi.Storage, - path string, -) (SchemaPathKey, TableDefinition, error) { - var schemaKey SchemaPathKey - checksum, err := schemaKey.ParseSchemaFilePath(path) +// TableCol denotes column info stored in a schema file. +type TableCol struct { + ID string `json:"ColumnId,omitempty"` + Name string `json:"ColumnName" ` + Tp string `json:"ColumnType"` + Default interface{} `json:"ColumnDefault,omitempty"` + Precision string `json:"ColumnPrecision,omitempty"` + Scale string `json:"ColumnScale,omitempty"` + Nullable string `json:"ColumnNullable,omitempty"` + IsPK string `json:"ColumnIsPk,omitempty"` + Elems []string `json:"ColumnElems,omitempty"` +} + +// FromTiColumnInfo converts from TiDB ColumnInfo to TableCol. +func (t *TableCol) FromTiColumnInfo(col *model.ColumnInfo, outputColumnID bool) { + defaultFlen, defaultDecimal := mysql.GetDefaultFieldLengthAndDecimal(col.GetType()) + isDecimalNotDefault := col.GetDecimal() != defaultDecimal && + col.GetDecimal() != 0 && + col.GetDecimal() != types.UnspecifiedLength + + displayFlen, displayDecimal := col.GetFlen(), col.GetDecimal() + if displayFlen == types.UnspecifiedLength { + displayFlen = defaultFlen + } + if displayDecimal == types.UnspecifiedLength { + displayDecimal = defaultDecimal + } + + if outputColumnID { + t.ID = strconv.FormatInt(col.ID, 10) + } + t.Name = col.Name.O + t.Tp = strings.ToUpper(types.TypeToStr(col.GetType(), col.GetCharset())) + if mysql.HasUnsignedFlag(col.GetFlag()) { + t.Tp += " UNSIGNED" + } + if mysql.HasPriKeyFlag(col.GetFlag()) { + t.IsPK = "true" + } + if mysql.HasNotNullFlag(col.GetFlag()) { + t.Nullable = "false" + } + t.Default = col.GetDefaultValue() + + switch col.GetType() { + case mysql.TypeTimestamp, mysql.TypeDatetime, mysql.TypeDuration: + if isDecimalNotDefault { + t.Scale = strconv.Itoa(displayDecimal) + } + case mysql.TypeDouble, mysql.TypeFloat: + t.Precision = strconv.Itoa(displayFlen) + if isDecimalNotDefault { + t.Scale = strconv.Itoa(displayDecimal) + } + case mysql.TypeNewDecimal: + t.Precision = strconv.Itoa(displayFlen) + t.Scale = strconv.Itoa(displayDecimal) + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong, + mysql.TypeBit, mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeBlob, + mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob: + t.Precision = strconv.Itoa(displayFlen) + case mysql.TypeYear: + t.Precision = strconv.Itoa(displayFlen) + case mysql.TypeEnum, mysql.TypeSet: + t.Elems = col.GetElems() + } +} + +// ToTiColumnInfo converts from TableCol to TiDB ColumnInfo. +func (t *TableCol) ToTiColumnInfo(colID int64) (*model.ColumnInfo, error) { + col := new(model.ColumnInfo) + + if t.ID != "" { + var err error + col.ID, err = strconv.ParseInt(t.ID, 10, 64) + if err != nil { + return nil, errors.WrapError(errors.ErrInternalCheckFailed, err) + } + } + + col.ID = colID + col.Name = ast.NewCIStr(t.Name) + tp := types.StrToType(strings.ToLower(strings.TrimSuffix(t.Tp, " UNSIGNED"))) + col.FieldType = *types.NewFieldType(tp) + if strings.Contains(t.Tp, "UNSIGNED") { + col.AddFlag(mysql.UnsignedFlag) + } + if t.IsPK == "true" { + col.AddFlag(mysql.PriKeyFlag) + } + if t.Nullable == "false" { + col.AddFlag(mysql.NotNullFlag) + } + col.DefaultValue = t.Default + if strings.Contains(t.Tp, "BLOB") || strings.Contains(t.Tp, "BINARY") { + col.SetCharset(charset.CharsetBin) + } else { + col.SetCharset(charset.CharsetUTF8MB4) + } + setFlen := func(precision string) error { + if len(precision) > 0 { + flen, err := strconv.Atoi(precision) + if err != nil { + return errors.WrapError(errors.ErrInternalCheckFailed, err) + } + col.SetFlen(flen) + } + return nil + } + setDecimal := func(scale string) error { + if len(scale) > 0 { + decimal, err := strconv.Atoi(scale) + if err != nil { + return errors.WrapError(errors.ErrInternalCheckFailed, err) + } + col.SetDecimal(decimal) + } + return nil + } + switch col.GetType() { + case mysql.TypeTimestamp, mysql.TypeDatetime, mysql.TypeDuration: + err := setDecimal(t.Scale) + if err != nil { + return nil, err + } + case mysql.TypeDouble, mysql.TypeFloat, mysql.TypeNewDecimal: + err := setFlen(t.Precision) + if err != nil { + return nil, err + } + err = setDecimal(t.Scale) + if err != nil { + return nil, err + } + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong, + mysql.TypeBit, mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeBlob, + mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeYear: + err := setFlen(t.Precision) + if err != nil { + return nil, err + } + case mysql.TypeEnum, mysql.TypeSet: + col.SetElems(t.Elems) + } + + return col, nil +} + +// SchemaFile is the payload persisted as schema_*.json. +type SchemaFile struct { + Table string `json:"Table"` + Schema string `json:"Schema"` + Version uint64 `json:"Version"` + // TableVersion is the schema version encoded into schema file name: + // schema_{TableVersion}_{checksum}.json. + // It is passed from tableInfoVersion in path generation. + TableVersion uint64 `json:"TableVersion"` + Query string `json:"Query"` + Type byte `json:"Type"` + Columns []TableCol `json:"TableColumns"` + TotalColumns int `json:"TableColumnsTotal"` +} + +// checksumPayload ignores DDL replay fields and path metadata. +type checksumPayload struct { + Table string `json:"Table"` + Schema string `json:"Schema"` + Version uint64 `json:"Version"` + Columns []TableCol `json:"TableColumns"` + TotalColumns int `json:"TableColumnsTotal"` +} + +// ToDDLEvent converts SchemaFile to DDLEvent. +func (t *SchemaFile) ToDDLEvent() (*commonEvent.DDLEvent, error) { + tableInfo, err := t.ToTableInfo() + if err != nil { + return nil, err + } + return &commonEvent.DDLEvent{ + TableInfo: tableInfo, + FinishedTs: t.TableVersion, + Type: t.Type, + Query: t.Query, + SchemaName: t.Schema, + TableName: t.Table, + BlockedTables: &commonEvent.InfluencedTables{InfluenceType: commonEvent.InfluenceTypeAll}, + }, nil +} + +// Build fills SchemaFile from DDLEvent. +func (t *SchemaFile) Build(event *commonEvent.DDLEvent, outputColumnID bool) { + t.Version = defaultSchemaFileVersion + t.TableVersion = event.FinishedTs + t.Query = event.Query + t.Type = event.Type + + info := event.TableInfo + if info == nil { + t.Schema = event.GetTargetSchemaName() + t.Table = event.GetTargetTableName() + return + } + t.Schema = info.GetTargetSchemaName() + t.Table = info.GetTargetTableName() + t.TotalColumns = len(info.GetColumns()) + for _, col := range info.GetColumns() { + var tableCol TableCol + tableCol.FromTiColumnInfo(col, outputColumnID) + t.Columns = append(t.Columns, tableCol) + } +} + +// ToTableInfo converts SchemaFile to TableInfo. +func (t *SchemaFile) ToTableInfo() (*common.TableInfo, error) { + tidbTableInfo := &model.TableInfo{ + Name: ast.NewCIStr(t.Table), + } + nextMockID := int64(100) // 100 is an arbitrary number + for _, col := range t.Columns { + tiCol, err := col.ToTiColumnInfo(nextMockID) + if err != nil { + return nil, err + } + if mysql.HasPriKeyFlag(tiCol.GetFlag()) { + // use PKIsHandle to make sure that the primary keys can be detected + tidbTableInfo.PKIsHandle = true + } + tidbTableInfo.Columns = append(tidbTableInfo.Columns, tiCol) + nextMockID++ + } + info := common.NewTableInfo4Decoder(t.Schema, tidbTableInfo) + return info, nil +} + +// isTableLevel returns whether this file describes a table. +func (t *SchemaFile) isTableLevel() bool { + if len(t.Columns) != t.TotalColumns { + log.Panic("invalid schema file", zap.Any("schemaFile", t)) + } + return t.TotalColumns != 0 +} + +// Marshal marshals SchemaFile. +func (t *SchemaFile) Marshal() ([]byte, error) { + data, err := json.MarshalIndent(t, marshalPrefix, marshalIndent) if err != nil { - return schemaKey, TableDefinition{}, err + return nil, errors.WrapError(errors.ErrMarshalFailed, err) } - var tableDef TableDefinition - schemaContent, err := storage.ReadFile(ctx, path) + return data, nil +} + +// marshalForChecksum marshals fields covered by the path checksum. +func (t *SchemaFile) marshalForChecksum() ([]byte, error) { + // sort columns by name + sortedColumns := make([]TableCol, len(t.Columns)) + copy(sortedColumns, t.Columns) + sort.Slice(sortedColumns, func(i, j int) bool { + return sortedColumns[i].Name < sortedColumns[j].Name + }) + + payload := checksumPayload{ + Table: t.Table, + Schema: t.Schema, + Columns: sortedColumns, + TotalColumns: t.TotalColumns, + } + + data, err := json.MarshalIndent(payload, marshalPrefix, marshalIndent) if err != nil { - return schemaKey, tableDef, errors.Trace(err) + return nil, errors.WrapError(errors.ErrMarshalFailed, err) } - if err = json.Unmarshal(schemaContent, &tableDef); err != nil { - return schemaKey, tableDef, errors.Trace(err) + return data, nil +} + +// Sum32 returns the 32-bits hash value of SchemaFile. +func (t *SchemaFile) Sum32(hasher *hash.PositionInertia) (uint32, error) { + if hasher == nil { + hasher = hash.NewPositionInertia() } - checksumInMem, err := tableDef.Sum32(nil) + hasher.Reset() + data, err := t.marshalForChecksum() + if err != nil { + return 0, err + } + + hasher.Write(data) + return hasher.Sum32(), nil +} + +// GenerateSchemaFilePath generates the schema file path for SchemaFile +// with optional table id path. +func (t *SchemaFile) GenerateSchemaFilePath(useTableIDAsPath bool, tableID int64) (string, error) { + checksum, err := t.Sum32(nil) if err != nil { - return schemaKey, tableDef, errors.Trace(err) + return "", err } - if checksumInMem != checksum || schemaKey.TableVersion != tableDef.TableVersion { - return schemaKey, tableDef, errors.ErrStorageSinkInvalidFileName.GenWithStack( - "checksum mismatch in schema file %s: checksum in memory %d, checksum in file %d, table version in path %d, table version in file %d", - path, checksumInMem, checksum, schemaKey.TableVersion, tableDef.TableVersion) + if t.Schema == "" { + return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("schema cannot be empty") + } + if t.TableVersion == 0 { + return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("table version cannot be zero") + } + if len(t.Columns) != t.TotalColumns { + return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid schema file") + } + isTableLevel := t.TotalColumns != 0 + if !isTableLevel && t.Table != "" { + return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid schema file") + } + if useTableIDAsPath && isTableLevel && tableID <= 0 { + return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid table id for table-id path") + } + + table := t.Table + if isTableLevel { + tablePath, err := generateTablePath(t.Table, tableID, useTableIDAsPath) + if err != nil { + return "", err + } + table = tablePath + } + omitSchema := useTableIDAsPath && isTableLevel + path, err := generateSchemaFilePath(t.Schema, table, t.TableVersion, checksum, omitSchema) + if err != nil { + return "", err } - return schemaKey, tableDef, nil + return path, nil } diff --git a/pkg/sink/cloudstorage/schema_file_parse.go b/pkg/sink/cloudstorage/schema_file_parse.go new file mode 100644 index 0000000000..dfa54700fc --- /dev/null +++ b/pkg/sink/cloudstorage/schema_file_parse.go @@ -0,0 +1,53 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package cloudstorage + +import ( + "context" + "encoding/json" + + "github.com/pingcap/ticdc/pkg/errors" + "github.com/pingcap/tidb/pkg/objstore/storeapi" +) + +// Parse parses a schema file and validates its path metadata. +func Parse( + ctx context.Context, + storage storeapi.Storage, + path string, +) (SchemaPathKey, SchemaFile, error) { + var schemaKey SchemaPathKey + checksum, err := schemaKey.ParseSchemaFilePath(path) + if err != nil { + return schemaKey, SchemaFile{}, err + } + var schemaFile SchemaFile + schemaContent, err := storage.ReadFile(ctx, path) + if err != nil { + return schemaKey, schemaFile, errors.Trace(err) + } + if err = json.Unmarshal(schemaContent, &schemaFile); err != nil { + return schemaKey, schemaFile, errors.Trace(err) + } + checksumInMem, err := schemaFile.Sum32(nil) + if err != nil { + return schemaKey, schemaFile, errors.Trace(err) + } + if checksumInMem != checksum || schemaKey.TableVersion != schemaFile.TableVersion { + return schemaKey, schemaFile, errors.ErrStorageSinkInvalidFileName.GenWithStack( + "checksum mismatch in schema file %s: checksum in memory %d, checksum in file %d, table version in path %d, table version in file %d", + path, checksumInMem, checksum, schemaKey.TableVersion, schemaFile.TableVersion) + } + return schemaKey, schemaFile, nil +} diff --git a/pkg/sink/cloudstorage/schema_file_parse_test.go b/pkg/sink/cloudstorage/schema_file_parse_test.go new file mode 100644 index 0000000000..c4e7592daa --- /dev/null +++ b/pkg/sink/cloudstorage/schema_file_parse_test.go @@ -0,0 +1,84 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package cloudstorage + +import ( + "context" + "fmt" + "testing" + + "github.com/pingcap/ticdc/pkg/errors" + "github.com/pingcap/ticdc/pkg/util" + "github.com/stretchr/testify/require" +) + +func TestParse(t *testing.T) { + t.Parallel() + + ctx := context.Background() + storage, err := util.GetExternalStorageWithDefaultTimeout( + ctx, fmt.Sprintf("file:///%s", t.TempDir())) + require.NoError(t, err) + defer storage.Close() + + schemaFile, _ := generateSchemaFile() + schemaFilePath, err := schemaFile.GenerateSchemaFilePath(false, 0) + require.NoError(t, err) + encodedSchemaFile, err := schemaFile.Marshal() + require.NoError(t, err) + require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedSchemaFile)) + + schemaKey, got, err := Parse(ctx, storage, schemaFilePath) + require.NoError(t, err) + require.Equal(t, SchemaPathKey{ + Schema: schemaFile.Schema, + Table: schemaFile.Table, + TableVersion: schemaFile.TableVersion, + }, schemaKey) + require.Equal(t, schemaFile.Schema, got.Schema) + require.Equal(t, schemaFile.Table, got.Table) + require.Equal(t, schemaFile.Version, got.Version) + require.Equal(t, schemaFile.TableVersion, got.TableVersion) + require.Equal(t, schemaFile.TotalColumns, got.TotalColumns) + require.Len(t, got.Columns, len(schemaFile.Columns)) + + expectedChecksum, err := schemaFile.Sum32(nil) + require.NoError(t, err) + gotChecksum, err := got.Sum32(nil) + require.NoError(t, err) + require.Equal(t, expectedChecksum, gotChecksum) +} + +func TestParseChecksumMismatch(t *testing.T) { + t.Parallel() + + ctx := context.Background() + storage, err := util.GetExternalStorageWithDefaultTimeout( + ctx, fmt.Sprintf("file:///%s", t.TempDir())) + require.NoError(t, err) + defer storage.Close() + + schemaFile, _ := generateSchemaFile() + schemaFilePath, err := schemaFile.GenerateSchemaFilePath(false, 0) + require.NoError(t, err) + + schemaFile.TableVersion++ + encodedSchemaFile, err := schemaFile.Marshal() + require.NoError(t, err) + require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedSchemaFile)) + + _, _, err = Parse(ctx, storage, schemaFilePath) + require.Error(t, err) + require.True(t, errors.ErrStorageSinkInvalidFileName.Equal(err)) +} diff --git a/pkg/sink/cloudstorage/schema_file_test.go b/pkg/sink/cloudstorage/schema_file_test.go index 1f40fbe75d..4fcd1644e3 100644 --- a/pkg/sink/cloudstorage/schema_file_test.go +++ b/pkg/sink/cloudstorage/schema_file_test.go @@ -1,84 +1,616 @@ -// Copyright 2026 PingCAP, Inc. +// Copyright 2022 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // See the License for the specific language governing permissions and // limitations under the License. - package cloudstorage import ( - "context" - "fmt" + "encoding/json" + "math" + "math/rand" "testing" - "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/ticdc/pkg/util" + "github.com/pingcap/ticdc/pkg/common" + commonEvent "github.com/pingcap/ticdc/pkg/common/event" + timodel "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/charset" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" "github.com/stretchr/testify/require" ) -func TestParseTableDefinition(t *testing.T) { +func generateSchemaFile() (SchemaFile, *common.TableInfo) { + var columns []*timodel.ColumnInfo + ft := types.NewFieldType(mysql.TypeLong) + ft.SetFlag(mysql.PriKeyFlag | mysql.NotNullFlag) + col := &timodel.ColumnInfo{ + Name: ast.NewCIStr("Id"), + FieldType: *ft, + DefaultValue: 10, + } + columns = append(columns, col) + + ft = types.NewFieldType(mysql.TypeVarchar) + ft.SetFlag(mysql.NotNullFlag) + ft.SetFlen(128) + col = &timodel.ColumnInfo{ + Name: ast.NewCIStr("LastName"), + FieldType: *ft, + DefaultValue: "Default LastName", + } + columns = append(columns, col) + + ft = types.NewFieldType(mysql.TypeVarchar) + ft.SetFlen(64) + col = &timodel.ColumnInfo{ + Name: ast.NewCIStr("FirstName"), + FieldType: *ft, + DefaultValue: "Default FirstName", + } + columns = append(columns, col) + + ft = types.NewFieldType(mysql.TypeDatetime) + col = &timodel.ColumnInfo{ + Name: ast.NewCIStr("Birthday"), + FieldType: *ft, + DefaultValue: 12345678, + } + columns = append(columns, col) + + tableInfo := common.WrapTableInfo("schema1", &timodel.TableInfo{ + ID: 20, + Name: ast.NewCIStr("table1"), + Columns: columns, + UpdateTS: 100, + }) + event := &commonEvent.DDLEvent{ + SchemaName: tableInfo.GetSchemaName(), + TableName: tableInfo.GetTableName(), + TableInfo: tableInfo, + FinishedTs: tableInfo.GetUpdateTS(), + } + var schemaFile SchemaFile + schemaFile.Build(event, false) + return schemaFile, tableInfo +} + +func TestBuildUsesTargetNames(t *testing.T) { t.Parallel() - ctx := context.Background() - storage, err := util.GetExternalStorageWithDefaultTimeout( - ctx, fmt.Sprintf("file:///%s", t.TempDir())) - require.NoError(t, err) - defer storage.Close() + idFieldType := types.NewFieldType(mysql.TypeLong) + idFieldType.SetFlag(mysql.PriKeyFlag | mysql.NotNullFlag) + routedTableInfo := common.WrapTableInfo("source_db", &timodel.TableInfo{ + ID: 20, + Name: ast.NewCIStr("source_table"), + UpdateTS: 100, + Columns: []*timodel.ColumnInfo{ + { + ID: 1, + Name: ast.NewCIStr("id"), + FieldType: *idFieldType, + State: timodel.StatePublic, + }, + }, + }).CloneWithRouting("target_db", "target_table") + sourceDDL := &commonEvent.DDLEvent{ + Version: commonEvent.DDLEventVersion1, + Type: byte(timodel.ActionCreateTable), + SchemaName: "source_db", + TableName: "source_table", + Query: "CREATE TABLE `source_db`.`source_table` (`id` INT PRIMARY KEY)", + TableInfo: routedTableInfo, + FinishedTs: 100, + } - def, _ := generateTableDef() - schemaFilePath, err := def.GenerateSchemaFilePath(false, 0) - require.NoError(t, err) - encodedDef, err := def.MarshalWithQuery() + routedDDL := commonEvent.NewRoutedDDLEvent( + sourceDDL, + "CREATE TABLE `target_db`.`target_table` (`id` INT PRIMARY KEY)", + "target_db", + "target_table", + "", + "", + routedTableInfo, + nil, + nil, + ) + + var schemaFile SchemaFile + schemaFile.Build(routedDDL, false) + require.Equal(t, "target_db", schemaFile.Schema) + require.Equal(t, "target_table", schemaFile.Table) + require.Contains(t, schemaFile.Query, "`target_db`.`target_table`") +} + +func TestTableCol(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + filedType byte + flen int + decimal int + flag uint + charset string + expected string + }{ + { + name: "time", + filedType: mysql.TypeDuration, + flen: math.MinInt, + decimal: 5, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"TIME","ColumnScale":"5"}`, + }, + { + name: "int(5) UNSIGNED", + filedType: mysql.TypeLong, + flen: 5, + decimal: math.MinInt, + flag: mysql.UnsignedFlag, + expected: `{"ColumnName":"","ColumnType":"INT UNSIGNED","ColumnPrecision":"5"}`, + }, + { + name: "float(12,3)", + filedType: mysql.TypeFloat, + flen: 12, + decimal: 3, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"12","ColumnScale":"3"}`, + }, + { + name: "float", + filedType: mysql.TypeFloat, + flen: 12, + decimal: -1, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"12"}`, + }, + { + name: "float", + filedType: mysql.TypeFloat, + flen: 5, + decimal: -1, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"5"}`, + }, + { + name: "float(7,3)", + filedType: mysql.TypeFloat, + flen: 7, + decimal: 3, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"7","ColumnScale":"3"}`, + }, + { + name: "double(12,3)", + filedType: mysql.TypeDouble, + flen: 12, + decimal: 3, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"12","ColumnScale":"3"}`, + }, + { + name: "double", + filedType: mysql.TypeDouble, + flen: 12, + decimal: -1, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"12"}`, + }, + { + name: "double", + filedType: mysql.TypeDouble, + flen: 5, + decimal: -1, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"5"}`, + }, + { + name: "double(7,3)", + filedType: mysql.TypeDouble, + flen: 7, + decimal: 3, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"7","ColumnScale":"3"}`, + }, + { + name: "tinyint(5)", + filedType: mysql.TypeTiny, + flen: 5, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"TINYINT","ColumnPrecision":"5"}`, + }, + { + name: "smallint(5)", + filedType: mysql.TypeShort, + flen: 5, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"SMALLINT","ColumnPrecision":"5"}`, + }, + { + name: "mediumint(10)", + filedType: mysql.TypeInt24, + flen: 10, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"MEDIUMINT","ColumnPrecision":"10"}`, + }, + { + name: "int(11)", + filedType: mysql.TypeLong, + flen: math.MinInt, + decimal: math.MinInt, + flag: mysql.PriKeyFlag, + expected: `{"ColumnIsPk":"true", "ColumnName":"", "ColumnPrecision":"11", "ColumnType":"INT"}`, + }, + { + name: "bigint(20)", + filedType: mysql.TypeLonglong, + flen: 20, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"BIGINT","ColumnPrecision":"20"}`, + }, + { + name: "bit(5)", + filedType: mysql.TypeBit, + flen: 5, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"BIT","ColumnPrecision":"5"}`, + }, + { + name: "varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin", + filedType: mysql.TypeVarchar, + flen: 128, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"VARCHAR","ColumnPrecision":"128"}`, + }, + { + name: "char(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin", + filedType: mysql.TypeString, + flen: 32, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"CHAR","ColumnPrecision":"32"}`, + }, + { + name: "var_string(64)", + filedType: mysql.TypeVarString, + flen: 64, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"VAR_STRING","ColumnPrecision":"64"}`, + }, + { + name: "blob", + filedType: mysql.TypeBlob, + flen: 100, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"BLOB","ColumnPrecision":"100"}`, + }, + { + name: "text", + filedType: mysql.TypeBlob, + flen: 100, + decimal: math.MinInt, + flag: 0, + charset: charset.CharsetUTF8MB4, + expected: `{"ColumnName":"","ColumnType":"TEXT","ColumnPrecision":"100"}`, + }, + { + name: "tinyblob", + filedType: mysql.TypeTinyBlob, + flen: 120, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"TINYBLOB","ColumnPrecision":"120"}`, + }, + { + name: "mediumblob", + filedType: mysql.TypeMediumBlob, + flen: 100, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"MEDIUMBLOB","ColumnPrecision":"100"}`, + }, + { + name: "longblob", + filedType: mysql.TypeLongBlob, + flen: 5, + decimal: math.MinInt, + flag: 0, + expected: `{"ColumnName":"","ColumnType":"LONGBLOB","ColumnPrecision":"5"}`, + }, + { + name: "enum", + filedType: mysql.TypeEnum, + expected: `{"ColumnName":"","ColumnType":"ENUM"}`, + }, + { + name: "set", + filedType: mysql.TypeSet, + expected: `{"ColumnName":"","ColumnType":"SET"}`, + }, + { + name: "timestamp(2)", + filedType: mysql.TypeTimestamp, + flen: 8, + decimal: 2, + expected: `{"ColumnName":"","ColumnType":"TIMESTAMP","ColumnScale":"2"}`, + }, + { + name: "timestamp", + filedType: mysql.TypeTimestamp, + flen: 8, + decimal: 0, + expected: `{"ColumnName":"","ColumnType":"TIMESTAMP"}`, + }, + { + name: "datetime(2)", + filedType: mysql.TypeDatetime, + flen: 8, + decimal: 2, + expected: `{"ColumnName":"","ColumnType":"DATETIME","ColumnScale":"2"}`, + }, + { + name: "datetime", + filedType: mysql.TypeDatetime, + flen: 8, + decimal: 0, + expected: `{"ColumnName":"","ColumnType":"DATETIME"}`, + }, + { + name: "date", + filedType: mysql.TypeDate, + flen: 8, + decimal: 2, + expected: `{"ColumnName":"","ColumnType":"DATE"}`, + }, + { + name: "date", + filedType: mysql.TypeDate, + flen: 8, + decimal: 0, + expected: `{"ColumnName":"","ColumnType":"DATE"}`, + }, + { + name: "year(4)", + filedType: mysql.TypeYear, + flen: 4, + decimal: 0, + expected: `{"ColumnName":"","ColumnType":"YEAR","ColumnPrecision":"4"}`, + }, + { + name: "year(2)", + filedType: mysql.TypeYear, + flen: 2, + decimal: 2, + expected: `{"ColumnName":"","ColumnType":"YEAR","ColumnPrecision":"2"}`, + }, + } + + for _, tc := range testCases { + ft := types.NewFieldType(tc.filedType) + if tc.flen != math.MinInt { + ft.SetFlen(tc.flen) + } + if tc.decimal != math.MinInt { + ft.SetDecimal(tc.decimal) + } + if tc.flag != 0 { + ft.SetFlag(tc.flag) + } + if len(tc.charset) != 0 { + ft.SetCharset(tc.charset) + } + col := &timodel.ColumnInfo{FieldType: *ft} + var tableCol TableCol + tableCol.FromTiColumnInfo(col, false) + encodedCol, err := json.Marshal(tableCol) + require.Nil(t, err, tc.name) + require.JSONEq(t, tc.expected, string(encodedCol), tc.name) + + _, err = tableCol.ToTiColumnInfo(100) + require.NoError(t, err) + } +} + +func TestSchemaFile(t *testing.T) { + t.Parallel() + + schemaFile, tableInfo := generateSchemaFile() + encodedSchemaFile, err := json.MarshalIndent(schemaFile, "", " ") require.NoError(t, err) - require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedDef)) + require.JSONEq(t, `{ + "Table": "table1", + "Schema": "schema1", + "Version": 1, + "TableVersion": 100, + "Query": "", + "Type": 0, + "TableColumns": [ + { + "ColumnName": "Id", + "ColumnType": "INT", + "ColumnPrecision": "11", + "ColumnDefault":10, + "ColumnNullable": "false", + "ColumnIsPk": "true" + }, + { + "ColumnName": "LastName", + "ColumnType": "VARCHAR", + "ColumnDefault":"Default LastName", + "ColumnPrecision": "128", + "ColumnNullable": "false" + }, + { + "ColumnName": "FirstName", + "ColumnDefault":"Default FirstName", + "ColumnType": "VARCHAR", + "ColumnPrecision": "64" + }, + { + "ColumnName": "Birthday", + "ColumnDefault":1.2345678e+07, + "ColumnType": "DATETIME" + } + ], + "TableColumnsTotal": 4 + }`, string(encodedSchemaFile)) - schemaKey, got, err := ParseTableDefinition(ctx, storage, schemaFilePath) + schemaFile = SchemaFile{} + event := &commonEvent.DDLEvent{ + FinishedTs: tableInfo.GetUpdateTS(), + Type: byte(timodel.ActionAddColumn), + Query: "alter table schema1.table1 add Birthday date", + TableInfo: tableInfo, + SchemaName: "schema1", + TableName: "table1", + } + schemaFile.Build(event, false) + encodedSchemaFile, err = json.MarshalIndent(schemaFile, "", " ") require.NoError(t, err) - require.Equal(t, SchemaPathKey{ - Schema: def.Schema, - Table: def.Table, - TableVersion: def.TableVersion, - }, schemaKey) - require.Equal(t, def.Schema, got.Schema) - require.Equal(t, def.Table, got.Table) - require.Equal(t, def.Version, got.Version) - require.Equal(t, def.TableVersion, got.TableVersion) - require.Equal(t, def.TotalColumns, got.TotalColumns) - require.Len(t, got.Columns, len(def.Columns)) - - expectedChecksum, err := def.Sum32(nil) + require.JSONEq(t, `{ + "Table": "table1", + "Schema": "schema1", + "Version": 1, + "TableVersion": 100, + "Query": "alter table schema1.table1 add Birthday date", + "Type": 5, + "TableColumns": [ + { + "ColumnName": "Id", + "ColumnType": "INT", + "ColumnPrecision": "11", + "ColumnDefault":10, + "ColumnNullable": "false", + "ColumnIsPk": "true" + }, + { + "ColumnName": "LastName", + "ColumnType": "VARCHAR", + "ColumnDefault":"Default LastName", + "ColumnPrecision": "128", + "ColumnNullable": "false" + }, + { + "ColumnName": "FirstName", + "ColumnDefault":"Default FirstName", + "ColumnType": "VARCHAR", + "ColumnPrecision": "64" + }, + { + "ColumnName": "Birthday", + "ColumnDefault":1.2345678e+07, + "ColumnType": "DATETIME" + } + ], + "TableColumnsTotal": 4 + }`, string(encodedSchemaFile)) + + tableInfo, err = schemaFile.ToTableInfo() require.NoError(t, err) - gotChecksum, err := got.Sum32(nil) + require.Len(t, tableInfo.GetColumns(), 4) + + event, err = schemaFile.ToDDLEvent() require.NoError(t, err) - require.Equal(t, expectedChecksum, gotChecksum) + require.Equal(t, byte(timodel.ActionAddColumn), event.Type) + require.Equal(t, uint64(100), event.FinishedTs) } -func TestParseTableDefinitionChecksumMismatch(t *testing.T) { +func TestSchemaFileGenFilePath(t *testing.T) { t.Parallel() - ctx := context.Background() - storage, err := util.GetExternalStorageWithDefaultTimeout( - ctx, fmt.Sprintf("file:///%s", t.TempDir())) + dbSchemaFile := &SchemaFile{ + Schema: "schema1", + Version: defaultSchemaFileVersion, + TableVersion: 100, + } + schemaPath, err := dbSchemaFile.GenerateSchemaFilePath(false, 0) require.NoError(t, err) - defer storage.Close() + require.Equal(t, "schema1/meta/schema_100_3233644819.json", schemaPath) - def, _ := generateTableDef() - schemaFilePath, err := def.GenerateSchemaFilePath(false, 0) + schemaPath, err = dbSchemaFile.GenerateSchemaFilePath(true, 0) require.NoError(t, err) + require.Equal(t, "schema1/meta/schema_100_3233644819.json", schemaPath) - def.TableVersion++ - encodedDef, err := def.MarshalWithQuery() + schemaFile, _ := generateSchemaFile() + tablePath, err := schemaFile.GenerateSchemaFilePath(false, 0) require.NoError(t, err) - require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedDef)) + require.Equal(t, "schema1/table1/meta/schema_100_3752767265.json", tablePath) - _, _, err = ParseTableDefinition(ctx, storage, schemaFilePath) + tablePath, err = schemaFile.GenerateSchemaFilePath(true, 12345) + require.NoError(t, err) + require.Equal(t, "12345/meta/schema_100_3752767265.json", tablePath) +} + +func TestGenerateSchemaFilePathValidation(t *testing.T) { + t.Parallel() + + schemaFile, _ := generateSchemaFile() + + // empty schema + emptySchemaFile := &SchemaFile{Schema: "", Table: "t1", TableVersion: 100, TotalColumns: 1, Columns: []TableCol{{}}} + _, err := emptySchemaFile.GenerateSchemaFilePath(false, 0) + require.Error(t, err) + require.Contains(t, err.Error(), "schema cannot be empty") + + // zero table version + zeroVersionSchemaFile := &SchemaFile{Schema: "s1", Table: "t1", TableVersion: 0, TotalColumns: 1, Columns: []TableCol{{}}} + _, err = zeroVersionSchemaFile.GenerateSchemaFilePath(false, 0) + require.Error(t, err) + require.Contains(t, err.Error(), "table version cannot be zero") + + // use-table-id-as-path with invalid tableID + _, err = schemaFile.GenerateSchemaFilePath(true, 0) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid table id for table-id path") + _, err = schemaFile.GenerateSchemaFilePath(true, -1) require.Error(t, err) - require.True(t, errors.ErrStorageSinkInvalidFileName.Equal(err)) + require.Contains(t, err.Error(), "invalid table id for table-id path") + + invalidSchemaFile := &SchemaFile{Schema: "s1", Table: "t1", TableVersion: 100, TotalColumns: 1, Columns: nil} + _, err = invalidSchemaFile.GenerateSchemaFilePath(false, 0) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid schema file") +} + +func TestSchemaFileSum32(t *testing.T) { + t.Parallel() + + schemaFile, _ := generateSchemaFile() + checksum1, err := schemaFile.Sum32(nil) + require.NoError(t, err) + checksum2, err := schemaFile.Sum32(nil) + require.NoError(t, err) + require.Equal(t, checksum1, checksum2) + + n := len(schemaFile.Columns) + newCol := make([]TableCol, n) + copy(newCol, schemaFile.Columns) + newSchemaFile := schemaFile + newSchemaFile.Columns = newCol + + for i := 0; i < n; i++ { + target := rand.Intn(n) + newSchemaFile.Columns[i], newSchemaFile.Columns[target] = newSchemaFile.Columns[target], newSchemaFile.Columns[i] + newChecksum, err := newSchemaFile.Sum32(nil) + require.NoError(t, err) + require.Equal(t, checksum1, newChecksum) + } } diff --git a/pkg/sink/cloudstorage/table_definition.go b/pkg/sink/cloudstorage/table_definition.go deleted file mode 100644 index 220e4e05b5..0000000000 --- a/pkg/sink/cloudstorage/table_definition.go +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright 2022 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. -package cloudstorage - -import ( - "encoding/json" - "sort" - "strconv" - "strings" - - "github.com/pingcap/log" - "github.com/pingcap/ticdc/pkg/common" - commonEvent "github.com/pingcap/ticdc/pkg/common/event" - "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/ticdc/pkg/hash" - "github.com/pingcap/tidb/pkg/meta/model" - "github.com/pingcap/tidb/pkg/parser/ast" - "github.com/pingcap/tidb/pkg/parser/charset" - "github.com/pingcap/tidb/pkg/parser/mysql" - "github.com/pingcap/tidb/pkg/parser/types" - "go.uber.org/zap" -) - -const ( - defaultTableDefinitionVersion = 1 - marshalPrefix = "" - marshalIndent = " " -) - -// TableCol denotes the column info for a table definition. -type TableCol struct { - ID string `json:"ColumnId,omitempty"` - Name string `json:"ColumnName" ` - Tp string `json:"ColumnType"` - Default interface{} `json:"ColumnDefault,omitempty"` - Precision string `json:"ColumnPrecision,omitempty"` - Scale string `json:"ColumnScale,omitempty"` - Nullable string `json:"ColumnNullable,omitempty"` - IsPK string `json:"ColumnIsPk,omitempty"` - Elems []string `json:"ColumnElems,omitempty"` -} - -// FromTiColumnInfo converts from TiDB ColumnInfo to TableCol. -func (t *TableCol) FromTiColumnInfo(col *model.ColumnInfo, outputColumnID bool) { - defaultFlen, defaultDecimal := mysql.GetDefaultFieldLengthAndDecimal(col.GetType()) - isDecimalNotDefault := col.GetDecimal() != defaultDecimal && - col.GetDecimal() != 0 && - col.GetDecimal() != types.UnspecifiedLength - - displayFlen, displayDecimal := col.GetFlen(), col.GetDecimal() - if displayFlen == types.UnspecifiedLength { - displayFlen = defaultFlen - } - if displayDecimal == types.UnspecifiedLength { - displayDecimal = defaultDecimal - } - - if outputColumnID { - t.ID = strconv.FormatInt(col.ID, 10) - } - t.Name = col.Name.O - t.Tp = strings.ToUpper(types.TypeToStr(col.GetType(), col.GetCharset())) - if mysql.HasUnsignedFlag(col.GetFlag()) { - t.Tp += " UNSIGNED" - } - if mysql.HasPriKeyFlag(col.GetFlag()) { - t.IsPK = "true" - } - if mysql.HasNotNullFlag(col.GetFlag()) { - t.Nullable = "false" - } - t.Default = col.GetDefaultValue() - - switch col.GetType() { - case mysql.TypeTimestamp, mysql.TypeDatetime, mysql.TypeDuration: - if isDecimalNotDefault { - t.Scale = strconv.Itoa(displayDecimal) - } - case mysql.TypeDouble, mysql.TypeFloat: - t.Precision = strconv.Itoa(displayFlen) - if isDecimalNotDefault { - t.Scale = strconv.Itoa(displayDecimal) - } - case mysql.TypeNewDecimal: - t.Precision = strconv.Itoa(displayFlen) - t.Scale = strconv.Itoa(displayDecimal) - case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong, - mysql.TypeBit, mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeBlob, - mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob: - t.Precision = strconv.Itoa(displayFlen) - case mysql.TypeYear: - t.Precision = strconv.Itoa(displayFlen) - case mysql.TypeEnum, mysql.TypeSet: - t.Elems = col.GetElems() - } -} - -// ToTiColumnInfo converts from TableCol to TiDB ColumnInfo. -func (t *TableCol) ToTiColumnInfo(colID int64) (*model.ColumnInfo, error) { - col := new(model.ColumnInfo) - - if t.ID != "" { - var err error - col.ID, err = strconv.ParseInt(t.ID, 10, 64) - if err != nil { - return nil, errors.WrapError(errors.ErrInternalCheckFailed, err) - } - } - - col.ID = colID - col.Name = ast.NewCIStr(t.Name) - tp := types.StrToType(strings.ToLower(strings.TrimSuffix(t.Tp, " UNSIGNED"))) - col.FieldType = *types.NewFieldType(tp) - if strings.Contains(t.Tp, "UNSIGNED") { - col.AddFlag(mysql.UnsignedFlag) - } - if t.IsPK == "true" { - col.AddFlag(mysql.PriKeyFlag) - } - if t.Nullable == "false" { - col.AddFlag(mysql.NotNullFlag) - } - col.DefaultValue = t.Default - if strings.Contains(t.Tp, "BLOB") || strings.Contains(t.Tp, "BINARY") { - col.SetCharset(charset.CharsetBin) - } else { - col.SetCharset(charset.CharsetUTF8MB4) - } - setFlen := func(precision string) error { - if len(precision) > 0 { - flen, err := strconv.Atoi(precision) - if err != nil { - return errors.WrapError(errors.ErrInternalCheckFailed, err) - } - col.SetFlen(flen) - } - return nil - } - setDecimal := func(scale string) error { - if len(scale) > 0 { - decimal, err := strconv.Atoi(scale) - if err != nil { - return errors.WrapError(errors.ErrInternalCheckFailed, err) - } - col.SetDecimal(decimal) - } - return nil - } - switch col.GetType() { - case mysql.TypeTimestamp, mysql.TypeDatetime, mysql.TypeDuration: - err := setDecimal(t.Scale) - if err != nil { - return nil, err - } - case mysql.TypeDouble, mysql.TypeFloat, mysql.TypeNewDecimal: - err := setFlen(t.Precision) - if err != nil { - return nil, err - } - err = setDecimal(t.Scale) - if err != nil { - return nil, err - } - case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong, - mysql.TypeBit, mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeBlob, - mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeYear: - err := setFlen(t.Precision) - if err != nil { - return nil, err - } - case mysql.TypeEnum, mysql.TypeSet: - col.SetElems(t.Elems) - } - - return col, nil -} - -// TableDefinition is the detailed table definition used for cloud storage sink. -// TODO: find a better name for this struct. -type TableDefinition struct { - Table string `json:"Table"` - Schema string `json:"Schema"` - Version uint64 `json:"Version"` - // TableVersion is the schema version encoded into schema file name: - // schema_{TableVersion}_{checksum}.json. - // It is passed from tableInfoVersion in path generation. - TableVersion uint64 `json:"TableVersion"` - Query string `json:"Query"` - Type byte `json:"Type"` - Columns []TableCol `json:"TableColumns"` - TotalColumns int `json:"TableColumnsTotal"` -} - -// tableDefWithoutQuery is the table definition without query, which ignores the -// Query, Type and TableVersion field. -type tableDefWithoutQuery struct { - Table string `json:"Table"` - Schema string `json:"Schema"` - Version uint64 `json:"Version"` - Columns []TableCol `json:"TableColumns"` - TotalColumns int `json:"TableColumnsTotal"` -} - -// FromDDLEvent converts from DDLEvent to TableDefinition. -func (t *TableDefinition) FromDDLEvent(event *commonEvent.DDLEvent, outputColumnID bool) { - t.FromTableInfo(event.GetTargetSchemaName(), event.GetTargetTableName(), event.TableInfo, event.FinishedTs, outputColumnID) - t.Query = event.Query - t.Type = event.Type -} - -// ToDDLEvent converts from TableDefinition to DDLEvent. -func (t *TableDefinition) ToDDLEvent() (*commonEvent.DDLEvent, error) { - tableInfo, err := t.ToTableInfo() - if err != nil { - return nil, err - } - return &commonEvent.DDLEvent{ - TableInfo: tableInfo, - FinishedTs: t.TableVersion, - Type: t.Type, - Query: t.Query, - SchemaName: t.Schema, - TableName: t.Table, - BlockedTables: &commonEvent.InfluencedTables{InfluenceType: commonEvent.InfluenceTypeAll}, - }, nil -} - -// FromTableInfo converts from TableInfo to TableDefinition. -func (t *TableDefinition) FromTableInfo( - schemaName string, tableName string, info *common.TableInfo, tableInfoVersion uint64, outputColumnID bool, -) { - t.Version = defaultTableDefinitionVersion - t.TableVersion = tableInfoVersion - - t.Schema = schemaName - t.Table = tableName - if info == nil { - return - } - t.TotalColumns = len(info.GetColumns()) - for _, col := range info.GetColumns() { - var tableCol TableCol - tableCol.FromTiColumnInfo(col, outputColumnID) - t.Columns = append(t.Columns, tableCol) - } -} - -// ToTableInfo converts from TableDefinition to DDLEvent. -func (t *TableDefinition) ToTableInfo() (*common.TableInfo, error) { - tidbTableInfo := &model.TableInfo{ - Name: ast.NewCIStr(t.Table), - } - nextMockID := int64(100) // 100 is an arbitrary number - for _, col := range t.Columns { - tiCol, err := col.ToTiColumnInfo(nextMockID) - if err != nil { - return nil, err - } - if mysql.HasPriKeyFlag(tiCol.GetFlag()) { - // use PKIsHandle to make sure that the primary keys can be detected - tidbTableInfo.PKIsHandle = true - } - tidbTableInfo.Columns = append(tidbTableInfo.Columns, tiCol) - nextMockID++ - } - info := common.NewTableInfo4Decoder(t.Schema, tidbTableInfo) - return info, nil -} - -// IsTableSchema returns whether the TableDefinition is a table schema. -func (t *TableDefinition) IsTableSchema() bool { - if len(t.Columns) != t.TotalColumns { - log.Panic("invalid table definition", zap.Any("tableDef", t)) - } - return t.TotalColumns != 0 -} - -// MarshalWithQuery marshals TableDefinition with Query field. -func (t *TableDefinition) MarshalWithQuery() ([]byte, error) { - data, err := json.MarshalIndent(t, marshalPrefix, marshalIndent) - if err != nil { - return nil, errors.WrapError(errors.ErrMarshalFailed, err) - } - return data, nil -} - -// marshalWithoutQuery marshals TableDefinition without Query field. -func (t *TableDefinition) marshalWithoutQuery() ([]byte, error) { - // sort columns by name - sortedColumns := make([]TableCol, len(t.Columns)) - copy(sortedColumns, t.Columns) - sort.Slice(sortedColumns, func(i, j int) bool { - return sortedColumns[i].Name < sortedColumns[j].Name - }) - - defWithoutQuery := tableDefWithoutQuery{ - Table: t.Table, - Schema: t.Schema, - Columns: sortedColumns, - TotalColumns: t.TotalColumns, - } - - data, err := json.MarshalIndent(defWithoutQuery, marshalPrefix, marshalIndent) - if err != nil { - return nil, errors.WrapError(errors.ErrMarshalFailed, err) - } - return data, nil -} - -// Sum32 returns the 32-bits hash value of TableDefinition. -func (t *TableDefinition) Sum32(hasher *hash.PositionInertia) (uint32, error) { - if hasher == nil { - hasher = hash.NewPositionInertia() - } - hasher.Reset() - data, err := t.marshalWithoutQuery() - if err != nil { - return 0, err - } - - hasher.Write(data) - return hasher.Sum32(), nil -} - -// GenerateSchemaFilePath generates the schema file path for TableDefinition -// with optional table id path. -func (t *TableDefinition) GenerateSchemaFilePath(useTableIDAsPath bool, tableID int64) (string, error) { - checksum, err := t.Sum32(nil) - if err != nil { - return "", err - } - if t.Schema == "" { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("schema cannot be empty") - } - if t.TableVersion == 0 { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("table version cannot be zero") - } - if len(t.Columns) != t.TotalColumns { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid table definition") - } - isTableSchema := t.TotalColumns != 0 - if !isTableSchema && t.Table != "" { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid table definition") - } - if useTableIDAsPath && isTableSchema && tableID <= 0 { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid table id for table-id path") - } - - table := t.Table - if isTableSchema { - tablePath, err := generateTablePath(t.Table, tableID, useTableIDAsPath) - if err != nil { - return "", err - } - table = tablePath - } - omitSchema := useTableIDAsPath && isTableSchema - path, err := generateSchemaFilePath(t.Schema, table, t.TableVersion, checksum, omitSchema) - if err != nil { - return "", err - } - return path, nil -} diff --git a/pkg/sink/cloudstorage/table_definition_test.go b/pkg/sink/cloudstorage/table_definition_test.go deleted file mode 100644 index 342059f571..0000000000 --- a/pkg/sink/cloudstorage/table_definition_test.go +++ /dev/null @@ -1,611 +0,0 @@ -// Copyright 2022 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. -package cloudstorage - -import ( - "encoding/json" - "math" - "math/rand" - "testing" - - "github.com/pingcap/ticdc/pkg/common" - commonEvent "github.com/pingcap/ticdc/pkg/common/event" - timodel "github.com/pingcap/tidb/pkg/meta/model" - "github.com/pingcap/tidb/pkg/parser/ast" - "github.com/pingcap/tidb/pkg/parser/charset" - "github.com/pingcap/tidb/pkg/parser/mysql" - "github.com/pingcap/tidb/pkg/types" - "github.com/stretchr/testify/require" -) - -func generateTableDef() (TableDefinition, *common.TableInfo) { - var columns []*timodel.ColumnInfo - ft := types.NewFieldType(mysql.TypeLong) - ft.SetFlag(mysql.PriKeyFlag | mysql.NotNullFlag) - col := &timodel.ColumnInfo{ - Name: ast.NewCIStr("Id"), - FieldType: *ft, - DefaultValue: 10, - } - columns = append(columns, col) - - ft = types.NewFieldType(mysql.TypeVarchar) - ft.SetFlag(mysql.NotNullFlag) - ft.SetFlen(128) - col = &timodel.ColumnInfo{ - Name: ast.NewCIStr("LastName"), - FieldType: *ft, - DefaultValue: "Default LastName", - } - columns = append(columns, col) - - ft = types.NewFieldType(mysql.TypeVarchar) - ft.SetFlen(64) - col = &timodel.ColumnInfo{ - Name: ast.NewCIStr("FirstName"), - FieldType: *ft, - DefaultValue: "Default FirstName", - } - columns = append(columns, col) - - ft = types.NewFieldType(mysql.TypeDatetime) - col = &timodel.ColumnInfo{ - Name: ast.NewCIStr("Birthday"), - FieldType: *ft, - DefaultValue: 12345678, - } - columns = append(columns, col) - - tableInfo := common.WrapTableInfo("schema1", &timodel.TableInfo{ - ID: 20, - Name: ast.NewCIStr("table1"), - Columns: columns, - UpdateTS: 100, - }) - var def TableDefinition - def.FromTableInfo(tableInfo.GetSchemaName(), tableInfo.GetTableName(), tableInfo, tableInfo.GetUpdateTS(), false) - return def, tableInfo -} - -func TestFromDDLEventUsesTargetNames(t *testing.T) { - t.Parallel() - - idFieldType := types.NewFieldType(mysql.TypeLong) - idFieldType.SetFlag(mysql.PriKeyFlag | mysql.NotNullFlag) - routedTableInfo := common.WrapTableInfo("source_db", &timodel.TableInfo{ - ID: 20, - Name: ast.NewCIStr("source_table"), - UpdateTS: 100, - Columns: []*timodel.ColumnInfo{ - { - ID: 1, - Name: ast.NewCIStr("id"), - FieldType: *idFieldType, - State: timodel.StatePublic, - }, - }, - }).CloneWithRouting("target_db", "target_table") - sourceDDL := &commonEvent.DDLEvent{ - Version: commonEvent.DDLEventVersion1, - Type: byte(timodel.ActionCreateTable), - SchemaName: "source_db", - TableName: "source_table", - Query: "CREATE TABLE `source_db`.`source_table` (`id` INT PRIMARY KEY)", - TableInfo: routedTableInfo, - FinishedTs: 100, - } - - routedDDL := commonEvent.NewRoutedDDLEvent( - sourceDDL, - "CREATE TABLE `target_db`.`target_table` (`id` INT PRIMARY KEY)", - "target_db", - "target_table", - "", - "", - routedTableInfo, - nil, - nil, - ) - - var def TableDefinition - def.FromDDLEvent(routedDDL, false) - require.Equal(t, "target_db", def.Schema) - require.Equal(t, "target_table", def.Table) - require.Contains(t, def.Query, "`target_db`.`target_table`") -} - -func TestTableCol(t *testing.T) { - t.Parallel() - - testCases := []struct { - name string - filedType byte - flen int - decimal int - flag uint - charset string - expected string - }{ - { - name: "time", - filedType: mysql.TypeDuration, - flen: math.MinInt, - decimal: 5, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"TIME","ColumnScale":"5"}`, - }, - { - name: "int(5) UNSIGNED", - filedType: mysql.TypeLong, - flen: 5, - decimal: math.MinInt, - flag: mysql.UnsignedFlag, - expected: `{"ColumnName":"","ColumnType":"INT UNSIGNED","ColumnPrecision":"5"}`, - }, - { - name: "float(12,3)", - filedType: mysql.TypeFloat, - flen: 12, - decimal: 3, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"12","ColumnScale":"3"}`, - }, - { - name: "float", - filedType: mysql.TypeFloat, - flen: 12, - decimal: -1, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"12"}`, - }, - { - name: "float", - filedType: mysql.TypeFloat, - flen: 5, - decimal: -1, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"5"}`, - }, - { - name: "float(7,3)", - filedType: mysql.TypeFloat, - flen: 7, - decimal: 3, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"FLOAT","ColumnPrecision":"7","ColumnScale":"3"}`, - }, - { - name: "double(12,3)", - filedType: mysql.TypeDouble, - flen: 12, - decimal: 3, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"12","ColumnScale":"3"}`, - }, - { - name: "double", - filedType: mysql.TypeDouble, - flen: 12, - decimal: -1, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"12"}`, - }, - { - name: "double", - filedType: mysql.TypeDouble, - flen: 5, - decimal: -1, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"5"}`, - }, - { - name: "double(7,3)", - filedType: mysql.TypeDouble, - flen: 7, - decimal: 3, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"DOUBLE","ColumnPrecision":"7","ColumnScale":"3"}`, - }, - { - name: "tinyint(5)", - filedType: mysql.TypeTiny, - flen: 5, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"TINYINT","ColumnPrecision":"5"}`, - }, - { - name: "smallint(5)", - filedType: mysql.TypeShort, - flen: 5, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"SMALLINT","ColumnPrecision":"5"}`, - }, - { - name: "mediumint(10)", - filedType: mysql.TypeInt24, - flen: 10, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"MEDIUMINT","ColumnPrecision":"10"}`, - }, - { - name: "int(11)", - filedType: mysql.TypeLong, - flen: math.MinInt, - decimal: math.MinInt, - flag: mysql.PriKeyFlag, - expected: `{"ColumnIsPk":"true", "ColumnName":"", "ColumnPrecision":"11", "ColumnType":"INT"}`, - }, - { - name: "bigint(20)", - filedType: mysql.TypeLonglong, - flen: 20, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"BIGINT","ColumnPrecision":"20"}`, - }, - { - name: "bit(5)", - filedType: mysql.TypeBit, - flen: 5, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"BIT","ColumnPrecision":"5"}`, - }, - { - name: "varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin", - filedType: mysql.TypeVarchar, - flen: 128, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"VARCHAR","ColumnPrecision":"128"}`, - }, - { - name: "char(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin", - filedType: mysql.TypeString, - flen: 32, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"CHAR","ColumnPrecision":"32"}`, - }, - { - name: "var_string(64)", - filedType: mysql.TypeVarString, - flen: 64, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"VAR_STRING","ColumnPrecision":"64"}`, - }, - { - name: "blob", - filedType: mysql.TypeBlob, - flen: 100, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"BLOB","ColumnPrecision":"100"}`, - }, - { - name: "text", - filedType: mysql.TypeBlob, - flen: 100, - decimal: math.MinInt, - flag: 0, - charset: charset.CharsetUTF8MB4, - expected: `{"ColumnName":"","ColumnType":"TEXT","ColumnPrecision":"100"}`, - }, - { - name: "tinyblob", - filedType: mysql.TypeTinyBlob, - flen: 120, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"TINYBLOB","ColumnPrecision":"120"}`, - }, - { - name: "mediumblob", - filedType: mysql.TypeMediumBlob, - flen: 100, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"MEDIUMBLOB","ColumnPrecision":"100"}`, - }, - { - name: "longblob", - filedType: mysql.TypeLongBlob, - flen: 5, - decimal: math.MinInt, - flag: 0, - expected: `{"ColumnName":"","ColumnType":"LONGBLOB","ColumnPrecision":"5"}`, - }, - { - name: "enum", - filedType: mysql.TypeEnum, - expected: `{"ColumnName":"","ColumnType":"ENUM"}`, - }, - { - name: "set", - filedType: mysql.TypeSet, - expected: `{"ColumnName":"","ColumnType":"SET"}`, - }, - { - name: "timestamp(2)", - filedType: mysql.TypeTimestamp, - flen: 8, - decimal: 2, - expected: `{"ColumnName":"","ColumnType":"TIMESTAMP","ColumnScale":"2"}`, - }, - { - name: "timestamp", - filedType: mysql.TypeTimestamp, - flen: 8, - decimal: 0, - expected: `{"ColumnName":"","ColumnType":"TIMESTAMP"}`, - }, - { - name: "datetime(2)", - filedType: mysql.TypeDatetime, - flen: 8, - decimal: 2, - expected: `{"ColumnName":"","ColumnType":"DATETIME","ColumnScale":"2"}`, - }, - { - name: "datetime", - filedType: mysql.TypeDatetime, - flen: 8, - decimal: 0, - expected: `{"ColumnName":"","ColumnType":"DATETIME"}`, - }, - { - name: "date", - filedType: mysql.TypeDate, - flen: 8, - decimal: 2, - expected: `{"ColumnName":"","ColumnType":"DATE"}`, - }, - { - name: "date", - filedType: mysql.TypeDate, - flen: 8, - decimal: 0, - expected: `{"ColumnName":"","ColumnType":"DATE"}`, - }, - { - name: "year(4)", - filedType: mysql.TypeYear, - flen: 4, - decimal: 0, - expected: `{"ColumnName":"","ColumnType":"YEAR","ColumnPrecision":"4"}`, - }, - { - name: "year(2)", - filedType: mysql.TypeYear, - flen: 2, - decimal: 2, - expected: `{"ColumnName":"","ColumnType":"YEAR","ColumnPrecision":"2"}`, - }, - } - - for _, tc := range testCases { - ft := types.NewFieldType(tc.filedType) - if tc.flen != math.MinInt { - ft.SetFlen(tc.flen) - } - if tc.decimal != math.MinInt { - ft.SetDecimal(tc.decimal) - } - if tc.flag != 0 { - ft.SetFlag(tc.flag) - } - if len(tc.charset) != 0 { - ft.SetCharset(tc.charset) - } - col := &timodel.ColumnInfo{FieldType: *ft} - var tableCol TableCol - tableCol.FromTiColumnInfo(col, false) - encodedCol, err := json.Marshal(tableCol) - require.Nil(t, err, tc.name) - require.JSONEq(t, tc.expected, string(encodedCol), tc.name) - - _, err = tableCol.ToTiColumnInfo(100) - require.NoError(t, err) - } -} - -func TestTableDefinition(t *testing.T) { - t.Parallel() - - def, tableInfo := generateTableDef() - encodedDef, err := json.MarshalIndent(def, "", " ") - require.NoError(t, err) - require.JSONEq(t, `{ - "Table": "table1", - "Schema": "schema1", - "Version": 1, - "TableVersion": 100, - "Query": "", - "Type": 0, - "TableColumns": [ - { - "ColumnName": "Id", - "ColumnType": "INT", - "ColumnPrecision": "11", - "ColumnDefault":10, - "ColumnNullable": "false", - "ColumnIsPk": "true" - }, - { - "ColumnName": "LastName", - "ColumnType": "VARCHAR", - "ColumnDefault":"Default LastName", - "ColumnPrecision": "128", - "ColumnNullable": "false" - }, - { - "ColumnName": "FirstName", - "ColumnDefault":"Default FirstName", - "ColumnType": "VARCHAR", - "ColumnPrecision": "64" - }, - { - "ColumnName": "Birthday", - "ColumnDefault":1.2345678e+07, - "ColumnType": "DATETIME" - } - ], - "TableColumnsTotal": 4 - }`, string(encodedDef)) - - def = TableDefinition{} - event := &commonEvent.DDLEvent{ - FinishedTs: tableInfo.GetUpdateTS(), - Type: byte(timodel.ActionAddColumn), - Query: "alter table schema1.table1 add Birthday date", - TableInfo: tableInfo, - SchemaName: "schema1", - TableName: "table1", - } - def.FromDDLEvent(event, false) - encodedDef, err = json.MarshalIndent(def, "", " ") - require.NoError(t, err) - require.JSONEq(t, `{ - "Table": "table1", - "Schema": "schema1", - "Version": 1, - "TableVersion": 100, - "Query": "alter table schema1.table1 add Birthday date", - "Type": 5, - "TableColumns": [ - { - "ColumnName": "Id", - "ColumnType": "INT", - "ColumnPrecision": "11", - "ColumnDefault":10, - "ColumnNullable": "false", - "ColumnIsPk": "true" - }, - { - "ColumnName": "LastName", - "ColumnType": "VARCHAR", - "ColumnDefault":"Default LastName", - "ColumnPrecision": "128", - "ColumnNullable": "false" - }, - { - "ColumnName": "FirstName", - "ColumnDefault":"Default FirstName", - "ColumnType": "VARCHAR", - "ColumnPrecision": "64" - }, - { - "ColumnName": "Birthday", - "ColumnDefault":1.2345678e+07, - "ColumnType": "DATETIME" - } - ], - "TableColumnsTotal": 4 - }`, string(encodedDef)) - - tableInfo, err = def.ToTableInfo() - require.NoError(t, err) - require.Len(t, tableInfo.GetColumns(), 4) - - event, err = def.ToDDLEvent() - require.NoError(t, err) - require.Equal(t, byte(timodel.ActionAddColumn), event.Type) - require.Equal(t, uint64(100), event.FinishedTs) -} - -func TestTableDefinitionGenFilePath(t *testing.T) { - t.Parallel() - - schemaDef := &TableDefinition{ - Schema: "schema1", - Version: defaultTableDefinitionVersion, - TableVersion: 100, - } - schemaPath, err := schemaDef.GenerateSchemaFilePath(false, 0) - require.NoError(t, err) - require.Equal(t, "schema1/meta/schema_100_3233644819.json", schemaPath) - - schemaPath, err = schemaDef.GenerateSchemaFilePath(true, 0) - require.NoError(t, err) - require.Equal(t, "schema1/meta/schema_100_3233644819.json", schemaPath) - - def, _ := generateTableDef() - tablePath, err := def.GenerateSchemaFilePath(false, 0) - require.NoError(t, err) - require.Equal(t, "schema1/table1/meta/schema_100_3752767265.json", tablePath) - - tablePath, err = def.GenerateSchemaFilePath(true, 12345) - require.NoError(t, err) - require.Equal(t, "12345/meta/schema_100_3752767265.json", tablePath) -} - -func TestGenerateSchemaFilePathValidation(t *testing.T) { - t.Parallel() - - def, _ := generateTableDef() - - // empty schema - emptySchemaDef := &TableDefinition{Schema: "", Table: "t1", TableVersion: 100, TotalColumns: 1, Columns: []TableCol{{}}} - _, err := emptySchemaDef.GenerateSchemaFilePath(false, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "schema cannot be empty") - - // zero table version - zeroVersionDef := &TableDefinition{Schema: "s1", Table: "t1", TableVersion: 0, TotalColumns: 1, Columns: []TableCol{{}}} - _, err = zeroVersionDef.GenerateSchemaFilePath(false, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "table version cannot be zero") - - // use-table-id-as-path with invalid tableID - _, err = def.GenerateSchemaFilePath(true, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid table id for table-id path") - _, err = def.GenerateSchemaFilePath(true, -1) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid table id for table-id path") - - // invalid table definition - invalidDef := &TableDefinition{Schema: "s1", Table: "t1", TableVersion: 100, TotalColumns: 1, Columns: nil} - _, err = invalidDef.GenerateSchemaFilePath(false, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid table definition") -} - -func TestTableDefinitionSum32(t *testing.T) { - t.Parallel() - - def, _ := generateTableDef() - checksum1, err := def.Sum32(nil) - require.NoError(t, err) - checksum2, err := def.Sum32(nil) - require.NoError(t, err) - require.Equal(t, checksum1, checksum2) - - n := len(def.Columns) - newCol := make([]TableCol, n) - copy(newCol, def.Columns) - newDef := def - newDef.Columns = newCol - - for i := 0; i < n; i++ { - target := rand.Intn(n) - newDef.Columns[i], newDef.Columns[target] = newDef.Columns[target], newDef.Columns[i] - newChecksum, err := newDef.Sum32(nil) - require.NoError(t, err) - require.Equal(t, checksum1, newChecksum) - } -} From 5715c11220ab8a95a9267df5481ee9b89b35a574 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 24 Jun 2026 00:06:44 +0800 Subject: [PATCH 5/8] refactor the code --- cmd/storage-consumer/consumer.go | 2 +- .../sink/cloudstorage/buffer_manager.go | 2 +- .../sink/cloudstorage/buffer_manager_test.go | 2 +- .../sink/cloudstorage/dml_writers.go | 2 +- .../sink/cloudstorage/encoder_group_test.go | 2 +- downstreamadapter/sink/cloudstorage/sink.go | 10 +-- .../sink/cloudstorage/sink_test.go | 10 +-- downstreamadapter/sink/cloudstorage/task.go | 2 +- downstreamadapter/sink/cloudstorage/writer.go | 2 +- .../sink/cloudstorage/writer_test.go | 2 +- pkg/{sink => }/cloudstorage/config.go | 0 pkg/{sink => }/cloudstorage/config_test.go | 0 pkg/{sink => }/cloudstorage/main_test.go | 0 pkg/{sink => }/cloudstorage/path.go | 0 pkg/{sink => }/cloudstorage/path_key.go | 0 pkg/{sink => }/cloudstorage/path_key_test.go | 30 ++----- pkg/{sink => }/cloudstorage/path_test.go | 0 pkg/{sink => }/cloudstorage/schema_file.go | 29 +++++++ .../cloudstorage/schema_file_test.go | 43 ++++++++++ pkg/logger/log_test.go | 16 ---- pkg/sink/cloudstorage/schema_file_parse.go | 53 ------------ .../cloudstorage/schema_file_parse_test.go | 84 ------------------- 22 files changed, 98 insertions(+), 193 deletions(-) rename pkg/{sink => }/cloudstorage/config.go (100%) rename pkg/{sink => }/cloudstorage/config_test.go (100%) rename pkg/{sink => }/cloudstorage/main_test.go (100%) rename pkg/{sink => }/cloudstorage/path.go (100%) rename pkg/{sink => }/cloudstorage/path_key.go (100%) rename pkg/{sink => }/cloudstorage/path_key_test.go (92%) rename pkg/{sink => }/cloudstorage/path_test.go (100%) rename pkg/{sink => }/cloudstorage/schema_file.go (90%) rename pkg/{sink => }/cloudstorage/schema_file_test.go (91%) delete mode 100644 pkg/sink/cloudstorage/schema_file_parse.go delete mode 100644 pkg/sink/cloudstorage/schema_file_parse_test.go diff --git a/cmd/storage-consumer/consumer.go b/cmd/storage-consumer/consumer.go index b4bbda6a34..07f446e1f7 100644 --- a/cmd/storage-consumer/consumer.go +++ b/cmd/storage-consumer/consumer.go @@ -25,11 +25,11 @@ import ( "github.com/pingcap/ticdc/cmd/util" "github.com/pingcap/ticdc/downstreamadapter/sink" "github.com/pingcap/ticdc/downstreamadapter/sink/helper" + "github.com/pingcap/ticdc/pkg/cloudstorage" commonType "github.com/pingcap/ticdc/pkg/common" "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/config" "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/sink/codec/canal" "github.com/pingcap/ticdc/pkg/sink/codec/common" "github.com/pingcap/ticdc/pkg/sink/codec/csv" diff --git a/downstreamadapter/sink/cloudstorage/buffer_manager.go b/downstreamadapter/sink/cloudstorage/buffer_manager.go index 6c028bc8d7..6ad0451167 100644 --- a/downstreamadapter/sink/cloudstorage/buffer_manager.go +++ b/downstreamadapter/sink/cloudstorage/buffer_manager.go @@ -20,9 +20,9 @@ import ( "github.com/pingcap/ticdc/downstreamadapter/sink/cloudstorage/spool" "github.com/pingcap/ticdc/downstreamadapter/sink/metrics" + "github.com/pingcap/ticdc/pkg/cloudstorage" "github.com/pingcap/ticdc/pkg/common" "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" ) const ( diff --git a/downstreamadapter/sink/cloudstorage/buffer_manager_test.go b/downstreamadapter/sink/cloudstorage/buffer_manager_test.go index c77678e304..48518eadd1 100644 --- a/downstreamadapter/sink/cloudstorage/buffer_manager_test.go +++ b/downstreamadapter/sink/cloudstorage/buffer_manager_test.go @@ -19,10 +19,10 @@ import ( "time" "github.com/pingcap/ticdc/downstreamadapter/sink/cloudstorage/spool" + "github.com/pingcap/ticdc/pkg/cloudstorage" commonType "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/sink/codec/common" "github.com/stretchr/testify/require" ) diff --git a/downstreamadapter/sink/cloudstorage/dml_writers.go b/downstreamadapter/sink/cloudstorage/dml_writers.go index fc94265ffa..37f54fd258 100644 --- a/downstreamadapter/sink/cloudstorage/dml_writers.go +++ b/downstreamadapter/sink/cloudstorage/dml_writers.go @@ -19,10 +19,10 @@ import ( "github.com/pingcap/ticdc/downstreamadapter/sink/cloudstorage/spool" sinkmetrics "github.com/pingcap/ticdc/downstreamadapter/sink/metrics" + "github.com/pingcap/ticdc/pkg/cloudstorage" commonType "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/metrics" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/sink/codec/common" "github.com/pingcap/ticdc/utils/chann" "github.com/pingcap/tidb/pkg/objstore/storeapi" diff --git a/downstreamadapter/sink/cloudstorage/encoder_group_test.go b/downstreamadapter/sink/cloudstorage/encoder_group_test.go index acde673e5a..269702ec04 100644 --- a/downstreamadapter/sink/cloudstorage/encoder_group_test.go +++ b/downstreamadapter/sink/cloudstorage/encoder_group_test.go @@ -20,10 +20,10 @@ import ( "time" "github.com/pingcap/ticdc/downstreamadapter/sink/helper" + "github.com/pingcap/ticdc/pkg/cloudstorage" commonType "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/config" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/sink/codec/common" timodel "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser/ast" diff --git a/downstreamadapter/sink/cloudstorage/sink.go b/downstreamadapter/sink/cloudstorage/sink.go index b448bc7582..a8b05e7567 100644 --- a/downstreamadapter/sink/cloudstorage/sink.go +++ b/downstreamadapter/sink/cloudstorage/sink.go @@ -22,12 +22,12 @@ import ( "github.com/pingcap/log" "github.com/pingcap/ticdc/downstreamadapter/sink/helper" + "github.com/pingcap/ticdc/pkg/cloudstorage" "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/config" "github.com/pingcap/ticdc/pkg/errors" "github.com/pingcap/ticdc/pkg/metrics" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/util" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/objstore/storeapi" @@ -63,9 +63,8 @@ type sink struct { lastCheckpointTs atomic.Uint64 lastSendCheckpointTsTime time.Time - schemaStore *commonEvent.TableSchemaStore - cron *cron.Cron - statistics *metrics.Statistics + cron *cron.Cron + statistics *metrics.Statistics isNormal *atomic.Bool cleanupJobs []func() /* only for test */ @@ -376,8 +375,7 @@ func (s *sink) sendCheckpointTs(ctx context.Context) error { } } -func (s *sink) SetTableSchemaStore(schemaStore *commonEvent.TableSchemaStore) { - s.schemaStore = schemaStore +func (s *sink) SetTableSchemaStore(_ *commonEvent.TableSchemaStore) { } func (s *sink) initCron( diff --git a/downstreamadapter/sink/cloudstorage/sink_test.go b/downstreamadapter/sink/cloudstorage/sink_test.go index 88cbb88670..1edcd30fb9 100644 --- a/downstreamadapter/sink/cloudstorage/sink_test.go +++ b/downstreamadapter/sink/cloudstorage/sink_test.go @@ -26,11 +26,11 @@ import ( "testing" "time" + "github.com/pingcap/ticdc/pkg/cloudstorage" "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/config" "github.com/pingcap/ticdc/pkg/pdutil" - pkgcloudstorage "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/util" timodel "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser/ast" @@ -219,7 +219,7 @@ func TestIgnoreCallsAfterRunError(t *testing.T) { func TestCloudStorageSinkBatchConfig(t *testing.T) { sink := &sink{ - cfg: &pkgcloudstorage.Config{ + cfg: &cloudstorage.Config{ FileSize: 2048, }, } @@ -514,7 +514,7 @@ func TestWriteDDLEventWithInvalidExchangePartitionEvent(t *testing.T) { } } -func readSchemaFileForTest(t *testing.T, parentDir, schema, table string) pkgcloudstorage.SchemaFile { +func readSchemaFileForTest(t *testing.T, parentDir, schema, table string) cloudstorage.SchemaFile { t.Helper() files, err := os.ReadDir(filepath.Join(parentDir, schema, table, "meta")) @@ -524,7 +524,7 @@ func readSchemaFileForTest(t *testing.T, parentDir, schema, table string) pkgclo content, err := os.ReadFile(filepath.Join(parentDir, schema, table, "meta", files[0].Name())) require.NoError(t, err) - var schemaFile pkgcloudstorage.SchemaFile + var schemaFile cloudstorage.SchemaFile require.NoError(t, json.Unmarshal(content, &schemaFile)) return schemaFile } @@ -710,7 +710,7 @@ func TestCleanupExpiredFiles(t *testing.T) { cloudStorageSink := &sink{ changefeedID: common.NewChangefeedID4Test("test", "test"), - cfg: &pkgcloudstorage.Config{ + cfg: &cloudstorage.Config{ DateSeparator: config.DateSeparatorDay.String(), FileExpirationDays: 1, FileCleanupCronSpec: util.GetOrZero(replicaConfig.Sink.CloudStorageConfig.FileCleanupCronSpec), diff --git a/downstreamadapter/sink/cloudstorage/task.go b/downstreamadapter/sink/cloudstorage/task.go index d832547d4a..3b9ff1a082 100644 --- a/downstreamadapter/sink/cloudstorage/task.go +++ b/downstreamadapter/sink/cloudstorage/task.go @@ -17,10 +17,10 @@ import ( "context" "sync/atomic" + "github.com/pingcap/ticdc/pkg/cloudstorage" commonType "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/sink/codec/common" ) diff --git a/downstreamadapter/sink/cloudstorage/writer.go b/downstreamadapter/sink/cloudstorage/writer.go index 0ac5083d07..e56315fd7c 100644 --- a/downstreamadapter/sink/cloudstorage/writer.go +++ b/downstreamadapter/sink/cloudstorage/writer.go @@ -22,10 +22,10 @@ import ( "github.com/pingcap/log" "github.com/pingcap/ticdc/downstreamadapter/sink/cloudstorage/spool" "github.com/pingcap/ticdc/downstreamadapter/sink/metrics" + "github.com/pingcap/ticdc/pkg/cloudstorage" "github.com/pingcap/ticdc/pkg/common" "github.com/pingcap/ticdc/pkg/errors" pmetrics "github.com/pingcap/ticdc/pkg/metrics" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/tidb/pkg/objstore/storeapi" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" diff --git a/downstreamadapter/sink/cloudstorage/writer_test.go b/downstreamadapter/sink/cloudstorage/writer_test.go index 64000931da..250c7b00a6 100644 --- a/downstreamadapter/sink/cloudstorage/writer_test.go +++ b/downstreamadapter/sink/cloudstorage/writer_test.go @@ -27,12 +27,12 @@ import ( "time" "github.com/pingcap/ticdc/downstreamadapter/sink/cloudstorage/spool" + "github.com/pingcap/ticdc/pkg/cloudstorage" commonType "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/config" "github.com/pingcap/ticdc/pkg/metrics" "github.com/pingcap/ticdc/pkg/pdutil" - "github.com/pingcap/ticdc/pkg/sink/cloudstorage" "github.com/pingcap/ticdc/pkg/sink/codec/common" "github.com/pingcap/ticdc/pkg/util" "github.com/pingcap/tidb/pkg/meta/model" diff --git a/pkg/sink/cloudstorage/config.go b/pkg/cloudstorage/config.go similarity index 100% rename from pkg/sink/cloudstorage/config.go rename to pkg/cloudstorage/config.go diff --git a/pkg/sink/cloudstorage/config_test.go b/pkg/cloudstorage/config_test.go similarity index 100% rename from pkg/sink/cloudstorage/config_test.go rename to pkg/cloudstorage/config_test.go diff --git a/pkg/sink/cloudstorage/main_test.go b/pkg/cloudstorage/main_test.go similarity index 100% rename from pkg/sink/cloudstorage/main_test.go rename to pkg/cloudstorage/main_test.go diff --git a/pkg/sink/cloudstorage/path.go b/pkg/cloudstorage/path.go similarity index 100% rename from pkg/sink/cloudstorage/path.go rename to pkg/cloudstorage/path.go diff --git a/pkg/sink/cloudstorage/path_key.go b/pkg/cloudstorage/path_key.go similarity index 100% rename from pkg/sink/cloudstorage/path_key.go rename to pkg/cloudstorage/path_key.go diff --git a/pkg/sink/cloudstorage/path_key_test.go b/pkg/cloudstorage/path_key_test.go similarity index 92% rename from pkg/sink/cloudstorage/path_key_test.go rename to pkg/cloudstorage/path_key_test.go index a195b793bb..9535823f05 100644 --- a/pkg/sink/cloudstorage/path_key_test.go +++ b/pkg/cloudstorage/path_key_test.go @@ -116,6 +116,7 @@ func TestParseDMLFilePath(t *testing.T) { name string dateSeparator string path string + indexPath string fileIndexWidth int dmlkey DmlPathKey fileIndex FileIndex @@ -189,6 +190,7 @@ func TestParseDMLFilePath(t *testing.T) { name: "day date with table id path", dateSeparator: "day", path: fmt.Sprintf("12345/123456/2023-05-09/CDC_%s_00000000000000000010.csv", dispatcherID), + indexPath: fmt.Sprintf("12345/123456/2023-05-09/meta/CDC_%s.index", dispatcherID), fileIndexWidth: 20, dmlkey: DmlPathKey{ SchemaPathKey: SchemaPathKey{ @@ -239,6 +241,13 @@ func TestParseDMLFilePath(t *testing.T) { require.Equal(t, tc.dmlkey, dmlkey) require.Equal(t, tc.fileIndex, fileIndex) require.Equal(t, tc.path, dmlkey.GenerateDMLFilePath(&fileIndex, ".csv", tc.fileIndexWidth)) + if tc.indexPath != "" { + var indexKey DmlPathKey + id, err := indexKey.ParseIndexFilePath(tc.dateSeparator, tc.indexPath) + require.NoError(t, err) + require.Equal(t, tc.fileIndex.DispatcherID, id) + require.Equal(t, tc.dmlkey, indexKey) + } }) } } @@ -251,27 +260,6 @@ func TestParseDMLFilePathRejectsInvalidPath(t *testing.T) { require.Error(t, err) } -func TestParseIndexFilePathWithTableIDAsPath(t *testing.T) { - t.Parallel() - - dispatcherID := common.NewDispatcherID().String() - indexPath := fmt.Sprintf("12345/123456/2023-05-09/meta/CDC_%s.index", dispatcherID) - - var dmlkey DmlPathKey - id, err := dmlkey.ParseIndexFilePath("day", indexPath) - require.NoError(t, err) - require.Equal(t, dispatcherID, id) - require.Equal(t, DmlPathKey{ - SchemaPathKey: SchemaPathKey{ - Schema: "12345", - TableVersion: 123456, - }, - UseTableIDAsPath: true, - TableID: 12345, - Date: "2023-05-09", - }, dmlkey) -} - func TestSchemaFileDMLPathKeyOrder(t *testing.T) { t.Parallel() diff --git a/pkg/sink/cloudstorage/path_test.go b/pkg/cloudstorage/path_test.go similarity index 100% rename from pkg/sink/cloudstorage/path_test.go rename to pkg/cloudstorage/path_test.go diff --git a/pkg/sink/cloudstorage/schema_file.go b/pkg/cloudstorage/schema_file.go similarity index 90% rename from pkg/sink/cloudstorage/schema_file.go rename to pkg/cloudstorage/schema_file.go index 8e8d4dc8a6..4b53d80c38 100644 --- a/pkg/sink/cloudstorage/schema_file.go +++ b/pkg/cloudstorage/schema_file.go @@ -13,6 +13,7 @@ package cloudstorage import ( + "context" "encoding/json" "sort" "strconv" @@ -24,6 +25,7 @@ import ( "github.com/pingcap/ticdc/pkg/errors" "github.com/pingcap/ticdc/pkg/hash" "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/objstore/storeapi" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/charset" "github.com/pingcap/tidb/pkg/parser/mysql" @@ -288,6 +290,33 @@ func (t *SchemaFile) Marshal() ([]byte, error) { return data, nil } +// Parse parses a schema file and validates its path metadata. +func Parse(ctx context.Context, storage storeapi.Storage, path string) (SchemaPathKey, SchemaFile, error) { + var schemaKey SchemaPathKey + checksum, err := schemaKey.ParseSchemaFilePath(path) + if err != nil { + return schemaKey, SchemaFile{}, err + } + var schemaFile SchemaFile + schemaContent, err := storage.ReadFile(ctx, path) + if err != nil { + return schemaKey, schemaFile, errors.Trace(err) + } + if err = json.Unmarshal(schemaContent, &schemaFile); err != nil { + return schemaKey, schemaFile, errors.Trace(err) + } + checksumInMem, err := schemaFile.Sum32(nil) + if err != nil { + return schemaKey, schemaFile, errors.Trace(err) + } + if checksumInMem != checksum || schemaKey.TableVersion != schemaFile.TableVersion { + return schemaKey, schemaFile, errors.ErrStorageSinkInvalidFileName.GenWithStack( + "checksum mismatch in schema file %s: checksum in memory %d, checksum in file %d, table version in path %d, table version in file %d", + path, checksumInMem, checksum, schemaKey.TableVersion, schemaFile.TableVersion) + } + return schemaKey, schemaFile, nil +} + // marshalForChecksum marshals fields covered by the path checksum. func (t *SchemaFile) marshalForChecksum() ([]byte, error) { // sort columns by name diff --git a/pkg/sink/cloudstorage/schema_file_test.go b/pkg/cloudstorage/schema_file_test.go similarity index 91% rename from pkg/sink/cloudstorage/schema_file_test.go rename to pkg/cloudstorage/schema_file_test.go index 4fcd1644e3..3a5866eb1f 100644 --- a/pkg/sink/cloudstorage/schema_file_test.go +++ b/pkg/cloudstorage/schema_file_test.go @@ -13,13 +13,17 @@ package cloudstorage import ( + "context" "encoding/json" + "fmt" "math" "math/rand" "testing" "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" + "github.com/pingcap/ticdc/pkg/errors" + "github.com/pingcap/ticdc/pkg/util" timodel "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/charset" @@ -559,6 +563,45 @@ func TestSchemaFileGenFilePath(t *testing.T) { require.Equal(t, "12345/meta/schema_100_3752767265.json", tablePath) } +func TestParseSchemaFile(t *testing.T) { + t.Parallel() + + ctx := context.Background() + storage, err := util.GetExternalStorageWithDefaultTimeout(ctx, fmt.Sprintf("file:///%s", t.TempDir())) + require.NoError(t, err) + defer storage.Close() + + schemaFile, _ := generateSchemaFile() + schemaFilePath, err := schemaFile.GenerateSchemaFilePath(false, 0) + require.NoError(t, err) + encodedSchemaFile, err := schemaFile.Marshal() + require.NoError(t, err) + require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedSchemaFile)) + + schemaKey, got, err := Parse(ctx, storage, schemaFilePath) + require.NoError(t, err) + require.Equal(t, SchemaPathKey{ + Schema: schemaFile.Schema, + Table: schemaFile.Table, + TableVersion: schemaFile.TableVersion, + }, schemaKey) + require.Equal(t, schemaFile.Schema, got.Schema) + require.Equal(t, schemaFile.Table, got.Table) + require.Equal(t, schemaFile.Version, got.Version) + require.Equal(t, schemaFile.TableVersion, got.TableVersion) + require.Equal(t, schemaFile.TotalColumns, got.TotalColumns) + require.Len(t, got.Columns, len(schemaFile.Columns)) + + schemaFile.TableVersion++ + encodedSchemaFile, err = schemaFile.Marshal() + require.NoError(t, err) + require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedSchemaFile)) + + _, _, err = Parse(ctx, storage, schemaFilePath) + require.Error(t, err) + require.True(t, errors.ErrStorageSinkInvalidFileName.Equal(err)) +} + func TestGenerateSchemaFilePathValidation(t *testing.T) { t.Parallel() diff --git a/pkg/logger/log_test.go b/pkg/logger/log_test.go index c03936a145..23b5d36238 100644 --- a/pkg/logger/log_test.go +++ b/pkg/logger/log_test.go @@ -14,10 +14,8 @@ package logger import ( - stdlog "log" "testing" - "github.com/IBM/sarama" "github.com/pingcap/log" "github.com/stretchr/testify/require" "go.uber.org/zap/zapcore" @@ -33,17 +31,3 @@ func TestIsDebugEnabled(t *testing.T) { log.SetLevel(zapcore.DebugLevel) require.True(t, IsDebugEnabled()) } - -func TestInitSaramaLoggerResetsWhenInfoEnabled(t *testing.T) { - originalLogger := sarama.Logger - defer func() { - sarama.Logger = originalLogger - }() - - require.NoError(t, initSaramaLogger(zapcore.DebugLevel)) - debugLogger := sarama.Logger - - require.NoError(t, initSaramaLogger(zapcore.InfoLevel)) - require.NotSame(t, debugLogger, sarama.Logger) - require.IsType(t, stdlog.New(nil, "", 0), sarama.Logger) -} diff --git a/pkg/sink/cloudstorage/schema_file_parse.go b/pkg/sink/cloudstorage/schema_file_parse.go deleted file mode 100644 index dfa54700fc..0000000000 --- a/pkg/sink/cloudstorage/schema_file_parse.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2026 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package cloudstorage - -import ( - "context" - "encoding/json" - - "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/tidb/pkg/objstore/storeapi" -) - -// Parse parses a schema file and validates its path metadata. -func Parse( - ctx context.Context, - storage storeapi.Storage, - path string, -) (SchemaPathKey, SchemaFile, error) { - var schemaKey SchemaPathKey - checksum, err := schemaKey.ParseSchemaFilePath(path) - if err != nil { - return schemaKey, SchemaFile{}, err - } - var schemaFile SchemaFile - schemaContent, err := storage.ReadFile(ctx, path) - if err != nil { - return schemaKey, schemaFile, errors.Trace(err) - } - if err = json.Unmarshal(schemaContent, &schemaFile); err != nil { - return schemaKey, schemaFile, errors.Trace(err) - } - checksumInMem, err := schemaFile.Sum32(nil) - if err != nil { - return schemaKey, schemaFile, errors.Trace(err) - } - if checksumInMem != checksum || schemaKey.TableVersion != schemaFile.TableVersion { - return schemaKey, schemaFile, errors.ErrStorageSinkInvalidFileName.GenWithStack( - "checksum mismatch in schema file %s: checksum in memory %d, checksum in file %d, table version in path %d, table version in file %d", - path, checksumInMem, checksum, schemaKey.TableVersion, schemaFile.TableVersion) - } - return schemaKey, schemaFile, nil -} diff --git a/pkg/sink/cloudstorage/schema_file_parse_test.go b/pkg/sink/cloudstorage/schema_file_parse_test.go deleted file mode 100644 index c4e7592daa..0000000000 --- a/pkg/sink/cloudstorage/schema_file_parse_test.go +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2026 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package cloudstorage - -import ( - "context" - "fmt" - "testing" - - "github.com/pingcap/ticdc/pkg/errors" - "github.com/pingcap/ticdc/pkg/util" - "github.com/stretchr/testify/require" -) - -func TestParse(t *testing.T) { - t.Parallel() - - ctx := context.Background() - storage, err := util.GetExternalStorageWithDefaultTimeout( - ctx, fmt.Sprintf("file:///%s", t.TempDir())) - require.NoError(t, err) - defer storage.Close() - - schemaFile, _ := generateSchemaFile() - schemaFilePath, err := schemaFile.GenerateSchemaFilePath(false, 0) - require.NoError(t, err) - encodedSchemaFile, err := schemaFile.Marshal() - require.NoError(t, err) - require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedSchemaFile)) - - schemaKey, got, err := Parse(ctx, storage, schemaFilePath) - require.NoError(t, err) - require.Equal(t, SchemaPathKey{ - Schema: schemaFile.Schema, - Table: schemaFile.Table, - TableVersion: schemaFile.TableVersion, - }, schemaKey) - require.Equal(t, schemaFile.Schema, got.Schema) - require.Equal(t, schemaFile.Table, got.Table) - require.Equal(t, schemaFile.Version, got.Version) - require.Equal(t, schemaFile.TableVersion, got.TableVersion) - require.Equal(t, schemaFile.TotalColumns, got.TotalColumns) - require.Len(t, got.Columns, len(schemaFile.Columns)) - - expectedChecksum, err := schemaFile.Sum32(nil) - require.NoError(t, err) - gotChecksum, err := got.Sum32(nil) - require.NoError(t, err) - require.Equal(t, expectedChecksum, gotChecksum) -} - -func TestParseChecksumMismatch(t *testing.T) { - t.Parallel() - - ctx := context.Background() - storage, err := util.GetExternalStorageWithDefaultTimeout( - ctx, fmt.Sprintf("file:///%s", t.TempDir())) - require.NoError(t, err) - defer storage.Close() - - schemaFile, _ := generateSchemaFile() - schemaFilePath, err := schemaFile.GenerateSchemaFilePath(false, 0) - require.NoError(t, err) - - schemaFile.TableVersion++ - encodedSchemaFile, err := schemaFile.Marshal() - require.NoError(t, err) - require.NoError(t, storage.WriteFile(ctx, schemaFilePath, encodedSchemaFile)) - - _, _, err = Parse(ctx, storage, schemaFilePath) - require.Error(t, err) - require.True(t, errors.ErrStorageSinkInvalidFileName.Equal(err)) -} From 1c436362bed53a858bfb0ff12dd443ba27fb85d9 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 24 Jun 2026 00:21:16 +0800 Subject: [PATCH 6/8] simplify the code --- pkg/cloudstorage/path_key_test.go | 74 +------------------------ pkg/cloudstorage/path_test.go | 41 -------------- pkg/cloudstorage/schema_file_test.go | 80 ---------------------------- 3 files changed, 2 insertions(+), 193 deletions(-) diff --git a/pkg/cloudstorage/path_key_test.go b/pkg/cloudstorage/path_key_test.go index 9535823f05..560a55e1ba 100644 --- a/pkg/cloudstorage/path_key_test.go +++ b/pkg/cloudstorage/path_key_test.go @@ -121,76 +121,11 @@ func TestParseDMLFilePath(t *testing.T) { dmlkey DmlPathKey fileIndex FileIndex }{ - { - name: "no date no partition", - dateSeparator: "none", - path: "schema1/table1/123456/CDC000010.csv", - fileIndexWidth: 6, - dmlkey: DmlPathKey{ - SchemaPathKey: SchemaPathKey{ - Schema: "schema1", - Table: "table1", - TableVersion: 123456, - }, - }, - fileIndex: FileIndex{Idx: 10}, - }, - { - name: "no date with partition", - dateSeparator: "none", - path: "schema1/table1/123456/55/CDC000010.csv", - fileIndexWidth: 6, - dmlkey: DmlPathKey{ - SchemaPathKey: SchemaPathKey{ - Schema: "schema1", - Table: "table1", - TableVersion: 123456, - }, - PartitionNum: 55, - }, - fileIndex: FileIndex{Idx: 10}, - }, { name: "no date with table id path", dateSeparator: "none", - path: "12345/123456/CDC000010.csv", - fileIndexWidth: 6, - dmlkey: DmlPathKey{ - SchemaPathKey: SchemaPathKey{ - Schema: "12345", - TableVersion: 123456, - }, - UseTableIDAsPath: true, - TableID: 12345, - }, - fileIndex: FileIndex{Idx: 10}, - }, - { - name: "day date no partition", - dateSeparator: "day", - path: fmt.Sprintf("schema1/table1/123456/2023-05-09/CDC_%s_00000000000000000010.csv", dispatcherID), - fileIndexWidth: 20, - dmlkey: DmlPathKey{ - SchemaPathKey: SchemaPathKey{ - Schema: "schema1", - Table: "table1", - TableVersion: 123456, - }, - Date: "2023-05-09", - }, - fileIndex: FileIndex{ - FileIndexKey: FileIndexKey{ - DispatcherID: dispatcherID, - EnableTableAcrossNodes: true, - }, - Idx: 10, - }, - }, - { - name: "day date with table id path", - dateSeparator: "day", - path: fmt.Sprintf("12345/123456/2023-05-09/CDC_%s_00000000000000000010.csv", dispatcherID), - indexPath: fmt.Sprintf("12345/123456/2023-05-09/meta/CDC_%s.index", dispatcherID), + path: fmt.Sprintf("12345/123456/CDC_%s_00000000000000000010.csv", dispatcherID), + indexPath: fmt.Sprintf("12345/123456/meta/CDC_%s.index", dispatcherID), fileIndexWidth: 20, dmlkey: DmlPathKey{ SchemaPathKey: SchemaPathKey{ @@ -199,7 +134,6 @@ func TestParseDMLFilePath(t *testing.T) { }, UseTableIDAsPath: true, TableID: 12345, - Date: "2023-05-09", }, fileIndex: FileIndex{ FileIndexKey: FileIndexKey{ @@ -250,10 +184,6 @@ func TestParseDMLFilePath(t *testing.T) { } }) } -} - -func TestParseDMLFilePathRejectsInvalidPath(t *testing.T) { - t.Parallel() var dmlkey DmlPathKey _, err := dmlkey.ParseDMLFilePath("none", "schema1//123456/CDC000010.csv", ".csv") diff --git a/pkg/cloudstorage/path_test.go b/pkg/cloudstorage/path_test.go index 6c9456a365..f2828760c8 100644 --- a/pkg/cloudstorage/path_test.go +++ b/pkg/cloudstorage/path_test.go @@ -174,25 +174,6 @@ func TestGenerateDataFilePathWithTableIDAsPath(t *testing.T) { path, err := f.GenerateDataFilePath(ctx, table, date) require.NoError(t, err) require.Equal(t, fmt.Sprintf("12345/5/CDC_%s_000001.json", table.DispatcherID.String()), path) - - var dmlkey DmlPathKey - fileIndex, err := dmlkey.ParseDMLFilePath(config.DateSeparatorNone.String(), path, ".json") - require.NoError(t, err) - require.Equal(t, DmlPathKey{ - SchemaPathKey: SchemaPathKey{ - Schema: "12345", - TableVersion: 5, - }, - UseTableIDAsPath: true, - TableID: 12345, - }, dmlkey) - require.Equal(t, FileIndex{ - FileIndexKey: FileIndexKey{ - DispatcherID: table.DispatcherID.String(), - EnableTableAcrossNodes: true, - }, - Idx: 1, - }, fileIndex) } func TestFetchIndexFromFileName(t *testing.T) { @@ -243,28 +224,6 @@ func TestFetchIndexFromFileName(t *testing.T) { } } -func TestParseFileIndexFromFileName(t *testing.T) { - t.Parallel() - - dispatcherID := commonType.NewDispatcherID().String() - fileIndex, err := ParseFileIndexFromFileName( - fmt.Sprintf("CDC_%s_000011.json", dispatcherID), - ".json", - ) - require.NoError(t, err) - require.Equal(t, FileIndex{ - FileIndexKey: FileIndexKey{ - DispatcherID: dispatcherID, - EnableTableAcrossNodes: true, - }, - Idx: 11, - }, fileIndex) - - fileIndex, err = ParseFileIndexFromFileName("CDC000012.json", ".json") - require.NoError(t, err) - require.Equal(t, FileIndex{Idx: 12}, fileIndex) -} - func TestGenerateDataFilePathWithIndexFile(t *testing.T) { t.Parallel() diff --git a/pkg/cloudstorage/schema_file_test.go b/pkg/cloudstorage/schema_file_test.go index 3a5866eb1f..ff67c0b0cd 100644 --- a/pkg/cloudstorage/schema_file_test.go +++ b/pkg/cloudstorage/schema_file_test.go @@ -87,53 +87,6 @@ func generateSchemaFile() (SchemaFile, *common.TableInfo) { return schemaFile, tableInfo } -func TestBuildUsesTargetNames(t *testing.T) { - t.Parallel() - - idFieldType := types.NewFieldType(mysql.TypeLong) - idFieldType.SetFlag(mysql.PriKeyFlag | mysql.NotNullFlag) - routedTableInfo := common.WrapTableInfo("source_db", &timodel.TableInfo{ - ID: 20, - Name: ast.NewCIStr("source_table"), - UpdateTS: 100, - Columns: []*timodel.ColumnInfo{ - { - ID: 1, - Name: ast.NewCIStr("id"), - FieldType: *idFieldType, - State: timodel.StatePublic, - }, - }, - }).CloneWithRouting("target_db", "target_table") - sourceDDL := &commonEvent.DDLEvent{ - Version: commonEvent.DDLEventVersion1, - Type: byte(timodel.ActionCreateTable), - SchemaName: "source_db", - TableName: "source_table", - Query: "CREATE TABLE `source_db`.`source_table` (`id` INT PRIMARY KEY)", - TableInfo: routedTableInfo, - FinishedTs: 100, - } - - routedDDL := commonEvent.NewRoutedDDLEvent( - sourceDDL, - "CREATE TABLE `target_db`.`target_table` (`id` INT PRIMARY KEY)", - "target_db", - "target_table", - "", - "", - routedTableInfo, - nil, - nil, - ) - - var schemaFile SchemaFile - schemaFile.Build(routedDDL, false) - require.Equal(t, "target_db", schemaFile.Schema) - require.Equal(t, "target_table", schemaFile.Table) - require.Contains(t, schemaFile.Query, "`target_db`.`target_table`") -} - func TestTableCol(t *testing.T) { t.Parallel() @@ -587,9 +540,7 @@ func TestParseSchemaFile(t *testing.T) { }, schemaKey) require.Equal(t, schemaFile.Schema, got.Schema) require.Equal(t, schemaFile.Table, got.Table) - require.Equal(t, schemaFile.Version, got.Version) require.Equal(t, schemaFile.TableVersion, got.TableVersion) - require.Equal(t, schemaFile.TotalColumns, got.TotalColumns) require.Len(t, got.Columns, len(schemaFile.Columns)) schemaFile.TableVersion++ @@ -602,37 +553,6 @@ func TestParseSchemaFile(t *testing.T) { require.True(t, errors.ErrStorageSinkInvalidFileName.Equal(err)) } -func TestGenerateSchemaFilePathValidation(t *testing.T) { - t.Parallel() - - schemaFile, _ := generateSchemaFile() - - // empty schema - emptySchemaFile := &SchemaFile{Schema: "", Table: "t1", TableVersion: 100, TotalColumns: 1, Columns: []TableCol{{}}} - _, err := emptySchemaFile.GenerateSchemaFilePath(false, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "schema cannot be empty") - - // zero table version - zeroVersionSchemaFile := &SchemaFile{Schema: "s1", Table: "t1", TableVersion: 0, TotalColumns: 1, Columns: []TableCol{{}}} - _, err = zeroVersionSchemaFile.GenerateSchemaFilePath(false, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "table version cannot be zero") - - // use-table-id-as-path with invalid tableID - _, err = schemaFile.GenerateSchemaFilePath(true, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid table id for table-id path") - _, err = schemaFile.GenerateSchemaFilePath(true, -1) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid table id for table-id path") - - invalidSchemaFile := &SchemaFile{Schema: "s1", Table: "t1", TableVersion: 100, TotalColumns: 1, Columns: nil} - _, err = invalidSchemaFile.GenerateSchemaFilePath(false, 0) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid schema file") -} - func TestSchemaFileSum32(t *testing.T) { t.Parallel() From eb69660c2cd200b7a30db61329aabeacff6a4cdd Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 24 Jun 2026 00:35:00 +0800 Subject: [PATCH 7/8] simplify the code --- cmd/storage-consumer/consumer.go | 11 ++++------- pkg/cloudstorage/path.go | 12 ++++++------ pkg/cloudstorage/path_key.go | 10 +++++++--- pkg/cloudstorage/path_test.go | 4 ++-- pkg/cloudstorage/schema_file.go | 10 +++++----- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/cmd/storage-consumer/consumer.go b/cmd/storage-consumer/consumer.go index 07f446e1f7..9f397a2c0c 100644 --- a/cmd/storage-consumer/consumer.go +++ b/cmd/storage-consumer/consumer.go @@ -442,16 +442,13 @@ func (c *consumer) parseDMLFilePath(ctx context.Context, path string) error { return errors.Trace(err) } fileName := strings.TrimSuffix(string(data), "\n") - fileIdx, err := cloudstorage.FetchIndexFromFileName(fileName, c.fileExtension) + fileIndex, err := cloudstorage.ParseFileIndexFromFileName(fileName, c.fileExtension) if err != nil { return err } - fileIndex := &cloudstorage.FileIndex{ - FileIndexKey: cloudstorage.FileIndexKey{ - DispatcherID: dispatcherID, - EnableTableAcrossNodes: dispatcherID != "", - }, - Idx: fileIdx, + fileIndex.FileIndexKey = cloudstorage.FileIndexKey{ + DispatcherID: dispatcherID, + EnableTableAcrossNodes: dispatcherID != "", } m, ok := c.tableDMLIdxMap[dmlkey] diff --git a/pkg/cloudstorage/path.go b/pkg/cloudstorage/path.go index 37f122fc0d..8c25308122 100644 --- a/pkg/cloudstorage/path.go +++ b/pkg/cloudstorage/path.go @@ -240,7 +240,11 @@ func (f *FilePathGenerator) CheckOrWriteSchema( } var schemaFile SchemaFile schemaFile.Build(event, f.config.OutputColumnID) - if !schemaFile.isTableLevel() { + isTableLevel, err := schemaFile.isTableLevel() + if err != nil { + return false, err + } + if !isTableLevel { // only check schema for table log.Error("invalid schema file", zap.String("keyspace", keyspace), @@ -519,11 +523,7 @@ func (f *FilePathGenerator) getFileIdxFromIndexFile( return 0, err } fileName := strings.TrimSuffix(string(data), "\n") - return FetchIndexFromFileName(fileName, f.extension) -} - -func FetchIndexFromFileName(fileName string, extension string) (uint64, error) { - fileIndex, err := ParseFileIndexFromFileName(fileName, extension) + fileIndex, err := ParseFileIndexFromFileName(fileName, f.extension) if err != nil { return 0, err } diff --git a/pkg/cloudstorage/path_key.go b/pkg/cloudstorage/path_key.go index b900f8b101..1e5de49d56 100644 --- a/pkg/cloudstorage/path_key.go +++ b/pkg/cloudstorage/path_key.go @@ -166,11 +166,15 @@ func (d DmlPathKey) generateDMLDataDirPath() string { elems := make([]string, 0, 5) if d.UseTableIDAsPath { elems = append(elems, strconv.FormatInt(d.TableID, 10)) - } else { - elems = append(elems, d.Schema, d.Table) + elems = append(elems, strconv.FormatUint(d.TableVersion, 10)) + if d.Date != "" { + elems = append(elems, d.Date) + } + return path.Join(elems...) } + elems = append(elems, d.Schema, d.Table) elems = append(elems, strconv.FormatUint(d.TableVersion, 10)) - if d.PartitionNum != 0 && !d.UseTableIDAsPath { + if d.PartitionNum != 0 { elems = append(elems, strconv.FormatInt(d.PartitionNum, 10)) } if d.Date != "" { diff --git a/pkg/cloudstorage/path_test.go b/pkg/cloudstorage/path_test.go index f2828760c8..59fa10fff9 100644 --- a/pkg/cloudstorage/path_test.go +++ b/pkg/cloudstorage/path_test.go @@ -176,7 +176,7 @@ func TestGenerateDataFilePathWithTableIDAsPath(t *testing.T) { require.Equal(t, fmt.Sprintf("12345/5/CDC_%s_000001.json", table.DispatcherID.String()), path) } -func TestFetchIndexFromFileName(t *testing.T) { +func TestParseFileIndexFromFileName(t *testing.T) { t.Parallel() ctx, cancel := context.WithCancel(context.TODO()) @@ -215,7 +215,7 @@ func TestFetchIndexFromFileName(t *testing.T) { } for _, tc := range testCases { - _, err := FetchIndexFromFileName(tc.fileName, f.extension) + _, err := ParseFileIndexFromFileName(tc.fileName, f.extension) if len(tc.wantErr) != 0 { require.Contains(t, err.Error(), tc.wantErr) } else { diff --git a/pkg/cloudstorage/schema_file.go b/pkg/cloudstorage/schema_file.go index 4b53d80c38..7496e0ebfd 100644 --- a/pkg/cloudstorage/schema_file.go +++ b/pkg/cloudstorage/schema_file.go @@ -19,7 +19,6 @@ import ( "strconv" "strings" - "github.com/pingcap/log" "github.com/pingcap/ticdc/pkg/common" commonEvent "github.com/pingcap/ticdc/pkg/common/event" "github.com/pingcap/ticdc/pkg/errors" @@ -30,7 +29,6 @@ import ( "github.com/pingcap/tidb/pkg/parser/charset" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/parser/types" - "go.uber.org/zap" ) const ( @@ -274,11 +272,13 @@ func (t *SchemaFile) ToTableInfo() (*common.TableInfo, error) { } // isTableLevel returns whether this file describes a table. -func (t *SchemaFile) isTableLevel() bool { +func (t *SchemaFile) isTableLevel() (bool, error) { if len(t.Columns) != t.TotalColumns { - log.Panic("invalid schema file", zap.Any("schemaFile", t)) + return false, errors.ErrInternalCheckFailed.GenWithStack( + "invalid schema file: columns %d does not match total columns %d", + len(t.Columns), t.TotalColumns) } - return t.TotalColumns != 0 + return t.TotalColumns != 0, nil } // Marshal marshals SchemaFile. From f3b7ad78b0745d564f5c31a1dc23d977d2321057 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 24 Jun 2026 12:06:28 +0800 Subject: [PATCH 8/8] simplify the code --- cmd/storage-consumer/consumer.go | 10 +- downstreamadapter/sink/cloudstorage/writer.go | 10 +- pkg/cloudstorage/path.go | 126 ++++-------------- pkg/cloudstorage/path_key.go | 4 - pkg/cloudstorage/path_test.go | 11 +- pkg/cloudstorage/schema_file.go | 44 +----- 6 files changed, 42 insertions(+), 163 deletions(-) diff --git a/cmd/storage-consumer/consumer.go b/cmd/storage-consumer/consumer.go index 9f397a2c0c..024a4687f9 100644 --- a/cmd/storage-consumer/consumer.go +++ b/cmd/storage-consumer/consumer.go @@ -423,7 +423,7 @@ func (c *consumer) flushDMLEvents(ctx context.Context, tableID int64) error { func (c *consumer) parseDMLFilePath(ctx context.Context, path string) error { var dmlkey cloudstorage.DmlPathKey - dispatcherID, err := dmlkey.ParseIndexFilePath( + _, err := dmlkey.ParseIndexFilePath( putil.GetOrZero(c.replicationCfg.Sink.DateSeparator), path, ) @@ -446,17 +446,15 @@ func (c *consumer) parseDMLFilePath(ctx context.Context, path string) error { if err != nil { return err } - fileIndex.FileIndexKey = cloudstorage.FileIndexKey{ - DispatcherID: dispatcherID, - EnableTableAcrossNodes: dispatcherID != "", - } m, ok := c.tableDMLIdxMap[dmlkey] if !ok { c.tableDMLIdxMap[dmlkey] = fileIndexKeyMap{ fileIndex.FileIndexKey: fileIndex.Idx, } - } else if fileIndex.Idx >= m[fileIndex.FileIndexKey] { + return nil + } + if fileIndex.Idx >= m[fileIndex.FileIndexKey] { c.tableDMLIdxMap[dmlkey][fileIndex.FileIndexKey] = fileIndex.Idx } return nil diff --git a/downstreamadapter/sink/cloudstorage/writer.go b/downstreamadapter/sink/cloudstorage/writer.go index e56315fd7c..da7da3d87c 100644 --- a/downstreamadapter/sink/cloudstorage/writer.go +++ b/downstreamadapter/sink/cloudstorage/writer.go @@ -179,15 +179,7 @@ func (d *writer) flushMessages(ctx context.Context) error { zap.Error(err)) return err } - indexFilePath, err := d.filePathGenerator.GenerateIndexFilePath(table, date) - if err != nil { - log.Error("failed to generate index file path", - zap.String("keyspace", keyspace), - zap.String("changefeed", changefeed), - zap.Int("shardID", d.shardID), - zap.Error(err)) - return err - } + indexFilePath := d.filePathGenerator.GenerateIndexFilePath(table, date) payload, err := buildPayload(d.spool, tableTask) if err != nil { diff --git a/pkg/cloudstorage/path.go b/pkg/cloudstorage/path.go index 8c25308122..9e76b98e2c 100644 --- a/pkg/cloudstorage/path.go +++ b/pkg/cloudstorage/path.go @@ -97,46 +97,22 @@ func mustParseSchemaFileName(path string) (uint64, uint32) { func generateSchemaFilePath( schema, table string, tableVersion uint64, checksum uint32, omitSchema bool, -) (string, error) { - if schema == "" || tableVersion == 0 { - return "", errors.ErrInternalCheckFailed.GenWithStack( - "invalid schema or tableVersion, schema=%q table=%q tableVersion=%d", - schema, table, tableVersion, - ) - } - - var dir string +) string { + name := fmt.Sprintf(schemaFileNameFormat, tableVersion, checksum) if omitSchema { - if table == "" { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs( - "table cannot be empty when 'use-table-id-as-path' is true", - ) - } - // use-table-id-as-path: omit schema, path is /meta/ - dir = fmt.Sprintf(tableIDPrefix, table) - } else { - if table == "" { - // Generate db schema file path. - dir = fmt.Sprintf(dbSchemaPrefix, schema) - } else { - // Generate table-level schema file path. - dir = fmt.Sprintf(tableMetaPrefix, schema, table) - } + return path.Join(fmt.Sprintf(tableIDPrefix, table), name) } - name := fmt.Sprintf(schemaFileNameFormat, tableVersion, checksum) - return path.Join(dir, name), nil + if table == "" { + return path.Join(fmt.Sprintf(dbSchemaPrefix, schema), name) + } + return path.Join(fmt.Sprintf(tableMetaPrefix, schema, table), name) } -func generateTablePath(tableName string, tableID int64, useTableIDAsPath bool) (string, error) { +func generateTablePath(tableName string, tableID int64, useTableIDAsPath bool) string { if useTableIDAsPath { - if tableID <= 0 { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs( - "invalid table id for table-id path", - ) - } - return fmt.Sprintf("%d", tableID), nil + return fmt.Sprintf("%d", tableID) } - return tableName, nil + return tableName } func generateDataFileName(enableTableAcrossNodes bool, dispatcherID string, index uint64, extension string, fileIndexWidth int) string { @@ -240,19 +216,6 @@ func (f *FilePathGenerator) CheckOrWriteSchema( } var schemaFile SchemaFile schemaFile.Build(event, f.config.OutputColumnID) - isTableLevel, err := schemaFile.isTableLevel() - if err != nil { - return false, err - } - if !isTableLevel { - // only check schema for table - log.Error("invalid schema file", - zap.String("keyspace", keyspace), - zap.String("changefeedID", changefeed), - zap.Any("versionedTableName", table), - zap.Any("tableInfo", tableInfo)) - return false, errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid schema file in FilePathGenerator") - } // Case 1: point check if the schema file exists. schemaFilePath, err := schemaFile.GenerateSchemaFilePath(f.config.UseTableIDAsPath, table.TableNameWithPhysicTableID.TableID) @@ -271,15 +234,10 @@ func (f *FilePathGenerator) CheckOrWriteSchema( _, checksum := mustParseSchemaFileName(schemaFilePath) schemaFileCount := 0 lastVersion := uint64(0) - tablePathPart, err := generateTablePath(schemaFile.Table, table.TableNameWithPhysicTableID.TableID, f.config.UseTableIDAsPath) - if err != nil { - return false, err - } - var subDir string + tablePathPart := generateTablePath(schemaFile.Table, table.TableNameWithPhysicTableID.TableID, f.config.UseTableIDAsPath) + subDir := fmt.Sprintf(tableMetaPrefix, schemaFile.Schema, tablePathPart) if f.config.UseTableIDAsPath { subDir = fmt.Sprintf(tableIDPrefix, tablePathPart) - } else { - subDir = fmt.Sprintf(tableMetaPrefix, schemaFile.Schema, tablePathPart) } checksumSuffix := fmt.Sprintf("%010d.json", checksum) hasNewerSchemaVersion := false @@ -291,16 +249,7 @@ func (f *FilePathGenerator) CheckOrWriteSchema( if !strings.HasSuffix(path, checksumSuffix) { return nil } - version, parsedChecksum := mustParseSchemaFileName(path) - if parsedChecksum != checksum { - log.Error("invalid schema file name", - zap.String("keyspace", keyspace), - zap.String("changefeedID", changefeed), - zap.String("path", path), zap.Any("checksum", checksum)) - return errors.ErrInternalCheckFailed.GenWithStack( - "invalid schema filename in storage sink, expected checksum: %d, actual checksum: %d", - checksum, parsedChecksum) - } + version, _ := mustParseSchemaFileName(path) if version > table.TableInfoVersion { hasNewerSchemaVersion = true } @@ -374,28 +323,23 @@ func (f *FilePathGenerator) GenerateDateStr() string { } // GenerateIndexFilePath generates a canonical path for index file. -func (f *FilePathGenerator) GenerateIndexFilePath(tbl VersionedTableName, date string) (string, error) { - dir, err := f.generateDataDirPath(tbl, date) - if err != nil { - return "", err - } +func (f *FilePathGenerator) GenerateIndexFilePath(tbl VersionedTableName, date string) string { + dir := f.generateDataDirPath(tbl, date) name := defaultIndexFileName if f.config.EnableTableAcrossNodes { name = fmt.Sprintf(defaultTableAcrossNodesIndexFileName, tbl.DispatcherID.String()) } - return path.Join(dir, name), nil + return path.Join(dir, name) } // GenerateDataFilePath generates a canonical path for data file. func (f *FilePathGenerator) GenerateDataFilePath( ctx context.Context, tbl VersionedTableName, date string, ) (string, error) { - dir, err := f.generateDataDirPath(tbl, date) - if err != nil { - return "", err - } + dir := f.generateDataDirPath(tbl, date) loadedIndexFile := false - if idx, ok := f.fileIndex[tbl]; !ok { + idx, ok := f.fileIndex[tbl] + if !ok { fileIdx, err := f.getFileIdxFromIndexFile(ctx, tbl, date) if err != nil { return "", err @@ -406,7 +350,8 @@ func (f *FilePathGenerator) GenerateDataFilePath( index: fileIdx, } loadedIndexFile = true - } else { + } + if ok { idx.currDate = date } // if date changed, reset the counter @@ -452,23 +397,14 @@ func (f *FilePathGenerator) GenerateDataFilePath( } } -func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date string) (string, error) { - tableVersion, ok := f.versionMap[tbl] - if !ok || tableVersion == 0 { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs( - "schema file version is not initialized", - ) - } - +func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date string) string { + tableVersion := f.versionMap[tbl] if f.config.UseTableIDAsPath { - tableIDPathPart, err := generateTablePath( + tableIDPathPart := generateTablePath( tbl.TableNameWithPhysicTableID.Table, tbl.TableNameWithPhysicTableID.TableID, true, ) - if err != nil { - return "", err - } return DmlPathKey{ SchemaPathKey: SchemaPathKey{ Schema: tableIDPathPart, @@ -477,17 +413,14 @@ func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date str UseTableIDAsPath: true, TableID: tbl.TableNameWithPhysicTableID.TableID, Date: date, - }.generateDMLDataDirPath(), nil + }.generateDMLDataDirPath() } - tablePathPart, err := generateTablePath( + tablePathPart := generateTablePath( tbl.TableNameWithPhysicTableID.Table, tbl.TableNameWithPhysicTableID.TableID, false, ) - if err != nil { - return "", err - } var partitionNum int64 if f.config.EnablePartitionSeparator && tbl.TableNameWithPhysicTableID.IsPartition { partitionNum = tbl.TableNameWithPhysicTableID.TableID @@ -500,16 +433,13 @@ func (f *FilePathGenerator) generateDataDirPath(tbl VersionedTableName, date str }, PartitionNum: partitionNum, Date: date, - }.generateDMLDataDirPath(), nil + }.generateDMLDataDirPath() } func (f *FilePathGenerator) getFileIdxFromIndexFile( ctx context.Context, tbl VersionedTableName, date string, ) (uint64, error) { - indexFile, err := f.GenerateIndexFilePath(tbl, date) - if err != nil { - return 0, err - } + indexFile := f.GenerateIndexFilePath(tbl, date) exist, err := f.storage.FileExists(ctx, indexFile) if err != nil { return 0, err diff --git a/pkg/cloudstorage/path_key.go b/pkg/cloudstorage/path_key.go index 1e5de49d56..05f0d5078c 100644 --- a/pkg/cloudstorage/path_key.go +++ b/pkg/cloudstorage/path_key.go @@ -277,10 +277,6 @@ func parseTableIDPathPart(part string) (int64, error) { if err != nil { return 0, err } - if tableID <= 0 { - return 0, errors.ErrStorageSinkInvalidFileName.GenWithStack( - "invalid table id path part %s", part) - } return tableID, nil } diff --git a/pkg/cloudstorage/path_test.go b/pkg/cloudstorage/path_test.go index 59fa10fff9..89ea86cd02 100644 --- a/pkg/cloudstorage/path_test.go +++ b/pkg/cloudstorage/path_test.go @@ -248,9 +248,8 @@ func TestGenerateDataFilePathWithIndexFile(t *testing.T) { } f.versionMap[table] = table.TableInfoVersion date := f.GenerateDateStr() - indexFilePath, err := f.GenerateIndexFilePath(table, date) - require.NoError(t, err) - err = f.storage.WriteFile(ctx, indexFilePath, []byte(fmt.Sprintf("CDC_%s_000005.json\n", dispatcherID.String()))) + indexFilePath := f.GenerateIndexFilePath(table, date) + err := f.storage.WriteFile(ctx, indexFilePath, []byte(fmt.Sprintf("CDC_%s_000005.json\n", dispatcherID.String()))) require.NoError(t, err) dataFilePath, err := f.GenerateDataFilePath(ctx, table, date) @@ -280,8 +279,7 @@ func TestGenerateDataFilePathResyncIndexFile(t *testing.T) { f2.versionMap[table] = table.TableInfoVersion date := "" - indexFilePath, err := f1.GenerateIndexFilePath(table, date) - require.NoError(t, err) + indexFilePath := f1.GenerateIndexFilePath(table, date) // Simulate dispatcher moved between captures: // 1) f1 generates CDC_..._000001 and writes index file. @@ -337,8 +335,7 @@ func TestGenerateDataFilePathReconcilesStaleIndexFile(t *testing.T) { date := "" firstDataFile := fmt.Sprintf("test/table1/5/CDC_%s_000001.json", dispatcherID.String()) secondDataFile := fmt.Sprintf("test/table1/5/CDC_%s_000002.json", dispatcherID.String()) - indexFilePath, err := f.GenerateIndexFilePath(table, date) - require.NoError(t, err) + indexFilePath := f.GenerateIndexFilePath(table, date) require.NoError(t, f.storage.WriteFile(ctx, firstDataFile, []byte("test1"))) require.NoError(t, f.storage.WriteFile(ctx, secondDataFile, []byte("test2"))) require.NoError(t, f.storage.WriteFile(ctx, indexFilePath, fmt.Appendf(nil, "CDC_%s_000001.json\n", dispatcherID.String()))) diff --git a/pkg/cloudstorage/schema_file.go b/pkg/cloudstorage/schema_file.go index 7496e0ebfd..420c0379b6 100644 --- a/pkg/cloudstorage/schema_file.go +++ b/pkg/cloudstorage/schema_file.go @@ -271,16 +271,6 @@ func (t *SchemaFile) ToTableInfo() (*common.TableInfo, error) { return info, nil } -// isTableLevel returns whether this file describes a table. -func (t *SchemaFile) isTableLevel() (bool, error) { - if len(t.Columns) != t.TotalColumns { - return false, errors.ErrInternalCheckFailed.GenWithStack( - "invalid schema file: columns %d does not match total columns %d", - len(t.Columns), t.TotalColumns) - } - return t.TotalColumns != 0, nil -} - // Marshal marshals SchemaFile. func (t *SchemaFile) Marshal() ([]byte, error) { data, err := json.MarshalIndent(t, marshalPrefix, marshalIndent) @@ -362,35 +352,11 @@ func (t *SchemaFile) GenerateSchemaFilePath(useTableIDAsPath bool, tableID int64 if err != nil { return "", err } - if t.Schema == "" { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("schema cannot be empty") - } - if t.TableVersion == 0 { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("table version cannot be zero") - } - if len(t.Columns) != t.TotalColumns { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid schema file") - } - isTableLevel := t.TotalColumns != 0 - if !isTableLevel && t.Table != "" { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid schema file") - } - if useTableIDAsPath && isTableLevel && tableID <= 0 { - return "", errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid table id for table-id path") - } - + tableLevel := t.TotalColumns != 0 table := t.Table - if isTableLevel { - tablePath, err := generateTablePath(t.Table, tableID, useTableIDAsPath) - if err != nil { - return "", err - } - table = tablePath - } - omitSchema := useTableIDAsPath && isTableLevel - path, err := generateSchemaFilePath(t.Schema, table, t.TableVersion, checksum, omitSchema) - if err != nil { - return "", err + if tableLevel { + table = generateTablePath(t.Table, tableID, useTableIDAsPath) } - return path, nil + omitSchema := useTableIDAsPath && tableLevel + return generateSchemaFilePath(t.Schema, table, t.TableVersion, checksum, omitSchema), nil }