Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions api/v2/changefeed.go
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,14 @@ func (h *OpenAPIV2) ResumeChangefeed(c *gin.Context) {
}
middleware.SetChangefeedOperationTarget(c, cfInfo.ChangefeedID.Keyspace(), cfInfo.ChangefeedID.Name())

// Resume validation must use persisted metadata because stopped changefeeds
// can be edited outside the coordinator process during legacy migration.
cfInfo, err = co.GetPersistedChangefeedInfo(ctx, cfInfo.ChangefeedID)
if err != nil {
_ = c.Error(err)
return
}

// If there is no overrideCheckpointTs, then check whether the currentCheckpointTs is smaller than gc safepoint or not.
newCheckpointTs := status.CheckpointTs
overwriteCheckpointTs := false
Expand Down
7 changes: 7 additions & 0 deletions api/v2/changefeed_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@ func (c *resumeNormalCoordinator) GetChangefeed(ctx context.Context, changefeedD
}, nil
}

// GetPersistedChangefeedInfo is the test double for the persisted-metadata
// lookup. It never fails and always answers with a changefeed info that carries
// the requested id and is in the normal state.
func (c *resumeNormalCoordinator) GetPersistedChangefeedInfo(ctx context.Context, id common.ChangeFeedID) (*config.ChangeFeedInfo, error) {
	persisted := &config.ChangeFeedInfo{
		ChangefeedID: id,
		State:        config.StateNormal,
	}
	return persisted, nil
}

// CreateChangefeed is a stub on the resumeNormalCoordinator test double: it
// ignores both arguments and always reports success.
func (c *resumeNormalCoordinator) CreateChangefeed(ctx context.Context, info *config.ChangeFeedInfo) error {
return nil
}
Expand Down
6 changes: 4 additions & 2 deletions coordinator/changefeed/changefeed_db_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
type Backend interface {
// GetAllChangefeeds returns all changefeeds from the backend db, including stopped and failed changefeeds
GetAllChangefeeds(ctx context.Context) (map[common.ChangeFeedID]*ChangefeedMetaWrapper, error)
// GetChangefeedInfo returns the latest persisted changefeed info from the backend db.
GetChangefeedInfo(ctx context.Context, id common.ChangeFeedID) (*config.ChangeFeedInfo, error)
// CreateChangefeed saves changefeed info and status to db
CreateChangefeed(ctx context.Context, info *config.ChangeFeedInfo) error
// UpdateChangefeed updates changefeed info to db
Expand All @@ -34,8 +36,8 @@ type Backend interface {
DeleteChangefeed(ctx context.Context, id common.ChangeFeedID) error
// SetChangefeedProgress persists the operation progress status to db for a changefeed
SetChangefeedProgress(ctx context.Context, id common.ChangeFeedID, progress config.Progress) error
// ResumeChangefeed persists the resumed status to db for a changefeed
ResumeChangefeed(ctx context.Context, id common.ChangeFeedID, newCheckpointTs uint64) error
// ResumeChangefeed persists the resumed status to db for a changefeed and returns the resumed info.
ResumeChangefeed(ctx context.Context, id common.ChangeFeedID, newCheckpointTs uint64) (*config.ChangeFeedInfo, error)
// UpdateChangefeedCheckpointTs persists the checkpointTs for changefeeds
UpdateChangefeedCheckpointTs(ctx context.Context, checkpointTs map[common.ChangeFeedID]uint64) error
}
Expand Down
41 changes: 32 additions & 9 deletions coordinator/changefeed/etcd_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,21 @@ func (b *EtcdBackend) GetAllChangefeeds(ctx context.Context) (map[common.ChangeF
return cfMap, nil
}

// GetChangefeedInfo loads the changefeed info currently stored in etcd for id.
//
// Old metadata may not embed a ChangefeedID in the stored value. When the
// loaded info has an empty changefeed name, the lookup id is filled in so that
// callers can use the returned info for validation and in-memory replacement.
// An info that already carries a name is returned unchanged.
//
// Any error from the etcd client is returned traced, with a nil info.
func (b *EtcdBackend) GetChangefeedInfo(ctx context.Context, id common.ChangeFeedID) (*config.ChangeFeedInfo, error) {
	persisted, err := b.etcdClient.GetChangeFeedInfo(ctx, id.DisplayName)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// Already identified: nothing to patch up.
	if persisted.ChangefeedID.Name() != "" {
		return persisted, nil
	}

	// Legacy value without an embedded id: fall back to the key we looked up.
	persisted.ChangefeedID = id
	return persisted, nil
}

func (b *EtcdBackend) CreateChangefeed(ctx context.Context,
info *config.ChangeFeedInfo,
) error {
Expand Down Expand Up @@ -248,17 +263,25 @@ func (b *EtcdBackend) DeleteChangefeed(ctx context.Context,
return nil
}

// ResumeChangefeed persists a resumed changefeed and returns the metadata used by the caller.
func (b *EtcdBackend) ResumeChangefeed(ctx context.Context,
id common.ChangeFeedID, newCheckpointTs uint64,
) error {
info, err := b.etcdClient.GetChangeFeedInfo(ctx, id.DisplayName)
) (*config.ChangeFeedInfo, error) {
info, err := b.GetChangefeedInfo(ctx, id)
if err != nil {
return errors.Trace(err)
return nil, errors.Trace(err)
}
// Legacy stopped changefeeds can contain sparse metadata that was completed
// during coordinator bootstrap. Complete it again before persisting the
// resumed state so backend-loaded metadata does not drop compatibility defaults.
if info.Config == nil {
info.Config = config.GetDefaultReplicaConfig()
}
info.VerifyAndComplete()
info.State = config.StateNormal
newStr, err := info.Marshal()
if err != nil {
return errors.Trace(err)
return nil, errors.Trace(err)
}
infoKey := etcd.GetEtcdKeyChangeFeedInfo(b.etcdClient.GetClusterID(), id.DisplayName)
opsThen := []clientv3.Op{
Expand All @@ -267,27 +290,27 @@ func (b *EtcdBackend) ResumeChangefeed(ctx context.Context,
if newCheckpointTs > 0 {
status, _, err := b.etcdClient.GetChangeFeedStatus(ctx, id)
if err != nil {
return errors.Trace(err)
return nil, errors.Trace(err)
}
status.CheckpointTs = newCheckpointTs
status.Progress = config.ProgressNone
jobValue, err := status.Marshal()
if err != nil {
return errors.Trace(err)
return nil, errors.Trace(err)
}
jobKey := etcd.GetEtcdKeyJob(b.etcdClient.GetClusterID(), id.DisplayName)
opsThen = append(opsThen, clientv3.OpPut(jobKey, jobValue))
}

putResp, err := b.etcdClient.GetEtcdClient().Txn(ctx, nil, opsThen, []clientv3.Op{})
if err != nil {
return errors.Trace(err)
return nil, errors.Trace(err)
}
if !putResp.Succeeded {
err = cerror.ErrMetaOpFailed.GenWithStackByArgs(fmt.Sprintf("resume changefeed %s", info.ChangefeedID.Name()))
return errors.Trace(err)
return nil, errors.Trace(err)
}
return nil
return info, nil
}

func (b *EtcdBackend) SetChangefeedProgress(ctx context.Context, id common.ChangeFeedID, progress config.Progress) error {
Expand Down
60 changes: 59 additions & 1 deletion coordinator/changefeed/etcd_backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@
}

func TestResumeChangefeed(t *testing.T) {
// Scenario: resuming a stopped changefeed persists the normal state and
// returns the metadata that was actually loaded from etcd.
// Steps:
// 1) Load legacy changefeed info without an embedded ChangefeedID.
// 2) Resume the changefeed and assert the returned info is normalized.
ctrl := gomock.NewController(t)
defer ctrl.Finish()

Expand All @@ -210,8 +215,61 @@
cdcClient.EXPECT().GetChangeFeedStatus(gomock.Any(), changefeedID).Return(status, int64(0), nil).Times(1)
etcdClient.EXPECT().Txn(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(&clientv3.TxnResponse{Succeeded: true}, nil).Times(1)

err := backend.ResumeChangefeed(context.Background(), changefeedID, 200)
resumedInfo, err := backend.ResumeChangefeed(context.Background(), changefeedID, 200)
require.Nil(t, err)
require.Equal(t, config.StateNormal, resumedInfo.State)
require.Equal(t, changefeedID, resumedInfo.ChangefeedID)
}

func TestResumeChangefeedCompletesLegacySchedulerDefaults(t *testing.T) {
// Scenario: an old owner persisted a stopped changefeed with only explicit scheduler fields.
// Steps: resume that sparse metadata, inspect the etcd put payload, and verify resume persists
// compatibility defaults such as RegionCountPerSpan before returning the resumed info.
ctrl := gomock.NewController(t)
defer ctrl.Finish()

cdcClient := etcd.NewMockCDCEtcdClient(ctrl)
etcdClient := etcd.NewMockClient(ctrl)
cdcClient.EXPECT().GetEtcdClient().Return(etcdClient).AnyTimes()
cdcClient.EXPECT().GetClusterID().Return("test-cluster-id").AnyTimes()
backend := NewEtcdBackend(cdcClient)

changefeedID := common.NewChangeFeedIDWithName("test-scheduler-defaults", common.DefaultKeyspaceName)
enableTableAcrossNodes := false
regionThreshold := 20
writeKeyThreshold := 10485760
info := &config.ChangeFeedInfo{
ChangefeedID: changefeedID,
Config: config.GetDefaultReplicaConfig(),
State: config.StateStopped,
SinkURI: "mysql://127.0.0.1:3306",
}
info.Config.Scheduler = &config.ChangefeedSchedulerConfig{
EnableTableAcrossNodes: &enableTableAcrossNodes,
RegionThreshold: &regionThreshold,
WriteKeyThreshold: &writeKeyThreshold,
}

cdcClient.EXPECT().GetChangeFeedInfo(gomock.Any(), changefeedID.DisplayName).Return(info, nil).Times(1)
etcdClient.EXPECT().Txn(gomock.Any(), gomock.Any(), NewFuncMatcher(func(i interface{}) bool {

Check failure on line 254 in coordinator/changefeed/etcd_backend_test.go

View workflow job for this annotation

GitHub Actions / Check

any: interface{} can be replaced by any (modernize)
ops := i.([]clientv3.Op)
require.Len(t, ops, 1)
require.True(t, ops[0].IsPut())

persistedInfo := &config.ChangeFeedInfo{}
require.NoError(t, persistedInfo.Unmarshal(ops[0].ValueBytes()))
require.Equal(t, config.StateNormal, persistedInfo.State)
require.NotNil(t, persistedInfo.Config)
require.NotNil(t, persistedInfo.Config.Scheduler)
require.NotNil(t, persistedInfo.Config.Scheduler.RegionCountPerSpan)
require.Greater(t, *persistedInfo.Config.Scheduler.RegionCountPerSpan, 0)
return true
}), gomock.Any()).Return(&clientv3.TxnResponse{Succeeded: true}, nil).Times(1)

resumedInfo, err := backend.ResumeChangefeed(context.Background(), changefeedID, 0)
require.NoError(t, err)
require.NotNil(t, resumedInfo.Config.Scheduler.RegionCountPerSpan)
require.Greater(t, *resumedInfo.Config.Scheduler.RegionCountPerSpan, 0)
}

func TestSetChangefeedProgress(t *testing.T) {
Expand Down
22 changes: 19 additions & 3 deletions coordinator/changefeed/mock/changefeed_db_backend.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 30 additions & 3 deletions coordinator/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -899,11 +899,17 @@ func (c *Controller) ResumeChangefeed(
return err
}

if err := c.backend.ResumeChangefeed(ctx, id, newCheckpointTs); err != nil {
resumedInfo, err := c.backend.ResumeChangefeed(ctx, id, newCheckpointTs)
if err != nil {
return err
}
if resumedInfo == nil {
return errors.New("resumed changefeed info is nil")
}

clone, err := cf.GetInfo().Clone()
// Use the backend-returned info so direct metadata edits made while the
// changefeed was stopped are not overwritten by the stale in-memory copy.
clone, err := resumedInfo.Clone()
if err != nil {
return err
}
Expand Down Expand Up @@ -959,6 +965,10 @@ func (c *Controller) ListChangefeeds(_ context.Context, keyspace string) ([]*con
return infos, statuses, nil
}

// GetChangefeed returns a copy of the changefeed info and the current status.
// API callers mutate the returned info when validating update requests, so the
// copy prevents those writes from racing with coordinator goroutines that read
// the in-memory changefeed state.
func (c *Controller) GetChangefeed(
_ context.Context,
changefeedDisplayName common.ChangeFeedDisplayName,
Expand All @@ -975,6 +985,11 @@ func (c *Controller) GetChangefeed(
return nil, nil, errors.ErrChangeFeedNotExists.GenWithStackByArgs(changefeedDisplayName.Name)
}

info, err := cf.GetInfo().Clone()
if err != nil {
return nil, nil, errors.Trace(err)
}

maintainerID := cf.GetNodeID()
nodeInfo := c.nodeManager.GetNodeInfo(maintainerID)
maintainerAddr := ""
Expand All @@ -983,7 +998,19 @@ func (c *Controller) GetChangefeed(
}
status := &config.ChangeFeedStatus{CheckpointTs: cf.GetStatus().CheckpointTs, LastSyncedTs: cf.GetStatus().LastSyncedTs, LogCoordinatorResolvedTs: cf.GetLogCoordinatorResolvedTs()}
status.SetMaintainerAddr(maintainerAddr)
return cf.GetInfo(), status, nil
return info, status, nil
}

// GetPersistedChangefeedInfo fetches the changefeed info for id from the
// backend store instead of the coordinator's in-memory state.
//
// Resume-time validation should use this accessor: metadata of a stopped
// changefeed can be changed outside the coordinator process (for example during
// metadata migration or by legacy tooling), whereas GetChangefeed intentionally
// returns the coordinator's in-memory copy.
//
// The API read lock is held for the duration of the backend call. The backend's
// result and error are returned as-is.
func (c *Controller) GetPersistedChangefeedInfo(ctx context.Context, id common.ChangeFeedID) (*config.ChangeFeedInfo, error) {
	c.apiLock.RLock()
	defer c.apiLock.RUnlock()

	persisted, err := c.backend.GetChangefeedInfo(ctx, id)
	return persisted, err
}

// getChangefeed returns the changefeed by id, return nil if not found
Expand Down
Loading
Loading