-
Notifications
You must be signed in to change notification settings - Fork 60
coordinator: persist maintainer epochs before ownership changes #5434
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
57df930
0d8b952
06a6788
cd9e0b5
716e60f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,7 +14,6 @@ | |
| package changefeed | ||
|
|
||
| import ( | ||
| "encoding/json" | ||
| "net/url" | ||
| "sync" | ||
|
|
||
|
|
@@ -41,7 +40,6 @@ type Changefeed struct { | |
| nodeIDMu sync.Mutex | ||
| nodeID node.ID | ||
|
|
||
| configBytes []byte | ||
| // it's saved to the backend db | ||
| lastSavedCheckpointTs *atomic.Uint64 | ||
| logCoordinatorResolvedTs *atomic.Uint64 | ||
|
|
@@ -57,21 +55,20 @@ func NewChangefeed(cfID common.ChangeFeedID, | |
| checkpointTs uint64, | ||
| isNew bool, | ||
| ) *Changefeed { | ||
| if info == nil { | ||
| log.Panic("changefeed info is nil", zap.Stringer("changefeedID", cfID)) | ||
| } | ||
| if info.Config == nil { | ||
| log.Panic("changefeed config is nil", zap.Stringer("changefeedID", cfID)) | ||
| } | ||
| uri, err := url.Parse(info.SinkURI) | ||
| if err != nil { | ||
| log.Panic("unable to parse sink-uri", | ||
| zap.String("url", info.SinkURI), zap.Error(err)) | ||
| } | ||
| bytes, err := json.Marshal(info) | ||
| if err != nil { | ||
| log.Panic("unable to marshal changefeed config", | ||
| zap.Error(err)) | ||
| } | ||
|
|
||
| res := &Changefeed{ | ||
| ID: cfID, | ||
| info: atomic.NewPointer(info), | ||
| configBytes: bytes, | ||
| lastSavedCheckpointTs: atomic.NewUint64(checkpointTs), | ||
| logCoordinatorResolvedTs: atomic.NewUint64(checkpointTs), | ||
| sinkType: getSinkType(uri.Scheme), | ||
|
|
@@ -100,7 +97,7 @@ func NewChangefeed(cfID common.ChangeFeedID, | |
|
|
||
| // GetInfo returns the latest ChangeFeedInfo stored in memory. | ||
| // | ||
| // It may return nil if the changefeed hasn't been fully initialized. | ||
| // Changefeed keeps info non-nil after construction. | ||
| func (c *Changefeed) GetInfo() *config.ChangeFeedInfo { | ||
| if c == nil || c.info == nil { | ||
| return nil | ||
|
|
@@ -109,15 +106,13 @@ func (c *Changefeed) GetInfo() *config.ChangeFeedInfo { | |
| } | ||
|
|
||
| // SetInfo updates the in-memory ChangeFeedInfo for the changefeed. | ||
| // | ||
| // It lazily initializes the internal pointer for uninitialized changefeeds | ||
| // (primarily used by unit tests). | ||
| // | ||
| // If the receiver is nil, it does nothing. | ||
| func (c *Changefeed) SetInfo(info *config.ChangeFeedInfo) { | ||
| if c == nil { | ||
| return | ||
| } | ||
| if info == nil { | ||
| log.Panic("changefeed info is nil", zap.Stringer("changefeedID", c.ID)) | ||
| } | ||
| if c.info == nil { | ||
| c.info = atomic.NewPointer(info) | ||
| return | ||
|
|
@@ -243,10 +238,12 @@ func (c *Changefeed) GetStatusForResume() *heartbeatpb.MaintainerStatus { | |
| } | ||
|
|
||
| clone := &heartbeatpb.MaintainerStatus{ | ||
| CheckpointTs: status.CheckpointTs, | ||
| FeedState: status.FeedState, | ||
| State: status.State, | ||
| // We don't clone the errors from status: the old errors are meaningless for the resume action and would only block it. | ||
| CheckpointTs: status.CheckpointTs, | ||
| FeedState: status.FeedState, | ||
| State: status.State, | ||
| MaintainerEpoch: status.MaintainerEpoch, | ||
| // Resume creates a new maintainer owner, so errors reported by the | ||
| // previous owner must not block the resumed in-memory status. | ||
| Err: []*heartbeatpb.RunningError{}, | ||
| } | ||
|
|
||
|
|
@@ -272,21 +269,27 @@ func (c *Changefeed) GetLastSavedCheckPointTs() uint64 { | |
| } | ||
|
|
||
| func (c *Changefeed) NewAddMaintainerMessage(server node.ID) *messaging.TargetMessage { | ||
| info := c.GetInfo() | ||
| configData, err := info.MarshalWithTruncation(false) | ||
| if err != nil { | ||
| log.Panic("unable to marshal changefeed config", zap.Error(err)) | ||
| } | ||
| checkpointTs := c.GetLastSavedCheckPointTs() | ||
| if status := c.GetStatus(); status != nil { | ||
| checkpointTs = status.CheckpointTs | ||
| } | ||
| return messaging.NewSingleTargetMessage(server, | ||
| messaging.MaintainerManagerTopic, | ||
| &heartbeatpb.AddMaintainerRequest{ | ||
| Id: c.ID.ToPB(), | ||
| CheckpointTs: c.GetStatus().CheckpointTs, | ||
| Config: c.configBytes, | ||
| CheckpointTs: checkpointTs, | ||
| Config: []byte(configData), | ||
| IsNewChangefeed: c.isNew, | ||
| KeyspaceId: c.GetKeyspaceID(), | ||
| KeyspaceId: info.KeyspaceID, | ||
| MaintainerEpoch: info.Epoch, | ||
| }) | ||
|
Comment on lines
281
to
290
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
checkpointTs := c.GetLastSavedCheckPointTs()
if status := c.GetStatus(); status != nil {
checkpointTs = status.CheckpointTs
}
return messaging.NewSingleTargetMessage(server,
messaging.MaintainerManagerTopic,
&heartbeatpb.AddMaintainerRequest{
Id: c.ID.ToPB(),
CheckpointTs: checkpointTs,
Config: []byte(configData),
IsNewChangefeed: c.isNew,
KeyspaceId: info.KeyspaceID,
MaintainerEpoch: info.Epoch,
}) |
||
| } | ||
|
|
||
| func (c *Changefeed) NewRemoveMaintainerMessage(server node.ID, casCade, removed bool) *messaging.TargetMessage { | ||
| return RemoveMaintainerMessage(c.GetKeyspaceID(), c.ID, server, casCade, removed) | ||
| } | ||
|
|
||
| func (c *Changefeed) NewCheckpointTsMessage(ts uint64) *messaging.TargetMessage { | ||
| return messaging.NewSingleTargetMessage(c.GetNodeID(), | ||
| messaging.MaintainerManagerTopic, | ||
|
|
@@ -296,15 +299,25 @@ func (c *Changefeed) NewCheckpointTsMessage(ts uint64) *messaging.TargetMessage | |
| }) | ||
| } | ||
|
|
||
| func RemoveMaintainerMessage(keyspaceID uint32, id common.ChangeFeedID, server node.ID, casCade bool, removed bool) *messaging.TargetMessage { | ||
| // RemoveMaintainerMessage builds the fenced remove request sent to a maintainer owner. | ||
| // The maintainer epoch identifies the owner generation that is allowed to stop. | ||
| func RemoveMaintainerMessage( | ||
| keyspaceID uint32, | ||
| id common.ChangeFeedID, | ||
| server node.ID, | ||
| casCade bool, | ||
| removed bool, | ||
| maintainerEpoch uint64, | ||
| ) *messaging.TargetMessage { | ||
| casCade = casCade || removed | ||
| return messaging.NewSingleTargetMessage(server, | ||
| messaging.MaintainerManagerTopic, | ||
| &heartbeatpb.RemoveMaintainerRequest{ | ||
| Id: id.ToPB(), | ||
| Cascade: casCade, | ||
| Removed: removed, | ||
| KeyspaceId: keyspaceID, | ||
| Id: id.ToPB(), | ||
| Cascade: casCade, | ||
| Removed: removed, | ||
| KeyspaceId: keyspaceID, | ||
| MaintainerEpoch: maintainerEpoch, | ||
| }) | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,19 @@ import ( | |
| "github.com/pingcap/ticdc/pkg/config" | ||
| ) | ||
|
|
||
| // EpochBumpOptions carries metadata persisted together with a changefeed epoch bump. | ||
| type EpochBumpOptions struct { | ||
| CheckpointTs uint64 | ||
| Progress config.Progress | ||
| // UpdateStatus controls whether CheckpointTs and Progress overwrite the | ||
| // persisted status read by the bump transaction. | ||
| UpdateStatus bool | ||
| State *config.FeedState | ||
| Error *config.RunningError | ||
| // UpdateError controls whether Error overwrites the persisted runtime error. | ||
| UpdateError bool | ||
| } | ||
|
|
||
| // Backend is the metastore for the changefeed | ||
| type Backend interface { | ||
| // GetAllChangefeeds returns all changefeeds from the backend db, including stopped and failed changefeeds | ||
|
|
@@ -30,14 +43,18 @@ type Backend interface { | |
| CreateChangefeed(ctx context.Context, info *config.ChangeFeedInfo) error | ||
| // UpdateChangefeed updates changefeed info to db | ||
| UpdateChangefeed(ctx context.Context, info *config.ChangeFeedInfo, checkpointTs uint64, progress config.Progress) error | ||
| // ResumeChangefeed persists the resumed status with a new owner epoch. | ||
| ResumeChangefeed(ctx context.Context, id common.ChangeFeedID, candidateEpoch uint64, checkpointTs uint64) (*config.ChangeFeedInfo, error) | ||
| // BumpChangefeedEpoch is the low-level ownership boundary used before a | ||
| // coordinator path can create a new maintainer owner. It only reads and | ||
| // updates stored status when UpdateStatus is set. | ||
| BumpChangefeedEpoch(ctx context.Context, id common.ChangeFeedID, candidateEpoch uint64, options EpochBumpOptions) (*config.ChangeFeedInfo, error) | ||
| // PauseChangefeed persists the pause status to db for a changefeed | ||
| PauseChangefeed(ctx context.Context, id common.ChangeFeedID) error | ||
| // DeleteChangefeed removes all related info of a changefeed from db | ||
| DeleteChangefeed(ctx context.Context, id common.ChangeFeedID) error | ||
| // SetChangefeedProgress persists the operation progress status to db for a changefeed | ||
| SetChangefeedProgress(ctx context.Context, id common.ChangeFeedID, progress config.Progress) error | ||
| // ResumeChangefeed persists the resumed status to db for a changefeed and returns the resumed info. | ||
| ResumeChangefeed(ctx context.Context, id common.ChangeFeedID, newCheckpointTs uint64) (*config.ChangeFeedInfo, error) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why remove this method? |
||
| // UpdateChangefeedCheckpointTs persists the checkpointTs for changefeeds | ||
| UpdateChangefeedCheckpointTs(ctx context.Context, checkpointTs map[common.ChangeFeedID]uint64) error | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.