@@ -32,6 +32,7 @@ import (
3232 "mcpproxy-go/cmd/mcpproxy-tray/internal/api"
3333 "mcpproxy-go/cmd/mcpproxy-tray/internal/monitor"
3434 "mcpproxy-go/cmd/mcpproxy-tray/internal/state"
35+ "mcpproxy-go/internal/config"
3536 "mcpproxy-go/internal/storage"
3637 "mcpproxy-go/internal/tray"
3738)
@@ -887,6 +888,73 @@ func maskAPIKey(apiKey string) string {
887888 return apiKey [:4 ] + "****" + apiKey [len (apiKey )- 4 :]
888889}
889890
// dockerRecoverySettings groups the tunables consulted by the tray while it
// waits for the Docker engine to come back. Instances are built by
// loadDockerRecoverySettings.
type dockerRecoverySettings struct {
	// intervals is the ordered schedule of waits between Docker availability
	// checks; once the schedule is exhausted the last entry keeps being used.
	intervals []time.Duration
	// maxRetries caps the number of recovery attempts; the loader documents
	// its default of 0 as "unlimited".
	// NOTE(review): no reader of this field is visible in this excerpt - confirm
	// where the cap is enforced.
	maxRetries int

	// Toggles for the tray notifications shown when recovery starts, succeeds,
	// fails, or is retried (retry notifications are only sent after the first
	// attempt).
	notifyOnStart, notifyOnSuccess, notifyOnFailure, notifyOnRetry bool

	// persistentState controls whether the recovery state is loaded, saved and
	// cleared via the tray's recovery state file.
	persistentState bool
}
901+
902+ // loadDockerRecoverySettings loads Docker recovery settings from environment or defaults
903+ func loadDockerRecoverySettings () * dockerRecoverySettings {
904+ settings := & dockerRecoverySettings {
905+ intervals : config .DefaultCheckIntervals (),
906+ maxRetries : 0 , // Unlimited by default
907+ notifyOnStart : true ,
908+ notifyOnSuccess : true ,
909+ notifyOnFailure : true ,
910+ notifyOnRetry : false ,
911+ persistentState : true ,
912+ }
913+
914+ // Check for environment variable overrides
915+ if intervalsStr := os .Getenv ("MCPPROXY_DOCKER_RECOVERY_INTERVALS" ); intervalsStr != "" {
916+ // Parse comma-separated duration strings: "2s,5s,10s,30s,60s"
917+ parts := strings .Split (intervalsStr , "," )
918+ intervals := make ([]time.Duration , 0 , len (parts ))
919+ for _ , part := range parts {
920+ if dur , err := time .ParseDuration (strings .TrimSpace (part )); err == nil {
921+ intervals = append (intervals , dur )
922+ }
923+ }
924+ if len (intervals ) > 0 {
925+ settings .intervals = intervals
926+ }
927+ }
928+
929+ if maxRetriesStr := os .Getenv ("MCPPROXY_DOCKER_RECOVERY_MAX_RETRIES" ); maxRetriesStr != "" {
930+ if val , err := strconv .Atoi (maxRetriesStr ); err == nil {
931+ settings .maxRetries = val
932+ }
933+ }
934+
935+ if val := os .Getenv ("MCPPROXY_DOCKER_RECOVERY_NOTIFY_ON_START" ); val != "" {
936+ settings .notifyOnStart = val == "1" || strings .EqualFold (val , "true" )
937+ }
938+
939+ if val := os .Getenv ("MCPPROXY_DOCKER_RECOVERY_NOTIFY_ON_SUCCESS" ); val != "" {
940+ settings .notifyOnSuccess = val == "1" || strings .EqualFold (val , "true" )
941+ }
942+
943+ if val := os .Getenv ("MCPPROXY_DOCKER_RECOVERY_NOTIFY_ON_FAILURE" ); val != "" {
944+ settings .notifyOnFailure = val == "1" || strings .EqualFold (val , "true" )
945+ }
946+
947+ if val := os .Getenv ("MCPPROXY_DOCKER_RECOVERY_NOTIFY_ON_RETRY" ); val != "" {
948+ settings .notifyOnRetry = val == "1" || strings .EqualFold (val , "true" )
949+ }
950+
951+ if val := os .Getenv ("MCPPROXY_DOCKER_RECOVERY_PERSISTENT_STATE" ); val != "" {
952+ settings .persistentState = val == "1" || strings .EqualFold (val , "true" )
953+ }
954+
955+ return settings
956+ }
957+
890958// getDockerRecoveryStateFilePath returns the path to the tray's Docker recovery state file
891959func getDockerRecoveryStateFilePath () (string , error ) {
892960 homeDir , err := os .UserHomeDir ()
@@ -983,6 +1051,7 @@ type CoreProcessLauncher struct {
9831051 dockerRetryMu sync.Mutex
9841052 dockerRetryCancel context.CancelFunc
9851053 dockerReconnectPending bool
1054+ recoverySettings * dockerRecoverySettings
9861055}
9871056
9881057// NewCoreProcessLauncher creates a new core process launcher
@@ -995,12 +1064,13 @@ func NewCoreProcessLauncher(
9951064 coreTimeout time.Duration ,
9961065) * CoreProcessLauncher {
9971066 return & CoreProcessLauncher {
998- coreURL : coreURL ,
999- logger : logger ,
1000- stateMachine : stateMachine ,
1001- apiClient : apiClient ,
1002- trayApp : trayApp ,
1003- coreTimeout : coreTimeout ,
1067+ coreURL : coreURL ,
1068+ logger : logger ,
1069+ stateMachine : stateMachine ,
1070+ apiClient : apiClient ,
1071+ trayApp : trayApp ,
1072+ coreTimeout : coreTimeout ,
1073+ recoverySettings : loadDockerRecoverySettings (),
10041074 }
10051075}
10061076
@@ -1467,25 +1537,25 @@ func (cpl *CoreProcessLauncher) handleDockerUnavailable(ctx context.Context) {
14671537 cpl .logger .Warn ("Docker engine unavailable - waiting for recovery" )
14681538 }
14691539
1470- // Load existing recovery state to resume or initialize new state
1471- recoveryState , err := loadDockerRecoveryState (cpl .logger )
1472- if err != nil {
1473- cpl .logger .Warn ("Failed to load Docker recovery state, starting fresh" , zap .Error (err ))
1474- recoveryState = nil
1475- }
1476-
1477- // Initialize failure count from persistent state if available
1540+ // Load existing recovery state to resume or initialize new state (if persistent state is enabled)
14781541 failureCount := 0
1479- if recoveryState != nil && ! recoveryState .DockerAvailable {
1480- failureCount = recoveryState .FailureCount
1481- cpl .logger .Infow ("Resuming Docker recovery from persistent state" ,
1482- "previous_attempts" , failureCount ,
1483- "last_attempt" , recoveryState .LastAttempt )
1542+ if cpl .recoverySettings .persistentState {
1543+ recoveryState , err := loadDockerRecoveryState (cpl .logger )
1544+ if err != nil {
1545+ cpl .logger .Warn ("Failed to load Docker recovery state, starting fresh" , zap .Error (err ))
1546+ } else if recoveryState != nil && ! recoveryState .DockerAvailable {
1547+ failureCount = recoveryState .FailureCount
1548+ cpl .logger .Infow ("Resuming Docker recovery from persistent state" ,
1549+ "previous_attempts" , failureCount ,
1550+ "last_attempt" , recoveryState .LastAttempt )
1551+ }
14841552 }
14851553
1486- // Show notification that Docker recovery has started
1487- if err := tray .ShowDockerRecoveryStarted (); err != nil {
1488- cpl .logger .Warn ("Failed to show Docker recovery notification" , zap .Error (err ))
1554+ // Show notification that Docker recovery has started (if enabled)
1555+ if cpl .recoverySettings .notifyOnStart {
1556+ if err := tray .ShowDockerRecoveryStarted (); err != nil {
1557+ cpl .logger .Warn ("Failed to show Docker recovery notification" , zap .Error (err ))
1558+ }
14891559 }
14901560
14911561 cpl .dockerRetryMu .Lock ()
@@ -1497,14 +1567,8 @@ func (cpl *CoreProcessLauncher) handleDockerUnavailable(ctx context.Context) {
14971567 cpl .dockerRetryMu .Unlock ()
14981568
14991569 go func () {
1500- // Exponential backoff intervals: fast when Docker just paused, slower when off for longer
1501- intervals := []time.Duration {
1502- 2 * time .Second , // Immediate retry (Docker just paused)
1503- 5 * time .Second , // Quick retry
1504- 10 * time .Second , // Normal retry
1505- 30 * time .Second , // Slow retry
1506- 60 * time .Second , // Very slow retry (max backoff)
1507- }
1570+ // Use configured intervals or defaults
1571+ intervals := cpl .recoverySettings .intervals
15081572
15091573 // Resume from persistent state if available
15101574 attempt := failureCount
@@ -1520,7 +1584,8 @@ func (cpl *CoreProcessLauncher) handleDockerUnavailable(ctx context.Context) {
15201584 case <- time .After (currentInterval ):
15211585 attempt ++
15221586
1523- if err := cpl .ensureDockerAvailable (retryCtx ); err == nil {
1587+ checkErr := cpl .ensureDockerAvailable (retryCtx )
1588+ if checkErr == nil {
15241589 elapsed := time .Since (startTime )
15251590 cpl .logger .Info ("Docker engine available - transitioning to recovery state" ,
15261591 zap .Int ("attempts" , attempt ),
@@ -1542,26 +1607,36 @@ func (cpl *CoreProcessLauncher) handleDockerUnavailable(ctx context.Context) {
15421607
15431608 // Docker still unavailable, save state
15441609 lastErrMsg := ""
1545- if err != nil {
1546- lastErrMsg = err .Error ()
1610+ if checkErr != nil {
1611+ lastErrMsg = checkErr .Error ()
15471612 cpl .logger .Debug ("Docker still unavailable" ,
15481613 zap .Int ("attempt" , attempt ),
15491614 zap .Duration ("next_check_in" , intervals [min (attempt , len (intervals )- 1 )]),
1550- zap .Error (err ))
1615+ zap .Error (checkErr ))
15511616 }
15521617
1553- // Save recovery state for persistence across restarts
1554- stateToSave := & storage.DockerRecoveryState {
1555- LastAttempt : time .Now (),
1556- FailureCount : attempt ,
1557- DockerAvailable : false ,
1558- RecoveryMode : true ,
1559- LastError : lastErrMsg ,
1560- AttemptsSinceUp : attempt ,
1561- LastSuccessfulAt : time.Time {},
1618+ // Show retry notification if enabled
1619+ if cpl .recoverySettings .notifyOnRetry && attempt > 1 {
1620+ nextRetryIn := intervals [min (attempt , len (intervals )- 1 )].String ()
1621+ if notifyErr := tray .ShowDockerRecoveryRetry (attempt , nextRetryIn ); notifyErr != nil {
1622+ cpl .logger .Warn ("Failed to show Docker recovery retry notification" , zap .Error (notifyErr ))
1623+ }
15621624 }
1563- if saveErr := saveDockerRecoveryState (stateToSave , cpl .logger ); saveErr != nil {
1564- cpl .logger .Warn ("Failed to save Docker recovery state" , zap .Error (saveErr ))
1625+
1626+ // Save recovery state for persistence across restarts (if enabled)
1627+ if cpl .recoverySettings .persistentState {
1628+ stateToSave := & storage.DockerRecoveryState {
1629+ LastAttempt : time .Now (),
1630+ FailureCount : attempt ,
1631+ DockerAvailable : false ,
1632+ RecoveryMode : true ,
1633+ LastError : lastErrMsg ,
1634+ AttemptsSinceUp : attempt ,
1635+ LastSuccessfulAt : time.Time {},
1636+ }
1637+ if saveErr := saveDockerRecoveryState (stateToSave , cpl .logger ); saveErr != nil {
1638+ cpl .logger .Warn ("Failed to save Docker recovery state" , zap .Error (saveErr ))
1639+ }
15651640 }
15661641 }
15671642 }
@@ -1679,14 +1754,18 @@ func (cpl *CoreProcessLauncher) triggerForceReconnect(reason string) {
16791754 zap .String ("reason" , reason ),
16801755 zap .Int ("attempt" , attempt ))
16811756
1682- // Clear recovery state since recovery is complete
1683- if clearErr := clearDockerRecoveryState (cpl .logger ); clearErr != nil {
1684- cpl .logger .Warn ("Failed to clear Docker recovery state" , zap .Error (clearErr ))
1757+ // Clear recovery state since recovery is complete (if persistent state is enabled)
1758+ if cpl .recoverySettings .persistentState {
1759+ if clearErr := clearDockerRecoveryState (cpl .logger ); clearErr != nil {
1760+ cpl .logger .Warn ("Failed to clear Docker recovery state" , zap .Error (clearErr ))
1761+ }
16851762 }
16861763
1687- // Show success notification
1688- if err := tray .ShowDockerRecoverySuccess (0 ); err != nil {
1689- cpl .logger .Warn ("Failed to show recovery success notification" , zap .Error (err ))
1764+ // Show success notification (if enabled)
1765+ if cpl .recoverySettings .notifyOnSuccess {
1766+ if err := tray .ShowDockerRecoverySuccess (0 ); err != nil {
1767+ cpl .logger .Warn ("Failed to show recovery success notification" , zap .Error (err ))
1768+ }
16901769 }
16911770 return
16921771 }
@@ -1695,23 +1774,27 @@ func (cpl *CoreProcessLauncher) triggerForceReconnect(reason string) {
16951774 zap .String ("reason" , reason ),
16961775 zap .Int ("attempts" , maxAttempts ))
16971776
1698- // Save failure state
1699- failedState := & storage.DockerRecoveryState {
1700- LastAttempt : time .Now (),
1701- FailureCount : maxAttempts ,
1702- DockerAvailable : true , // Docker is available, but reconnection failed
1703- RecoveryMode : false ,
1704- LastError : "Max reconnection attempts exceeded" ,
1705- AttemptsSinceUp : maxAttempts ,
1706- LastSuccessfulAt : time.Time {},
1707- }
1708- if saveErr := saveDockerRecoveryState (failedState , cpl .logger ); saveErr != nil {
1709- cpl .logger .Warn ("Failed to save recovery failure state" , zap .Error (saveErr ))
1777+ // Save failure state (if persistent state is enabled)
1778+ if cpl .recoverySettings .persistentState {
1779+ failedState := & storage.DockerRecoveryState {
1780+ LastAttempt : time .Now (),
1781+ FailureCount : maxAttempts ,
1782+ DockerAvailable : true , // Docker is available, but reconnection failed
1783+ RecoveryMode : false ,
1784+ LastError : "Max reconnection attempts exceeded" ,
1785+ AttemptsSinceUp : maxAttempts ,
1786+ LastSuccessfulAt : time.Time {},
1787+ }
1788+ if saveErr := saveDockerRecoveryState (failedState , cpl .logger ); saveErr != nil {
1789+ cpl .logger .Warn ("Failed to save recovery failure state" , zap .Error (saveErr ))
1790+ }
17101791 }
17111792
1712- // Show failure notification
1713- if err := tray .ShowDockerRecoveryFailed ("Max reconnection attempts exceeded" ); err != nil {
1714- cpl .logger .Warn ("Failed to show recovery failure notification" , zap .Error (err ))
1793+ // Show failure notification (if enabled)
1794+ if cpl .recoverySettings .notifyOnFailure {
1795+ if err := tray .ShowDockerRecoveryFailed ("Max reconnection attempts exceeded" ); err != nil {
1796+ cpl .logger .Warn ("Failed to show recovery failure notification" , zap .Error (err ))
1797+ }
17151798 }
17161799}
17171800
0 commit comments