-
Notifications
You must be signed in to change notification settings - Fork 330
Implement pool shutdown for ChannelDbConnectionPool and harden WaitHandleDbConnectionPool shutdown #4302
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Implement pool shutdown for ChannelDbConnectionPool and harden WaitHandleDbConnectionPool shutdown #4302
Changes from all commits
6431209
b24d4fb
7ba73a8
b8d56d9
9a45a66
954679d
84873dc
6dc67ce
21f3aa4
d3a7200
751ad2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,6 @@ | |
| using System.Collections.Concurrent; | ||
| using System.Collections.ObjectModel; | ||
| using System.Data.Common; | ||
| using System.Diagnostics; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Threading; | ||
| using System.Threading.Channels; | ||
|
|
@@ -92,6 +91,13 @@ internal sealed class ChannelDbConnectionPool : IDbConnectionPool, IDisposable | |
| /// Must be updated using <see cref="Interlocked"/> operations to ensure thread safety. | ||
| /// </summary> | ||
| private volatile int _isClearing; | ||
|
|
||
| /// <summary> | ||
| /// Tracks whether <see cref="Shutdown"/> has already initiated the shutdown sequence so that | ||
| /// repeated calls are observed as no-ops. Updated atomically via | ||
| /// <see cref="Interlocked.CompareExchange(ref int, int, int)"/>. | ||
| /// </summary> | ||
| private int _shutdownInitiated; | ||
| #endregion | ||
|
|
||
| /// <summary> | ||
|
|
@@ -264,16 +270,56 @@ public void ReturnInternalConnection(DbConnectionInternal connection, DbConnecti | |
| } | ||
| else | ||
| { | ||
| var written = _idleChannel.TryWrite(connection); | ||
| Debug.Assert(written, "Failed to write returning connection to the idle channel."); | ||
| if (!_idleChannel.TryWrite(connection)) | ||
| { | ||
| // The channel has been completed (pool is shutting down). Race window | ||
| // between the State check above and TryWrite: destroy instead of pooling. | ||
| RemoveConnection(connection); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <inheritdoc /> | ||
| public void Shutdown() | ||
| { | ||
| // idempotent. Compare-and-exchange ensures only one caller performs shutdown work. | ||
| if (Interlocked.CompareExchange(ref _shutdownInitiated, 1, 0) != 0) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| SqlClientEventSource.Log.TryPoolerTraceEvent( | ||
| "<prov.DbConnectionPool.Shutdown|RES|INFO|CPOOL> {0}", Id); | ||
|
|
||
| // Transition to ShuttingDown. After this point, ReturnInternalConnection | ||
| // routes returning connections to RemoveConnection. | ||
| State = ShuttingDown; | ||
|
|
||
| // Stop the idle-pruning timer before draining so a tick cannot race with | ||
| // the final drain below. PoolPruner.Dispose is idempotent and non-throwing. | ||
| Pruner?.Dispose(); | ||
|
|
||
| // Complete the channel writer so: | ||
|
priyankatiwari08 marked this conversation as resolved.
|
||
| // - no further idle connections can be enqueued (TryWrite returns false), and | ||
| // - in-flight / future async waiters on ReadAsync fault with ChannelClosedException. | ||
| _idleChannel.Complete(); | ||
|
|
||
| // Reuse Clear() for the drain. Clear bumps _clearGeneration so any active | ||
| // checked-out connection fails IsLiveConnection on return and is removed, and it | ||
| // drains the idle channel up to its captured IdleCount. | ||
| Clear(); | ||
|
|
||
| // Clear() may short-circuit if another caller is already draining. Because the | ||
| // channel is now completed, no new items can be enqueued, so it is safe to do a | ||
| // final unbounded drain to mop up anything Clear() may have skipped. | ||
| while (_idleChannel.TryRead(out DbConnectionInternal? connection)) | ||
| { | ||
| if (connection is not null) | ||
| { | ||
| RemoveConnection(connection); | ||
| } | ||
| // null sentinels are wake-up signals only; nothing to destroy. | ||
| } | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have we finished the shutdown process here? Should we transition to State Shutdown (which doesn't exixst yet)? What happens if anything between setting State = ShuttingDown and here throws? Should we allow a subsequent Shutdown() to try again? Should Shutdown() be idempotent? |
||
|
|
||
| /// <summary> | ||
|
|
@@ -284,7 +330,12 @@ public void Shutdown() | |
| /// <inheritdoc /> | ||
| public void Startup() | ||
| { | ||
| // No-op for now, warmup will be implemented later. | ||
| // This pool has no background timers today (idle timeout is enforced lazily in | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But does it make sense to set Running in the constructor before Startup() is called? Maybe we need a few more states, and they should be consistently set across both pools: Created - set by constructors Then Startup() and Shutdown() can be retryable and idempotent. @mdaigle - Thoughts? Is this overkill? Do we just need an isRunning bool instead? |
||
| // IsLiveConnection on retrieval; pruning is not implemented). State is set to Running | ||
| // in the constructor, so this is currently the symmetrical counterpart of Shutdown. | ||
| // Background work (warmup, pruning timers) will be added here when introduced. | ||
| SqlClientEventSource.Log.TryPoolerTraceEvent( | ||
| "<prov.DbConnectionPool.Startup|RES|INFO|CPOOL> {0}", Id); | ||
| } | ||
|
|
||
| /// <inheritdoc /> | ||
|
|
@@ -300,6 +351,19 @@ public bool TryGetConnection( | |
| TimeoutTimer timeout, | ||
| out DbConnectionInternal? connection) | ||
| { | ||
| // Short-circuit when the pool is not Running (i.e., shut down or never started). | ||
| // Returning (true, null) matches WaitHandleDbConnectionPool.TryGetConnection and tells | ||
| // the caller "completed; no connection available" without entering the channel path, | ||
| // which would otherwise reserve a slot, attempt to open a fresh physical connection, | ||
| // and then immediately destroy it on return because State == ShuttingDown. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's not assume that ShuttingDown is the only possible other state - this leads to misleading docs as time goes on. "... destroy it on return because the pool isn't running." That is (mostly) future proof. |
||
| if (State is not Running) | ||
| { | ||
| SqlClientEventSource.Log.TryPoolerTraceEvent( | ||
| "<prov.DbConnectionPool.TryGetConnection|RES|CPOOL> {0}, State != Running.", Id); | ||
| connection = null; | ||
| return true; | ||
| } | ||
|
|
||
| // If taskCompletionSource is null, we are in a sync context. | ||
| if (taskCompletionSource is null) | ||
| { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,10 +26,19 @@ internal IdleConnectionChannel() | |
| { | ||
| var channel = Channel.CreateUnbounded<DbConnectionInternal?>(); | ||
| _reader = channel.Reader; | ||
| //TODO: the channel should be completed on pool shutdown | ||
| _writer = channel.Writer; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Marks the channel writer as complete. After completion, <see cref="TryWrite"/> | ||
| /// returns <see langword="false"/> for any future writes, and any in-flight or future | ||
| /// <see cref="ReadAsync"/> waiters will fault with <see cref="System.Threading.Channels.ChannelClosedException"/> | ||
| /// once the channel is drained. Used by the connection pool to signal shutdown. | ||
| /// </summary> | ||
| /// <returns><see langword="true"/> if this call completed the channel; otherwise <see langword="false"/> | ||
| /// (channel was already completed).</returns> | ||
| internal bool Complete() => _writer.TryComplete(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing tests for this new method. |
||
|
|
||
| /// <summary> | ||
| /// The number of non-null connections currently in the channel. | ||
| /// </summary> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -191,16 +191,16 @@ public void Dispose() | |
|
|
||
| private readonly WaitCallback _poolCreateRequest; | ||
|
|
||
| private int _waitCount; | ||
| internal int _waitCount; | ||
| private readonly PoolWaitHandles _waitHandles; | ||
|
|
||
| private Exception _resError; | ||
| private volatile bool _errorOccurred; | ||
|
|
||
| private int _errorWait; | ||
| private Timer _errorTimer; | ||
| internal Timer _errorTimer; | ||
|
|
||
| private Timer _cleanupTimer; | ||
| internal Timer _cleanupTimer; | ||
|
|
||
| private readonly TransactedConnectionPool _transactedConnectionPool; | ||
|
|
||
|
|
@@ -329,8 +329,16 @@ public bool IsRunning | |
|
|
||
| public TransactedConnectionPool TransactedConnectionPool => _transactedConnectionPool; | ||
|
|
||
| private void CleanupCallback(object state) | ||
| internal void CleanupCallback(object state) | ||
| { | ||
| // If the pool is shutting down, skip work. Shutdown disposes the timer, but | ||
| // a callback may already be in-flight when Shutdown runs; this guard ensures it does | ||
| // not perform pruning or re-arm pool create requests. | ||
| if (State == ShuttingDown) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think Same for |
||
| { | ||
| return; | ||
| } | ||
|
|
||
| // Called when the cleanup-timer ticks over. | ||
|
|
||
| // This is the automatic pruning method. Every period, we will | ||
|
|
@@ -762,8 +770,15 @@ private void DestroyObject(DbConnectionInternal obj) | |
| } | ||
| } | ||
|
|
||
| private void ErrorCallback(object state) | ||
| internal void ErrorCallback(object state) | ||
| { | ||
| // Skip work if the pool is shutting down. The shutdown path disposes the | ||
| // timer; this guard handles the in-flight-callback race. | ||
| if (State == ShuttingDown) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| SqlClientEventSource.Log.TryPoolerTraceEvent("<prov.DbConnectionPool.ErrorCallback|RES|CPOOL> {0}, Resetting Error handling.", Id); | ||
| _errorOccurred = false; | ||
| _waitHandles.ErrorEvent.Reset(); | ||
|
|
@@ -989,6 +1004,31 @@ private bool TryGetConnection(DbConnection owningObject, uint waitForMultipleObj | |
| { | ||
| waitResult = WaitHandle.WaitAny(_waitHandles.GetHandles(allowCreate), unchecked((int)waitForMultipleObjectsTimeout)); | ||
|
|
||
| // After waking, observe shutdown state and bail out so waiters | ||
| // do not spin against a drained pool. If WaitAny consumed a | ||
| // PoolSemaphore slot, release it back so the accounting stays | ||
| // balanced; otherwise the slot would leak and other waiters | ||
| // (or callers that arrive after Shutdown completes its own | ||
| // Release loop) would starve. | ||
| if (State != Running) | ||
| { | ||
| SqlClientEventSource.Log.TryPoolerTraceEvent("<prov.DbConnectionPool.GetConnection|RES|CPOOL> {0}, Pool is shutting down; abandoning wait.", Id); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All we know is that the pool isn't running. We don't know that it is shutting down. Logs shouldn't make that assumption. "Pool is not running; abandoning wait" Look for other cases where the logic, docs, or logs assume that "not Running" means "Shutting Down", which isn't future proof. I'm not sure there are many cases where we actually want to check if State == ShuttingDown. I think State != Running is the correct check. We chose an enum for State presumably so we can add states in the future. If it's just 2 values, then it should be a bool isRunning. |
||
| if (waitResult == SEMAPHORE_HANDLE || waitResult == WAIT_ABANDONED + SEMAPHORE_HANDLE) | ||
| { | ||
|
Comment on lines
+1013
to
+1017
|
||
| try | ||
| { | ||
| _waitHandles.PoolSemaphore.Release(1); | ||
| } | ||
| catch (SemaphoreFullException) | ||
| { | ||
| // Pool semaphore was already saturated by Shutdown's bulk release; safe to ignore. | ||
| } | ||
| } | ||
| Interlocked.Decrement(ref _waitCount); | ||
| connection = null; | ||
| return false; | ||
| } | ||
|
|
||
|
priyankatiwari08 marked this conversation as resolved.
|
||
| // From the WaitAny docs: "If more than one object became signaled during | ||
| // the call, this is the array index of the signaled object with the | ||
| // smallest index value of all the signaled objects." This is important | ||
|
|
@@ -1504,6 +1544,16 @@ private bool ReclaimEmancipatedObjects() | |
| public void Startup() | ||
| { | ||
| SqlClientEventSource.Log.TryPoolerTraceEvent("<prov.DbConnectionPool.Startup|RES|INFO|CPOOL> {0}, CleanupWait={1}", Id, _cleanupWait); | ||
|
|
||
| // Do not resurrect a shut-down pool. Without this guard a Startup() call after | ||
| // Shutdown() would create a fresh _cleanupTimer and queue a PoolCreateRequest | ||
| // against a pool that will never accept connections back, leaking the timer | ||
| // and scheduling background work that immediately short-circuits. | ||
| if (State is ShuttingDown) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another case where State != Running is the correct check. |
||
| { | ||
| return; | ||
| } | ||
|
|
||
| _cleanupTimer = CreateCleanupTimer(); | ||
|
|
||
| if (NeedToReplenish) | ||
|
|
@@ -1515,15 +1565,45 @@ public void Startup() | |
| public void Shutdown() | ||
| { | ||
| SqlClientEventSource.Log.TryPoolerTraceEvent("<prov.DbConnectionPool.Shutdown|RES|INFO|CPOOL> {0}", Id); | ||
|
|
||
| // Idempotent: subsequent calls observe ShuttingDown and bail. | ||
| if (State == ShuttingDown) | ||
| { | ||
| return; | ||
| } | ||
|
priyankatiwari08 marked this conversation as resolved.
|
||
| State = ShuttingDown; | ||
|
|
||
| // deactivate timer callbacks | ||
| Timer t = _cleanupTimer; | ||
| _cleanupTimer = null; | ||
| if (t != null) | ||
| // Dispose all background timers so they no longer schedule new work. | ||
| // Note that any timer callback already in flight may still observe State == ShuttingDown | ||
| // and short-circuit (see CleanupCallback / ErrorCallback). | ||
| Timer cleanup = Interlocked.Exchange(ref _cleanupTimer, null); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's make sure we've addressed this bug: #1881
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This PR adds the per-pool Shutdown() machinery that #1881 ultimately needs, but it doesn't actually touch SqlConnectionFactory._pruningTimer — so the symptom (the PruneConnectionPoolGroups trace firing every 30 s after ClearAllPools()) will still reproduce. I'd prefer to land #4302 as the foundation and address #1881 in a small follow-up that adds a "park the timer when there's no work" guard to PruneConnectionPoolGroups and a re-arm in GetConnectionPoolGroup. Happy to file that follow-up issue and link it. |
||
| cleanup?.Dispose(); | ||
| Timer error = Interlocked.Exchange(ref _errorTimer, null); | ||
| error?.Dispose(); | ||
|
|
||
| // Wake any threads parked in WaitHandle.WaitAny by releasing as many semaphore | ||
| // slots as there are recorded waiters. Using _waitCount (rather than MaxPoolSize) | ||
| // avoids ArgumentOutOfRangeException when MaxPoolSize == 0 (unlimited) and ensures | ||
| // we wake every parked waiter even when _waitCount exceeds MaxPoolSize. Waiters | ||
| // observe State != Running after wake-up and bail. | ||
| int waitersToWake = Volatile.Read(ref _waitCount); | ||
| if (waitersToWake > 0) | ||
| { | ||
| t.Dispose(); | ||
| try | ||
| { | ||
| _waitHandles.PoolSemaphore.Release(waitersToWake); | ||
| } | ||
| catch (SemaphoreFullException) | ||
| { | ||
| // Semaphore already saturated; nothing to do. | ||
| } | ||
| } | ||
|
|
||
| // Reuse Clear() to doom every connection (including active checked-out ones), drain | ||
| // both idle stacks, and reclaim emancipated objects. Active connections destroy | ||
| // themselves on return either via the doom flag or via DeactivateObject's | ||
| // State == ShuttingDown branch. | ||
| Clear(); | ||
| } | ||
|
|
||
| // TransactionEnded merely provides the plumbing for DbConnectionInternal to access the transacted pool | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.