Skip to content

Commit 80f983b

Browse files
sellakumaranclaude
andauthored
fix: prevent InternalServerError loop when --update-endpoint fails on create (#304)
* fix: prevent InternalServerError loop when --update-endpoint fails on create When --update-endpoint fails during the CREATE step (e.g. Azure InternalServerError), Azure may partially provision the new endpoint. Because BotMessagingEndpoint only updates on success, all subsequent runs delete the stale old endpoint and attempt to create the same partially-provisioned endpoint again, hitting InternalServerError on every retry. Add Step 1.5 in UpdateEndpointAsync: pre-emptively delete the target endpoint name (derived from newEndpointUrl) before creating it, ensuring a clean slate regardless of any previous failed run. Only applies when NeedDeployment=false (non-Azure-hosted agents). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Normalize location reuse in endpoint update logic Refactor UpdateEndpointAsync to normalize location once and reuse for all endpoint operations, reducing duplication. Add warning logging if pre-create cleanup does not confirm deletion. Update unit tests to reflect normalization and endpoint name logic. Add test to verify both old and target endpoints are deleted when partially-provisioned endpoints exist. * Improve logging for endpoint deletion and diagnostics Enhanced logging for endpoint deletion by: - Including both endpoint name and source URL in removal logs. - Promoting endpoint name and agent blueprint ID logs to Information level. - Logging full delete request payload details (instance name, AppId, TenantId, location, environment). These changes improve traceability and aid in debugging endpoint deletion issues. * Fix environment logging to use local config variable Updated the delete request payload logging to use config.Environment instead of _config.Environment, ensuring the correct environment value is logged from the local configuration. * Improve correlation ID handling and logging in cleanup Each cleanup and blueprint command now generates and logs its own correlation ID at execution time for better traceability. The correlation ID is propagated through async operations and method calls, improving observability and debugging by ensuring all related logs are tagged with a unique identifier. Method signatures and invocations were updated to support this change. --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 495c8ea commit 80f983b

5 files changed

Lines changed: 163 additions & 19 deletions

File tree

src/Microsoft.Agents.A365.DevTools.Cli/Commands/CleanupCommand.cs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,18 @@ public static Command CreateCommand(
4040
cleanupCommand.AddOption(configOption);
4141
cleanupCommand.AddOption(verboseOption);
4242

43-
// Generate correlation ID at workflow entry point
44-
var correlationId = HttpClientFactory.GenerateCorrelationId();
45-
4643
// Set default handler for 'a365 cleanup' (without subcommand) - cleans up everything
4744
cleanupCommand.SetHandler(async (configFile, verbose) =>
4845
{
46+
// Generate correlation ID at workflow entry point
47+
var correlationId = HttpClientFactory.GenerateCorrelationId();
48+
logger.LogInformation("Starting cleanup (CorrelationId: {CorrelationId})", correlationId);
49+
4950
await ExecuteAllCleanupAsync(logger, configService, botConfigurator, executor, agentBlueprintService, confirmationProvider, federatedCredentialService, configFile, correlationId: correlationId);
5051
}, configOption, verboseOption);
5152

5253
// Add subcommands for granular control
53-
cleanupCommand.AddCommand(CreateBlueprintCleanupCommand(logger, configService, botConfigurator, executor, agentBlueprintService, federatedCredentialService, correlationId: correlationId));
54+
cleanupCommand.AddCommand(CreateBlueprintCleanupCommand(logger, configService, botConfigurator, executor, agentBlueprintService, federatedCredentialService));
5455
cleanupCommand.AddCommand(CreateAzureCleanupCommand(logger, configService, executor));
5556
cleanupCommand.AddCommand(CreateInstanceCleanupCommand(logger, configService, executor));
5657

@@ -63,8 +64,7 @@ private static Command CreateBlueprintCleanupCommand(
6364
IBotConfigurator botConfigurator,
6465
CommandExecutor executor,
6566
AgentBlueprintService agentBlueprintService,
66-
FederatedCredentialService federatedCredentialService,
67-
string? correlationId = null)
67+
FederatedCredentialService federatedCredentialService)
6868
{
6969
var command = new Command("blueprint", "Remove Entra ID blueprint application and service principal");
7070

@@ -91,6 +91,10 @@ private static Command CreateBlueprintCleanupCommand(
9191
{
9292
try
9393
{
94+
// Generate correlation ID at workflow entry point
95+
var correlationId = HttpClientFactory.GenerateCorrelationId();
96+
logger.LogInformation("Starting blueprint cleanup (CorrelationId: {CorrelationId})", correlationId);
97+
9498
var config = await LoadConfigAsync(configFile, logger, configService);
9599
if (config == null) return;
96100

src/Microsoft.Agents.A365.DevTools.Cli/Commands/SetupSubcommands/BlueprintSubcommand.cs

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,8 @@ await UpdateEndpointAsync(
197197
logger: logger,
198198
configService: configService,
199199
botConfigurator: botConfigurator,
200-
platformDetector: platformDetector);
200+
platformDetector: platformDetector,
201+
correlationId: correlationId);
201202
}
202203
catch (Agent365Exception ex)
203204
{
@@ -1859,13 +1860,15 @@ await ProjectSettingsSyncHelper.ExecuteAsync(
18591860
/// <param name="configService">Configuration service</param>
18601861
/// <param name="botConfigurator">Bot configurator service</param>
18611862
/// <param name="platformDetector">Platform detector service</param>
1863+
/// <param name="correlationId">Optional correlation ID for tracing</param>
18621864
public static async Task UpdateEndpointAsync(
18631865
string configPath,
18641866
string newEndpointUrl,
18651867
ILogger logger,
18661868
IConfigService configService,
18671869
IBotConfigurator botConfigurator,
1868-
PlatformDetector platformDetector)
1870+
PlatformDetector platformDetector,
1871+
string? correlationId = null)
18691872
{
18701873
var setupConfig = await configService.LoadAsync(configPath);
18711874

@@ -1887,6 +1890,12 @@ public static async Task UpdateEndpointAsync(
18871890
logger.LogInformation("Updating messaging endpoint...");
18881891
logger.LogInformation("");
18891892

1893+
// Normalize location once; used by both Step 1 and Step 1.5.
1894+
// Null-coalescing is intentional: Location is only validated inside the Step 1 block (not here),
1895+
// so it may still be null at this point. The empty-string fallback is never passed to any API —
1896+
// Step 1 throws before using it, and Step 1.5 guards on !IsNullOrWhiteSpace(Location).
1897+
var normalizedLocation = setupConfig.Location?.Replace(" ", "").ToLowerInvariant() ?? string.Empty;
1898+
18901899
// Step 1: Delete existing endpoint if it exists
18911900
if (!string.IsNullOrWhiteSpace(setupConfig.MessagingEndpoint) || !string.IsNullOrWhiteSpace(setupConfig.BotName))
18921901
{
@@ -1916,12 +1925,11 @@ public static async Task UpdateEndpointAsync(
19161925
endpointName = Services.Helpers.EndpointHelper.GetEndpointName(setupConfig.BotName);
19171926
}
19181927

1919-
var normalizedLocation = setupConfig.Location.Replace(" ", "").ToLowerInvariant();
1920-
19211928
var deleted = await botConfigurator.DeleteEndpointWithAgentBlueprintAsync(
19221929
endpointName,
19231930
normalizedLocation,
1924-
setupConfig.AgentBlueprintId);
1931+
setupConfig.AgentBlueprintId,
1932+
correlationId: correlationId);
19251933

19261934
if (!deleted)
19271935
{
@@ -1936,12 +1944,29 @@ public static async Task UpdateEndpointAsync(
19361944
logger.LogInformation("No existing endpoint found. Proceeding with registration.");
19371945
}
19381946

1947+
// Step 1.5: Pre-create cleanup of the target endpoint name.
1948+
// If a previous --update-endpoint failed during the create step, Azure may have
1949+
// partially provisioned the new endpoint and left it in a bad state that blocks
1950+
// subsequent creates with InternalServerError. Delete it now to ensure a clean slate.
1951+
if (!setupConfig.NeedDeployment && !string.IsNullOrWhiteSpace(setupConfig.Location))
1952+
{
1953+
var targetEndpointName = Services.Helpers.EndpointHelper.GetEndpointNameFromUrl(newEndpointUrl, setupConfig.AgentBlueprintId);
1954+
logger.LogInformation("Removing target endpoint '{EndpointName}' (derived from {Url}) to ensure a clean state before registration.", targetEndpointName, newEndpointUrl);
1955+
var preCleanupDeleted = await botConfigurator.DeleteEndpointWithAgentBlueprintAsync(targetEndpointName, normalizedLocation, setupConfig.AgentBlueprintId, correlationId: correlationId);
1956+
if (!preCleanupDeleted)
1957+
{
1958+
// Not fatal — proceed and let Step 2 surface the error if the partially-provisioned
1959+
// endpoint is still blocking. The warning helps diagnose production issues.
1960+
logger.LogWarning("Pre-create cleanup for '{EndpointName}' did not confirm deletion. Proceeding anyway.", targetEndpointName);
1961+
}
1962+
}
1963+
19391964
// Step 2: Register new endpoint with the provided URL
19401965
logger.LogInformation("");
19411966
logger.LogInformation("Registering new messaging endpoint...");
19421967

19431968
var (endpointRegistered, _) = await SetupHelpers.RegisterBlueprintMessagingEndpointAsync(
1944-
setupConfig, logger, botConfigurator, newEndpointUrl);
1969+
setupConfig, logger, botConfigurator, newEndpointUrl, correlationId: correlationId);
19451970

19461971
if (!endpointRegistered)
19471972
{

src/Microsoft.Agents.A365.DevTools.Cli/Services/BotConfigurator.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,8 @@ public async Task<bool> DeleteEndpointWithAgentBlueprintAsync(
206206
string? correlationId = null)
207207
{
208208
_logger.LogInformation("Deleting endpoint with Agent Blueprint Identity...");
209-
_logger.LogDebug(" Endpoint Name: {EndpointName}", endpointName);
210-
_logger.LogDebug(" Agent Blueprint ID: {AgentBlueprintId}", agentBlueprintId);
209+
_logger.LogInformation(" Endpoint Name: {EndpointName}", endpointName);
210+
_logger.LogInformation(" Agent Blueprint ID: {AgentBlueprintId}", agentBlueprintId);
211211

212212
if (string.IsNullOrWhiteSpace(location))
213213
{
@@ -288,6 +288,12 @@ public async Task<bool> DeleteEndpointWithAgentBlueprintAsync(
288288

289289
// Call the endpoint
290290
_logger.LogInformation("Making request to delete endpoint (Location: {Location}).", normalizedLocation);
291+
_logger.LogInformation("Delete request payload:");
292+
_logger.LogInformation(" AzureBotServiceInstanceName: {Name}", endpointName);
293+
_logger.LogInformation(" AppId: {AppId}", agentBlueprintId);
294+
_logger.LogInformation(" TenantId: {TenantId}", tenantId);
295+
_logger.LogInformation(" Location: {Location}", normalizedLocation);
296+
_logger.LogInformation(" Environment: {Environment}", EndpointHelper.GetDeploymentEnvironment(config.Environment));
291297

292298
using var request = new HttpRequestMessage(HttpMethod.Delete, deleteEndpointUrl);
293299
request.Content = new StringContent(deleteEndpointBody.ToJsonString(), System.Text.Encoding.UTF8, "application/json");

src/Tests/Microsoft.Agents.A365.DevTools.Cli.Tests/Commands/BlueprintSubcommandTests.cs

Lines changed: 97 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using Microsoft.Agents.A365.DevTools.Cli.Commands.SetupSubcommands;
66
using Microsoft.Agents.A365.DevTools.Cli.Models;
77
using Microsoft.Agents.A365.DevTools.Cli.Services;
8+
using Microsoft.Agents.A365.DevTools.Cli.Services.Helpers;
89
using Microsoft.Extensions.Logging;
910
using NSubstitute;
1011
using System.CommandLine;
@@ -1462,7 +1463,7 @@ await _mockBotConfigurator.DidNotReceive().CreateEndpointWithAgentBlueprintAsync
14621463
}
14631464

14641465
[Fact]
1465-
public async Task UpdateEndpointAsync_WithNoExistingEndpoint_ShouldSkipDeleteAndRegister()
1466+
public async Task UpdateEndpointAsync_WithNoExistingOldEndpoint_ShouldOnlyCallPreCreateCleanup()
14661467
{
14671468
// Arrange - Config without BotName (no existing endpoint)
14681469
var config = new Agent365Config
@@ -1490,6 +1491,10 @@ public async Task UpdateEndpointAsync_WithNoExistingEndpoint_ShouldSkipDeleteAnd
14901491
_mockConfigService.SaveStateAsync(Arg.Any<Agent365Config>(), Arg.Any<string>())
14911492
.Returns(Task.CompletedTask);
14921493

1494+
_mockBotConfigurator.DeleteEndpointWithAgentBlueprintAsync(
1495+
Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>())
1496+
.Returns(Task.FromResult(true)); // NotFound = success for pre-create cleanup
1497+
14931498
_mockBotConfigurator.CreateEndpointWithAgentBlueprintAsync(
14941499
Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>())
14951500
.Returns(EndpointRegistrationResult.Created);
@@ -1503,13 +1508,100 @@ await BlueprintSubcommand.UpdateEndpointAsync(
15031508
_mockBotConfigurator,
15041509
_mockPlatformDetector);
15051510

1506-
// Assert - Should NOT call delete (no existing endpoint)
1507-
await _mockBotConfigurator.DidNotReceive().DeleteEndpointWithAgentBlueprintAsync(
1511+
// Assert - Step 1 (delete old) is skipped — no existing endpoint to delete.
1512+
// Step 1.5 (pre-create cleanup) still calls delete exactly once with the TARGET endpoint name,
1513+
// so there is exactly one delete call total.
1514+
var expectedTargetName = EndpointHelper.GetEndpointNameFromUrl(newEndpointUrl, config.AgentBlueprintId);
1515+
await _mockBotConfigurator.Received(1).DeleteEndpointWithAgentBlueprintAsync(
1516+
expectedTargetName,
1517+
"eastus",
1518+
config.AgentBlueprintId);
1519+
1520+
// Should still register the new endpoint
1521+
await _mockBotConfigurator.Received(1).CreateEndpointWithAgentBlueprintAsync(
15081522
Arg.Any<string>(),
15091523
Arg.Any<string>(),
1510-
Arg.Any<string>());
1524+
newEndpointUrl,
1525+
Arg.Any<string>(),
1526+
config.AgentBlueprintId);
1527+
}
1528+
finally
1529+
{
1530+
if (File.Exists(generatedPath)) File.Delete(generatedPath);
1531+
if (File.Exists(configPath)) File.Delete(configPath);
1532+
}
1533+
}
15111534

1512-
// Should still register the new endpoint
1535+
[Fact]
1536+
public async Task UpdateEndpointAsync_WithExistingOldEndpointAndPartiallyProvisionedTarget_ShouldCallDeleteTwice()
1537+
{
1538+
// Regression: when a prior --update-endpoint failed during the create step, Azure may have
1539+
// partially provisioned the new endpoint. On the next run, BOTH Step 1 (delete old) and
1540+
// Step 1.5 (pre-create cleanup of target) must fire, targeting different endpoint names.
1541+
var currentlyRegisteredUrl = "https://currently-registered-3979.inc1.devtunnels.ms/api/messages";
1542+
var newEndpointUrl = "https://newtunnel-3979.inc1.devtunnels.ms/api/messages";
1543+
1544+
var config = new Agent365Config
1545+
{
1546+
TenantId = "00000000-0000-0000-0000-000000000000",
1547+
AgentBlueprintId = "blueprint-123",
1548+
MessagingEndpoint = currentlyRegisteredUrl, // static config (original tunnel)
1549+
BotMessagingEndpoint = currentlyRegisteredUrl, // generated config (last successful registration)
1550+
Location = "eastus",
1551+
NeedDeployment = false,
1552+
DeploymentProjectPath = Path.GetTempPath()
1553+
};
1554+
1555+
var testId = Guid.NewGuid().ToString();
1556+
var configPath = Path.Combine(Path.GetTempPath(), $"test-config-{testId}.json");
1557+
var generatedPath = Path.Combine(Path.GetTempPath(), $"a365.generated.config-{testId}.json");
1558+
1559+
await File.WriteAllTextAsync(generatedPath, "{}");
1560+
1561+
try
1562+
{
1563+
_mockConfigService.LoadAsync(Arg.Any<string>(), Arg.Any<string>())
1564+
.Returns(Task.FromResult(config));
1565+
1566+
_mockConfigService.SaveStateAsync(Arg.Any<Agent365Config>(), Arg.Any<string>())
1567+
.Returns(Task.CompletedTask);
1568+
1569+
_mockBotConfigurator.DeleteEndpointWithAgentBlueprintAsync(
1570+
Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>())
1571+
.Returns(Task.FromResult(true));
1572+
1573+
_mockBotConfigurator.CreateEndpointWithAgentBlueprintAsync(
1574+
Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>())
1575+
.Returns(EndpointRegistrationResult.Created);
1576+
1577+
// Act
1578+
await BlueprintSubcommand.UpdateEndpointAsync(
1579+
configPath,
1580+
newEndpointUrl,
1581+
_mockLogger,
1582+
_mockConfigService,
1583+
_mockBotConfigurator,
1584+
_mockPlatformDetector);
1585+
1586+
// Assert — exactly two delete calls with distinct endpoint names
1587+
var oldEndpointName = EndpointHelper.GetEndpointNameFromUrl(currentlyRegisteredUrl, config.AgentBlueprintId);
1588+
var targetEndpointName = EndpointHelper.GetEndpointNameFromUrl(newEndpointUrl, config.AgentBlueprintId);
1589+
1590+
oldEndpointName.Should().NotBe(targetEndpointName, "Step 1 and Step 1.5 must target different endpoints");
1591+
1592+
// Step 1: delete the currently-registered (old) endpoint
1593+
await _mockBotConfigurator.Received(1).DeleteEndpointWithAgentBlueprintAsync(
1594+
oldEndpointName, "eastus", config.AgentBlueprintId);
1595+
1596+
// Step 1.5: pre-create cleanup of the partially-provisioned target endpoint
1597+
await _mockBotConfigurator.Received(1).DeleteEndpointWithAgentBlueprintAsync(
1598+
targetEndpointName, "eastus", config.AgentBlueprintId);
1599+
1600+
// Total: exactly two delete calls
1601+
await _mockBotConfigurator.Received(2).DeleteEndpointWithAgentBlueprintAsync(
1602+
Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>());
1603+
1604+
// Step 2: register the new endpoint
15131605
await _mockBotConfigurator.Received(1).CreateEndpointWithAgentBlueprintAsync(
15141606
Arg.Any<string>(),
15151607
Arg.Any<string>(),

src/Tests/Microsoft.Agents.A365.DevTools.Cli.Tests/Services/Helpers/EndpointHelperTests.cs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,23 @@ public void GetEndpointNameFromUrl_WithNgrokUrl_ReturnsExpectedName()
165165
result.Should().Be(expected);
166166
}
167167

168+
[Fact]
169+
public void GetEndpointNameFromUrl_DifferentDevTunnelUrls_ProduceDifferentNames()
170+
{
171+
// Regression: second --update-endpoint must delete the currently registered endpoint,
172+
// not the original MessagingEndpoint. Different URLs must yield different names so
173+
// the delete step targets the right Azure resource.
174+
var originalUrl = "https://x23kz7ll-3979.inc1.devtunnels.ms/api/messages"; // MessagingEndpoint (static)
175+
var updatedUrl = "https://w3jv62pr-3979.inc1.devtunnels.ms/api/messages"; // BotMessagingEndpoint (after first update)
176+
var blueprintId = "e412c0fa-7127-4615-9bef-1a7ba99e76af";
177+
178+
var originalName = EndpointHelper.GetEndpointNameFromUrl(originalUrl, blueprintId);
179+
var updatedName = EndpointHelper.GetEndpointNameFromUrl(updatedUrl, blueprintId);
180+
181+
updatedName.Should().NotBe(originalName, "deletion must target the currently registered URL, not the static MessagingEndpoint");
182+
updatedName.Should().Be("w3jv62pr-3979-inc1-devtunnels-ms-e412c0fa");
183+
}
184+
168185
// GetEndpointNameFromHost tests
169186

170187
[Fact]

0 commit comments

Comments
 (0)