Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -969,13 +969,25 @@ public void reallyRun() {
}

logger.trace("End cleanup expired async-jobs");

cleanupNetworksStuckInImplementing();

} catch (Throwable e) {
logger.error("Unexpected exception when trying to execute queue item, ", e);
}
}
};
}

private void cleanupNetworksStuckInImplementing() {
// Cleanup orphaned networks stuck in Implementing state without async jobs
try {
cleanupOrphanedNetworks();
} catch (Throwable e) {
logger.error("Unexpected exception when trying to cleanup orphaned networks", e);
}
}

@DB
protected void expungeAsyncJob(final AsyncJobVO job) {
Transaction.execute(new TransactionCallbackNoReturn() {
Expand Down Expand Up @@ -1284,6 +1296,74 @@ private void cleanupFailedSnapshotsCreatedWithDefaultStrategy(final long msid) {
}
}

/**
* Cleanup networks that are stuck in Implementing state without associated async jobs.
* This only processes networks that have been stuck for longer than the job expiration threshold.
*/
private void cleanupOrphanedNetworks() {
try {
SearchCriteria<NetworkVO> sc = networkDao.createSearchCriteria();
sc.addAnd("state", SearchCriteria.Op.EQ, Network.State.Implementing);
sc.addAnd("removed", SearchCriteria.Op.NULL);
List<NetworkVO> implementingNetworks = networkDao.search(sc, null);

if (implementingNetworks == null || implementingNetworks.isEmpty()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (implementingNetworks == null || implementingNetworks.isEmpty()) {
if (CollectionUtils.isEmpty(implementingNetworks)) {

return;
}

logger.debug("Found {} networks in Implementing state, checking for orphaned networks", implementingNetworks.size());

final long expireMinutes = JobExpireMinutes.value();
final Date cutoffTime = new Date(System.currentTimeMillis() - (expireMinutes * 60 * 1000));

for (NetworkVO network : implementingNetworks) {
if (network.getCreated().after(cutoffTime)) {
logger.trace("Network {} in Implementing state is only {} minutes old (threshold: {} minutes), skipping cleanup",
network.getId(),
(System.currentTimeMillis() - network.getCreated().getTime()) / 60000,
expireMinutes);
continue;
}

List<AsyncJobVO> jobs = _jobDao.findInstancePendingAsyncJobs("Network", network.getAccountId());
boolean hasActiveJob = false;
for (AsyncJobVO job : jobs) {
if (job.getInstanceId() != null && job.getInstanceId().equals(network.getId())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can pass these checks to jobDao and find the active job?

hasActiveJob = true;
break;
}
}

if (hasActiveJob) {
logger.debug("Network {} in Implementing state has active async job, skipping cleanup", network.getId());
continue;
}

logger.warn("Found orphaned network {} in Implementing state without async job. " +
"Network created: {}, age: {} minutes, expiration threshold: {} minutes. Transitioning to Shutdown state.",
network.getId(), network.getCreated(),
(System.currentTimeMillis() - network.getCreated().getTime()) / 60000,
expireMinutes);
updateNetworkState(network);

}
} catch (Exception e) {
logger.error("Error while cleaning up orphaned networks", e);
}
}

private void updateNetworkState(NetworkVO network) {
try {
networkOrchestrationService.stateTransitTo(network, Network.Event.OperationFailed);
logger.info("Successfully transitioned orphaned network {} to Shutdown state using state machine", network.getId());
} catch (final NoTransitionException e) {
logger.debug("State transition failed for orphaned network {}, forcing state update", network.getId());
network.setState(Network.State.Shutdown);
networkDao.update(network.getId(), network);
logger.info("Successfully forced orphaned network {} to Shutdown state", network.getId());
}
}

@Override
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
}
Expand Down
Loading