Skip to content

Commit 096bef1

Browse files
committed
backup(nas): collapse N+1 chain queries when sweeping delete-pending ancestors
Address abh1sar review on PR #13074 (NASBackupProvider.java:995). The previous sweep/cascade path called findChainParent in a loop, each one issuing a fresh listByVmId — O(N) DB calls per chain walk. Add getChainOrderedLeafToRoot(member) which materialises the full chain (every backup row sharing CHAIN_ID) via a single listByVmId, sorted leaf-first by CHAIN_POSITION. Rewrite deleteLeafBackupAndSweepPendingAncestors to snapshot that chain BEFORE the leaf delete (so the in-memory list stays resolvable after the row is gone), then iterate ancestors from the snapshot. Rewrite cascadeDeleteSubtree as a plain leaf-first walk of the ordered chain — NAS backups are a linear chain, no tree-walking needed. findChainParent is kept (the parent-row lookup is still a useful primitive) with a Javadoc note recommending the new method when iterating.
1 parent 3ed8a30 commit 096bef1

1 file changed

Lines changed: 59 additions & 21 deletions

File tree

plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,9 @@ private List<Backup> findLiveChildren(Backup parent) {
935935
/**
936936
* Look up this backup's immediate parent in the chain (by {@code PARENT_BACKUP_ID}).
937937
* Returns {@code null} if this is the full (no parent) or the parent row is gone.
938+
*
939+
* <p>Prefer {@link #getChainOrderedLeafToRoot(Backup)} when walking the whole chain —
940+
* this method hits the DB on each call and is O(N²) when used in a loop.
938941
*/
939942
private Backup findChainParent(Backup backup) {
940943
String parentUuid = readDetail(backup, NASBackupChainKeys.PARENT_BACKUP_ID);
@@ -949,6 +952,33 @@ private Backup findChainParent(Backup backup) {
949952
return null;
950953
}
951954

955+
/**
956+
* Return the chain containing {@code member}, ordered leaf-first
957+
* (highest {@code CHAIN_POSITION} → root).
958+
*
959+
* <p>Materialises the chain via a single {@link BackupDao#listByVmId} call. Callers that
960+
* previously walked the chain by repeatedly calling {@link #findChainParent} were doing
961+
* O(N) {@code listByVmId} calls (one per ancestor); this collapses that to one.
962+
*
963+
* <p>If {@code member} has no {@code CHAIN_ID} metadata it is returned as a one-element
964+
* list (it is its own degenerate chain).
965+
*/
966+
private List<Backup> getChainOrderedLeafToRoot(Backup member) {
967+
String chainId = readDetail(member, NASBackupChainKeys.CHAIN_ID);
968+
if (chainId == null) {
969+
return Collections.singletonList(member);
970+
}
971+
List<Backup> chain = new ArrayList<>();
972+
for (Backup b : backupDao.listByVmId(null, member.getVmId())) {
973+
if (chainId.equals(readDetail(b, NASBackupChainKeys.CHAIN_ID))) {
974+
chain.add(b);
975+
}
976+
}
977+
// Descending CHAIN_POSITION = leaf-first (highest position = furthest from root).
978+
chain.sort((a, b) -> Integer.compare(chainPosition(b), chainPosition(a)));
979+
return chain;
980+
}
981+
952982
/**
953983
* Physically delete the leaf {@code backup}, then walk up the chain while each ancestor
954984
* is in delete-pending state. Mirrors the snapshot subsystem pattern: once a leaf is
@@ -959,48 +989,56 @@ private Backup findChainParent(Backup backup) {
959989
* live-children check is needed inside the loop.
960990
*/
961991
private boolean deleteLeafBackupAndSweepPendingAncestors(Backup backup, BackupRepository repo, Host host) {
962-
// Record the parent BEFORE the delete — deleteBackupFileAndRow removes the backup row,
963-
// after which findChainParent can't resolve PARENT_BACKUP_ID.
964-
Backup parent = findChainParent(backup);
992+
// Snapshot the chain BEFORE the leaf delete — deleteBackupFileAndRow removes the row,
993+
// after which the in-memory list still resolves but the DB no longer would.
994+
List<Backup> chain = getChainOrderedLeafToRoot(backup);
965995
if (!deleteBackupFileAndRow(backup, repo, host)) {
966996
return false;
967997
}
968-
while (parent != null && isDeletePending(parent)) {
969-
Backup nextParent = findChainParent(parent);
970-
if (!deleteBackupFileAndRow(parent, repo, host)) {
998+
// Walk the snapshot from leaf+1 upward, deleting tombstoned ancestors until a live
999+
// one is reached or the root is past.
1000+
int leafIdx = indexOfBackupById(chain, backup.getId());
1001+
if (leafIdx < 0) {
1002+
// Leaf wasn't in its own CHAIN_ID list — degenerate case, nothing more to sweep.
1003+
return true;
1004+
}
1005+
for (int i = leafIdx + 1; i < chain.size(); i++) {
1006+
Backup ancestor = chain.get(i);
1007+
if (!isDeletePending(ancestor)) {
1008+
break;
1009+
}
1010+
if (!deleteBackupFileAndRow(ancestor, repo, host)) {
9711011
// Stop the sweep; the rest of the tombstoned chain will be collected on a
9721012
// future delete that re-runs the sweep.
9731013
return true;
9741014
}
975-
parent = nextParent;
9761015
}
9771016
return true;
9781017
}
9791018

9801019
/**
9811020
* Forced delete of {@code root}'s entire chain, leaf-first. NAS backups form a linear
982-
* chain (full → inc → inc → …), not a tree, so we don't need BFS — find the tail
983-
* (highest {@code CHAIN_POSITION} for {@code root}'s {@code CHAIN_ID}) and delete down.
984-
* Each {@code rm} runs against a chain that still resolves.
1021+
* chain (full → inc → inc → …), not a tree, so we just walk the ordered chain and
1022+
* delete each member without re-querying parents.
9851023
*/
9861024
private boolean cascadeDeleteSubtree(Backup root, BackupRepository repo, Host host) {
987-
Backup tail = findChainTail(root);
988-
if (tail == null) {
989-
return false;
990-
}
991-
// Walk leaf → root via PARENT_BACKUP_ID, deleting unconditionally. We re-fetch parent
992-
// before each delete because deleteBackupFileAndRow removes the backup row.
993-
Backup current = tail;
994-
while (current != null) {
995-
Backup nextParent = findChainParent(current);
996-
if (!deleteBackupFileAndRow(current, repo, host)) {
1025+
for (Backup b : getChainOrderedLeafToRoot(root)) {
1026+
if (!deleteBackupFileAndRow(b, repo, host)) {
9971027
return false;
9981028
}
999-
current = nextParent;
10001029
}
10011030
return true;
10021031
}
10031032

1033+
private static int indexOfBackupById(List<Backup> chain, long id) {
1034+
for (int i = 0; i < chain.size(); i++) {
1035+
if (chain.get(i).getId() == id) {
1036+
return i;
1037+
}
1038+
}
1039+
return -1;
1040+
}
1041+
10041042
/**
10051043
* Return the backup with the highest {@code CHAIN_POSITION} sharing {@code root}'s
10061044
* {@code CHAIN_ID}. Returns {@code root} if it has no chain metadata or is itself the tail.

0 commit comments

Comments
 (0)