From b8bb17f7e07ccfb1fc6b209fdda6c0530e83a719 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 16:28:40 +0100 Subject: [PATCH 1/7] faster growlocal --- .../GrowLocalAutoCoresParallel.hpp | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 7f9ac6cb..71cce899 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -20,7 +20,9 @@ limitations under the License. #include +#include #include +#include #include #include #include @@ -98,13 +100,13 @@ class GrowLocalAutoCoresParallel : public Scheduler { const VertexType n = endNode - startNode; const unsigned p = instance.NumberOfProcessors(); - std::set ready; + std::deque ready; std::vector futureReady; std::vector bestFutureReady; - std::vector> procReady(p); - std::vector> bestProcReady(p); + std::vector> procReady(p); + std::vector> bestProcReady(p); std::vector predec(n, 0); @@ -143,12 +145,15 @@ class GrowLocalAutoCoresParallel : public Scheduler { VertexType index = nodePos - startNode; if (predec[index] == 0) { if constexpr (hasVerticesInTopOrderV) { - ready.insert(nodePos); + ready.emplace_back(nodePos); } else { - ready.insert(topOrder[nodePos]); + ready.emplace_back(topOrder[nodePos]); } } } + if constexpr (not hasVerticesInTopOrderV) { + std::sort(ready.begin(), ready.end(), std::less<>{}); + } std::vector> newAssignments(p); std::vector> bestNewAssignments(p); @@ -166,8 +171,8 @@ class GrowLocalAutoCoresParallel : public Scheduler { double bestScore = 0; double bestParallelism = 0; - typename std::set::iterator readyIter; - typename std::set::iterator bestReadyIter; + typename std::deque::const_iterator readyIter; + typename std::deque::const_iterator bestReadyIter; bool 
continueSuperstepAttempts = true; @@ -181,7 +186,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { procReady[proc].clear(); } - readyIter = ready.begin(); + readyIter = ready.cbegin(); VertexType newTotalAssigned = 0; VWorkwT weightLimit = 0; @@ -191,9 +196,10 @@ class GrowLocalAutoCoresParallel : public Scheduler { while (newAssignments[0].size() < limit) { VertexType chosenNode = std::numeric_limits::max(); if (!procReady[0].empty()) { - chosenNode = *procReady[0].begin(); - procReady[0].erase(procReady[0].begin()); - } else if (readyIter != ready.end()) { + std::pop_heap(procReady[0].begin(), procReady[0].end(), std::greater<>{}); + chosenNode = procReady[0].back(); + procReady[0].pop_back(); + } else if (readyIter != ready.cend()) { chosenNode = *readyIter; readyIter++; } else { @@ -238,7 +244,8 @@ class GrowLocalAutoCoresParallel : public Scheduler { --predec[succIndex]; if (predec[succIndex] == 0) { if (schedule.AssignedProcessor(succ) == 0) { - procReady[0].insert(succ); + procReady[0].emplace_back(succ); + std::push_heap(procReady[0].begin(), procReady[0].end(), std::greater<>{}); } else { futureReady.push_back(succ); } @@ -254,9 +261,10 @@ class GrowLocalAutoCoresParallel : public Scheduler { while (currentWeightAssigned < weightLimit) { VertexType chosenNode = std::numeric_limits::max(); if (!procReady[proc].empty()) { - chosenNode = *procReady[proc].begin(); - procReady[proc].erase(procReady[proc].begin()); - } else if (readyIter != ready.end()) { + std::pop_heap(procReady[proc].begin(), procReady[proc].end(), std::greater<>{}); + chosenNode = procReady[proc].back(); + procReady[proc].pop_back(); + } else if (readyIter != ready.cend()) { chosenNode = *readyIter; readyIter++; } else { @@ -301,7 +309,8 @@ class GrowLocalAutoCoresParallel : public Scheduler { --predec[succIndex]; if (predec[succIndex] == 0) { if (schedule.AssignedProcessor(succ) == proc) { - procReady[proc].insert(succ); + procReady[proc].emplace_back(succ); + 
std::push_heap(procReady[proc].begin(), procReady[proc].end(), std::greater<>{}); } else { futureReady.push_back(succ); } @@ -345,6 +354,10 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } + if (readyIter == ready.cend()) { + continueSuperstepAttempts = false; + } + if (totalAssigned + newTotalAssigned == n) { continueSuperstepAttempts = false; } @@ -424,10 +437,14 @@ class GrowLocalAutoCoresParallel : public Scheduler { // apply best iteration ready.erase(ready.begin(), bestReadyIter); - ready.insert(bestFutureReady.begin(), bestFutureReady.end()); + const auto lengthLeftoverReady = std::distance(ready.begin(), ready.end()); + ready.insert(ready.end(), bestFutureReady.begin(), bestFutureReady.end()); for (unsigned proc = 0; proc < p; proc++) { - ready.merge(bestProcReady[proc]); + ready.insert(ready.end(), bestProcReady[proc].begin(), bestProcReady[proc].end()); } + const auto middleIt = std::next(ready.begin(), lengthLeftoverReady); + std::sort(middleIt, ready.end(), std::less<>{}); + std::inplace_merge(ready.begin(), middleIt, ready.end()); for (unsigned proc = 0; proc < p; ++proc) { for (const VertexType &node : bestNewAssignments[proc]) { From 1d314f6592b8db370676ce71cd6e56d9b5a33474 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 08:20:00 +0100 Subject: [PATCH 2/7] reorder --- .../GrowLocalAutoCoresParallel.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 71cce899..27240e6b 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -388,14 +388,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - VertexType succIndex; - if constexpr (hasVerticesInTopOrderV) { - succIndex = succ - startNode; - } else { - succIndex 
= posInTopOrder[succ] - startNode; - } - - ++predec[succIndex]; + schedule.SetAssignedProcessor(succ, UINT_MAX); } } } @@ -419,7 +412,14 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - schedule.SetAssignedProcessor(succ, UINT_MAX); + VertexType succIndex; + if constexpr (hasVerticesInTopOrderV) { + succIndex = succ - startNode; + } else { + succIndex = posInTopOrder[succ] - startNode; + } + + ++predec[succIndex]; } } } From c28fbb9774bd91642986daa7caf0ef575959bf27 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 08:30:06 +0100 Subject: [PATCH 3/7] removed unnecessary reset --- .../GreedySchedulers/GrowLocalAutoCoresParallel.hpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 27240e6b..8fb779ae 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -363,12 +363,6 @@ class GrowLocalAutoCoresParallel : public Scheduler { } // undo proc assingments and predec increases in any case - for (unsigned proc = 0; proc < p; ++proc) { - for (const VertexType &node : newAssignments[proc]) { - schedule.SetAssignedProcessor(node, UINT_MAX); - } - } - for (unsigned proc = 0; proc < p; ++proc) { for (const VertexType &node : newAssignments[proc]) { for (const VertexType &succ : graph.Children(node)) { From 3c632477487e8625285c9c2c87f5f6e54ae70f63 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 09:16:11 +0100 Subject: [PATCH 4/7] fewer resets --- .../GrowLocalAutoCoresParallel.hpp | 57 +++++++------------ 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 8fb779ae..016d0c14 100644 --- 
a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -164,6 +164,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { double desiredParallelism = static_cast(p); VertexType totalAssigned = 0; + unsigned totalAttempts = 1U; supstep = 0; while (totalAssigned < n) { @@ -177,6 +178,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { bool continueSuperstepAttempts = true; while (continueSuperstepAttempts) { + assert(totalAttempts < (UINT_MAX / (p + 1U))); for (unsigned proc = 0; proc < p; proc++) { newAssignments[proc].clear(); } @@ -228,10 +230,12 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - if (schedule.AssignedProcessor(succ) == UINT_MAX) { - schedule.SetAssignedProcessor(succ, 0); - } else if (schedule.AssignedProcessor(succ) != 0) { - schedule.SetAssignedProcessor(succ, p); + bool canScheduleSameProc = false; + if ((schedule.AssignedProcessor(succ) / (p + 1U) == totalAttempts) & ((schedule.AssignedProcessor(succ) % (p + 1U)) != 0U)) { + schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + p); + } else { + schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + 0U); + canScheduleSameProc = true; } VertexType succIndex; @@ -243,7 +247,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { --predec[succIndex]; if (predec[succIndex] == 0) { - if (schedule.AssignedProcessor(succ) == 0) { + if (canScheduleSameProc) { procReady[0].emplace_back(succ); std::push_heap(procReady[0].begin(), procReady[0].end(), std::greater<>{}); } else { @@ -293,10 +297,12 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - if (schedule.AssignedProcessor(succ) == UINT_MAX) { - schedule.SetAssignedProcessor(succ, proc); - } else if (schedule.AssignedProcessor(succ) != proc) { - schedule.SetAssignedProcessor(succ, p); + bool canScheduleSameProc = false; + if ((schedule.AssignedProcessor(succ) / (p + 1U) == totalAttempts) & 
((schedule.AssignedProcessor(succ) % (p + 1U)) != proc)) { + schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + p); + } else { + schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + proc); + canScheduleSameProc = true; } VertexType succIndex; @@ -308,7 +314,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { --predec[succIndex]; if (predec[succIndex] == 0) { - if (schedule.AssignedProcessor(succ) == proc) { + if (canScheduleSameProc) { procReady[proc].emplace_back(succ); std::push_heap(procReady[proc].begin(), procReady[proc].end(), std::greater<>{}); } else { @@ -362,31 +368,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { continueSuperstepAttempts = false; } - // undo proc assingments and predec increases in any case - for (unsigned proc = 0; proc < p; ++proc) { - for (const VertexType &node : newAssignments[proc]) { - for (const VertexType &succ : graph.Children(node)) { - if constexpr (hasVerticesInTopOrderV) { - if constexpr (hasChildrenInVertexOrderV) { - if (succ >= endNode) { - break; - } - } else { - if (succ >= endNode) { - continue; - } - } - } else { - if (posInTopOrder[succ] >= endNode) { - continue; - } - } - - schedule.SetAssignedProcessor(succ, UINT_MAX); - } - } - } - + // undo predec increases in any case for (unsigned proc = 0; proc < p; ++proc) { for (const VertexType &node : newAssignments[proc]) { for (const VertexType &succ : graph.Children(node)) { @@ -427,6 +409,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { limit++; limit += (limit / 2); + ++totalAttempts; } // apply best iteration @@ -519,9 +502,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { const VertexType n = instance.NumberOfVertices(); - for (VertexType vert = 0; vert < n; ++vert) { - schedule.SetAssignedProcessor(vert, UINT_MAX); - } + schedule.SetAssignedProcessors(std::vector(n, 0U)); VertexType numNodesPerThread = n / numThreads; std::vector startNodes; From 4a8c2fdebfe3088ac89974615e6b1c7ae278b708 Mon Sep 17 00:00:00 
2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 09:49:24 +0100 Subject: [PATCH 5/7] replaced division by multiplication --- .../GrowLocalAutoCoresParallel.hpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 016d0c14..02a23324 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -231,10 +231,13 @@ class GrowLocalAutoCoresParallel : public Scheduler { } bool canScheduleSameProc = false; - if ((schedule.AssignedProcessor(succ) / (p + 1U) == totalAttempts) & ((schedule.AssignedProcessor(succ) % (p + 1U)) != 0U)) { - schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + p); + const unsigned pp1 = p + 1U; + const unsigned base = pp1 * totalAttempts; + const unsigned remainder = schedule.AssignedProcessor(succ) - base; + if ((remainder < pp1) & (remainder != 0U)) { + schedule.SetAssignedProcessor(succ, base + p); } else { - schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + 0U); + schedule.SetAssignedProcessor(succ, base + 0U); canScheduleSameProc = true; } @@ -298,10 +301,13 @@ class GrowLocalAutoCoresParallel : public Scheduler { } bool canScheduleSameProc = false; - if ((schedule.AssignedProcessor(succ) / (p + 1U) == totalAttempts) & ((schedule.AssignedProcessor(succ) % (p + 1U)) != proc)) { - schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + p); + const unsigned pp1 = p + 1U; + const unsigned base = pp1 * totalAttempts; + const unsigned remainder = schedule.AssignedProcessor(succ) - base; + if ((remainder < pp1) & (remainder != proc)) { + schedule.SetAssignedProcessor(succ, base + p); } else { - schedule.SetAssignedProcessor(succ, totalAttempts * (p + 1U) + proc); + schedule.SetAssignedProcessor(succ, base + proc); 
canScheduleSameProc = true; } From 223d353f7b75aa85841c2b265a09ae8241a29010 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 10:14:14 +0100 Subject: [PATCH 6/7] added compare to inplace merge --- .../scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 02a23324..f50bd8d8 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -427,7 +427,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { } const auto middleIt = std::next(ready.begin(), lengthLeftoverReady); std::sort(middleIt, ready.end(), std::less<>{}); - std::inplace_merge(ready.begin(), middleIt, ready.end()); + std::inplace_merge(ready.begin(), middleIt, ready.end(), std::less<>{}); for (unsigned proc = 0; proc < p; ++proc) { for (const VertexType &node : bestNewAssignments[proc]) { From f155460f155fba47932737d494642ac835373136 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 11:49:57 +0100 Subject: [PATCH 7/7] added explanation of encoding --- .../GrowLocalAutoCoresParallel.hpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index f50bd8d8..cd48c4d9 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -234,7 +234,14 @@ class GrowLocalAutoCoresParallel : public Scheduler { const unsigned pp1 = p + 1U; const unsigned base = pp1 * totalAttempts; const unsigned remainder = schedule.AssignedProcessor(succ) - base; - if ((remainder 
< pp1) & (remainder != 0U)) { + // Encoding into processor of children where they can be scheduled locally through + // iteration * (p + 1) + proc + // where proc is either the processor where it can be enqueued in the processor local queue + // or equal to p when it can't be enqueued in any local queue. + // The extra encoding of iteration ensures that previous superstep attempts do not affect the current superstep + if ((remainder < pp1) & (remainder != 0U)) { // The first condition implies that the iteration is + // the same as the current and the second checks if + // it has already been assigned to a different processor schedule.SetAssignedProcessor(succ, base + p); } else { schedule.SetAssignedProcessor(succ, base + 0U); @@ -304,7 +311,14 @@ class GrowLocalAutoCoresParallel : public Scheduler { const unsigned pp1 = p + 1U; const unsigned base = pp1 * totalAttempts; const unsigned remainder = schedule.AssignedProcessor(succ) - base; - if ((remainder < pp1) & (remainder != proc)) { + // Encoding into processor of children where they can be scheduled locally through + // iteration * (p + 1) + proc + // where proc is either the processor where it can be enqueued in the processor local queue + // or equal to p when it can't be enqueued in any local queue. + // The extra encoding of iteration ensures that previous superstep attempts do not affect the current superstep + if ((remainder < pp1) & (remainder != proc)) { // The first condition implies that the iteration is + // the same as the current and the second checks if + // it has already been assigned to a different processor schedule.SetAssignedProcessor(succ, base + p); } else { schedule.SetAssignedProcessor(succ, base + proc);