From 48789766bbc74f1f9c3ec32841557dd519979b85 Mon Sep 17 00:00:00 2001 From: Michal Pandy Date: Thu, 29 Aug 2019 08:12:52 -0700 Subject: [PATCH 1/2] Multithreaded rulebook --- .../Metadata/SubmanifoldConvolutionRules.h | 62 +++++++++++++++---- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h b/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h index b63ca26..b8ae665 100644 --- a/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h +++ b/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h @@ -7,6 +7,8 @@ #ifndef SUBMANIFOLDCONVOLUTIONRULES_H #define SUBMANIFOLDCONVOLUTIONRULES_H +#include + // Full input region for an output point template RectangularRegion @@ -27,20 +29,58 @@ template double SubmanifoldConvolution_SgToRules(SparseGrid &grid, RuleBook &rules, long *size) { double countActiveInputs = 0; - for (auto const &outputIter : grid.mp) { - auto inRegion = - InputRegionCalculator_Submanifold(outputIter.first, size); - Int rulesOffset = 0; - for (auto inputPoint : inRegion) { - auto inputIter = grid.mp.find(inputPoint); - if (inputIter != grid.mp.end()) { - rules[rulesOffset].push_back(inputIter->second + grid.ctr); - rules[rulesOffset].push_back(outputIter.second + grid.ctr); - countActiveInputs++; + const Int threadCount = 4; + std::vector threads; + std::array activeInputs = {}; + std::vector rulebooks; + for (Int t = 0; t < threadCount; ++t) { + rulebooks.push_back(RuleBook(rules.size())); + } + + auto func = [&](const int order) { + auto outputIter = grid.mp.begin(); + auto &rb = rulebooks[order]; + int rem = grid.mp.size(); + int aciveInputCount = 0; + + if (rem > order) { + std::advance(outputIter, order); + rem -= order; + + for (; outputIter != grid.mp.end(); + std::advance(outputIter, std::min(threadCount, rem)), + rem -= threadCount) { + auto inRegion = InputRegionCalculator_Submanifold( + outputIter->first, size); + Int rulesOffset = 0; + for (auto inputPoint : inRegion) { + auto inputIter = grid.mp.find(inputPoint); + if (inputIter != grid.mp.end()) { + aciveInputCount++; + rb[rulesOffset].push_back(inputIter->second + grid.ctr); + rb[rulesOffset].push_back(outputIter->second + grid.ctr); + } + rulesOffset++; + } } - rulesOffset++; } + + activeInputs[order] = aciveInputCount; + }; + + for (Int t = 0; t < threadCount; ++t) { + threads.push_back(std::thread(func, t)); } + + for (Int t = 0; t < threadCount; ++t) { + threads[t].join(); + countActiveInputs += activeInputs[t]; + for (std::size_t i = 0; i < rulebooks[t].size(); ++i) { + rules[i].insert(rules[i].end(), rulebooks[t][i].begin(), + rulebooks[t][i].end()); + } + } + return countActiveInputs; } From 39a194c8611658c4ac73aa3c01ddf23a75f485d1 Mon Sep 17 00:00:00 2001 From: Michal Pandy Date: Wed, 4 Sep 2019 02:26:38 -0700 Subject: [PATCH 2/2] Amalgamate rulebooks after all threads done processing --- .../Metadata/SubmanifoldConvolutionRules.h | 56 +++++++++++++------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h b/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h index b8ae665..8514e60 100644 --- a/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h +++ b/sparseconvnet/SCN/Metadata/SubmanifoldConvolutionRules.h @@ -24,18 +24,34 @@ InputRegionCalculator_Submanifold(const Point &output, long *size) { // Call for each convolutional / max-pooling layer, once for each batch item. // rules is used to carry out the "lowering" whilst carrying out the convolution - template double SubmanifoldConvolution_SgToRules(SparseGrid &grid, RuleBook &rules, long *size) { double countActiveInputs = 0; - const Int threadCount = 4; - std::vector threads; - std::array activeInputs = {}; - std::vector rulebooks; - for (Int t = 0; t < threadCount; ++t) { - rulebooks.push_back(RuleBook(rules.size())); + for (auto const &outputIter : grid.mp) { + auto inRegion = + InputRegionCalculator_Submanifold(outputIter.first, size); + Int rulesOffset = 0; + for (auto inputPoint : inRegion) { + auto inputIter = grid.mp.find(inputPoint); + if (inputIter != grid.mp.end()) { + rules[rulesOffset].push_back(inputIter->second + grid.ctr); + rules[rulesOffset].push_back(outputIter.second + grid.ctr); + countActiveInputs++; + } + rulesOffset++; + } } + return countActiveInputs; +} + +template +double SubmanifoldConvolution_SgToRules_par(SparseGrid &grid, + std::vector &rulebooks, + long *size, const Int threadCount) { + double countActiveInputs = 0; + std::vector threads; + std::vector activeInputs(threadCount, 0); auto func = [&](const int order) { auto outputIter = grid.mp.begin(); @@ -75,10 +91,6 @@ double SubmanifoldConvolution_SgToRules(SparseGrid &grid, for (Int t = 0; t < threadCount; ++t) { threads[t].join(); countActiveInputs += activeInputs[t]; - for (std::size_t i = 0; i < rulebooks[t].size(); ++i) { - rules[i].insert(rules[i].end(), rulebooks[t][i].begin(), - rulebooks[t][i].end()); - } } return countActiveInputs; @@ -89,6 +101,7 @@ Int SubmanifoldConvolution_SgsToRules(SparseGrids &SGs, RuleBook &rules, long *size) { Int sd = volume(size); Int countActiveInputs = 0; + rules.clear(); rules.resize(sd); for (Int i = 0; i < (Int)SGs.size(); i++) @@ -96,21 +109,31 @@ Int SubmanifoldConvolution_SgsToRules(SparseGrids &SGs, SubmanifoldConvolution_SgToRules(SGs[i], rules, size); return countActiveInputs; } + template Int SubmanifoldConvolution_SgsToRules_OMP(SparseGrids &SGs, RuleBook &rules, long *size) { - std::vector rbs(SGs.size()); + std::vector> rbs(SGs.size()); std::vector countActiveInputs(SGs.size()); rules.clear(); Int sd = volume(size); rules.resize(sd); + const Int threadCount = 4; + + for (Int i = 0; i < SGs.size(); ++i) { + std::vector rulebooks; + for (Int t = 0; t < threadCount; ++t) { + rulebooks.push_back(RuleBook(sd)); + } + rbs.push_back(rulebooks); + } + { Int i; #pragma omp parallel for private(i) for (i = 0; i < (Int)SGs.size(); i++) { - rbs[i].resize(sd); - countActiveInputs[i] = - SubmanifoldConvolution_SgToRules(SGs[i], rbs[i], size); + countActiveInputs[i] = SubmanifoldConvolution_SgToRules_par( + SGs[i], rbs[i], size, threadCount); } } { @@ -118,7 +141,8 @@ Int SubmanifoldConvolution_SgsToRules_OMP(SparseGrids &SGs, #pragma omp parallel for private(i) for (i = 0; i < sd; i++) for (auto const &rb : rbs) - rules[i].insert(rules[i].end(), rb[i].begin(), rb[i].end()); + for (Int t = 0; t < threadCount; ++t) + rules[i].insert(rules[i].end(), rb[i][t].begin(), rb[i][t].end()); } Int countActiveInputs_ = 0; for (auto &i : countActiveInputs)