From 5e33bb7d084999ea96b78b6cafb03029e4383f5d Mon Sep 17 00:00:00 2001 From: Muhammad Asif <19404936+asifsamiarain@users.noreply.github.com> Date: Mon, 24 Mar 2025 10:41:39 +0000 Subject: [PATCH 1/2] clean branch for pull request --- Grid/cshift/Cshift_common.h | 219 +++++++++++++++--------------- Grid/cshift/Cshift_mpi.h | 6 + Grid/cshift/Cshift_table.cc | 2 + Grid/qcd/smearing/WilsonFlow.h | 9 +- HMC/FTHMC2p1f.cc | 10 +- HMC/FTHMC2p1f_3GeV.cc | 9 ++ HMC/HMC2p1f_3GeV.cc | 8 ++ tests/smearing/Test_WilsonFlow.cc | 13 +- 8 files changed, 148 insertions(+), 128 deletions(-) diff --git a/Grid/cshift/Cshift_common.h b/Grid/cshift/Cshift_common.h index fdb98cd4e3..99b1bc3ad5 100644 --- a/Grid/cshift/Cshift_common.h +++ b/Grid/cshift/Cshift_common.h @@ -31,21 +31,73 @@ NAMESPACE_BEGIN(Grid); extern std::vector > Cshift_table; extern deviceVector > Cshift_table_device; +extern std::vector Cshift_vector; +extern deviceVector Cshift_vector_device; -inline std::pair *MapCshiftTable(void) +// Copy Cshift map object (table or vector) to device +template +inline void MapCshiftCopy(std::vector &Cshift_obj, deviceVector &Cshift_obj_device) { - // GPU version - uint64_t sz=Cshift_table.size(); - if (Cshift_table_device.size()!=sz ) { - Cshift_table_device.resize(sz); + // GPU version only + uint64_t sz=Cshift_obj.size(); + if (Cshift_obj_device.size()!=sz ) { + Cshift_obj_device.resize(sz); } - acceleratorCopyToDevice((void *)&Cshift_table[0], - (void *)&Cshift_table_device[0], - sizeof(Cshift_table[0])*sz); + acceleratorCopyToDevice((void *)&Cshift_obj[0], + (void *)&Cshift_obj_device[0], + sizeof(Cshift_obj[0])*sz); - return &Cshift_table_device[0]; - // CPU version use identify map } + +// Copy Cshift map object (table or vector) to device and return pointer to device copy +template +inline vobj *MapCshift(std::vector &Cshift_obj, deviceVector &Cshift_obj_device) +{ + MapCshiftCopy(Cshift_obj, Cshift_obj_device); + + return &Cshift_obj_device[0]; +} + +// Calculate Cshift_vector +template +void CalculateCshiftVector(Lattice &ret, const Lattice &rhs, int dimension, int cbmask) +{ + GridBase *grid = rhs.Grid(); + + if ( !grid->CheckerBoarded(dimension) ) { + cbmask=0x3; + } + + int e1=grid->_slice_nblock[dimension]; // clearly loop invariant for icpc + int e2=grid->_slice_block[dimension]; + int stride = grid->_slice_stride[dimension]; + + if (Cshift_vector.size() < e1*e2) Cshift_vector.resize(e1*e2); // Let it grow to biggest + + int ent = 0; + if(cbmask == 0x3 ){ + for(int n=0;nCheckerBoardFromOindex(o); + if ( ocb&cbmask ) { + Cshift_vector[ent++] = o; + } + } + } + } + + if (ent < Cshift_vector.size()) Cshift_vector.resize(ent); // trim vector to actual size (relevant for checkerboarded dimensions) +} + + /////////////////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split /////////////////////////////////////////////////////////////////// @@ -89,7 +141,7 @@ Gather_plane_simple (const Lattice &rhs,deviceVector &buffer,int dim } { auto buffer_p = & buffer[0]; - auto table = MapCshiftTable(); + auto table = MapCshift >(Cshift_table, Cshift_table_device); autoView(rhs_v , rhs, AcceleratorRead); accelerator_for(i,ent,vobj::Nsimd(),{ coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second])); @@ -201,7 +253,7 @@ template void Scatter_plane_simple (Lattice &rhs,deviceVector< { auto buffer_p = & buffer[0]; - auto table = MapCshiftTable(); + auto table = MapCshift >(Cshift_table, Cshift_table_device); autoView( rhs_v, rhs, AcceleratorWrite); accelerator_for(i,ent,vobj::Nsimd(),{ coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second])); @@ -263,94 +315,32 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA template void Copy_plane(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask) { - int rd = rhs.Grid()->_rdimensions[dimension]; - if ( !rhs.Grid()->CheckerBoarded(dimension) ) { - cbmask=0x3; - } int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane - int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc - int e2=rhs.Grid()->_slice_block[dimension]; - int stride = rhs.Grid()->_slice_stride[dimension]; - - if(Cshift_table.size()(lo+o,ro+o); - } - } - } else { - for(int n=0;nCheckerBoardFromOindex(o); - if ( ocb&cbmask ) { - Cshift_table[ent++] = std::pair(lo+o,ro+o); - } - } - } - } + auto table = &Cshift_vector_device[0]; - { - auto table = MapCshiftTable(); - autoView(rhs_v , rhs, AcceleratorRead); - autoView(lhs_v , lhs, AcceleratorWrite); - accelerator_for(i,ent,vobj::Nsimd(),{ - coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second])); - }); - } + autoView(rhs_v , rhs, AcceleratorRead); + autoView(lhs_v , lhs, AcceleratorWrite); + accelerator_for(i,Cshift_vector.size(),vobj::Nsimd(),{ + coalescedWrite(lhs_v[table[i]+lo],coalescedRead(rhs_v[table[i]+ro])); + }); } template void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) { - int rd = rhs.Grid()->_rdimensions[dimension]; - - if ( !rhs.Grid()->CheckerBoarded(dimension) ) { - cbmask=0x3; - } int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane - int e1=rhs.Grid()->_slice_nblock[dimension]; - int e2=rhs.Grid()->_slice_block [dimension]; - int stride = rhs.Grid()->_slice_stride[dimension]; - - if(Cshift_table.size()(lo+o+b,ro+o+b); - }} - } else { - for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb&cbmask ) Cshift_table[ent++] = std::pair(lo+o+b,ro+o+b); - }} - } - - { - auto table = MapCshiftTable(); - autoView( rhs_v, rhs, AcceleratorRead); - autoView( lhs_v, lhs, AcceleratorWrite); - accelerator_for(i,ent,1,{ - permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type); + auto table = &Cshift_vector_device[0]; + autoView( rhs_v, rhs, AcceleratorRead); + autoView( lhs_v, lhs, AcceleratorWrite); + accelerator_for(i,Cshift_vector.size(),1,{ + permute(lhs_v[table[i]+lo],rhs_v[table[i]+ro],permute_type); }); - } } ////////////////////////////////////////////////////// @@ -383,51 +373,54 @@ template void Cshift_local(Lattice &ret,const Lattice &r // Map to always positive shift modulo global full dimension. shift = (shift+fd)%fd; + int cb= (cbmask==0x2)? Odd : Even; + int sshift = grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); + // the permute type ret.Checkerboard() = grid->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension); int permute_dim =grid->PermuteDim(dimension); int permute_type=grid->PermuteType(dimension); int permute_type_dist; - for(int x=0;x rd. + // num is sshift mod rd. + // + // shift 7 + // + // XoXo YcYc + // oXoX cYcY + // XoXo YcYc + // oXoX cYcY + // + // sshift -- + // + // XX YY ; 3 + // XX YY ; 0 + // XX YY ; 3 + // XX YY ; 0 + // + int wrap = sshift/rd; wrap=wrap % ly; + int num = sshift%rd; + + // Calculate Cshift_vector - it's the same for all slices + CalculateCshiftVector(ret, rhs, dimension, cbmask); + // Copy it to the device + MapCshiftCopy(Cshift_vector, Cshift_vector_device); - // int o = 0; - int bo = x * grid->_ostride[dimension]; - int cb= (cbmask==0x2)? Odd : Even; + for(int x=0;xCheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); int sx = (x+sshift)%rd; - // wrap is whether sshift > rd. - // num is sshift mod rd. - // - // shift 7 - // - // XoXo YcYc - // oXoX cYcY - // XoXo YcYc - // oXoX cYcY - // - // sshift -- - // - // XX YY ; 3 - // XX YY ; 0 - // XX YY ; 3 - // XX YY ; 0 - // int permute_slice=0; if(permute_dim){ - int wrap = sshift/rd; wrap=wrap % ly; - int num = sshift%rd; - if ( x< rd-num ) permute_slice=wrap; else permute_slice = (wrap+1)%ly; if ( (ly>2) && (permute_slice) ) { - assert(permute_type & RotateBit); - permute_type_dist = permute_type|permute_slice; + assert(permute_type & RotateBit); + permute_type_dist = permute_type|permute_slice; } else { - permute_type_dist = permute_type; + permute_type_dist = permute_type; } } diff --git a/Grid/cshift/Cshift_mpi.h b/Grid/cshift/Cshift_mpi.h index 710792ee30..0a94ab1aa8 100644 --- a/Grid/cshift/Cshift_mpi.h +++ b/Grid/cshift/Cshift_mpi.h @@ -132,6 +132,12 @@ template void Cshift_comms(Lattice &ret,const Lattice &r int cb= (cbmask==0x2)? Odd : Even; int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); + + // Calculate Cshift_vector - it's the same for all slices + CalculateCshiftVector(ret, rhs, dimension, cbmask); + // Copy it to the device + MapCshiftCopy(Cshift_vector, Cshift_vector_device); + RealD tcopy=0.0; RealD tgather=0.0; RealD tscatter=0.0; diff --git a/Grid/cshift/Cshift_table.cc b/Grid/cshift/Cshift_table.cc index 7e0cbe120c..359d94b8f4 100644 --- a/Grid/cshift/Cshift_table.cc +++ b/Grid/cshift/Cshift_table.cc @@ -2,4 +2,6 @@ NAMESPACE_BEGIN(Grid); std::vector > Cshift_table; deviceVector > Cshift_table_device; +std::vector Cshift_vector; +deviceVector Cshift_vector_device; NAMESPACE_END(Grid); diff --git a/Grid/qcd/smearing/WilsonFlow.h b/Grid/qcd/smearing/WilsonFlow.h index f169d02b9a..dc135823c5 100644 --- a/Grid/qcd/smearing/WilsonFlow.h +++ b/Grid/qcd/smearing/WilsonFlow.h @@ -207,11 +207,14 @@ std::vector WilsonFlowBase::flowMeasureEnergyDensityCloverleaf(con } template -void WilsonFlowBase::setDefaultMeasurements(int topq_meas_interval){ - addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){ +void WilsonFlowBase::setDefaultMeasurements(int meas_interval){ + addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl; }); - addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ + addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Energy density (cloverleaf) : " << step << " " << t << " " << energyDensityCloverleaf(t,U) << std::endl; + }); + addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops::TopologicalCharge(U) << std::endl; }); } diff --git a/HMC/FTHMC2p1f.cc b/HMC/FTHMC2p1f.cc index 7d93d168f8..1e914e872f 100644 --- a/HMC/FTHMC2p1f.cc +++ b/HMC/FTHMC2p1f.cc @@ -25,13 +25,20 @@ directory *************************************************************************************/ /* END LEGAL */ #include + +#if Nc == 3 #include #include +#endif using namespace Grid; int main(int argc, char **argv) { +#if Nc != 3 +#warning FTHMC2p1f will not work for Nc != 3 + std::cout << "This program will currently only work for Nc == 3." << std::endl; +#else std::cout << std::setprecision(12); Grid_init(&argc, &argv); @@ -220,7 +227,6 @@ int main(int argc, char **argv) TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); +#endif } // main - - diff --git a/HMC/FTHMC2p1f_3GeV.cc b/HMC/FTHMC2p1f_3GeV.cc index a8aa67f8cf..36d5caa3b5 100644 --- a/HMC/FTHMC2p1f_3GeV.cc +++ b/HMC/FTHMC2p1f_3GeV.cc @@ -24,14 +24,22 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + #include + +#if Nc == 3 #include #include +#endif using namespace Grid; int main(int argc, char **argv) { +#if Nc != 3 +#warning FTHMC2p1f_3GeV will not work for Nc != 3 + std::cout << "This program will currently only work for Nc == 3." << std::endl; +#else std::cout << std::setprecision(12); Grid_init(&argc, &argv); @@ -220,6 +228,7 @@ int main(int argc, char **argv) TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); +#endif } // main diff --git a/HMC/HMC2p1f_3GeV.cc b/HMC/HMC2p1f_3GeV.cc index 4bf088d7f0..199d4be818 100644 --- a/HMC/HMC2p1f_3GeV.cc +++ b/HMC/HMC2p1f_3GeV.cc @@ -25,13 +25,20 @@ directory *************************************************************************************/ /* END LEGAL */ #include + +#if Nc == 3 #include #include +#endif using namespace Grid; int main(int argc, char **argv) { +#if Nc != 3 +#warning HMC2p1f_3GeV will not work for Nc != 3 + std::cout << "This program will currently only work for Nc == 3." << std::endl; +#else std::cout << std::setprecision(12); Grid_init(&argc, &argv); @@ -220,6 +227,7 @@ int main(int argc, char **argv) TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); +#endif } // main diff --git a/tests/smearing/Test_WilsonFlow.cc b/tests/smearing/Test_WilsonFlow.cc index e0726f874f..4acd3b4fc1 100644 --- a/tests/smearing/Test_WilsonFlow.cc +++ b/tests/smearing/Test_WilsonFlow.cc @@ -33,8 +33,7 @@ namespace Grid{ GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters, int, steps, double, step_size, - int, meas_interval, - double, maxTau); // for the adaptive algorithm + int, meas_interval); template @@ -86,7 +85,7 @@ int main(int argc, char **argv) { WFParameters WFPar(Reader); ConfParameters CPar(Reader); CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix); - BinaryHmcCheckpointer CPBin(CPPar); + NerscHmcCheckpointer CPBin(CPPar); for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){ @@ -96,19 +95,13 @@ int main(int argc, char **argv) { std::cout << GridLogMessage << "Initial plaquette: " << WilsonLoops::avgPlaquette(Umu) << std::endl; - int t=WFPar.maxTau; - WilsonFlowAdaptive WF(WFPar.step_size, WFPar.maxTau, - 1.0e-4, + WilsonFlow WF(WFPar.step_size, WFPar.steps, WFPar.meas_interval); WF.smear(Uflow, Umu); RealD WFlow_plaq = WilsonLoops::avgPlaquette(Uflow); - RealD WFlow_TC = WilsonLoops::TopologicalCharge(Uflow); - RealD WFlow_T0 = WF.energyDensityPlaquette(t,Uflow); std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl; - std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl; - std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl; std::cout<< GridLogMessage << " Admissibility check:\n"; const double sp_adm = 0.067; // admissible threshold From 4b6d5cbe7d93ef5a64df69e7519acded96e50604 Mon Sep 17 00:00:00 2001 From: Ilektra Christidi Date: Wed, 26 Mar 2025 12:42:02 +0000 Subject: [PATCH 2/2] Revert some white space changes. --- Grid/cshift/Cshift_common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/cshift/Cshift_common.h b/Grid/cshift/Cshift_common.h index 99b1bc3ad5..3b3f5212c1 100644 --- a/Grid/cshift/Cshift_common.h +++ b/Grid/cshift/Cshift_common.h @@ -418,9 +418,9 @@ template void Cshift_local(Lattice &ret,const Lattice &r if ( (ly>2) && (permute_slice) ) { assert(permute_type & RotateBit); - permute_type_dist = permute_type|permute_slice; + permute_type_dist = permute_type|permute_slice; } else { - permute_type_dist = permute_type; + permute_type_dist = permute_type; } }