From b12b4fdaff20f0f615c735c2c5bc5d85e3f0637c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 23 Aug 2024 11:05:09 -0400 Subject: [PATCH 01/34] Attempt at operating on half checkerboard --- Grid/qcd/smearing/GaugeConfigurationMasked.h | 47 +++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/Grid/qcd/smearing/GaugeConfigurationMasked.h b/Grid/qcd/smearing/GaugeConfigurationMasked.h index 78846263cc..b3d73c184a 100644 --- a/Grid/qcd/smearing/GaugeConfigurationMasked.h +++ b/Grid/qcd/smearing/GaugeConfigurationMasked.h @@ -32,7 +32,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration // Smear_Stout *StoutSmearing; // std::vector SmearedSet; + GridRedBlackCartesian * UrbGrid; // keep a copy of the redblack grid for life of object std::vector masks; + std::vector cbs; typedef typename SU3Adjoint::AMatrix AdjMatrix; typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField; @@ -147,6 +149,25 @@ class SmearedConfigurationMasked : public SmearedConfiguration } pokeLorentz(Fdet, Fdet_pol, nu); } + + void Compute_MpInvJx_dNxxdSy(int cb, + const GaugeLinkField &PlaqL, + const GaugeLinkField &PlaqR, + AdjMatrixField MpInvJx, + AdjVectorField &Fdet2 ) + { + GaugeLinkField PlaqLeo(UrbGrid); + GaugeLinkField PlaqReo(UrbGrid); + AdjMatrixField MpInvJxeo(UrbGrid); + AdjVectorField Fdet2eo(UrbGrid); + pickCheckerboard(cb,PlaqLeo,PlaqL); + pickCheckerboard(cb,PlaqReo,PlaqR); + pickCheckerboard(cb,MpInvJxeo,MpInvJx); + Fdet2eo.Checkerboard()=cb; + Compute_MpInvJx_dNxxdSy(PlaqLeo,PlaqReo,MpInvJxeo,Fdet2eo); + setCheckerboard(Fdet2,Fdet2eo); + } + void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 ) { GaugeLinkField UtaU(PlaqL.Grid()); @@ -278,8 +299,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration //////////////////////////////////////////////////////////////////////////////// // Mask the gauge field //////////////////////////////////////////////////////////////////////////////// + int cb = cbs[smr]; auto mask=PeekIndex(masks[smr],mu); // the cb mask - + Umsk = U; ApplyMask(Umsk,smr); Utmp = peekLorentz(Umsk,mu); @@ -442,7 +464,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration AdjMatrixField MpInvJx_nu(grid); MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx,FdetV); Fdet2_mu=FdetV; Fdet1_mu=Zero(); @@ -499,7 +521,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration time=-usecond(); PlaqR=(-1.0)*PlaqR; - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx,FdetV); Fdet2_nu = FdetV; time+=usecond(); std::cout << GridLogMessage << "Compute_MpInvJx_dNxxSy (occurs 6x) took "< MpInvJx_nu = Cshift(MpInvJx,mu,-1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_nu = Fdet2_nu+FdetV; ///////////////// -ve nu ///////////////// @@ -539,7 +561,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y; MpInvJx_nu = Cshift(MpInvJx,nu,1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_nu = Fdet2_nu+FdetV; // x== @@ -560,7 +582,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration MpInvJx_nu = Cshift(MpInvJx,mu,-1); MpInvJx_nu = Cshift(MpInvJx_nu,nu,1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_nu = Fdet2_nu+FdetV; ///////////////////////////////////////////////////////////////////// @@ -589,7 +611,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration MpInvJx_nu = Cshift(MpInvJx,nu,-1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_mu = Fdet2_mu+FdetV; // __ @@ -609,7 +631,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration MpInvJx_nu = Cshift(MpInvJx,nu,1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_mu = Fdet2_mu+FdetV; } @@ -931,6 +953,10 @@ class SmearedConfigurationMasked : public SmearedConfiguration public: /* Standard constructor */ + virtual ~SmearedConfigurationMasked() + { + delete UrbGrid; + } SmearedConfigurationMasked(GridCartesian* _UGrid, unsigned int Nsmear, Smear_Stout& Stout) : SmearedConfiguration(_UGrid, Nsmear,Stout) { @@ -939,7 +965,6 @@ class SmearedConfigurationMasked : public SmearedConfiguration // was resized in base class assert(this->SmearedSet.size()==Nsmear); - GridRedBlackCartesian * UrbGrid; UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(_UGrid); LatticeComplex one(_UGrid); one = ComplexD(1.0,0.0); LatticeComplex tmp(_UGrid); @@ -947,10 +972,11 @@ class SmearedConfigurationMasked : public SmearedConfiguration for (unsigned int i = 0; i < this->smearingLevels; ++i) { masks.push_back(*(new LatticeLorentzComplex(_UGrid))); - int mu= (i/2) %Nd; int cb= (i%2); LatticeComplex tmpcb(UrbGrid); + + cbs.push_back(cb); masks[i]=Zero(); //////////////////// @@ -962,7 +988,6 @@ class SmearedConfigurationMasked : public SmearedConfiguration PokeIndex(masks[i],tmp, mu); } - delete UrbGrid; } virtual void smeared_force(GaugeField &SigmaTilde) From da919949f99d13157bb3109169c3882d4621b8e2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 23 Aug 2024 12:34:41 -0400 Subject: [PATCH 02/34] Clean up the accelerator pick/set checkerboard --- Grid/cartesian/Cartesian_base.h | 1 + Grid/cartesian/Cartesian_red_black.h | 1 + Grid/lattice/Lattice_transfer.h | 41 +++++----------------------- 3 files changed, 9 insertions(+), 34 deletions(-) diff --git a/Grid/cartesian/Cartesian_base.h b/Grid/cartesian/Cartesian_base.h index bb3c3b3f4d..96f2bfd527 100644 --- a/Grid/cartesian/Cartesian_base.h +++ b/Grid/cartesian/Cartesian_base.h @@ -91,6 +91,7 @@ class GridBase : public CartesianCommunicator , public GridThread { //////////////////////////////////////////////////////////////// virtual int CheckerBoarded(int dim)=0; virtual int CheckerBoard(const Coordinate &site)=0; + virtual int CheckerDim(void){ return 0; }; virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; diff --git a/Grid/cartesian/Cartesian_red_black.h b/Grid/cartesian/Cartesian_red_black.h index 092d4910b8..5de1af7b7b 100644 --- a/Grid/cartesian/Cartesian_red_black.h +++ b/Grid/cartesian/Cartesian_red_black.h @@ -60,6 +60,7 @@ class GridRedBlackCartesian : public GridBase int _checker_dim; std::vector _checker_board; + virtual int CheckerDim(void){ return _checker_dim; }; virtual int CheckerBoarded(int dim){ if( dim==_checker_dim) return 1; else return 0; diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index c475829d4c..6a53436ae2 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -42,50 +42,21 @@ inline void subdivides(GridBase *coarse,GridBase *fine) assert((fine->_rdimensions[d] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[d]); } } - //////////////////////////////////////////////////////////////////////////////////////////// // remove and insert a half checkerboard //////////////////////////////////////////////////////////////////////////////////////////// + template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full) { - half.Checkerboard() = cb; - - autoView( half_v, half, CpuWrite); - autoView( full_v, full, CpuRead); - thread_for(ss, full.Grid()->oSites(),{ - int cbos; - Coordinate coor; - full.Grid()->oCoorFromOindex(coor,ss); - cbos=half.Grid()->CheckerBoard(coor); - - if (cbos==cb) { - int ssh=half.Grid()->oIndex(coor); - half_v[ssh] = full_v[ss]; - } - }); + acceleratorPickCheckerboard(cb,half,full); } template inline void setCheckerboard(Lattice &full,const Lattice &half) { - int cb = half.Checkerboard(); - autoView( half_v , half, CpuRead); - autoView( full_v , full, CpuWrite); - thread_for(ss,full.Grid()->oSites(),{ - - Coordinate coor; - int cbos; - - full.Grid()->oCoorFromOindex(coor,ss); - cbos=half.Grid()->CheckerBoard(coor); - - if (cbos==cb) { - int ssh=half.Grid()->oIndex(coor); - full_v[ss]=half_v[ssh]; - } - }); + acceleratorSetCheckerboard(full,half); } -template inline void acceleratorPickCheckerboard(int cb,Lattice &half,const Lattice &full, int checker_dim_half=0) +template inline void acceleratorPickCheckerboard(int cb,Lattice &half,const Lattice &full, int dummy=0) { half.Checkerboard() = cb; autoView(half_v, half, AcceleratorWrite); @@ -95,6 +66,7 @@ template inline void acceleratorPickCheckerboard(int cb,Lattice_ndimension; Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask; Coordinate ostride_half = half.Grid()->_ostride; + int checker_dim_half = half.Grid()->CheckerDim(); accelerator_for(ss, full.Grid()->oSites(),full.Grid()->Nsimd(),{ Coordinate coor; @@ -119,7 +91,7 @@ template inline void acceleratorPickCheckerboard(int cb,Lattice inline void acceleratorSetCheckerboard(Lattice &full,const Lattice &half, int checker_dim_half=0) +template inline void acceleratorSetCheckerboard(Lattice &full,const Lattice &half, int dummy=0) { int cb = half.Checkerboard(); autoView(half_v , half, AcceleratorRead); @@ -129,6 +101,7 @@ template inline void acceleratorSetCheckerboard(Lattice &full, unsigned long ndim_half = half.Grid()->_ndimension; Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask; Coordinate ostride_half = half.Grid()->_ostride; + int checker_dim_half = half.Grid()->CheckerDim(); accelerator_for(ss,full.Grid()->oSites(),full.Grid()->Nsimd(),{ Coordinate coor; From dbb79c5279858ad8fd557488bb6c49925e07547e Mon Sep 17 00:00:00 2001 From: Shuhei Yamamoto Date: Tue, 27 Aug 2024 19:25:46 -0400 Subject: [PATCH 03/34] fixed the bug related to inappropriate use of half-checkerboarding --- Grid/qcd/smearing/GaugeConfigurationMasked.h | 670 +++++++++++++++++-- 1 file changed, 627 insertions(+), 43 deletions(-) diff --git a/Grid/qcd/smearing/GaugeConfigurationMasked.h b/Grid/qcd/smearing/GaugeConfigurationMasked.h index b3d73c184a..00b24b135c 100644 --- a/Grid/qcd/smearing/GaugeConfigurationMasked.h +++ b/Grid/qcd/smearing/GaugeConfigurationMasked.h @@ -31,7 +31,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration // const unsigned int smearingLevels; // Smear_Stout *StoutSmearing; // std::vector SmearedSet; - + GridRedBlackCartesian * UrbGrid; // keep a copy of the redblack grid for life of object std::vector masks; std::vector cbs; @@ -60,6 +60,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration GaugeLinkField sh_field(grid), temp_Sigma(grid); Real rho_munu, rho_numu; + RealD t = 0; + t-=usecond(); + rho_munu = rho; rho_numu = rho; for(int mu = 0; mu < Nd; ++mu){ @@ -120,12 +123,19 @@ class SmearedConfigurationMasked : public SmearedConfiguration } } + + t+=usecond(); + std::cout << GridLogPerformance << " BaseSmearDerivative " << t/1e3 << " ms " << std::endl; + } void BaseSmear(GaugeLinkField& Cup, const GaugeField& U,int mu,RealD rho) { GridBase *grid = U.Grid(); GaugeLinkField tmp_stpl(grid); WilsonLoops WL; + RealD t = 0; + + t-=usecond(); Cup = Zero(); for(int nu=0; nu Cup += adj(tmp_stpl*rho); } } + t+=usecond(); + std::cout << GridLogPerformance << " BaseSmear " << t/1e3 << " ms " << std::endl; } // Adjoint vector to GaugeField force void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu) { Complex ci(0,1); GaugeLinkField Fdet_pol(Fdet.Grid()); + RealD t = 0; + + t-=usecond(); Fdet_pol=Zero(); for(int e=0;e<8;e++){ ColourMatrix te; @@ -148,13 +163,15 @@ class SmearedConfigurationMasked : public SmearedConfiguration Fdet_pol=Fdet_pol + ci*tmp*te; // but norm of te is different.. why? } pokeLorentz(Fdet, Fdet_pol, nu); - } + t+=usecond(); + std::cout << GridLogPerformance << " InsertForce " << t/1e3 << " ms " << std::endl; + } void Compute_MpInvJx_dNxxdSy(int cb, const GaugeLinkField &PlaqL, - const GaugeLinkField &PlaqR, - AdjMatrixField MpInvJx, - AdjVectorField &Fdet2 ) + const GaugeLinkField &PlaqR, + AdjMatrixField MpInvJx, + AdjVectorField &Fdet2 ) { GaugeLinkField PlaqLeo(UrbGrid); GaugeLinkField PlaqReo(UrbGrid); @@ -167,14 +184,14 @@ class SmearedConfigurationMasked : public SmearedConfiguration Compute_MpInvJx_dNxxdSy(PlaqLeo,PlaqReo,MpInvJxeo,Fdet2eo); setCheckerboard(Fdet2,Fdet2eo); } - void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 ) { - GaugeLinkField UtaU(PlaqL.Grid()); - GaugeLinkField D(PlaqL.Grid()); - AdjMatrixField Dbc(PlaqL.Grid()); - AdjMatrixField Dbc_opt(PlaqL.Grid()); - LatticeComplex tmp(PlaqL.Grid()); + int cb = PlaqL.Checkerboard(); + GaugeLinkField UtaU(PlaqL.Grid()); UtaU.Checkerboard() = cb; + GaugeLinkField D(PlaqL.Grid()); D.Checkerboard() = cb; + AdjMatrixField Dbc(PlaqL.Grid()); Dbc.Checkerboard() = cb; + AdjMatrixField Dbc_opt(PlaqL.Grid()); Dbc_opt.Checkerboard() = cb; + LatticeComplex tmp(PlaqL.Grid()); tmp.Checkerboard() = cb; const int Ngen = SU3Adjoint::Dimension; Complex ci(0,1); ColourMatrix ta,tb,tc; @@ -230,21 +247,34 @@ class SmearedConfigurationMasked : public SmearedConfiguration Complex ci(0,1); ColourMatrix tb; ColourMatrix tc; + RealD t = 0, tta = 0, tp = 0, tgen = 0; + + t-=usecond(); for(int b=0;b(NxAd,tmp,c,b); + auto tmp =closure( -trace(ci*tc*Nx)); + PokeIndex(NxAd,tmp,c,b); } #endif + tp+=usecond(); } + t+=usecond(); + std::cout << GridLogPerformance << " ComputeNxy " << t/1e3 << " ms proj "< //////////////////////////////////////////////////////////////////////////////// int cb = cbs[smr]; auto mask=PeekIndex(masks[smr],mu); // the cb mask - + Umsk = U; ApplyMask(Umsk,smr); Utmp = peekLorentz(Umsk,mu); @@ -464,9 +494,15 @@ class SmearedConfigurationMasked : public SmearedConfiguration AdjMatrixField MpInvJx_nu(grid); MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor + //FdetV=Zero();// debug Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx,FdetV); Fdet2_mu=FdetV; Fdet1_mu=Zero(); + /* debug + AdjVectorField tmp_debug(grid); tmp_debug=Zero(); + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,tmp_debug); + std::cout << GridLogMessage << "|MpInvJx_dNxxdSy_diff|^2 " << norm2(FdetV-tmp_debug) << std::endl; + */ for(int e =0 ; e<8 ; e++){ LatticeComplexD tr(grid); @@ -493,7 +529,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration GaugeField Fdet2(grid); GaugeLinkField Fdet_pol(grid); // one polarisation - RealD t4 = usecond(); + RealD t4 = usecond(), tLR = 0, tNxy = 0, tMJx = 0; for(int nu=0;nu // | | // x== // nu polarisation -- clockwise - time=-usecond(); + time=-usecond(); tLR -= usecond(); PlaqL=Ident; PlaqR=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu, Gimpl::CovShiftForward(Umu[mu], mu, Gimpl::CovShiftBackward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu)))); - time+=usecond(); + time+=usecond(); tLR += usecond(); std::cout << GridLogMessage << "PlaqLR took "< // __ // | " // |__"x // mu polarisation + tLR -= usecond(); PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu, Gimpl::CovShiftBackward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu))); PlaqR=Gimpl::CovShiftIdentityBackward(Umu[nu], nu); - + tLR += usecond(); + + tNxy -= usecond(); dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,-1); ComputeNxy(PlaqL,PlaqR,Nxy); Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y; + tNxy += usecond(); + tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,-1); - Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_mu = Fdet2_mu+FdetV; + tMJx += usecond(); // __ // " | // x__| // mu polarisation - + tLR -= usecond(); PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu, Gimpl::CovShiftForward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu))); PlaqR=Gimpl::CovShiftIdentityForward(Umu[nu], nu); + tLR += usecond(); + tNxy -= usecond(); dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1); ComputeNxy(PlaqL,PlaqR,Nxy); Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y; + tNxy += usecond(); + tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,1); - Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_mu = Fdet2_mu+FdetV; - + tMJx += usecond(); } } RealD t5 = usecond(); @@ -645,11 +708,431 @@ class SmearedConfigurationMasked : public SmearedConfiguration force= (-0.5)*( Fdet1 + Fdet2); RealD t1 = usecond(); + std::cout << GridLogMessage << " logDetJacobianForce t3-t0 "< Umu(Nd,grid); + GaugeLinkField Cmu(grid); // U and staple; C contains factor of epsilon + GaugeLinkField Zx(grid); // U times Staple, contains factor of epsilon + GaugeLinkField Nxx(grid); // Nxx fundamental space + GaugeLinkField Utmp(grid); + GaugeLinkField PlaqL(grid); + GaugeLinkField PlaqR(grid); + const int Ngen = SU3Adjoint::Dimension; + AdjMatrix TRb; + ColourMatrix Ident; + LatticeComplex cplx(grid); + + AdjVectorField dJdXe_nMpInv(grid); + AdjVectorField dJdXe_nMpInv_y(grid); + AdjMatrixField MpAd(grid); // Mprime luchang's notes + AdjMatrixField MpAdInv(grid); // Mprime inverse + AdjMatrixField NxxAd(grid); // Nxx in adjoint space + AdjMatrixField JxAd(grid); + AdjMatrixField ZxAd(grid); + AdjMatrixField mZxAd(grid); + AdjMatrixField X(grid); + Complex ci(0,1); + + RealD t0 = usecond(); + Ident = ComplexD(1.0); + for(int d=0;d(masks[smr],mu); // the cb mask + + Umsk = U; + ApplyMask(Umsk,smr); + Utmp = peekLorentz(Umsk,mu); + + //////////////////////////////////////////////////////////////////////////////// + // Retrieve the eps/rho parameter(s) -- could allow all different but not so far + //////////////////////////////////////////////////////////////////////////////// + double rho=this->StoutSmearing->SmearRho[1]; + int idx=0; + for(int mu=0;mu<4;mu++){ + for(int nu=0;nu<4;nu++){ + if ( mu!=nu) assert(this->StoutSmearing->SmearRho[idx]==rho); + else assert(this->StoutSmearing->SmearRho[idx]==0.0); + idx++; + }} + ////////////////////////////////////////////////////////////////// + // Assemble the N matrix + ////////////////////////////////////////////////////////////////// + // Computes ALL the staples -- could compute one only and do it here + RealD time; + time=-usecond(); + BaseSmear(Cmu, U,mu,rho); + + ////////////////////////////////////////////////////////////////// + // Assemble Luscher exp diff map J matrix + ////////////////////////////////////////////////////////////////// + // Ta so Z lives in Lie algabra + Zx = Ta(Cmu * adj(Umu[mu])); + time+=usecond(); + std::cout << GridLogMessage << "Z took "< dJdX; dJdX.resize(8,grid); + std::vector TRb_s; TRb_s.resize(8); + AdjMatrixField tbXn(grid); + AdjMatrixField sumXtbX(grid); + AdjMatrixField t2(grid); + AdjMatrixField dt2(grid); + AdjMatrixField t3(grid); + AdjMatrixField dt3(grid); + AdjMatrixField aunit(grid); + + for(int b=0;b<8;b++){ + SU3Adjoint::generator(b, TRb_s[b]); + dJdX[b] = TRb_s[b]; + } + aunit = ComplexD(1.0); + // Could put into an accelerator_for + X = (-1.0)*ZxAd; + t2 = X; + for (int j = 12; j > 1; --j) { + t3 = t2*(1.0 / (j + 1)) + aunit; + t2 = X * t3; + for(int b=0;b<8;b++){ + dJdX[b]= TRb_s[b] * t3 + X * dJdX[b]*(1.0 / (j + 1)); + } + } + for(int b=0;b<8;b++){ + dJdX[b] = -dJdX[b]; + } +#else + std::vector dJdX; dJdX.resize(8,grid); + AdjMatrixField tbXn(grid); + AdjMatrixField sumXtbX(grid); + AdjMatrixField t2(grid); + AdjMatrixField dt2(grid); + AdjMatrixField t3(grid); + AdjMatrixField dt3(grid); + AdjMatrixField aunit(grid); + for(int b=0;b<8;b++){ + aunit = ComplexD(1.0); + SU3Adjoint::generator(b, TRb); //dt2 + + X = (-1.0)*ZxAd; + t2 = X; + dt2 = TRb; + for (int j = 12; j > 1; --j) { + t3 = t2*(1.0 / (j + 1)) + aunit; + dt3 = dt2*(1.0 / (j + 1)); + t2 = X * t3; + dt2 = TRb * t3 + X * dt3; + } + dJdX[b] = -dt2; + } +#endif + time+=usecond(); + std::cout << GridLogMessage << "dJx took "<(masks[smr],mu); + dJdXe_nMpInv = dJdXe_nMpInv*tmp; + + // dJdXe_nMpInv needs to multiply: + // Nxx_mu (site local) (1) + // Nxy_mu one site forward in each nu direction (3) + // Nxy_mu one site backward in each nu direction (3) + // Nxy_nu 0,0 ; +mu,0; 0,-nu; +mu-nu [ 3x4 = 12] + // 19 terms. + AdjMatrixField Nxy(grid); + + GaugeField Fdet1(grid); + GaugeField Fdet2(grid); + GaugeLinkField Fdet_pol(grid); // one polarisation + + RealD t4 = usecond(), tLR = 0, tNxy = 0, tMJx = 0; + for(int nu=0;nu AdjMatrixField mZac(grid); AdjMatrixField X(grid); + RealD time=0, tta=0, tpk=0, tN=0, tZ=0, tJ=0, tlnDetM=0; int mu= (smr/2) %Nd; + time -= usecond(); auto mask=PeekIndex(masks[smr],mu); // the cb mask ////////////////////////////////////////////////////////////////// // Assemble the N matrix ////////////////////////////////////////////////////////////////// + + tN -= usecond(); double rho=this->StoutSmearing->SmearRho[1]; BaseSmear(Cmu, U,mu,rho); @@ -690,7 +1177,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration for(int b=0;b for(int c=0;c(Ncb,tmp,c,b); + tpk -= usecond(); + PokeIndex(Ncb,tmp,c,b); + tpk += usecond(); } #endif } - + tN += usecond(); + ////////////////////////////////////////////////////////////////// // Assemble Luscher exp diff map J matrix ////////////////////////////////////////////////////////////////// + tZ -= usecond(); // Ta so Z lives in Lie algabra Z = Ta(Cmu * adj(Umu)); @@ -721,10 +1214,12 @@ class SmearedConfigurationMasked : public SmearedConfiguration cplx = 2.0*trace(ci*Tb*Z); // my convention 1/2 delta ba Zac = Zac + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign. } - + tZ += usecond(); + ////////////////////////////////////// // J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)! ////////////////////////////////////// + tJ -= usecond(); X=1.0; Jac = X; mZac = (-1.0)*Zac; @@ -734,7 +1229,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration kpfac = kpfac /(k+1); Jac = Jac + X * kpfac; } + tJ += usecond(); + tlnDetM -= usecond(); //////////////////////////// // Mab //////////////////////////// @@ -757,9 +1254,79 @@ class SmearedConfigurationMasked : public SmearedConfiguration // Masked sum //////////////////////////// ln_det = ln_det * mask; + tlnDetM += usecond(); + time += usecond(); + std::cout << GridLogPerformance << " logDetJacobianLevel " << time/1e3 << " ms ta "<smearingLevels > 0) + { + double start = usecond(), tas = 0; + + GaugeLinkField tmp_mu(force.Grid()); + + for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) { + + // remove U in UdSdU... + for (int mu = 0; mu < Nd; mu++) { + tmp_mu = adj(peekLorentz(this->get_smeared_conf(ismr), mu)) * peekLorentz(force, mu); + pokeLorentz(force, tmp_mu, mu); + } + + tas -= usecond(); + // Propagate existing force + force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1), ismr); + tas += usecond(); + + // Add back U in UdSdU... + for (int mu = 0; mu < Nd; mu++) { + tmp_mu = peekLorentz(this->get_smeared_conf(ismr - 1), mu) * peekLorentz(force, mu); + pokeLorentz(force, tmp_mu, mu); + } + + // Get this levels determinant force + force_det = Zero(); + logDetJacobianForceLevel(old, this->get_smeared_conf(ismr-1),force_det,ismr); + + // Sum the contributions + force = force + force_det; + } + + // remove U in UdSdU... + for (int mu = 0; mu < Nd; mu++) { + tmp_mu = adj(peekLorentz(this->get_smeared_conf(0), mu)) * peekLorentz(force, mu); + pokeLorentz(force, tmp_mu, mu); + } + + tas -= usecond(); + force = this->AnalyticSmearedForce(force, *this->ThinLinks,0); + tas += usecond(); + + for (int mu = 0; mu < Nd; mu++) { + tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu); + pokeLorentz(force, tmp_mu, mu); + } + + force_det = Zero(); + + logDetJacobianForceLevel(old, *this->ThinLinks,force_det,0); + + force = force + force_det; + + force=Ta(force); // Ta + + } // if smearingLevels = 0 do nothing + std::cout << GridLogMessage << " DEBUG: logDetJacobianForce " << std::endl; + } + public: RealD logDetJacobian(void) { @@ -778,6 +1345,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration } return ln_det; } + void logDetJacobianForce(GaugeField &force) { force =Zero(); @@ -785,7 +1353,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration if (this->smearingLevels > 0) { - double start = usecond(); + double start = usecond(), tas = 0; GaugeLinkField tmp_mu(force.Grid()); @@ -796,10 +1364,12 @@ class SmearedConfigurationMasked : public SmearedConfiguration tmp_mu = adj(peekLorentz(this->get_smeared_conf(ismr), mu)) * peekLorentz(force, mu); pokeLorentz(force, tmp_mu, mu); } - + + tas -= usecond(); // Propagate existing force force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1), ismr); - + tas += usecond(); + // Add back U in UdSdU... for (int mu = 0; mu < Nd; mu++) { tmp_mu = peekLorentz(this->get_smeared_conf(ismr - 1), mu) * peekLorentz(force, mu); @@ -820,8 +1390,10 @@ class SmearedConfigurationMasked : public SmearedConfiguration pokeLorentz(force, tmp_mu, mu); } + tas -= usecond(); force = this->AnalyticSmearedForce(force, *this->ThinLinks,0); - + tas += usecond(); + for (int mu = 0; mu < Nd; mu++) { tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu); pokeLorentz(force, tmp_mu, mu); @@ -835,8 +1407,14 @@ class SmearedConfigurationMasked : public SmearedConfiguration force=Ta(force); // Ta +#if 1 // debug + GaugeField force_debug(force.Grid()); + logDetJacobianForce(1,force_debug); + std::cout << GridLogMessage << " DEBUG: logDetJacobianForce_diff " << norm2(force-force_debug) << std::endl; +#endif double end = usecond(); double time = (end - start)/ 1e3; + std::cout << GridLogMessage << "GaugeConfigurationMasked: AnalyticSmearedForce in lnDetJacobianForce took " << tas/1e3 << " ms" << std::endl; std::cout << GridLogMessage << "GaugeConfigurationMasked: lnDetJacobianForce took " << time << " ms" << std::endl; } // if smearingLevels = 0 do nothing } @@ -899,6 +1477,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration int cb= (level%2); double rho=this->StoutSmearing->SmearRho[1]; + RealD time = 0; + + time -= usecond(); // Can override this to do one direction only. SigmaK = Zero(); iLambda = Zero(); @@ -933,7 +1514,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu); pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu); pokeLorentz(iLambda, iLambda_mu, mu); - std::cout << " mu "< // propagate the rest of the force as identity map, just add back //////////////////////////////////////////////////////////////////////////////////// SigmaK = SigmaK+SigmaKPrimeB; + time += usecond(); + std::cout << GridLogMessage << "GaugeConfigurationMasked: Analytic smearing force took " << time/1e3 << " ms" << std::endl; return SigmaK; } @@ -972,12 +1555,13 @@ class SmearedConfigurationMasked : public SmearedConfiguration for (unsigned int i = 0; i < this->smearingLevels; ++i) { masks.push_back(*(new LatticeLorentzComplex(_UGrid))); + int mu= (i/2) %Nd; int cb= (i%2); LatticeComplex tmpcb(UrbGrid); cbs.push_back(cb); - + masks[i]=Zero(); //////////////////// // Setup the mask From 7fe1b822314f834ec7ef3ed163edf985e8b22754 Mon Sep 17 00:00:00 2001 From: Shuhei Yamamoto Date: Tue, 10 Sep 2024 17:42:21 -0400 Subject: [PATCH 04/34] fixed bugs; added timings; redirected timing outputs from performance to Message temporarily --- Grid/qcd/smearing/GaugeConfigurationMasked.h | 126 +++++++++++-------- 1 file changed, 76 insertions(+), 50 deletions(-) diff --git a/Grid/qcd/smearing/GaugeConfigurationMasked.h b/Grid/qcd/smearing/GaugeConfigurationMasked.h index 00b24b135c..c794673902 100644 --- a/Grid/qcd/smearing/GaugeConfigurationMasked.h +++ b/Grid/qcd/smearing/GaugeConfigurationMasked.h @@ -16,6 +16,8 @@ template void Dump(const Lattice & lat, peekSite(tmp,lat,site); std::cout << " Dump "< typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField; typedef typename SU3Adjoint::LatticeAdjVector AdjVectorField; + // Assume: lat = full lattice + template void printCheckerboards2norm(T &lat, int cb=-1) + { + T lat_0(UrbGrid); lat_0 = Zero(); + T lat_1(UrbGrid); lat_1 = Zero(); + pickCheckerboard(0,lat_0,lat); + pickCheckerboard(1,lat_1,lat); + + std::string parity = (cb==0)? "Even" : "Odd"; + std::cout << GridLogMessage << " printCheckerboards2norm for " << parity << cb << ": Even Part: " << norm2(lat_0) << " Odd Part: " << norm2(lat_1) << std::endl; + } + void BaseSmearDerivative(GaugeField& SigmaTerm, const GaugeField& iLambda, const GaugeField& U, @@ -125,7 +139,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration } t+=usecond(); - std::cout << GridLogPerformance << " BaseSmearDerivative " << t/1e3 << " ms " << std::endl; + std::cout << GridLogMessage << " BaseSmearDerivative " << t/1e3 << " ms " << std::endl; } @@ -145,8 +159,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration } } t+=usecond(); - std::cout << GridLogPerformance << " BaseSmear " << t/1e3 << " ms " << std::endl; + std::cout << GridLogMessage << " BaseSmear " << t/1e3 << " ms " << std::endl; } + // Adjoint vector to GaugeField force void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu) { @@ -165,7 +180,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration pokeLorentz(Fdet, Fdet_pol, nu); t+=usecond(); - std::cout << GridLogPerformance << " InsertForce " << t/1e3 << " ms " << std::endl; + std::cout << GridLogMessage << " InsertForce " << t/1e3 << " ms " << std::endl; } void Compute_MpInvJx_dNxxdSy(int cb, const GaugeLinkField &PlaqL, @@ -173,6 +188,8 @@ class SmearedConfigurationMasked : public SmearedConfiguration AdjMatrixField MpInvJx, AdjVectorField &Fdet2 ) { + RealD time = -usecond(); + Fdet2 = Zero(); GaugeLinkField PlaqLeo(UrbGrid); GaugeLinkField PlaqReo(UrbGrid); AdjMatrixField MpInvJxeo(UrbGrid); @@ -181,8 +198,12 @@ class SmearedConfigurationMasked : public SmearedConfiguration pickCheckerboard(cb,PlaqReo,PlaqR); pickCheckerboard(cb,MpInvJxeo,MpInvJx); Fdet2eo.Checkerboard()=cb; + time+=usecond(); Compute_MpInvJx_dNxxdSy(PlaqLeo,PlaqReo,MpInvJxeo,Fdet2eo); + time-=usecond(); setCheckerboard(Fdet2,Fdet2eo); + time+=usecond(); + std::cout << GridLogMessage << " Checkerboarding_MpInvJx_dNxxdSy " << time/1e3 << " ms " << std::endl; } void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 ) { @@ -236,7 +257,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration tpk+=usecond(); } t+=usecond(); - std::cout << GridLogPerformance << " Compute_MpInvJx_dNxxdSy " << t/1e3 << " ms proj "< tp+=usecond(); } t+=usecond(); - std::cout << GridLogPerformance << " ComputeNxy " << t/1e3 << " ms proj "< Zx = Ta(Cmu * adj(Umu[mu])); time+=usecond(); std::cout << GridLogMessage << "Z took "< } time+=usecond(); std::cout << GridLogMessage << "ZxAd took "< } time+=usecond(); std::cout << GridLogMessage << "Jx took "< time=-usecond(); tMJx -= usecond(); PlaqR=(-1.0)*PlaqR; - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx,FdetV); Fdet2_nu = FdetV; time+=usecond(); tMJx += usecond(); + printCheckerboards2norm(FdetV,cb); std::cout << GridLogMessage << "Compute_MpInvJx_dNxxSy (occurs 6x) took "< tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,mu,-1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_nu = Fdet2_nu+FdetV; tMJx += usecond(); + printCheckerboards2norm(FdetV,(cb+1)%2); ///////////////// -ve nu ///////////////// // __ @@ -607,9 +630,10 @@ class SmearedConfigurationMasked : public SmearedConfiguration tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_nu = Fdet2_nu+FdetV; tMJx += usecond(); + printCheckerboards2norm(FdetV,(cb+1)%2); // x== // | | @@ -634,9 +658,10 @@ class SmearedConfigurationMasked : public SmearedConfiguration tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,mu,-1); MpInvJx_nu = Cshift(MpInvJx_nu,nu,1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_nu = Fdet2_nu+FdetV; tMJx += usecond(); + printCheckerboards2norm(FdetV,cb); ///////////////////////////////////////////////////////////////////// // Set up the determinant force contribution in 3x3 algebra basis @@ -669,10 +694,10 @@ class SmearedConfigurationMasked : public SmearedConfiguration tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,-1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_mu = Fdet2_mu+FdetV; tMJx += usecond(); - + printCheckerboards2norm(FdetV,(cb+1)%2); // __ // " | // x__| // mu polarisation @@ -694,9 +719,10 @@ class SmearedConfigurationMasked : public SmearedConfiguration tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,1); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); Fdet2_mu = Fdet2_mu+FdetV; tMJx += usecond(); + printCheckerboards2norm(FdetV,(cb+1)%2); } } RealD t5 = usecond(); @@ -712,7 +738,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration std::cout << GridLogMessage << " logDetJacobianForce t4-t3 dJdXe_nMpInv "< // Ta so Z lives in Lie algabra Zx = Ta(Cmu * adj(Umu[mu])); time+=usecond(); - std::cout << GridLogMessage << "Z took "< ZxAd = ZxAd + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign. } time+=usecond(); - std::cout << GridLogMessage << "ZxAd took "< JxAd = JxAd + X * kpfac; } time+=usecond(); - std::cout << GridLogMessage << "Jx took "< } #endif time+=usecond(); - std::cout << GridLogMessage << "dJx took "< PlaqR = Utmp*adj(Cmu); ComputeNxy(PlaqL,PlaqR,NxxAd); time+=usecond(); - std::cout << GridLogMessage << "ComputeNxy took "< time=-usecond(); MpAdInv = Inverse(MpAd); time+=usecond(); - std::cout << GridLogMessage << "MpAdInv took "< Gimpl::CovShiftBackward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu)))); time+=usecond(); tLR += usecond(); - std::cout << GridLogMessage << "PlaqLR took "< force= (-0.5)*( Fdet1 + Fdet2); RealD t1 = usecond(); - std::cout << GridLogMessage << " logDetJacobianForce t3-t0 "< ln_det = ln_det * mask; tlnDetM += usecond(); time += usecond(); - std::cout << GridLogPerformance << " logDetJacobianLevel " << time/1e3 << " ms ta "<smearingLevels > 0) + { + double start = usecond(); + for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) { + ln_det+= logDetJacobianLevel(this->get_smeared_conf(ismr-1),ismr); + } + ln_det +=logDetJacobianLevel(*(this->ThinLinks),0); + + double end = usecond(); + double time = (end - start)/ 1e3; + std::cout << GridLogMessage << "GaugeConfigurationMasked: logDetJacobian took " << time << " ms" << std::endl; + } + return ln_det; + } void logDetJacobianForce(int old, GaugeField &force) { force =Zero(); @@ -1327,25 +1372,6 @@ class SmearedConfigurationMasked : public SmearedConfiguration std::cout << GridLogMessage << " DEBUG: logDetJacobianForce " << std::endl; } -public: - RealD logDetJacobian(void) - { - RealD ln_det = 0; - if (this->smearingLevels > 0) - { - double start = usecond(); - for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) { - ln_det+= logDetJacobianLevel(this->get_smeared_conf(ismr-1),ismr); - } - ln_det +=logDetJacobianLevel(*(this->ThinLinks),0); - - double end = usecond(); - double time = (end - start)/ 1e3; - std::cout << GridLogMessage << "GaugeConfigurationMasked: logDetJacobian took " << time << " ms" << std::endl; - } - return ln_det; - } - void logDetJacobianForce(GaugeField &force) { force =Zero(); From c194968f5c2b83944b34d55f33d6ce0e9a783c64 Mon Sep 17 00:00:00 2001 From: Shuhei Yamamoto Date: Fri, 13 Sep 2024 18:00:22 -0400 Subject: [PATCH 05/34] Extended half-checkerboarding in logDetJacobianForceLevel except for staple calculations; enabled tagged tracing --- Grid/qcd/smearing/GaugeConfigurationMasked.h | 273 +++++++++++-------- systems/Frontier/config-command | 2 +- 2 files changed, 161 insertions(+), 114 deletions(-) diff --git a/Grid/qcd/smearing/GaugeConfigurationMasked.h b/Grid/qcd/smearing/GaugeConfigurationMasked.h index c794673902..ed8f208318 100644 --- a/Grid/qcd/smearing/GaugeConfigurationMasked.h +++ b/Grid/qcd/smearing/GaugeConfigurationMasked.h @@ -161,6 +161,28 @@ class SmearedConfigurationMasked : public SmearedConfiguration t+=usecond(); std::cout << GridLogMessage << " BaseSmear " << t/1e3 << " ms " << std::endl; } + void BaseSmear_cb(GaugeLinkField& Cup, const GaugeField& U,int mu,RealD rho) { + GridBase *grid = U.Grid(); + GridBase *hgrid = Cup.Grid(); + GaugeLinkField tmp_stpl(grid); + GaugeLinkField tmp_stpl_eo(hgrid); + WilsonLoops WL; + int cb = Cup.Checkerboard(); + RealD t = 0; + + t-=usecond(); + Cup = Zero(); + for(int nu=0; nu } void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 ) { + GRID_TRACE("Compute_MpInvJx_dNxxdSy"); int cb = PlaqL.Checkerboard(); GaugeLinkField UtaU(PlaqL.Grid()); UtaU.Checkerboard() = cb; GaugeLinkField D(PlaqL.Grid()); D.Checkerboard() = cb; @@ -263,6 +286,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration void ComputeNxy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR,AdjMatrixField &NxAd) { + GRID_TRACE("ComputeNxy"); GaugeLinkField Nx(PlaqL.Grid()); const int Ngen = SU3Adjoint::Dimension; Complex ci(0,1); @@ -311,33 +335,35 @@ class SmearedConfigurationMasked : public SmearedConfiguration void logDetJacobianForceLevel(const GaugeField &U, GaugeField &force ,int smr) { + GRID_TRACE("logDetJacobianFOrceLevel"); GridBase* grid = U.Grid(); + GridBase* hgrid = UrbGrid; // For now, assume masking is based on red-black checkerboarding ColourMatrix tb; ColourMatrix tc; ColourMatrix ta; GaugeField C(grid); GaugeField Umsk(grid); std::vector Umu(Nd,grid); - GaugeLinkField Cmu(grid); // U and staple; C contains factor of epsilon - GaugeLinkField Zx(grid); // U times Staple, contains factor of epsilon - GaugeLinkField Nxx(grid); // Nxx fundamental space + GaugeLinkField Cmu(hgrid); // U and staple; C contains factor of epsilon + GaugeLinkField Zx(hgrid); // U times Staple, contains factor of epsilon GaugeLinkField Utmp(grid); - GaugeLinkField PlaqL(grid); - GaugeLinkField PlaqR(grid); + GaugeLinkField Ueo(hgrid); + GaugeLinkField PlaqL(hgrid); + GaugeLinkField PlaqR(hgrid); const int Ngen = SU3Adjoint::Dimension; AdjMatrix TRb; ColourMatrix Ident; - LatticeComplex cplx(grid); + LatticeComplex cplx(hgrid); - AdjVectorField dJdXe_nMpInv(grid); - AdjVectorField dJdXe_nMpInv_y(grid); - AdjMatrixField MpAd(grid); // Mprime luchang's notes - AdjMatrixField MpAdInv(grid); // Mprime inverse - AdjMatrixField NxxAd(grid); // Nxx in adjoint space - AdjMatrixField JxAd(grid); - AdjMatrixField ZxAd(grid); - AdjMatrixField mZxAd(grid); - AdjMatrixField X(grid); + AdjVectorField dJdXe_nMpInv(hgrid); + AdjVectorField dJdXe_nMpInv_y(hgrid); + AdjMatrixField MpAd(hgrid); // Mprime luchang's notes + AdjMatrixField MpAdInv(hgrid); // Mprime inverse + AdjMatrixField NxxAd(hgrid); // Nxx in adjoint space + AdjMatrixField JxAd(hgrid); + AdjMatrixField ZxAd(hgrid); + AdjMatrixField mZxAd(hgrid); + AdjMatrixField X(hgrid); Complex ci(0,1); RealD t0 = usecond(); @@ -356,7 +382,24 @@ class SmearedConfigurationMasked : public SmearedConfiguration Umsk = U; ApplyMask(Umsk,smr); Utmp = peekLorentz(Umsk,mu); - + pickCheckerboard(cb,Ueo,Utmp); + + Cmu.Checkerboard() = cb; + cplx.Checkerboard() = cb; + Zx.Checkerboard() = cb; + Ueo.Checkerboard() = cb; + PlaqL.Checkerboard() = cb; + PlaqR.Checkerboard() = cb; + MpAd.Checkerboard() = cb; + MpAdInv.Checkerboard() = cb; + dJdXe_nMpInv.Checkerboard() = cb; + dJdXe_nMpInv_y.Checkerboard() = cb; + NxxAd.Checkerboard() = cb; + JxAd.Checkerboard() = cb; + ZxAd.Checkerboard() = cb; + mZxAd.Checkerboard() = cb; + X.Checkerboard() = cb; + //////////////////////////////////////////////////////////////////////////////// // Retrieve the eps/rho parameter(s) -- could allow all different but not so far //////////////////////////////////////////////////////////////////////////////// @@ -374,13 +417,13 @@ class SmearedConfigurationMasked : public SmearedConfiguration // Computes ALL the staples -- could compute one only and do it here RealD time; time=-usecond(); - BaseSmear(Cmu, U,mu,rho); - + BaseSmear_cb(Cmu, U, mu, rho); // Changed // + ////////////////////////////////////////////////////////////////// // Assemble Luscher exp diff map J matrix ////////////////////////////////////////////////////////////////// // Ta so Z lives in Lie algabra - Zx = Ta(Cmu * adj(Umu[mu])); + Zx = Ta(Cmu * adj(Ueo)); // CHANGED // time+=usecond(); std::cout << GridLogMessage << "Z took "< ////////////////////////////////////// time=-usecond(); X=1.0; - JxAd = X; + JxAd = X; // can be halved mZxAd = (-1.0)*ZxAd; RealD kpfac = 1; for(int k=1;k<12;k++){ @@ -419,24 +462,25 @@ class SmearedConfigurationMasked : public SmearedConfiguration ////////////////////////////////////// time=-usecond(); #if 1 - std::vector dJdX; dJdX.resize(8,grid); - std::vector TRb_s; TRb_s.resize(8); - AdjMatrixField tbXn(grid); - AdjMatrixField sumXtbX(grid); - AdjMatrixField t2(grid); - AdjMatrixField dt2(grid); - AdjMatrixField t3(grid); - AdjMatrixField dt3(grid); - AdjMatrixField aunit(grid); + std::vector dJdX; dJdX.resize(8,hgrid); for(auto &M : dJdX) M.Checkerboard() = cb; + std::vector TRb_s; TRb_s.resize(8); + AdjMatrixField tbXn(hgrid); tbXn.Checkerboard() = cb; + AdjMatrixField sumXtbX(hgrid); sumXtbX.Checkerboard() = cb; + AdjMatrixField t2(hgrid); t2.Checkerboard() = cb; + AdjMatrixField dt2(hgrid); dt2.Checkerboard() = cb; + AdjMatrixField t3(hgrid); t3.Checkerboard() = cb; + AdjMatrixField dt3(hgrid); dt3.Checkerboard() = cb; + AdjMatrixField aunit(hgrid); aunit.Checkerboard() = cb; for(int b=0;b<8;b++){ SU3Adjoint::generator(b, TRb_s[b]); dJdX[b] = TRb_s[b]; } aunit = ComplexD(1.0); + // Could put into an accelerator_for X = (-1.0)*ZxAd; - t2 = X; + t2 = X; for (int j = 12; j > 1; --j) { t3 = t2*(1.0 / (j + 1)) + aunit; t2 = X * t3; @@ -479,7 +523,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration ///////////////////////////////////////////////////////////////// time=-usecond(); PlaqL = Ident; - PlaqR = Utmp*adj(Cmu); + PlaqR = Ueo*adj(Cmu); ComputeNxy(PlaqL,PlaqR,NxxAd); time+=usecond(); std::cout << GridLogMessage << "ComputeNxy took "< //////////////////////////// // Mab //////////////////////////// - MpAd = Complex(1.0,0.0); - MpAd = MpAd - JxAd * NxxAd; + MpAd = Complex(1.0,0.0);std::cout << GridLogMessage <<"after MpAd def"<LocalIndexToLocalCoor(site, lcoor);int site1=1;hgrid->LocalIndexToLocalCoor(site1, lcoor1); + MpAd = MpAd - JxAd * NxxAd; std::cout << GridLogMessage <<"after MpAd apxy "<< MpAd.Checkerboard()<<" " <CheckerBoard(lcoor) << " "<CheckerBoard(lcoor1)< ///////////////////////////////////////////////////////////////// // Nxx Mp^-1 ///////////////////////////////////////////////////////////////// - AdjVectorField FdetV(grid); + AdjVectorField FdetV(hgrid); FdetV.Checkerboard() = cb; AdjVectorField Fdet1_nu(grid); AdjVectorField Fdet2_nu(grid); AdjVectorField Fdet2_mu(grid); AdjVectorField Fdet1_mu(grid); - AdjMatrixField nMpInv(grid); + AdjVectorField Fdet1_nu_eo(hgrid); Fdet1_nu_eo.Checkerboard() = cb; + AdjVectorField Fdet1_nu_oe(hgrid); Fdet1_nu_oe.Checkerboard() = (cb+1)%2; + AdjVectorField Fdet2_nu_eo(hgrid); Fdet2_nu_eo.Checkerboard() = cb; + AdjVectorField Fdet2_nu_oe(hgrid); Fdet1_nu_oe.Checkerboard() = (cb+1)%2; + + AdjVectorField Fdet1_mu_oe(hgrid); Fdet1_mu_oe = Zero(); Fdet1_mu_oe.Checkerboard() = (cb+1)%2; + AdjVectorField Fdet2_mu_oe(hgrid); Fdet2_mu_oe = Zero(); Fdet2_mu_oe.Checkerboard() = (cb+1)%2; + + AdjMatrixField nMpInv(hgrid); nMpInv.Checkerboard() = cb; nMpInv= NxxAd *MpAdInv; - AdjMatrixField MpInvJx(grid); - AdjMatrixField MpInvJx_nu(grid); + AdjMatrixField MpInvJx(hgrid); MpInvJx.Checkerboard() = cb; + AdjMatrixField MpInvJx_nu(hgrid); MpInvJx_nu.Checkerboard() = cb; MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor - //FdetV=Zero();// debug - Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx,FdetV); - Fdet2_mu=FdetV; - Fdet1_mu=Zero(); - /* debug - AdjVectorField tmp_debug(grid); tmp_debug=Zero(); - Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,tmp_debug); - std::cout << GridLogMessage << "|MpInvJx_dNxxdSy_diff|^2 " << norm2(FdetV-tmp_debug) << std::endl; - */ - + LatticeComplexD tr(hgrid); tr.Checkerboard() = cb; for(int e =0 ; e<8 ; e++){ - LatticeComplexD tr(grid); // ColourMatrix te; // SU3::generator(e, te); tr = trace(dJdX[e] * nMpInv); pokeColour(dJdXe_nMpInv,tr,e); } - /////////////////////////////// - // Mask it off - /////////////////////////////// - auto tmp=PeekIndex(masks[smr],mu); - dJdXe_nMpInv = dJdXe_nMpInv*tmp; - + + setCheckerboard(Fdet1_mu, (AdjVectorField) (transpose(NxxAd)*dJdXe_nMpInv)); + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV); + setCheckerboard(Fdet2_mu,FdetV); + // dJdXe_nMpInv needs to multiply: // Nxx_mu (site local) (1) // Nxy_mu one site forward in each nu direction (3) // Nxy_mu one site backward in each nu direction (3) // Nxy_nu 0,0 ; +mu,0; 0,-nu; +mu-nu [ 3x4 = 12] // 19 terms. - AdjMatrixField Nxy(grid); + AdjMatrixField Nxy(hgrid); GaugeField Fdet1(grid); GaugeField Fdet2(grid); - GaugeLinkField Fdet_pol(grid); // one polarisation RealD t4 = usecond(), tLR = 0, tNxy = 0, tMJx = 0; for(int nu=0;nu time=-usecond(); tLR -= usecond(); PlaqL=Ident; - PlaqR=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu, - Gimpl::CovShiftForward(Umu[mu], mu, - Gimpl::CovShiftBackward(Umu[nu], nu, - Gimpl::CovShiftIdentityBackward(Utmp, mu)))); + pickCheckerboard(cb,PlaqR,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[nu], nu, + Gimpl::CovShiftForward(Umu[mu], mu, + Gimpl::CovShiftBackward(Umu[nu], nu, + Gimpl::CovShiftIdentityBackward(Utmp, mu)))))); time+=usecond(); tLR += usecond(); std::cout << GridLogMessage << "PlaqLR took "< // .__| // nu polarisation -- anticlockwise tLR -= usecond(); - PlaqR=(rho)*Gimpl::CovShiftForward(Umu[nu], nu, - Gimpl::CovShiftBackward(Umu[mu], mu, - Gimpl::CovShiftIdentityBackward(Umu[nu], nu))); + pickCheckerboard((cb+1)%2,PlaqR,(GaugeLinkField) ((rho)*Gimpl::CovShiftForward(Umu[nu], nu, + Gimpl::CovShiftBackward(Umu[mu], mu, + Gimpl::CovShiftIdentityBackward(Umu[nu], nu))))); - PlaqL=Gimpl::CovShiftIdentityBackward(Utmp, mu); + pickCheckerboard((cb+1)%2,PlaqL, (GaugeLinkField) (Gimpl::CovShiftIdentityBackward(Utmp, mu))); tLR += usecond(); tNxy -= usecond(); + Nxy.Checkerboard() = (cb+1)%2; FdetV.Checkerboard() = (cb+1)%2; dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1); ComputeNxy(PlaqL, PlaqR,Nxy); - Fdet1_nu = Fdet1_nu+transpose(Nxy)*dJdXe_nMpInv_y; + Fdet1_nu_oe = transpose(Nxy)*dJdXe_nMpInv_y; tNxy += usecond(); tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,mu,-1); - Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); - Fdet2_nu = Fdet2_nu+FdetV; + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + + Fdet2_nu_oe = FdetV; tMJx += usecond(); - printCheckerboards2norm(FdetV,(cb+1)%2); ///////////////// -ve nu ///////////////// // __ // | | // x== // nu polarisation -- clockwise - tLR -= usecond(); - PlaqL=(rho)* Gimpl::CovShiftForward(Umu[mu], mu, - Gimpl::CovShiftForward(Umu[nu], nu, - Gimpl::CovShiftIdentityBackward(Utmp, mu))); + pickCheckerboard((cb+1)%2,PlaqL,(GaugeLinkField) ((rho)* Gimpl::CovShiftForward(Umu[mu], mu, + Gimpl::CovShiftForward(Umu[nu], nu, + Gimpl::CovShiftIdentityBackward(Utmp, mu))))); - PlaqR = Gimpl::CovShiftIdentityForward(Umu[nu], nu); + pickCheckerboard((cb+1)%2,PlaqR, (GaugeLinkField) (Gimpl::CovShiftIdentityForward(Umu[nu], nu))); tLR += usecond(); tNxy -= usecond(); dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1); ComputeNxy(PlaqL,PlaqR,Nxy); - Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y; + Fdet1_nu_oe = Fdet1_nu_oe + transpose(Nxy)*dJdXe_nMpInv_y; tNxy += usecond(); tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,1); - Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); - Fdet2_nu = Fdet2_nu+FdetV; + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Fdet2_nu_oe = Fdet2_nu_oe+FdetV; tMJx += usecond(); - printCheckerboards2norm(FdetV,(cb+1)%2); // x== // | | // |__| // nu polarisation - tLR -= usecond(); - PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu, - Gimpl::CovShiftIdentityBackward(Utmp, mu)); + pickCheckerboard(cb,PlaqL,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[nu], nu, + Gimpl::CovShiftIdentityBackward(Utmp, mu)))); - PlaqR=Gimpl::CovShiftBackward(Umu[mu], mu, - Gimpl::CovShiftIdentityForward(Umu[nu], nu)); + pickCheckerboard(cb,PlaqR,(GaugeLinkField) (Gimpl::CovShiftBackward(Umu[mu], mu, + Gimpl::CovShiftIdentityForward(Umu[nu], nu)))); tLR += usecond(); tNxy -= usecond(); + Nxy.Checkerboard() = cb; FdetV.Checkerboard() = cb; dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1); dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv_y,nu,1); ComputeNxy(PlaqL,PlaqR,Nxy); - Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y; + Fdet1_nu_eo = Fdet1_nu_eo + transpose(Nxy)*dJdXe_nMpInv_y; tNxy += usecond(); tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,mu,-1); MpInvJx_nu = Cshift(MpInvJx_nu,nu,1); - Compute_MpInvJx_dNxxdSy(cb,PlaqL,PlaqR,MpInvJx_nu,FdetV); - Fdet2_nu = Fdet2_nu+FdetV; + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Fdet2_nu_eo = Fdet2_nu_eo+FdetV; tMJx += usecond(); - printCheckerboards2norm(FdetV,cb); ///////////////////////////////////////////////////////////////////// // Set up the determinant force contribution in 3x3 algebra basis ///////////////////////////////////////////////////////////////////// + setCheckerboard(Fdet1_nu, Fdet1_nu_eo); + setCheckerboard(Fdet1_nu, Fdet1_nu_oe); InsertForce(Fdet1,Fdet1_nu,nu); + setCheckerboard(Fdet2_nu, Fdet2_nu_eo); + setCheckerboard(Fdet2_nu, Fdet2_nu_oe); InsertForce(Fdet2,Fdet2_nu,nu); ////////////////////////////////////////////////// // Parallel direction terms ////////////////////////////////////////////////// + Nxy.Checkerboard() = (cb+1)%2; FdetV.Checkerboard() = (cb+1)%2; + // __ // | " // |__"x // mu polarisation tLR -= usecond(); - PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu, - Gimpl::CovShiftBackward(Umu[nu], nu, - Gimpl::CovShiftIdentityBackward(Utmp, mu))); + pickCheckerboard((cb+1)%2,PlaqL,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[mu], mu, + Gimpl::CovShiftBackward(Umu[nu], nu, + Gimpl::CovShiftIdentityBackward(Utmp, mu))))); - PlaqR=Gimpl::CovShiftIdentityBackward(Umu[nu], nu); + pickCheckerboard((cb+1)%2,PlaqR,(GaugeLinkField) (Gimpl::CovShiftIdentityBackward(Umu[nu], nu))); tLR += usecond(); tNxy -= usecond(); dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,-1); ComputeNxy(PlaqL,PlaqR,Nxy); - Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y; + Fdet1_mu_oe = Fdet1_mu_oe + transpose(Nxy)*dJdXe_nMpInv_y; tNxy += usecond(); tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,-1); - - Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); - Fdet2_mu = Fdet2_mu+FdetV; + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Fdet2_mu_oe = Fdet2_mu_oe+FdetV; tMJx += usecond(); - printCheckerboards2norm(FdetV,(cb+1)%2); + // __ // " | // x__| // mu polarisation tLR -= usecond(); - PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu, - Gimpl::CovShiftForward(Umu[nu], nu, - Gimpl::CovShiftIdentityBackward(Utmp, mu))); + pickCheckerboard((cb+1)%2,PlaqL,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[mu], mu, + Gimpl::CovShiftForward(Umu[nu], nu, + Gimpl::CovShiftIdentityBackward(Utmp, mu))))); - PlaqR=Gimpl::CovShiftIdentityForward(Umu[nu], nu); + pickCheckerboard((cb+1)%2,PlaqR,(GaugeLinkField) (Gimpl::CovShiftIdentityForward(Umu[nu], nu))); tLR += usecond(); tNxy -= usecond(); dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1); ComputeNxy(PlaqL,PlaqR,Nxy); - Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y; + Fdet1_mu_oe = Fdet1_mu_oe + transpose(Nxy)*dJdXe_nMpInv_y; tNxy += usecond(); tMJx -= usecond(); MpInvJx_nu = Cshift(MpInvJx,nu,1); - Compute_MpInvJx_dNxxdSy((cb+1)%2,PlaqL,PlaqR,MpInvJx_nu,FdetV); - Fdet2_mu = Fdet2_mu+FdetV; + Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); + Fdet2_mu_oe = Fdet2_mu_oe+FdetV; tMJx += usecond(); - printCheckerboards2norm(FdetV,(cb+1)%2); + } } RealD t5 = usecond(); - - Fdet1_mu = Fdet1_mu + transpose(NxxAd)*dJdXe_nMpInv; - + setCheckerboard(Fdet1_mu, Fdet1_mu_oe); + setCheckerboard(Fdet2_mu, Fdet2_mu_oe); InsertForce(Fdet1,Fdet1_mu,mu); InsertForce(Fdet2,Fdet2_mu,mu); @@ -1433,7 +1480,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration force=Ta(force); // Ta -#if 1 // debug +#if 0 // debug GaugeField force_debug(force.Grid()); logDetJacobianForce(1,force_debug); std::cout << GridLogMessage << " DEBUG: logDetJacobianForce_diff " << norm2(force-force_debug) << std::endl; diff --git a/systems/Frontier/config-command b/systems/Frontier/config-command index a1c113e4e1..b36a903913 100644 --- a/systems/Frontier/config-command +++ b/systems/Frontier/config-command @@ -3,7 +3,7 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-` --with-lime=$CLIME \ --enable-unified=no \ --enable-shm=nvlink \ ---enable-tracing=timer \ +--enable-tracing=roctx \ --enable-accelerator=hip \ --enable-gen-simd-width=64 \ --disable-gparity \ From bffd30abecfa72c1700cf0df0d40561f73c35cf6 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 19 Sep 2024 15:48:09 -0400 Subject: [PATCH 06/34] Optimise lie algebra project --- Grid/qcd/utils/GaugeGroup.h | 42 ++++++++++++++++++------------------- Grid/qcd/utils/SUn.impl.h | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Grid/qcd/utils/GaugeGroup.h b/Grid/qcd/utils/GaugeGroup.h index 6811d247e9..ee4dd9fc7b 100644 --- a/Grid/qcd/utils/GaugeGroup.h +++ b/Grid/qcd/utils/GaugeGroup.h @@ -418,32 +418,32 @@ static void LieAlgebraProject(LatticeAlgebraMatrix &out,const LatticeMatrix &in, int hNNm1= NNm1/2; RealD sqrt_2 = sqrt(2.0); Complex ci(0.0,1.0); - for(int su2Index=0;su2IndexoSites(),1,{ + + const int nsimd= Matrix::Nsimd(); + accelerator_for(ss,grid->oSites(),nsimd,{ + for(int su2Index=0;su2IndexoSites(),vComplex::Nsimd(),{ - auto tmp = in_v[ss]()()(0,0); + coalescedWrite(out_v[ss]()()(ax,b),0.5*(real(in_v(ss)()()(i2,i1)) - real(in_v(ss)()()(i1,i2)))); + coalescedWrite(out_v[ss]()()(ay,b),0.5*(imag(in_v(ss)()()(i1,i2)) + imag(in_v(ss)()()(i2,i1)))); + } + for(int diagIndex=0;diagIndex &ta) { //////////////////////////////////////////////////////////////////////// // Map a su2 subgroup number to the pair of rows that are non zero //////////////////////////////////////////////////////////////////////// -static void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) { +static accelerator_inline void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) { assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2)); int spare = su2_index; From 9fe84d224c481f94272d4684bc5de22631656ff9 Mon Sep 17 00:00:00 2001 From: Shuhei Yamamoto Date: Thu, 19 Sep 2024 19:33:31 -0400 Subject: [PATCH 07/34] added tracing macros --- Grid/qcd/smearing/GaugeConfigurationMasked.h | 82 +++++++++++++++----- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/Grid/qcd/smearing/GaugeConfigurationMasked.h b/Grid/qcd/smearing/GaugeConfigurationMasked.h index ed8f208318..0b28671d0d 100644 --- a/Grid/qcd/smearing/GaugeConfigurationMasked.h +++ b/Grid/qcd/smearing/GaugeConfigurationMasked.h @@ -249,7 +249,10 @@ class SmearedConfigurationMasked : public SmearedConfiguration SU3::generator(a, ta); ta = 2.0 * ci * ta; // Qlat Tb = 2i Tb^Grid - UtaU= adj(PlaqL)*ta*PlaqR; // 6ms + { + GRID_TRACE("UtaU"); + UtaU= adj(PlaqL)*ta*PlaqR; // 6ms + } tta+=usecond(); //////////////////////////////////////////// // Could add this entire C-loop to a projection routine @@ -260,9 +263,15 @@ class SmearedConfigurationMasked : public SmearedConfiguration SU3::generator(c, tc); tc = 2.0*ci*tc; tp-=usecond(); - D = Ta( tc *UtaU); // 2ms + { + GRID_TRACE("D = Ta tc*UtaU"); + D = Ta( tc *UtaU); // 2ms + } #if 1 - SU3::LieAlgebraProject(Dbc_opt,D,c); // 5.5ms + { + GRID_TRACE("LieAlgebraProject"); + SU3::LieAlgebraProject(Dbc_opt,D,c); // 5.5ms + } #else for(int b=0;b // Dump(Dbc_opt,"Dbc_opt"); // Dump(Dbc,"Dbc"); tpk-=usecond(); - tmp = trace(MpInvJx * Dbc_opt); - PokeIndex(Fdet2,tmp,a); + { + GRID_TRACE("traceMpDbc"); + tmp = trace(MpInvJx * Dbc_opt); + } + { + GRID_TRACE("pokeIndecx"); + PokeIndex(Fdet2,tmp,a); + } tpk+=usecond(); } t+=usecond(); @@ -301,16 +316,25 @@ class SmearedConfigurationMasked : public SmearedConfiguration tgen+=usecond(); tb = 2.0 * ci * tb; tta-=usecond(); - Nx = Ta( adj(PlaqL)*tb * PlaqR ); + { + GRID_TRACE("UtaU"); + Nx = Ta( adj(PlaqL)*tb * PlaqR ); + } tta+=usecond(); tp-=usecond(); #if 1 - SU3::LieAlgebraProject(NxAd,Nx,b); + { + GRID_TRACE("LieAlgebraProject"); + SU3::LieAlgebraProject(NxAd,Nx,b); + } #else for(int c=0;c(NxAd,tmp,c,b); + { + GRID_TRACE("pokeIndecx"); + PokeIndex(NxAd,tmp,c,b); + } } #endif tp+=usecond(); @@ -335,7 +359,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration void logDetJacobianForceLevel(const GaugeField &U, GaugeField &force ,int smr) { - GRID_TRACE("logDetJacobianFOrceLevel"); + GRID_TRACE("logDetJacobianForceLevel"); GridBase* grid = U.Grid(); GridBase* hgrid = UrbGrid; // For now, assume masking is based on red-black checkerboarding ColourMatrix tb; @@ -417,13 +441,13 @@ class SmearedConfigurationMasked : public SmearedConfiguration // Computes ALL the staples -- could compute one only and do it here RealD time; time=-usecond(); - BaseSmear_cb(Cmu, U, mu, rho); // Changed // + BaseSmear_cb(Cmu, U, mu, rho); ////////////////////////////////////////////////////////////////// // Assemble Luscher exp diff map J matrix ////////////////////////////////////////////////////////////////// // Ta so Z lives in Lie algabra - Zx = Ta(Cmu * adj(Ueo)); // CHANGED // + Zx = Ta(Cmu * adj(Ueo)); time+=usecond(); std::cout << GridLogMessage << "Z took "< // Move Z to the Adjoint Rep == make_adjoint_representation ZxAd = Zero(); for(int b=0;b<8;b++) { + GRID_TRACE("ZxAd_b"); // Adj group sets traceless antihermitian T's -- Guido, really???? SU3::generator(b, tb); // Fund group sets traceless hermitian T's SU3Adjoint::generator(b,TRb); @@ -446,10 +471,11 @@ class SmearedConfigurationMasked : public SmearedConfiguration ////////////////////////////////////// time=-usecond(); X=1.0; - JxAd = X; // can be halved + JxAd = X; mZxAd = (-1.0)*ZxAd; RealD kpfac = 1; for(int k=1;k<12;k++){ + GRID_TRACE("JxAd_k"); X=X*mZxAd; kpfac = kpfac /(k+1); JxAd = JxAd + X * kpfac; @@ -471,7 +497,8 @@ class SmearedConfigurationMasked : public SmearedConfiguration AdjMatrixField t3(hgrid); t3.Checkerboard() = cb; AdjMatrixField dt3(hgrid); dt3.Checkerboard() = cb; AdjMatrixField aunit(hgrid); aunit.Checkerboard() = cb; - + { + GRID_TRACE("dJx"); for(int b=0;b<8;b++){ SU3Adjoint::generator(b, TRb_s[b]); dJdX[b] = TRb_s[b]; @@ -491,6 +518,7 @@ class SmearedConfigurationMasked : public SmearedConfiguration for(int b=0;b<8;b++){ dJdX[b] = -dJdX[b]; } + } #else std::vector dJdX; dJdX.resize(8,grid); AdjMatrixField tbXn(grid); @@ -518,8 +546,9 @@ class SmearedConfigurationMasked : public SmearedConfiguration #endif time+=usecond(); std::cout << GridLogMessage << "dJx took "< AdjVectorField Fdet1_mu_oe(hgrid); Fdet1_mu_oe = Zero(); Fdet1_mu_oe.Checkerboard() = (cb+1)%2; AdjVectorField Fdet2_mu_oe(hgrid); Fdet2_mu_oe = Zero(); Fdet2_mu_oe.Checkerboard() = (cb+1)%2; - AdjMatrixField nMpInv(hgrid); nMpInv.Checkerboard() = cb; + AdjMatrixField nMpInv(hgrid); nMpInv.Checkerboard() = cb; nMpInv= NxxAd *MpAdInv; - AdjMatrixField MpInvJx(hgrid); MpInvJx.Checkerboard() = cb; - AdjMatrixField MpInvJx_nu(hgrid); MpInvJx_nu.Checkerboard() = cb; + AdjMatrixField MpInvJx(hgrid); MpInvJx.Checkerboard() = cb; + AdjMatrixField MpInvJx_nu(hgrid); MpInvJx_nu.Checkerboard() = cb; MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor LatticeComplexD tr(hgrid); tr.Checkerboard() = cb; @@ -605,11 +634,13 @@ class SmearedConfigurationMasked : public SmearedConfiguration time=-usecond(); tLR -= usecond(); PlaqL=Ident; - + { + GRID_TRACE("Staple"); pickCheckerboard(cb,PlaqR,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[nu], nu, Gimpl::CovShiftForward(Umu[mu], mu, Gimpl::CovShiftBackward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu)))))); + } time+=usecond(); tLR += usecond(); std::cout << GridLogMessage << "PlaqLR took "< // .__| // nu polarisation -- anticlockwise tLR -= usecond(); + { + GRID_TRACE("Staple"); pickCheckerboard((cb+1)%2,PlaqR,(GaugeLinkField) ((rho)*Gimpl::CovShiftForward(Umu[nu], nu, Gimpl::CovShiftBackward(Umu[mu], mu, Gimpl::CovShiftIdentityBackward(Umu[nu], nu))))); pickCheckerboard((cb+1)%2,PlaqL, (GaugeLinkField) (Gimpl::CovShiftIdentityBackward(Utmp, mu))); + } tLR += usecond(); tNxy -= usecond(); @@ -660,11 +694,14 @@ class SmearedConfigurationMasked : public SmearedConfiguration // | | // x== // nu polarisation -- clockwise tLR -= usecond(); + { + GRID_TRACE("Staple"); pickCheckerboard((cb+1)%2,PlaqL,(GaugeLinkField) ((rho)* Gimpl::CovShiftForward(Umu[mu], mu, Gimpl::CovShiftForward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu))))); pickCheckerboard((cb+1)%2,PlaqR, (GaugeLinkField) (Gimpl::CovShiftIdentityForward(Umu[nu], nu))); + } tLR += usecond(); tNxy -= usecond(); @@ -683,11 +720,14 @@ class SmearedConfigurationMasked : public SmearedConfiguration // | | // |__| // nu polarisation tLR -= usecond(); + { + GRID_TRACE("Staple"); pickCheckerboard(cb,PlaqL,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu)))); pickCheckerboard(cb,PlaqR,(GaugeLinkField) (Gimpl::CovShiftBackward(Umu[mu], mu, Gimpl::CovShiftIdentityForward(Umu[nu], nu)))); + } tLR += usecond(); tNxy -= usecond(); @@ -726,11 +766,14 @@ class SmearedConfigurationMasked : public SmearedConfiguration // | " // |__"x // mu polarisation tLR -= usecond(); + { + GRID_TRACE("Staple"); pickCheckerboard((cb+1)%2,PlaqL,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[mu], mu, Gimpl::CovShiftBackward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu))))); pickCheckerboard((cb+1)%2,PlaqR,(GaugeLinkField) (Gimpl::CovShiftIdentityBackward(Umu[nu], nu))); + } tLR += usecond(); tNxy -= usecond(); @@ -750,11 +793,14 @@ class SmearedConfigurationMasked : public SmearedConfiguration // " | // x__| // mu polarisation tLR -= usecond(); + { + GRID_TRACE("Staple"); pickCheckerboard((cb+1)%2,PlaqL,(GaugeLinkField) ((-rho)*Gimpl::CovShiftForward(Umu[mu], mu, Gimpl::CovShiftForward(Umu[nu], nu, Gimpl::CovShiftIdentityBackward(Utmp, mu))))); pickCheckerboard((cb+1)%2,PlaqR,(GaugeLinkField) (Gimpl::CovShiftIdentityForward(Umu[nu], nu))); + } tLR += usecond(); tNxy -= usecond(); From 03f31058c714ad0d2928f2c345781f7ae1fc37ac Mon Sep 17 00:00:00 2001 From: Shuhei Yamamoto Date: Mon, 7 Oct 2024 16:03:03 -0400 Subject: [PATCH 08/34] put some loops in Compute_MpInvJx_dNxxdSy to LieAlgebraProject; rearranged include macros to allow compilation w/t roctcx --- Grid/GridCore.h | 6 +- Grid/qcd/smearing/GaugeConfigurationMasked.h | 80 +++++++++++-- Grid/qcd/utils/GaugeGroup.h | 114 ++++++++++++++++++- Grid/threads/Accelerator.h | 6 +- systems/Frontier/config-command | 3 +- systems/Frontier/sourceme.sh | 5 +- 6 files changed, 194 insertions(+), 20 deletions(-) diff --git a/Grid/GridCore.h b/Grid/GridCore.h index 90d01f2e46..cdb1aeb755 100644 --- a/Grid/GridCore.h +++ b/Grid/GridCore.h @@ -42,12 +42,12 @@ Author: paboyle #include #include #include -#include #include -//#include -#include #include #include +#include +//#include +#include #include #include #include diff --git a/Grid/qcd/smearing/GaugeConfigurationMasked.h b/Grid/qcd/smearing/GaugeConfigurationMasked.h index 0b28671d0d..cea92970d1 100644 --- a/Grid/qcd/smearing/GaugeConfigurationMasked.h +++ b/Grid/qcd/smearing/GaugeConfigurationMasked.h @@ -13,6 +13,8 @@ template void Dump(const Lattice & lat, Coordinate site = Coordinate({0,0,0,0})) { typename T::scalar_object tmp; + if (lat.Checkerboard() != lat.Grid()->CheckerBoard(site)) + site = Coordinate({0,0,0,1}); peekSite(tmp,lat,site); std::cout << " Dump "< time+=usecond(); std::cout << GridLogMessage << " Checkerboarding_MpInvJx_dNxxdSy " << time/1e3 << " ms " << std::endl; } + void Compute_MpInvJx_dNxxdSy_fused(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 ) + { + GRID_TRACE("Compute_MpInvJx_dNxxdSy_fused"); + int cb = PlaqL.Checkerboard(); + GaugeLinkField UtaU(PlaqL.Grid()); UtaU.Checkerboard() = cb; + GaugeLinkField D(PlaqL.Grid()); D.Checkerboard() = cb; + AdjMatrixField Dbc(PlaqL.Grid()); Dbc.Checkerboard() = cb; + AdjMatrixField Dbc_opt(PlaqL.Grid()); Dbc_opt.Checkerboard() = cb; + LatticeComplex tmp(PlaqL.Grid()); tmp.Checkerboard() = cb; + const int Ngen = SU3Adjoint::Dimension; + Complex ci(0,1); + ColourMatrix ta,tb,tc; + RealD t=0; + RealD tp=0, tpl=0; + RealD tta=0; + RealD tpk=0; + t-=usecond(); + for(int a=0;a(Fdet2,tmp,a); + } + tpk+=usecond(); + } + t+=usecond(); + std::cout << GridLogMessage << " Compute_MpInvJx_dNxxdSy_fused " << t/1e3 << " ms proj "< const int Ngen = SU3Adjoint::Dimension; Complex ci(0,1); ColourMatrix ta,tb,tc; - RealD t=0; - RealD tp=0; + RealD t=0, time; + RealD tp=0, tpl=0; RealD tta=0; RealD tpk=0; t-=usecond(); @@ -270,7 +327,11 @@ class SmearedConfigurationMasked : public SmearedConfiguration #if 1 { GRID_TRACE("LieAlgebraProject"); + time=-usecond(); SU3::LieAlgebraProject(Dbc_opt,D,c); // 5.5ms + time+=usecond(); + tpl+=time; + //std::cout << GridLogMessage << " LieAlgebraProject_in_Compute_MpInvJx " << a << " " << c << " "<