Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
b12b4fd
Attempt at operating on half checkerboard
paboyle Aug 23, 2024
da91994
Clean up the accelerator pick/set checkerboard
paboyle Aug 23, 2024
dbb79c5
fixed the bug related to inappropriate use of half-checkerboarding
Aug 27, 2024
7fe1b82
fixed bugs; added timings; redirected timing outputs from performance…
Sep 10, 2024
c194968
Extended half-checkerboarding in logDetJacobianForceLevel except for …
Sep 13, 2024
bffd30a
Optimise lie algebra project
paboyle Sep 19, 2024
9fe84d2
added tracing macros
Sep 19, 2024
64ab7ce
Merge remote-tracking branch 'refs/remotes/origin/feature/fthmc-optim…
Sep 19, 2024
03f3105
put some loops in Compute_MpInvJx_dNxxdSy to LieAlgebraProject; rear…
Oct 7, 2024
7fe839e
checked correctness of trace_product w/t accelerator_inline; impleme…
Oct 8, 2024
8577233
further loop fusion in Compute_MpInvJx_dNxxdSy_fused is implemented; …
Oct 9, 2024
7662993
Replaced old Compute_MpInvJx_dNxxdSy by the one with loop fusion; cor…
Oct 10, 2024
f368dc7
implemented optimization for logDetJacobianForceLevel except for stap…
Oct 19, 2024
1c410b1
implemented optimization including staple
Oct 22, 2024
93ebe15
Optimized staple (redundant Exchange & stencil setup moved to the con…
Oct 26, 2024
9293ef1
added tracing in Cshift functions; removed timings from Inversion; im…
Nov 1, 2024
48977cb
Confirmed correctness of implemented optimization of FTHMC; attempted…
Nov 20, 2024
3124a8d
added more GRID_TRACE
Feb 21, 2025
138a508
save before cleanup
Mar 4, 2025
2c6a357
fixed minor bugs
Mar 7, 2025
1f5c66d
some cleanup
Mar 7, 2025
f5c75c2
removed #if section for debugging
Mar 21, 2025
e11b18d
merged with develop
Mar 21, 2025
abe2cd5
Fixed Grid_Eigen_Dense.h for incomplete removal of debugging #if + mo…
Mar 21, 2025
10b8b66
added debugging print statements; modify code to make Aurora happy
Apr 4, 2025
427a3a4
merged with upstream/develop
Apr 4, 2025
78fb9ce
resolved a conflict
Apr 4, 2025
903f1a3
removed debug print
Apr 9, 2025
a1a23b5
merged with remote
Apr 9, 2025
802418a
added debugging statements
Apr 23, 2025
dcdb278
merged with develop
Apr 23, 2025
9cf5747
implemented a temporary fix on issues on PaddedCell
Apr 24, 2025
5aea7d7
resolved all conflicts
Apr 24, 2025
9f8a1b3
enabled to take snapshots of gauge config and each force at each step…
May 1, 2025
e01195f
added flight recorder (pboyle); added snapshot features (with rearran…
Jul 10, 2025
a4d8bd2
merged with develop
Jul 12, 2025
5a768eb
switched off heartbeat during MD in case of taking snapshots, as it i…
Jul 17, 2025
c597cc3
commented out time-consuming part
Aug 9, 2025
2142117
merged with upstream/develop
Aug 9, 2025
c0ab4ba
corrected normalization
Aug 14, 2025
ea8c2b8
Merge remote-tracking branch 'upstream/develop' into feature/fthmc-op…
Aug 14, 2025
cabcfc2
Merge remote-tracking branch 'upstream/develop' into feature/fthmc-op…
Aug 14, 2025
4ac8520
changed to GRID_ASSERT to assert
Aug 14, 2025
3d17c46
Merge remote-tracking branch 'upstream/develop' into feature/fthmc-op…
Aug 14, 2025
30db7f4
Added an optional feature of initial momentum filtering and scaling i…
Nov 26, 2025
b3ff730
merged with develop
Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Grid/Grid.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>

#include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/Action.h>
#include <Grid/qcd/action/Action.h>// now contains #include <Grid/parallelIO/MetaData.h>
#include <Grid/qcd/utils/GaugeFix.h>
#include <Grid/qcd/utils/CovariantSmearing.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/parallelIO/MetaData.h>
//#include <Grid/parallelIO/MetaData.h>//moved to ActionCore.h
#include <Grid/qcd/hmc/HMC_aggregate.h>

#endif
6 changes: 3 additions & 3 deletions Grid/GridCore.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/DisableWarnings.h>
#include <Grid/Namespace.h>
#include <Grid/GridStd.h>
#include <Grid/threads/Pragmas.h>
#include <Grid/perfmon/Timer.h>
//#include <Grid/perfmon/PerfCount.h>
#include <Grid/util/Util.h>
#include <Grid/log/Log.h>
#include <Grid/perfmon/Tracing.h>
#include <Grid/threads/Pragmas.h>
//#include <Grid/perfmon/PerfCount.h>
#include <Grid/util/Util.h>
#include <Grid/allocator/Allocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/threads/ThreadReduction.h>
Expand Down
4 changes: 3 additions & 1 deletion Grid/Grid_Eigen_Dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#endif

/* HIP save and restore compile environment*/

#ifdef GRID_HIP
#pragma push
#pragma push_macro("__HIP_DEVICE_COMPILE__")
Expand All @@ -63,11 +64,12 @@
#endif

/*HIP restore*/
/*
#ifdef __HIP__REDEFINE__
#pragma pop_macro("__HIP_DEVICE_COMPILE__")
#pragma pop
#endif

*/
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif
Expand Down
1 change: 1 addition & 0 deletions Grid/cartesian/Cartesian_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class GridBase : public CartesianCommunicator , public GridThread {
////////////////////////////////////////////////////////////////
virtual int CheckerBoarded(int dim) =0;
virtual int CheckerBoard(const Coordinate &site)=0;
virtual int CheckerDim(void){ return 0; };
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
Expand Down
1 change: 1 addition & 0 deletions Grid/cartesian/Cartesian_red_black.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class GridRedBlackCartesian : public GridBase
// int _checker_dim;
std::vector<int> _checker_board;

virtual int CheckerDim(void){ return _checker_dim; };
virtual int isCheckerBoarded(void) const { return 1; };
virtual int CheckerBoarded(int dim){
if( dim==_checker_dim) return 1;
Expand Down
7 changes: 7 additions & 0 deletions Grid/cshift/Cshift_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ inline std::pair<int,int> *MapCshiftTable(void)
template<class vobj> void
Gather_plane_simple (const Lattice<vobj> &rhs,deviceVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
{
GRID_TRACE("Gather_plane_simple");
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
Expand Down Expand Up @@ -105,6 +106,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,
ExtractPointerArray<typename vobj::scalar_object> pointers,
int dimension,int plane,int cbmask)
{
GRID_TRACE("Gather_plane_extract");
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
Expand Down Expand Up @@ -160,6 +162,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,
//////////////////////////////////////////////////////
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,deviceVector<vobj> &buffer, int dimension,int plane,int cbmask)
{
GRID_TRACE("Scatter_plane_simple");
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
Expand Down Expand Up @@ -214,6 +217,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,deviceVector<
//////////////////////////////////////////////////////
template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerArray<typename vobj::scalar_object> pointers,int dimension,int plane,int cbmask)
{
GRID_TRACE("Scatter_plane_merge");
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
Expand Down Expand Up @@ -263,6 +267,7 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA

template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
{
GRID_TRACE("Copy_plane");
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
Expand Down Expand Up @@ -311,6 +316,7 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs

template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
{
GRID_TRACE("Copy_plane_permute");
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
Expand Down Expand Up @@ -358,6 +364,7 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
//////////////////////////////////////////////////////
template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
{
GRID_TRACE("Cshift_local");
int sshift[2];

sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
Expand Down
5 changes: 5 additions & 0 deletions Grid/cshift/Cshift_mpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ extern uint64_t checksum_index;
const int Cshift_verbose=0;
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
{
GRID_TRACE("Cshift");
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;

Expand Down Expand Up @@ -138,6 +139,7 @@ template<class vobj> void Cshift_simple(Lattice<vobj>& ret,const Lattice<vobj> &
}
template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
{
GRID_TRACE("Cshift_comms");
int sshift[2];

sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
Expand All @@ -156,6 +158,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r

template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
{
GRID_TRACE("Cshift_comms_simd");
int sshift[2];

sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
Expand All @@ -173,6 +176,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
}
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
GRID_TRACE("Cshift_comms_cb");
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;

Expand Down Expand Up @@ -305,6 +309,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r

template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
GRID_TRACE("Cshift_comms_simd_cb");
GridBase *grid=rhs.Grid();
const int Nsimd = grid->Nsimd();
typedef typename vobj::vector_type vector_type;
Expand Down
155 changes: 155 additions & 0 deletions Grid/lattice/Lattice_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iSc
template<int N>
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
#if 0
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
Expand All @@ -121,9 +122,163 @@ Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScala
}}
pokeLocalSite(Ui,ret_v,lcoor);
});
#else
GridBase *grid=Umu.Grid();
auto osites = grid->oSites();
const int Nsimd=grid->Nsimd();
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,osites,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);

iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
iScalar<iScalar<iMatrix<ComplexD, N> > > Ui;

for(int lane=0;lane<Nsimd;lane++){
Us = extractLane(lane,Umu_v[site]);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j);
}}
Eigen::MatrixXcd EigenUinv = EigenU.inverse();
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
Ui()()(i,j) = EigenUinv(i,j);
}}
insertLane(lane,ret_v[site],Ui);
}
});
#endif
return ret;
}

/* Helper functions for inversion of real matrix on GPU based on Nobu's code*/
/**
 * In-place LU decomposition of an N x N matrix with implicitly scaled
 * partial (row) pivoting.
 *
 * @param LU  On entry: the matrix to factorise.
 *            On exit: entries below the diagonal hold the elimination
 *            multipliers (L, with an implied unit diagonal) and entries
 *            on/above the diagonal hold U, both for the row-permuted matrix.
 * @param P   On exit: P(k) is the index of the row that was exchanged with
 *            row k at elimination step k (P(k)==k means no exchange).
 *
 * A row whose largest absolute entry does not exceed TINY trips the assert.
 * A vanishing pivot is not otherwise detected: if every candidate in a pivot
 * column is zero, no rows are exchanged and the subsequent division by
 * LU(k,k) yields non-finite values.
 */
template<class type1,class type2,int N>
accelerator_inline void LUdcmp( iMatrix<type1,N> &LU, iVector<type2,N> &P)
{

  const RealD TINY=1.0e-40;

  type1 vv[N], tmp, _max;
  for(int i=0; i<N; i++) vv[i]=0.0;

  // Implicit scaling: vv[i] = 1 / (largest |entry| in row i).
  for(int i=0; i<N; i++){
    _max=0.0;
    for(int j=0; j<N; j++) if( (tmp=abs(LU(i,j))) > _max ) _max = tmp;
    assert( abs(_max) > TINY );
    vv[i] = abs(1.0/_max);
  }

  for(int k=0; k<N; k++){
    // Default to the diagonal row so that imax is always well defined, even
    // when no candidate compares greater than zero (all-zero or NaN column).
    // Previously imax was left uninitialised / stale in that case.
    int imax = k;
    _max=0.0;
    for(int i=k; i<N; i++){
      tmp = vv[i] * abs( LU(i,k) );
      if(tmp>_max) {
        _max = tmp;
        imax = i;
      }
    }
    if(k!=imax){
      // Exchange rows k and imax across all columns.
      for(int j=0; j<N; j++){
        tmp = LU(imax,j);
        LU(imax,j) = LU(k,j);
        LU(k,j) = tmp;
      }
      // Row imax now holds what was row k, so its scale factor moves with it;
      // vv[k] itself is not read again.
      vv[imax] = vv[k];
    }
    P(k)=imax;

    // Eliminate column k from the rows below the pivot.
    for(int i=k+1; i<N; i++){
      LU(i,k) = LU(i,k) / LU(k,k);
      tmp = LU(i,k);
      for(int j=k+1; j<N; j++) LU(i,j) = LU(i,j) - tmp * LU(k,j);
    } // end i
  } // end k
}

/**
 * Solves a linear system in place using a factorisation produced by LUdcmp.
 *
 * @param x   On entry: the right-hand side. On exit: the solution.
 * @param LU  Packed L/U factors as written by LUdcmp (read only).
 * @param P   Row-exchange record as written by LUdcmp (read only).
 *
 * LU and P are taken by const reference so the N x N factor matrix is not
 * copied on every call.
 */
template<class type1,class type2,int N>
accelerator_inline void solve( iVector<type1,N> &x, const iMatrix<type1,N> &LU, const iVector<type2,N> &P){

  type1 sum = 0.0;

  // ii == 0 while every right-hand-side entry seen so far is zero; once a
  // non-zero entry is met at index i it becomes i+1, so the inner sum can
  // skip the leading zeros and start at column ii-1.
  int ii=0;

  // Forward substitution, undoing the row exchanges as we go.
  for(int i=0; i<N; i++){
    int ip = P(i);
    sum = x(ip);
    x(ip) = x(i);
    if(ii!=0){
      for(int j=ii-1;j<i;j++) sum = sum - LU(i,j)*x(j);
    } else if (abs(sum)>0.0) {
      ii=i+1;
    }
    x(i) = sum;
  }

  // Back substitution with the upper-triangular factor.
  for(int i=N-1; i>=0; i--){
    sum = x(i);
    for(int j=i+1; j<N; j++) sum = sum - LU(i,j)*x(j);
    x(i) = sum/LU(i,i);
  }
}

/**
 * Site-wise inverse of the real part of an N x N matrix field.
 *
 * For every outer site and SIMD lane, the real parts of the N x N entries of
 * Umu are copied into a real matrix, which is LU-factorised (LUdcmp) and
 * inverted column by column (solve applied to each unit vector). The result
 * is written to the returned lattice as complex numbers built from the real
 * values. The imaginary part of the input is ignored.
 *
 * @param Umu  Input lattice of N x N double-precision complex matrices.
 * @return     Lattice on the same grid holding the inverse of Re(Umu).
 *
 * The disabled "#if 0" branch is an Eigen-based host implementation kept as
 * a reference.
 */
template<int N>
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse_RealPart(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
  GridBase *grid=Umu.Grid();
  auto osites = grid->oSites();
  const int Nsimd=grid->Nsimd();
  Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
#if 0 // CPU version
  autoView(Umu_v,Umu,CpuRead);
  autoView(ret_v,ret,CpuWrite);
  thread_for(site,osites,{
    Eigen::MatrixXd EigenU = Eigen::MatrixXd::Zero(N,N);

    iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
    iScalar<iScalar<iMatrix<ComplexD, N> > > Ui;

    for(int lane=0;lane<Nsimd;lane++){
      Us = extractLane(lane,Umu_v[site]);
      for(int i=0;i<N;i++){
      for(int j=0;j<N;j++){
        EigenU(i,j) = real(Us()()(i,j));
      }}
      Eigen::MatrixXd EigenUinv = EigenU.inverse();
      for(int i=0;i<N;i++){
      for(int j=0;j<N;j++){
        Ui()()(i,j) = EigenUinv(i,j);
      }}
      insertLane(lane,ret_v[site],Ui);
    }
  });
#else //GPU version
  autoView(Umu_v,Umu,AcceleratorRead);
  autoView(ret_v,ret,AcceleratorWrite);
  // Launch width is taken from vComplexD, the vector type actually stored in
  // the lattice, rather than from the default-precision alias vComplex.
  accelerator_for(ss,osites,vComplexD::Nsimd(),{
    iMatrix<RealD, N> LU;
    iVector<Integer, N> P;
    iVector<RealD, N> e;
    // scalar layout won't coalesce
#ifdef GRID_SIMT
    {
      int blane=acceleratorSIMTlane(Nsimd); // buffer lane
#else
    for(int blane=0;blane<Nsimd;blane++) {
#endif

      // Gather the real part of this lane's matrix.
      for(int i=0;i<N;i++){
      for(int j=0;j<N;j++){
        LU(i,j) = getlane(toReal(TensorRemove(Umu_v(ss)()()(i,j))),blane);
      }}
      LUdcmp(LU,P);
      // Column j of the inverse is the solution for the j-th unit vector.
      for(int j=0; j<N; j++){
        for(int i=0; i<N; i++) e(i) = (i==j) ? 1.0 : 0.0;
        solve(e,LU,P);
        for(int i=0; i<N; i++) putlane(ret_v[ss]()()(i,j),static_cast<ComplexD>(e(i)),blane);
      }
    }
  });
#endif
  return ret;
}

NAMESPACE_END(Grid);
#endif
Expand Down
Loading