Skip to content
20 changes: 16 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,37 @@ endif()
# Optional GPU offload/acceleration support. Only the NVHPC Fortran
# compiler is supported; any other compiler is a hard configure error.
set(PROGRESS_OFFLOAD FALSE CACHE BOOL "Enable offload/GPU acceleration support (only supported with NVHPC)")
if(PROGRESS_OFFLOAD)
  if(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
    # USE_OFFLOAD goes through the preprocessor (add_definitions) so it
    # reaches every language, instead of riding in the Fortran flag string.
    add_definitions(-DUSE_OFFLOAD)
    set(EXTRA_FCFLAGS "-fast -acc ${EXTRA_FCFLAGS}")
  else()
    message(FATAL_ERROR "PROGRESS_OFFLOAD selected but only supported using NVHPC")
  endif()
endif()

# Optional NVTX profiling ranges. Only supported with the NVHPC compiler.
set(PROGRESS_NVTX FALSE CACHE BOOL "Compile with support for NVTX profiling")
if(PROGRESS_NVTX)
  if(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
    message(STATUS "Building with NVTX profiling tags")
    # Both branches need the USE_NVTX macro; define it once here.
    add_definitions(-DUSE_NVTX)
    if(CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "25")
      # NVHPC < 25 does not provide -cudalib=nvtx; link the legacy
      # nvToolsExt library shipped with the CUDA toolkit instead.
      include(FindCUDAToolkit)
      # NOTE: keyword is PATHS (plural); the old "PATH" token was silently
      # treated as an extra literal search path by the short-form signature.
      find_library(FOUND_nvToolsExt nvToolsExt PATHS ${CUDAToolkit_TARGET_DIR}/lib64/)
      if(NOT FOUND_nvToolsExt)
        message(FATAL_ERROR "Could not find nvToolsExt library")
      endif()
      list(APPEND LINK_LIBRARIES "-L${CUDAToolkit_TARGET_DIR}/lib64 -lnvToolsExt")
    else()
      # NVHPC >= 25: the compiler can pull in NVTX directly.
      set(EXTRA_FCFLAGS "${EXTRA_FCFLAGS} -cudalib=nvtx")
    endif()
  else()
    message(FATAL_ERROR "PROGRESS_NVTX selected but only supported using NVHPC")
  endif()
endif()

# Optional use of single-precision arithmetic in selected internals.
set(PROGRESS_SINGLE FALSE CACHE BOOL "Compile using some single precision internal arithmetic")
if(PROGRESS_SINGLE)
  # USE_SINGLE is defined via the preprocessor so it applies to every
  # language, not only to the Fortran flag string.
  add_definitions(-DUSE_SINGLE)
endif()

option(DONT_TOUCH_MY_FLAGS "Don't touch compiler flags" OFF)
Expand Down
2 changes: 1 addition & 1 deletion bml
Submodule bml updated 39 files
+41 −41 .github/workflows/CI.yaml
+49 −1 .github/workflows/container.yaml
+3 −2 CMakeLists.txt
+20 −0 Dockerfile-jammy
+19 −0 Dockerfile-noble
+1 −1 scripts/ci-clang-16-gfortran-14-C-double-complex.sh
+1 −1 scripts/ci-clang-16-gfortran-14-C-double-real.sh
+1 −1 scripts/ci-clang-16-gfortran-14-C-single-complex.sh
+1 −1 scripts/ci-clang-16-gfortran-14-C-single-real.sh
+2 −2 scripts/indent.sh
+136 −0 scripts/prepare-container-jammy.sh
+123 −0 scripts/prepare-container-noble.sh
+2 −0 src/C-interface/bml_init.c
+2 −0 src/C-interface/bml_shutdown.c
+2 −0 src/C-interface/dense/bml_add_dense_typed.c
+2 −0 src/C-interface/dense/bml_allocate_dense_typed.c
+2 −0 src/C-interface/dense/bml_convert_dense_typed.c
+2 −0 src/C-interface/dense/bml_copy_dense_typed.c
+2 −0 src/C-interface/dense/bml_diagonalize_dense.c
+2 −0 src/C-interface/dense/bml_element_multiply_dense_typed.c
+2 −0 src/C-interface/dense/bml_export_dense.c
+2 −0 src/C-interface/dense/bml_export_dense_typed.c
+2 −0 src/C-interface/dense/bml_getters_dense_typed.c
+2 −0 src/C-interface/dense/bml_import_dense_typed.c
+2 −0 src/C-interface/dense/bml_inverse_dense_typed.c
+2 −0 src/C-interface/dense/bml_multiply_dense_typed.c
+2 −0 src/C-interface/dense/bml_norm_dense_typed.c
+2 −0 src/C-interface/dense/bml_normalize_dense_typed.c
+2 −0 src/C-interface/dense/bml_parallel_dense_typed.c
+2 −0 src/C-interface/dense/bml_scale_dense_typed.c
+2 −0 src/C-interface/dense/bml_setters_dense.c
+2 −0 src/C-interface/dense/bml_setters_dense_typed.c
+2 −0 src/C-interface/dense/bml_submatrix_dense_typed.c
+2 −0 src/C-interface/dense/bml_threshold_dense_typed.c
+2 −0 src/C-interface/dense/bml_trace_dense_typed.c
+2 −0 src/C-interface/dense/bml_transpose_dense_typed.c
+2 −0 src/C-interface/dense/bml_utilities_dense_typed.c
+2 −0 src/C-interface/ellpack/bml_submatrix_ellpack_typed.c
+1 −0 tests/C-tests/test_magma.c
21 changes: 21 additions & 0 deletions examples/gpmdk/src/gpmdcov_dm_min.F90
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,10 @@ subroutine gpmdcov_DM_Min_Eig(Nr_SCF,nguess,mix,applyField,newnl_in)
#endif
#endif

#ifdef USE_NVTX
call gpmdStartRange("Ewald Real",3)
#endif

call gpmdcov_msMem("gpmdcov_dm_min_eig","Before get_ewald_list_real_dcalc_vect",lt%verbose,myRank)
if(myRank == 1 .and. lt%verbose >= 1) mls_coul = mls()
#ifdef USE_OFFLOAD
Expand All @@ -357,9 +361,16 @@ subroutine gpmdcov_DM_Min_Eig(Nr_SCF,nguess,mix,applyField,newnl_in)
,nguess,tb%hubbardu,sy%lattice_vector,&
sy%volr,lt%coul_acc,lt%timeratio,nl%nnIx,nl%nnIy,&
nl%nnIz,nl%nrnnlist,nl%nnType,coul_forces_r,coul_pot_r);
#endif
#ifdef USE_NVTX
call gpmdEndRange
#endif
call gpmdcov_msII("gpmdcov_DM_Min","Time real coul "//to_string(mls() - mls_coul)//" ms",lt%verbose,myRank)
call gpmdcov_msMem("gpmdcov_dm_min_eig","After get_ewald_list_real_dcalc_vect",lt%verbose,myRank)

#ifdef USE_NVTX
call gpmdStartRange("Ewald Recip",4)
#endif

!> Reciprocal contribution to the Coul energy. The outputs are
!coul_forces_k,coul_pot_k.
Expand All @@ -370,6 +381,9 @@ subroutine gpmdcov_DM_Min_Eig(Nr_SCF,nguess,mix,applyField,newnl_in)
call get_ewald_recip(sy%spindex,sy%splist,sy%coordinate&
&,nguess,tb%hubbardu,sy%lattice_vector,&
&sy%recip_vector,sy%volr,lt%coul_acc,coul_forces_k,coul_pot_k);
#ifdef USE_NVTX
call gpmdEndRange
#endif
call gpmdcov_msII("gpmdcov_DM_Min","Time recip coul "//to_string(mls() - mls_coul)//" ms",lt%verbose,myRank)
call gpmdcov_msMem("gpmdcov_dm_min_eig","After get_ewald_recip",lt%verbose,myRank)
call gpmdcov_msMemGPU("DM_Min","After Ewald Recip",lt%verbose,myRank)
Expand All @@ -383,9 +397,16 @@ subroutine gpmdcov_DM_Min_Eig(Nr_SCF,nguess,mix,applyField,newnl_in)
#endif
if(iscf == Nr_SCF) converged = .true.

#ifdef USE_NVTX
call gpmdStartRange("Diagonalize H1",5)
#endif

call gpmdcov_msMem("gpmdcov_dm_min_eig", "Before gpmd_diagonalize_H1",lt%verbose,myRank)
if(myRank == 1 .and. lt%verbose >= 1) mls_diag = mls()
call gpmdcov_diagonalize_H1(nguess)
#ifdef USE_NVTX
call gpmdEndRange
#endif
call gpmdcov_msI("gpmdcov_DM_Min","Time for diag "//to_string(mls() - mls_diag)//" ms",lt%verbose,myRank)
call gpmdcov_msMem("gpmdcov_dm_min_eig", "After gpmd_diagonalize_H1",lt%verbose,myRank)
call gpmdcov_msMemGPU("DM_Min","After diagonalize_H1",lt%verbose,myRank)
Expand Down
21 changes: 20 additions & 1 deletion examples/gpmdk/src/gpmdcov_energandforces.F90
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ subroutine gpmdcov_EnergAndForces(charges)
endif

allocate(row(norb))

#ifdef USE_NVTX
call gpmdStartRange("Electronic energy calculation",1)
#endif

!> Get Electronic energy
call bml_zero_matrix(lt%bml_type,bml_element_real,dp,nOrb,nOrb,aux1_bml)
Expand All @@ -106,6 +110,10 @@ subroutine gpmdcov_EnergAndForces(charges)
TRRHOH= TRRHOH+ row(i)
enddo

#ifdef USE_NVTX
call gpmdEndRange
#endif

call gpmdcov_message("gpmdcov_EnergAndForces","Energy Band for part =&
& "//to_string(ipt)//"= "//to_string(TRRHOH),lt%verbose,myRank)

Expand Down Expand Up @@ -209,6 +217,9 @@ subroutine gpmdcov_EnergAndForces(charges)
GFSCOUL(:,gpat%sgraph(ipt)%core_halo_index(i)+1) = syprt(ipt)%estr%FSCOUL(:,i)
SKForce(:,gpat%sgraph(ipt)%core_halo_index(i)+1) = syprt(ipt)%estr%SKForce(:,i)
enddo
#ifdef USE_NVTX
call gpmdStartRange("Deallocate derivative matrices",6)
#endif

call bml_deallocate(dSx_bml)
call bml_deallocate(dSy_bml)
Expand All @@ -225,6 +236,9 @@ subroutine gpmdcov_EnergAndForces(charges)
!endif
!call bml_deallocate(syprt(ipt)%estr%over)
!call bml_deallocate(syprt(ipt)%estr%zmat)
#ifdef USE_NVTX
call gpmdEndRange
#endif

enddo

Expand Down Expand Up @@ -344,7 +358,9 @@ subroutine gpmdcov_EnergAndForces(charges)
&for Forces"//to_string(mls() - mls_i),lt%verbose,myRank)

coul_forces = coul_forces_r + coul_forces_k

#ifdef USE_NVTX
call gpmdStartRange("Pair potentials",7)
#endif
!> Get Repulsive energy and forces
! call get_PairPot_contrib(sy%coordinate,sy%lattice_vector,sy%spindex,ppot,PairForces,ERep)
call get_PairPot_contrib_int(sy%coordinate,sy%lattice_vector,nl%nnIx,nl%nnIy,&
Expand All @@ -357,6 +373,9 @@ subroutine gpmdcov_EnergAndForces(charges)
& disppot,DispForces,EDisp,lt%verbose,myRank)
endif

#ifdef USE_NVTX
call gpmdEndRange
#endif

!> Get Coulombic energy
ECoul = 0.0;
Expand Down
4 changes: 3 additions & 1 deletion examples/gpmdk/src/gpmdcov_langevin.F90
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ subroutine gpmdcov_uniform_to_normal(rands)
!Replace rands that are too small for the log()
!If this is left out then eventually the program will
! crash
smallest = tiny(smallest)
!smallest = tiny(smallest))
!Make the smallest rand bigger, to avoid large velocities
smallest = sqrt(tiny(smallest))
tinymask = rands(:,:,1).lt.smallest
if(any(tinymask))then
tinies = pack(rands(:,:,1),tinymask)
Expand Down
20 changes: 20 additions & 0 deletions examples/gpmdk/src/gpmdcov_part.F90
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
module gpmdcov_Part_mod

#ifdef USE_NVTX
use prg_nvtx_mod
#endif

contains

!> Partition by systems
Expand Down Expand Up @@ -47,6 +51,7 @@ subroutine gpmdcov_Part(ipreMD)

call gpmdcov_msMem("gpmdcov_Part", "After prg_get_covgraph",lt%verbose,myRank)
else !ipreMD == 1

#ifdef DO_MPI
n_atoms = sy%nats
max_updates = 100
Expand Down Expand Up @@ -89,12 +94,20 @@ subroutine gpmdcov_Part(ipreMD)
write(*,*)"GPMDCOV_PART: ERROR: Small subgraphs only supported using Box partitioning. Stopping."
stop
endif
#ifdef USE_NVTX
call gpmdStartRange("Subgraph collection",1)
#endif

call prg_collect_extended_graph_p(syprt(ipt)%estr%orho,gpat%sgraph(ipt)%llsize,sy%nats,syprt(ipt)%estr%hindex,&
gpat%sgraph(ipt)%core_halo_index,graph_p,gsp2%gthreshold,myMdim,gsp2%alpha,syprt(ipt)%coordinate,sy%coordinate,sy%lattice_vector,lt%verbose)
else
call prg_collect_graph_p(syprt(ipt)%estr%orho,gpat%sgraph(ipt)%llsize,sy%nats,syprt(ipt)%estr%hindex,&
gpat%sgraph(ipt)%core_halo_index,graph_p,gsp2%gthreshold,myMdim,lt%verbose)
endif

#ifdef USE_NVTX
call gpmdEndRange
#endif

call bml_deallocate(syprt(ipt)%estr%orho)

Expand Down Expand Up @@ -125,6 +138,10 @@ subroutine gpmdcov_Part(ipreMD)
endif
graph_p_old = graph_p
else
#ifdef USE_NVTX
call gpmdStartRange("Fast graph update",2)
#endif

write(*,*)"DEBUG: Doing graph update reduction at mdstep ",mdstep

ktot_a = 0
Expand Down Expand Up @@ -243,6 +260,9 @@ subroutine gpmdcov_Part(ipreMD)
endif
endif
! call prg_sumIntReduceN(auxVectInt, myMdim*sy%nats)
#ifdef USE_NVTX
call gpmdEndRange
#endif
endif
#endif
! call gpmdcov_vect2MatInt(auxVectInt,graph_p,sy%nats,myMdim)
Expand Down
Loading