From 809765e6034c5734b61fd1c10de4a68d02a1d780 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 8 Apr 2025 16:53:52 -0600 Subject: [PATCH 01/43] Working with upto 24km global mesh --- Makefile | 5 + src/core_atmosphere/Registry.xml | 2 +- src/framework/mpas_block_decomp.F | 209 +++++++++++++++++++++-------- src/framework/mpas_bootstrapping.F | 18 ++- 4 files changed, 176 insertions(+), 58 deletions(-) diff --git a/Makefile b/Makefile index c34823ab5a..7606b47032 100644 --- a/Makefile +++ b/Makefile @@ -759,6 +759,11 @@ endif LIBS += $(NCLIB) endif +export SCOTCH_ROOT=/glade/derecho/scratch/agopal/scotch/build + +FCINCLUDES += -I$(SCOTCH_ROOT)/src/include + +LIBS += -L$(SCOTCH_ROOT)/lib -lscotch -lscotcherr ifneq "$(PNETCDF)" "" ifneq ($(wildcard $(PNETCDF)/lib/libpnetcdf.*), ) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 4281c40bba..879d76aa91 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -319,7 +319,7 @@ - diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 4f3d197d5d..3079aed4f1 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -25,6 +25,7 @@ module mpas_block_decomp use mpas_derived_types use mpas_io_units use mpas_log + include "scotchf.h" type graph integer :: nVerticesTotal @@ -49,7 +50,7 @@ module mpas_block_decomp ! 
!----------------------------------------------------------------------- subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, local_cell_list, block_id, block_start, & - block_count, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ + block_count, cellsOnCellFull, nEdgesOnCellFull, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ implicit none @@ -59,6 +60,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), pointer :: block_id !< Output: list of global block id's this processor owns integer, dimension(:), pointer :: block_start !< Output: offset in local_cell_list for this blocks list of cells integer, dimension(:), pointer :: block_count !< Output: number of cells in blocks + type (field2dInteger), intent(in) :: cellsOnCellFull + type (field1dInteger), intent(in) :: nEdgesOnCellFull integer, intent(in) :: numBlocks !< Input: Number of blocks (from config_num_blocks) logical, intent(in) :: explicitProcDecomp !< Input: Logical flag controlling if blocks are explicitly assigned to processors @@ -71,12 +74,19 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), allocatable :: local_block_list integer, dimension(:,:), allocatable :: sorted_local_cell_list - integer :: i, global_block_id, local_block_id, owning_proc, iunit, istatus - integer :: blocks_per_proc, err + integer, dimension(:), allocatable :: global_block_id_arr, owning_proc_arr + integer :: i, global_block_id, local_block_id, owning_proc, iunit, istatus, vind, j, k + integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices - character (len=StrKIND) :: filename + character (len=StrKIND) :: filename, msg logical :: no_blocks + integer :: nTotalEdgesGraph = 0 + integer, dimension(:), allocatable :: edgetab, verttab, parttab + + doubleprecision :: stradat (SCOTCH_STRATDIM) + doubleprecision :: SCOTCHGRAPH 
(SCOTCH_GRAPHDIM) + integer :: n_size no_blocks = .false. @@ -95,71 +105,158 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(local_nvertices(dminfo % nprocs)) allocate(global_start(dminfo % nprocs)) allocate(global_list(partial_global_graph_info % nVerticesTotal)) + allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) + allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) if (dminfo % my_proc_id == IO_NODE) then + if ( trim(blockFilePrefix) == '' ) then + call mpas_log_write('blockFilePrefix is not set: Using LibScotch for graph partitioning') + do vind=1,partial_global_graph_info % nVerticesTotal + nTotalEdgesGraph = nTotalEdgesGraph + nEdgesOnCellFull% array(vind) + ! do j=1,nEdgesOnCellFull% array(vind) + ! call mpas_log_write('vind=$i j=$i adj= $i', intArgs=(/vind,j,cellsOnCellFull%array(j,vind)/) ) + ! end do + end do + + allocate(edgetab(nTotalEdgesGraph)) + allocate(verttab(partial_global_graph_info % nVerticesTotal + 1)) + !allocate(parttab(partial_global_graph_info % nVerticesTotal)) + + !do vind=1,partial_global_graph_info % nVerticesTotal + !call mpas_log_write('proc=$i vind= $i part= $i', intArgs=(/dminfo % my_proc_id, vind,parttab(vind)/)) + !call mpas_log_write('vind=$i j=$i adj= $i', intArgs=(/vind,j,cellsOnCellFull%array(j,vind)/) ) + !end do + + k = 1 + do vind=1,partial_global_graph_info % nVerticesTotal + verttab(vind) = k + !call mpas_log_write('vind=$i verttab= $i', intArgs=(/vind,verttab(vind)/) ) + do j=1,nEdgesOnCellFull% array(vind) + edgetab(k) = cellsOnCellFull%array(j,vind) + !call mpas_log_write('k=$i edgetab= $i', intArgs=(/k,edgetab(k)/) ) + k = k + 1 + end do + end do + verttab(partial_global_graph_info % nVerticesTotal+1) = nTotalEdgesGraph + 1 + + !call mpas_log_write('nvertices =$i nTotalEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVerticesTotal, nTotalEdgesGraph/)) + + CALL scotchfstratinit (stradat (1), IERR) + CALL SCOTCHFGRAPHINIT 
(SCOTCHGRAPH (1), IERR) + IF (IERR .NE. 0) THEN + call mpas_log_write('Cannot initialize Scotch Graph', MPAS_LOG_CRIT) + ENDIF + + CALL SCOTCHFGRAPHBUILD (SCOTCHGRAPH (1), 1, partial_global_graph_info % nVerticesTotal, & + verttab (1), verttab (2), & + verttab (1), verttab (1), & + nTotalEdgesGraph, edgetab (1), edgetab (1), IERR) + IF (IERR .NE. 0) THEN + call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) + ENDIF + + CALL SCOTCHFGRAPHCHECK (SCOTCHGRAPH (1), IERR) + IF (IERR .NE. 0) THEN + call mpas_log_write('Cannot check Scotch Graph', MPAS_LOG_CRIT) + ENDIF + + !CALL scotchfgraphpart( SCOTCHGRAPH (1), dminfo % total_blocks, stradat (1) ,parttab(1), IERR) + CALL scotchfgraphpart( SCOTCHGRAPH (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) + + call scotchfgraphexit (SCOTCHGRAPH (1)) + call scotchfstratexit (stradat (1)) + + local_nvertices(:) = 0 + do i=1,partial_global_graph_info % nVerticesTotal + !global_block_id_arr(i) = parttab(i) + call mpas_get_owning_proc(dminfo, global_block_id_arr(i), owning_proc) + owning_proc_arr(i) = owning_proc + !call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,global_block_id_arr(i),owning_proc/) ) + local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 + end do + + else + + if (dminfo % total_blocks < 10) then + write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100) then + write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000) then + write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 10000) then + write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000) then + write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000000) then + write(filename,'(a,i6)') trim(blockFilePrefix), 
dminfo % total_blocks + else if (dminfo % total_blocks < 10000000) then + write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000000) then + write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks + end if - if (dminfo % total_blocks < 10) then - write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100) then - write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000) then - write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000) then - write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000) then - write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000000) then - write(filename,'(a,i6)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000000) then - write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000000) then - write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks - end if - - call mpas_new_unit(iunit) - open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) - - if (istatus /= 0) then - call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_ERR, intArgs=(/dminfo % total_blocks/) ) - call mpas_log_write('Filename: '//trim(filename), MPAS_LOG_CRIT) - end if - - local_nvertices(:) = 0 - do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*, iostat=err) global_block_id - - if ( err .ne. 
0 ) then - call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) - end if - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 - end do + call mpas_new_unit(iunit) + open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) + + if (istatus /= 0) then + call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_ERR, intArgs=(/dminfo % total_blocks/) ) + call mpas_log_write('Filename: '//trim(filename), MPAS_LOG_CRIT) + end if - read(unit=iunit, fmt=*, iostat=err) + call mpas_log_write('Using block decomposition file: '//trim(filename)) + + call mpas_log_write('First read pass ') + local_nvertices(:) = 0 + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*, iostat=err) global_block_id + global_block_id_arr(i) = global_block_id + + if ( err .ne. 
0 ) then + call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + owning_proc_arr(i) = owning_proc + !call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,global_block_id,owning_proc/) ) + local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 + !call mpas_log_write('owning_proc+1: $i local_nvertices= $i', intArgs=(/owning_proc+1, local_nvertices(owning_proc+1)/)) + end do + + read(unit=iunit, fmt=*, iostat=err) + + if ( err == 0 ) then + call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if - if ( err == 0 ) then - call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + close(unit=iunit) + call mpas_release_unit(iunit) end if global_start(1) = 1 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do - rewind(unit=iunit) + !rewind(unit=iunit) + call mpas_log_write('Second read pass ') do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*, iostat=err) global_block_id - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + !read(unit=iunit, fmt=*, iostat=err) global_block_id + !call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_block_id = global_block_id_arr(i) + owning_proc = owning_proc_arr(i) global_list(global_start(owning_proc+1)) = i + !call mpas_log_write('owning_proc+1: $i global_start= $i global_list=$i', intArgs=(/owning_proc+1, global_start(owning_proc+1), global_list(global_start(owning_proc+1))/)) 
global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + !call mpas_log_write('global_start(owning_proc+1): $i', intArgs=(/global_start(owning_proc+1)/)) end do global_start(1) = 0 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) @@ -173,28 +270,34 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l global_start(1) = 1 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do - rewind(unit=iunit) + !rewind(unit=iunit) + call mpas_log_write('Third read pass ') do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*) global_block_id - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + !read(unit=iunit, fmt=*) global_block_id + !call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_block_id = global_block_id_arr(i) + owning_proc = owning_proc_arr(i) global_list(global_start(owning_proc+1)) = global_block_id + !call mpas_log_write('owning_proc+1: $i global_start= $i global_list=$i', intArgs=(/owning_proc+1, global_start(owning_proc+1), global_list(global_start(owning_proc+1))/)) global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + !call mpas_log_write('global_start(owning_proc+1): $i', intArgs=(/global_start(owning_proc+1)/)) end do ! 
Recompute global start after second read of global_block_list global_start(1) = 0 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & global_start, local_nvertices, global_list, local_block_list) - close(unit=iunit) - call mpas_release_unit(iunit) + else diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index 4241255e2a..f444032e3b 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -106,8 +106,8 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p type (field1dInteger), pointer :: indexToCellIDField type (field1dInteger), pointer :: indexToEdgeIDField type (field1dInteger), pointer :: indexToVertexIDField - type (field1dInteger), pointer :: nEdgesOnCellField - type (field2dInteger), pointer :: cellsOnCellField + type (field1dInteger), pointer :: nEdgesOnCellField, nEdgesOnCellFull + type (field2dInteger), pointer :: cellsOnCellField, cellsOnCellFull type (field2dInteger), pointer :: edgesOnCellField type (field2dInteger), pointer :: verticesOnCellField type (field2dInteger), pointer :: cellsOnEdgeField @@ -144,7 +144,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p integer, pointer :: config_num_halos, config_number_of_blocks logical, pointer :: config_explicit_proc_decomp character (len=StrKIND), pointer :: config_block_decomp_file_prefix, config_proc_decomp_file_prefix - integer :: nHalos + integer :: nHalos, j, vind call mpas_pool_get_config(domain % configs, 'config_num_halos', config_num_halos) @@ -197,6 +197,16 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p ! which cells/edges/vertices are owned by each block, and which are ghost ! 
+ allocate(cellsOnCellFull) + allocate(cellsOnCellFull % array(maxEdges,nCells)) + call MPAS_io_inq_var(inputHandle, 'cellsOnCell', ierr=ierr) + call mpas_io_get_var(inputHandle, 'cellsOnCell', cellsOnCellFull % array, ierr) + + allocate(nEdgesOnCellFull) + allocate(nEdgesOnCellFull % array(nCells)) + call MPAS_io_inq_var(inputHandle, 'nEdgesOnCell', ierr=ierr) + call mpas_io_get_var(inputHandle, 'nEdgesOnCell', nEdgesOnCellFull % array, ierr) + call mpas_io_setup_cell_block_fields(inputHandle, nreadCells, readCellStart, readingBlock, maxEdges, & indexTocellIDField, xCellField, yCellField, zCellField, nEdgesOnCellField, & cellsOnCellField, edgesOnCellField, verticesOnCellField, nHalos) @@ -235,7 +245,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p ! file, but in principle it could call some online, distributed mesh partitioning library. ! call mpas_block_decomp_cells_for_proc(domain % dminfo, partial_global_graph_info, local_cell_list, block_id, block_start, & - block_count, config_number_of_blocks, config_explicit_proc_decomp, & + block_count, cellsOnCellFull, nEdgesOnCellFull, config_number_of_blocks, config_explicit_proc_decomp, & config_block_decomp_file_prefix, config_proc_decomp_file_prefix) deallocate(partial_global_graph_info % vertexID) From d46fceb7c126ae185a91de14a6efa42d5888707d Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 9 Apr 2025 10:08:41 -0600 Subject: [PATCH 02/43] Now checking for zero connectivities. 
Works with limited area grid --- src/framework/mpas_block_decomp.F | 42 ++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 3079aed4f1..81da41292b 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -75,7 +75,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:,:), allocatable :: sorted_local_cell_list integer, dimension(:), allocatable :: global_block_id_arr, owning_proc_arr - integer :: i, global_block_id, local_block_id, owning_proc, iunit, istatus, vind, j, k + integer :: i, global_block_id, local_block_id, owning_proc, iunit, istatus, j, k integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices character (len=StrKIND) :: filename, msg @@ -111,28 +111,36 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (dminfo % my_proc_id == IO_NODE) then if ( trim(blockFilePrefix) == '' ) then call mpas_log_write('blockFilePrefix is not set: Using LibScotch for graph partitioning') - do vind=1,partial_global_graph_info % nVerticesTotal - nTotalEdgesGraph = nTotalEdgesGraph + nEdgesOnCellFull% array(vind) - ! do j=1,nEdgesOnCellFull% array(vind) - ! call mpas_log_write('vind=$i j=$i adj= $i', intArgs=(/vind,j,cellsOnCellFull%array(j,vind)/) ) + do i=1,partial_global_graph_info % nVerticesTotal + do j=1,nEdgesOnCellFull% array(i) + + if (cellsOnCellFull%array(j,i) == 0) cycle + nTotalEdgesGraph = nTotalEdgesGraph + 1 + !nTotalEdgesGraph = nTotalEdgesGraph + nEdgesOnCellFull% array(i) + ! do j=1,nEdgesOnCellFull% array(i) + ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,cellsOnCellFull%array(j,i)/) ) ! 
end do + end do end do - + call mpas_log_write('nTotalEdgesGraph is $i', intArgs=(/nTotalEdgesGraph/)) allocate(edgetab(nTotalEdgesGraph)) allocate(verttab(partial_global_graph_info % nVerticesTotal + 1)) !allocate(parttab(partial_global_graph_info % nVerticesTotal)) - !do vind=1,partial_global_graph_info % nVerticesTotal - !call mpas_log_write('proc=$i vind= $i part= $i', intArgs=(/dminfo % my_proc_id, vind,parttab(vind)/)) - !call mpas_log_write('vind=$i j=$i adj= $i', intArgs=(/vind,j,cellsOnCellFull%array(j,vind)/) ) + !do i=1,partial_global_graph_info % nVerticesTotal + !call mpas_log_write('proc=$i i= $i part= $i', intArgs=(/dminfo % my_proc_id, i,parttab(i)/)) + !call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,cellsOnCellFull%array(j,i)/) ) !end do k = 1 - do vind=1,partial_global_graph_info % nVerticesTotal - verttab(vind) = k - !call mpas_log_write('vind=$i verttab= $i', intArgs=(/vind,verttab(vind)/) ) - do j=1,nEdgesOnCellFull% array(vind) - edgetab(k) = cellsOnCellFull%array(j,vind) + do i=1,partial_global_graph_info % nVerticesTotal + verttab(i) = k + !call mpas_log_write('i=$i verttab= $i', intArgs=(/i,verttab(i)/) ) + do j=1,nEdgesOnCellFull% array(i) + + if (cellsOnCellFull%array(j,i) == 0) cycle + + edgetab(k) = cellsOnCellFull%array(j,i) !call mpas_log_write('k=$i edgetab= $i', intArgs=(/k,edgetab(k)/) ) k = k + 1 end do @@ -154,6 +162,12 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l IF (IERR .NE. 0) THEN call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) ENDIF + + ! CALL SCOTCHFGRAPHSAVE (SCOTCHGRAPH (1), 1, IERR) + ! IF (IERR .NE. 0) THEN + ! PRINT *, 'ERROR : MAIN : Invalid graph output' + ! STOP + ! ENDIF CALL SCOTCHFGRAPHCHECK (SCOTCHGRAPH (1), IERR) IF (IERR .NE. 
0) THEN From 86ecb30daf5067ea4d1b20076a9280054090466f Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 24 Jun 2025 09:35:48 -0600 Subject: [PATCH 03/43] Modifying the logic for when Scotch partitioning is active --- src/framework/mpas_block_decomp.F | 134 ++++++++++++++++++------------ 1 file changed, 81 insertions(+), 53 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 81da41292b..e26230f2d9 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -25,7 +25,10 @@ module mpas_block_decomp use mpas_derived_types use mpas_io_units use mpas_log + + #ifdef MPAS_SCOTCH include "scotchf.h" + #endif type graph integer :: nVerticesTotal @@ -75,7 +78,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:,:), allocatable :: sorted_local_cell_list integer, dimension(:), allocatable :: global_block_id_arr, owning_proc_arr - integer :: i, global_block_id, local_block_id, owning_proc, iunit, istatus, j, k + integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices character (len=StrKIND) :: filename, msg @@ -84,9 +87,11 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer :: nTotalEdgesGraph = 0 integer, dimension(:), allocatable :: edgetab, verttab, parttab - doubleprecision :: stradat (SCOTCH_STRATDIM) - doubleprecision :: SCOTCHGRAPH (SCOTCH_GRAPHDIM) - integer :: n_size + logical :: useScotch +#ifdef MPAS_SCOTCH + doubleprecision :: stradat (scotch_stratdim) + doubleprecision :: scotchgraph (scotch_graphdim) +#endif no_blocks = .false. @@ -109,8 +114,50 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) if (dminfo % my_proc_id == IO_NODE) then + useScotch = .false. 
if ( trim(blockFilePrefix) == '' ) then - call mpas_log_write('blockFilePrefix is not set: Using LibScotch for graph partitioning') + call mpas_log_write('Namelist option config_block_decomp_file_prefix is set to \'\' ', MPAS_LOG_ERR) +#ifdef MPAS_SCOTCH + useScotch = .true. +#else + call mpas_log_write('Either build MPAS with the Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) +#endif + else + if (dminfo % total_blocks < 10) then + write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100) then + write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000) then + write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 10000) then + write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000) then + write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000000) then + write(filename,'(a,i6)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 10000000) then + write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000000) then + write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks + end if + + call mpas_new_unit(iunit) + open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) + + if (istatus /= 0) then + call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_WARN, intArgs=(/dminfo % total_blocks/) ) + call mpas_log_write('Filename: '//trim(filename),MPAS_LOG_WARN) +#ifdef MPAS_SCOTCH + useScotch = .true. 
+#else + call mpas_log_write('Either build MPAS with Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) +#endif + end if + end if + + if (useScotch) then +#ifdef MPAS_SCOTCH + call mpas_log_write('Using LibScotch for graph partitioning') do i=1,partial_global_graph_info % nVerticesTotal do j=1,nEdgesOnCellFull% array(i) @@ -149,33 +196,28 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l !call mpas_log_write('nvertices =$i nTotalEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVerticesTotal, nTotalEdgesGraph/)) - CALL scotchfstratinit (stradat (1), IERR) - CALL SCOTCHFGRAPHINIT (SCOTCHGRAPH (1), IERR) - IF (IERR .NE. 0) THEN - call mpas_log_write('Cannot initialize Scotch Graph', MPAS_LOG_CRIT) - ENDIF + call scotchfstratinit (stradat (1), ierr) + call scotchfgraphinit (scotchgraph (1), ierr) + + if (ierr .ne. 0) then + call mpas_log_write('Cannot initialize Scotch Graph', MPAS_LOG_CRIT) + endif - CALL SCOTCHFGRAPHBUILD (SCOTCHGRAPH (1), 1, partial_global_graph_info % nVerticesTotal, & + CALL scotchfgraphbuild (scotchgraph (1), 1, partial_global_graph_info % nVerticesTotal, & verttab (1), verttab (2), & verttab (1), verttab (1), & - nTotalEdgesGraph, edgetab (1), edgetab (1), IERR) - IF (IERR .NE. 0) THEN - call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) - ENDIF - - ! CALL SCOTCHFGRAPHSAVE (SCOTCHGRAPH (1), 1, IERR) - ! IF (IERR .NE. 0) THEN - ! PRINT *, 'ERROR : MAIN : Invalid graph output' - ! STOP - ! ENDIF + nTotalEdgesGraph, edgetab (1), edgetab (1), ierr) + if (ierr .ne. 0) then + call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) + endif + + ! Only needed during development/debugging. + !call scotchfgraphcheck (scotchgraph (1), ierr) + !if (ierr .ne. 0) then + ! call mpas_log_write('Cannot check Scotch Graph', MPAS_LOG_CRIT) + !endif - CALL SCOTCHFGRAPHCHECK (SCOTCHGRAPH (1), IERR) - IF (IERR .NE. 
0) THEN - call mpas_log_write('Cannot check Scotch Graph', MPAS_LOG_CRIT) - ENDIF - - !CALL scotchfgraphpart( SCOTCHGRAPH (1), dminfo % total_blocks, stradat (1) ,parttab(1), IERR) - CALL scotchfgraphpart( SCOTCHGRAPH (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) + call scotchfgraphpart( scotchgraph (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) call scotchfgraphexit (SCOTCHGRAPH (1)) call scotchfstratexit (stradat (1)) @@ -188,34 +230,20 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l !call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,global_block_id_arr(i),owning_proc/) ) local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 end do - - else - if (dminfo % total_blocks < 10) then - write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100) then - write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000) then - write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000) then - write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000) then - write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000000) then - write(filename,'(a,i6)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000000) then - write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000000) then - write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks - end if - - call mpas_new_unit(iunit) - open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) - if (istatus /= 0) then - call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_ERR, intArgs=(/dminfo % 
total_blocks/) ) - call mpas_log_write('Filename: '//trim(filename), MPAS_LOG_CRIT) + call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) + call mpas_new_unit(ounit) + open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) + do i=1,partial_global_graph_info % nVerticesTotal + write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) + !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) + end do + close(unit=ounit) + call mpas_release_unit(ounit) end if + #endif + else call mpas_log_write('Using block decomposition file: '//trim(filename)) From 2b6258c46a4ed4ec9a4cb90fd629aef6e65c61bb Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 24 Jun 2025 09:36:43 -0600 Subject: [PATCH 04/43] WIP: Makefile changes to enable SCOTCH --- Makefile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 7606b47032..6fe7bbcfbe 100644 --- a/Makefile +++ b/Makefile @@ -759,11 +759,14 @@ endif LIBS += $(NCLIB) endif -export SCOTCH_ROOT=/glade/derecho/scratch/agopal/scotch/build - -FCINCLUDES += -I$(SCOTCH_ROOT)/src/include - -LIBS += -L$(SCOTCH_ROOT)/lib -lscotch -lscotcherr +ifneq "$(SCOTCH)" "" + override CPPFLAGS += "-DMPAS_SCOTCH" + FCINCLUDES += -I$(SCOTCH)/src/include + LIBS += -L$(SCOTCH)/lib -lscotch -lscotcherr + SCOTCH_MESSAGE = "MPAS has been linked with the Scotch Graph Paritioning library." +else + SCOTCH_MESSAGE = "MPAS was NOT linked with the Scotch Graph Paritioning library." 
+endif ifneq "$(PNETCDF)" "" ifneq ($(wildcard $(PNETCDF)/lib/libpnetcdf.*), ) @@ -1513,6 +1516,7 @@ mpas_main: $(MAIN_DEPS) @echo $(OPENMP_OFFLOAD_MESSAGE) @echo $(OPENACC_MESSAGE) @echo $(MUSICA_MESSAGE) + @echo $(SCOTCH_MESSAGE) @echo $(SHAREDLIB_MESSAGE) ifeq "$(AUTOCLEAN)" "true" @echo $(AUTOCLEAN_MESSAGE) From d677fb4f03ea519dd66919ed93e369388bb7a66f Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 24 Jun 2025 16:26:27 -0600 Subject: [PATCH 05/43] Adding compile-time link test for the Scotch library --- Makefile | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 6fe7bbcfbe..b7d1b9f3dd 100644 --- a/Makefile +++ b/Makefile @@ -760,12 +760,13 @@ endif endif ifneq "$(SCOTCH)" "" - override CPPFLAGS += "-DMPAS_SCOTCH" - FCINCLUDES += -I$(SCOTCH)/src/include - LIBS += -L$(SCOTCH)/lib -lscotch -lscotcherr - SCOTCH_MESSAGE = "MPAS has been linked with the Scotch Graph Paritioning library." -else - SCOTCH_MESSAGE = "MPAS was NOT linked with the Scotch Graph Paritioning library." + SCOTCH_FCINCLUDES += -I$(SCOTCH)/src/include + SCOTCH_LIBS += -L$(SCOTCH)/lib -lscotch -lscotcherr + SCOTCH_FFLAGS = -DMPAS_SCOTCH + + FCINCLUDES += $(SCOTCH_FCINCLUDES) + LIBS += $(SCOTCH_LIBS) + override CPPFLAGS += $(SCOTCH_FFLAGS) endif ifneq "$(PNETCDF)" "" @@ -1423,6 +1424,33 @@ musica_fortran_test: $(eval MUSICA_FORTRAN_VERSION := $(shell pkg-config --modversion musica-fortran)) $(if $(findstring 1,$(MUSICA_FORTRAN_TEST)), $(info Built a simple test program with MUSICA-Fortran version $(MUSICA_FORTRAN_VERSION)), ) +scotch_fortran_test: + @# + @# Create a Fortran test program that will link against the SCOTCH library + @# + $(info Checking for a working MUSICA-Fortran library...) 
+ $(eval SCOTCH_FORTRAN_TEST := $(shell $\ + printf "program test_scotch_fortran\n$\ + & include \"scotchf.h\"\n$\ + & doubleprecision :: scotchgraph (scotch_graphdim)\n$\ + & integer :: ierr\n$\ + & ierr = 0\n$\ + & call scotchfgraphinit(scotchgraph (1), ierr)\n$\ + & call scotchfgraphexit(scotchgraph(1))\n$\ + end program test_scotch_fortran\n" | sed 's/&/ /' > test_scotch_fortran.f90; $\ + $\ + $(FC) $(SCOTCH_FCINCLUDES) $(SCOTCH_FFLAGS) test_scotch_fortran.f90 -o test_scotch_fortran.x $(SCOTCH_LIBS) > /dev/null 2>&1; $\ + scotch_fortran_status=$$?; $\ + rm -f test_scotch_fortran.f90 test_scotch_fortran.x; $\ + if [ $$scotch_fortran_status -eq 0 ]; then $\ + printf "1"; $\ + else $\ + printf "0"; $\ + fi $\ + )) + $(if $(findstring 0,$(SCOTCH_FORTRAN_TEST)), $(error Could not build a simple test program with Scotch)) + $(if $(findstring 1,$(SCOTCH_FORTRAN_TEST)), $(info Built a simple test program with Scotch )) + pnetcdf_test: @# @# Create test C programs that look for PNetCDF header file and some symbols in it @@ -1479,6 +1507,13 @@ else MUSICA_MESSAGE = "MPAS was not linked with the MUSICA-Fortran library." endif +ifneq "$(SCOTCH_FFLAGS)" "" +MAIN_DEPS += scotch_fortran_test +SCOTCH_MESSAGE = "MPAS has been linked with the Scotch graph partitioning library." +else +SCOTCH_MESSAGE = "MPAS was NOT linked with the Scotch graph partitioning library." 
+endif + mpas_main: $(MAIN_DEPS) cd src; $(MAKE) FC="$(FC)" \ CC="$(CC)" \ From 217665756e39b7b7991bf2ac1bc4d36d99f5290a Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 2 Jul 2025 16:51:47 -0600 Subject: [PATCH 06/43] few fixes for GNU and Intel builds --- src/framework/mpas_block_decomp.F | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index e26230f2d9..67c285d14c 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -26,9 +26,9 @@ module mpas_block_decomp use mpas_io_units use mpas_log - #ifdef MPAS_SCOTCH - include "scotchf.h" - #endif +#ifdef MPAS_SCOTCH +#include "scotchf.h" +#endif type graph integer :: nVerticesTotal @@ -116,7 +116,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (dminfo % my_proc_id == IO_NODE) then useScotch = .false. if ( trim(blockFilePrefix) == '' ) then - call mpas_log_write('Namelist option config_block_decomp_file_prefix is set to \'\' ', MPAS_LOG_ERR) + call mpas_log_write("Namelist option config_block_decomp_file_prefix is set to ''", MPAS_LOG_ERR) #ifdef MPAS_SCOTCH useScotch = .true. 
#else @@ -242,7 +242,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l close(unit=ounit) call mpas_release_unit(ounit) end if - #endif +#endif else call mpas_log_write('Using block decomposition file: '//trim(filename)) From d63accc5d59481ac427d39d458336989839ee329 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 2 Jul 2025 20:05:12 -0600 Subject: [PATCH 07/43] adding timers --- src/framework/mpas_block_decomp.F | 30 +++++++++++++++++++----------- src/framework/mpas_bootstrapping.F | 5 +++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 67c285d14c..573220a9fb 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -55,6 +55,8 @@ module mpas_block_decomp subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, local_cell_list, block_id, block_start, & block_count, cellsOnCellFull, nEdgesOnCellFull, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ + use mpas_timer, only : mpas_timer_start, mpas_timer_stop + implicit none type (dm_info), intent(inout) :: dminfo !< Input: domain information @@ -95,6 +97,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l no_blocks = .false. 
+ call mpas_timer_start('mpas_block_decomp_cells_for_proc') + if (numBlocks == 0) then dminfo % total_blocks = dminfo % nProcs else @@ -157,6 +161,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (useScotch) then #ifdef MPAS_SCOTCH + call mpas_timer_start('scotch_graph_partitioning') call mpas_log_write('Using LibScotch for graph partitioning') do i=1,partial_global_graph_info % nVerticesTotal do j=1,nEdgesOnCellFull% array(i) @@ -231,17 +236,18 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 end do - if (istatus /= 0) then - call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) - call mpas_new_unit(ounit) - open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) - do i=1,partial_global_graph_info % nVerticesTotal - write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) - !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) - end do - close(unit=ounit) - call mpas_release_unit(ounit) - end if + ! if (istatus /= 0) then + ! call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) + ! call mpas_new_unit(ounit) + ! open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) + ! do i=1,partial_global_graph_info % nVerticesTotal + ! write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) + ! !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) + ! end do + ! close(unit=ounit) + ! call mpas_release_unit(ounit) + ! 
end if + call mpas_timer_stop('scotch_graph_partitioning') #endif else @@ -430,6 +436,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l end if end if + call mpas_timer_stop('mpas_block_decomp_cells_for_proc') + end subroutine mpas_block_decomp_cells_for_proc!}}} !*********************************************************************** diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index f444032e3b..909b0386ec 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -81,6 +81,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p #ifdef MPAS_PIO_SUPPORT use pio, only : file_desc_t #endif + use mpas_timer, only : mpas_timer_start, mpas_timer_stop implicit none @@ -155,6 +156,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p nHalos = config_num_halos + call mpas_timer_start('bootstrap_framework_phase1') inputHandle = MPAS_io_open(trim(mesh_filename), MPAS_IO_READ, mesh_iotype, domain % ioContext, & pio_file_desc=pio_file_desc, ierr=ierr) @@ -440,6 +442,9 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p deallocate(block_count) deallocate(readingBlock) + + call mpas_timer_stop('bootstrap_framework_phase1') + end subroutine mpas_bootstrap_framework_phase1 !}}} From ebbd91937e976f05e0d207909250c49cf56ef429 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 7 Jul 2025 16:31:02 -0600 Subject: [PATCH 08/43] PT-Scotch WIP --- Makefile | 6 +- src/framework/mpas_block_decomp.F | 305 +++++++++++++---------------- src/framework/mpas_bootstrapping.F | 2 +- 3 files changed, 136 insertions(+), 177 deletions(-) diff --git a/Makefile b/Makefile index b7d1b9f3dd..f668435435 100644 --- a/Makefile +++ b/Makefile @@ -760,8 +760,8 @@ endif endif ifneq "$(SCOTCH)" "" - SCOTCH_FCINCLUDES += -I$(SCOTCH)/src/include - SCOTCH_LIBS += -L$(SCOTCH)/lib -lscotch -lscotcherr + 
SCOTCH_FCINCLUDES += -I$(SCOTCH)/include + SCOTCH_LIBS += -L$(SCOTCH)/lib64 -lptscotch -lscotch -lptscotcherr -lm SCOTCH_FFLAGS = -DMPAS_SCOTCH FCINCLUDES += $(SCOTCH_FCINCLUDES) @@ -1431,7 +1431,7 @@ scotch_fortran_test: $(info Checking for a working MUSICA-Fortran library...) $(eval SCOTCH_FORTRAN_TEST := $(shell $\ printf "program test_scotch_fortran\n$\ - & include \"scotchf.h\"\n$\ + & include \"ptscotchf.h\"\n$\ & doubleprecision :: scotchgraph (scotch_graphdim)\n$\ & integer :: ierr\n$\ & ierr = 0\n$\ diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 573220a9fb..fe0825193f 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -26,9 +26,8 @@ module mpas_block_decomp use mpas_io_units use mpas_log -#ifdef MPAS_SCOTCH -#include "scotchf.h" -#endif +#include "ptscotchf.h" +!!#include "scotchf.h" type graph integer :: nVerticesTotal @@ -53,9 +52,10 @@ module mpas_block_decomp ! !----------------------------------------------------------------------- subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, local_cell_list, block_id, block_start, & - block_count, cellsOnCellFull, nEdgesOnCellFull, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ + block_count, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ - use mpas_timer, only : mpas_timer_start, mpas_timer_stop + use mpas_timer, only : mpas_timer_start, mpas_timer_stop, mpas_timer_write, mpas_timer_write_header + use mpi implicit none @@ -65,8 +65,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), pointer :: block_id !< Output: list of global block id's this processor owns integer, dimension(:), pointer :: block_start !< Output: offset in local_cell_list for this blocks list of cells integer, dimension(:), pointer :: block_count !< Output: number of cells in blocks - type (field2dInteger), intent(in) :: cellsOnCellFull - 
type (field1dInteger), intent(in) :: nEdgesOnCellFull integer, intent(in) :: numBlocks !< Input: Number of blocks (from config_num_blocks) logical, intent(in) :: explicitProcDecomp !< Input: Logical flag controlling if blocks are explicitly assigned to processors @@ -79,21 +77,23 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), allocatable :: local_block_list integer, dimension(:,:), allocatable :: sorted_local_cell_list - integer, dimension(:), allocatable :: global_block_id_arr, owning_proc_arr + integer, dimension(:), allocatable :: global_block_id_arr, local_block_id_arr, owning_proc_arr integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices character (len=StrKIND) :: filename, msg logical :: no_blocks - integer :: nTotalEdgesGraph = 0 + integer :: nTotalEdgesGraph = 0, nLocEdgesGraph = 0, edgelocsiz = 0 integer, dimension(:), allocatable :: edgetab, verttab, parttab logical :: useScotch -#ifdef MPAS_SCOTCH + doubleprecision :: stradat (scotch_stratdim) doubleprecision :: scotchgraph (scotch_graphdim) -#endif + doubleprecision :: scotchdgraph (SCOTCH_DGRAPHDIM) + integer :: localcomm, mpi_ierr + no_blocks = .false. @@ -115,171 +115,130 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(global_start(dminfo % nprocs)) allocate(global_list(partial_global_graph_info % nVerticesTotal)) allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) + allocate(local_block_id_arr(partial_global_graph_info % nVertices)) allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) - - if (dminfo % my_proc_id == IO_NODE) then - useScotch = .false. - if ( trim(blockFilePrefix) == '' ) then - call mpas_log_write("Namelist option config_block_decomp_file_prefix is set to ''", MPAS_LOG_ERR) -#ifdef MPAS_SCOTCH - useScotch = .true. 
-#else - call mpas_log_write('Either build MPAS with the Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) -#endif - else - if (dminfo % total_blocks < 10) then - write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100) then - write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000) then - write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000) then - write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000) then - write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000000) then - write(filename,'(a,i6)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000000) then - write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000000) then - write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks - end if - - call mpas_new_unit(iunit) - open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) - - if (istatus /= 0) then - call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_WARN, intArgs=(/dminfo % total_blocks/) ) - call mpas_log_write('Filename: '//trim(filename),MPAS_LOG_WARN) -#ifdef MPAS_SCOTCH - useScotch = .true. 
-#else - call mpas_log_write('Either build MPAS with Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) -#endif - end if - end if - - if (useScotch) then -#ifdef MPAS_SCOTCH - call mpas_timer_start('scotch_graph_partitioning') - call mpas_log_write('Using LibScotch for graph partitioning') - do i=1,partial_global_graph_info % nVerticesTotal - do j=1,nEdgesOnCellFull% array(i) - - if (cellsOnCellFull%array(j,i) == 0) cycle - nTotalEdgesGraph = nTotalEdgesGraph + 1 - !nTotalEdgesGraph = nTotalEdgesGraph + nEdgesOnCellFull% array(i) - ! do j=1,nEdgesOnCellFull% array(i) - ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,cellsOnCellFull%array(j,i)/) ) - ! end do - end do - end do - call mpas_log_write('nTotalEdgesGraph is $i', intArgs=(/nTotalEdgesGraph/)) - allocate(edgetab(nTotalEdgesGraph)) - allocate(verttab(partial_global_graph_info % nVerticesTotal + 1)) - !allocate(parttab(partial_global_graph_info % nVerticesTotal)) - - !do i=1,partial_global_graph_info % nVerticesTotal - !call mpas_log_write('proc=$i i= $i part= $i', intArgs=(/dminfo % my_proc_id, i,parttab(i)/)) - !call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,cellsOnCellFull%array(j,i)/) ) - !end do - - k = 1 - do i=1,partial_global_graph_info % nVerticesTotal - verttab(i) = k - !call mpas_log_write('i=$i verttab= $i', intArgs=(/i,verttab(i)/) ) - do j=1,nEdgesOnCellFull% array(i) - - if (cellsOnCellFull%array(j,i) == 0) cycle - - edgetab(k) = cellsOnCellFull%array(j,i) - !call mpas_log_write('k=$i edgetab= $i', intArgs=(/k,edgetab(k)/) ) - k = k + 1 - end do - end do - verttab(partial_global_graph_info % nVerticesTotal+1) = nTotalEdgesGraph + 1 - - !call mpas_log_write('nvertices =$i nTotalEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVerticesTotal, nTotalEdgesGraph/)) - - call scotchfstratinit (stradat (1), ierr) - call scotchfgraphinit (scotchgraph (1), ierr) - - if (ierr .ne. 
0) then - call mpas_log_write('Cannot initialize Scotch Graph', MPAS_LOG_CRIT) - endif - - CALL scotchfgraphbuild (scotchgraph (1), 1, partial_global_graph_info % nVerticesTotal, & - verttab (1), verttab (2), & - verttab (1), verttab (1), & - nTotalEdgesGraph, edgetab (1), edgetab (1), ierr) - if (ierr .ne. 0) then - call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) - endif - - ! Only needed during development/debugging. - !call scotchfgraphcheck (scotchgraph (1), ierr) - !if (ierr .ne. 0) then - ! call mpas_log_write('Cannot check Scotch Graph', MPAS_LOG_CRIT) - !endif - - call scotchfgraphpart( scotchgraph (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) - - call scotchfgraphexit (SCOTCHGRAPH (1)) - call scotchfstratexit (stradat (1)) - - local_nvertices(:) = 0 - do i=1,partial_global_graph_info % nVerticesTotal - !global_block_id_arr(i) = parttab(i) - call mpas_get_owning_proc(dminfo, global_block_id_arr(i), owning_proc) - owning_proc_arr(i) = owning_proc - !call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,global_block_id_arr(i),owning_proc/) ) - local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 - end do - - ! if (istatus /= 0) then - ! call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) - ! call mpas_new_unit(ounit) - ! open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) - ! do i=1,partial_global_graph_info % nVerticesTotal - ! write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) - ! !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) - ! end do - ! close(unit=ounit) - ! call mpas_release_unit(ounit) - ! 
end if - call mpas_timer_stop('scotch_graph_partitioning') -#endif - else - - call mpas_log_write('Using block decomposition file: '//trim(filename)) - - call mpas_log_write('First read pass ') - local_nvertices(:) = 0 - do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*, iostat=err) global_block_id - global_block_id_arr(i) = global_block_id - - if ( err .ne. 0 ) then - call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) - end if - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - owning_proc_arr(i) = owning_proc - !call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,global_block_id,owning_proc/) ) - local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 - !call mpas_log_write('owning_proc+1: $i local_nvertices= $i', intArgs=(/owning_proc+1, local_nvertices(owning_proc+1)/)) + + call mpas_timer_start('scotch') + + call mpas_log_write('Using LibScotch for graph partitioning') + do i=1,partial_global_graph_info % nVertices + do j=1,partial_global_graph_info % nAdjacent(i) + + if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle + nLocEdgesGraph = nLocEdgesGraph + 1 + !nTotalEdgesGraph = nTotalEdgesGraph + nEdgesOnCellFull% array(i) + + ! do j=1,partial_global_graph_info % nAdjacent(i) + ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) + ! end do + end do + end do + + call mpas_dmpar_sum_int(dminfo, nLocEdgesGraph, nTotalEdgesGraph) + + call mpas_log_write('nLocEdgesGraph is $i', intArgs=(/nLocEdgesGraph/)) + call mpas_log_write('nTotalEdgesGraph is $i', intArgs=(/nTotalEdgesGraph/)) + allocate(edgetab(nLocEdgesGraph)) + allocate(verttab(partial_global_graph_info % nVertices + 1)) + !allocate(parttab(partial_global_graph_info % nVerticesTotal)) + + ! do i=1,partial_global_graph_info % nVertices + ! 
!call mpas_log_write('proc=$i i= $i part= $i', intArgs=(/dminfo % my_proc_id, i,parttab(i)/)) + ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) + ! end do + + k = 1 + do i=1,partial_global_graph_info % nVertices + verttab(i) = k + !call mpas_log_write('i=$i verttab= $i', intArgs=(/i,verttab(i)/) ) + !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) + do j=1,partial_global_graph_info % nAdjacent(i) + + if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle + + edgetab(k) = partial_global_graph_info % adjacencyList(j,i) + !call mpas_log_write('k=$i edgetab= $i', intArgs=(/k,edgetab(k)/) ) + k = k + 1 end do + end do + verttab(partial_global_graph_info % nVertices+1) = nLocEdgesGraph + 1 + + call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVertices, nLocEdgesGraph/)) + + call MPI_Comm_dup(MPI_COMM_WORLD, localcomm, mpi_ierr) + if (mpi_ierr .ne. 0) then + call mpas_log_write('Cannot duplicate communicator') + endif + call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) + call mpas_log_write('dminfo communicator is $i', intArgs=(/dminfo % comm/) ) + call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) + if (ierr .ne. 0) then + call mpas_log_write('Cannot initialize D Scotch Graph', MPAS_LOG_CRIT) + endif + + edgelocsiz = maxval(verttab) - 1 + + call scotchfdgraphbuild (scotchdgraph(1), & + 1, & + partial_global_graph_info % nVertices, & + partial_global_graph_info % nVertices, & ! vertlocmax + verttab (1), & ! vertloctab + verttab (2), & ! + verttab (1), & + verttab (1), & + nLocEdgesGraph, & + edgelocsiz, & + edgetab(1), & + edgetab(1), & + edgetab(1), ierr) + + if (ierr .ne. 
0) then + call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) + endif + call MPI_Barrier(dminfo % comm, mpi_ierr) + + call mpas_log_write('Graph build successful ') + + + call scotchfstratinit (stradat (1), ierr) + + ! CALL scotchfgraphbuild (scotchgraph (1), 1, partial_global_graph_info % nVerticesTotal, & + ! verttab (1), verttab (2), & + ! verttab (1), verttab (1), & + ! nTotalEdgesGraph, edgetab (1), edgetab (1), ierr) + - read(unit=iunit, fmt=*, iostat=err) - - if ( err == 0 ) then - call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) - end if + ! if (dminfo % my_proc_id == IO_NODE) then + ! call scotchfdgraphscatter (scotchdgraph(1), scotchgraph(1) , ierr) + ! else + ! call scotchfdgraphscatter (scotchdgraph(1), scotchdgraph(1) , ierr) + ! end if + call mpas_timer_start('scotch_part') + call scotchfdgraphpart (scotchdgraph(1), dminfo % total_blocks, stradat (1), local_block_id_arr(1), ierr) + !call scotchfdgraphpart(scotchgraph (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) + call mpas_log_write('Graph parition successful ') + + call mpas_timer_stop('scotch_part') + call mpas_timer_stop('scotch') + call mpas_timer_stop('mpas_block_decomp_cells_for_proc') + call mpas_timer_stop('bootstrap_framework_phase1') + call mpas_timer_stop('initialize') + call mpas_timer_stop('total time') + call mpas_timer_write_header() + call mpas_timer_write() + + call scotchfdgraphexit (scotchdgraph (1)) + call scotchfstratexit (stradat (1)) + + local_nvertices(:) = 0 + do i=1,partial_global_graph_info % nVertices + !global_block_id_arr(i) = parttab(i) + call mpas_get_owning_proc(dminfo, local_block_id_arr(i), owning_proc) + owning_proc_arr(i) = owning_proc + call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,local_block_id_arr(i),owning_proc/) ) + local_nvertices(owning_proc+1) = 
local_nvertices(owning_proc+1) + 1 + end do - close(unit=iunit) - call mpas_release_unit(iunit) - end if global_start(1) = 1 do i=2,dminfo % nprocs @@ -306,7 +265,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l global_start(i) = global_start(i-1) + local_nvertices(i-1) !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do - + if (dminfo % my_proc_id == IO_NODE) then call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index 909b0386ec..0bb3ea4117 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -247,7 +247,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p ! file, but in principle it could call some online, distributed mesh partitioning library. ! 
call mpas_block_decomp_cells_for_proc(domain % dminfo, partial_global_graph_info, local_cell_list, block_id, block_start, & - block_count, cellsOnCellFull, nEdgesOnCellFull, config_number_of_blocks, config_explicit_proc_decomp, & + block_count, config_number_of_blocks, config_explicit_proc_decomp, & config_block_decomp_file_prefix, config_proc_decomp_file_prefix) deallocate(partial_global_graph_info % vertexID) From d8207e68e13d467f4a0a62a4da3527cb672eda6b Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 21 Jul 2025 22:42:13 -0600 Subject: [PATCH 09/43] working pt-scotch --- src/framework/mpas_block_decomp.F | 166 ++++++++++++------------------ 1 file changed, 68 insertions(+), 98 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index fe0825193f..23204131f4 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -92,7 +92,12 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l doubleprecision :: stradat (scotch_stratdim) doubleprecision :: scotchgraph (scotch_graphdim) doubleprecision :: scotchdgraph (SCOTCH_DGRAPHDIM) + doubleprecision :: scotchdgraph_redist (SCOTCH_DGRAPHDIM) integer :: localcomm, mpi_ierr + integer, dimension(:), allocatable :: indxtab + integer :: vertglbnbr, vertlocnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx + integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval + no_blocks = .false. 
@@ -114,7 +119,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(local_nvertices(dminfo % nprocs)) allocate(global_start(dminfo % nprocs)) allocate(global_list(partial_global_graph_info % nVerticesTotal)) - allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) + !allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) allocate(local_block_id_arr(partial_global_graph_info % nVertices)) allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) @@ -172,6 +177,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) call mpas_log_write('dminfo communicator is $i', intArgs=(/dminfo % comm/) ) call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) + call scotchfdgraphinit(scotchdgraph_redist(1), localcomm, ierr) if (ierr .ne. 0) then call mpas_log_write('Cannot initialize D Scotch Graph', MPAS_LOG_CRIT) endif @@ -195,13 +201,18 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (ierr .ne. 0) then call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) endif - call MPI_Barrier(dminfo % comm, mpi_ierr) - + !call MPI_Barrier(dminfo % comm, mpi_ierr) + call scotchfdgraphcheck (scotchdgraph(1), ierr) + if (ierr .ne. 0) then + call mpas_log_write(' Scotch Graph check not successfull', MPAS_LOG_CRIT) + endif call mpas_log_write('Graph build successful ') + call scotchfstratinit (stradat (1), ierr) - + call mpas_log_write('After strat init successful ') + ! CALL scotchfgraphbuild (scotchgraph (1), 1, partial_global_graph_info % nVerticesTotal, & ! verttab (1), verttab (2), & ! verttab (1), verttab (1), & @@ -214,110 +225,65 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! call scotchfdgraphscatter (scotchdgraph(1), scotchdgraph(1) , ierr) ! 
end if call mpas_timer_start('scotch_part') - call scotchfdgraphpart (scotchdgraph(1), dminfo % total_blocks, stradat (1), local_block_id_arr(1), ierr) + call scotchfdgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1), ierr) + !call scotchfdgraphpart (scotchdgraph(1), dminfo % total_blocks, stradat (1), local_block_id_arr(1), ierr) !call scotchfdgraphpart(scotchgraph (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) call mpas_log_write('Graph parition successful ') - call mpas_timer_stop('scotch_part') - call mpas_timer_stop('scotch') - call mpas_timer_stop('mpas_block_decomp_cells_for_proc') - call mpas_timer_stop('bootstrap_framework_phase1') - call mpas_timer_stop('initialize') - call mpas_timer_stop('total time') - call mpas_timer_write_header() - call mpas_timer_write() - - call scotchfdgraphexit (scotchdgraph (1)) - call scotchfstratexit (stradat (1)) - - local_nvertices(:) = 0 - do i=1,partial_global_graph_info % nVertices - !global_block_id_arr(i) = parttab(i) - call mpas_get_owning_proc(dminfo, local_block_id_arr(i), owning_proc) - owning_proc_arr(i) = owning_proc - call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,local_block_id_arr(i),owning_proc/) ) - local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 - end do - + call scotchfdgraphredist(scotchdgraph(1), local_block_id_arr(1),scotchdgraph(1), -1 ,-1, scotchdgraph_redist(1), ierr) + call mpas_log_write('Graph redist successful ') - global_start(1) = 1 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) - end do + allocate(indxtab(5*partial_global_graph_info % nVertices)) + call scotchfdgraphdata(scotchdgraph_redist(1), indxtab(1), baseval, vertglbnbr, vertlocnbr, vertlocmax, vertgstnbr, & + vertlocidx, vendlocidx, velolocidx, vlbllocidx, edgeglbnbr, edgelocnbr, & + edgelocsiz, edgelocidx, 
edgegstidx, edlolocidx, comm, ierr ) - !rewind(unit=iunit) - call mpas_log_write('Second read pass ') - - do i=1,partial_global_graph_info % nVerticesTotal - !read(unit=iunit, fmt=*, iostat=err) global_block_id - !call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - global_block_id = global_block_id_arr(i) - owning_proc = owning_proc_arr(i) - global_list(global_start(owning_proc+1)) = i - !call mpas_log_write('owning_proc+1: $i global_start= $i global_list=$i', intArgs=(/owning_proc+1, global_start(owning_proc+1), global_list(global_start(owning_proc+1))/)) - global_start(owning_proc+1) = global_start(owning_proc+1) + 1 - !call mpas_log_write('global_start(owning_proc+1): $i', intArgs=(/global_start(owning_proc+1)/)) - end do + call mpas_log_write('vertlocnbr $i vertlocidx $i ', intArgs=(/vertlocnbr,vertlocidx/) ) + call mpas_log_write('vertgstnbr $i vertlocmax $i ', intArgs=(/vertgstnbr,vertlocmax/) ) + call mpas_log_write('edgelocsiz $i edgelocidx $i ', intArgs=(/edgelocsiz,edgelocidx/) ) + call mpas_log_write('edgegstidx $i edlolocidx $i ', intArgs=(/edgegstidx,edlolocidx/) ) + call mpas_log_write('vertglbnbr $i ierr $i ', intArgs=(/vertglbnbr,ierr/) ) - global_start(1) = 0 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) - end do - if (dminfo % my_proc_id == IO_NODE) then - call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) - allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) - allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) - - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_cell_list) - - ! 
Reset global start for second read of global_block_list - global_start(1) = 1 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) - end do + ! do i=1,vertlocnbr + ! call mpas_log_write('i: $i, vertlocidx: $i', intArgs=(/i,indxtab(vertlocidx+i-1)/) ) + ! end do + ! do i=1,vertlocnbr + ! call mpas_log_write('i: $i, vlbllocidx: $i', intArgs=(/i,indxtab(vlbllocidx+i-1)/) ) + ! end do - !rewind(unit=iunit) - call mpas_log_write('Third read pass ') - - do i=1,partial_global_graph_info % nVerticesTotal - !read(unit=iunit, fmt=*) global_block_id - !call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - global_block_id = global_block_id_arr(i) - owning_proc = owning_proc_arr(i) - global_list(global_start(owning_proc+1)) = global_block_id - !call mpas_log_write('owning_proc+1: $i global_start= $i global_list=$i', intArgs=(/owning_proc+1, global_start(owning_proc+1), global_list(global_start(owning_proc+1))/)) - global_start(owning_proc+1) = global_start(owning_proc+1) + 1 - !call mpas_log_write('global_start(owning_proc+1): $i', intArgs=(/global_start(owning_proc+1)/)) - end do + !call MPI_Scatterv(inlist, counts, displs, MPI_INTEGERKIND, outlist, noutlist, MPI_INTEGERKIND, IO_NODE, dminfo % comm, mpi_ierr) + !call MPI_Gatherv( local_block_id_arr, partial_global_graph_info % nVertices, MPI_INTEGERKIND, rbuf, rcounts, displs, MPI_INTEGERKIND, IO_NODE, dminfo % comm, mpi_ierr) - ! 
Recompute global start after second read of global_block_list - global_start(1) = 0 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) - end do - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_block_list) - + call mpas_timer_stop('scotch_part') + call mpas_timer_stop('scotch') + ! call mpas_timer_stop('mpas_block_decomp_cells_for_proc') + ! call mpas_timer_stop('bootstrap_framework_phase1') + ! call mpas_timer_stop('initialize') + ! call mpas_timer_stop('total time') + !call mpas_timer_write_header() + !call mpas_timer_write() + + allocate(local_cell_list(vertlocnbr)) + allocate(local_block_list(vertlocnbr)) + + do i=1,vertlocnbr + local_cell_list(i)=indxtab(vlbllocidx+i-1) + local_block_list(i)=dminfo % my_proc_id + !call mpas_log_write('local_cell_list: $i, local_block_list: $i', intArgs=(/local_cell_list(i),local_block_list(i)/) ) + end do - else + call scotchfdgraphexit (scotchdgraph (1)) + call scotchfdgraphexit (scotchdgraph_redist (1)) + call scotchfstratexit (stradat (1)) - call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) - allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) - allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) + call mpas_log_write('post1 successful ') - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_cell_list) - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_block_list) - end if + ! do i=1,partial_global_graph_info % nVertices + ! call mpas_log_write('vert: $i, partloctab: $i', intArgs=(/i,local_block_id_arr(i)/) ) + ! 
end do if(blocks_per_proc == 0) then no_blocks = .true. @@ -333,7 +299,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_start(1) = 0 block_count(1) = 0 else - allocate(sorted_local_cell_list(2, local_nvertices(dminfo % my_proc_id + 1))) + allocate(sorted_local_cell_list(2, vertlocnbr)) allocate(block_id(blocks_per_proc)) allocate(block_start(blocks_per_proc)) allocate(block_count(blocks_per_proc)) @@ -343,7 +309,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_count = 0 end do - do i = 1,local_nvertices(dminfo % my_proc_id +1) + do i = 1,vertlocnbr call mpas_get_local_block_id(dminfo, local_block_list(i), local_block_id) block_id(local_block_id+1) = local_block_list(i) @@ -354,9 +320,9 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_count(local_block_id+1) = block_count(local_block_id+1) + 1 end do - call mpas_quicksort(local_nvertices(dminfo % my_proc_id + 1), sorted_local_cell_list) + call mpas_quicksort(vertlocnbr, sorted_local_cell_list) - do i = 1, local_nvertices(dminfo % my_proc_id+1) + do i = 1, vertlocnbr local_cell_list(i) = sorted_local_cell_list(2, i) end do @@ -371,6 +337,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l deallocate(global_list) end if else + + call mpas_log_write('post2 successful ') if (dminfo % my_proc_id == IO_NODE) then allocate(local_cell_list(partial_global_graph_info % nVerticesTotal)) @@ -395,6 +363,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l end if end if + call mpas_log_write('mpas_block_decomp_cells_for_proc successful ') + call mpas_timer_stop('mpas_block_decomp_cells_for_proc') end subroutine mpas_block_decomp_cells_for_proc!}}} From d4f3f2dd4ed97c3aaa518eb6bb71cc5254d2bdae Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 21 Jul 2025 22:57:25 -0600 Subject: [PATCH 10/43] removing inq of unneeded vars --- 
src/framework/mpas_bootstrapping.F | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index 0bb3ea4117..790fdbf6c5 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -199,16 +199,6 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p ! which cells/edges/vertices are owned by each block, and which are ghost ! - allocate(cellsOnCellFull) - allocate(cellsOnCellFull % array(maxEdges,nCells)) - call MPAS_io_inq_var(inputHandle, 'cellsOnCell', ierr=ierr) - call mpas_io_get_var(inputHandle, 'cellsOnCell', cellsOnCellFull % array, ierr) - - allocate(nEdgesOnCellFull) - allocate(nEdgesOnCellFull % array(nCells)) - call MPAS_io_inq_var(inputHandle, 'nEdgesOnCell', ierr=ierr) - call mpas_io_get_var(inputHandle, 'nEdgesOnCell', nEdgesOnCellFull % array, ierr) - call mpas_io_setup_cell_block_fields(inputHandle, nreadCells, readCellStart, readingBlock, maxEdges, & indexTocellIDField, xCellField, yCellField, zCellField, nEdgesOnCellField, & cellsOnCellField, edgesOnCellField, verticesOnCellField, nHalos) From 6d2d9fd8287fd1a804dd124e0d49390373c12dbd Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 22 Jul 2025 20:37:03 +0000 Subject: [PATCH 11/43] cleanup prior to adding MPAS_SCOTCH logic --- src/framework/mpas_block_decomp.F | 47 +++++-------------------------- 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 23204131f4..51293df890 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -25,9 +25,8 @@ module mpas_block_decomp use mpas_derived_types use mpas_io_units use mpas_log - + #include "ptscotchf.h" -!!#include "scotchf.h" type graph integer :: nVerticesTotal @@ -54,7 +53,7 @@ module mpas_block_decomp subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, local_cell_list, 
block_id, block_start, & block_count, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ - use mpas_timer, only : mpas_timer_start, mpas_timer_stop, mpas_timer_write, mpas_timer_write_header + use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpi implicit none @@ -77,18 +76,16 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), allocatable :: local_block_list integer, dimension(:,:), allocatable :: sorted_local_cell_list - integer, dimension(:), allocatable :: global_block_id_arr, local_block_id_arr, owning_proc_arr + integer, dimension(:), allocatable :: local_block_id_arr, owning_proc_arr integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices character (len=StrKIND) :: filename, msg logical :: no_blocks - integer :: nTotalEdgesGraph = 0, nLocEdgesGraph = 0, edgelocsiz = 0 - integer, dimension(:), allocatable :: edgetab, verttab, parttab - logical :: useScotch - + integer :: nLocEdgesGraph = 0, edgelocsiz = 0 + integer, dimension(:), allocatable :: edgetab, verttab, parttab doubleprecision :: stradat (scotch_stratdim) doubleprecision :: scotchgraph (scotch_graphdim) doubleprecision :: scotchdgraph (SCOTCH_DGRAPHDIM) @@ -98,8 +95,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer :: vertglbnbr, vertlocnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval - - no_blocks = .false. 
call mpas_timer_start('mpas_block_decomp_cells_for_proc') @@ -119,7 +114,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(local_nvertices(dminfo % nprocs)) allocate(global_start(dminfo % nprocs)) allocate(global_list(partial_global_graph_info % nVerticesTotal)) - !allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) allocate(local_block_id_arr(partial_global_graph_info % nVertices)) allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) @@ -131,7 +125,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle nLocEdgesGraph = nLocEdgesGraph + 1 - !nTotalEdgesGraph = nTotalEdgesGraph + nEdgesOnCellFull% array(i) + ! do j=1,partial_global_graph_info % nAdjacent(i) ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) @@ -139,10 +133,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l end do end do - call mpas_dmpar_sum_int(dminfo, nLocEdgesGraph, nTotalEdgesGraph) - call mpas_log_write('nLocEdgesGraph is $i', intArgs=(/nLocEdgesGraph/)) - call mpas_log_write('nTotalEdgesGraph is $i', intArgs=(/nTotalEdgesGraph/)) allocate(edgetab(nLocEdgesGraph)) allocate(verttab(partial_global_graph_info % nVertices + 1)) !allocate(parttab(partial_global_graph_info % nVerticesTotal)) @@ -175,7 +166,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_log_write('Cannot duplicate communicator') endif call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) - call mpas_log_write('dminfo communicator is $i', intArgs=(/dminfo % comm/) ) + !call mpas_log_write('dminfo communicator is $i', intArgs=(/dminfo % comm/) ) call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) call scotchfdgraphinit(scotchdgraph_redist(1), localcomm, ierr) if (ierr .ne. 
0) then @@ -208,26 +199,12 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l endif call mpas_log_write('Graph build successful ') - - call scotchfstratinit (stradat (1), ierr) call mpas_log_write('After strat init successful ') - - ! CALL scotchfgraphbuild (scotchgraph (1), 1, partial_global_graph_info % nVerticesTotal, & - ! verttab (1), verttab (2), & - ! verttab (1), verttab (1), & - ! nTotalEdgesGraph, edgetab (1), edgetab (1), ierr) - - - ! if (dminfo % my_proc_id == IO_NODE) then - ! call scotchfdgraphscatter (scotchdgraph(1), scotchgraph(1) , ierr) - ! else - ! call scotchfdgraphscatter (scotchdgraph(1), scotchdgraph(1) , ierr) ! end if call mpas_timer_start('scotch_part') call scotchfdgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1), ierr) !call scotchfdgraphpart (scotchdgraph(1), dminfo % total_blocks, stradat (1), local_block_id_arr(1), ierr) - !call scotchfdgraphpart(scotchgraph (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) call mpas_log_write('Graph parition successful ') call scotchfdgraphredist(scotchdgraph(1), local_block_id_arr(1),scotchdgraph(1), -1 ,-1, scotchdgraph_redist(1), ierr) @@ -250,12 +227,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! do i=1,vertlocnbr ! call mpas_log_write('i: $i, vlbllocidx: $i', intArgs=(/i,indxtab(vlbllocidx+i-1)/) ) ! end do - - !call MPI_Scatterv(inlist, counts, displs, MPI_INTEGERKIND, outlist, noutlist, MPI_INTEGERKIND, IO_NODE, dminfo % comm, mpi_ierr) - !call MPI_Gatherv( local_block_id_arr, partial_global_graph_info % nVertices, MPI_INTEGERKIND, rbuf, rcounts, displs, MPI_INTEGERKIND, IO_NODE, dminfo % comm, mpi_ierr) - - - call mpas_timer_stop('scotch_part') call mpas_timer_stop('scotch') ! 
call mpas_timer_stop('mpas_block_decomp_cells_for_proc') @@ -277,7 +248,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call scotchfdgraphexit (scotchdgraph (1)) call scotchfdgraphexit (scotchdgraph_redist (1)) call scotchfstratexit (stradat (1)) - call mpas_log_write('post1 successful ') @@ -337,8 +307,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l deallocate(global_list) end if else - - call mpas_log_write('post2 successful ') if (dminfo % my_proc_id == IO_NODE) then allocate(local_cell_list(partial_global_graph_info % nVerticesTotal)) @@ -362,7 +330,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_count(1) = 0 end if end if - call mpas_log_write('mpas_block_decomp_cells_for_proc successful ') call mpas_timer_stop('mpas_block_decomp_cells_for_proc') From fcd89ef138cb860004930fa953989f530565040b Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 22 Jul 2025 23:36:38 +0000 Subject: [PATCH 12/43] Scotch is selectively enabled when SCOTCH is set --- src/framework/mpas_block_decomp.F | 185 +++++++++++++++++++++++++---- src/framework/mpas_bootstrapping.F | 7 +- 2 files changed, 165 insertions(+), 27 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 51293df890..c75886390a 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -26,7 +26,9 @@ module mpas_block_decomp use mpas_io_units use mpas_log +#ifdef MPAS_SCOTCH #include "ptscotchf.h" +#endif type graph integer :: nVerticesTotal @@ -80,10 +82,12 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices + integer :: num_local_vertices !< Number of local vertices for this processor character 
(len=StrKIND) :: filename, msg logical :: no_blocks logical :: useScotch +#ifdef MPAS_SCOTCH integer :: nLocEdgesGraph = 0, edgelocsiz = 0 integer, dimension(:), allocatable :: edgetab, verttab, parttab doubleprecision :: stradat (scotch_stratdim) @@ -92,8 +96,9 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l doubleprecision :: scotchdgraph_redist (SCOTCH_DGRAPHDIM) integer :: localcomm, mpi_ierr integer, dimension(:), allocatable :: indxtab - integer :: vertglbnbr, vertlocnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx + integer :: vertglbnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval +#endif no_blocks = .false. @@ -117,6 +122,53 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(local_block_id_arr(partial_global_graph_info % nVertices)) allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) + useScotch = .false. + if (dminfo % my_proc_id == IO_NODE) then + if ( trim(blockFilePrefix) == '' ) then + call mpas_log_write("Namelist option config_block_decomp_file_prefix is set to ''", MPAS_LOG_ERR) +#ifdef MPAS_SCOTCH + useScotch = .true. 
+#else + call mpas_log_write('Either build MPAS with the Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) +#endif + else + if (dminfo % total_blocks < 10) then + write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100) then + write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000) then + write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 10000) then + write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000) then + write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000000) then + write(filename,'(a,i6)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 10000000) then + write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000000) then + write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks + end if + + call mpas_new_unit(iunit) + open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) + + if (istatus /= 0) then + call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_WARN, intArgs=(/dminfo % total_blocks/) ) + call mpas_log_write('Filename: '//trim(filename),MPAS_LOG_WARN) +#ifdef MPAS_SCOTCH + useScotch = .true. +#else + call mpas_log_write('Either build MPAS with Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) +#endif + end if ! istatus /= 0 + end if ! trim(blockFilePrefix) == '' + end if ! dminfo % my_proc_id == IO_NODE + + call mpas_dmpar_bcast_logical(dminfo, useScotch) + + if (useScotch) then ! 
Using PT-Scotch across all MPI ranks +#ifdef MPAS_SCOTCH call mpas_timer_start('scotch') call mpas_log_write('Using LibScotch for graph partitioning') @@ -125,14 +177,11 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle nLocEdgesGraph = nLocEdgesGraph + 1 - - ! do j=1,partial_global_graph_info % nAdjacent(i) ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) ! end do end do end do - call mpas_log_write('nLocEdgesGraph is $i', intArgs=(/nLocEdgesGraph/)) allocate(edgetab(nLocEdgesGraph)) allocate(verttab(partial_global_graph_info % nVertices + 1)) @@ -192,7 +241,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (ierr .ne. 0) then call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) endif - !call MPI_Barrier(dminfo % comm, mpi_ierr) + + ! Only needed during development/debugging. call scotchfdgraphcheck (scotchdgraph(1), ierr) if (ierr .ne. 
0) then call mpas_log_write(' Scotch Graph check not successfull', MPAS_LOG_CRIT) @@ -205,30 +255,31 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_timer_start('scotch_part') call scotchfdgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1), ierr) !call scotchfdgraphpart (scotchdgraph(1), dminfo % total_blocks, stradat (1), local_block_id_arr(1), ierr) + call mpas_timer_stop('scotch_part') call mpas_log_write('Graph parition successful ') call scotchfdgraphredist(scotchdgraph(1), local_block_id_arr(1),scotchdgraph(1), -1 ,-1, scotchdgraph_redist(1), ierr) call mpas_log_write('Graph redist successful ') allocate(indxtab(5*partial_global_graph_info % nVertices)) - call scotchfdgraphdata(scotchdgraph_redist(1), indxtab(1), baseval, vertglbnbr, vertlocnbr, vertlocmax, vertgstnbr, & + call scotchfdgraphdata(scotchdgraph_redist(1), indxtab(1), baseval, vertglbnbr, num_local_vertices, vertlocmax, vertgstnbr, & vertlocidx, vendlocidx, velolocidx, vlbllocidx, edgeglbnbr, edgelocnbr, & edgelocsiz, edgelocidx, edgegstidx, edlolocidx, comm, ierr ) - call mpas_log_write('vertlocnbr $i vertlocidx $i ', intArgs=(/vertlocnbr,vertlocidx/) ) + call mpas_log_write('vertlocnbr $i vertlocidx $i ', intArgs=(/num_local_vertices,vertlocidx/) ) call mpas_log_write('vertgstnbr $i vertlocmax $i ', intArgs=(/vertgstnbr,vertlocmax/) ) call mpas_log_write('edgelocsiz $i edgelocidx $i ', intArgs=(/edgelocsiz,edgelocidx/) ) call mpas_log_write('edgegstidx $i edlolocidx $i ', intArgs=(/edgegstidx,edlolocidx/) ) call mpas_log_write('vertglbnbr $i ierr $i ', intArgs=(/vertglbnbr,ierr/) ) - ! do i=1,vertlocnbr + ! do i=1,num_local_vertices ! call mpas_log_write('i: $i, vertlocidx: $i', intArgs=(/i,indxtab(vertlocidx+i-1)/) ) ! end do - ! do i=1,vertlocnbr + ! do i=1,num_local_vertices ! call mpas_log_write('i: $i, vlbllocidx: $i', intArgs=(/i,indxtab(vlbllocidx+i-1)/) ) ! 
end do - call mpas_timer_stop('scotch_part') - call mpas_timer_stop('scotch') + + ! call mpas_timer_stop('mpas_block_decomp_cells_for_proc') ! call mpas_timer_stop('bootstrap_framework_phase1') ! call mpas_timer_stop('initialize') @@ -236,24 +287,113 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l !call mpas_timer_write_header() !call mpas_timer_write() - allocate(local_cell_list(vertlocnbr)) - allocate(local_block_list(vertlocnbr)) + allocate(local_cell_list(num_local_vertices)) + allocate(local_block_list(num_local_vertices)) - do i=1,vertlocnbr + do i=1,num_local_vertices local_cell_list(i)=indxtab(vlbllocidx+i-1) local_block_list(i)=dminfo % my_proc_id !call mpas_log_write('local_cell_list: $i, local_block_list: $i', intArgs=(/local_cell_list(i),local_block_list(i)/) ) end do + ! Calling scotchfdgraphexit call scotchfdgraphexit (scotchdgraph (1)) call scotchfdgraphexit (scotchdgraph_redist (1)) call scotchfstratexit (stradat (1)) - call mpas_log_write('post1 successful ') + call mpas_timer_stop('scotch') + call mpas_log_write('Scotch parition successful ') +#endif + else ! useScotch = .false. + if (dminfo % my_proc_id == IO_NODE) then + call mpas_log_write('Using block decomposition file: '//trim(filename)) + + local_nvertices(:) = 0 + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*, iostat=err) global_block_id + + if ( err .ne. 0 ) then + call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 + end do + read(unit=iunit, fmt=*, iostat=err) - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('vert: $i, partloctab: $i', intArgs=(/i,local_block_id_arr(i)/) ) - ! 
end do + if ( err == 0 ) then + call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if + + global_start(1) = 1 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + rewind(unit=iunit) + + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*, iostat=err) global_block_id + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_list(global_start(owning_proc+1)) = i + global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + end do + + global_start(1) = 0 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) + allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) + allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_cell_list) + + ! Reset global start for second read of global_block_list + global_start(1) = 1 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + rewind(unit=iunit) + + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*) global_block_id + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_list(global_start(owning_proc+1)) = global_block_id + global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + end do + + ! 
Recompute global start after second read of global_block_list + global_start(1) = 0 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_block_list) + + close(unit=iunit) + call mpas_release_unit(iunit) + + else ! dminfo % my_proc_id == IO_NODE + + call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) + allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) + allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_cell_list) + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_block_list) + end if ! dminfo % my_proc_id == IO_NODE + num_local_vertices = local_nvertices(dminfo % my_proc_id + 1) + deallocate(local_nvertices) + end if ! useScotch if(blocks_per_proc == 0) then no_blocks = .true. 
@@ -269,7 +409,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_start(1) = 0 block_count(1) = 0 else - allocate(sorted_local_cell_list(2, vertlocnbr)) + allocate(sorted_local_cell_list(2, num_local_vertices)) allocate(block_id(blocks_per_proc)) allocate(block_start(blocks_per_proc)) allocate(block_count(blocks_per_proc)) @@ -279,7 +419,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_count = 0 end do - do i = 1,vertlocnbr + do i = 1,num_local_vertices call mpas_get_local_block_id(dminfo, local_block_list(i), local_block_id) block_id(local_block_id+1) = local_block_list(i) @@ -290,9 +430,9 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_count(local_block_id+1) = block_count(local_block_id+1) + 1 end do - call mpas_quicksort(vertlocnbr, sorted_local_cell_list) + call mpas_quicksort(num_local_vertices, sorted_local_cell_list) - do i = 1, vertlocnbr + do i = 1, num_local_vertices local_cell_list(i) = sorted_local_cell_list(2, i) end do @@ -302,7 +442,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l deallocate(sorted_local_cell_list) deallocate(local_block_list) - deallocate(local_nvertices) deallocate(global_start) deallocate(global_list) end if diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index 790fdbf6c5..4586bc2549 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -107,8 +107,8 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p type (field1dInteger), pointer :: indexToCellIDField type (field1dInteger), pointer :: indexToEdgeIDField type (field1dInteger), pointer :: indexToVertexIDField - type (field1dInteger), pointer :: nEdgesOnCellField, nEdgesOnCellFull - type (field2dInteger), pointer :: cellsOnCellField, cellsOnCellFull + type (field1dInteger), pointer :: nEdgesOnCellField + type 
(field2dInteger), pointer :: cellsOnCellField type (field2dInteger), pointer :: edgesOnCellField type (field2dInteger), pointer :: verticesOnCellField type (field2dInteger), pointer :: cellsOnEdgeField @@ -145,7 +145,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p integer, pointer :: config_num_halos, config_number_of_blocks logical, pointer :: config_explicit_proc_decomp character (len=StrKIND), pointer :: config_block_decomp_file_prefix, config_proc_decomp_file_prefix - integer :: nHalos, j, vind + integer :: nHalos call mpas_pool_get_config(domain % configs, 'config_num_halos', config_num_halos) @@ -432,7 +432,6 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p deallocate(block_count) deallocate(readingBlock) - call mpas_timer_stop('bootstrap_framework_phase1') end subroutine mpas_bootstrap_framework_phase1 !}}} From 52c7830f170be161d1685f287302b46f4c151e48 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 23 Jul 2025 16:04:38 +0000 Subject: [PATCH 13/43] More cleanup and added comments --- src/framework/mpas_block_decomp.F | 226 +++++++++++++++++------------- 1 file changed, 131 insertions(+), 95 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index c75886390a..6e5ec3874e 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -89,7 +89,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l logical :: useScotch #ifdef MPAS_SCOTCH integer :: nLocEdgesGraph = 0, edgelocsiz = 0 - integer, dimension(:), allocatable :: edgetab, verttab, parttab + integer, dimension(:), allocatable :: edgeloctab, vertloctab doubleprecision :: stradat (scotch_stratdim) doubleprecision :: scotchgraph (scotch_graphdim) doubleprecision :: scotchdgraph (SCOTCH_DGRAPHDIM) @@ -169,102 +169,147 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (useScotch) then ! 
Using PT-Scotch across all MPI ranks #ifdef MPAS_SCOTCH - call mpas_timer_start('scotch') + call mpas_timer_start('scotch_total') call mpas_log_write('Using LibScotch for graph partitioning') + + ! Count the number of edges (including to ghost cells) in the portion of graph + ! owned by the current rank. Each edge is counted twice, once for each vertex, + ! with the exception of edges to ghost vertices, which are counted only once. do i=1,partial_global_graph_info % nVertices do j=1,partial_global_graph_info % nAdjacent(i) - if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle nLocEdgesGraph = nLocEdgesGraph + 1 - ! do j=1,partial_global_graph_info % nAdjacent(i) - ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) - ! end do + ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) end do end do call mpas_log_write('nLocEdgesGraph is $i', intArgs=(/nLocEdgesGraph/)) - allocate(edgetab(nLocEdgesGraph)) - allocate(verttab(partial_global_graph_info % nVertices + 1)) - !allocate(parttab(partial_global_graph_info % nVerticesTotal)) + ! Holds the adjacency array for every local vertex + allocate(edgeloctab(nLocEdgesGraph)) + ! Array of start indices in edgeloctab for each local vertex + allocate(vertloctab(partial_global_graph_info % nVertices + 1)) + ! do i=1,partial_global_graph_info % nVertices - ! !call mpas_log_write('proc=$i i= $i part= $i', intArgs=(/dminfo % my_proc_id, i,parttab(i)/)) ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) ! 
end do - k = 1 - do i=1,partial_global_graph_info % nVertices - verttab(i) = k - !call mpas_log_write('i=$i verttab= $i', intArgs=(/i,verttab(i)/) ) - !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) - do j=1,partial_global_graph_info % nAdjacent(i) - - if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle - - edgetab(k) = partial_global_graph_info % adjacencyList(j,i) - !call mpas_log_write('k=$i edgetab= $i', intArgs=(/k,edgetab(k)/) ) - k = k + 1 - end do - end do - verttab(partial_global_graph_info % nVertices+1) = nLocEdgesGraph + 1 - - call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVertices, nLocEdgesGraph/)) - - call MPI_Comm_dup(MPI_COMM_WORLD, localcomm, mpi_ierr) - if (mpi_ierr .ne. 0) then - call mpas_log_write('Cannot duplicate communicator') - endif - call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) - !call mpas_log_write('dminfo communicator is $i', intArgs=(/dminfo % comm/) ) - call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) - call scotchfdgraphinit(scotchdgraph_redist(1), localcomm, ierr) - if (ierr .ne. 0) then - call mpas_log_write('Cannot initialize D Scotch Graph', MPAS_LOG_CRIT) - endif - - edgelocsiz = maxval(verttab) - 1 - - call scotchfdgraphbuild (scotchdgraph(1), & - 1, & - partial_global_graph_info % nVertices, & - partial_global_graph_info % nVertices, & ! vertlocmax - verttab (1), & ! vertloctab - verttab (2), & ! - verttab (1), & - verttab (1), & - nLocEdgesGraph, & - edgelocsiz, & - edgetab(1), & - edgetab(1), & - edgetab(1), ierr) - - if (ierr .ne. 0) then - call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) - endif - - ! Only needed during development/debugging. - call scotchfdgraphcheck (scotchdgraph(1), ierr) - if (ierr .ne. 
0) then - call mpas_log_write(' Scotch Graph check not successfull', MPAS_LOG_CRIT) - endif - call mpas_log_write('Graph build successful ') - - call scotchfstratinit (stradat (1), ierr) - call mpas_log_write('After strat init successful ') - ! end if - call mpas_timer_start('scotch_part') + ! Fill up edgeloctab and vertloctab + k = 1 + do i=1,partial_global_graph_info % nVertices + vertloctab(i) = k + !call mpas_log_write('i=$i vertloctab= $i', intArgs=(/i,vertloctab(i)/) ) + !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) + do j=1,partial_global_graph_info % nAdjacent(i) + + if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle + + edgeloctab(k) = partial_global_graph_info % adjacencyList(j,i) + !call mpas_log_write('k=$i edgeloctab= $i', intArgs=(/k,edgeloctab(k)/) ) + k = k + 1 + end do + end do + vertloctab(partial_global_graph_info % nVertices+1) = nLocEdgesGraph + 1 + + call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVertices, nLocEdgesGraph/)) + + ! Duplicate the communicator to be used by Scotch + call MPI_Comm_dup(MPI_COMM_WORLD, localcomm, mpi_ierr) + if (mpi_ierr .ne. 0) then + call mpas_log_write('Cannot duplicate communicator') + endif + call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) + + ! Initialize the Scotch graph data structure, and an extra one to hold the re-distributed graph + call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) + if (ierr .ne. 0) then + call mpas_log_write('Cannot initialize D Scotch Graph', MPAS_LOG_CRIT) + endif + call scotchfdgraphinit(scotchdgraph_redist(1), localcomm, ierr) + + ! From Scotch documentation: edgelocsiz is lower-bounded by the minimum size + ! of the edge array required to encompass all used adjacency values; it is + ! therefore at least equal to the maximum of the vendloctab entries, over all + ! 
local vertices, minus baseval; it can be set to edgelocnbr if the edge array is compact. + edgelocsiz = maxval(vertloctab) - 1 + + ! Build the distributed Scotch graph and save it in scotchdgraph + ! Note: Optional arguments veloloctab, vlblloctab, edgegsttab, and edloloctab are not needed here. + ! Scotch Fortran interface requires that if veloloctab, vlblloctab are skipped, then we pass a reference + ! to vertloctab(1) so that they are treated as null pointers. Similarly, pass reference to edgeloctab for + ! the optional edgegsttab and edloloctab arguments. + call scotchfdgraphbuild (scotchdgraph(1), & + 1, & ! base value value for index arrays (1 in Fortran) + partial_global_graph_info % nVertices, & ! num of local vertices on the calling process + partial_global_graph_info % nVertices, & ! max number of local vertices = num_local_vertices for graphs without holes + vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex + vertloctab (2), & ! vendloctab: Array of after-last indices in edgeloctab + vertloctab (1), & ! veloloctab: Optional array of integer loads for each local vertex + vertloctab (1), & ! vlblloctab: Optional array of labels for each local vertex + nLocEdgesGraph, & ! Number of local edges, including to ghost vertices + edgelocsiz, & ! Defined previously + edgeloctab(1), & ! Holds the adjacency array for every local vertex + edgeloctab(1), & ! edgegsttab: Optional array holding the local and ghost indices + edgeloctab(1), & ! edloloctab: Optional array of integer loads for each local edge + ierr) + + if (ierr .ne. 0) then + call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) + endif + + ! Only needed during development/debugging. + call scotchfdgraphcheck (scotchdgraph(1), ierr) + if (ierr .ne. 0) then + call mpas_log_write(' Scotch Graph check not successfull', MPAS_LOG_CRIT) + endif + call mpas_log_write('Graph build successful ') + ! 
Initialize the strategy data structure + call scotchfstratinit (stradat (1), ierr) + call mpas_log_write('strategy init success') + + call mpas_timer_start('scotch_graph_partitioning') + ! Partition the distributed graph and save the result in local_block_id_arr call scotchfdgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1), ierr) - !call scotchfdgraphpart (scotchdgraph(1), dminfo % total_blocks, stradat (1), local_block_id_arr(1), ierr) - call mpas_timer_stop('scotch_part') + call mpas_timer_stop('scotch_graph_partitioning') call mpas_log_write('Graph parition successful ') - call scotchfdgraphredist(scotchdgraph(1), local_block_id_arr(1),scotchdgraph(1), -1 ,-1, scotchdgraph_redist(1), ierr) + ! After the paritioning above, each processor would not necessarily have information about all of the + ! vertices it owns. To obtain this information, Scotch provides a convenience function to redistribute the graph + ! to all processors, so that each processor has information about all of the vertices it owns. + call scotchfdgraphredist(scotchdgraph(1), & ! Input: original distributed graph + local_block_id_arr(1), & ! Input: the partition array + scotchdgraph(1), & ! Optional: permgsttab. Pass ref to original graph to skip it. + -1 , & ! Optional: vertlocdlt. Pass <0 to skip it. + -1, & ! Optional: edgelocdlt. Pass <0 to skip it. + scotchdgraph_redist(1), & ! Output: re-distributed graph + ierr) call mpas_log_write('Graph redist successful ') + ! indxtab holds the graph data returned by the call to scotchfdgraphdata + ! Not sure how large indxtab needs to be, so currently allocating it to be + ! 
5 times the number of local vertices allocate(indxtab(5*partial_global_graph_info % nVertices)) - call scotchfdgraphdata(scotchdgraph_redist(1), indxtab(1), baseval, vertglbnbr, num_local_vertices, vertlocmax, vertgstnbr, & - vertlocidx, vendlocidx, velolocidx, vlbllocidx, edgeglbnbr, edgelocnbr, & - edgelocsiz, edgelocidx, edgegstidx, edlolocidx, comm, ierr ) + + ! Input to scotchfdgraphdata is the re-distributed graph scotchdgraph_redist, and all + ! the remaining are output arguments. + call scotchfdgraphdata(scotchdgraph_redist(1), & + indxtab(1), & ! Holds the data queried from graph, local to this processor + baseval, & ! returns base value value for index arrays (1 in Fortran) + vertglbnbr, & ! number of global vertices + num_local_vertices, & ! number of local vertices + vertlocmax, & ! max number of local vertices = num_local_vertices for graphs without holes + vertgstnbr, & ! num of local and ghost vertices. -1 unless dgraphGhst is called + vertlocidx, & ! starting index of vertloctab in indxtab + vendlocidx, & ! starting index of vendloctab in indxtab + velolocidx, & ! starting index of veloloctab in indxtab + vlbllocidx, & ! starting index of vlblloctab in indxtab + edgeglbnbr, & ! Global number of arcs. Each edge counts twice. + edgelocnbr, & ! Local number of arcs including to ghost vertices + edgelocsiz, & ! ~ max of vendloctab entries, for local vertices, minus baseval + edgelocidx, & ! starting index of edgeloctab in indxtab + edgegstidx, & ! starting index of edgegsttab in indxtab + edlolocidx, & ! starting index of edloloctab in indxtab + comm, ierr ) ! 
communicator used and error code call mpas_log_write('vertlocnbr $i vertlocidx $i ', intArgs=(/num_local_vertices,vertlocidx/) ) call mpas_log_write('vertgstnbr $i vertlocmax $i ', intArgs=(/vertgstnbr,vertlocmax/) ) @@ -272,36 +317,27 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_log_write('edgegstidx $i edlolocidx $i ', intArgs=(/edgegstidx,edlolocidx/) ) call mpas_log_write('vertglbnbr $i ierr $i ', intArgs=(/vertglbnbr,ierr/) ) - ! do i=1,num_local_vertices - ! call mpas_log_write('i: $i, vertlocidx: $i', intArgs=(/i,indxtab(vertlocidx+i-1)/) ) - ! end do - ! do i=1,num_local_vertices - ! call mpas_log_write('i: $i, vlbllocidx: $i', intArgs=(/i,indxtab(vlbllocidx+i-1)/) ) - ! end do - - - ! call mpas_timer_stop('mpas_block_decomp_cells_for_proc') - ! call mpas_timer_stop('bootstrap_framework_phase1') - ! call mpas_timer_stop('initialize') - ! call mpas_timer_stop('total time') - !call mpas_timer_write_header() - !call mpas_timer_write() - allocate(local_cell_list(num_local_vertices)) allocate(local_block_list(num_local_vertices)) + ! To look up all the local cells, we simply look at vlblloctab returned in indxtab + ! NOTE: TODO: local_block_list might not be the same as proc_id when num blocks > 0? do i=1,num_local_vertices local_cell_list(i)=indxtab(vlbllocidx+i-1) local_block_list(i)=dminfo % my_proc_id !call mpas_log_write('local_cell_list: $i, local_block_list: $i', intArgs=(/local_cell_list(i),local_block_list(i)/) ) end do - ! Calling scotchfdgraphexit + ! Clean up call scotchfdgraphexit (scotchdgraph (1)) call scotchfdgraphexit (scotchdgraph_redist (1)) call scotchfstratexit (stradat (1)) - call mpas_timer_stop('scotch') - call mpas_log_write('Scotch parition successful ') + deallocate(edgeloctab) + deallocate(vertloctab) + deallocate(indxtab) + call MPI_Comm_free(localcomm, mpi_ierr) + call mpas_timer_stop('scotch_total') + call mpas_log_write('Scotch partition successful') #endif else ! 
useScotch = .false. if (dminfo % my_proc_id == IO_NODE) then From a0734718105180ac7187133c98caa69423ffcd23 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 25 Jul 2025 13:22:32 -0600 Subject: [PATCH 14/43] Writing out correct graph parition files from PT-Scotch --- src/framework/mpas_block_decomp.F | 69 ++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 6e5ec3874e..e3abab4b4e 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -78,7 +78,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), allocatable :: local_block_list integer, dimension(:,:), allocatable :: sorted_local_cell_list - integer, dimension(:), allocatable :: local_block_id_arr, owning_proc_arr + integer, dimension(:), allocatable ::global_block_id_arr, local_block_id_arr, owning_proc_arr integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices @@ -89,6 +89,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l logical :: useScotch #ifdef MPAS_SCOTCH integer :: nLocEdgesGraph = 0, edgelocsiz = 0 + character (len=StrKIND) :: partitionFilePrefix integer, dimension(:), allocatable :: edgeloctab, vertloctab doubleprecision :: stradat (scotch_stratdim) doubleprecision :: scotchgraph (scotch_graphdim) @@ -118,6 +119,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if(dminfo % total_blocks > 1) then allocate(local_nvertices(dminfo % nprocs)) allocate(global_start(dminfo % nprocs)) + allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) allocate(global_list(partial_global_graph_info % nVerticesTotal)) allocate(local_block_id_arr(partial_global_graph_info % nVertices)) 
allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) @@ -125,7 +127,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l useScotch = .false. if (dminfo % my_proc_id == IO_NODE) then if ( trim(blockFilePrefix) == '' ) then - call mpas_log_write("Namelist option config_block_decomp_file_prefix is set to ''", MPAS_LOG_ERR) + call mpas_log_write("Namelist option config_block_decomp_file_prefix is set to ''", MPAS_LOG_WARN) #ifdef MPAS_SCOTCH useScotch = .true. #else @@ -265,7 +267,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_log_write('Graph build successful ') ! Initialize the strategy data structure call scotchfstratinit (stradat (1), ierr) - call mpas_log_write('strategy init success') + call mpas_log_write('Scotch strategy init success') call mpas_timer_start('scotch_graph_partitioning') ! Partition the distributed graph and save the result in local_block_id_arr @@ -319,14 +321,71 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(local_cell_list(num_local_vertices)) allocate(local_block_list(num_local_vertices)) - ! To look up all the local cells, we simply look at vlblloctab returned in indxtab ! NOTE: TODO: local_block_list might not be the same as proc_id when num blocks > 0? do i=1,num_local_vertices local_cell_list(i)=indxtab(vlbllocidx+i-1) local_block_list(i)=dminfo % my_proc_id - !call mpas_log_write('local_cell_list: $i, local_block_list: $i', intArgs=(/local_cell_list(i),local_block_list(i)/) ) + !call mpas_log_write('local_cell_list: $i, local_block_list: $i',MPAS_LOG_ERR, intArgs=(/local_cell_list(i),local_block_list(i)/) ) + end do + + !call mpas_log_write('nVertices $i num_local_vertices: $i',MPAS_LOG_ERR, intArgs=(/partial_global_graph_info % nVertices,num_local_vertices/)) + ! do i=1,partial_global_graph_info % nVertices + ! 
call mpas_log_write('local_block_id_arr($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_block_id_arr(i)/)) + ! end do + + ! Using the local_nvertices array to hold the original number of vertices in + ! the partial graph readb by each processor. Might need to use a different array + ! to clear up potential confusion. + local_nvertices(dminfo % my_proc_id + 1) = partial_global_graph_info % nVertices + + ! call mpas_log_write('local_nvertices($i): $i', MPAS_LOG_ERR, intArgs=(/i,num_local_vertices/)) + + ! Gather all the partial_global_graph_info % nVertices to IO_NODE. + ! num_local_vertices is the number of vertices that this processor owns, determined by the + ! Scotch paritioning. Whereas artial_global_graph_info % nVertices is the number of vertices + ! resident in the partial graph read by this processor. The latter is the correct size of the + ! local_block_id_arr. + call MPI_Gather( partial_global_graph_info % nVertices, 1, MPI_INTEGER, local_nvertices, & + 1, MPI_INTEGER, 0, localcomm, ierr) + + ! if (dminfo % my_proc_id == IO_NODE) then + ! call mpas_log_write('After gathering local_nvertices on IO_NODE: ') + ! do i=1, dminfo % nProcs + ! call mpas_log_write('local_nvertices: $i', intArgs=(/local_nvertices(i)/) ) + ! end do + ! end if + + ! Compute the displacements for gathering all the local_block_id_arr to global_block_id_arr + global_start(1) = 0 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) end do + ! do i=1, dminfo % nProcs + ! call mpas_log_write('global_start: $i', intArgs=(/global_start(i)/) ) + ! end do + + ! Gather all the local block ids to global_block_id_arr so IO_NODE can write out the partitioning data + call MPI_Gatherv( local_block_id_arr, partial_global_graph_info % nVertices, MPI_INTEGER, global_block_id_arr, & + local_nvertices, global_start, MPI_INTEGER, 0, localcomm, ierr) + ! 
Write out the paritioning data to a file from IO_NODE + if (dminfo % my_proc_id == IO_NODE) then + partitionFilePrefix=trim(blockFilePrefix) + if (trim(partitionFilePrefix) == '') then + write(partitionFilePrefix,'(a,i0,a)') 'x1.',partial_global_graph_info%nVerticesTotal,'.graph.info.part.' + end if + write(filename,'(a,i0)') trim(partitionFilePrefix), dminfo % total_blocks + + call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) + call mpas_new_unit(ounit) + open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) + do i=1,partial_global_graph_info % nVerticesTotal + write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) + !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) + end do + close(unit=ounit) + call mpas_release_unit(ounit) + end if ! Clean up call scotchfdgraphexit (scotchdgraph (1)) From d04cbf896ac65c74b3808815119ee4c49a429efc Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 18 Sep 2025 09:39:28 -0600 Subject: [PATCH 15/43] WIP --- Makefile | 1 + src/framework/Makefile | 4 + src/framework/mpas_block_decomp.F | 78 +++++-- src/framework/mpas_ptscotch_interface.F | 262 ++++++++++++++++++++++++ src/framework/ptscotch_interface.c | 221 ++++++++++++++++++++ 5 files changed, 544 insertions(+), 22 deletions(-) create mode 100644 src/framework/mpas_ptscotch_interface.F create mode 100644 src/framework/ptscotch_interface.c diff --git a/Makefile b/Makefile index f668435435..59b16a9765 100644 --- a/Makefile +++ b/Makefile @@ -765,6 +765,7 @@ ifneq "$(SCOTCH)" "" SCOTCH_FFLAGS = -DMPAS_SCOTCH FCINCLUDES += $(SCOTCH_FCINCLUDES) + CPPINCLUDES += $(SCOTCH_FCINCLUDES) LIBS += $(SCOTCH_LIBS) override CPPFLAGS += $(SCOTCH_FFLAGS) endif diff --git a/src/framework/Makefile b/src/framework/Makefile index 2d8e7dc92b..0d877c1f9a 100644 --- a/src/framework/Makefile +++ b/src/framework/Makefile @@ -36,6 +36,8 @@ OBJS = mpas_kind_types.o \ mpas_log.o \ mpas_halo.o \ 
mpas_string_utils.o \ + mpas_ptscotch_interface.o \ + ptscotch_interface.o \ mpas_stream_inquiry.o \ stream_inquiry.o @@ -112,6 +114,8 @@ xml_stream_parser.o: xml_stream_parser.c mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o +mpas_ptscotch_interface.o : mpas_derived_types.o mpas_dmpar.o mpas_log.o ptscotch_interface.o + mpas_stream_inquiry.o : mpas_derived_types.o mpas_log.o mpas_c_interfacing.o clean: diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index e3abab4b4e..f6fcac16f7 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -25,10 +25,11 @@ module mpas_block_decomp use mpas_derived_types use mpas_io_units use mpas_log + use mpas_ptscotch_interface -#ifdef MPAS_SCOTCH -#include "ptscotchf.h" -#endif +!#ifdef MPAS_SCOTCH +!#include "ptscotchf.h" +!#endif type graph integer :: nVerticesTotal @@ -56,7 +57,11 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_count, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ use mpas_timer, only : mpas_timer_start, mpas_timer_stop +#ifdef MPAS_USE_MPI_F08 + use mpi_f08, only : MPI_Comm, MPI_COMM_WORLD, MPI_INTEGER, MPI_Comm_dup, MPI_Comm_free, MPI_Gather, MPI_Gatherv +#else use mpi +#endif implicit none @@ -95,7 +100,13 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l doubleprecision :: scotchgraph (scotch_graphdim) doubleprecision :: scotchdgraph (SCOTCH_DGRAPHDIM) doubleprecision :: scotchdgraph_redist (SCOTCH_DGRAPHDIM) - integer :: localcomm, mpi_ierr + integer :: mpi_ierr +#ifdef MPAS_USE_MPI_F08 + type (MPI_Comm) :: localcomm +#else + integer :: localcomm +#endif + integer, dimension(:), allocatable :: indxtab integer :: vertglbnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval @@ -200,14 +211,14 @@ subroutine 
mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l k = 1 do i=1,partial_global_graph_info % nVertices vertloctab(i) = k - !call mpas_log_write('i=$i vertloctab= $i', intArgs=(/i,vertloctab(i)/) ) + call mpas_log_write('rank=$i, i=$i vertloctab= $i', intArgs=(/dminfo % my_proc_id, i,vertloctab(i)/) ) !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) do j=1,partial_global_graph_info % nAdjacent(i) if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle edgeloctab(k) = partial_global_graph_info % adjacencyList(j,i) - !call mpas_log_write('k=$i edgeloctab= $i', intArgs=(/k,edgeloctab(k)/) ) + call mpas_log_write('rank=$i k=$i edgeloctab= $i', intArgs=(/dminfo % my_proc_id, k,edgeloctab(k)/) ) k = k + 1 end do end do @@ -220,14 +231,24 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (mpi_ierr .ne. 0) then call mpas_log_write('Cannot duplicate communicator') endif - call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) + !call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) +#ifdef MPAS_USE_MPI_F08 + call mpas_log_write('duplicate communicator is mpi_f08 $i', intArgs=(/localcomm % mpi_val/)) + ierr = scotch_dgraphinit(scotchdgraph(1), localcomm% mpi_val) +#else + call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/)) + ierr = scotch_dgraphinit(scotchdgraph(1), localcomm) +#endif ! Initialize the Scotch graph data structure, and an extra one to hold the re-distributed graph - call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) + !call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) + !ierr = scotch_dgraphinit(scotchdgraph(1), localcomm) if (ierr .ne. 
0) then call mpas_log_write('Cannot initialize D Scotch Graph', MPAS_LOG_CRIT) endif - call scotchfdgraphinit(scotchdgraph_redist(1), localcomm, ierr) + call mpas_log_write('initialized D Scotch Graph') + !ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm) + !call scotchfdgraphinit(scotchdgraph_redist(1), localcomm, ierr) ! From Scotch documentation: edgelocsiz is lower-bounded by the minimum size ! of the edge array required to encompass all used adjacency values; it is @@ -240,38 +261,51 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! Scotch Fortran interface requires that if veloloctab, vlblloctab are skipped, then we pass a reference ! to vertloctab(1) so that they are treated as null pointers. Similarly, pass reference to edgeloctab for ! the optional edgegsttab and edloloctab arguments. - call scotchfdgraphbuild (scotchdgraph(1), & - 1, & ! base value value for index arrays (1 in Fortran) + ierr = scotch_dgraphbuild (scotchdgraph(1), & partial_global_graph_info % nVertices, & ! num of local vertices on the calling process - partial_global_graph_info % nVertices, & ! max number of local vertices = num_local_vertices for graphs without holes vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex - vertloctab (2), & ! vendloctab: Array of after-last indices in edgeloctab - vertloctab (1), & ! veloloctab: Optional array of integer loads for each local vertex - vertloctab (1), & ! vlblloctab: Optional array of labels for each local vertex nLocEdgesGraph, & ! Number of local edges, including to ghost vertices edgelocsiz, & ! Defined previously - edgeloctab(1), & ! Holds the adjacency array for every local vertex - edgeloctab(1), & ! edgegsttab: Optional array holding the local and ghost indices - edgeloctab(1), & ! edloloctab: Optional array of integer loads for each local edge - ierr) + edgeloctab(1)) ! Holds the adjacency array for every local vertex + + + ! 
call scotchfdgraphbuild (scotchdgraph(1), & + ! 1, & ! base value value for index arrays (1 in Fortran) + ! partial_global_graph_info % nVertices, & ! num of local vertices on the calling process + ! partial_global_graph_info % nVertices, & ! max number of local vertices = num_local_vertices for graphs without holes + ! vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex + ! vertloctab (2), & ! vendloctab: Array of after-last indices in edgeloctab + ! vertloctab (1), & ! veloloctab: Optional array of integer loads for each local vertex + ! vertloctab (1), & ! vlblloctab: Optional array of labels for each local vertex + ! nLocEdgesGraph, & ! Number of local edges, including to ghost vertices + ! edgelocsiz, & ! Defined previously + ! edgeloctab(1), & ! Holds the adjacency array for every local vertex + ! edgeloctab(1), & ! edgegsttab: Optional array holding the local and ghost indices + ! edgeloctab(1), & ! edloloctab: Optional array of integer loads for each local edge + ! ierr) if (ierr .ne. 0) then call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) endif ! Only needed during development/debugging. - call scotchfdgraphcheck (scotchdgraph(1), ierr) + !call scotchfdgraphcheck (scotchdgraph(1), ierr) + + ierr = scotch_dgraphcheck (scotchdgraph(1)) + if (ierr .ne. 0) then call mpas_log_write(' Scotch Graph check not successfull', MPAS_LOG_CRIT) endif call mpas_log_write('Graph build successful ') ! Initialize the strategy data structure - call scotchfstratinit (stradat (1), ierr) + !call scotchfstratinit (stradat (1), ierr) + ierr = scotch_stratinit (stradat (1)) call mpas_log_write('Scotch strategy init success') call mpas_timer_start('scotch_graph_partitioning') ! 
Partition the distributed graph and save the result in local_block_id_arr - call scotchfdgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1), ierr) + !call scotchfdgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1), ierr) + ierr = scotch_dgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1)) call mpas_timer_stop('scotch_graph_partitioning') call mpas_log_write('Graph parition successful ') diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F new file mode 100644 index 0000000000..2913bf6b98 --- /dev/null +++ b/src/framework/mpas_ptscotch_interface.F @@ -0,0 +1,262 @@ + +module mpas_ptscotch_interface + use iso_c_binding, only : c_int, c_double +#include "ptscotchf.h" + public :: scotch_dgraphinit, scotch_dgraphbuild + + type, bind(c) :: my_scotch_graph_type + real(c_double) :: dummy (29) + end type + + +type(my_scotch_graph_type), pointer :: my_scotch_dgraph => null() + + +contains + + function scotch_dgraphinit(dgraph, comm) result(ierr) + use iso_c_binding, only : c_ptr, c_int, c_loc + use mpas_log, only : mpas_log_write +! #ifdef MPAS_USE_MPI_F08 +! use mpi_f08, only : MPI_Comm +! #endif + + implicit none + ! Arguments + doubleprecision, target, intent(in) :: dgraph (29) +! #ifdef MPAS_USE_MPI_F08 +! type (MPI_Comm), intent(in) :: comm +! #else + integer, intent(in) :: comm +! #endif + + ! 
Return value + integer :: ierr + integer :: i + type(c_ptr) :: dgraph_ptr + + interface + function scotchfdgraphinit(dgraph_ptr_1, localcomm) bind(C, name='scotchm_dgraphinit') result(err) + !function scotchfdgraphinit(localcomm) bind(C, name='scotchm_dgraphinit') result(err) + use iso_c_binding, only : c_ptr, c_int, c_double + !real(c_double), intent(in) :: dgraph_ptr_1 (29) + type(c_ptr), value :: dgraph_ptr_1 + integer, intent(in), value :: localcomm + integer(kind=c_int) :: err + end function scotchfdgraphinit + end interface + + + ierr = 0 + !allocate(my_scotch_dgraph) + + !my_scotch_dgraph%dummy[0] = -1 + + !dgraph_ptr = c_loc(my_scotch_dgraph) + + call mpas_log_write('Initializing Distributed Scotch Graph') +! #ifdef MPAS_USE_MPI_F08 +! ierr = scotchfdgraphinit(dgraph_ptr, comm % mpi_val) +! #else + ierr = scotchfdgraphinit(c_loc(dgraph), comm) + !ierr = scotchfdgraphinit(c_loc(my_scotch_dgraph), comm) + !ierr = scotchfdgraphinit(comm) +!#endif + + end function scotch_dgraphinit + + + function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) result(ierr) + + use iso_c_binding, only : c_ptr, c_int, c_loc + use mpas_log, only : mpas_log_write + use mpas_c_interfacing, only : mpas_f_to_c_string + use iso_c_binding, only : c_char, c_associated + + implicit none + + doubleprecision, target, intent(in) :: dgraph (29) + integer, intent(in) :: nVertices + integer, intent(in) :: vertloctab(nVertices+1) + integer, intent(in) :: nLocEdgesGraph + integer, intent(in) :: edgelocsiz + integer, intent(in) :: adjncy(nLocEdgesGraph) + + + ! 
Return value + integer :: ierr + type(c_ptr) :: dgraph_ptr + + interface + function scotchfdgraphbuild(dgraph_ptr_1, nVertices, vertloctab, & + nLocEdgesGraph, edgelocsiz, adjncy) bind(C, name='scotchm_dgraphbuild') result(err) + use iso_c_binding, only : c_ptr, c_int + type(c_ptr), value :: dgraph_ptr_1 + integer, intent(in), value :: nVertices + integer(c_int) :: vertloctab(nVertices+1) + integer, intent(in), value :: nLocEdgesGraph + integer, intent(in), value :: edgelocsiz + integer(c_int) :: adjncy(nLocEdgesGraph) + integer(kind=c_int) :: err + end function scotchfdgraphbuild + end interface + + + ierr = 0 + !dgraph_ptr = c_loc(dgraph(1)) + + call mpas_log_write('Building Distributed Scotch Graph') + + ierr = scotchfdgraphbuild(c_loc(dgraph), nVertices, vertloctab, & + nLocEdgesGraph, edgelocsiz, adjncy) + + + end function scotch_dgraphbuild + + + + function scotch_dgraphcheck(dgraph) result(ierr) + + use mpas_log, only : mpas_log_write + use iso_c_binding, only : c_ptr, c_int, c_loc + use iso_c_binding, only : c_char, c_associated + + implicit none + + doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) + + ! Return value + integer :: ierr + type(c_ptr) :: dgraph_ptr + + interface + function scotchfdgraphcheck(dgraph_ptr_1) bind(C, name='scotchm_dgraphcheck') result(err) + use iso_c_binding, only : c_int, c_ptr + type(c_ptr), value :: dgraph_ptr_1 + integer(kind=c_int) :: err + end function scotchfdgraphcheck + end interface + + + ierr = 0 + + dgraph_ptr = c_loc(dgraph(1)) + + call mpas_log_write('Checking Distributed Scotch Graph') + + ierr = scotchfdgraphcheck(c_loc(dgraph)) + + + end function scotch_dgraphcheck + + + function scotch_dgraphexit(dgraph) result(ierr) + + use mpas_log, only : mpas_log_write + use iso_c_binding, only : c_ptr, c_int, c_loc + + implicit none + + doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) + + ! 
Return value + integer :: ierr + type(c_ptr) :: dgraph_ptr + + interface + function scotchfdgraphexit(dgraph_ptr_1) bind(C, name='scotchm_dgraphexit') result(err) + use iso_c_binding, only : c_int, c_ptr + type(c_ptr) :: dgraph_ptr_1 + integer(kind=c_int) :: err + end function scotchfdgraphexit + end interface + + + ierr = 0 + + dgraph_ptr = c_loc(dgraph(1)) + + call mpas_log_write('Checking Distributed Scotch Graph') + + ierr = scotchfdgraphexit(dgraph_ptr) + + + end function scotch_dgraphexit + + + function scotch_stratinit(stradat) result(ierr) + + use mpas_log, only : mpas_log_write + use iso_c_binding, only : c_ptr, c_int, c_loc + use iso_c_binding, only : c_char, c_associated + + implicit none + + doubleprecision, target, intent(in) :: stradat (scotch_stratdim) + + + ! Return value + integer :: ierr + type(c_ptr) :: dgraph_ptr + + interface + function scotchfstratinit(strat_ptr_1) bind(C, name='scotchm_stratinit') result(err) + use iso_c_binding, only : c_int, c_ptr + type(c_ptr), value :: strat_ptr_1 + integer(kind=c_int) :: err + end function scotchfstratinit + end interface + + + ierr = 0 + + call mpas_log_write('Checking Distributed Scotch Graph') + + ierr = scotchfstratinit(c_loc(stradat)) + + + end function scotch_stratinit + + + function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) + + use mpas_log, only : mpas_log_write + use iso_c_binding, only : c_ptr, c_int, c_loc + use iso_c_binding, only : c_char, c_associated + + implicit none + + doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) + integer, intent(in) :: num_part + doubleprecision, target, intent(in) :: stradat (scotch_stratdim) + integer, intent(out) :: parttab(*) + + + ! 
Return value + integer :: ierr + type(c_ptr) :: dgraph_ptr + + interface + function scotchfdgraphpart(dgraph_ptr, num_part, strat_ptr_1, parttab ) bind(C, name='scotchm_dgraphpart') result(err) + use iso_c_binding, only : c_int, c_ptr + type(c_ptr), value :: dgraph_ptr + integer, intent(in), value :: num_part + type(c_ptr), value :: strat_ptr_1 + integer(c_int) :: parttab(*) + integer(kind=c_int) :: err + end function scotchfdgraphpart + end interface + + + ierr = 0 + + call mpas_log_write('Checking Distributed Scotch Graph') + + ierr = scotchfdgraphpart(c_loc(my_scotch_dgraph), num_part, c_loc(stradat), parttab) + + + end function scotch_dgraphpart + + + +end module mpas_ptscotch_interface diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c new file mode 100644 index 0000000000..013769e9fd --- /dev/null +++ b/src/framework/ptscotch_interface.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2023, The University Corporation for Atmospheric Research (UCAR). + * + * Unless noted otherwise source code is licensed under the BSD license. 
+ * Additional copyright and license information can be found in the LICENSE file + * distributed with this code, or at http://mpas-dev.github.com/license.html + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ptscotch.h" + + +///#ifdef _MPI + +///#endif + +#define MSGSIZE 256 + +SCOTCH_Dgraph dgrfdat; + + +typedef struct Dgraph_ { + unsigned int flagval; /*+ Graph properties +*/ + int baseval; /*+ Base index for edge/vertex arrays +*/ + int vertglbnbr; /*+ Global number of vertices +*/ + int vertglbmax; /*+ Maximum number of local vertices over all processes +*/ + int vertgstnbr; /*+ Number of local + ghost vertices +*/ + int vertgstnnd; /*+ vertgstnbr + baseval +*/ + int vertlocnbr; /*+ Local number of vertices +*/ + int vertlocnnd; /*+ Local number of vertices + baseval +*/ + int * vertloctax; /*+ Local vertex beginning index array [based] +*/ + int * vendloctax; /*+ Local vertex end index array [based] +*/ + int * veloloctax; /*+ Local vertex load array if present +*/ + int velolocsum; /*+ Local sum of all vertex loads +*/ + int veloglbsum; /*+ Global sum of all vertex loads +*/ + int * vnumloctax; /*+ Arrays of global vertex numbers in original graph +*/ + int * vlblloctax; /*+ Arrays of vertex labels (when read from file) +*/ + int edgeglbnbr; /*+ Global number of arcs +*/ + int edgeglbmax; /*+ Maximum number of local edges over all processes +*/ + int edgelocnbr; /*+ Number of local edges +*/ + int edgelocsiz; /*+ Size of local edge array (= edgelocnbr when compact) +*/ + int edgeglbsmx; /*+ Maximum size of local edge arrays over all processes +*/ + int * edgegsttax; /*+ Edge array holding local indices of neighbors [based] +*/ + int * edgeloctax; /*+ Edge array holding global neighbor numbers [based] +*/ + int * edloloctax; /*+ Edge load array +*/ + int degrglbmax; /*+ Maximum degree over all processes +*/ + int pkeyglbval; /*+ Communicator key value: folded communicators are distinct +*/ + MPI_Comm proccomm; /*+ Graph 
communicator +*/ + int procglbnbr; /*+ Number of processes sharing graph data +*/ + int proclocnum; /*+ Number of this process +*/ + int * procvrttab; /*+ Global array of vertex number ranges [+1,based] +*/ + int * proccnttab; /*+ Count array for local number of vertices +*/ + int * procdsptab; /*+ Displacement array with respect to proccnttab [+1,based] +*/ + int procngbnbr; /*+ Number of neighboring processes +*/ + int procngbmax; /*+ Maximum number of neighboring processes +*/ + int * procngbtab; /*+ Array of neighbor process numbers [sorted] +*/ + int * procrcvtab; /*+ Number of vertices to receive in ghost vertex sub-arrays +*/ + int procsndnbr; /*+ Overall size of local send array +*/ + int * procsndtab; /*+ Number of vertices to send in ghost vertex sub-arrays +*/ + int * procsidtab; /*+ Array of indices to build communication vectors (send) +*/ + int procsidnbr; /*+ Size of the send index array +*/ +} Dgraph; + + +/* + * Interface routines for writing log messages; defined in mpas_log.F + * messageType_c may be any of "MPAS_LOG_OUT", "MPAS_LOG_WARN", "MPAS_LOG_ERR", or "MPAS_LOG_CRIT" + */ +int scotchm_dgraphinit(void * ptr, int localcomm) +//int scotchm_dgraphinit(int localcomm) +{ + MPI_Comm comm; + MPI_Comm comm2; + + int size, rank, err; + + comm = MPI_Comm_f2c((MPI_Fint)localcomm); + + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; + + err = SCOTCH_dgraphInit(dgraph, comm); + + + Dgraph * my_dgraph = (Dgraph *) dgraph; + + printf("In scotchm_dgraphinit: After SCOTCH_dgraphInit: = %d \n",my_dgraph->procglbnbr); + printf("In scotchm_dgraphinit: After SCOTCH_dgraphInit: = %d \n",my_dgraph->proclocnum); + + comm2 = my_dgraph->proccomm; + + MPI_Comm_size (comm2, &size); /* Get communicator data */ + MPI_Comm_rank (comm2, &rank); + + printf("In scotchm_dgraphinit: MPI_Comm size = %d, rank = %d\n",size, rank); + + + return err; + +} + +int scotchm_dgraphbuild(void * ptr, + int nVertices, + int * vertloctab_1, + int nLocEdgesGraph, + int edgelocsiz, + int 
*adjncy +) +{ + int baseval = 1; // Fortran-style 1-based indexing + int vertlocnbr = nVertices; + int * vertloctab = vertloctab_1; + int * vendloctab = vertloctab_1 + 1; + int * veloloctab = NULL; // vertex weights not used + int * vlblloctab = NULL; // vertex labels not used + int edgelocnbr = nLocEdgesGraph; + int *edgeloctab = adjncy; + int * edgegsttab = NULL; // Optional array holding the local and ghost indices + int * edloloctab = NULL; // Optional array of integer loads for each local edge + int i,err; + + + + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; + + Dgraph * my_dgraph = (Dgraph *) dgraph; + + for (int i=0; i < nVertices+1; i++) { + printf("before scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, vertloctab[i]); + } + for (int i=0; i < nLocEdgesGraph; i++) { + printf("before scotchm_dgraphbuild: rank: %d edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, edgeloctab[i]); + } + + err = SCOTCH_dgraphBuild (dgraph, + baseval, + vertlocnbr, + vertlocnbr, + vertloctab, + vendloctab, + veloloctab, + vlblloctab, + edgelocnbr, + edgelocsiz, + edgeloctab, + edgegsttab, + edloloctab); + + + + printf("In scotchm_dgraphbuild: rank: %d vertglbnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertglbnbr); + printf("In scotchm_dgraphbuild: rank: %d vertlocnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertlocnbr); + + for (int i=0; i < nVertices+1; i++) { + printf("In scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->vertloctax[i]); + } + for (int i=0; i < nLocEdgesGraph; i++) { + printf("In scotchm_dgraphbuild: rank: %d edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->edgeloctax[i]); + } + + return err; + +} + +int scotchm_dgraphcheck(void * ptr) +{ + + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; + + return SCOTCH_dgraphCheck(dgraph); +} + + + +int scotchm_dgraphpart(void * ptr, int num_part, void * ptr_strat, int * parttab){ + + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; + 
SCOTCH_Strat *strat = (SCOTCH_Strat *) ptr_strat; + + return SCOTCH_dgraphPart(dgraph, num_part, strat, parttab); +} + +int scotch_dgraphredist(void * ptr, int *partloctab, void * ptr_out){ + + + SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *) ptr; + SCOTCH_Dgraph *dgraph_out = (SCOTCH_Dgraph *) ptr_out; + int * permgsttab = NULL; // Redistribution permutation array + int vertlocdlt = 0; // Extra size of local vertex array + int edgelocdlt = 0; // Extra size of local edge array + + return SCOTCH_dgraphRedist (dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); +} + +// int scotchfdgraphdata() +// { +// void SCOTCH_dgraphData (const SCOTCH_Dgraph * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num ** const, MPI_Comm * const); +// } + +void scotchm_dgraphexit(SCOTCH_Dgraph *dgraph) +{ + + return SCOTCH_dgraphExit(dgraph); +} + +int scotchm_stratinit(void * strat_ptr) +{ + SCOTCH_Strat *strat = (SCOTCH_Strat *) strat_ptr; + + return SCOTCH_stratInit(strat); +} + +// int scotchfstratexit() +// { +// void SCOTCH_stratExit (SCOTCH_Strat * const); +// } From 4e105d49019d7f0943b1d11d771f355c08b60dfa Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 23 Sep 2025 17:35:23 -0600 Subject: [PATCH 16/43] Working interfaces --- src/framework/mpas_block_decomp.F | 130 +++++------- src/framework/mpas_ptscotch_interface.F | 266 ++++++++++++++---------- src/framework/ptscotch_interface.c | 113 +++++----- 3 files changed, 265 insertions(+), 244 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index f6fcac16f7..8bb298ae2b 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -107,7 +107,6 @@ subroutine 
mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer :: localcomm #endif - integer, dimension(:), allocatable :: indxtab integer :: vertglbnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval #endif @@ -211,14 +210,14 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l k = 1 do i=1,partial_global_graph_info % nVertices vertloctab(i) = k - call mpas_log_write('rank=$i, i=$i vertloctab= $i', intArgs=(/dminfo % my_proc_id, i,vertloctab(i)/) ) + !call mpas_log_write('rank=$i, i=$i vertloctab= $i', intArgs=(/dminfo % my_proc_id, i,vertloctab(i)/) ) !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) do j=1,partial_global_graph_info % nAdjacent(i) if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle edgeloctab(k) = partial_global_graph_info % adjacencyList(j,i) - call mpas_log_write('rank=$i k=$i edgeloctab= $i', intArgs=(/dminfo % my_proc_id, k,edgeloctab(k)/) ) + !call mpas_log_write('rank=$i k=$i edgeloctab= $i', intArgs=(/dminfo % my_proc_id, k,edgeloctab(k)/) ) k = k + 1 end do end do @@ -231,25 +230,15 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (mpi_ierr .ne. 0) then call mpas_log_write('Cannot duplicate communicator') endif - !call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/) ) + ! 
Initialize the Scotch graph data structure, and an extra one to hold the re-distributed graph #ifdef MPAS_USE_MPI_F08 - call mpas_log_write('duplicate communicator is mpi_f08 $i', intArgs=(/localcomm % mpi_val/)) ierr = scotch_dgraphinit(scotchdgraph(1), localcomm% mpi_val) + ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm% mpi_val) #else - call mpas_log_write('duplicate communicator is $i', intArgs=(/localcomm/)) ierr = scotch_dgraphinit(scotchdgraph(1), localcomm) + ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm) #endif - - ! Initialize the Scotch graph data structure, and an extra one to hold the re-distributed graph - !call scotchfdgraphinit(scotchdgraph(1), localcomm, ierr) - !ierr = scotch_dgraphinit(scotchdgraph(1), localcomm) - if (ierr .ne. 0) then - call mpas_log_write('Cannot initialize D Scotch Graph', MPAS_LOG_CRIT) - endif - call mpas_log_write('initialized D Scotch Graph') - !ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm) - !call scotchfdgraphinit(scotchdgraph_redist(1), localcomm, ierr) - + ! From Scotch documentation: edgelocsiz is lower-bounded by the minimum size ! of the edge array required to encompass all used adjacency values; it is ! therefore at least equal to the maximum of the vendloctab entries, over all @@ -258,9 +247,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! Build the distributed Scotch graph and save it in scotchdgraph ! Note: Optional arguments veloloctab, vlblloctab, edgegsttab, and edloloctab are not needed here. - ! Scotch Fortran interface requires that if veloloctab, vlblloctab are skipped, then we pass a reference - ! to vertloctab(1) so that they are treated as null pointers. Similarly, pass reference to edgeloctab for - ! the optional edgegsttab and edloloctab arguments. ierr = scotch_dgraphbuild (scotchdgraph(1), & partial_global_graph_info % nVertices, & ! num of local vertices on the calling process vertloctab (1), & ! 
Array of start indices in edgeloctab for each local vertex @@ -284,85 +270,68 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! edgeloctab(1), & ! edloloctab: Optional array of integer loads for each local edge ! ierr) - if (ierr .ne. 0) then - call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) - endif ! Only needed during development/debugging. - !call scotchfdgraphcheck (scotchdgraph(1), ierr) - ierr = scotch_dgraphcheck (scotchdgraph(1)) - if (ierr .ne. 0) then - call mpas_log_write(' Scotch Graph check not successfull', MPAS_LOG_CRIT) - endif - call mpas_log_write('Graph build successful ') ! Initialize the strategy data structure - !call scotchfstratinit (stradat (1), ierr) ierr = scotch_stratinit (stradat (1)) - call mpas_log_write('Scotch strategy init success') call mpas_timer_start('scotch_graph_partitioning') ! Partition the distributed graph and save the result in local_block_id_arr - !call scotchfdgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1), ierr) ierr = scotch_dgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1)) + ! do i=1,partial_global_graph_info % nVertices + ! call mpas_log_write('AFTER PART rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i,local_block_id_arr(i)/) ) + ! end do call mpas_timer_stop('scotch_graph_partitioning') call mpas_log_write('Graph parition successful ') ! After the paritioning above, each processor would not necessarily have information about all of the ! vertices it owns. To obtain this information, Scotch provides a convenience function to redistribute the graph ! to all processors, so that each processor has information about all of the vertices it owns. - call scotchfdgraphredist(scotchdgraph(1), & ! Input: original distributed graph - local_block_id_arr(1), & ! Input: the partition array - scotchdgraph(1), & ! Optional: permgsttab. Pass ref to original graph to skip it. - -1 , & ! 
Optional: vertlocdlt. Pass <0 to skip it. - -1, & ! Optional: edgelocdlt. Pass <0 to skip it. + ierr = scotch_dgraphredist(scotchdgraph(1), & ! Input: original distributed graph + local_block_id_arr, & ! Input: the partition array scotchdgraph_redist(1), & ! Output: re-distributed graph - ierr) - call mpas_log_write('Graph redist successful ') + num_local_vertices) ! Output: number of local vertices - ! indxtab holds the graph data returned by the call to scotchfdgraphdata - ! Not sure how large indxtab needs to be, so currently allocating it to be - ! 5 times the number of local vertices - allocate(indxtab(5*partial_global_graph_info % nVertices)) + allocate(local_cell_list(num_local_vertices)) + allocate(local_block_list(num_local_vertices)) + ! call mpas_log_write('After redist, num_local_vertices is $i', intArgs=(/num_local_vertices/) ) + ! do i=1,partial_global_graph_info % nVertices + ! call mpas_log_write('AFTER REDIST rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i,local_block_id_arr(i)/) ) + ! end do + ierr = scotch_dgraphout (scotchdgraph_redist(1), local_cell_list) + + ! call mpas_log_write('Graph redist successful ') + ! do i=1,num_local_vertices + ! call mpas_log_write('AFTER REDIST local_cell_list ($i)= $i', intArgs=(/i, local_cell_list(i)/) ) + ! end do ! Input to scotchfdgraphdata is the re-distributed graph scotchdgraph_redist, and all ! the remaining are output arguments. - call scotchfdgraphdata(scotchdgraph_redist(1), & - indxtab(1), & ! Holds the data queried from graph, local to this processor - baseval, & ! returns base value value for index arrays (1 in Fortran) - vertglbnbr, & ! number of global vertices - num_local_vertices, & ! number of local vertices - vertlocmax, & ! max number of local vertices = num_local_vertices for graphs without holes - vertgstnbr, & ! num of local and ghost vertices. -1 unless dgraphGhst is called - vertlocidx, & ! starting index of vertloctab in indxtab - vendlocidx, & ! 
starting index of vendloctab in indxtab - velolocidx, & ! starting index of veloloctab in indxtab - vlbllocidx, & ! starting index of vlblloctab in indxtab - edgeglbnbr, & ! Global number of arcs. Each edge counts twice. - edgelocnbr, & ! Local number of arcs including to ghost vertices - edgelocsiz, & ! ~ max of vendloctab entries, for local vertices, minus baseval - edgelocidx, & ! starting index of edgeloctab in indxtab - edgegstidx, & ! starting index of edgegsttab in indxtab - edlolocidx, & ! starting index of edloloctab in indxtab - comm, ierr ) ! communicator used and error code - - call mpas_log_write('vertlocnbr $i vertlocidx $i ', intArgs=(/num_local_vertices,vertlocidx/) ) - call mpas_log_write('vertgstnbr $i vertlocmax $i ', intArgs=(/vertgstnbr,vertlocmax/) ) - call mpas_log_write('edgelocsiz $i edgelocidx $i ', intArgs=(/edgelocsiz,edgelocidx/) ) - call mpas_log_write('edgegstidx $i edlolocidx $i ', intArgs=(/edgegstidx,edlolocidx/) ) - call mpas_log_write('vertglbnbr $i ierr $i ', intArgs=(/vertglbnbr,ierr/) ) - - allocate(local_cell_list(num_local_vertices)) - allocate(local_block_list(num_local_vertices)) - ! To look up all the local cells, we simply look at vlblloctab returned in indxtab + ! ierr = scotch_dgraphdata(scotchdgraph_redist(1), & + ! indxtab(1), & ! Holds the data queried from graph, local to this processor + ! baseval, & ! returns base value value for index arrays (1 in Fortran) + ! vertglbnbr, & ! number of global vertices + ! num_local_vertices, & ! number of local vertices + ! vertlocmax, & ! max number of local vertices = num_local_vertices for graphs without holes + ! vertgstnbr, & ! num of local and ghost vertices. -1 unless dgraphGhst is called + ! vertlocidx, & ! starting index of vertloctab in indxtab + ! vendlocidx, & ! starting index of vendloctab in indxtab + ! velolocidx, & ! starting index of veloloctab in indxtab + ! vlbllocidx, & ! starting index of vlblloctab in indxtab + ! edgeglbnbr, & ! Global number of arcs. 
Each edge counts twice. + ! edgelocnbr, & ! Local number of arcs including to ghost vertices + ! edgelocsiz, & ! ~ max of vendloctab entries, for local vertices, minus baseval + ! edgelocidx, & ! starting index of edgeloctab in indxtab + ! edgegstidx, & ! starting index of edgegsttab in indxtab + ! edlolocidx, & ! starting index of edloloctab in indxtab + ! comm ) ! communicator used and error code + ! NOTE: TODO: local_block_list might not be the same as proc_id when num blocks > 0? - do i=1,num_local_vertices - local_cell_list(i)=indxtab(vlbllocidx+i-1) - local_block_list(i)=dminfo % my_proc_id - !call mpas_log_write('local_cell_list: $i, local_block_list: $i',MPAS_LOG_ERR, intArgs=(/local_cell_list(i),local_block_list(i)/) ) - end do - + + local_block_list(:)=dminfo % my_proc_id + !call mpas_log_write('nVertices $i num_local_vertices: $i',MPAS_LOG_ERR, intArgs=(/partial_global_graph_info % nVertices,num_local_vertices/)) ! do i=1,partial_global_graph_info % nVertices ! call mpas_log_write('local_block_id_arr($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_block_id_arr(i)/)) @@ -422,12 +391,11 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l end if ! 
Clean up - call scotchfdgraphexit (scotchdgraph (1)) - call scotchfdgraphexit (scotchdgraph_redist (1)) - call scotchfstratexit (stradat (1)) + call scotch_dgraphexit (scotchdgraph (1)) + call scotch_dgraphexit (scotchdgraph_redist (1)) + call scotch_stratexit (stradat (1)) deallocate(edgeloctab) deallocate(vertloctab) - deallocate(indxtab) call MPI_Comm_free(localcomm, mpi_ierr) call mpas_timer_stop('scotch_total') call mpas_log_write('Scotch partition successful') diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index 2913bf6b98..ebc68e9e3e 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -4,78 +4,51 @@ module mpas_ptscotch_interface #include "ptscotchf.h" public :: scotch_dgraphinit, scotch_dgraphbuild - type, bind(c) :: my_scotch_graph_type - real(c_double) :: dummy (29) - end type - - -type(my_scotch_graph_type), pointer :: my_scotch_dgraph => null() - contains function scotch_dgraphinit(dgraph, comm) result(ierr) - use iso_c_binding, only : c_ptr, c_int, c_loc + use iso_c_binding, only : c_ptr, c_loc use mpas_log, only : mpas_log_write -! #ifdef MPAS_USE_MPI_F08 -! use mpi_f08, only : MPI_Comm -! #endif + use mpas_derived_types, only : MPAS_LOG_CRIT implicit none ! Arguments - doubleprecision, target, intent(in) :: dgraph (29) -! #ifdef MPAS_USE_MPI_F08 -! type (MPI_Comm), intent(in) :: comm -! #else + doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) integer, intent(in) :: comm -! #endif ! 
Return value integer :: ierr - integer :: i - type(c_ptr) :: dgraph_ptr interface - function scotchfdgraphinit(dgraph_ptr_1, localcomm) bind(C, name='scotchm_dgraphinit') result(err) - !function scotchfdgraphinit(localcomm) bind(C, name='scotchm_dgraphinit') result(err) - use iso_c_binding, only : c_ptr, c_int, c_double - !real(c_double), intent(in) :: dgraph_ptr_1 (29) - type(c_ptr), value :: dgraph_ptr_1 - integer, intent(in), value :: localcomm - integer(kind=c_int) :: err + function scotchfdgraphinit(dgraph_ptr, localcomm) bind(C, name='scotchm_dgraphinit') result(err) + use iso_c_binding, only : c_ptr, c_int + type(c_ptr), value :: dgraph_ptr + integer(c_int), value :: localcomm + integer(c_int) :: err end function scotchfdgraphinit end interface - - ierr = 0 - !allocate(my_scotch_dgraph) - - !my_scotch_dgraph%dummy[0] = -1 - - !dgraph_ptr = c_loc(my_scotch_dgraph) - - call mpas_log_write('Initializing Distributed Scotch Graph') -! #ifdef MPAS_USE_MPI_F08 -! ierr = scotchfdgraphinit(dgraph_ptr, comm % mpi_val) -! 
#else ierr = scotchfdgraphinit(c_loc(dgraph), comm) - !ierr = scotchfdgraphinit(c_loc(my_scotch_dgraph), comm) - !ierr = scotchfdgraphinit(comm) -!#endif + + if (ierr /= 0) then + call mpas_log_write('Error initalizing distributed Scotch graph') + else + call mpas_log_write('Successfully initialized distributed Scotch graph') + end if end function scotch_dgraphinit function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) result(ierr) - use iso_c_binding, only : c_ptr, c_int, c_loc + use iso_c_binding, only : c_ptr, c_loc use mpas_log, only : mpas_log_write - use mpas_c_interfacing, only : mpas_f_to_c_string - use iso_c_binding, only : c_char, c_associated + use mpas_derived_types, only : MPAS_LOG_CRIT implicit none - doubleprecision, target, intent(in) :: dgraph (29) + doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) integer, intent(in) :: nVertices integer, intent(in) :: vertloctab(nVertices+1) integer, intent(in) :: nLocEdgesGraph @@ -85,31 +58,33 @@ function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgel ! 
Return value integer :: ierr - type(c_ptr) :: dgraph_ptr interface - function scotchfdgraphbuild(dgraph_ptr_1, nVertices, vertloctab, & + function scotchfdgraphbuild(dgraph_ptr, nVertices, vertloctab, & nLocEdgesGraph, edgelocsiz, adjncy) bind(C, name='scotchm_dgraphbuild') result(err) use iso_c_binding, only : c_ptr, c_int - type(c_ptr), value :: dgraph_ptr_1 - integer, intent(in), value :: nVertices + type(c_ptr), value :: dgraph_ptr + integer(c_int), value :: nVertices integer(c_int) :: vertloctab(nVertices+1) - integer, intent(in), value :: nLocEdgesGraph - integer, intent(in), value :: edgelocsiz + integer(c_int), value :: nLocEdgesGraph + integer(c_int), value :: edgelocsiz integer(c_int) :: adjncy(nLocEdgesGraph) - integer(kind=c_int) :: err + integer(c_int) :: err end function scotchfdgraphbuild end interface ierr = 0 - !dgraph_ptr = c_loc(dgraph(1)) - - call mpas_log_write('Building Distributed Scotch Graph') ierr = scotchfdgraphbuild(c_loc(dgraph), nVertices, vertloctab, & nLocEdgesGraph, edgelocsiz, adjncy) + if (ierr /= 0) then + call mpas_log_write('Error building distributed Scotch graph', MPAS_LOG_CRIT) + else + call mpas_log_write('Successfully built distributed Scotch graph') + end if + end function scotch_dgraphbuild @@ -118,8 +93,8 @@ end function scotch_dgraphbuild function scotch_dgraphcheck(dgraph) result(ierr) use mpas_log, only : mpas_log_write - use iso_c_binding, only : c_ptr, c_int, c_loc - use iso_c_binding, only : c_char, c_associated + use mpas_derived_types, only : MPAS_LOG_CRIT + use iso_c_binding, only : c_ptr, c_loc implicit none @@ -127,102 +102,105 @@ function scotch_dgraphcheck(dgraph) result(ierr) ! 
Return value integer :: ierr - type(c_ptr) :: dgraph_ptr interface - function scotchfdgraphcheck(dgraph_ptr_1) bind(C, name='scotchm_dgraphcheck') result(err) + function scotchfdgraphcheck(dgraph_ptr) bind(C, name='scotchm_dgraphcheck') result(err) use iso_c_binding, only : c_int, c_ptr - type(c_ptr), value :: dgraph_ptr_1 - integer(kind=c_int) :: err + type(c_ptr), value :: dgraph_ptr + integer(c_int) :: err end function scotchfdgraphcheck end interface - - ierr = 0 - - dgraph_ptr = c_loc(dgraph(1)) - - call mpas_log_write('Checking Distributed Scotch Graph') - ierr = scotchfdgraphcheck(c_loc(dgraph)) + if (ierr /= 0) then + call mpas_log_write('Error during distributed Scotch graph check', MPAS_LOG_CRIT) + else + call mpas_log_write('Successfully checked distributed Scotch graph') + end if + end function scotch_dgraphcheck - function scotch_dgraphexit(dgraph) result(ierr) + subroutine scotch_dgraphexit(dgraph) use mpas_log, only : mpas_log_write - use iso_c_binding, only : c_ptr, c_int, c_loc + use iso_c_binding, only : c_ptr, c_loc implicit none doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) - ! 
Return value - integer :: ierr - type(c_ptr) :: dgraph_ptr - interface - function scotchfdgraphexit(dgraph_ptr_1) bind(C, name='scotchm_dgraphexit') result(err) + subroutine scotchfdgraphexit(dgraph_ptr) bind(C, name='scotchm_dgraphexit') use iso_c_binding, only : c_int, c_ptr - type(c_ptr) :: dgraph_ptr_1 - integer(kind=c_int) :: err - end function scotchfdgraphexit + type(c_ptr), value :: dgraph_ptr + end subroutine scotchfdgraphexit end interface + call scotchfdgraphexit(c_loc(dgraph)) - ierr = 0 - - dgraph_ptr = c_loc(dgraph(1)) - - call mpas_log_write('Checking Distributed Scotch Graph') - - ierr = scotchfdgraphexit(dgraph_ptr) - - - end function scotch_dgraphexit + end subroutine scotch_dgraphexit function scotch_stratinit(stradat) result(ierr) use mpas_log, only : mpas_log_write - use iso_c_binding, only : c_ptr, c_int, c_loc - use iso_c_binding, only : c_char, c_associated + use mpas_derived_types, only : MPAS_LOG_CRIT + use iso_c_binding, only : c_ptr, c_loc implicit none doubleprecision, target, intent(in) :: stradat (scotch_stratdim) - ! 
Return value integer :: ierr - type(c_ptr) :: dgraph_ptr interface - function scotchfstratinit(strat_ptr_1) bind(C, name='scotchm_stratinit') result(err) + function scotchfstratinit(strat_ptr) bind(C, name='scotchm_stratinit') result(err) use iso_c_binding, only : c_int, c_ptr - type(c_ptr), value :: strat_ptr_1 - integer(kind=c_int) :: err + type(c_ptr), value :: strat_ptr + integer(c_int) :: err end function scotchfstratinit end interface + ierr = scotchfstratinit(c_loc(stradat)) - ierr = 0 + if (ierr /= 0) then + call mpas_log_write('Error during Scotch strategy initialization', MPAS_LOG_CRIT) + else + call mpas_log_write('Successfully initialized Scotch strategy') + end if - call mpas_log_write('Checking Distributed Scotch Graph') + end function scotch_stratinit - ierr = scotchfstratinit(c_loc(stradat)) + subroutine scotch_stratexit(stradat) + use mpas_log, only : mpas_log_write + use iso_c_binding, only : c_ptr, c_loc - end function scotch_stratinit + implicit none + + doubleprecision, target, intent(in) :: stradat (scotch_stratdim) + + interface + subroutine scotchfstratexit(strat_ptr) bind(C, name='scotchm_stratexit') + use iso_c_binding, only : c_ptr + type(c_ptr), value :: strat_ptr + end subroutine scotchfstratexit + end interface + + call scotchfstratexit(c_loc(stradat)) + + end subroutine scotch_stratexit - function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) + function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) use mpas_log, only : mpas_log_write - use iso_c_binding, only : c_ptr, c_int, c_loc - use iso_c_binding, only : c_char, c_associated + use mpas_derived_types, only : MPAS_LOG_CRIT + use iso_c_binding, only : c_ptr, c_loc implicit none @@ -231,32 +209,96 @@ function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) doubleprecision, target, intent(in) :: stradat (scotch_stratdim) integer, intent(out) :: parttab(*) - ! 
Return value integer :: ierr - type(c_ptr) :: dgraph_ptr interface - function scotchfdgraphpart(dgraph_ptr, num_part, strat_ptr_1, parttab ) bind(C, name='scotchm_dgraphpart') result(err) + function scotchfdgraphpart(dgraph_ptr, num_part_loc, strat_ptr, parttab_loc ) bind(C, name='scotchm_dgraphpart') result(err) use iso_c_binding, only : c_int, c_ptr type(c_ptr), value :: dgraph_ptr - integer, intent(in), value :: num_part - type(c_ptr), value :: strat_ptr_1 - integer(c_int) :: parttab(*) - integer(kind=c_int) :: err + integer(c_int), value :: num_part_loc + type(c_ptr), value :: strat_ptr + integer(c_int) :: parttab_loc(*) + integer(c_int) :: err end function scotchfdgraphpart end interface + ierr = scotchfdgraphpart(c_loc(dgraph), num_part, c_loc(stradat), parttab) - ierr = 0 + if (ierr /= 0) then + call mpas_log_write('Error during Scotch graph partition', MPAS_LOG_CRIT) + else + call mpas_log_write('Successfully partitioned distributed Scotch graph') + end if - call mpas_log_write('Checking Distributed Scotch Graph') + end function scotch_dgraphpart - ierr = scotchfdgraphpart(c_loc(my_scotch_dgraph), num_part, c_loc(stradat), parttab) + function scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) result(ierr) + + use mpas_log, only : mpas_log_write + use mpas_derived_types, only : MPAS_LOG_CRIT + use iso_c_binding, only : c_ptr, c_loc + + implicit none + + doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) + integer, intent(in) :: parttab(*) + doubleprecision, target, intent(inout) :: dgraph_out (SCOTCH_DGRAPHDIM) + integer :: num_local_vertices + + ! 
Return value + integer :: ierr + + interface + function scotchfdgraphredist(dgraph_ptr, parttab_loc, dgraph_out_ptr, vertlocnbr ) bind(C, name='scotchm_dgraphredist') result(err) + use iso_c_binding, only : c_int, c_ptr + type(c_ptr), value :: dgraph_ptr + integer(c_int) :: parttab_loc(*) + type(c_ptr), value :: dgraph_out_ptr + integer(c_int) :: vertlocnbr + integer(c_int) :: err + end function scotchfdgraphredist + end interface + + ierr = scotchfdgraphredist(c_loc(dgraph), parttab, c_loc(dgraph_out), num_local_vertices) + + if (ierr /= 0) then + call mpas_log_write('Error during Scotch graph redistribution', MPAS_LOG_CRIT) + else + call mpas_log_write('Successfully redistributed Scotch graph') + end if + + + end function scotch_dgraphredist + + + function scotch_dgraphout(dgraph, local_cell_list) result(ierr) + + use mpas_log, only : mpas_log_write + use iso_c_binding, only : c_ptr, c_loc + + implicit none + + doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) + integer, intent(out) :: local_cell_list(*) + + ! 
Return value + integer :: ierr + + interface + function scotchfdgraphout(dgraph_ptr, cell_list) bind(C, name='scotchm_dgraphout') result(err) + use iso_c_binding, only : c_int, c_ptr + type(c_ptr), value :: dgraph_ptr + integer(c_int) :: cell_list(*) + integer(c_int) :: err + end function scotchfdgraphout + end interface + + ierr = scotchfdgraphout(c_loc(dgraph), local_cell_list) - end function scotch_dgraphpart + end function scotch_dgraphout end module mpas_ptscotch_interface diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index 013769e9fd..a7c1840747 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -16,14 +16,6 @@ #include "ptscotch.h" -///#ifdef _MPI - -///#endif - -#define MSGSIZE 256 - -SCOTCH_Dgraph dgrfdat; - typedef struct Dgraph_ { unsigned int flagval; /*+ Graph properties +*/ @@ -65,7 +57,7 @@ typedef struct Dgraph_ { int * procsndtab; /*+ Number of vertices to send in ghost vertex sub-arrays +*/ int * procsidtab; /*+ Array of indices to build communication vectors (send) +*/ int procsidnbr; /*+ Size of the send index array +*/ -} Dgraph; +} Dgraph2; /* @@ -73,7 +65,6 @@ typedef struct Dgraph_ { * messageType_c may be any of "MPAS_LOG_OUT", "MPAS_LOG_WARN", "MPAS_LOG_ERR", or "MPAS_LOG_CRIT" */ int scotchm_dgraphinit(void * ptr, int localcomm) -//int scotchm_dgraphinit(int localcomm) { MPI_Comm comm; MPI_Comm comm2; @@ -86,20 +77,6 @@ int scotchm_dgraphinit(void * ptr, int localcomm) err = SCOTCH_dgraphInit(dgraph, comm); - - Dgraph * my_dgraph = (Dgraph *) dgraph; - - printf("In scotchm_dgraphinit: After SCOTCH_dgraphInit: = %d \n",my_dgraph->procglbnbr); - printf("In scotchm_dgraphinit: After SCOTCH_dgraphInit: = %d \n",my_dgraph->proclocnum); - - comm2 = my_dgraph->proccomm; - - MPI_Comm_size (comm2, &size); /* Get communicator data */ - MPI_Comm_rank (comm2, &rank); - - printf("In scotchm_dgraphinit: MPI_Comm size = %d, rank = %d\n",size, rank); - - return err; } @@ 
-128,14 +105,14 @@ int scotchm_dgraphbuild(void * ptr, SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; - Dgraph * my_dgraph = (Dgraph *) dgraph; + // Dgraph2 * my_dgraph = (Dgraph2 *) dgraph; - for (int i=0; i < nVertices+1; i++) { - printf("before scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, vertloctab[i]); - } - for (int i=0; i < nLocEdgesGraph; i++) { - printf("before scotchm_dgraphbuild: rank: %d edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, edgeloctab[i]); - } + // for (int i=0; i < nVertices+1; i++) { + // printf("before scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, vertloctab[i]); + // } + // for (int i=0; i < nLocEdgesGraph; i++) { + // printf("before scotchm_dgraphbuild: rank: %d edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, edgeloctab[i]); + // } err = SCOTCH_dgraphBuild (dgraph, baseval, @@ -153,15 +130,15 @@ int scotchm_dgraphbuild(void * ptr, - printf("In scotchm_dgraphbuild: rank: %d vertglbnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertglbnbr); - printf("In scotchm_dgraphbuild: rank: %d vertlocnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertlocnbr); + // printf("In scotchm_dgraphbuild: rank: %d vertglbnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertglbnbr); + // printf("In scotchm_dgraphbuild: rank: %d vertlocnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertlocnbr); - for (int i=0; i < nVertices+1; i++) { - printf("In scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->vertloctax[i]); - } - for (int i=0; i < nLocEdgesGraph; i++) { - printf("In scotchm_dgraphbuild: rank: %d edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->edgeloctax[i]); - } + // for (int i=0; i < nVertices+1; i++) { + // printf("In scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->vertloctax[i]); + // } + // for (int i=0; i < nLocEdgesGraph; i++) { + // printf("In scotchm_dgraphbuild: rank: %d 
edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->edgeloctax[i]); + // } return err; @@ -185,7 +162,7 @@ int scotchm_dgraphpart(void * ptr, int num_part, void * ptr_strat, int * parttab return SCOTCH_dgraphPart(dgraph, num_part, strat, parttab); } -int scotch_dgraphredist(void * ptr, int *partloctab, void * ptr_out){ +int scotchm_dgraphredist(void * ptr, int *partloctab, void * ptr_out, int *vertlocnbr){ SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *) ptr; @@ -193,19 +170,53 @@ int scotch_dgraphredist(void * ptr, int *partloctab, void * ptr_out){ int * permgsttab = NULL; // Redistribution permutation array int vertlocdlt = 0; // Extra size of local vertex array int edgelocdlt = 0; // Extra size of local edge array + int err; + + err = SCOTCH_dgraphRedist (dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); + + + Dgraph2 *dgraph_mine = (Dgraph2 *) dgraph_out; + + *vertlocnbr = dgraph_mine->vertlocnbr; + + // printf("redist: vlllb pointer = %p, +1 %p \n",dgraph_mine->vlblloctax, dgraph_mine->vlblloctax+1); + + + // for (int i=1; i < dgraph_mine->vertlocnbr + 1; i++) { + // printf("redist bypass: vlllb(%d) = %d \n",i,dgraph_mine->vlblloctax[i] ); + // } + return err; +} + + +int scotchm_dgraphout(void * ptr, int * cell_list){ + + + //SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *) ptr; + + int * permgsttab = NULL; // Redistribution permutation array + int vertlocdlt = 0; // Extra size of local vertex array + int edgelocdlt = 0; // Extra size of local edge array + int err; + + + Dgraph2 *dgraph_mine = (Dgraph2 *) ptr; + + //printf("In graphout: vertlocnbr=%d vertglbnbr=%d \n",dgraph_mine->vertlocnbr, dgraph_mine->vertglbnbr); - return SCOTCH_dgraphRedist (dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); + //printf("graphout: vlllb pointer = %p, +1 %p \n",dgraph_mine->vlblloctax, dgraph_mine->vlblloctax+1); + + for (int i=0; i < dgraph_mine->vertlocnbr; i++) { + cell_list[i] = *(dgraph_mine->vlblloctax + 
dgraph_mine->baseval + i); + } + return err; } -// int scotchfdgraphdata() -// { -// void SCOTCH_dgraphData (const SCOTCH_Dgraph * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num * const, SCOTCH_Num ** const, SCOTCH_Num ** const, SCOTCH_Num ** const, MPI_Comm * const); -// } -void scotchm_dgraphexit(SCOTCH_Dgraph *dgraph) +void scotchm_dgraphexit(void *ptr) { - return SCOTCH_dgraphExit(dgraph); + return SCOTCH_dgraphExit((SCOTCH_Dgraph *) ptr); } int scotchm_stratinit(void * strat_ptr) @@ -215,7 +226,7 @@ int scotchm_stratinit(void * strat_ptr) return SCOTCH_stratInit(strat); } -// int scotchfstratexit() -// { -// void SCOTCH_stratExit (SCOTCH_Strat * const); -// } +void scotchm_stratexit(void * strat_ptr) +{ + return SCOTCH_stratExit((SCOTCH_Strat *) strat_ptr); +} From f721710fff47c6236bb37e061d87436fd7740ac7 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 24 Sep 2025 10:48:04 -0600 Subject: [PATCH 17/43] cleanup --- src/framework/mpas_block_decomp.F | 48 +--------------------- src/framework/ptscotch_interface.c | 66 ++++-------------------------- 2 files changed, 9 insertions(+), 105 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 8bb298ae2b..21a0e6e9b6 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -253,23 +253,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l nLocEdgesGraph, & ! Number of local edges, including to ghost vertices edgelocsiz, & ! Defined previously edgeloctab(1)) ! Holds the adjacency array for every local vertex - - - ! call scotchfdgraphbuild (scotchdgraph(1), & - ! 1, & ! base value value for index arrays (1 in Fortran) - ! partial_global_graph_info % nVertices, & ! num of local vertices on the calling process - ! 
partial_global_graph_info % nVertices, & ! max number of local vertices = num_local_vertices for graphs without holes - ! vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex - ! vertloctab (2), & ! vendloctab: Array of after-last indices in edgeloctab - ! vertloctab (1), & ! veloloctab: Optional array of integer loads for each local vertex - ! vertloctab (1), & ! vlblloctab: Optional array of labels for each local vertex - ! nLocEdgesGraph, & ! Number of local edges, including to ghost vertices - ! edgelocsiz, & ! Defined previously - ! edgeloctab(1), & ! Holds the adjacency array for every local vertex - ! edgeloctab(1), & ! edgegsttab: Optional array holding the local and ghost indices - ! edgeloctab(1), & ! edloloctab: Optional array of integer loads for each local edge - ! ierr) - ! Only needed during development/debugging. ierr = scotch_dgraphcheck (scotchdgraph(1)) @@ -296,37 +279,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(local_cell_list(num_local_vertices)) allocate(local_block_list(num_local_vertices)) - ! call mpas_log_write('After redist, num_local_vertices is $i', intArgs=(/num_local_vertices/) ) - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('AFTER REDIST rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i,local_block_id_arr(i)/) ) - ! end do - ierr = scotch_dgraphout (scotchdgraph_redist(1), local_cell_list) - ! call mpas_log_write('Graph redist successful ') - ! do i=1,num_local_vertices - ! call mpas_log_write('AFTER REDIST local_cell_list ($i)= $i', intArgs=(/i, local_cell_list(i)/) ) - ! end do - - ! Input to scotchfdgraphdata is the re-distributed graph scotchdgraph_redist, and all - ! the remaining are output arguments. - ! ierr = scotch_dgraphdata(scotchdgraph_redist(1), & - ! indxtab(1), & ! Holds the data queried from graph, local to this processor - ! baseval, & ! returns base value value for index arrays (1 in Fortran) - ! 
vertglbnbr, & ! number of global vertices - ! num_local_vertices, & ! number of local vertices - ! vertlocmax, & ! max number of local vertices = num_local_vertices for graphs without holes - ! vertgstnbr, & ! num of local and ghost vertices. -1 unless dgraphGhst is called - ! vertlocidx, & ! starting index of vertloctab in indxtab - ! vendlocidx, & ! starting index of vendloctab in indxtab - ! velolocidx, & ! starting index of veloloctab in indxtab - ! vlbllocidx, & ! starting index of vlblloctab in indxtab - ! edgeglbnbr, & ! Global number of arcs. Each edge counts twice. - ! edgelocnbr, & ! Local number of arcs including to ghost vertices - ! edgelocsiz, & ! ~ max of vendloctab entries, for local vertices, minus baseval - ! edgelocidx, & ! starting index of edgeloctab in indxtab - ! edgegstidx, & ! starting index of edgegsttab in indxtab - ! edlolocidx, & ! starting index of edloloctab in indxtab - ! comm ) ! communicator used and error code + ierr = scotch_dgraphout (scotchdgraph_redist(1), local_cell_list) ! NOTE: TODO: local_block_list might not be the same as proc_id when num blocks > 0? 
diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index a7c1840747..4b87063514 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -60,10 +60,7 @@ typedef struct Dgraph_ { } Dgraph2; -/* - * Interface routines for writing log messages; defined in mpas_log.F - * messageType_c may be any of "MPAS_LOG_OUT", "MPAS_LOG_WARN", "MPAS_LOG_ERR", or "MPAS_LOG_CRIT" - */ + int scotchm_dgraphinit(void * ptr, int localcomm) { MPI_Comm comm; @@ -101,19 +98,9 @@ int scotchm_dgraphbuild(void * ptr, int * edloloctab = NULL; // Optional array of integer loads for each local edge int i,err; - SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; - // Dgraph2 * my_dgraph = (Dgraph2 *) dgraph; - - // for (int i=0; i < nVertices+1; i++) { - // printf("before scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, vertloctab[i]); - // } - // for (int i=0; i < nLocEdgesGraph; i++) { - // printf("before scotchm_dgraphbuild: rank: %d edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, edgeloctab[i]); - // } - err = SCOTCH_dgraphBuild (dgraph, baseval, vertlocnbr, @@ -128,32 +115,15 @@ int scotchm_dgraphbuild(void * ptr, edgegsttab, edloloctab); - - - // printf("In scotchm_dgraphbuild: rank: %d vertglbnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertglbnbr); - // printf("In scotchm_dgraphbuild: rank: %d vertlocnbr = %d \n",my_dgraph->proclocnum, my_dgraph->vertlocnbr); - - // for (int i=0; i < nVertices+1; i++) { - // printf("In scotchm_dgraphbuild: rank: %d vertloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->vertloctax[i]); - // } - // for (int i=0; i < nLocEdgesGraph; i++) { - // printf("In scotchm_dgraphbuild: rank: %d edgeloctab(%d) = %d \n",my_dgraph->proclocnum, i, my_dgraph->edgeloctax[i]); - // } - return err; } int scotchm_dgraphcheck(void * ptr) { - - SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; - - return SCOTCH_dgraphCheck(dgraph); + return SCOTCH_dgraphCheck((SCOTCH_Dgraph *) 
ptr); } - - int scotchm_dgraphpart(void * ptr, int num_part, void * ptr_strat, int * parttab){ SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; @@ -164,7 +134,6 @@ int scotchm_dgraphpart(void * ptr, int num_part, void * ptr_strat, int * parttab int scotchm_dgraphredist(void * ptr, int *partloctab, void * ptr_out, int *vertlocnbr){ - SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *) ptr; SCOTCH_Dgraph *dgraph_out = (SCOTCH_Dgraph *) ptr_out; int * permgsttab = NULL; // Redistribution permutation array @@ -174,56 +143,37 @@ int scotchm_dgraphredist(void * ptr, int *partloctab, void * ptr_out, int *vertl err = SCOTCH_dgraphRedist (dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); + Dgraph2 *dgraph = (Dgraph2 *) dgraph_out; - Dgraph2 *dgraph_mine = (Dgraph2 *) dgraph_out; - - *vertlocnbr = dgraph_mine->vertlocnbr; + *vertlocnbr = dgraph->vertlocnbr; - // printf("redist: vlllb pointer = %p, +1 %p \n",dgraph_mine->vlblloctax, dgraph_mine->vlblloctax+1); - - - // for (int i=1; i < dgraph_mine->vertlocnbr + 1; i++) { - // printf("redist bypass: vlllb(%d) = %d \n",i,dgraph_mine->vlblloctax[i] ); - // } return err; } int scotchm_dgraphout(void * ptr, int * cell_list){ - - //SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *) ptr; - int * permgsttab = NULL; // Redistribution permutation array int vertlocdlt = 0; // Extra size of local vertex array int edgelocdlt = 0; // Extra size of local edge array int err; - - Dgraph2 *dgraph_mine = (Dgraph2 *) ptr; + Dgraph2 *dgraph = (Dgraph2 *) ptr; - //printf("In graphout: vertlocnbr=%d vertglbnbr=%d \n",dgraph_mine->vertlocnbr, dgraph_mine->vertglbnbr); - - //printf("graphout: vlllb pointer = %p, +1 %p \n",dgraph_mine->vlblloctax, dgraph_mine->vlblloctax+1); - - for (int i=0; i < dgraph_mine->vertlocnbr; i++) { - cell_list[i] = *(dgraph_mine->vlblloctax + dgraph_mine->baseval + i); + for (int i=0; i < dgraph->vertlocnbr; i++) { + cell_list[i] = *(dgraph->vlblloctax + dgraph->baseval + i); } return err; } - void 
scotchm_dgraphexit(void *ptr) { - return SCOTCH_dgraphExit((SCOTCH_Dgraph *) ptr); } int scotchm_stratinit(void * strat_ptr) { - SCOTCH_Strat *strat = (SCOTCH_Strat *) strat_ptr; - - return SCOTCH_stratInit(strat); + return SCOTCH_stratInit((SCOTCH_Strat *) strat_ptr); } void scotchm_stratexit(void * strat_ptr) From d3f511c7c23f0dc001242ff55aac117c34522371 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 22 Oct 2025 10:45:19 -0600 Subject: [PATCH 18/43] mpas_block_decomp.o depends on mpas_ptscotch_interface.o --- src/framework/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/Makefile b/src/framework/Makefile index 0d877c1f9a..4b3f1cbc5c 100644 --- a/src/framework/Makefile +++ b/src/framework/Makefile @@ -87,7 +87,7 @@ mpas_timekeeping.o: mpas_string_utils.o mpas_kind_types.o mpas_derived_types.o m mpas_timer.o: mpas_kind_types.o mpas_dmpar.o mpas_threading.o mpas_log.o -mpas_block_decomp.o: mpas_derived_types.o mpas_hash.o mpas_io_units.o mpas_dmpar.o +mpas_block_decomp.o: mpas_derived_types.o mpas_hash.o mpas_io_units.o mpas_dmpar.o mpas_ptscotch_interface.o mpas_block_creator.o: mpas_dmpar.o mpas_hash.o mpas_sort.o mpas_io_units.o mpas_block_decomp.o mpas_stream_manager.o mpas_decomp.o mpas_abort.o $(DEPS) From a8d18084d28a8fbc279f4657eb540d9a0c17592f Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 22 Oct 2025 12:06:25 -0600 Subject: [PATCH 19/43] Using SCOTCH_Num as integer type PT-Scotch can be built with 64-bit integers or 32-bit integers. This is determined from build-time options and the generic integer type SCOTCH_Num covers either option. This commit replaces the type of certain integer variables, in the MPAS interface to PT-Scotch, to use SCOTCH_Num to remain consistent with the external library. Note that MPAS doesn't yet support 64-bit integers, and variables of type SCOTCH_Num are typecast into 32-bit integers in framework/mpas_block_decomp.F. 
And in order for MPAS to correctly work with PT-Scotch, PT-Scotch must not be built to use 64-bit integers. --- src/framework/mpas_ptscotch_interface.F | 24 ++-- src/framework/ptscotch_interface.c | 144 +++++++++++++----------- 2 files changed, 90 insertions(+), 78 deletions(-) diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index ebc68e9e3e..8f75142897 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -5,6 +5,7 @@ module mpas_ptscotch_interface public :: scotch_dgraphinit, scotch_dgraphbuild + contains function scotch_dgraphinit(dgraph, comm) result(ierr) @@ -49,11 +50,12 @@ function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgel implicit none doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) - integer, intent(in) :: nVertices - integer, intent(in) :: vertloctab(nVertices+1) - integer, intent(in) :: nLocEdgesGraph - integer, intent(in) :: edgelocsiz - integer, intent(in) :: adjncy(nLocEdgesGraph) + integer(SCOTCH_NUMSIZE), intent(in) :: nVertices + integer(SCOTCH_NUMSIZE), intent(in) :: vertloctab(nVertices+1) + integer(SCOTCH_NUMSIZE), intent(in) :: nLocEdgesGraph + integer(SCOTCH_NUMSIZE), intent(in) :: edgelocsiz + integer(SCOTCH_NUMSIZE), intent(in) :: adjncy(nLocEdgesGraph) + ! Return value @@ -88,8 +90,6 @@ end function scotchfdgraphbuild end function scotch_dgraphbuild - - function scotch_dgraphcheck(dgraph) result(ierr) use mpas_log, only : mpas_log_write @@ -205,9 +205,9 @@ function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) implicit none doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) - integer, intent(in) :: num_part + integer(SCOTCH_NUMSIZE), intent(in) :: num_part doubleprecision, target, intent(in) :: stradat (scotch_stratdim) - integer, intent(out) :: parttab(*) + integer(SCOTCH_NUMSIZE), intent(out) :: parttab(*) ! 
Return value integer :: ierr @@ -243,9 +243,9 @@ function scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) re implicit none doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) - integer, intent(in) :: parttab(*) + integer(SCOTCH_NUMSIZE), intent(in) :: parttab(*) doubleprecision, target, intent(inout) :: dgraph_out (SCOTCH_DGRAPHDIM) - integer :: num_local_vertices + integer(SCOTCH_NUMSIZE) :: num_local_vertices ! Return value integer :: ierr @@ -281,7 +281,7 @@ function scotch_dgraphout(dgraph, local_cell_list) result(ierr) implicit none doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) - integer, intent(out) :: local_cell_list(*) + integer(SCOTCH_NUMSIZE), intent(out) :: local_cell_list(*) ! Return value integer :: ierr diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index 4b87063514..54b31e47d3 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -19,44 +19,44 @@ typedef struct Dgraph_ { unsigned int flagval; /*+ Graph properties +*/ - int baseval; /*+ Base index for edge/vertex arrays +*/ - int vertglbnbr; /*+ Global number of vertices +*/ - int vertglbmax; /*+ Maximum number of local vertices over all processes +*/ - int vertgstnbr; /*+ Number of local + ghost vertices +*/ - int vertgstnnd; /*+ vertgstnbr + baseval +*/ - int vertlocnbr; /*+ Local number of vertices +*/ - int vertlocnnd; /*+ Local number of vertices + baseval +*/ - int * vertloctax; /*+ Local vertex beginning index array [based] +*/ - int * vendloctax; /*+ Local vertex end index array [based] +*/ - int * veloloctax; /*+ Local vertex load array if present +*/ - int velolocsum; /*+ Local sum of all vertex loads +*/ - int veloglbsum; /*+ Global sum of all vertex loads +*/ - int * vnumloctax; /*+ Arrays of global vertex numbers in original graph +*/ - int * vlblloctax; /*+ Arrays of vertex labels (when read from file) +*/ - int edgeglbnbr; /*+ Global number of arcs +*/ - int 
edgeglbmax; /*+ Maximum number of local edges over all processes +*/ - int edgelocnbr; /*+ Number of local edges +*/ - int edgelocsiz; /*+ Size of local edge array (= edgelocnbr when compact) +*/ - int edgeglbsmx; /*+ Maximum size of local edge arrays over all processes +*/ - int * edgegsttax; /*+ Edge array holding local indices of neighbors [based] +*/ - int * edgeloctax; /*+ Edge array holding global neighbor numbers [based] +*/ - int * edloloctax; /*+ Edge load array +*/ - int degrglbmax; /*+ Maximum degree over all processes +*/ - int pkeyglbval; /*+ Communicator key value: folded communicators are distinct +*/ - MPI_Comm proccomm; /*+ Graph communicator +*/ - int procglbnbr; /*+ Number of processes sharing graph data +*/ - int proclocnum; /*+ Number of this process +*/ - int * procvrttab; /*+ Global array of vertex number ranges [+1,based] +*/ - int * proccnttab; /*+ Count array for local number of vertices +*/ - int * procdsptab; /*+ Displacement array with respect to proccnttab [+1,based] +*/ - int procngbnbr; /*+ Number of neighboring processes +*/ - int procngbmax; /*+ Maximum number of neighboring processes +*/ - int * procngbtab; /*+ Array of neighbor process numbers [sorted] +*/ - int * procrcvtab; /*+ Number of vertices to receive in ghost vertex sub-arrays +*/ - int procsndnbr; /*+ Overall size of local send array +*/ - int * procsndtab; /*+ Number of vertices to send in ghost vertex sub-arrays +*/ - int * procsidtab; /*+ Array of indices to build communication vectors (send) +*/ - int procsidnbr; /*+ Size of the send index array +*/ + SCOTCH_Num baseval; /*+ Base index for edge/vertex arrays +*/ + SCOTCH_Num vertglbnbr; /*+ Global number of vertices +*/ + SCOTCH_Num vertglbmax; /*+ Maximum number of local vertices over all processes +*/ + SCOTCH_Num vertgstnbr; /*+ Number of local + ghost vertices +*/ + SCOTCH_Num vertgstnnd; /*+ vertgstnbr + baseval +*/ + SCOTCH_Num vertlocnbr; /*+ Local number of vertices +*/ + SCOTCH_Num vertlocnnd; /*+ Local 
number of vertices + baseval +*/ + SCOTCH_Num * vertloctax; /*+ Local vertex beginning index array [based] +*/ + SCOTCH_Num * vendloctax; /*+ Local vertex end index array [based] +*/ + SCOTCH_Num * veloloctax; /*+ Local vertex load array if present +*/ + SCOTCH_Num velolocsum; /*+ Local sum of all vertex loads +*/ + SCOTCH_Num veloglbsum; /*+ Global sum of all vertex loads +*/ + SCOTCH_Num * vnumloctax; /*+ Arrays of global vertex numbers in original graph +*/ + SCOTCH_Num * vlblloctax; /*+ Arrays of vertex labels (when read from file) +*/ + SCOTCH_Num edgeglbnbr; /*+ Global number of arcs +*/ + SCOTCH_Num edgeglbmax; /*+ Maximum number of local edges over all processes +*/ + SCOTCH_Num edgelocnbr; /*+ Number of local edges +*/ + SCOTCH_Num edgelocsiz; /*+ Size of local edge array (= edgelocnbr when compact) +*/ + SCOTCH_Num edgeglbsmx; /*+ Maximum size of local edge arrays over all processes +*/ + SCOTCH_Num * edgegsttax; /*+ Edge array holding local indices of neighbors [based] +*/ + SCOTCH_Num * edgeloctax; /*+ Edge array holding global neighbor numbers [based] +*/ + SCOTCH_Num * edloloctax; /*+ Edge load array +*/ + SCOTCH_Num degrglbmax; /*+ Maximum degree over all processes +*/ + SCOTCH_Num pkeyglbval; /*+ Communicator key value: folded communicators are distinct +*/ + MPI_Comm proccomm; /*+ Graph communicator +*/ + SCOTCH_Num procglbnbr; /*+ Number of processes sharing graph data +*/ + SCOTCH_Num proclocnum; /*+ Number of this process +*/ + SCOTCH_Num * procvrttab; /*+ Global array of vertex number ranges [+1,based] +*/ + SCOTCH_Num * proccnttab; /*+ Count array for local number of vertices +*/ + SCOTCH_Num * procdsptab; /*+ Displacement array with respect to proccnttab [+1,based] +*/ + SCOTCH_Num procngbnbr; /*+ Number of neighboring processes +*/ + SCOTCH_Num procngbmax; /*+ Maximum number of neighboring processes +*/ + SCOTCH_Num * procngbtab; /*+ Array of neighbor process numbers [sorted] +*/ + SCOTCH_Num * procrcvtab; /*+ Number of vertices to receive 
in ghost vertex sub-arrays +*/ + SCOTCH_Num procsndnbr; /*+ Overall size of local send array +*/ + SCOTCH_Num * procsndtab; /*+ Number of vertices to send in ghost vertex sub-arrays +*/ + SCOTCH_Num * procsidtab; /*+ Array of indices to build communication vectors (send) +*/ + SCOTCH_Num procsidnbr; /*+ Size of the send index array +*/ } Dgraph2; @@ -79,26 +79,28 @@ int scotchm_dgraphinit(void * ptr, int localcomm) } int scotchm_dgraphbuild(void * ptr, - int nVertices, - int * vertloctab_1, - int nLocEdgesGraph, - int edgelocsiz, - int *adjncy + SCOTCH_Num nVertices, + SCOTCH_Num * vertloctab_1, + SCOTCH_Num nLocEdgesGraph, + SCOTCH_Num edgelocsiz_1, + SCOTCH_Num *adjncy ) { - int baseval = 1; // Fortran-style 1-based indexing - int vertlocnbr = nVertices; - int * vertloctab = vertloctab_1; - int * vendloctab = vertloctab_1 + 1; - int * veloloctab = NULL; // vertex weights not used - int * vlblloctab = NULL; // vertex labels not used - int edgelocnbr = nLocEdgesGraph; - int *edgeloctab = adjncy; - int * edgegsttab = NULL; // Optional array holding the local and ghost indices - int * edloloctab = NULL; // Optional array of integer loads for each local edge - int i,err; - + SCOTCH_Num baseval = 1; // Fortran-style 1-based indexing + SCOTCH_Num vertlocnbr = nVertices; + SCOTCH_Num * veloloctab = NULL; // vertex weights not used + SCOTCH_Num * vlblloctab = NULL; // vertex labels not used + SCOTCH_Num edgelocnbr = nLocEdgesGraph; + SCOTCH_Num edgelocsiz = edgelocsiz_1; + SCOTCH_Num * edgegsttab = NULL; // Optional array holding the local and ghost indices + SCOTCH_Num * edloloctab = NULL; // Optional array of integer loads for each local edge + + SCOTCH_Num * vertloctab = (SCOTCH_Num *) vertloctab_1; + SCOTCH_Num * vendloctab = vertloctab_1 + 1; + SCOTCH_Num * edgeloctab = (SCOTCH_Num *) adjncy; + int i,err; + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; err = SCOTCH_dgraphBuild (dgraph, @@ -119,12 +121,14 @@ int scotchm_dgraphbuild(void * ptr, } + + int 
scotchm_dgraphcheck(void * ptr) { return SCOTCH_dgraphCheck((SCOTCH_Dgraph *) ptr); } -int scotchm_dgraphpart(void * ptr, int num_part, void * ptr_strat, int * parttab){ +int scotchm_dgraphpart(void * ptr, SCOTCH_Num num_part, void * ptr_strat, SCOTCH_Num * parttab){ SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; SCOTCH_Strat *strat = (SCOTCH_Strat *) ptr_strat; @@ -132,13 +136,14 @@ int scotchm_dgraphpart(void * ptr, int num_part, void * ptr_strat, int * parttab return SCOTCH_dgraphPart(dgraph, num_part, strat, parttab); } -int scotchm_dgraphredist(void * ptr, int *partloctab, void * ptr_out, int *vertlocnbr){ + +int scotchm_dgraphredist(void * ptr, SCOTCH_Num *partloctab, void * ptr_out, SCOTCH_Num *vertlocnbr){ SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *) ptr; SCOTCH_Dgraph *dgraph_out = (SCOTCH_Dgraph *) ptr_out; - int * permgsttab = NULL; // Redistribution permutation array - int vertlocdlt = 0; // Extra size of local vertex array - int edgelocdlt = 0; // Extra size of local edge array + SCOTCH_Num * permgsttab = NULL; // Redistribution permutation array + SCOTCH_Num vertlocdlt = 0; // Extra size of local vertex array + SCOTCH_Num edgelocdlt = 0; // Extra size of local edge array int err; err = SCOTCH_dgraphRedist (dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); @@ -151,21 +156,23 @@ int scotchm_dgraphredist(void * ptr, int *partloctab, void * ptr_out, int *vertl } -int scotchm_dgraphout(void * ptr, int * cell_list){ - int * permgsttab = NULL; // Redistribution permutation array - int vertlocdlt = 0; // Extra size of local vertex array - int edgelocdlt = 0; // Extra size of local edge array +int scotchm_dgraphout(void * ptr, SCOTCH_Num * cell_list){ + + SCOTCH_Num * permgsttab = NULL; // Redistribution permutation array + SCOTCH_Num vertlocdlt = 0; // Extra size of local vertex array + SCOTCH_Num edgelocdlt = 0; // Extra size of local edge array int err; Dgraph2 *dgraph = (Dgraph2 *) ptr; - for (int i=0; i < dgraph->vertlocnbr; i++) { + 
for (SCOTCH_Num i=0; i < dgraph->vertlocnbr; i++) { cell_list[i] = *(dgraph->vlblloctax + dgraph->baseval + i); } return err; } + void scotchm_dgraphexit(void *ptr) { return SCOTCH_dgraphExit((SCOTCH_Dgraph *) ptr); @@ -173,7 +180,12 @@ void scotchm_dgraphexit(void *ptr) int scotchm_stratinit(void * strat_ptr) { - return SCOTCH_stratInit((SCOTCH_Strat *) strat_ptr); + SCOTCH_stratInit((SCOTCH_Strat *) strat_ptr); + //SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATDEFAULT, 16, 16, 0.03); + //SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATSCALABILITY, 1, 0, 0.05); + + + return 0; } void scotchm_stratexit(void * strat_ptr) From 67c25ba0b9637699f01f566400afbe24e976398c Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 22 Oct 2025 12:27:06 -0600 Subject: [PATCH 20/43] introducing some new variables --- src/framework/mpas_block_decomp.F | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 21a0e6e9b6..77197ba775 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -93,7 +93,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l logical :: no_blocks logical :: useScotch #ifdef MPAS_SCOTCH - integer :: nLocEdgesGraph = 0, edgelocsiz = 0 + integer :: nLocEdgesGraph = 0, nLocVerticesGraph = 0, edgelocsiz = 0, npart = 1 character (len=StrKIND) :: partitionFilePrefix integer, dimension(:), allocatable :: edgeloctab, vertloctab doubleprecision :: stradat (scotch_stratdim) @@ -195,7 +195,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) end do end do - call mpas_log_write('nLocEdgesGraph is $i', intArgs=(/nLocEdgesGraph/)) ! 
Holds the adjacency array for every local vertex allocate(edgeloctab(nLocEdgesGraph)) @@ -223,8 +222,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l end do vertloctab(partial_global_graph_info % nVertices+1) = nLocEdgesGraph + 1 - call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVertices, nLocEdgesGraph/)) - ! Duplicate the communicator to be used by Scotch call MPI_Comm_dup(MPI_COMM_WORLD, localcomm, mpi_ierr) if (mpi_ierr .ne. 0) then @@ -245,10 +242,14 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! local vertices, minus baseval; it can be set to edgelocnbr if the edge array is compact. edgelocsiz = maxval(vertloctab) - 1 + nLocVerticesGraph = partial_global_graph_info % nVertices + + call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/nLocVerticesGraph, nLocEdgesGraph/)) + ! Build the distributed Scotch graph and save it in scotchdgraph ! Note: Optional arguments veloloctab, vlblloctab, edgegsttab, and edloloctab are not needed here. ierr = scotch_dgraphbuild (scotchdgraph(1), & - partial_global_graph_info % nVertices, & ! num of local vertices on the calling process + nLocVerticesGraph, & ! num of local vertices on the calling process vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex nLocEdgesGraph, & ! Number of local edges, including to ghost vertices edgelocsiz, & ! Defined previously @@ -262,9 +263,10 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_timer_start('scotch_graph_partitioning') ! Partition the distributed graph and save the result in local_block_id_arr - ierr = scotch_dgraphpart (scotchdgraph(1), dminfo % nProcs, stradat (1), local_block_id_arr(1)) + ! npart = dminfo % nProcs + ! ierr = scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) ! do i=1,partial_global_graph_info % nVertices - ! 
call mpas_log_write('AFTER PART rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i,local_block_id_arr(i)/) ) + ! call mpas_log_write('AFTER PART rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i, local_block_id_arr(i)/) ) ! end do call mpas_timer_stop('scotch_graph_partitioning') call mpas_log_write('Graph parition successful ') @@ -308,7 +310,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l ! if (dminfo % my_proc_id == IO_NODE) then ! call mpas_log_write('After gathering local_nvertices on IO_NODE: ') - ! do i=1, dminfo % nProcs + ! do i=1, npart ! call mpas_log_write('local_nvertices: $i', intArgs=(/local_nvertices(i)/) ) ! end do ! end if From 029d3e14d61f269f40453109ed7d7cfbddf60bbe Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 22 Oct 2025 12:28:06 -0600 Subject: [PATCH 21/43] Using SCOTCH_STRATSCALABILITY to avoid crashes with large partition count --- src/framework/ptscotch_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index 54b31e47d3..a8887c0267 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -182,7 +182,7 @@ int scotchm_stratinit(void * strat_ptr) { SCOTCH_stratInit((SCOTCH_Strat *) strat_ptr); //SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATDEFAULT, 16, 16, 0.03); - //SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATSCALABILITY, 1, 0, 0.05); + SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATSCALABILITY, 1, 0, 0.05); return 0; From 103e8a53ce9bcaecb602796dbbf1ad7c3165d76b Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Sat, 6 Dec 2025 08:49:09 -0700 Subject: [PATCH 22/43] Undoing changes to config_block_decomp_file_prefix defaults --- src/core_atmosphere/Registry.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/Registry.xml 
b/src/core_atmosphere/Registry.xml index 879d76aa91..4281c40bba 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -319,7 +319,7 @@ - From 0eee6574f745d0aeeb3500fe4ddfeaab6f2add9d Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Sat, 6 Dec 2025 08:50:22 -0700 Subject: [PATCH 23/43] Make sure non-Scotch build still works --- src/framework/mpas_ptscotch_interface.F | 3 ++- src/framework/ptscotch_interface.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index 8f75142897..5205ed7231 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -1,4 +1,4 @@ - +#ifdef MPAS_SCOTCH module mpas_ptscotch_interface use iso_c_binding, only : c_int, c_double #include "ptscotchf.h" @@ -302,3 +302,4 @@ end function scotch_dgraphout end module mpas_ptscotch_interface +#endif \ No newline at end of file diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index a8887c0267..b92a34cb2c 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -5,7 +5,7 @@ * Additional copyright and license information can be found in the LICENSE file * distributed with this code, or at http://mpas-dev.github.com/license.html */ - +#ifdef MPAS_SCOTCH #include #include #include @@ -192,3 +192,4 @@ void scotchm_stratexit(void * strat_ptr) { return SCOTCH_stratExit((SCOTCH_Strat *) strat_ptr); } +#endif \ No newline at end of file From f4f14c4b92f63b1e937656bcf8f79f69557c9184 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Sat, 6 Dec 2025 09:05:19 -0700 Subject: [PATCH 24/43] Refactoring Scotch logic Moving Scotch logic from mpas_block_decomp_cells_for_proc into a new subroutine mpas_block_decomp_scotch, in order to simplify the diff. In doing so, it also introduces some changes to logic. 
If mpas_block_decomp_cells_for_proc cannot find a suitable partition file AND the MPAS core has been built with Scotch, then mpas_block_decomp_scotch is invoked. If successful, this writes out a graph partition file which mpas_block_decomp_cells_for_proc then reads from disk again. --- src/framework/mpas_block_decomp.F | 498 ++++++++++++++++-------------- 1 file changed, 271 insertions(+), 227 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 77197ba775..0a7348caae 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -25,11 +25,9 @@ module mpas_block_decomp use mpas_derived_types use mpas_io_units use mpas_log +#ifdef MPAS_SCOTCH use mpas_ptscotch_interface - -!#ifdef MPAS_SCOTCH -!#include "ptscotchf.h" -!#endif +#endif type graph integer :: nVerticesTotal @@ -57,11 +55,6 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l block_count, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ use mpas_timer, only : mpas_timer_start, mpas_timer_stop -#ifdef MPAS_USE_MPI_F08 - use mpi_f08, only : MPI_Comm, MPI_COMM_WORLD, MPI_INTEGER, MPI_Comm_dup, MPI_Comm_free, MPI_Gather, MPI_Gatherv -#else - use mpi -#endif implicit none @@ -83,33 +76,13 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), allocatable :: local_block_list integer, dimension(:,:), allocatable :: sorted_local_cell_list - integer, dimension(:), allocatable ::global_block_id_arr, local_block_id_arr, owning_proc_arr - integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k - integer :: blocks_per_proc, err, ierr + integer :: i, global_block_id, local_block_id, owning_proc, iunit, istatus + integer :: blocks_per_proc, err integer, dimension(:), pointer :: local_nvertices - integer :: num_local_vertices !< Number of local vertices for this processor - character (len=StrKIND) :: 
filename, msg + character (len=StrKIND) :: filename logical :: no_blocks logical :: useScotch -#ifdef MPAS_SCOTCH - integer :: nLocEdgesGraph = 0, nLocVerticesGraph = 0, edgelocsiz = 0, npart = 1 - character (len=StrKIND) :: partitionFilePrefix - integer, dimension(:), allocatable :: edgeloctab, vertloctab - doubleprecision :: stradat (scotch_stratdim) - doubleprecision :: scotchgraph (scotch_graphdim) - doubleprecision :: scotchdgraph (SCOTCH_DGRAPHDIM) - doubleprecision :: scotchdgraph_redist (SCOTCH_DGRAPHDIM) - integer :: mpi_ierr -#ifdef MPAS_USE_MPI_F08 - type (MPI_Comm) :: localcomm -#else - integer :: localcomm -#endif - - integer :: vertglbnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx - integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval -#endif no_blocks = .false. @@ -129,21 +102,10 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if(dminfo % total_blocks > 1) then allocate(local_nvertices(dminfo % nprocs)) allocate(global_start(dminfo % nprocs)) - allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) allocate(global_list(partial_global_graph_info % nVerticesTotal)) - allocate(local_block_id_arr(partial_global_graph_info % nVertices)) - allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) - useScotch = .false. - if (dminfo % my_proc_id == IO_NODE) then - if ( trim(blockFilePrefix) == '' ) then - call mpas_log_write("Namelist option config_block_decomp_file_prefix is set to ''", MPAS_LOG_WARN) -#ifdef MPAS_SCOTCH - useScotch = .true. 
-#else - call mpas_log_write('Either build MPAS with the Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) -#endif - else + if (dminfo % my_proc_id == IO_NODE) then + if (dminfo % total_blocks < 10) then write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks else if (dminfo % total_blocks < 100) then @@ -166,20 +128,271 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) if (istatus /= 0) then - call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_WARN, intArgs=(/dminfo % total_blocks/) ) - call mpas_log_write('Filename: '//trim(filename),MPAS_LOG_WARN) #ifdef MPAS_SCOTCH useScotch = .true. -#else - call mpas_log_write('Either build MPAS with Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) +#else + call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_ERR, intArgs=(/dminfo % total_blocks/) ) + call mpas_log_write('Filename: '//trim(filename), MPAS_LOG_CRIT) +#endif + else + useScotch = .false. + end if + end if + + call mpas_dmpar_bcast_logical(dminfo, useScotch) + + if (useScotch) then ! 
Using PT-Scotch across all MPI ranks + + call mpas_block_decomp_scotch(dminfo, partial_global_graph_info, dminfo % total_blocks, blockFilePrefix) + + if (dminfo % my_proc_id == IO_NODE) then + open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) + if (istatus /= 0) then + call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_ERR, intArgs=(/dminfo % total_blocks/) ) + call mpas_log_write('Filename: '//trim(filename), MPAS_LOG_CRIT) + end if + end if + end if + + if (dminfo % my_proc_id == IO_NODE) then + + local_nvertices(:) = 0 + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*, iostat=err) global_block_id + + if ( err .ne. 0 ) then + call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 + end do + + read(unit=iunit, fmt=*, iostat=err) + + if ( err == 0 ) then + call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if + + global_start(1) = 1 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + rewind(unit=iunit) + + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*, iostat=err) global_block_id + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_list(global_start(owning_proc+1)) = i + global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + end do + + global_start(1) = 0 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) + allocate(local_cell_list(local_nvertices(dminfo % 
my_proc_id + 1))) + allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_cell_list) + + ! Reset global start for second read of global_block_list + global_start(1) = 1 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + rewind(unit=iunit) + + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*) global_block_id + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_list(global_start(owning_proc+1)) = global_block_id + global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + end do + + ! Recompute global start after second read of global_block_list + global_start(1) = 0 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_block_list) + + close(unit=iunit) + call mpas_release_unit(iunit) + + else + + call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) + allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) + allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_cell_list) + + call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & + global_start, local_nvertices, global_list, local_block_list) + end if + + if(blocks_per_proc == 0) then + no_blocks = .true. 
+ blocks_per_proc = 1 + end if + + if(no_blocks) then + allocate(block_id(blocks_per_proc)) + allocate(block_start(blocks_per_proc)) + allocate(block_count(blocks_per_proc)) + + block_id(1) = numBlocks + 1 + block_start(1) = 0 + block_count(1) = 0 + else + allocate(sorted_local_cell_list(2, local_nvertices(dminfo % my_proc_id + 1))) + allocate(block_id(blocks_per_proc)) + allocate(block_start(blocks_per_proc)) + allocate(block_count(blocks_per_proc)) + + do i = 1, blocks_per_proc + block_start = 0 + block_count = 0 + end do + + do i = 1,local_nvertices(dminfo % my_proc_id +1) + call mpas_get_local_block_id(dminfo, local_block_list(i), local_block_id) + + block_id(local_block_id+1) = local_block_list(i) + + sorted_local_cell_list(1, i) = local_block_list(i) + sorted_local_cell_list(2, i) = local_cell_list(i) + + block_count(local_block_id+1) = block_count(local_block_id+1) + 1 + end do + + call mpas_quicksort(local_nvertices(dminfo % my_proc_id + 1), sorted_local_cell_list) + + do i = 1, local_nvertices(dminfo % my_proc_id+1) + local_cell_list(i) = sorted_local_cell_list(2, i) + end do + + do i = 2,blocks_per_proc + block_start(i) = block_start(i-1) + block_count(i-1) + end do + + deallocate(sorted_local_cell_list) + deallocate(local_block_list) + deallocate(local_nvertices) + deallocate(global_start) + deallocate(global_list) + end if + else + + if (dminfo % my_proc_id == IO_NODE) then + allocate(local_cell_list(partial_global_graph_info % nVerticesTotal)) + allocate(block_id(1)) + allocate(block_start(1)) + allocate(block_count(1)) + block_id(1) = 0 + block_start(1) = 0 + block_count(1) = size(local_cell_list) + do i=1,size(local_cell_list) + local_cell_list(i) = i + end do + else + allocate(local_cell_list(1)) + allocate(block_id(1)) + allocate(block_start(1)) + allocate(block_count(1)) + local_cell_list(1) = 0 + block_id(1) = numBlocks + 1 + block_start(1) = 0 + block_count(1) = 0 + end if + end if + + call mpas_log_write('mpas_block_decomp_cells_for_proc 
successful ') + + call mpas_timer_stop('mpas_block_decomp_cells_for_proc') + + end subroutine mpas_block_decomp_cells_for_proc!}}} + + + + +!*********************************************************************** +! +! routine mpas_block_decomp_scotch +! +!> \brief Use PT-Scotch to generate the graph partitioning +!> \author Abishek Gopal +!> \date 12/05/25 +!> \details +!> This routine invokes the PT-Scotch library to first construct a distributed graph from the +!> partial global graph information read by each processor, then partitions the graph into the +!> specified number of blocks, and after redistributing the graph, it finally gathers all the +!> local block ids (for each MPI rank) to the IO_NODE to write out to a partition file with the +!> specified prefix. +! +!----------------------------------------------------------------------- + subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBlocks, blockFilePrefix)!{{{ + + use mpas_timer, only : mpas_timer_start, mpas_timer_stop +#ifdef MPAS_USE_MPI_F08 + use mpi_f08, only : MPI_Comm, MPI_COMM_WORLD, MPI_INTEGER, MPI_Comm_dup, MPI_Comm_free, MPI_Gather, MPI_Gatherv +#else + use mpi #endif - end if ! istatus /= 0 - end if ! trim(blockFilePrefix) == '' - end if ! 
dminfo % my_proc_id == IO_NODE - call mpas_dmpar_bcast_logical(dminfo, useScotch) + implicit none + + type (dm_info), intent(inout) :: dminfo !< Input: domain information + type (graph), intent(in) :: partial_global_graph_info !< Input: Global graph information + integer, intent(in) :: totalBlocks !< Input: Number of blocks (from config_num_blocks) + character (len=*), intent(in) :: blockFilePrefix !< Input: File prefix for block decomposition + + integer, dimension(:), pointer :: global_start + integer, dimension(:), allocatable :: local_cell_list + integer, dimension(:), allocatable :: local_block_list + + integer, dimension(:), allocatable ::global_block_id_arr, local_block_id_arr + integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k + integer :: blocks_per_proc, err, ierr + integer, dimension(:), pointer :: local_nvertices + integer :: num_local_vertices !< Number of local vertices for this processor + character (len=StrKIND) :: filename, msg + + logical :: no_blocks +#ifdef MPAS_SCOTCH + integer :: nLocEdgesGraph = 0, nLocVerticesGraph = 0, edgelocsiz = 0, npart = 1 + character (len=StrKIND) :: partitionFilePrefix + integer, dimension(:), allocatable :: edgeloctab, vertloctab + doubleprecision :: stradat (scotch_stratdim) + doubleprecision :: scotchgraph (SCOTCH_GRAPHDIM) + doubleprecision :: scotchdgraph (SCOTCH_DGRAPHDIM) + doubleprecision :: scotchdgraph_redist (SCOTCH_DGRAPHDIM) + integer :: mpi_ierr +#ifdef MPAS_USE_MPI_F08 + type (MPI_Comm) :: localcomm +#else + integer :: localcomm +#endif + + integer :: vertglbnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx + integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval +#endif + + allocate(local_nvertices(dminfo % nprocs)) + allocate(global_start(dminfo % nprocs)) + allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) + allocate(local_block_id_arr(partial_global_graph_info % nVertices)) 
- if (useScotch) then ! Using PT-Scotch across all MPI ranks #ifdef MPAS_SCOTCH call mpas_timer_start('scotch_total') @@ -263,8 +476,9 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_timer_start('scotch_graph_partitioning') ! Partition the distributed graph and save the result in local_block_id_arr - ! npart = dminfo % nProcs - ! ierr = scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) + ! TODO: Should this be totalBlocks instead of nProcs? + npart = dminfo % nProcs + ierr = scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) ! do i=1,partial_global_graph_info % nVertices ! call mpas_log_write('AFTER PART rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i, local_block_id_arr(i)/) ) ! end do @@ -356,178 +570,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_timer_stop('scotch_total') call mpas_log_write('Scotch partition successful') #endif - else ! useScotch = .false. - if (dminfo % my_proc_id == IO_NODE) then - call mpas_log_write('Using block decomposition file: '//trim(filename)) - - local_nvertices(:) = 0 - do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*, iostat=err) global_block_id - - if ( err .ne. 
0 ) then - call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) - end if - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 - end do - - read(unit=iunit, fmt=*, iostat=err) - - if ( err == 0 ) then - call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) - end if - - global_start(1) = 1 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - end do - - rewind(unit=iunit) - - do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*, iostat=err) global_block_id - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - global_list(global_start(owning_proc+1)) = i - global_start(owning_proc+1) = global_start(owning_proc+1) + 1 - end do - - global_start(1) = 0 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - end do - - call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) - allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) - allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) - - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_cell_list) - - ! 
Reset global start for second read of global_block_list - global_start(1) = 1 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - end do - - rewind(unit=iunit) - - do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*) global_block_id - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - global_list(global_start(owning_proc+1)) = global_block_id - global_start(owning_proc+1) = global_start(owning_proc+1) + 1 - end do - - ! Recompute global start after second read of global_block_list - global_start(1) = 0 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) - end do - - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_block_list) - - close(unit=iunit) - call mpas_release_unit(iunit) - - else ! dminfo % my_proc_id == IO_NODE - - call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) - allocate(local_cell_list(local_nvertices(dminfo % my_proc_id + 1))) - allocate(local_block_list(local_nvertices(dminfo % my_proc_id + 1))) - - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_cell_list) - - call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & - global_start, local_nvertices, global_list, local_block_list) - end if ! dminfo % my_proc_id == IO_NODE - num_local_vertices = local_nvertices(dminfo % my_proc_id + 1) - deallocate(local_nvertices) - end if ! useScotch - - if(blocks_per_proc == 0) then - no_blocks = .true. 
- blocks_per_proc = 1 - end if - - if(no_blocks) then - allocate(block_id(blocks_per_proc)) - allocate(block_start(blocks_per_proc)) - allocate(block_count(blocks_per_proc)) - - block_id(1) = numBlocks + 1 - block_start(1) = 0 - block_count(1) = 0 - else - allocate(sorted_local_cell_list(2, num_local_vertices)) - allocate(block_id(blocks_per_proc)) - allocate(block_start(blocks_per_proc)) - allocate(block_count(blocks_per_proc)) - - do i = 1, blocks_per_proc - block_start = 0 - block_count = 0 - end do - - do i = 1,num_local_vertices - call mpas_get_local_block_id(dminfo, local_block_list(i), local_block_id) - - block_id(local_block_id+1) = local_block_list(i) - - sorted_local_cell_list(1, i) = local_block_list(i) - sorted_local_cell_list(2, i) = local_cell_list(i) - - block_count(local_block_id+1) = block_count(local_block_id+1) + 1 - end do - - call mpas_quicksort(num_local_vertices, sorted_local_cell_list) - - do i = 1, num_local_vertices - local_cell_list(i) = sorted_local_cell_list(2, i) - end do - - do i = 2,blocks_per_proc - block_start(i) = block_start(i-1) + block_count(i-1) - end do - - deallocate(sorted_local_cell_list) - deallocate(local_block_list) - deallocate(global_start) - deallocate(global_list) - end if - else - - if (dminfo % my_proc_id == IO_NODE) then - allocate(local_cell_list(partial_global_graph_info % nVerticesTotal)) - allocate(block_id(1)) - allocate(block_start(1)) - allocate(block_count(1)) - block_id(1) = 0 - block_start(1) = 0 - block_count(1) = size(local_cell_list) - do i=1,size(local_cell_list) - local_cell_list(i) = i - end do - else - allocate(local_cell_list(1)) - allocate(block_id(1)) - allocate(block_start(1)) - allocate(block_count(1)) - local_cell_list(1) = 0 - block_id(1) = numBlocks + 1 - block_start(1) = 0 - block_count(1) = 0 - end if - end if - call mpas_log_write('mpas_block_decomp_cells_for_proc successful ') - - call mpas_timer_stop('mpas_block_decomp_cells_for_proc') - - end subroutine 
mpas_block_decomp_cells_for_proc!}}} + end subroutine mpas_block_decomp_scotch !*********************************************************************** ! ! routine mpas_block_decomp_partitioned_edge_list From 7accc40425ead1688fd6f36871a24804963fd8a6 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Sat, 6 Dec 2025 09:17:17 -0700 Subject: [PATCH 25/43] Removing timers from mpas_bootstrapping --- src/framework/mpas_bootstrapping.F | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index 4586bc2549..46e64392c9 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -81,7 +81,6 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p #ifdef MPAS_PIO_SUPPORT use pio, only : file_desc_t #endif - use mpas_timer, only : mpas_timer_start, mpas_timer_stop implicit none @@ -156,7 +155,6 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p nHalos = config_num_halos - call mpas_timer_start('bootstrap_framework_phase1') inputHandle = MPAS_io_open(trim(mesh_filename), MPAS_IO_READ, mesh_iotype, domain % ioContext, & pio_file_desc=pio_file_desc, ierr=ierr) @@ -432,7 +430,6 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p deallocate(block_count) deallocate(readingBlock) - call mpas_timer_stop('bootstrap_framework_phase1') end subroutine mpas_bootstrap_framework_phase1 !}}} From 697955f6bf2e947ed106b198f3f3c6c97c3b3afc Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Sat, 6 Dec 2025 09:18:14 -0700 Subject: [PATCH 26/43] Cleaning up whitespace --- src/framework/mpas_bootstrapping.F | 1 - 1 file changed, 1 deletion(-) diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index 46e64392c9..4241255e2a 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -430,7 +430,6 @@ subroutine 
mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p deallocate(block_count) deallocate(readingBlock) - end subroutine mpas_bootstrap_framework_phase1 !}}} From ff4e01254b2df696390f56df1a35136f98954040 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 8 Dec 2025 10:48:31 -0700 Subject: [PATCH 27/43] Apply a more consistent formatting --- src/framework/ptscotch_interface.c | 223 ++++++++++++++--------------- 1 file changed, 110 insertions(+), 113 deletions(-) diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index b92a34cb2c..e380f52715 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -18,137 +18,133 @@ typedef struct Dgraph_ { - unsigned int flagval; /*+ Graph properties +*/ - SCOTCH_Num baseval; /*+ Base index for edge/vertex arrays +*/ - SCOTCH_Num vertglbnbr; /*+ Global number of vertices +*/ - SCOTCH_Num vertglbmax; /*+ Maximum number of local vertices over all processes +*/ - SCOTCH_Num vertgstnbr; /*+ Number of local + ghost vertices +*/ - SCOTCH_Num vertgstnnd; /*+ vertgstnbr + baseval +*/ - SCOTCH_Num vertlocnbr; /*+ Local number of vertices +*/ - SCOTCH_Num vertlocnnd; /*+ Local number of vertices + baseval +*/ - SCOTCH_Num * vertloctax; /*+ Local vertex beginning index array [based] +*/ - SCOTCH_Num * vendloctax; /*+ Local vertex end index array [based] +*/ - SCOTCH_Num * veloloctax; /*+ Local vertex load array if present +*/ - SCOTCH_Num velolocsum; /*+ Local sum of all vertex loads +*/ - SCOTCH_Num veloglbsum; /*+ Global sum of all vertex loads +*/ - SCOTCH_Num * vnumloctax; /*+ Arrays of global vertex numbers in original graph +*/ - SCOTCH_Num * vlblloctax; /*+ Arrays of vertex labels (when read from file) +*/ - SCOTCH_Num edgeglbnbr; /*+ Global number of arcs +*/ - SCOTCH_Num edgeglbmax; /*+ Maximum number of local edges over all processes +*/ - SCOTCH_Num edgelocnbr; /*+ Number of local edges +*/ - SCOTCH_Num edgelocsiz; /*+ Size of local edge array 
(= edgelocnbr when compact) +*/ - SCOTCH_Num edgeglbsmx; /*+ Maximum size of local edge arrays over all processes +*/ - SCOTCH_Num * edgegsttax; /*+ Edge array holding local indices of neighbors [based] +*/ - SCOTCH_Num * edgeloctax; /*+ Edge array holding global neighbor numbers [based] +*/ - SCOTCH_Num * edloloctax; /*+ Edge load array +*/ - SCOTCH_Num degrglbmax; /*+ Maximum degree over all processes +*/ - SCOTCH_Num pkeyglbval; /*+ Communicator key value: folded communicators are distinct +*/ - MPI_Comm proccomm; /*+ Graph communicator +*/ - SCOTCH_Num procglbnbr; /*+ Number of processes sharing graph data +*/ - SCOTCH_Num proclocnum; /*+ Number of this process +*/ - SCOTCH_Num * procvrttab; /*+ Global array of vertex number ranges [+1,based] +*/ - SCOTCH_Num * proccnttab; /*+ Count array for local number of vertices +*/ - SCOTCH_Num * procdsptab; /*+ Displacement array with respect to proccnttab [+1,based] +*/ - SCOTCH_Num procngbnbr; /*+ Number of neighboring processes +*/ - SCOTCH_Num procngbmax; /*+ Maximum number of neighboring processes +*/ - SCOTCH_Num * procngbtab; /*+ Array of neighbor process numbers [sorted] +*/ - SCOTCH_Num * procrcvtab; /*+ Number of vertices to receive in ghost vertex sub-arrays +*/ - SCOTCH_Num procsndnbr; /*+ Overall size of local send array +*/ - SCOTCH_Num * procsndtab; /*+ Number of vertices to send in ghost vertex sub-arrays +*/ - SCOTCH_Num * procsidtab; /*+ Array of indices to build communication vectors (send) +*/ - SCOTCH_Num procsidnbr; /*+ Size of the send index array +*/ + unsigned int flagval; /*+ Graph properties +*/ + SCOTCH_Num baseval; /*+ Base index for edge/vertex arrays +*/ + SCOTCH_Num vertglbnbr;/*+ Global number of vertices +*/ + SCOTCH_Num vertglbmax;/*+ Maximum number of local vertices over all processes +*/ + SCOTCH_Num vertgstnbr;/*+ Number of local + ghost vertices +*/ + SCOTCH_Num vertgstnnd;/*+ vertgstnbr + baseval +*/ + SCOTCH_Num vertlocnbr;/*+ Local number of vertices +*/ + SCOTCH_Num 
vertlocnnd;/*+ Local number of vertices + baseval +*/ + SCOTCH_Num *vertloctax;/*+ Local vertex beginning index array [based] +*/ + SCOTCH_Num *vendloctax;/*+ Local vertex end index array [based] +*/ + SCOTCH_Num *veloloctax;/*+ Local vertex load array if present +*/ + SCOTCH_Num velolocsum; /*+ Local sum of all vertex loads +*/ + SCOTCH_Num veloglbsum; /*+ Global sum of all vertex loads +*/ + SCOTCH_Num *vnumloctax;/*+ Arrays of global vertex numbers in original graph +*/ + SCOTCH_Num *vlblloctax;/*+ Arrays of vertex labels (when read from file) +*/ + SCOTCH_Num edgeglbnbr; /*+ Global number of arcs +*/ + SCOTCH_Num edgeglbmax; /*+ Maximum number of local edges over all processes +*/ + SCOTCH_Num edgelocnbr; /*+ Number of local edges +*/ + SCOTCH_Num edgelocsiz; /*+ Size of local edge array (= edgelocnbr when compact) +*/ + SCOTCH_Num edgeglbsmx; /*+ Maximum size of local edge arrays over all processes +*/ + SCOTCH_Num *edgegsttax;/*+ Edge array holding local indices of neighbors [based] +*/ + SCOTCH_Num *edgeloctax;/*+ Edge array holding global neighbor numbers [based] +*/ + SCOTCH_Num *edloloctax;/*+ Edge load array +*/ + SCOTCH_Num degrglbmax; /*+ Maximum degree over all processes +*/ + SCOTCH_Num pkeyglbval; /*+ Communicator key value: folded communicators are distinct+*/ + MPI_Comm proccomm; /*+ Graph communicator +*/ + SCOTCH_Num procglbnbr; /*+ Number of processes sharing graph data +*/ + SCOTCH_Num proclocnum; /*+ Number of this process +*/ + SCOTCH_Num *procvrttab;/*+ Global array of vertex number ranges [+1,based] +*/ + SCOTCH_Num *proccnttab;/*+ Count array for local number of vertices +*/ + SCOTCH_Num *procdsptab;/*+ Displacement array with respect to proccnttab [+1,based] +*/ + SCOTCH_Num procngbnbr; /*+ Number of neighboring processes +*/ + SCOTCH_Num procngbmax; /*+ Maximum number of neighboring processes +*/ + SCOTCH_Num *procngbtab;/*+ Array of neighbor process numbers [sorted] +*/ + SCOTCH_Num *procrcvtab;/*+ Number of vertices to receive in 
ghost vertex sub-arrays +*/ + SCOTCH_Num procsndnbr; /*+ Overall size of local send array +*/ + SCOTCH_Num *procsndtab;/*+ Number of vertices to send in ghost vertex sub-arrays +*/ + SCOTCH_Num *procsidtab;/*+ Array of indices to build communication vectors (send) +*/ + SCOTCH_Num procsidnbr; /*+ Size of the send index array +*/ } Dgraph2; - -int scotchm_dgraphinit(void * ptr, int localcomm) +int scotchm_dgraphinit(void *ptr, int localcomm) { MPI_Comm comm; MPI_Comm comm2; - int size, rank, err; comm = MPI_Comm_f2c((MPI_Fint)localcomm); - SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; - - err = SCOTCH_dgraphInit(dgraph, comm); + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *)ptr; - return err; + err = SCOTCH_dgraphInit(dgraph, comm); + return err; } -int scotchm_dgraphbuild(void * ptr, - SCOTCH_Num nVertices, - SCOTCH_Num * vertloctab_1, - SCOTCH_Num nLocEdgesGraph, - SCOTCH_Num edgelocsiz_1, - SCOTCH_Num *adjncy -) + +int scotchm_dgraphbuild(void *ptr, + SCOTCH_Num nVertices, + SCOTCH_Num *vertloctab_1, + SCOTCH_Num nLocEdgesGraph, + SCOTCH_Num edgelocsiz_1, + SCOTCH_Num *adjncy) { - SCOTCH_Num baseval = 1; // Fortran-style 1-based indexing + SCOTCH_Num baseval = 1; /* Fortran-style 1-based indexing */ SCOTCH_Num vertlocnbr = nVertices; - SCOTCH_Num * veloloctab = NULL; // vertex weights not used - SCOTCH_Num * vlblloctab = NULL; // vertex labels not used + SCOTCH_Num *veloloctab = NULL; /* vertex weights not used */ + SCOTCH_Num *vlblloctab = NULL; /* vertex labels not used */ SCOTCH_Num edgelocnbr = nLocEdgesGraph; - SCOTCH_Num edgelocsiz = edgelocsiz_1; - SCOTCH_Num * edgegsttab = NULL; // Optional array holding the local and ghost indices - SCOTCH_Num * edloloctab = NULL; // Optional array of integer loads for each local edge - - SCOTCH_Num * vertloctab = (SCOTCH_Num *) vertloctab_1; - SCOTCH_Num * vendloctab = vertloctab_1 + 1; - SCOTCH_Num * edgeloctab = (SCOTCH_Num *) adjncy; - - int i,err; - - SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; - - err = 
SCOTCH_dgraphBuild (dgraph, - baseval, - vertlocnbr, - vertlocnbr, - vertloctab, - vendloctab, - veloloctab, - vlblloctab, - edgelocnbr, - edgelocsiz, - edgeloctab, - edgegsttab, - edloloctab); + SCOTCH_Num edgelocsiz = edgelocsiz_1; + SCOTCH_Num *edgegsttab = NULL; /* Optional array holding the local and ghost indices */ + SCOTCH_Num *edloloctab = NULL; /* Optional array of integer loads for each local edge */ + + SCOTCH_Num *vertloctab = (SCOTCH_Num *)vertloctab_1; + SCOTCH_Num *vendloctab = vertloctab_1 + 1; + SCOTCH_Num *edgeloctab = (SCOTCH_Num *)adjncy; + + int i, err; + + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *)ptr; + + err = SCOTCH_dgraphBuild(dgraph, + baseval, + vertlocnbr, + vertlocnbr, + vertloctab, + vendloctab, + veloloctab, + vlblloctab, + edgelocnbr, + edgelocsiz, + edgeloctab, + edgegsttab, + edloloctab); return err; - } - -int scotchm_dgraphcheck(void * ptr) +int scotchm_dgraphcheck(void *ptr) { - return SCOTCH_dgraphCheck((SCOTCH_Dgraph *) ptr); + return SCOTCH_dgraphCheck((SCOTCH_Dgraph *)ptr); } -int scotchm_dgraphpart(void * ptr, SCOTCH_Num num_part, void * ptr_strat, SCOTCH_Num * parttab){ - SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *) ptr; - SCOTCH_Strat *strat = (SCOTCH_Strat *) ptr_strat; +int scotchm_dgraphpart(void *ptr, SCOTCH_Num num_part, void *ptr_strat, SCOTCH_Num *parttab) +{ + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *)ptr; + SCOTCH_Strat *strat = (SCOTCH_Strat *)ptr_strat; return SCOTCH_dgraphPart(dgraph, num_part, strat, parttab); } -int scotchm_dgraphredist(void * ptr, SCOTCH_Num *partloctab, void * ptr_out, SCOTCH_Num *vertlocnbr){ - - SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *) ptr; - SCOTCH_Dgraph *dgraph_out = (SCOTCH_Dgraph *) ptr_out; - SCOTCH_Num * permgsttab = NULL; // Redistribution permutation array - SCOTCH_Num vertlocdlt = 0; // Extra size of local vertex array - SCOTCH_Num edgelocdlt = 0; // Extra size of local edge array +int scotchm_dgraphredist(void *ptr, SCOTCH_Num *partloctab, void *ptr_out, SCOTCH_Num 
*vertlocnbr) +{ + SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *)ptr; + SCOTCH_Dgraph *dgraph_out = (SCOTCH_Dgraph *)ptr_out; + SCOTCH_Num *permgsttab = NULL; /* Redistribution permutation array */ + SCOTCH_Num vertlocdlt = 0; /* Extra size of local vertex array */ + SCOTCH_Num edgelocdlt = 0; /* Extra size of local edge array */ int err; - err = SCOTCH_dgraphRedist (dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); + err = SCOTCH_dgraphRedist(dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); - Dgraph2 *dgraph = (Dgraph2 *) dgraph_out; + Dgraph2 *dgraph = (Dgraph2 *)dgraph_out; *vertlocnbr = dgraph->vertlocnbr; @@ -156,40 +152,41 @@ int scotchm_dgraphredist(void * ptr, SCOTCH_Num *partloctab, void * ptr_out, SCO } - -int scotchm_dgraphout(void * ptr, SCOTCH_Num * cell_list){ - - SCOTCH_Num * permgsttab = NULL; // Redistribution permutation array - SCOTCH_Num vertlocdlt = 0; // Extra size of local vertex array - SCOTCH_Num edgelocdlt = 0; // Extra size of local edge array +int scotchm_dgraphout(void *ptr, SCOTCH_Num *cell_list) +{ + SCOTCH_Num *permgsttab = NULL; /* Redistribution permutation array */ + SCOTCH_Num vertlocdlt = 0; /* Extra size of local vertex array */ + SCOTCH_Num edgelocdlt = 0; /* Extra size of local edge array */ int err; - Dgraph2 *dgraph = (Dgraph2 *) ptr; + Dgraph2 *dgraph = (Dgraph2 *)ptr; - for (SCOTCH_Num i=0; i < dgraph->vertlocnbr; i++) { + for (SCOTCH_Num i = 0; i < dgraph->vertlocnbr; i++) { cell_list[i] = *(dgraph->vlblloctax + dgraph->baseval + i); } + return err; } void scotchm_dgraphexit(void *ptr) { - return SCOTCH_dgraphExit((SCOTCH_Dgraph *) ptr); + return SCOTCH_dgraphExit((SCOTCH_Dgraph *)ptr); } -int scotchm_stratinit(void * strat_ptr) + +int scotchm_stratinit(void *strat_ptr) { - SCOTCH_stratInit((SCOTCH_Strat *) strat_ptr); - //SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATDEFAULT, 16, 16, 0.03); - SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, 
SCOTCH_STRATSCALABILITY, 1, 0, 0.05); - + SCOTCH_stratInit((SCOTCH_Strat *)strat_ptr); + /*SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATDEFAULT, 16, 16, 0.03);*/ + SCOTCH_stratDgraphMapBuild((SCOTCH_Strat *)strat_ptr, SCOTCH_STRATSCALABILITY, 1, 0, 0.05); return 0; } -void scotchm_stratexit(void * strat_ptr) + +void scotchm_stratexit(void *strat_ptr) { - return SCOTCH_stratExit((SCOTCH_Strat *) strat_ptr); + return SCOTCH_stratExit((SCOTCH_Strat *)strat_ptr); } #endif \ No newline at end of file From 38d6a8671fe484f599da48e0b8ba131431a25a35 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 8 Dec 2025 11:08:16 -0700 Subject: [PATCH 28/43] Adding comments and removing return for void calls --- src/framework/ptscotch_interface.c | 138 ++++++++++++++++++++++++++++- 1 file changed, 135 insertions(+), 3 deletions(-) diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index e380f52715..db2ff2fd08 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, The University Corporation for Atmospheric Research (UCAR). + * Copyright (c) 2025, The University Corporation for Atmospheric Research (UCAR). * * Unless noted otherwise source code is licensed under the BSD license. * Additional copyright and license information can be found in the LICENSE file @@ -60,6 +60,20 @@ typedef struct Dgraph_ { } Dgraph2; +/******************************************************************************** + * + * scotchm_dgraphinit + * + * Initialize a SCOTCH distributed graph object using a Fortran MPI communicator. + * + * Parameters: + * ptr - pointer to a `SCOTCH_Dgraph` structure (as `void *`) + * localcomm - Fortran MPI communicator handle (`MPI_Fint`) passed as `int` + * + * Returns: + * integer error code returned by `SCOTCH_dgraphInit` (0 on success). 
+ * + ********************************************************************************/ int scotchm_dgraphinit(void *ptr, int localcomm) { MPI_Comm comm; @@ -76,6 +90,24 @@ int scotchm_dgraphinit(void *ptr, int localcomm) } +/******************************************************************************** + * + * scotchm_dgraphbuild + * + * Build a SCOTCH distributed graph from local vertex/edge arrays. + * + * Parameters: + * ptr - pointer to a `SCOTCH_Dgraph` structure (as `void *`) + * nVertices - number of local vertices + * vertloctab_1 - pointer to Fortran-style vertex index array (based) + * nLocEdgesGraph - number of local edges in the distributed graph + * edgelocsiz_1 - size of the local edge array + * adjncy - adjacency list array (edge destinations) + * + * Returns: + * integer error code returned by `SCOTCH_dgraphBuild` (0 on success). + * + ********************************************************************************/ int scotchm_dgraphbuild(void *ptr, SCOTCH_Num nVertices, SCOTCH_Num *vertloctab_1, @@ -118,12 +150,42 @@ int scotchm_dgraphbuild(void *ptr, } +/******************************************************************************** + * + * scotchm_dgraphcheck + * + * Perform an internal consistency check of a SCOTCH distributed graph. + * + * Parameters: + * ptr - pointer to a `SCOTCH_Dgraph` structure (as `void *`) + * + * Returns: + * integer error code returned by `SCOTCH_dgraphCheck` (0 on success). + * + ********************************************************************************/ int scotchm_dgraphcheck(void *ptr) { return SCOTCH_dgraphCheck((SCOTCH_Dgraph *)ptr); } +/******************************************************************************** + * + * scotchm_dgraphpart + * + * Partition the distributed graph into `num_part` parts using the provided + * SCOTCH strategy object. 
+ * + * Parameters: + * ptr - pointer to a `SCOTCH_Dgraph` structure (as `void *`) + * num_part - number of partitions + * ptr_strat - pointer to a `SCOTCH_Strat` structure (as `void *`) + * parttab - output array receiving part numbers for local vertices + * + * Returns: + * integer error code returned by `SCOTCH_dgraphPart` (0 on success). + * + ********************************************************************************/ int scotchm_dgraphpart(void *ptr, SCOTCH_Num num_part, void *ptr_strat, SCOTCH_Num *parttab) { SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *)ptr; @@ -133,6 +195,22 @@ int scotchm_dgraphpart(void *ptr, SCOTCH_Num num_part, void *ptr_strat, SCOTCH_N } +/******************************************************************************** + * + * scotchm_dgraphredist + * + * Redistribute a distributed SCOTCH graph given the partition table. + * + * Parameters: + * ptr - pointer to input `SCOTCH_Dgraph` structure (as `void *`) + * partloctab - partition table for local vertices + * ptr_out - pointer to output `SCOTCH_Dgraph` structure (as `void *`) + * vertlocnbr - pointer to return the number of local vertices in output + * + * Returns: + * integer error code returned by `SCOTCH_dgraphRedist` (0 on success). + * + ********************************************************************************/ int scotchm_dgraphredist(void *ptr, SCOTCH_Num *partloctab, void *ptr_out, SCOTCH_Num *vertlocnbr) { SCOTCH_Dgraph *dgraph_in = (SCOTCH_Dgraph *)ptr; @@ -152,6 +230,20 @@ int scotchm_dgraphredist(void *ptr, SCOTCH_Num *partloctab, void *ptr_out, SCOTC } +/******************************************************************************** + * + * scotchm_dgraphout + * + * Extract vertex labels (or stored IDs) for local vertices into `cell_list`. 
+ * + * Parameters: + * ptr - pointer to a `Dgraph2` (or `SCOTCH_Dgraph`) structure (as `void *`) + * cell_list - output array to receive vertex labels for local vertices + * + * Returns: + * integer error code (currently returns the local `err` variable; 0 on success). + * + ********************************************************************************/ int scotchm_dgraphout(void *ptr, SCOTCH_Num *cell_list) { SCOTCH_Num *permgsttab = NULL; /* Redistribution permutation array */ @@ -169,12 +261,39 @@ int scotchm_dgraphout(void *ptr, SCOTCH_Num *cell_list) } +/******************************************************************************** + * + * scotchm_dgraphexit + * + * Finalize/cleanup a `SCOTCH_Dgraph` object. + * + * Parameters: + * ptr - pointer to a `SCOTCH_Dgraph` structure (as `void *`) + * + * Returns: + * nothing (wraps `SCOTCH_dgraphExit`). + * + ********************************************************************************/ void scotchm_dgraphexit(void *ptr) { - return SCOTCH_dgraphExit((SCOTCH_Dgraph *)ptr); + SCOTCH_dgraphExit((SCOTCH_Dgraph *)ptr); } +/******************************************************************************** + * + * scotchm_stratinit + * + * Initialize a SCOTCH strategy object and build a default strategy for + * distributed graph mapping. + * + * Parameters: + * strat_ptr - pointer to a `SCOTCH_Strat` structure (as `void *`) + * + * Returns: + * integer (0 on success). + * + ********************************************************************************/ int scotchm_stratinit(void *strat_ptr) { SCOTCH_stratInit((SCOTCH_Strat *)strat_ptr); @@ -185,8 +304,21 @@ int scotchm_stratinit(void *strat_ptr) } +/* ******************************************************************************** + * + * scotchm_stratexit + * + * Finalize/cleanup a `SCOTCH_Strat` strategy object. + * + * Parameters: + * strat_ptr - pointer to a `SCOTCH_Strat` structure (as `void *`) + * + * Returns: + * nothing (wraps `SCOTCH_stratExit`). 
+ * + ********************************************************************************/ void scotchm_stratexit(void *strat_ptr) { - return SCOTCH_stratExit((SCOTCH_Strat *)strat_ptr); + SCOTCH_stratExit((SCOTCH_Strat *)strat_ptr); } #endif \ No newline at end of file From 602c79aa006c3b93304ad9d7e98387bbd551bf1f Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 11 Dec 2025 09:21:35 -0700 Subject: [PATCH 29/43] Adding some documentation --- src/framework/mpas_ptscotch_interface.F | 178 ++++++++++++++++++++---- 1 file changed, 148 insertions(+), 30 deletions(-) diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index 5205ed7231..aaff3362a3 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -8,6 +8,21 @@ module mpas_ptscotch_interface contains + !----------------------------------------------------------------------- + ! function scotch_dgraphinit + ! + !> \brief Initialize a SCOTCH distributed graph object + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Initializes a SCOTCH_Dgraph structure using a Fortran MPI communicator. + !> This function wraps the C function scotchm_dgraphinit. + !> Returns an error code (0 if successful) + !> \arguments + !> dgraph - SCOTCH_Dgraph structure to be initialized + !> comm - Fortran MPI communicator integer + ! + !----------------------------------------------------------------------- function scotch_dgraphinit(dgraph, comm) result(ierr) use iso_c_binding, only : c_ptr, c_loc use mpas_log, only : mpas_log_write @@ -40,10 +55,28 @@ end function scotchfdgraphinit end function scotch_dgraphinit - - function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) result(ierr) - - use iso_c_binding, only : c_ptr, c_loc + !----------------------------------------------------------------------- + ! function scotch_dgraphbuild + ! 
+ !> \brief Build a SCOTCH distributed graph from local vertex/edge arrays + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Constructs a SCOTCH_Dgraph from local vertex and edge connectivity data. + !> This function wraps the C function scotchm_dgraphbuild, and returns an + !> error code (0 if successful). + !> \arguments + !> dgraph - SCOTCH_Dgraph structure to be built + !> nVertices - Number of local vertices + !> vertloctab - Array of size (nVertices+1) + !> giving the start index of edges for each local vertex + !> nLocEdgesGraph - Total number of local edges in the graph + !> edgelocsiz - Size of the adjncy array + !> adjncy - Array of size nLocEdgesGraph containing the + !> adjacency list for local vertices + ! + !----------------------------------------------------------------------- + function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) result(ierr) use iso_c_binding, only : c_ptr, c_loc use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT @@ -90,9 +123,21 @@ end function scotchfdgraphbuild end function scotch_dgraphbuild - function scotch_dgraphcheck(dgraph) result(ierr) - - use mpas_log, only : mpas_log_write + !----------------------------------------------------------------------- + ! function scotch_dgraphcheck + ! + !> \brief Perform consistency check on a SCOTCH distributed graph + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Validates the internal structure of a SCOTCH_Dgraph for consistency. + !> This function wraps the C function scotchm_dgraphcheck. + !> Returns an error code (0 if successful) + !> \arguments + !> dgraph - SCOTCH_Dgraph structure to be checked + ! 
+ !----------------------------------------------------------------------- + function scotch_dgraphcheck(dgraph) result(ierr) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -122,10 +167,20 @@ end function scotchfdgraphcheck end function scotch_dgraphcheck - - subroutine scotch_dgraphexit(dgraph) - - use mpas_log, only : mpas_log_write + !----------------------------------------------------------------------- + ! subroutine scotch_dgraphexit + ! + !> \brief Finalize/cleanup a SCOTCH distributed graph object + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Deallocates internal structures associated with a SCOTCH_Dgraph. + !> This subroutine wraps the C function scotchm_dgraphexit. + !> \arguments + !> dgraph - SCOTCH_Dgraph structure to be finalized + ! + !----------------------------------------------------------------------- + subroutine scotch_dgraphexit(dgraph) use mpas_log, only : mpas_log_write use iso_c_binding, only : c_ptr, c_loc implicit none @@ -143,10 +198,21 @@ end subroutine scotchfdgraphexit end subroutine scotch_dgraphexit - - function scotch_stratinit(stradat) result(ierr) - - use mpas_log, only : mpas_log_write + !----------------------------------------------------------------------- + ! function scotch_stratinit + ! + !> \brief Initialize a SCOTCH strategy object + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Initializes a SCOTCH_Strat structure and builds a default strategy + !> for distributed graph mapping. This function wraps the C function + !> scotchm_stratinit. Returns an error code (0 if successful) + !> \arguments + !> stradat - SCOTCH_Strat structure to be initialized + ! 
+ !----------------------------------------------------------------------- + function scotch_stratinit(stradat) result(ierr) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -175,9 +241,20 @@ end function scotchfstratinit end function scotch_stratinit - subroutine scotch_stratexit(stradat) - - use mpas_log, only : mpas_log_write + !----------------------------------------------------------------------- + ! subroutine scotch_stratexit + ! + !> \brief Finalize/cleanup a SCOTCH strategy object + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Deallocates internal structures associated with a SCOTCH_Strat. + !> This subroutine wraps the C function scotchm_stratexit. + !> \arguments + !> stradat - SCOTCH_Strat structure to be finalized + ! + !----------------------------------------------------------------------- + subroutine scotch_stratexit(stradat) use mpas_log, only : mpas_log_write use iso_c_binding, only : c_ptr, c_loc implicit none @@ -195,10 +272,24 @@ end subroutine scotchfstratexit end subroutine scotch_stratexit - - function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) - - use mpas_log, only : mpas_log_write + !----------------------------------------------------------------------- + ! function scotch_dgraphpart + ! + !> \brief Partition a SCOTCH distributed graph + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Partitions the distributed graph into num_part parts using the + !> provided SCOTCH strategy object. This function wraps the C function + !> scotchm_dgraphpart. Returns an error code (0 if successful) + !> \arguments + !> dgraph - SCOTCH_Dgraph structure to be partitioned + !> num_part - Number of partitions + !> stradat - SCOTCH_Strat structure containing partitioning strategy + !> parttab - Output array of size equal to number of local vertices, + ! 
+ !----------------------------------------------------------------------- + function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -233,10 +324,25 @@ end function scotchfdgraphpart end function scotch_dgraphpart - - function scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) result(ierr) - - use mpas_log, only : mpas_log_write + !----------------------------------------------------------------------- + ! function scotch_dgraphredist + ! + !> \brief Redistribute a SCOTCH distributed graph according to partitions + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Redistributes the distributed graph structure based on a partition + !> table. This function wraps the C function scotchm_dgraphredist. + !> Returns an error code (0 if successful) + !> \arguments + !> dgraph - SCOTCH_Dgraph structure to be redistributed + !> parttab - Input array of size equal to number of local vertices, + !> containing partition assignments + !> dgraph_out - SCOTCH_Dgraph structure to hold redistributed graph + !> num_local_vertices - Number of local vertices in the redistributed graph + ! + !----------------------------------------------------------------------- + function scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) result(ierr) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -272,10 +378,22 @@ end function scotchfdgraphredist end function scotch_dgraphredist - - function scotch_dgraphout(dgraph, local_cell_list) result(ierr) - - use mpas_log, only : mpas_log_write + !----------------------------------------------------------------------- + ! function scotch_dgraphout + ! 
+ !> \brief Extract vertex labels from a SCOTCH distributed graph + !> \author Abishek Gopal + !> \date 8 Dec 2025 + !> \details + !> Extracts vertex labels or stored IDs for local vertices into the + !> output array. This function wraps the C function scotchm_dgraphout. + !> Returns an error code (0 if successful) + !> \arguments + !> dgraph - SCOTCH_Dgraph structure to extract from + !> local_cell_list - Output array to hold vertex labels for local vertices + ! + !----------------------------------------------------------------------- + function scotch_dgraphout(dgraph, local_cell_list) result(ierr) use mpas_log, only : mpas_log_write use iso_c_binding, only : c_ptr, c_loc implicit none From 2188fef0c9dbbc2437a9d0e62ee3dc69d503fa38 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 11 Dec 2025 14:42:27 -0700 Subject: [PATCH 30/43] Fixes and cleanup --- src/framework/mpas_ptscotch_interface.F | 42 +++++++++++++------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index aaff3362a3..4bf9b211ca 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -76,7 +76,8 @@ end function scotch_dgraphinit !> adjacency list for local vertices ! !----------------------------------------------------------------------- - function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) result(ierr) use iso_c_binding, only : c_ptr, c_loc + function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) result(ierr) + use iso_c_binding, only : c_ptr, c_loc use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT @@ -137,7 +138,8 @@ end function scotch_dgraphbuild !> dgraph - SCOTCH_Dgraph structure to be checked ! 
!----------------------------------------------------------------------- - function scotch_dgraphcheck(dgraph) result(ierr) use mpas_log, only : mpas_log_write + function scotch_dgraphcheck(dgraph) result(ierr) + use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -180,7 +182,8 @@ end function scotch_dgraphcheck !> dgraph - SCOTCH_Dgraph structure to be finalized ! !----------------------------------------------------------------------- - subroutine scotch_dgraphexit(dgraph) use mpas_log, only : mpas_log_write + subroutine scotch_dgraphexit(dgraph) + use mpas_log, only : mpas_log_write use iso_c_binding, only : c_ptr, c_loc implicit none @@ -212,7 +215,8 @@ end subroutine scotch_dgraphexit !> stradat - SCOTCH_Strat structure to be initialized ! !----------------------------------------------------------------------- - function scotch_stratinit(stradat) result(ierr) use mpas_log, only : mpas_log_write + function scotch_stratinit(stradat) result(ierr) + use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -254,7 +258,8 @@ end function scotch_stratinit !> stradat - SCOTCH_Strat structure to be finalized ! !----------------------------------------------------------------------- - subroutine scotch_stratexit(stradat) use mpas_log, only : mpas_log_write + subroutine scotch_stratexit(stradat) + use mpas_log, only : mpas_log_write use iso_c_binding, only : c_ptr, c_loc implicit none @@ -289,7 +294,8 @@ end subroutine scotch_stratexit !> parttab - Output array of size equal to number of local vertices, ! 
!----------------------------------------------------------------------- - function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) use mpas_log, only : mpas_log_write + function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) + use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -342,7 +348,8 @@ end function scotch_dgraphpart !> num_local_vertices - Number of local vertices in the redistributed graph ! !----------------------------------------------------------------------- - function scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) result(ierr) use mpas_log, only : mpas_log_write + function scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) result(ierr) + use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -379,21 +386,21 @@ end function scotchfdgraphredist end function scotch_dgraphredist !----------------------------------------------------------------------- - ! function scotch_dgraphout + ! function scotch_dgraphdata ! !> \brief Extract vertex labels from a SCOTCH distributed graph !> \author Abishek Gopal !> \date 8 Dec 2025 !> \details !> Extracts vertex labels or stored IDs for local vertices into the - !> output array. This function wraps the C function scotchm_dgraphout. - !> Returns an error code (0 if successful) + !> output array. This function wraps the C function scotchm_dgraphdata. !> \arguments !> dgraph - SCOTCH_Dgraph structure to extract from !> local_cell_list - Output array to hold vertex labels for local vertices ! 
!----------------------------------------------------------------------- - function scotch_dgraphout(dgraph, local_cell_list) result(ierr) use mpas_log, only : mpas_log_write + subroutine scotch_dgraphdata(dgraph, local_cell_list) + use mpas_log, only : mpas_log_write use iso_c_binding, only : c_ptr, c_loc implicit none @@ -401,22 +408,17 @@ function scotch_dgraphout(dgraph, local_cell_list) result(ierr) use mpas_ doubleprecision, target, intent(in) :: dgraph (SCOTCH_DGRAPHDIM) integer(SCOTCH_NUMSIZE), intent(out) :: local_cell_list(*) - ! Return value - integer :: ierr - interface - function scotchfdgraphout(dgraph_ptr, cell_list) bind(C, name='scotchm_dgraphout') result(err) + subroutine scotchfdgraphdata(dgraph_ptr, cell_list) bind(C, name='scotchm_dgraphdata') use iso_c_binding, only : c_int, c_ptr type(c_ptr), value :: dgraph_ptr integer(c_int) :: cell_list(*) - integer(c_int) :: err - end function scotchfdgraphout + end subroutine scotchfdgraphdata end interface - ierr = scotchfdgraphout(c_loc(dgraph), local_cell_list) - + call scotchfdgraphdata(c_loc(dgraph), local_cell_list) - end function scotch_dgraphout + end subroutine scotch_dgraphdata end module mpas_ptscotch_interface From 82450f202ba43d86825dbab8327fd3e5ae12924a Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 11 Dec 2025 14:43:34 -0700 Subject: [PATCH 31/43] Removing DGraph2 and using SCOTCH_dgraphData, SCOTCH_dgraphSize instead --- src/framework/ptscotch_interface.c | 76 +++++++----------------------- 1 file changed, 18 insertions(+), 58 deletions(-) diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index db2ff2fd08..792eae9221 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -16,50 +16,6 @@ #include "ptscotch.h" - -typedef struct Dgraph_ { - unsigned int flagval; /*+ Graph properties +*/ - SCOTCH_Num baseval; /*+ Base index for edge/vertex arrays +*/ - SCOTCH_Num vertglbnbr;/*+ Global number of vertices +*/ - 
SCOTCH_Num vertglbmax;/*+ Maximum number of local vertices over all processes +*/ - SCOTCH_Num vertgstnbr;/*+ Number of local + ghost vertices +*/ - SCOTCH_Num vertgstnnd;/*+ vertgstnbr + baseval +*/ - SCOTCH_Num vertlocnbr;/*+ Local number of vertices +*/ - SCOTCH_Num vertlocnnd;/*+ Local number of vertices + baseval +*/ - SCOTCH_Num *vertloctax;/*+ Local vertex beginning index array [based] +*/ - SCOTCH_Num *vendloctax;/*+ Local vertex end index array [based] +*/ - SCOTCH_Num *veloloctax;/*+ Local vertex load array if present +*/ - SCOTCH_Num velolocsum; /*+ Local sum of all vertex loads +*/ - SCOTCH_Num veloglbsum; /*+ Global sum of all vertex loads +*/ - SCOTCH_Num *vnumloctax;/*+ Arrays of global vertex numbers in original graph +*/ - SCOTCH_Num *vlblloctax;/*+ Arrays of vertex labels (when read from file) +*/ - SCOTCH_Num edgeglbnbr; /*+ Global number of arcs +*/ - SCOTCH_Num edgeglbmax; /*+ Maximum number of local edges over all processes +*/ - SCOTCH_Num edgelocnbr; /*+ Number of local edges +*/ - SCOTCH_Num edgelocsiz; /*+ Size of local edge array (= edgelocnbr when compact) +*/ - SCOTCH_Num edgeglbsmx; /*+ Maximum size of local edge arrays over all processes +*/ - SCOTCH_Num *edgegsttax;/*+ Edge array holding local indices of neighbors [based] +*/ - SCOTCH_Num *edgeloctax;/*+ Edge array holding global neighbor numbers [based] +*/ - SCOTCH_Num *edloloctax;/*+ Edge load array +*/ - SCOTCH_Num degrglbmax; /*+ Maximum degree over all processes +*/ - SCOTCH_Num pkeyglbval; /*+ Communicator key value: folded communicators are distinct+*/ - MPI_Comm proccomm; /*+ Graph communicator +*/ - SCOTCH_Num procglbnbr; /*+ Number of processes sharing graph data +*/ - SCOTCH_Num proclocnum; /*+ Number of this process +*/ - SCOTCH_Num *procvrttab;/*+ Global array of vertex number ranges [+1,based] +*/ - SCOTCH_Num *proccnttab;/*+ Count array for local number of vertices +*/ - SCOTCH_Num *procdsptab;/*+ Displacement array with respect to proccnttab [+1,based] +*/ - 
SCOTCH_Num procngbnbr; /*+ Number of neighboring processes +*/ - SCOTCH_Num procngbmax; /*+ Maximum number of neighboring processes +*/ - SCOTCH_Num *procngbtab;/*+ Array of neighbor process numbers [sorted] +*/ - SCOTCH_Num *procrcvtab;/*+ Number of vertices to receive in ghost vertex sub-arrays +*/ - SCOTCH_Num procsndnbr; /*+ Overall size of local send array +*/ - SCOTCH_Num *procsndtab;/*+ Number of vertices to send in ghost vertex sub-arrays +*/ - SCOTCH_Num *procsidtab;/*+ Array of indices to build communication vectors (send) +*/ - SCOTCH_Num procsidnbr; /*+ Size of the send index array +*/ -} Dgraph2; - - /******************************************************************************** * * scotchm_dgraphinit @@ -222,42 +178,46 @@ int scotchm_dgraphredist(void *ptr, SCOTCH_Num *partloctab, void *ptr_out, SCOTC err = SCOTCH_dgraphRedist(dgraph_in, partloctab, permgsttab, vertlocdlt, edgelocdlt, dgraph_out); - Dgraph2 *dgraph = (Dgraph2 *)dgraph_out; - - *vertlocnbr = dgraph->vertlocnbr; + // Call SCOTCH_dgraphSize to obtain the number of local vertices in the redistributed graph + SCOTCH_dgraphSize(dgraph_out, NULL, vertlocnbr, NULL, NULL); return err; } - /******************************************************************************** * - * scotchm_dgraphout + * scotchm_dgraphdata * * Extract vertex labels (or stored IDs) for local vertices into `cell_list`. * * Parameters: - * ptr - pointer to a `Dgraph2` (or `SCOTCH_Dgraph`) structure (as `void *`) + * ptr - pointer to a `SCOTCH_Dgraph` structure (as `void *`) * cell_list - output array to receive vertex labels for local vertices * * Returns: * integer error code (currently returns the local `err` variable; 0 on success). 
* ********************************************************************************/ -int scotchm_dgraphout(void *ptr, SCOTCH_Num *cell_list) +void scotchm_dgraphdata(void *ptr, SCOTCH_Num *cell_list) { - SCOTCH_Num *permgsttab = NULL; /* Redistribution permutation array */ - SCOTCH_Num vertlocdlt = 0; /* Extra size of local vertex array */ - SCOTCH_Num edgelocdlt = 0; /* Extra size of local edge array */ + int err; - Dgraph2 *dgraph = (Dgraph2 *)ptr; + SCOTCH_Num vertlocnbr; + SCOTCH_Num *vlblloctab; /* vertex labels */ + + SCOTCH_Dgraph *dgraph = (SCOTCH_Dgraph *)ptr; + + SCOTCH_dgraphData(dgraph, NULL, NULL, &vertlocnbr, NULL, NULL, + NULL, NULL, NULL, &vlblloctab, + NULL, NULL, NULL, + NULL, NULL, NULL, NULL); - for (SCOTCH_Num i = 0; i < dgraph->vertlocnbr; i++) { - cell_list[i] = *(dgraph->vlblloctax + dgraph->baseval + i); + // Copy vertex labels to output array + for (SCOTCH_Num i = 0; i < vertlocnbr; i++) { + cell_list[i] = vlblloctab[i]; } - return err; } From f92997e8bef5d7f843e70fbe3ff495cbce81bd85 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 11 Dec 2025 14:44:21 -0700 Subject: [PATCH 32/43] Fixes and cleanup --- src/framework/mpas_block_decomp.F | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 0a7348caae..9c3e894ca1 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -492,13 +492,19 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBloc local_block_id_arr, & ! Input: the partition array scotchdgraph_redist(1), & ! Output: re-distributed graph num_local_vertices) ! Output: number of local vertices - - allocate(local_cell_list(num_local_vertices)) - allocate(local_block_list(num_local_vertices)) - - ierr = scotch_dgraphout (scotchdgraph_redist(1), local_cell_list) + ! 
DO NOT REMOVE: This commented-out code is required if we want to read the local cell list directly after partitioning,
+      ! instead of reading it from the output partition file.
+      ! Extract the local cell list from the re-distributed graph.
+      ! allocate(local_cell_list(num_local_vertices))
+      ! call scotch_dgraphdata(scotchdgraph_redist(1), local_cell_list)
+      ! do i=1,num_local_vertices
+      !    call mpas_log_write('local_cell_list($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_cell_list(i)/))
+      ! end do
+
+      ! TODO: when num blocks > 0, the block id for a cell may differ from my_proc_id; verify that assigning my_proc_id to local_block_list is still correct.
+
+      allocate(local_block_list(num_local_vertices))
 
       local_block_list(:)=dminfo % my_proc_id
 
@@ -564,8 +570,15 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBloc
       call scotch_dgraphexit (scotchdgraph (1))
       call scotch_dgraphexit (scotchdgraph_redist (1))
       call scotch_stratexit (stradat (1))
+
       deallocate(edgeloctab)
       deallocate(vertloctab)
+      deallocate(local_block_list)
+      deallocate(local_nvertices)
+      deallocate(global_start)
+      deallocate(global_block_id_arr)
+      deallocate(local_block_id_arr)
+
       call MPI_Comm_free(localcomm, mpi_ierr)
       call mpas_timer_stop('scotch_total')
       call mpas_log_write('Scotch partition successful')

From 8849a0b5ff8499eed76fbbc5536e312fb3954fc7 Mon Sep 17 00:00:00 2001
From: Abishek Gopal
Date: Thu, 11 Dec 2025 14:55:55 -0700
Subject: [PATCH 33/43] Cleaning up whitespace and variables in
 mpas_block_decomp_scotch

---
 src/framework/mpas_block_decomp.F | 373 +++++++++++++++---------------
 1 file changed, 185 insertions(+), 188 deletions(-)

diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F
index 9c3e894ca1..80ed403329 100644
--- a/src/framework/mpas_block_decomp.F
+++ b/src/framework/mpas_block_decomp.F
@@ -341,6 +341,7 @@ end subroutine mpas_block_decomp_cells_for_proc!}}}
 !> specified prefix.
 !
!----------------------------------------------------------------------- +#ifdef MPAS_SCOTCH subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBlocks, blockFilePrefix)!{{{ use mpas_timer, only : mpas_timer_start, mpas_timer_stop @@ -362,14 +363,12 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBloc integer, dimension(:), allocatable :: local_block_list integer, dimension(:), allocatable ::global_block_id_arr, local_block_id_arr - integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k - integer :: blocks_per_proc, err, ierr + integer :: i, global_block_id, local_block_id, iunit, ounit, istatus, ostatus, j, k + integer :: err, ierr integer, dimension(:), pointer :: local_nvertices integer :: num_local_vertices !< Number of local vertices for this processor character (len=StrKIND) :: filename, msg - logical :: no_blocks -#ifdef MPAS_SCOTCH integer :: nLocEdgesGraph = 0, nLocVerticesGraph = 0, edgelocsiz = 0, npart = 1 character (len=StrKIND) :: partitionFilePrefix integer, dimension(:), allocatable :: edgeloctab, vertloctab @@ -383,208 +382,206 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBloc #else integer :: localcomm #endif - - integer :: vertglbnbr, vertlocmax, vertgstnbr, vertlocidx, vendlocidx, velolocidx, vlbllocidx - integer :: edgeglbnbr, edgelocnbr, edgelocidx, edgegstidx, edlolocidx, comm, baseval -#endif allocate(local_nvertices(dminfo % nprocs)) allocate(global_start(dminfo % nprocs)) allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) allocate(local_block_id_arr(partial_global_graph_info % nVertices)) -#ifdef MPAS_SCOTCH - call mpas_timer_start('scotch_total') - - call mpas_log_write('Using LibScotch for graph partitioning') - - ! Count the number of edges (including to ghost cells) in the portion of graph - ! owned by the current rank. Each edge is counted twice, once for each vertex, - ! 
with the exception of edges to ghost vertices, which are counted only once. - do i=1,partial_global_graph_info % nVertices - do j=1,partial_global_graph_info % nAdjacent(i) - if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle - nLocEdgesGraph = nLocEdgesGraph + 1 - ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) - end do + + call mpas_timer_start('scotch_total') + + call mpas_log_write('Using LibScotch for graph partitioning') + + ! Count the number of edges (including to ghost cells) in the portion of graph + ! owned by the current rank. Each edge is counted twice, once for each vertex, + ! with the exception of edges to ghost vertices, which are counted only once. + do i=1,partial_global_graph_info % nVertices + do j=1,partial_global_graph_info % nAdjacent(i) + if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle + nLocEdgesGraph = nLocEdgesGraph + 1 + ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) end do + end do - ! Holds the adjacency array for every local vertex - allocate(edgeloctab(nLocEdgesGraph)) - ! Array of start indices in edgeloctab for each local vertex - allocate(vertloctab(partial_global_graph_info % nVertices + 1)) - - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) - ! end do - - ! 
Fill up edgeloctab and vertloctab - k = 1 - do i=1,partial_global_graph_info % nVertices - vertloctab(i) = k - !call mpas_log_write('rank=$i, i=$i vertloctab= $i', intArgs=(/dminfo % my_proc_id, i,vertloctab(i)/) ) - !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) - do j=1,partial_global_graph_info % nAdjacent(i) - - if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle - - edgeloctab(k) = partial_global_graph_info % adjacencyList(j,i) - !call mpas_log_write('rank=$i k=$i edgeloctab= $i', intArgs=(/dminfo % my_proc_id, k,edgeloctab(k)/) ) - k = k + 1 - end do - end do - vertloctab(partial_global_graph_info % nVertices+1) = nLocEdgesGraph + 1 - - ! Duplicate the communicator to be used by Scotch - call MPI_Comm_dup(MPI_COMM_WORLD, localcomm, mpi_ierr) - if (mpi_ierr .ne. 0) then - call mpas_log_write('Cannot duplicate communicator') - endif - ! Initialize the Scotch graph data structure, and an extra one to hold the re-distributed graph + ! Holds the adjacency array for every local vertex + allocate(edgeloctab(nLocEdgesGraph)) + ! Array of start indices in edgeloctab for each local vertex + allocate(vertloctab(partial_global_graph_info % nVertices + 1)) + + ! do i=1,partial_global_graph_info % nVertices + ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) + ! end do + + ! 
Fill up edgeloctab and vertloctab + k = 1 + do i=1,partial_global_graph_info % nVertices + vertloctab(i) = k + !call mpas_log_write('rank=$i, i=$i vertloctab= $i', intArgs=(/dminfo % my_proc_id, i,vertloctab(i)/) ) + !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) + do j=1,partial_global_graph_info % nAdjacent(i) + + if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle + + edgeloctab(k) = partial_global_graph_info % adjacencyList(j,i) + !call mpas_log_write('rank=$i k=$i edgeloctab= $i', intArgs=(/dminfo % my_proc_id, k,edgeloctab(k)/) ) + k = k + 1 + end do + end do + vertloctab(partial_global_graph_info % nVertices+1) = nLocEdgesGraph + 1 + + ! Duplicate the communicator to be used by Scotch + call MPI_Comm_dup(MPI_COMM_WORLD, localcomm, mpi_ierr) + if (mpi_ierr .ne. 0) then + call mpas_log_write('Cannot duplicate communicator') + endif + ! Initialize the Scotch graph data structure, and an extra one to hold the re-distributed graph #ifdef MPAS_USE_MPI_F08 - ierr = scotch_dgraphinit(scotchdgraph(1), localcomm% mpi_val) - ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm% mpi_val) + ierr = scotch_dgraphinit(scotchdgraph(1), localcomm% mpi_val) + ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm% mpi_val) #else - ierr = scotch_dgraphinit(scotchdgraph(1), localcomm) - ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm) + ierr = scotch_dgraphinit(scotchdgraph(1), localcomm) + ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm) #endif - ! From Scotch documentation: edgelocsiz is lower-bounded by the minimum size - ! of the edge array required to encompass all used adjacency values; it is - ! therefore at least equal to the maximum of the vendloctab entries, over all - ! local vertices, minus baseval; it can be set to edgelocnbr if the edge array is compact. 
- edgelocsiz = maxval(vertloctab) - 1 - - nLocVerticesGraph = partial_global_graph_info % nVertices - - call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/nLocVerticesGraph, nLocEdgesGraph/)) - - ! Build the distributed Scotch graph and save it in scotchdgraph - ! Note: Optional arguments veloloctab, vlblloctab, edgegsttab, and edloloctab are not needed here. - ierr = scotch_dgraphbuild (scotchdgraph(1), & - nLocVerticesGraph, & ! num of local vertices on the calling process - vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex - nLocEdgesGraph, & ! Number of local edges, including to ghost vertices - edgelocsiz, & ! Defined previously - edgeloctab(1)) ! Holds the adjacency array for every local vertex - - ! Only needed during development/debugging. - ierr = scotch_dgraphcheck (scotchdgraph(1)) + ! From Scotch documentation: edgelocsiz is lower-bounded by the minimum size + ! of the edge array required to encompass all used adjacency values; it is + ! therefore at least equal to the maximum of the vendloctab entries, over all + ! local vertices, minus baseval; it can be set to edgelocnbr if the edge array is compact. + edgelocsiz = maxval(vertloctab) - 1 + + nLocVerticesGraph = partial_global_graph_info % nVertices + + call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/nLocVerticesGraph, nLocEdgesGraph/)) + + ! Build the distributed Scotch graph and save it in scotchdgraph + ! Note: Optional arguments veloloctab, vlblloctab, edgegsttab, and edloloctab are not needed here. + ierr = scotch_dgraphbuild (scotchdgraph(1), & + nLocVerticesGraph, & ! num of local vertices on the calling process + vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex + nLocEdgesGraph, & ! Number of local edges, including to ghost vertices + edgelocsiz, & ! Defined previously + edgeloctab(1)) ! Holds the adjacency array for every local vertex + + ! Only needed during development/debugging. 
+ ierr = scotch_dgraphcheck (scotchdgraph(1)) - ! Initialize the strategy data structure - ierr = scotch_stratinit (stradat (1)) - - call mpas_timer_start('scotch_graph_partitioning') - ! Partition the distributed graph and save the result in local_block_id_arr - ! TODO: Should this be totalBlocks instead of nProcs? - npart = dminfo % nProcs - ierr = scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('AFTER PART rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i, local_block_id_arr(i)/) ) - ! end do - call mpas_timer_stop('scotch_graph_partitioning') - call mpas_log_write('Graph parition successful ') - - ! After the paritioning above, each processor would not necessarily have information about all of the - ! vertices it owns. To obtain this information, Scotch provides a convenience function to redistribute the graph - ! to all processors, so that each processor has information about all of the vertices it owns. - ierr = scotch_dgraphredist(scotchdgraph(1), & ! Input: original distributed graph - local_block_id_arr, & ! Input: the partition array - scotchdgraph_redist(1), & ! Output: re-distributed graph - num_local_vertices) ! Output: number of local vertices - - ! DO NOT REMOVE: This call is required if we want to read the local cell list directly after partitioning, - ! instead of reading it from the output partition file. - ! Extract the local cell list from the re-distributed graph. - ! allocate(local_cell_list(num_local_vertices)) - ! call scotch_dgraphdata(scotchdgraph_redist(1), local_cell_list) - ! do i=1,num_local_vertices - ! call mpas_log_write('local_cell_list($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_cell_list(i)/)) - ! end do - - ! NOTE: TODO: local_block_list might not be the same as proc_id when num blocks > 0? 
- - allocate(local_block_list(num_local_vertices)) - - local_block_list(:)=dminfo % my_proc_id - - !call mpas_log_write('nVertices $i num_local_vertices: $i',MPAS_LOG_ERR, intArgs=(/partial_global_graph_info % nVertices,num_local_vertices/)) - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('local_block_id_arr($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_block_id_arr(i)/)) - ! end do - - ! Using the local_nvertices array to hold the original number of vertices in - ! the partial graph readb by each processor. Might need to use a different array - ! to clear up potential confusion. - local_nvertices(dminfo % my_proc_id + 1) = partial_global_graph_info % nVertices - - ! call mpas_log_write('local_nvertices($i): $i', MPAS_LOG_ERR, intArgs=(/i,num_local_vertices/)) - - ! Gather all the partial_global_graph_info % nVertices to IO_NODE. - ! num_local_vertices is the number of vertices that this processor owns, determined by the - ! Scotch paritioning. Whereas artial_global_graph_info % nVertices is the number of vertices - ! resident in the partial graph read by this processor. The latter is the correct size of the - ! local_block_id_arr. - call MPI_Gather( partial_global_graph_info % nVertices, 1, MPI_INTEGER, local_nvertices, & - 1, MPI_INTEGER, 0, localcomm, ierr) - - ! if (dminfo % my_proc_id == IO_NODE) then - ! call mpas_log_write('After gathering local_nvertices on IO_NODE: ') - ! do i=1, npart - ! call mpas_log_write('local_nvertices: $i', intArgs=(/local_nvertices(i)/) ) - ! end do - ! end if + ! Initialize the strategy data structure + ierr = scotch_stratinit (stradat (1)) + + call mpas_timer_start('scotch_graph_partitioning') + ! Partition the distributed graph and save the result in local_block_id_arr + ! TODO: Should this be totalBlocks instead of nProcs? + npart = dminfo % nProcs + ierr = scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) + ! do i=1,partial_global_graph_info % nVertices + ! 
call mpas_log_write('AFTER PART rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i, local_block_id_arr(i)/) ) + ! end do + call mpas_timer_stop('scotch_graph_partitioning') + call mpas_log_write('Graph parition successful ') + + ! After the paritioning above, each processor would not necessarily have information about all of the + ! vertices it owns. To obtain this information, Scotch provides a convenience function to redistribute the graph + ! to all processors, so that each processor has information about all of the vertices it owns. + ierr = scotch_dgraphredist(scotchdgraph(1), & ! Input: original distributed graph + local_block_id_arr, & ! Input: the partition array + scotchdgraph_redist(1), & ! Output: re-distributed graph + num_local_vertices) ! Output: number of local vertices + + ! DO NOT REMOVE: This call is required if we want to read the local cell list directly after partitioning, + ! instead of reading it from the output partition file. + ! Extract the local cell list from the re-distributed graph. + ! allocate(local_cell_list(num_local_vertices)) + ! call scotch_dgraphdata(scotchdgraph_redist(1), local_cell_list) + ! do i=1,num_local_vertices + ! call mpas_log_write('local_cell_list($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_cell_list(i)/)) + ! end do + + ! NOTE: TODO: local_block_list might not be the same as proc_id when num blocks > 0? + + allocate(local_block_list(num_local_vertices)) + + local_block_list(:)=dminfo % my_proc_id + + !call mpas_log_write('nVertices $i num_local_vertices: $i',MPAS_LOG_ERR, intArgs=(/partial_global_graph_info % nVertices,num_local_vertices/)) + ! do i=1,partial_global_graph_info % nVertices + ! call mpas_log_write('local_block_id_arr($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_block_id_arr(i)/)) + ! end do + + ! Using the local_nvertices array to hold the original number of vertices in + ! the partial graph readb by each processor. Might need to use a different array + ! to clear up potential confusion. 
+ local_nvertices(dminfo % my_proc_id + 1) = partial_global_graph_info % nVertices + + ! call mpas_log_write('local_nvertices($i): $i', MPAS_LOG_ERR, intArgs=(/i,num_local_vertices/)) + + ! Gather all the partial_global_graph_info % nVertices to IO_NODE. + ! num_local_vertices is the number of vertices that this processor owns, determined by the + ! Scotch paritioning. Whereas artial_global_graph_info % nVertices is the number of vertices + ! resident in the partial graph read by this processor. The latter is the correct size of the + ! local_block_id_arr. + call MPI_Gather( partial_global_graph_info % nVertices, 1, MPI_INTEGER, local_nvertices, & + 1, MPI_INTEGER, 0, localcomm, ierr) + + ! if (dminfo % my_proc_id == IO_NODE) then + ! call mpas_log_write('After gathering local_nvertices on IO_NODE: ') + ! do i=1, npart + ! call mpas_log_write('local_nvertices: $i', intArgs=(/local_nvertices(i)/) ) + ! end do + ! end if + + ! Compute the displacements for gathering all the local_block_id_arr to global_block_id_arr + global_start(1) = 0 + do i=2,dminfo % nprocs + global_start(i) = global_start(i-1) + local_nvertices(i-1) + end do + ! do i=1, dminfo % nProcs + ! call mpas_log_write('global_start: $i', intArgs=(/global_start(i)/) ) + ! end do + + ! Gather all the local block ids to global_block_id_arr so IO_NODE can write out the partitioning data + call MPI_Gatherv( local_block_id_arr, partial_global_graph_info % nVertices, MPI_INTEGER, global_block_id_arr, & + local_nvertices, global_start, MPI_INTEGER, 0, localcomm, ierr) + ! Write out the paritioning data to a file from IO_NODE + if (dminfo % my_proc_id == IO_NODE) then + partitionFilePrefix=trim(blockFilePrefix) + if (trim(partitionFilePrefix) == '') then + write(partitionFilePrefix,'(a,i0,a)') 'x1.',partial_global_graph_info%nVerticesTotal,'.graph.info.part.' + end if + write(filename,'(a,i0)') trim(partitionFilePrefix), dminfo % total_blocks - ! 
Compute the displacements for gathering all the local_block_id_arr to global_block_id_arr - global_start(1) = 0 - do i=2,dminfo % nprocs - global_start(i) = global_start(i-1) + local_nvertices(i-1) + call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) + call mpas_new_unit(ounit) + open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) + do i=1,partial_global_graph_info % nVerticesTotal + write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) + !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) end do - ! do i=1, dminfo % nProcs - ! call mpas_log_write('global_start: $i', intArgs=(/global_start(i)/) ) - ! end do - - ! Gather all the local block ids to global_block_id_arr so IO_NODE can write out the partitioning data - call MPI_Gatherv( local_block_id_arr, partial_global_graph_info % nVertices, MPI_INTEGER, global_block_id_arr, & - local_nvertices, global_start, MPI_INTEGER, 0, localcomm, ierr) - ! Write out the paritioning data to a file from IO_NODE - if (dminfo % my_proc_id == IO_NODE) then - partitionFilePrefix=trim(blockFilePrefix) - if (trim(partitionFilePrefix) == '') then - write(partitionFilePrefix,'(a,i0,a)') 'x1.',partial_global_graph_info%nVerticesTotal,'.graph.info.part.' - end if - write(filename,'(a,i0)') trim(partitionFilePrefix), dminfo % total_blocks - - call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) - call mpas_new_unit(ounit) - open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) - do i=1,partial_global_graph_info % nVerticesTotal - write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) - !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) - end do - close(unit=ounit) - call mpas_release_unit(ounit) - end if + close(unit=ounit) + call mpas_release_unit(ounit) + end if + + ! 
Clean up + call scotch_dgraphexit (scotchdgraph (1)) + call scotch_dgraphexit (scotchdgraph_redist (1)) + call scotch_stratexit (stradat (1)) + + deallocate(edgeloctab) + deallocate(vertloctab) + deallocate(local_block_list) + deallocate(local_nvertices) + deallocate(global_start) + deallocate(global_block_id_arr) + deallocate(local_block_id_arr) + + call MPI_Comm_free(localcomm, mpi_ierr) + call mpas_timer_stop('scotch_total') + call mpas_log_write('Scotch partition successful') - ! Clean up - call scotch_dgraphexit (scotchdgraph (1)) - call scotch_dgraphexit (scotchdgraph_redist (1)) - call scotch_stratexit (stradat (1)) - - deallocate(edgeloctab) - deallocate(vertloctab) - deallocate(local_block_list) - deallocate(local_nvertices) - deallocate(global_start) - deallocate(global_block_id_arr) - deallocate(local_block_id_arr) - - call MPI_Comm_free(localcomm, mpi_ierr) - call mpas_timer_stop('scotch_total') - call mpas_log_write('Scotch partition successful') -#endif end subroutine mpas_block_decomp_scotch +#endif + !*********************************************************************** ! ! routine mpas_block_decomp_partitioned_edge_list From 87c7358b3a94058a7d86652e97e1342899b6f4dd Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 11 Dec 2025 15:04:05 -0700 Subject: [PATCH 34/43] Prevent scotch logic when config_number_of_blocks is not zero --- src/framework/mpas_block_decomp.F | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 80ed403329..81a552ddd4 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -143,7 +143,12 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l if (useScotch) then ! Using PT-Scotch across all MPI ranks - call mpas_block_decomp_scotch(dminfo, partial_global_graph_info, dminfo % total_blocks, blockFilePrefix) + ! Pre-emptively blocking this untested code path. 
+ if (numBlocks /= 0) then + call mpas_log_write('Scotch partitioning not available when config_number_of_blocks != 0 ', MPAS_LOG_CRIT) + end if + + call mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFilePrefix) if (dminfo % my_proc_id == IO_NODE) then open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) @@ -342,7 +347,7 @@ end subroutine mpas_block_decomp_cells_for_proc!}}} ! !----------------------------------------------------------------------- #ifdef MPAS_SCOTCH - subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBlocks, blockFilePrefix)!{{{ + subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFilePrefix)!{{{ use mpas_timer, only : mpas_timer_start, mpas_timer_stop #ifdef MPAS_USE_MPI_F08 @@ -355,7 +360,6 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBloc type (dm_info), intent(inout) :: dminfo !< Input: domain information type (graph), intent(in) :: partial_global_graph_info !< Input: Global graph information - integer, intent(in) :: totalBlocks !< Input: Number of blocks (from config_num_blocks) character (len=*), intent(in) :: blockFilePrefix !< Input: File prefix for block decomposition integer, dimension(:), pointer :: global_start @@ -471,7 +475,6 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, totalBloc call mpas_timer_start('scotch_graph_partitioning') ! Partition the distributed graph and save the result in local_block_id_arr - ! TODO: Should this be totalBlocks instead of nProcs? npart = dminfo % nProcs ierr = scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) ! 
do i=1,partial_global_graph_info % nVertices From be6cfa2b554bbfc5d4f19eb8caef5ee4df0fdfb5 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 11 Dec 2025 15:21:44 -0700 Subject: [PATCH 35/43] Cleanup and converting Scotch F interface functions to subroutines --- src/framework/mpas_block_decomp.F | 50 ++++-------------- src/framework/mpas_ptscotch_interface.F | 67 ++++++++++--------------- src/framework/ptscotch_interface.c | 3 +- 3 files changed, 39 insertions(+), 81 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 81a552ddd4..98971f80f9 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -392,7 +392,6 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) allocate(local_block_id_arr(partial_global_graph_info % nVertices)) - call mpas_timer_start('scotch_total') call mpas_log_write('Using LibScotch for graph partitioning') @@ -412,23 +411,14 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile allocate(edgeloctab(nLocEdgesGraph)) ! Array of start indices in edgeloctab for each local vertex allocate(vertloctab(partial_global_graph_info % nVertices + 1)) - - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,partial_global_graph_info % adjacencyList(j,i)/) ) - ! end do ! 
Fill up edgeloctab and vertloctab k = 1 do i=1,partial_global_graph_info % nVertices vertloctab(i) = k - !call mpas_log_write('rank=$i, i=$i vertloctab= $i', intArgs=(/dminfo % my_proc_id, i,vertloctab(i)/) ) - !call mpas_log_write('i=$i vertexID= $i', intArgs=(/i,partial_global_graph_info % vertexID(i)/) ) do j=1,partial_global_graph_info % nAdjacent(i) - if (partial_global_graph_info % adjacencyList(j,i) == 0) cycle - edgeloctab(k) = partial_global_graph_info % adjacencyList(j,i) - !call mpas_log_write('rank=$i k=$i edgeloctab= $i', intArgs=(/dminfo % my_proc_id, k,edgeloctab(k)/) ) k = k + 1 end do end do @@ -441,11 +431,11 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile endif ! Initialize the Scotch graph data structure, and an extra one to hold the re-distributed graph #ifdef MPAS_USE_MPI_F08 - ierr = scotch_dgraphinit(scotchdgraph(1), localcomm% mpi_val) - ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm% mpi_val) + call scotch_dgraphinit(scotchdgraph(1), localcomm% mpi_val) + call scotch_dgraphinit(scotchdgraph_redist(1), localcomm% mpi_val) #else - ierr = scotch_dgraphinit(scotchdgraph(1), localcomm) - ierr = scotch_dgraphinit(scotchdgraph_redist(1), localcomm) + call scotch_dgraphinit(scotchdgraph(1), localcomm) + call scotch_dgraphinit(scotchdgraph_redist(1), localcomm) #endif ! From Scotch documentation: edgelocsiz is lower-bounded by the minimum size @@ -460,7 +450,7 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile ! Build the distributed Scotch graph and save it in scotchdgraph ! Note: Optional arguments veloloctab, vlblloctab, edgegsttab, and edloloctab are not needed here. - ierr = scotch_dgraphbuild (scotchdgraph(1), & + call scotch_dgraphbuild (scotchdgraph(1), & nLocVerticesGraph, & ! num of local vertices on the calling process vertloctab (1), & ! Array of start indices in edgeloctab for each local vertex nLocEdgesGraph, & ! 
Number of local edges, including to ghost vertices @@ -468,25 +458,23 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile edgeloctab(1)) ! Holds the adjacency array for every local vertex ! Only needed during development/debugging. - ierr = scotch_dgraphcheck (scotchdgraph(1)) + call scotch_dgraphcheck (scotchdgraph(1)) ! Initialize the strategy data structure - ierr = scotch_stratinit (stradat (1)) + call scotch_stratinit (stradat (1)) call mpas_timer_start('scotch_graph_partitioning') ! Partition the distributed graph and save the result in local_block_id_arr npart = dminfo % nProcs - ierr = scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('AFTER PART rank=$i, PARTTAB ($i)= $i', intArgs=(/dminfo % my_proc_id, i, local_block_id_arr(i)/) ) - ! end do + call scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) + call mpas_timer_stop('scotch_graph_partitioning') call mpas_log_write('Graph parition successful ') ! After the paritioning above, each processor would not necessarily have information about all of the ! vertices it owns. To obtain this information, Scotch provides a convenience function to redistribute the graph ! to all processors, so that each processor has information about all of the vertices it owns. - ierr = scotch_dgraphredist(scotchdgraph(1), & ! Input: original distributed graph + call scotch_dgraphredist(scotchdgraph(1), & ! Input: original distributed graph local_block_id_arr, & ! Input: the partition array scotchdgraph_redist(1), & ! Output: re-distributed graph num_local_vertices) ! Output: number of local vertices @@ -500,16 +488,10 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile ! call mpas_log_write('local_cell_list($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_cell_list(i)/)) ! end do - ! 
NOTE: TODO: local_block_list might not be the same as proc_id when num blocks > 0? allocate(local_block_list(num_local_vertices)) local_block_list(:)=dminfo % my_proc_id - - !call mpas_log_write('nVertices $i num_local_vertices: $i',MPAS_LOG_ERR, intArgs=(/partial_global_graph_info % nVertices,num_local_vertices/)) - ! do i=1,partial_global_graph_info % nVertices - ! call mpas_log_write('local_block_id_arr($i): $i',MPAS_LOG_ERR, intArgs=(/i,local_block_id_arr(i)/)) - ! end do ! Using the local_nvertices array to hold the original number of vertices in ! the partial graph readb by each processor. Might need to use a different array @@ -525,22 +507,12 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile ! local_block_id_arr. call MPI_Gather( partial_global_graph_info % nVertices, 1, MPI_INTEGER, local_nvertices, & 1, MPI_INTEGER, 0, localcomm, ierr) - - ! if (dminfo % my_proc_id == IO_NODE) then - ! call mpas_log_write('After gathering local_nvertices on IO_NODE: ') - ! do i=1, npart - ! call mpas_log_write('local_nvertices: $i', intArgs=(/local_nvertices(i)/) ) - ! end do - ! end if ! Compute the displacements for gathering all the local_block_id_arr to global_block_id_arr global_start(1) = 0 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) end do - ! do i=1, dminfo % nProcs - ! call mpas_log_write('global_start: $i', intArgs=(/global_start(i)/) ) - ! end do ! 
Gather all the local block ids to global_block_id_arr so IO_NODE can write out the partitioning data call MPI_Gatherv( local_block_id_arr, partial_global_graph_info % nVertices, MPI_INTEGER, global_block_id_arr, & @@ -558,7 +530,6 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) do i=1,partial_global_graph_info % nVerticesTotal write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) - !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) end do close(unit=ounit) call mpas_release_unit(ounit) @@ -581,7 +552,6 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile call mpas_timer_stop('scotch_total') call mpas_log_write('Scotch partition successful') - end subroutine mpas_block_decomp_scotch #endif diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index 4bf9b211ca..86d1ee75ca 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -4,26 +4,23 @@ module mpas_ptscotch_interface #include "ptscotchf.h" public :: scotch_dgraphinit, scotch_dgraphbuild - - contains !----------------------------------------------------------------------- - ! function scotch_dgraphinit + ! subroutine scotch_dgraphinit ! !> \brief Initialize a SCOTCH distributed graph object !> \author Abishek Gopal !> \date 8 Dec 2025 !> \details !> Initializes a SCOTCH_Dgraph structure using a Fortran MPI communicator. - !> This function wraps the C function scotchm_dgraphinit. - !> Returns an error code (0 if successful) + !> This subroutine wraps the C function scotchm_dgraphinit. !> \arguments !> dgraph - SCOTCH_Dgraph structure to be initialized !> comm - Fortran MPI communicator integer ! 
!----------------------------------------------------------------------- - function scotch_dgraphinit(dgraph, comm) result(ierr) + subroutine scotch_dgraphinit(dgraph, comm) use iso_c_binding, only : c_ptr, c_loc use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT @@ -53,18 +50,17 @@ end function scotchfdgraphinit call mpas_log_write('Successfully initialized distributed Scotch graph') end if - end function scotch_dgraphinit + end subroutine scotch_dgraphinit !----------------------------------------------------------------------- - ! function scotch_dgraphbuild + ! subroutine scotch_dgraphbuild ! !> \brief Build a SCOTCH distributed graph from local vertex/edge arrays !> \author Abishek Gopal !> \date 8 Dec 2025 !> \details !> Constructs a SCOTCH_Dgraph from local vertex and edge connectivity data. - !> This function wraps the C function scotchm_dgraphbuild, and returns an - !> error code (0 if successful). + !> This subroutine wraps the C function scotchm_dgraphbuild !> \arguments !> dgraph - SCOTCH_Dgraph structure to be built !> nVertices - Number of local vertices @@ -76,7 +72,7 @@ end function scotch_dgraphinit !> adjacency list for local vertices ! !----------------------------------------------------------------------- - function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) result(ierr) + subroutine scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgelocsiz, adjncy) use iso_c_binding, only : c_ptr, c_loc use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT @@ -90,8 +86,6 @@ function scotch_dgraphbuild(dgraph, nVertices, vertloctab, nLocEdgesGraph, edgel integer(SCOTCH_NUMSIZE), intent(in) :: edgelocsiz integer(SCOTCH_NUMSIZE), intent(in) :: adjncy(nLocEdgesGraph) - - ! 
Return value integer :: ierr @@ -109,7 +103,6 @@ function scotchfdgraphbuild(dgraph_ptr, nVertices, vertloctab, & end function scotchfdgraphbuild end interface - ierr = 0 ierr = scotchfdgraphbuild(c_loc(dgraph), nVertices, vertloctab, & @@ -121,24 +114,22 @@ end function scotchfdgraphbuild call mpas_log_write('Successfully built distributed Scotch graph') end if - - end function scotch_dgraphbuild + end subroutine scotch_dgraphbuild !----------------------------------------------------------------------- - ! function scotch_dgraphcheck + ! subroutine scotch_dgraphcheck ! !> \brief Perform consistency check on a SCOTCH distributed graph !> \author Abishek Gopal !> \date 8 Dec 2025 !> \details !> Validates the internal structure of a SCOTCH_Dgraph for consistency. - !> This function wraps the C function scotchm_dgraphcheck. - !> Returns an error code (0 if successful) + !> This subroutine wraps the C function scotchm_dgraphcheck. !> \arguments !> dgraph - SCOTCH_Dgraph structure to be checked ! !----------------------------------------------------------------------- - function scotch_dgraphcheck(dgraph) result(ierr) + subroutine scotch_dgraphcheck(dgraph) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -166,8 +157,7 @@ end function scotchfdgraphcheck call mpas_log_write('Successfully checked distributed Scotch graph') end if - - end function scotch_dgraphcheck + end subroutine scotch_dgraphcheck !----------------------------------------------------------------------- ! subroutine scotch_dgraphexit @@ -202,7 +192,7 @@ end subroutine scotchfdgraphexit end subroutine scotch_dgraphexit !----------------------------------------------------------------------- - ! function scotch_stratinit + ! subroutine scotch_stratinit ! 
!> \brief Initialize a SCOTCH strategy object !> \author Abishek Gopal @@ -210,12 +200,12 @@ end subroutine scotch_dgraphexit !> \details !> Initializes a SCOTCH_Strat structure and builds a default strategy !> for distributed graph mapping. This function wraps the C function - !> scotchm_stratinit. Returns an error code (0 if successful) + !> scotchm_stratinit. !> \arguments !> stradat - SCOTCH_Strat structure to be initialized ! !----------------------------------------------------------------------- - function scotch_stratinit(stradat) result(ierr) + subroutine scotch_stratinit(stradat) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -243,7 +233,7 @@ end function scotchfstratinit call mpas_log_write('Successfully initialized Scotch strategy') end if - end function scotch_stratinit + end subroutine scotch_stratinit !----------------------------------------------------------------------- ! subroutine scotch_stratexit @@ -278,15 +268,15 @@ end subroutine scotchfstratexit end subroutine scotch_stratexit !----------------------------------------------------------------------- - ! function scotch_dgraphpart + ! subroutine scotch_dgraphpart ! !> \brief Partition a SCOTCH distributed graph !> \author Abishek Gopal !> \date 8 Dec 2025 !> \details !> Partitions the distributed graph into num_part parts using the - !> provided SCOTCH strategy object. This function wraps the C function - !> scotchm_dgraphpart. Returns an error code (0 if successful) + !> provided SCOTCH strategy object. This subroutine wraps the C function + !> scotchm_dgraphpart. !> \arguments !> dgraph - SCOTCH_Dgraph structure to be partitioned !> num_part - Number of partitions @@ -294,7 +284,7 @@ end subroutine scotch_stratexit !> parttab - Output array of size equal to number of local vertices, ! 
!----------------------------------------------------------------------- - function scotch_dgraphpart(dgraph, num_part, stradat, parttab) result(ierr) + subroutine scotch_dgraphpart(dgraph, num_part, stradat, parttab) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -328,18 +318,17 @@ end function scotchfdgraphpart call mpas_log_write('Successfully partitioned distributed Scotch graph') end if - end function scotch_dgraphpart + end subroutine scotch_dgraphpart !----------------------------------------------------------------------- - ! function scotch_dgraphredist + ! subroutine scotch_dgraphredist ! !> \brief Redistribute a SCOTCH distributed graph according to partitions !> \author Abishek Gopal !> \date 8 Dec 2025 !> \details !> Redistributes the distributed graph structure based on a partition - !> table. This function wraps the C function scotchm_dgraphredist. - !> Returns an error code (0 if successful) + !> table. This subroutine wraps the C function scotchm_dgraphredist. !> \arguments !> dgraph - SCOTCH_Dgraph structure to be redistributed !> parttab - Input array of size equal to number of local vertices, @@ -348,7 +337,7 @@ end function scotch_dgraphpart !> num_local_vertices - Number of local vertices in the redistributed graph ! !----------------------------------------------------------------------- - function scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) result(ierr) + subroutine scotch_dgraphredist(dgraph, parttab, dgraph_out, num_local_vertices) use mpas_log, only : mpas_log_write use mpas_derived_types, only : MPAS_LOG_CRIT use iso_c_binding, only : c_ptr, c_loc @@ -382,18 +371,17 @@ end function scotchfdgraphredist call mpas_log_write('Successfully redistributed Scotch graph') end if - - end function scotch_dgraphredist + end subroutine scotch_dgraphredist !----------------------------------------------------------------------- - ! 
function scotch_dgraphdata + ! subroutine scotch_dgraphdata ! !> \brief Extract vertex labels from a SCOTCH distributed graph !> \author Abishek Gopal !> \date 8 Dec 2025 !> \details !> Extracts vertex labels or stored IDs for local vertices into the - !> output array. This function wraps the C function scotchm_dgraphdata. + !> output array. This subroutine wraps the C function scotchm_dgraphdata. !> \arguments !> dgraph - SCOTCH_Dgraph structure to extract from !> local_cell_list - Output array to hold vertex labels for local vertices @@ -420,6 +408,5 @@ end subroutine scotchfdgraphdata end subroutine scotch_dgraphdata - end module mpas_ptscotch_interface #endif \ No newline at end of file diff --git a/src/framework/ptscotch_interface.c b/src/framework/ptscotch_interface.c index 792eae9221..c12718abae 100644 --- a/src/framework/ptscotch_interface.c +++ b/src/framework/ptscotch_interface.c @@ -257,7 +257,8 @@ void scotchm_dgraphexit(void *ptr) int scotchm_stratinit(void *strat_ptr) { SCOTCH_stratInit((SCOTCH_Strat *)strat_ptr); - /*SCOTCH_stratDgraphMapBuild ((SCOTCH_Strat *) strat_ptr, SCOTCH_STRATDEFAULT, 16, 16, 0.03);*/ + + // This was required to avoid crashes when scaling up to large core counts SCOTCH_stratDgraphMapBuild((SCOTCH_Strat *)strat_ptr, SCOTCH_STRATSCALABILITY, 1, 0, 0.05); return 0; From 7d370ad518d1467c8a60a318589031c1896736dd Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 11 Dec 2025 15:25:54 -0700 Subject: [PATCH 36/43] Replace total_blocks with nProcs --- src/framework/mpas_block_decomp.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 98971f80f9..5b586cdbf0 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -523,7 +523,7 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile if (trim(partitionFilePrefix) == '') then write(partitionFilePrefix,'(a,i0,a)') 
'x1.',partial_global_graph_info%nVerticesTotal,'.graph.info.part.' end if - write(filename,'(a,i0)') trim(partitionFilePrefix), dminfo % total_blocks + write(filename,'(a,i0)') trim(partitionFilePrefix), dminfo % nProcs call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) call mpas_new_unit(ounit) From 79f759a28ca23effd94598f11298043aa6ce98f2 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 26 Dec 2025 15:07:39 -0700 Subject: [PATCH 37/43] Some minor edits to address review comments --- Makefile | 2 +- src/framework/mpas_block_decomp.F | 4 ++-- src/framework/mpas_ptscotch_interface.F | 7 +++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 59b16a9765..0a9de8c015 100644 --- a/Makefile +++ b/Makefile @@ -1429,7 +1429,7 @@ scotch_fortran_test: @# @# Create a Fortran test program that will link against the SCOTCH library @# - $(info Checking for a working MUSICA-Fortran library...) + $(info Checking for a working Scotch library...) $(eval SCOTCH_FORTRAN_TEST := $(shell $\ printf "program test_scotch_fortran\n$\ & include \"ptscotchf.h\"\n$\ diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 5b586cdbf0..97da31f1b8 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -232,7 +232,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l close(unit=iunit) call mpas_release_unit(iunit) - + else call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) @@ -521,7 +521,7 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile if (dminfo % my_proc_id == IO_NODE) then partitionFilePrefix=trim(blockFilePrefix) if (trim(partitionFilePrefix) == '') then - write(partitionFilePrefix,'(a,i0,a)') 'x1.',partial_global_graph_info%nVerticesTotal,'.graph.info.part.' 
+ write(partitionFilePrefix,'(a,i0,a)') 'grid.',partial_global_graph_info%nVerticesTotal,'.graph.info.part.' end if write(filename,'(a,i0)') trim(partitionFilePrefix), dminfo % nProcs diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index 86d1ee75ca..4e77407f83 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -1,3 +1,10 @@ +! Copyright (c) 2025, Los Alamos National Security, LLC (LANS) +! and the University Corporation for Atmospheric Research (UCAR). +! +! Unless noted otherwise source code is licensed under the BSD license. +! Additional copyright and license information can be found in the LICENSE file +! distributed with this code, or at http://mpas-dev.github.com/license.html +! #ifdef MPAS_SCOTCH module mpas_ptscotch_interface use iso_c_binding, only : c_int, c_double From 3150b1a151183ad6cc36f3b56eeee62f468d8264 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 26 Dec 2025 15:24:28 -0700 Subject: [PATCH 38/43] Replacing MPI_COMM_WORLD with dminfo%comm --- src/framework/mpas_block_decomp.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 97da31f1b8..bccdf5c611 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -425,7 +425,7 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile vertloctab(partial_global_graph_info % nVertices+1) = nLocEdgesGraph + 1 ! Duplicate the communicator to be used by Scotch - call MPI_Comm_dup(MPI_COMM_WORLD, localcomm, mpi_ierr) + call MPI_Comm_dup(dminfo % comm, localcomm, mpi_ierr) if (mpi_ierr .ne. 
0) then call mpas_log_write('Cannot duplicate communicator') endif From ef465d46b190f9fd2ef1b12930e0d22a2db5aabe Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 26 Dec 2025 15:50:27 -0700 Subject: [PATCH 39/43] Fix to non-Scotch code path --- src/framework/mpas_block_decomp.F | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index bccdf5c611..96bda19a88 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -139,6 +139,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l end if end if +#ifdef MPAS_SCOTCH call mpas_dmpar_bcast_logical(dminfo, useScotch) if (useScotch) then ! Using PT-Scotch across all MPI ranks @@ -158,6 +159,7 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l end if end if end if +#endif if (dminfo % my_proc_id == IO_NODE) then From 6cd6eb9f054a5f954b34ae49af1474b380ab86d0 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 26 Dec 2025 19:15:04 -0700 Subject: [PATCH 40/43] Updating Scotch build tests in Makefile. Now tries to build C and Fortran test programs --- Makefile | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 0a9de8c015..68f031d6f0 100644 --- a/Makefile +++ b/Makefile @@ -1427,9 +1427,34 @@ musica_fortran_test: scotch_fortran_test: @# - @# Create a Fortran test program that will link against the SCOTCH library + @# Create C and Fortran test programs and try to build against the PT-SCOTCH library @# $(info Checking for a working Scotch library...) 
+ $(eval SCOTCH_C_TEST := $(shell $\
+ printf "#include <stdio.h>\n\
+ &#include \"mpi.h\"\n\
+ &#include \"ptscotch.h\"\n\
+ &int main(){\n\
+ & int err;\n\
+ & SCOTCH_Dgraph *dgraph;\n\
+ & err = SCOTCH_dgraphInit(dgraph, MPI_COMM_WORLD);\n\
+ & SCOTCH_dgraphExit(dgraph);\n\
+ & return err;\n\
+ &}\n" | sed 's/&/ /' > ptscotch_c_test.c; $\
+ $\
+ $(CC) $(CPPINCLUDES) $(CFLAGS) $(LDFLAGS) ptscotch_c_test.c -o ptscotch_c_test.x $(SCOTCH_LIBS) > ptscotch_c_test.log 2>&1; $\
+ scotch_c_status=$$?; $\
+ if [ $$scotch_c_status -eq 0 ]; then $\
+ printf "1"; $\
+ rm -f ptscotch_c_test.c ptscotch_c_test.x ptscotch_c_test.log; $\
+ else $\
+ printf "0"; $\
+ fi $\
+ ))
+ $(if $(findstring 0,$(SCOTCH_C_TEST)), $(error Could not build a simple C program with Scotch. $\
+ Test program ptscotch_c_test.c and output ptscotch_c_test.log have been left $\
+ in the top-level MPAS directory for further debugging ))
+ $(if $(findstring 1,$(SCOTCH_C_TEST)), $(info Built a simple C program with Scotch ))
 $(eval SCOTCH_FORTRAN_TEST := $(shell $\
 printf "program test_scotch_fortran\n$\
 & include \"ptscotchf.h\"\n$\
@@ -1438,19 +1463,21 @@ scotch_fortran_test:
 & ierr = 0\n$\
 & call scotchfgraphinit(scotchgraph (1), ierr)\n$\
 & call scotchfgraphexit(scotchgraph(1))\n$\
- end program test_scotch_fortran\n" | sed 's/&/ /' > test_scotch_fortran.f90; $\
+ end program test_scotch_fortran\n" | sed 's/&/ /' > ptscotch_f_test.f90; $\
 $\
- $(FC) $(SCOTCH_FCINCLUDES) $(SCOTCH_FFLAGS) test_scotch_fortran.f90 -o test_scotch_fortran.x $(SCOTCH_LIBS) > /dev/null 2>&1; $\
+ $(FC) $(SCOTCH_FCINCLUDES) $(SCOTCH_FFLAGS) ptscotch_f_test.f90 -o ptscotch_f_test.x $(SCOTCH_LIBS) > ptscotch_f_test.log 2>&1; $\
 scotch_fortran_status=$$?; $\
- rm -f test_scotch_fortran.f90 test_scotch_fortran.x; $\
 if [ $$scotch_fortran_status -eq 0 ]; then $\
 printf "1"; $\
+ rm -f ptscotch_f_test.f90 ptscotch_f_test.x ptscotch_f_test.log; $\
 else $\
 printf "0"; $\
 fi $\
 ))
- $(if $(findstring 0,$(SCOTCH_FORTRAN_TEST)), $(error Could not
build a simple test program with Scotch)) - $(if $(findstring 1,$(SCOTCH_FORTRAN_TEST)), $(info Built a simple test program with Scotch )) + $(if $(findstring 0,$(SCOTCH_FORTRAN_TEST)), $(error Could not build a simple Fortran program with Scotch. $\ + Test program ptscotch_f_test.f90 and output ptscotch_f_test.log have been left $\ + in the top-level MPAS directory for further debugging )) + $(if $(findstring 1,$(SCOTCH_FORTRAN_TEST)), $(info Built a simple Fortran program with Scotch )) pnetcdf_test: @# From 0ef7da02aad97a8a4c40b903667d3cdba252b665 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 26 Dec 2025 19:19:02 -0700 Subject: [PATCH 41/43] Updating copyright block --- src/framework/mpas_ptscotch_interface.F | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/framework/mpas_ptscotch_interface.F b/src/framework/mpas_ptscotch_interface.F index 4e77407f83..03097984ee 100644 --- a/src/framework/mpas_ptscotch_interface.F +++ b/src/framework/mpas_ptscotch_interface.F @@ -1,9 +1,8 @@ -! Copyright (c) 2025, Los Alamos National Security, LLC (LANS) -! and the University Corporation for Atmospheric Research (UCAR). +! Copyright (c) 2025 The University Corporation for Atmospheric Research (UCAR). ! ! Unless noted otherwise source code is licensed under the BSD license. ! Additional copyright and license information can be found in the LICENSE file -! distributed with this code, or at http://mpas-dev.github.com/license.html +! distributed with this code, or at https://mpas-dev.github.io/license.html . ! #ifdef MPAS_SCOTCH module mpas_ptscotch_interface From 47d7701865aa3f1a901265aab052dc046e57bd3a Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 26 Dec 2025 19:44:39 -0700 Subject: [PATCH 42/43] Small fixes and cleanup for code path when config_block_decomp_file_prefix is empty. 
--- src/framework/mpas_block_decomp.F | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 96bda19a88..a74dbe7047 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -149,10 +149,12 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l call mpas_log_write('Scotch partitioning not available when config_number_of_blocks != 0 ', MPAS_LOG_CRIT) end if - call mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFilePrefix) + call mpas_log_write('No existing block decomposition file found, invoking Scotch.') + call mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFilePrefix, filename) if (dminfo % my_proc_id == IO_NODE) then open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) + call mpas_log_write('After Scotch decomposition, attempting to read block decomposition file: '//trim(filename)) if (istatus /= 0) then call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_ERR, intArgs=(/dminfo % total_blocks/) ) call mpas_log_write('Filename: '//trim(filename), MPAS_LOG_CRIT) @@ -349,7 +351,7 @@ end subroutine mpas_block_decomp_cells_for_proc!}}} ! 
!----------------------------------------------------------------------- #ifdef MPAS_SCOTCH - subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFilePrefix)!{{{ + subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFilePrefix, blockFilename)!{{{ use mpas_timer, only : mpas_timer_start, mpas_timer_stop #ifdef MPAS_USE_MPI_F08 @@ -363,6 +365,7 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile type (dm_info), intent(inout) :: dminfo !< Input: domain information type (graph), intent(in) :: partial_global_graph_info !< Input: Global graph information character (len=*), intent(in) :: blockFilePrefix !< Input: File prefix for block decomposition + character (len=*), intent(out) :: blockFilename !< Output: Block decomposition file name integer, dimension(:), pointer :: global_start integer, dimension(:), allocatable :: local_cell_list @@ -373,7 +376,7 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile integer :: err, ierr integer, dimension(:), pointer :: local_nvertices integer :: num_local_vertices !< Number of local vertices for this processor - character (len=StrKIND) :: filename, msg + character (len=StrKIND) :: msg integer :: nLocEdgesGraph = 0, nLocVerticesGraph = 0, edgelocsiz = 0, npart = 1 character (len=StrKIND) :: partitionFilePrefix @@ -523,13 +526,13 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile if (dminfo % my_proc_id == IO_NODE) then partitionFilePrefix=trim(blockFilePrefix) if (trim(partitionFilePrefix) == '') then - write(partitionFilePrefix,'(a,i0,a)') 'grid.',partial_global_graph_info%nVerticesTotal,'.graph.info.part.' + write(partitionFilePrefix,'(i0,a)') partial_global_graph_info%nVerticesTotal,'.graph.info.part.' 
end if - write(filename,'(a,i0)') trim(partitionFilePrefix), dminfo % nProcs + write(blockFilename,'(a,i0)') trim(partitionFilePrefix), dminfo % nProcs - call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) + call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(blockFilename)) call mpas_new_unit(ounit) - open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) + open(unit=ounit, file=trim(blockFilename), form='formatted', status='new', action="write", iostat=ostatus) do i=1,partial_global_graph_info % nVerticesTotal write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) end do From bc93fb0aad6393dca69890d8064df4b7249ef14a Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 26 Dec 2025 19:54:59 -0700 Subject: [PATCH 43/43] Cleanup of print statements --- src/framework/mpas_block_decomp.F | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index a74dbe7047..6faf05ca54 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -399,8 +399,6 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile call mpas_timer_start('scotch_total') - call mpas_log_write('Using LibScotch for graph partitioning') - ! Count the number of edges (including to ghost cells) in the portion of graph ! owned by the current rank. Each edge is counted twice, once for each vertex, ! with the exception of edges to ghost vertices, which are counted only once. @@ -451,7 +449,8 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile nLocVerticesGraph = partial_global_graph_info % nVertices - call mpas_log_write('nvertices =$i nLocEdgesGraph= $i', intArgs=(/nLocVerticesGraph, nLocEdgesGraph/)) + call mpas_log_write('Before SCOTCH distributed graph build. 
nLocVertices =$i nLocEdges= $i', & + intArgs=(/nLocVerticesGraph, nLocEdgesGraph/)) ! Build the distributed Scotch graph and save it in scotchdgraph ! Note: Optional arguments veloloctab, vlblloctab, edgegsttab, and edloloctab are not needed here. @@ -474,7 +473,6 @@ subroutine mpas_block_decomp_scotch(dminfo, partial_global_graph_info, blockFile call scotch_dgraphpart (scotchdgraph(1), npart, stradat (1), local_block_id_arr(1)) call mpas_timer_stop('scotch_graph_partitioning') - call mpas_log_write('Graph parition successful ') ! After the paritioning above, each processor would not necessarily have information about all of the ! vertices it owns. To obtain this information, Scotch provides a convenience function to redistribute the graph