From 5c2fa590bd5d0cac370e1476f0bed9fbca22da49 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 15 Jan 2025 19:39:33 -0700 Subject: [PATCH 01/79] Add parallel and loop directives to atm_bdy_adjust_scalars_work This commit adds an initial port of this routine using OpenACC. More changes are needed for performance and data management. --- .../dynamics/mpas_atm_time_integration.F | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a82fc9e0c0..63bcafbe49 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6781,12 +6781,16 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !--- + !$acc parallel + !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells if ( (bdyMaskCell(iCell) > 1) .and. (bdyMaskCell(iCell) <= nRelaxZone) ) then ! relaxation zone laplacian_filter_coef = dt_rk*(real(bdyMaskCell(iCell)) - 1.)/real(nRelaxZone)/(10.*dt*meshScalingRegionalCell(iCell)) rayleigh_damping_coef = laplacian_filter_coef/5.0 + + !$acc loop collapse(2) do k=1,nVertLevels do iScalar=1,num_scalars scalars_tmp(iScalar,k,iCell) = scalars_new(iScalar,k,iCell) @@ -6795,6 +6799,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, ! first, we compute the 2nd-order laplacian filter ! + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) ! edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) * laplacian_filter_coef @@ -6803,6 +6808,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop collapse(2) do k=1,nVertLevels do iScalar = 1, num_scalars filter_flux = edge_sign*( (scalars_new(iScalar,k,cell2)-scalars_driving(iScalar,k,cell2)) & @@ -6815,6 +6821,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, ! second, we compute the Rayleigh damping component ! !DIR$ IVDEP + !$acc loop collapse(2) do k=1,nVertLevels do iScalar = 1, num_scalars scalars_tmp(iScalar,k,iCell) =scalars_tmp(iScalar,k,iCell) - rayleigh_damping_coef * (scalars_new(iScalar,k,iCell)-scalars_driving(iScalar,k,iCell)) @@ -6826,6 +6833,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, ! update the specified-zone values ! !DIR$ IVDEP + !$acc loop collapse(2) do k=1,nVertLevels do iScalar = 1, num_scalars scalars_tmp(iScalar,k,iCell) = scalars_driving(iScalar,k,iCell) @@ -6835,12 +6843,16 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, end if end do ! updates now in temp storage + !$acc end parallel !$OMP BARRIER + !$acc parallel + !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells if (bdyMaskCell(iCell) > 1) then ! update values !DIR$ IVDEP + !$acc loop collapse(2) do k=1,nVertLevels do iScalar = 1, num_scalars scalars_new(iScalar,k,iCell) = scalars_tmp(iScalar,k,iCell) @@ -6848,6 +6860,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, end do end if end do + !$acc end parallel end subroutine atm_bdy_adjust_scalars_work From bd8f074b95312e64bfb0fc8d8caf35036f8a15d6 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Thu, 16 Jan 2025 12:12:12 -0700 Subject: [PATCH 02/79] Copy invariant fields used in atm_bdy_adjust_scalars_work Ensures the fields which don't change while the model is running are present on the device from model startup to model shutdown. --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 63bcafbe49..8da99978eb 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -233,6 +233,7 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell real (kind=RKIND), dimension(:), pointer :: fzm real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell #endif @@ -356,6 +357,9 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'fzp', fzp) !$acc enter data copyin(fzp) + call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) + !$acc enter data copyin(meshScalingRegionalCell) + #endif end subroutine mpas_atm_dynamics_init @@ -425,6 +429,7 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell real (kind=RKIND), dimension(:), pointer :: fzm real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell #endif @@ -547,6 +552,10 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'fzp', fzp) !$acc exit data delete(fzp) + + call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) + !$acc exit data delete(meshScalingRegionalCell) + #endif end subroutine mpas_atm_dynamics_finalize From 143585d832a83fb908033b4929df70ab06e004fc Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Thu, 16 Jan 2025 12:15:25 -0700 Subject: [PATCH 03/79] Add acc data movement to atm_bdy_adjust_scalars_work Ensure that the other, non-invariant fields are available for this routine. Variables that are overwritten during this routine are only created while others are copied in. Any variables overwritten by this routine are copied out at the end. Timing for these transfers are reported in the output log file in the new timer: 'atm_bdy_adjust_scalars [ACC_data_xfer]'. Also add default(present) to parallel directives to ensure data movement is correct and prevent any implicit data movements from the compiler. --- .../dynamics/mpas_atm_time_integration.F | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 8da99978eb..b15282f956 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6789,8 +6789,12 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, integer :: iCell, iEdge, iScalar, i, k, cell1, cell2 !--- + MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') + !$acc enter data create(scalars_tmp) & + !$acc copyin(scalars_driving, scalars_new) + MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc parallel + !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells @@ -6856,7 +6860,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !$OMP BARRIER - !$acc parallel + !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells if (bdyMaskCell(iCell) > 1) then ! update values @@ -6871,6 +6875,11 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, end do !$acc end parallel + MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') + !$acc exit data delete(scalars_tmp, scalars_driving) & + !$acc copyout(scalars_new) + MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') + end subroutine atm_bdy_adjust_scalars_work !------------------------------------------------------------------------- From 80fa8f7953edf396e1c9a2defb9d52e1e8f37366 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 1 Aug 2024 15:22:41 -0600 Subject: [PATCH 04/79] initial OpenACC port of atm_rk_dynamics_substep_finish --- .../dynamics/mpas_atm_time_integration.F | 120 +++++++++++++++--- 1 file changed, 102 insertions(+), 18 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5ea2ca1154..8224356f0e 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1364,7 +1364,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$OMP PARALLEL DO do thread=1,nThreads - call atm_rk_dynamics_substep_finish(state, diag, dynamics_substep, dynamics_split, & + call atm_rk_dynamics_substep_finish(state, diag, nVertLevels, dynamics_substep, dynamics_split, & cellThreadStart(thread), cellThreadEnd(thread), & vertexThreadStart(thread), vertexThreadEnd(thread), & edgeThreadStart(thread), edgeThreadEnd(thread), & @@ -6514,7 +6514,7 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & end subroutine atm_init_coupled_diagnostics - subroutine atm_rk_dynamics_substep_finish( state, diag, dynamics_substep, dynamics_split, & + subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_substep, dynamics_split, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) @@ -6528,7 +6528,7 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, dynamics_substep, dynami type (mpas_pool_type), intent(inout) :: state type (mpas_pool_type), intent(inout) :: diag - integer, intent(in) :: dynamics_substep, dynamics_split + integer, intent(in) :: nVertLevels, dynamics_substep, dynamics_split integer, intent(in) :: cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd integer, intent(in) :: cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd @@ -6548,6 +6548,7 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, dynamics_substep, dynami real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2, rho_zz_old_split real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + integer :: iCell, iEdge, j, k call mpas_pool_get_array(diag, 'ru', ru) call mpas_pool_get_array(diag, 'ru_save', ru_save) @@ -6572,35 +6573,118 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, dynamics_substep, dynami call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + + MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') + !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, & + !$acc w_1, rho_zz_1) & + !$acc copyin(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & + !$acc w_2, ruAvg, wwAvg, ruAvg_split, wwAvg_split, rho_zz_old_split) + MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') + inv_dynamics_split = 1.0_RKIND / real(dynamics_split) + !$acc parallel if (dynamics_substep < dynamics_split) then - ru_save(:,edgeStart:edgeEnd) = ru(:,edgeStart:edgeEnd) - rw_save(:,cellStart:cellEnd) = rw(:,cellStart:cellEnd) - rtheta_p_save(:,cellStart:cellEnd) = rtheta_p(:,cellStart:cellEnd) - rho_p_save(:,cellStart:cellEnd) = rho_p(:,cellStart:cellEnd) + !$acc loop gang worker + do iEdge = edgeStart,edgeEnd + !$acc loop vector + do k = 1,nVertLevels + ru_save(k,iEdge) = ru(k,iEdge) + u_1(k,iEdge) = u_2(k,iEdge) + end do + end do - u_1(:,edgeStart:edgeEnd) = u_2(:,edgeStart:edgeEnd) - w_1(:,cellStart:cellEnd) = w_2(:,cellStart:cellEnd) - theta_m_1(:,cellStart:cellEnd) = theta_m_2(:,cellStart:cellEnd) - rho_zz_1(:,cellStart:cellEnd) = rho_zz_2(:,cellStart:cellEnd) + !$acc loop gang worker + do iCell = cellStart,cellEnd + !$acc loop vector + do k = 1,nVertLevels + rtheta_p_save(k,iCell) = rtheta_p(k,iCell) + rho_p_save(k,iCell) = rho_p(k,iCell) + theta_m_1(k,iCell) = theta_m_2(k,iCell) + rho_zz_1(k,iCell) = rho_zz_2(k,iCell) + end do + end do + + !$acc loop gang worker + do iCell = cellStart,cellEnd + !$acc loop vector + do k = 1,nVertLevels+1 + rw_save(k,iCell) = rw(k,iCell) + w_1(k,iCell) = w_2(k,iCell) + end do + end do end if + + !!$acc parallel if (dynamics_substep == 1) then - ruAvg_split(:,edgeStart:edgeEnd) = ruAvg(:,edgeStart:edgeEnd) - wwAvg_split(:,cellStart:cellEnd) = wwAvg(:,cellStart:cellEnd) + !$acc loop gang worker + do iEdge = edgeStart,edgeEnd + !$acc loop vector + do k = 1,nVertLevels + ruAvg_split(k,iEdge) = ruAvg(k,iEdge) + end do + end do + !$acc loop gang worker + do iCell = cellStart,cellEnd + !$acc loop vector + do k = 1,nVertLevels+1 + wwAvg_split(k,iCell) = wwAvg(k,iCell) + end do + end do else - ruAvg_split(:,edgeStart:edgeEnd) = ruAvg(:,edgeStart:edgeEnd)+ruAvg_split(:,edgeStart:edgeEnd) - wwAvg_split(:,cellStart:cellEnd) = wwAvg(:,cellStart:cellEnd)+wwAvg_split(:,cellStart:cellEnd) + !$acc loop gang worker + do iEdge = edgeStart,edgeEnd + !$acc loop vector + do k = 1,nVertLevels + ruAvg_split(k,iEdge) = ruAvg(k,iEdge) + ruAvg_split(k,iEdge) + end do + end do + !$acc loop gang worker + do iCell = cellStart,cellEnd + !$acc loop vector + do k = 1,nVertLevels+1 + wwAvg_split(k,iCell) = wwAvg(k,iCell) + wwAvg_split(k,iCell) + end do + end do end if if (dynamics_substep == dynamics_split) then - ruAvg(:,edgeStart:edgeEnd) = ruAvg_split(:,edgeStart:edgeEnd) * inv_dynamics_split - wwAvg(:,cellStart:cellEnd) = wwAvg_split(:,cellStart:cellEnd) * inv_dynamics_split - rho_zz_1(:,cellStart:cellEnd) = rho_zz_old_split(:,cellStart:cellEnd) + !$acc loop gang worker + do iEdge = edgeStart,edgeEnd + !$acc loop vector + do k = 1,nVertLevels + ruAvg(k,iEdge) = ruAvg_split(k,iEdge) * inv_dynamics_split + end do + end do + !$acc loop gang worker + do iCell = cellStart,cellEnd + !$acc loop vector + do k = 1,nVertLevels+1 + wwAvg(k,iCell) = wwAvg_split(k,iCell) * inv_dynamics_split + end do + end do + !$acc loop gang worker + do iCell = cellStart,cellEnd + !$acc loop vector + do k = 1,nVertLevels + rho_zz_1(k,iCell) = rho_zz_old_split(k,iCell) + end do + end do end if + !$acc end parallel + + !!$acc exit data copyout(rho_zz_1, ruAvg, wwAvg, ruAvg_split, wwAvg_split) + + MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') + !$acc exit data copyout(ru_save, u_1, rtheta_p_save, rho_p_save, rw_save, & + !$acc w_1, theta_m_1, rho_zz_1, ruAvg, wwAvg, ruAvg_split, & + !$acc wwAvg_split) & + !$acc delete(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & + !$acc w_2, rho_zz_old_split) + MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') end subroutine atm_rk_dynamics_substep_finish From 8c48f75606e19ca7c8c3984f9a579491e9af78e1 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 16 Jan 2025 17:09:53 -0700 Subject: [PATCH 05/79] Moving parallel directives to inside if-else conditions --- .../dynamics/mpas_atm_time_integration.F | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 8224356f0e..0f3a8b4cf0 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6573,7 +6573,7 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - + MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, & !$acc w_1, rho_zz_1) & @@ -6583,9 +6583,9 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su inv_dynamics_split = 1.0_RKIND / real(dynamics_split) - !$acc parallel - if (dynamics_substep < dynamics_split) then + if (dynamics_substep < dynamics_split) then + !$acc parallel !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector @@ -6615,11 +6615,12 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su w_1(k,iCell) = w_2(k,iCell) end do end do + !$acc end parallel end if - !!$acc parallel if (dynamics_substep == 1) then + !$acc parallel !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector @@ -6634,7 +6635,9 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su wwAvg_split(k,iCell) = wwAvg(k,iCell) end do end do + !$acc end parallel else + !$acc parallel !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector @@ -6649,9 +6652,11 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su wwAvg_split(k,iCell) = wwAvg(k,iCell) + wwAvg_split(k,iCell) end do end do + !$acc end parallel end if if (dynamics_substep == dynamics_split) then + !$acc parallel !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector @@ -6673,10 +6678,8 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su rho_zz_1(k,iCell) = rho_zz_old_split(k,iCell) end do end do + !$acc end parallel end if - !$acc end parallel - - !!$acc exit data copyout(rho_zz_1, ruAvg, wwAvg, ruAvg_split, wwAvg_split) MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') !$acc exit data copyout(ru_save, u_1, rtheta_p_save, rho_p_save, rw_save, & From ed2db57ffd26209627dee1f1f0d88a20bce3d1c1 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 16 Jan 2025 17:11:10 -0700 Subject: [PATCH 06/79] Initializing the garbage cells for theta_m_1 in atm_rk_dynamics_substep_finish This commit provides an interim fix for a potential issue in limited area runs relating to the uninitialized garbage cells in the 1st time level of theta_m. --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 0f3a8b4cf0..74505048e9 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6573,7 +6573,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, & !$acc w_1, rho_zz_1) & @@ -6581,9 +6580,12 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su !$acc w_2, ruAvg, wwAvg, ruAvg_split, wwAvg_split, rho_zz_old_split) MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') + !$acc kernels + theta_m_1(:,cellEnd+1) = 0.0_RKIND + !$acc end kernels + inv_dynamics_split = 1.0_RKIND / real(dynamics_split) - if (dynamics_substep < dynamics_split) then !$acc parallel !$acc loop gang worker From d45b96a9515dcc5735554f4962cdcd5d5e3823de Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 22 Jan 2025 09:23:35 -0700 Subject: [PATCH 07/79] Clean up to address review comments --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 74505048e9..a0fb993c77 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6620,7 +6620,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su !$acc end parallel end if - if (dynamics_substep == 1) then !$acc parallel !$acc loop gang worker @@ -6675,10 +6674,10 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su end do !$acc loop gang worker do iCell = cellStart,cellEnd - !$acc loop vector - do k = 1,nVertLevels - rho_zz_1(k,iCell) = rho_zz_old_split(k,iCell) - end do + !$acc loop vector + do k = 1,nVertLevels + rho_zz_1(k,iCell) = rho_zz_old_split(k,iCell) + end do end do !$acc end parallel end if From 852fa3412cdbdb09e3545dff6a3200489bfb5e48 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 31 Jan 2025 13:44:27 -0700 Subject: [PATCH 08/79] Adding default(present) clauses to the parallel regions --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a0fb993c77..63d211403d 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6587,7 +6587,7 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su inv_dynamics_split = 1.0_RKIND / real(dynamics_split) if (dynamics_substep < dynamics_split) then - !$acc parallel + !$acc parallel default(present) !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector @@ -6621,7 +6621,7 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su end if if (dynamics_substep == 1) then - !$acc parallel + !$acc parallel default(present) !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector @@ -6638,7 +6638,7 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su end do !$acc end parallel else - !$acc parallel + !$acc parallel default(present) !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector @@ -6657,7 +6657,7 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su end if if (dynamics_substep == dynamics_split) then - !$acc parallel + !$acc parallel default(present) !$acc loop gang worker do iEdge = edgeStart,edgeEnd !$acc loop vector From 8ae0a8f6c5d9ce63368f64d9eeae22402c49ad6b Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 17 Jan 2025 13:06:53 -0700 Subject: [PATCH 09/79] First working version --- .../dynamics/mpas_atm_time_integration.F | 91 ++++++++++++++++--- 1 file changed, 78 insertions(+), 13 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5ea2ca1154..c24a0035d6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -243,6 +243,8 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:,:), pointer :: dss real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalEdge #endif @@ -395,6 +397,13 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) !$acc enter data copyin(specZoneMaskCell) + + call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) + !$acc enter data copyin(meshScalingRegionalCell) + + call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) + !$acc enter data copyin(meshScalingRegionalEdge) + #endif end subroutine mpas_atm_dynamics_init @@ -474,6 +483,8 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:,:), pointer :: dss real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalEdge #endif @@ -626,6 +637,13 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) !$acc exit data delete(specZoneMaskCell) + + call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) + !$acc exit data delete(meshScalingRegionalCell) + + call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) + !$acc exit data delete(meshScalingRegionalEdge) + #endif end subroutine mpas_atm_dynamics_finalize @@ -1119,6 +1137,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) rt_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step ) rho_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step ) + call mpas_timer_start('atm_bdy_adjust_dynamics_relaxzone_tend') !$OMP PARALLEL DO do thread=1,nThreads call atm_bdy_adjust_dynamics_relaxzone_tend( block % configs, tend, state, diag, mesh, nVertLevels, dt, & @@ -1129,6 +1148,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) edgeSolveThreadStart(thread), edgeSolveThreadEnd(thread) ) end do !$OMP END PARALLEL DO + call mpas_timer_stop('atm_bdy_adjust_dynamics_relaxzone_tend') deallocate(ru_driving_values) deallocate(rt_driving_values) @@ -6769,11 +6789,13 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign, edgesOnVertex_sign integer, dimension(:), pointer :: bdyMaskCell, bdyMaskEdge, nEdgesOnCell integer, dimension(:,:), pointer :: cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex - integer, pointer :: vertexDegree + integer, pointer :: vertexDegree_ptr + integer :: vertexDegree real (kind=RKIND) :: edge_sign, laplacian_filter_coef, rayleigh_damping_coef, r_dc, r_dv, invArea - real (kind=RKIND), pointer :: divdamp_coef + real (kind=RKIND), pointer :: divdamp_coef_ptr + real (kind=RKIND) :: divdamp_coef real (kind=RKIND), dimension(nVertLevels) :: divergence1, divergence2, vorticity1, vorticity2 integer :: iCell, iEdge, i, k, cell1, cell2, iEdge_vort, iEdge_div integer :: vertex1, vertex2, iVertex @@ -6794,7 +6816,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2) - call mpas_pool_get_dimension(mesh, 'vertexDegree', vertexDegree) + call mpas_pool_get_dimension(mesh, 'vertexDegree', vertexDegree_ptr) call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) @@ -6809,37 +6831,55 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me call mpas_pool_get_array(mesh, 'nEdgesOnCell',nEdgesOnCell) call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) - call mpas_pool_get_config(config, 'config_relax_zone_divdamp_coef', divdamp_coef) + call mpas_pool_get_config(config, 'config_relax_zone_divdamp_coef', divdamp_coef_ptr) + + divdamp_coef = divdamp_coef_ptr + vertexDegree = vertexDegree_ptr + + MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') + !$acc enter data copyin(tend_rho, tend_rt, rho_zz, theta_m, rho_driving_values, & + !$acc rt_driving_values, tend_ru, ru, ru_driving_values) + !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) + MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') ! First, Rayleigh damping terms for ru, rtheta_m and rho_zz - + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd if( (bdyMaskCell(iCell) > 1) .and. (bdyMaskCell(iCell) <= nRelaxZone) ) then rayleigh_damping_coef = (real(bdyMaskCell(iCell)) - 1.)/real(nRelaxZone)/(50.*dt*meshScalingRegionalCell(iCell)) + !$acc loop vector do k=1, nVertLevels tend_rho(k,iCell) = tend_rho(k,iCell) - rayleigh_damping_coef * (rho_zz(k,iCell) - rho_driving_values(k,iCell)) tend_rt(k,iCell) = tend_rt(k,iCell) - rayleigh_damping_coef * (rho_zz(k,iCell)*theta_m(k,iCell) - rt_driving_values(k,iCell)) end do end if end do + !$acc end parallel + !$acc parallel default(present) + !$acc loop gang worker do iEdge = edgeStart, edgeEnd if( (bdyMaskEdge(iEdge) > 1) .and. (bdyMaskEdge(iEdge) <= nRelaxZone) ) then rayleigh_damping_coef = (real(bdyMaskEdge(iEdge)) - 1.)/real(nRelaxZone)/(50.*dt*meshScalingRegionalEdge(iEdge)) + !$acc loop vector do k=1, nVertLevels tend_ru(k,iEdge) = tend_ru(k,iEdge) - rayleigh_damping_coef * (ru(k,iEdge) - ru_driving_values(k,iEdge)) end do end if end do - - ! Second, the horizontal filter for rtheta_m and rho_zz + !$acc end parallel + ! Second, the horizontal filter for rtheta_m and rho_zz + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells if ( (bdyMaskCell(iCell) > 1) .and. (bdyMaskCell(iCell) <= nRelaxZone) ) then ! relaxation zone laplacian_filter_coef = (real(bdyMaskCell(iCell)) - 1.)/real(nRelaxZone)/(10.*dt*meshScalingRegionalCell(iCell)) ! + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) ! edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) * laplacian_filter_coef @@ -6848,6 +6888,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels tend_rt(k,iCell) = tend_rt(k,iCell) + edge_sign*( (rho_zz(k,cell2)*theta_m(k,cell2)-rt_driving_values(k,cell2)) & - (rho_zz(k,cell1)*theta_m(k,cell1)-rt_driving_values(k,cell1)) ) @@ -6859,14 +6900,18 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end if end do + !$acc end parallel ! Third (and last), the horizontal filter for ru - + !$acc parallel default(present) + !$acc loop gang worker private(cell1, cell2, vertex1, vertex2, r_dc, r_dv, & + !$acc iCell, iVertex, invArea, iEdge_div, iEdge_vort, edge_sign, & + !$acc laplacian_filter_coef, divergence1, divergence2, vorticity1, vorticity2) do iEdge = edgeStart, edgeEnd if ( (bdyMaskEdge(iEdge) > 1) .and. (bdyMaskEdge(iEdge) <= nRelaxZone) ) then ! relaxation zone - laplacian_filter_coef = dcEdge(iEdge)**2 * (real(bdyMaskEdge(iEdge)) - 1.)/ & + laplacian_filter_coef = dcEdge(iEdge)*dcEdge(iEdge)* (real(bdyMaskEdge(iEdge)) - 1.)/ & real(nRelaxZone)/(10.*dt*meshScalingRegionalEdge(iEdge)) cell1 = cellsOnEdge(1,iEdge) @@ -6878,10 +6923,19 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me iCell = cell1 invArea = invAreaCell(iCell) - divergence1(1:nVertLevels) = 0. + !$acc loop vector + do k=1,nVertLevels + divergence1(k) = 0. + divergence2(k) = 0. + vorticity1(k) = 0. + vorticity2(k) = 0. + end do + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge_div = edgesOnCell(i,iCell) edge_sign = invArea * dvEdge(iEdge_div) * edgesOnCell_sign(i,iCell) + !$acc loop vector do k=1,nVertLevels divergence1(k) = divergence1(k) + edge_sign * (ru(k,iEdge_div) - ru_driving_values(k,iEdge_div)) end do @@ -6889,30 +6943,33 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me iCell = cell2 invArea = invAreaCell(iCell) - divergence2(1:nVertLevels) = 0. + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge_div = edgesOnCell(i,iCell) edge_sign = invArea * dvEdge(iEdge_div) * edgesOnCell_sign(i,iCell) + !$acc loop vector do k=1,nVertLevels divergence2(k) = divergence2(k) + edge_sign * (ru(k,iEdge_div) - ru_driving_values(k,iEdge_div)) end do end do iVertex = vertex1 - vorticity1(1:nVertLevels) = 0. + !$acc loop seq do i=1,vertexDegree iEdge_vort = edgesOnVertex(i,iVertex) edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge_vort) * edgesOnVertex_sign(i,iVertex) + !$acc loop vector do k=1,nVertLevels vorticity1(k) = vorticity1(k) + edge_sign * (ru(k,iEdge_vort) - ru_driving_values(k,iEdge_vort)) end do end do iVertex = vertex2 - vorticity2(1:nVertLevels) = 0. + !$acc loop seq do i=1,vertexDegree iEdge_vort = edgesOnVertex(i,iVertex) edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge_vort) * edgesOnVertex_sign(i,iVertex) + !$acc loop vector do k=1,nVertLevels vorticity2(k) = vorticity2(k) + edge_sign * (ru(k,iEdge_vort) - ru_driving_values(k,iEdge_vort)) end do @@ -6920,6 +6977,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity ! + !$acc loop vector do k=1,nVertLevels tend_ru(k,iEdge) = tend_ru(k,iEdge) + laplacian_filter_coef & * (divdamp_coef * (divergence2(k) - divergence1(k)) * r_dc & @@ -6929,6 +6987,13 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end if ! end test for relaxation-zone edge end do ! end of loop over edges + !$acc end parallel + + MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') + !$acc exit data copyout(tend_rho, tend_rt, tend_ru) + !$acc exit data delete(rho_zz, theta_m, ru, rho_driving_values, rt_driving_values, & + !$acc ru_driving_values, divergence1, divergence2, vorticity1, vorticity2) + MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_relaxzone_tend From cef6ed8f4669e627c8e8c7a716f1f2ef5ff6266e Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 22 Jan 2025 11:33:32 -0700 Subject: [PATCH 10/79] Removing scalars from private clause and reverting exponent operation --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c24a0035d6..b2985e86e4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6904,14 +6904,12 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me ! Third (and last), the horizontal filter for ru !$acc parallel default(present) - !$acc loop gang worker private(cell1, cell2, vertex1, vertex2, r_dc, r_dv, & - !$acc iCell, iVertex, invArea, iEdge_div, iEdge_vort, edge_sign, & - !$acc laplacian_filter_coef, divergence1, divergence2, vorticity1, vorticity2) + !$acc loop gang worker private(divergence1, divergence2, vorticity1, vorticity2) do iEdge = edgeStart, edgeEnd if ( (bdyMaskEdge(iEdge) > 1) .and. (bdyMaskEdge(iEdge) <= nRelaxZone) ) then ! relaxation zone - laplacian_filter_coef = dcEdge(iEdge)*dcEdge(iEdge)* (real(bdyMaskEdge(iEdge)) - 1.)/ & + laplacian_filter_coef = dcEdge(iEdge)**2 * (real(bdyMaskEdge(iEdge)) - 1.)/ & real(nRelaxZone)/(10.*dt*meshScalingRegionalEdge(iEdge)) cell1 = cellsOnEdge(1,iEdge) From 5c7d1983feb53acbdf587efbcc0c64bf8d4fd670 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 31 Jan 2025 15:54:18 -0700 Subject: [PATCH 11/79] adding comment --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index b2985e86e4..b66b49ebb7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6833,6 +6833,8 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me call mpas_pool_get_config(config, 'config_relax_zone_divdamp_coef', divdamp_coef_ptr) + ! De-referencing scalar integer pointers so that acc parallel regions correctly + ! copy these scalar integers onto the device divdamp_coef = divdamp_coef_ptr vertexDegree = vertexDegree_ptr From a6f0b7f3edc5cede384e8065c01c4c87de9be09d Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Mon, 3 Feb 2025 11:36:24 -0700 Subject: [PATCH 12/79] fixup! Add parallel and loop directives to atm_bdy_adjust_scalars_work Keep consistency with explicit loop schedules --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index b15282f956..083dbf27bd 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6821,7 +6821,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP - !$acc loop collapse(2) + !$acc loop vector collapse(2) do k=1,nVertLevels do iScalar = 1, num_scalars filter_flux = edge_sign*( (scalars_new(iScalar,k,cell2)-scalars_driving(iScalar,k,cell2)) & From 8d55069e0e58b31715206853445717a06cfdb137 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Mon, 3 Feb 2025 11:39:46 -0700 Subject: [PATCH 13/79] fixup! Add parallel and loop directives to atm_bdy_adjust_scalars_work Remove newline, keep changes minimal --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 083dbf27bd..a295f7dc4b 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6802,7 +6802,6 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, laplacian_filter_coef = dt_rk*(real(bdyMaskCell(iCell)) - 1.)/real(nRelaxZone)/(10.*dt*meshScalingRegionalCell(iCell)) rayleigh_damping_coef = laplacian_filter_coef/5.0 - !$acc loop collapse(2) do k=1,nVertLevels do iScalar=1,num_scalars From 5c41e0ee2b7c0c79bccbef0a27f936493deef03c Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Thu, 6 Feb 2025 20:04:14 -0700 Subject: [PATCH 14/79] Prepare for OpenACC porting in mpas_reconstruct_2d Add nVertLevels and derefernce integer pointers to loop bounds so they transfer to the GPU correctly. Also make loops in vertical dimension explicit for OpenACC parallel loop directives. --- src/operators/mpas_vector_reconstruction.F | 53 ++++++++++++++-------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 789ba50c1e..fa079d4db1 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -207,10 +207,11 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon ! temporary arrays needed in the compute procedure logical :: includeHalosLocal - integer, pointer :: nCells + integer, pointer :: nCells_ptr, nVertLevels_ptr + integer :: nCells, nVertLevels integer, dimension(:,:), pointer :: edgesOnCell integer, dimension(:), pointer :: nEdgesOnCell - integer :: iCell,iEdge, i + integer :: iCell,iEdge, i, k real(kind=RKIND), dimension(:), pointer :: latCell, lonCell real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct @@ -233,10 +234,14 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) if ( includeHalosLocal ) then - call mpas_pool_get_dimension(meshPool, 'nCells', nCells) + call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr) else - call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells) + call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr) end if + call mpas_pool_get_dimension(meshPool, 'nVertLevels', nVertLevels_ptr) + ! Dereference pointers so that OpenACC copies them correctly + nCells = nCells_ptr + nVertLevels = nVertLevels_ptr call mpas_pool_get_array(meshPool, 'latCell', latCell) call mpas_pool_get_array(meshPool, 'lonCell', lonCell) @@ -247,20 +252,24 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp do schedule(runtime) do iCell = 1, nCells ! initialize the reconstructed vectors - uReconstructX(:,iCell) = 0.0 - uReconstructY(:,iCell) = 0.0 - uReconstructZ(:,iCell) = 0.0 + do k=1,nVertLevels + uReconstructX(k,iCell) = 0.0 + uReconstructY(k,iCell) = 0.0 + uReconstructZ(k,iCell) = 0.0 + end do ! a more efficient reconstruction where rbf_values*matrix_reconstruct ! has been precomputed in coeffs_reconstruct do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) - uReconstructX(:,iCell) = uReconstructX(:,iCell) & - + coeffs_reconstruct(1,i,iCell) * u(:,iEdge) - uReconstructY(:,iCell) = uReconstructY(:,iCell) & - + coeffs_reconstruct(2,i,iCell) * u(:,iEdge) - uReconstructZ(:,iCell) = uReconstructZ(:,iCell) & - + coeffs_reconstruct(3,i,iCell) * u(:,iEdge) + do k=1,nVertLevels + uReconstructX(k,iCell) = uReconstructX(k,iCell) & + + coeffs_reconstruct(1,i,iCell) * u(k,iEdge) + uReconstructY(k,iCell) = uReconstructY(k,iCell) & + + coeffs_reconstruct(2,i,iCell) * u(k,iEdge) + uReconstructZ(k,iCell) = uReconstructZ(k,iCell) & + + coeffs_reconstruct(3,i,iCell) * u(k,iEdge) + end do enddo enddo ! iCell @@ -275,18 +284,22 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon slat = sin(latCell(iCell)) clon = cos(lonCell(iCell)) slon = sin(lonCell(iCell)) - uReconstructZonal(:,iCell) = -uReconstructX(:,iCell)*slon + & - uReconstructY(:,iCell)*clon - uReconstructMeridional(:,iCell) = -(uReconstructX(:,iCell)*clon & - + uReconstructY(:,iCell)*slon)*slat & - + uReconstructZ(:,iCell)*clat + do k=1,nVertLevels + uReconstructZonal(k,iCell) = -uReconstructX(k,iCell)*slon + & + uReconstructY(k,iCell)*clon + uReconstructMeridional(k,iCell) = -(uReconstructX(k,iCell)*clon & + + uReconstructY(k,iCell)*slon)*slat & + + uReconstructZ(k,iCell)*clat + end do end do !$omp end do else !$omp do schedule(runtime) do iCell = 1, nCells - uReconstructZonal (:,iCell) = uReconstructX(:,iCell) - uReconstructMeridional(:,iCell) = uReconstructY(:,iCell) + do k=1,nVertLevels + uReconstructZonal (k,iCell) = uReconstructX(k,iCell) + uReconstructMeridional(k,iCell) = uReconstructY(k,iCell) + end do end do !$omp end do end if From e8cf29bee4a1c0b3c017a5bd9f87664927176627 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Thu, 13 Feb 2025 20:21:59 -0700 Subject: [PATCH 15/79] Initial OpenACC directives added to mpas_vector_reconstruct_2d --- src/operators/mpas_vector_reconstruction.F | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index fa079d4db1..1d720cae38 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -250,8 +250,11 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon ! loop over cell centers !$omp do schedule(runtime) + !$acc parallel + !$acc loop gang do iCell = 1, nCells ! initialize the reconstructed vectors + !$acc loop vector do k=1,nVertLevels uReconstructX(k,iCell) = 0.0 uReconstructY(k,iCell) = 0.0 @@ -260,8 +263,10 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon ! a more efficient reconstruction where rbf_values*matrix_reconstruct ! has been precomputed in coeffs_reconstruct + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) + !$acc loop vector do k=1,nVertLevels uReconstructX(k,iCell) = uReconstructX(k,iCell) & + coeffs_reconstruct(1,i,iCell) * u(k,iEdge) @@ -273,17 +278,21 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon enddo enddo ! iCell + !$acc end parallel !$omp end do call mpas_threading_barrier() if (on_a_sphere) then !$omp do schedule(runtime) + !$acc parallel + !$acc loop gang do iCell = 1, nCells clat = cos(latCell(iCell)) slat = sin(latCell(iCell)) clon = cos(lonCell(iCell)) slon = sin(lonCell(iCell)) + !$acc loop vector do k=1,nVertLevels uReconstructZonal(k,iCell) = -uReconstructX(k,iCell)*slon + & uReconstructY(k,iCell)*clon @@ -292,15 +301,19 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon + uReconstructZ(k,iCell)*clat end do end do + !$acc end parallel !$omp end do else !$omp do schedule(runtime) + !$acc parallel + !$acc loop gang vector collapse(2) do iCell = 1, nCells do k=1,nVertLevels uReconstructZonal (k,iCell) = uReconstructX(k,iCell) uReconstructMeridional(k,iCell) = uReconstructY(k,iCell) end do end do + !$acc end parallel !$omp end do end if From b7a3cbc5c6f290e4efdc415bc00865727dfa4be3 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Tue, 18 Feb 2025 11:20:00 -0700 Subject: [PATCH 16/79] Add OpenACC transfer of invariant fields within mpas_reconstruct_2d Since this routine is called before mpas_atm_dynamics_init during atm_core_init, these fields must also be transfered within mpas_reconstruct_2d routine. After mpas_atm_dynamics_init, these fields are not transferred during following uses of mpas_reconstruct_2d due to OpenACC present_or_copyin behavior. --- src/operators/mpas_vector_reconstruction.F | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 1d720cae38..b92c5e423e 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -248,6 +248,9 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon call mpas_pool_get_config(meshPool, 'on_a_sphere', on_a_sphere) + ! Only use sections needed, nCells may be all cells or only non-halo cells + !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & + !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) ! loop over cell centers !$omp do schedule(runtime) !$acc parallel @@ -317,6 +320,8 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do end if + !$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & + !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) end subroutine mpas_reconstruct_2d!}}} From 96fe04ecdaaa5f24ff17474250e068e679ca176a Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Fri, 14 Feb 2025 17:38:30 -0700 Subject: [PATCH 17/79] Add OpenACC data movement to mpas_reconstruct_2d This change allows data needed for the mpas_reconstruct_2d routine to be fetched onto the device (GPU) at the beginning and end of the routine. The time for these transfers are captured in a new timer 'mpas_reconstruct_2d [ACC_data_xfer]'. --- .../dynamics/mpas_atm_time_integration.F | 24 +++++++++++++++ src/operators/mpas_vector_reconstruction.F | 30 +++++++++++++++++-- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5ea2ca1154..32a0fe212e 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -243,6 +243,9 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:,:), pointer :: dss real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell + real (kind=RKIND), dimension(:), pointer :: latCell + real (kind=RKIND), dimension(:), pointer :: lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct #endif @@ -395,6 +398,15 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) !$acc enter data copyin(specZoneMaskCell) + + call mpas_pool_get_array(mesh, 'latCell', latCell) + !$acc enter data copyin(latCell) + + call mpas_pool_get_array(mesh, 'lonCell', lonCell) + !$acc enter data copyin(lonCell) + + call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) + !$acc enter data copyin(coeffs_reconstruct) #endif end subroutine mpas_atm_dynamics_init @@ -474,6 +486,9 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:,:), pointer :: dss real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell + real (kind=RKIND), dimension(:), pointer :: latCell + real (kind=RKIND), dimension(:), pointer :: lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct #endif @@ -626,6 +641,15 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) !$acc exit data delete(specZoneMaskCell) + + call mpas_pool_get_array(mesh, 'latCell', latCell) + !$acc exit data delete(latCell) + + call mpas_pool_get_array(mesh, 'lonCell', lonCell) + !$acc exit data delete(lonCell) + + call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) + !$acc exit data delete(coeffs_reconstruct) #endif end subroutine mpas_atm_dynamics_finalize diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index b92c5e423e..6bc3a3d804 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -24,6 +24,16 @@ module mpas_vector_reconstruction use mpas_rbf_interpolation use mpas_vector_operations + ! For use in OpenACC ported code to track in-function transfers + use mpas_timer, only : mpas_timer_start, mpas_timer_stop +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + implicit none public :: mpas_init_reconstruct, mpas_reconstruct @@ -248,12 +258,19 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon call mpas_pool_get_config(meshPool, 'on_a_sphere', on_a_sphere) + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') ! Only use sections needed, nCells may be all cells or only non-halo cells !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) + !$acc enter data copyin(u(:,:)) + !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & + !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & + !$acc uReconstructMeridional(:,1:nCells)) + MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') + ! loop over cell centers !$omp do schedule(runtime) - !$acc parallel + !$acc parallel default(present) !$acc loop gang do iCell = 1, nCells ! initialize the reconstructed vectors @@ -288,7 +305,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon if (on_a_sphere) then !$omp do schedule(runtime) - !$acc parallel + !$acc parallel default(present) !$acc loop gang do iCell = 1, nCells clat = cos(latCell(iCell)) @@ -308,7 +325,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do else !$omp do schedule(runtime) - !$acc parallel + !$acc parallel default(present) !$acc loop gang vector collapse(2) do iCell = 1, nCells do k=1,nVertLevels @@ -320,8 +337,15 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do end if + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') !$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) + !$acc exit data delete(u(:,:)) + !$acc exit data copyout(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & + !$acc uReconstructZ(:,1:nCells), uReconstructZonal(:,1:nCells), & + !$acc uReconstructMeridional(:,1:nCells)) + MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') + end subroutine mpas_reconstruct_2d!}}} From 1b2dae1cb3e8fd8563d0d6e074ab95357baa9f8d Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Feb 2025 15:34:27 -0700 Subject: [PATCH 18/79] WIP: porting compute_dyn_tend_work To this point, results are bit-wise identical. --- .../dynamics/mpas_atm_time_integration.F | 101 +++++++++++++++++- 1 file changed, 96 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5ea2ca1154..c3139d92f6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -243,6 +243,8 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:,:), pointer :: dss real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell + real (kind=RKIND), dimension(:,:), pointer :: defc_a + real (kind=RKIND), dimension(:,:), pointer :: defc_b #endif @@ -395,6 +397,12 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) !$acc enter data copyin(specZoneMaskCell) + + call mpas_pool_get_array(mesh, 'defc_a', defc_a) + !$acc enter data copyin(defc_a) + + call mpas_pool_get_array(mesh, 'defc_b', defc_b) + !$acc enter data copyin(defc_b) #endif end subroutine mpas_atm_dynamics_init @@ -474,6 +482,8 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:,:), pointer :: dss real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell + real (kind=RKIND), dimension(:,:), pointer :: defc_a + real (kind=RKIND), dimension(:,:), pointer :: defc_b #endif @@ -626,6 +636,12 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) !$acc exit data delete(specZoneMaskCell) + + call mpas_pool_get_array(mesh, 'defc_a', defc_a) + !$acc exit data delete(defc_a) + + call mpas_pool_get_array(mesh, 'defc_b', defc_b) + !$acc exit data delete(defc_b) #endif end subroutine mpas_atm_dynamics_finalize @@ -4996,10 +5012,23 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! The integration coefficients were precomputed and stored in defc_a and defc_b if(config_horiz_mixing == "2d_smagorinsky") then + + !$acc enter data create(kdiff) + !$acc enter data copyin(u, v) + + !$acc parallel default(present) + !$acc loop gang worker private(d_diag, d_off_diag) do iCell = cellStart,cellEnd - d_diag(1:nVertLevels) = 0.0 - d_off_diag(1:nVertLevels) = 0.0 + + !$acc loop vector + do k = 1, nVertLevels + d_diag(k) = 0.0_RKIND + d_off_diag(k) = 0.0_RKIND + end do + + !$acc loop seq do iEdge=1,nEdgesOnCell(iCell) + !$acc loop vector do k=1,nVertLevels d_diag(k) = d_diag(k) + defc_a(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - defc_b(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) @@ -5008,19 +5037,39 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do !DIR$ IVDEP + !$acc loop vector do k=1, nVertLevels ! here is the Smagorinsky formulation, ! followed by imposition of an upper bound on the eddy viscosity kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2),(0.01*config_len_disp**2) * invDt) end do end do + !$acc end parallel + + !$acc exit data copyout(kdiff) + !$acc exit data delete(u, v) h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 else if(config_horiz_mixing == "2d_fixed") then - kdiff(1:nVertLevels,cellStart:cellEnd) = config_h_theta_eddy_visc2 + !!! MGD UNTESTED !!! + + !$acc enter data create(kdiff) + + !$acc parallel default(present) + !$acc loop gang worker + do iCell = cellStart, cellEnd + !$acc loop vector + do k = 1, nVertLevels + kdiff(k,iCell) = config_h_theta_eddy_visc2 + end do + end do + !$acc end parallel + + !$acc exit data copyout(kdiff) + h_mom_eddy_visc4 = config_h_mom_eddy_visc4 h_theta_eddy_visc4 = config_h_theta_eddy_visc4 @@ -5028,17 +5077,27 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (config_mpas_cam_coef > 0.0) then + !!! MGD UNTESTED !!! + + !$acc enter data copyin(kdiff) + + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellStart,cellEnd ! ! 2nd-order filter for top absorbing layer similar to that in CAM-SE : WCS 10 May 2017, modified 7 April 2023 ! From MPAS-CAM V4.0 code, with addition to config-specified coefficient (V4.0_coef = 0.2; SE_coef = 1.0) ! + !$acc loop vector do k = nVertLevels-config_number_cam_damping_levels + 1, nVertLevels visc2cam = 4.0*2.0833*config_len_disp*config_mpas_cam_coef visc2cam = visc2cam*(1.0-real(nVertLevels-k)/real(config_number_cam_damping_levels)) kdiff(k ,iCell) = max(kdiff(nVertLevels ,iCell),visc2cam) end do end do + !$acc end parallel + + !$acc exit data copyout(kdiff) end if @@ -5049,26 +5108,46 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! accumulate horizontal mass-flux + !$acc enter data create(h_divergence) + !$acc enter data copyin(ru) + + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellStart,cellEnd - h_divergence(1:nVertLevels,iCell) = 0.0 + + !$acc loop vector + do k=1,nVertLevels + h_divergence(k,iCell) = 0.0_RKIND + end do + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) edge_sign = edgesOnCell_sign(i,iCell) * dvEdge(iEdge) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels h_divergence(k,iCell) = h_divergence(k,iCell) + edge_sign * ru(k,iEdge) end do end do end do + !$acc end parallel ! compute horiontal mass-flux divergence, add vertical mass flux divergence to complete tend_rho + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellStart,cellEnd r = invAreaCell(iCell) + !$acc loop vector do k = 1,nVertLevels h_divergence(k,iCell) = h_divergence(k,iCell) * r end do - end do + end do + !$acc end parallel + + !$acc exit data copyout(h_divergence) + !$acc exit data delete(ru) ! ! dp / dz and tend_rho @@ -5078,14 +5157,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if(rk_step == 1) then rgas_cprcv = rgas*cp/cv + + !$acc enter data copyin(h_divergence, tend_rho_physics, rdzw, rw, rb, qtot, rr_save) + !$acc enter data create(tend_rho, dpdz) + + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellStart,cellEnd !DIR$ IVDEP + !$acc loop vector do k = 1,nVertLevels tend_rho(k,iCell) = -h_divergence(k,iCell)-rdzw(k)*(rw(k+1,iCell)-rw(k,iCell)) + tend_rho_physics(k,iCell) dpdz(k,iCell) = -gravity*(rb(k,iCell)*(qtot(k,iCell)) + rr_save(k,iCell)*(1.+qtot(k,iCell))) end do end do + !$acc end parallel + + !$acc exit data delete(h_divergence, tend_rho_physics, rdzw, rw, rb, qtot, rr_save) + !$acc exit data copyout(tend_rho, dpdz) + end if !$OMP BARRIER From f5ada69d2eeb48a49a08e581f9fbea6d7168fab1 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 2 Jan 2025 18:05:21 -0700 Subject: [PATCH 19/79] WIP: checkpoint compute_dyn_tend work, 2 Jan 2025, 6:05pm --- .../dynamics/mpas_atm_time_integration.F | 92 ++++++++++++++++++- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c3139d92f6..a1b93aacc5 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -245,6 +245,10 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell real (kind=RKIND), dimension(:,:), pointer :: defc_a real (kind=RKIND), dimension(:,:), pointer :: defc_b + real (kind=RKIND), dimension(:,:), pointer :: zxu + real (kind=RKIND), dimension(:), pointer :: latEdge + real (kind=RKIND), dimension(:), pointer :: angleEdge + real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 #endif @@ -403,6 +407,18 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'defc_b', defc_b) !$acc enter data copyin(defc_b) + + call mpas_pool_get_array(mesh, 'zxu', zxu) + !$acc enter data copyin(zxu) + + call mpas_pool_get_array(mesh, 'latEdge', latEdge) + !$acc enter data copyin(latEdge) + + call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + !$acc enter data copyin(angleEdge) + + call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) + !$acc enter data copyin(meshScalingDel2) #endif end subroutine mpas_atm_dynamics_init @@ -484,6 +500,10 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell real (kind=RKIND), dimension(:,:), pointer :: defc_a real (kind=RKIND), dimension(:,:), pointer :: defc_b + real (kind=RKIND), dimension(:,:), pointer :: zxu + real (kind=RKIND), dimension(:), pointer :: latEdge + real (kind=RKIND), dimension(:), pointer :: angleEdge + real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 #endif @@ -642,6 +662,18 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'defc_b', defc_b) !$acc exit data delete(defc_b) + + call mpas_pool_get_array(mesh, 'zxu', zxu) + !$acc exit data delete(zxu) + + call mpas_pool_get_array(mesh, 'latEdge', latEdge) + !$acc exit data delete(latEdge) + + call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + !$acc exit data delete(angleEdge) + + call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) + !$acc exit data delete(meshScalingDel2) #endif end subroutine mpas_atm_dynamics_finalize @@ -5006,7 +5038,19 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then -! tend_u_euler(1:nVertLevels,edgeStart:edgeEnd) = 0.0 + !$acc enter data create(tend_u_euler) + + !$acc parallel + !$acc loop gang worker + do iEdge = edgeStart, edgeEnd + !$acc loop vector + do k = 1, nVertLevels + tend_u_euler(k,iEdge) = 0.0_RKIND + end do + end do + !$acc end parallel + + !$acc exit data copyout(tend_u_euler) ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces). ! The integration coefficients were precomputed and stored in defc_a and defc_b @@ -5185,6 +5229,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Compute u (normal) velocity tendency for each edge (cell face) ! + !$acc enter data copyin(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, h_divergence, rdzw) + !$acc enter data copyin(tend_u_euler) + !$acc enter data create(tend_u) + + !$acc parallel default(present) + !$acc loop gang worker private(wduz, q) do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) @@ -5194,6 +5244,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if(rk_step == 1) then !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels tend_u_euler(k,iEdge) = - cqu(k,iEdge)*( (pp(k,cell2)-pp(k,cell1))*invDcEdge(iEdge)/(.5*(zz(k,cell2)+zz(k,cell1))) & -0.5*zxu(k,iEdge)*(dpdz(k,cell1)+dpdz(k,cell2)) ) @@ -5207,6 +5258,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm k = 2 wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge)) + !$acc loop vector do k=3,nVertLevels-1 wduz(k) = flux3( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)), 1.0_RKIND ) end do @@ -5216,15 +5268,23 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm wduz(nVertLevels+1) = 0. !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels tend_u(k,iEdge) = - rdzw(k)*(wduz(k+1)-wduz(k)) ! first use of tend_u end do ! Next, nonlinear Coriolis term (q) following Ringler et al JCP 2009 - q(:) = 0.0 + !$acc loop vector + do k=1,nVertLevels + q(k) = 0.0_RKIND + end do + + !$acc loop seq do j = 1,nEdgesOnEdge(iEdge) eoe = edgesOnEdge(j,iEdge) + + !$acc loop vector do k=1,nVertLevels workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe)) ! the original definition of pv_edge had a factor of 1/density. We have removed that factor @@ -5234,6 +5294,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! horizontal ke gradient and vorticity terms in the vector invariant formulation @@ -5242,6 +5303,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm * invDcEdge(iEdge)) & - u(k,iEdge)*0.5*(h_divergence(k,cell1)+h_divergence(k,cell2)) #ifdef CURVATURE + + !!! MGD UNTESTED !!! + ! curvature terms for the sphere tend_u(k,iEdge) = tend_u(k,iEdge) & - 2.*omega*cos(angleEdge(iEdge))*cos(latEdge(iEdge)) & @@ -5252,7 +5316,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do + !$acc end parallel + !$acc exit data delete(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, h_divergence, rdzw) + !$acc exit data copyout(tend_u_euler, tend_u) ! ! horizontal mixing for u @@ -5267,8 +5334,21 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). ! First, storage to hold the result from the first del^2 computation. - delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 + !$acc enter data create(delsq_u) + !$acc enter data copyin(divergence, vorticity, kdiff, tend_u_euler, rho_edge) + !$acc parallel default(present) + !$acc loop gang worker + do iEdge = edgeStart, edgeEnd + !$acc loop vector + do k = 1, nVertLevels + delsq_u(k,iEdge) = 0.0_RKIND + end do + end do + !$acc end parallel + + !$acc parallel default(present) + !$acc loop gang worker do iEdge=edgeStart,edgeEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -5278,6 +5358,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity @@ -5296,6 +5377,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do + !$acc end parallel + + !$acc exit data copyout(delsq_u, tend_u_euler) + !$acc exit data delete(divergence, vorticity, kdiff, rho_edge) + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active !$OMP BARRIER From 5ed529e81c59f06b6f91dfe3ef87c3c899eb8341 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 2 Jan 2025 18:20:42 -0700 Subject: [PATCH 20/79] WIP: checkpoint compute_dyn_tend work The changes in this PR haven't been tested, and doing so will require modifying specific namelist options and creating new baselines. --- .../dynamics/mpas_atm_time_integration.F | 72 +++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a1b93aacc5..cb9bb1bd70 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -249,6 +249,7 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: latEdge real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 + real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 #endif @@ -419,6 +420,9 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) !$acc enter data copyin(meshScalingDel2) + + call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) + !$acc enter data copyin(meshScalingDel4) #endif end subroutine mpas_atm_dynamics_init @@ -504,6 +508,7 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:), pointer :: latEdge real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 + real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 #endif @@ -674,6 +679,9 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) !$acc exit data delete(meshScalingDel2) + + call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) + !$acc exit data delete(meshScalingDel4) #endif end subroutine mpas_atm_dynamics_finalize @@ -5386,34 +5394,62 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$OMP BARRIER + !!! MGD UNTESTED !!! + + !$acc enter data create(delsq_vorticity, delsq_divergence) + !$acc enter data copyin(delsq_u, tend_u_euler, rho_edge) + + !$acc parallel default(present) + + !$acc loop gang worker do iVertex=vertexStart,vertexEnd - delsq_vorticity(1:nVertLevels,iVertex) = 0.0 + + !$acc loop vector + do k=1,nVertLevels + delsq_vorticity(k,iVertex) = 0.0_RKIND + end do + + !$acc loop seq do i=1,vertexDegree iEdge = edgesOnVertex(i,iVertex) edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) + + !$acc loop vector do k=1,nVertLevels delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) end do end do end do + !$acc loop gang worker do iCell=cellStart,cellEnd - delsq_divergence(1:nVertLevels,iCell) = 0.0 + + !$acc loop vector + do k=1,nVertLevels + delsq_divergence(k,iCell) = 0.0_RKIND + end do + r = invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) + + !$acc loop vector do k=1,nVertLevels delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) end do end do end do - - + !$acc end parallel !$OMP BARRIER + !$acc parallel default(present) + + !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -5425,6 +5461,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity @@ -5440,6 +5477,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do + + !$acc end parallel + + !$acc exit data copyout(tend_u_euler) + !$acc exit data delete(delsq_u, rho_edge, delsq_vorticity, delsq_divergence) end if ! 4th order mixing is active @@ -5448,13 +5490,20 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! if ( v_mom_eddy_visc2 > 0.0 ) then + !!! MGD UNTESTED !!! + if (config_mix_full) then ! mix full state + !$acc enter data copyin(tend_u_euler, rho_edge, u) + + !$acc parallel default(present) + !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) + !$acc loop vector do k=2,nVertLevels-1 z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) @@ -5471,19 +5520,30 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_u_euler) + !$acc exit data delete(rho_edge, u) else ! idealized cases where we mix on the perturbation from the initial 1-D state + + !$acc enter data copyin(tend_u_euler, u_init, v_init, rho_edge, u) + + !$acc parallel default(present) + !$acc loop gang worker private(u_mix) do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) + !$acc loop vector do k=1,nVertLevels u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & - v_init(k) * sin( angleEdge(iEdge) ) end do + !$acc loop vector do k=2,nVertLevels-1 z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) @@ -5500,6 +5560,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_u_euler) + !$acc exit data delete(rho_edge, u, u_init, v_init) end if ! mix perturbation state From b4fa45699891e28781c7e486ddc77c3cf87729c3 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Mon, 6 Jan 2025 15:44:40 -0700 Subject: [PATCH 21/79] WIP: checkpoint changes in time integration Default configuration gives bit-identical results. --- .../dynamics/mpas_atm_time_integration.F | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index cb9bb1bd70..682c240e88 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5577,27 +5577,52 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Rayleigh damping on u if (config_rayleigh_damp_u) then + rayleigh_coef_inverse = 1.0 / ( real(config_number_rayleigh_damp_u_levels) & * (config_rayleigh_damp_u_timescale_days*seconds_per_day) ) + do k=nVertLevels-config_number_rayleigh_damp_u_levels+1,nVertLevels rayleigh_damp_coef(k) = real(k - (nVertLevels-config_number_rayleigh_damp_u_levels))*rayleigh_coef_inverse end do + !!! MGD UNTESTED !!! + + !$acc enter data copyin(tend_u, rho_edge, rayleigh_damp_coef, u) + + !$acc parallel default(present) + + !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd !DIR$ IVDEP + !$acc loop vector do k=nVertlevels-config_number_rayleigh_damp_u_levels+1,nVertLevels tend_u(k,iEdge) = tend_u(k,iEdge) - rho_edge(k,iEdge)*u(k,iEdge)*rayleigh_damp_coef(k) end do end do + + !$acc end parallel + + !$acc exit data copyout(tend_u) + !$acc exit data delete(rho_edge, rayleigh_damp_coef, u) + end if + !$acc enter data copyin(tend_u, tend_u_euler, tend_ru_physics) + + !$acc parallel default(present) + !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! tend_u(k,iEdge) = tend_u(k,iEdge) + tend_u_euler(k,iEdge) tend_u(k,iEdge) = tend_u(k,iEdge) + tend_u_euler(k,iEdge) + tend_ru_physics(k,iEdge) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_u) + !$acc exit data delete(tend_u_euler, tend_ru_physics) !----------- rhs for w From 8335cfd958fe9567182c64195fbd2513fc3b589a Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Mon, 6 Jan 2025 19:18:21 -0700 Subject: [PATCH 22/79] WIP: finished porting w tendencies in mpas_atm_time_integration There is some code that hasn't been tested because it requires non-default namelist settings to activate. --- .../dynamics/mpas_atm_time_integration.F | 126 +++++++++++++++++- 1 file changed, 121 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 682c240e88..71643ee96c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -250,6 +250,8 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:), pointer :: rdzu + real (kind=RKIND), dimension(:), pointer :: rdzw #endif @@ -423,6 +425,12 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc enter data copyin(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'rdzu', rdzu) + !$acc enter data copyin(rdzu) + + call mpas_pool_get_array(mesh, 'rdzw', rdzw) + !$acc enter data copyin(rdzw) #endif end subroutine mpas_atm_dynamics_init @@ -509,6 +517,8 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:), pointer :: rdzu + real (kind=RKIND), dimension(:), pointer :: rdzw #endif @@ -682,6 +692,12 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc exit data delete(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'rdzu', rdzu) + !$acc exit data delete(rdzu) + + call mpas_pool_get_array(mesh, 'rdzw', rdzw) + !$acc exit data delete(rdzw) #endif end subroutine mpas_atm_dynamics_finalize @@ -5632,22 +5648,41 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! horizontal advection for w ! + !$acc enter data create(tend_w) + !$acc enter data copyin(ru, w) + + !$acc parallel default(present) + !$acc loop gang worker private(ru_edge_w, flux_arr) do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - tend_w(1:nVertLevels+1,iCell) = 0.0 + + !$acc loop vector + do k=1,nVertLevels+1 + tend_w(k,iCell) = 0.0_RKIND + end do + + !$acc loop seq do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) edge_sign = edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * 0.5 + !$acc loop vector do k=2,nVertLevels ru_edge_w(k) = fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge) end do - flux_arr(1:nVertLevels) = 0.0 + !$acc loop vector + do k=1,nVertLevels + flux_arr(k) = 0.0_RKIND + end do ! flux_arr stores the value of w at the cell edge used in the horizontal transport + !$acc loop seq do j=1,nAdvCellsForEdge(iEdge) iAdvCell = advCellsForEdge(j,iEdge) + + !$acc loop vector do k=2,nVertLevels scalar_weight = adv_coefs(j,iEdge) + sign(1.0_RKIND,ru_edge_w(k)) * adv_coefs_3rd(j,iEdge) flux_arr(k) = flux_arr(k) + scalar_weight * w(k,iAdvCell) @@ -5655,16 +5690,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels tend_w(k,iCell) = tend_w(k,iCell) - edgesOnCell_sign(i,iCell) * ru_edge_w(k)*flux_arr(k) end do end do end do + !$acc end parallel #ifdef CURVATURE + + !!! MGD UNTESTED !!! + + !$acc enter data copyin(rho_zz, ur_cell, vr_cell) + + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels tend_w(k,iCell) = tend_w(k,iCell) + (rho_zz(k,iCell)*fzm(k)+rho_zz(k-1,iCell)*fzp(k))* & ( (fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))**2. & @@ -5675,8 +5720,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do + !$acc end parallel + + !$acc exit data delete(rho_zz, ur_cell, vr_cell) #endif + !$acc exit data copyout(tend_w) + !$acc exit data delete(ru, w) ! ! horizontal mixing for w - we could combine this with advection directly (i.e. as a turbulent flux), @@ -5692,12 +5742,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! First, storage to hold the result from the first del^2 computation. ! we copied code from the theta mixing, hence the theta* names. + !$acc enter data create(delsq_w, tend_w_euler) + !$acc enter data copyin(rho_edge, w, kdiff) - delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 - + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellStart,cellEnd - tend_w_euler(1:nVertLevels+1,iCell) = 0.0 + + !$acc loop vector + do k=1,nVertLevels + delsq_w(k,iCell) = 0.0_RKIND + end do + + !$acc loop vector + do k=1,nVertLevels+1 + tend_w_euler(k,iCell) = 0.0_RKIND + end do + r_areaCell = invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) @@ -5707,6 +5771,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) @@ -5717,13 +5782,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do end do + !$acc end parallel + + !$acc exit data copyout(delsq_w, tend_w_euler) + !$acc exit data delete(rho_edge, w, kdiff) !$OMP BARRIER if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + !!! MGD UNTESTED !!! + + !$acc enter data copyin(tend_w_euler, delsq_w) + + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) cell1 = cellsOnEdge(1,iEdge) @@ -5731,12 +5809,17 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + !$acc loop vector do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) end do end do end do + !$acc end parallel + + !$acc exit data copyout(tend_w_euler) + !$acc exit data delete(delsq_w) end if ! 4th order mixing is active @@ -5753,15 +5836,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! vertical advection, pressure gradient and buoyancy for w ! + !$acc enter data copyin(tend_w, tend_w_euler, rw, w, cqw, pp, dpdz) + + !$acc parallel default(present) + !$acc loop gang worker private(wdwz) do iCell=cellSolveStart,cellSolveEnd wdwz(1) = 0.0 k = 2 wdwz(k) = 0.25*(rw(k,icell)+rw(k-1,iCell))*(w(k,iCell)+w(k-1,iCell)) + + !$acc loop vector do k=3,nVertLevels-1 wdwz(k) = flux3( w(k-2,iCell),w(k-1,iCell),w(k,iCell),w(k+1,iCell),0.5*(rw(k,iCell)+rw(k-1,iCell)), 1.0_RKIND ) end do + k = nVertLevels wdwz(k) = 0.25*(rw(k,icell)+rw(k-1,iCell))*(w(k,iCell)+w(k-1,iCell)) @@ -5770,12 +5860,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Note: next we are also dividing through by the cell area after the horizontal flux divergence !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels tend_w(k,iCell) = tend_w(k,iCell) * invAreaCell(iCell) -rdzu(k)*(wdwz(k+1)-wdwz(k)) end do if(rk_step == 1) then !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - cqw(k,iCell)*( & rdzu(k)*(pp(k,iCell)-pp(k-1,iCell)) & @@ -5784,19 +5876,34 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if end do + !$acc end parallel + + !$acc exit data copyout(tend_w, tend_w_euler) + !$acc exit data delete(rw, w, cqw, pp, dpdz) if (rk_step == 1) then if ( v_mom_eddy_visc2 > 0.0 ) then + !!! MGD UNTESTED !!! + + !$acc enter data copyin(tend_w_euler, rho_zz, w) + + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_w_euler) + !$acc exit data delete(rho_zz, w) end if @@ -5804,12 +5911,21 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! add in mixing terms for w + !$acc enter data copyin(tend_w, tend_w_euler) + + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels tend_w(k,iCell) = tend_w(k,iCell) + tend_w_euler(k,iCell) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_w) + !$acc exit data delete(tend_w_euler) !----------- rhs for theta From 7be18614f5f1ce94e46984d71b84b468f77756a9 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Mon, 6 Jan 2025 20:25:25 -0700 Subject: [PATCH 23/79] WIP: completed porting of theta tendency terms Some untested code has been ported. --- .../dynamics/mpas_atm_time_integration.F | 123 +++++++++++++++++- 1 file changed, 118 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 71643ee96c..02ec8de467 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5933,15 +5933,32 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! horizontal advection for theta ! + !$acc enter data create(tend_theta) + !$acc enter data copyin(ru, theta_m) + + !$acc parallel default(present) + !$acc loop gang worker private(flux_arr) do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - tend_theta(1:nVertLevels,iCell) = 0.0 + + !$acc loop vector + do k=1,nVertLevels + tend_theta(k,iCell) = 0.0_RKIND + end do + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) - flux_arr(1:nVertLevels) = 0.0 + !$acc loop vector + do k=1,nVertLevels + flux_arr(k) = 0.0_RKIND + end do + !$acc loop seq do j=1,nAdvCellsForEdge(iEdge) iAdvCell = advCellsForEdge(j,iEdge) + + !$acc loop vector do k=1,nVertLevels scalar_weight = adv_coefs(j,iEdge) + sign(1.0_RKIND,ru(k,iEdge))*adv_coefs_3rd(j,iEdge) flux_arr(k) = flux_arr(k) + scalar_weight* theta_m(k,iAdvCell) @@ -5949,28 +5966,46 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels tend_theta(k,iCell) = tend_theta(k,iCell) - edgesOnCell_sign(i,iCell) * ru(k,iEdge) * flux_arr(k) end do end do end do + !$acc end parallel + + !$acc exit data copyout(tend_theta) + !$acc exit data delete(ru, theta_m) ! addition to pick up perturbation flux for rtheta_pp equation if(rk_step > 1) then + + !$acc enter data copyin(tend_theta, ru_save, ru, theta_m_save) + + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels flux = edgesOnCell_sign(i,iCell)*dvEdge(iEdge)*(ru_save(k,iEdge)-ru(k,iEdge))*0.5*(theta_m_save(k,cell2)+theta_m_save(k,cell1)) tend_theta(k,iCell) = tend_theta(k,iCell)-flux ! division by areaCell picked up down below end do end do end do + !$acc end parallel + + !$acc exit data copyout(tend_theta) + !$acc exit data delete(ru_save, ru, theta_m_save) + end if ! @@ -5980,11 +6015,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + !$acc enter data create(delsq_theta, tend_theta_euler) + !$acc enter data copyin(theta_m, rho_edge, kdiff) + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellStart,cellEnd - tend_theta_euler(1:nVertLevels,iCell) = 0.0 + + !$acc loop vector + do k=1,nVertLevels + delsq_theta(k,iCell) = 0.0_RKIND + end do + + !$acc loop vector + do k=1,nVertLevels + tend_theta_euler(k,iCell) = 0.0_RKIND + end do + r_areaCell = invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) @@ -5992,6 +6042,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below @@ -6004,13 +6055,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do end do + !$acc end parallel + + !$acc exit data copyout(delsq_theta, tend_theta_euler) + !$acc exit data delete(theta_m, rho_edge, kdiff) !$OMP BARRIER if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + !!! MGD UNTESTED !!! + + !$acc enter data copyin(tend_theta_euler, delsq_theta) + + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) @@ -6019,20 +6083,32 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) + !$acc loop vector do k=1,nVertLevels tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) end do end do end do + !$acc end parallel + + !$acc exit data copyout(tend_theta_euler) + !$acc exit data delete(delsq_theta) end if ! 4th order mixing is active end if ! theta mixing calculated first rk_step + ! ! vertical advection plus diabatic term ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! + + !$acc enter data copyin(tend_theta, rw, rw_save, theta_m, theta_m_save, tend_rho, rho_zz, rt_diabatic_tend) + !$acc enter data create(rthdynten) + + !$acc parallel default(present) + !$acc loop gang worker private(wdtz) do iCell = cellSolveStart,cellSolveEnd wdtz(1) = 0.0 @@ -6040,22 +6116,30 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm k = 2 wdtz(k) = rw(k,icell)*(fzm(k)*theta_m(k,iCell)+fzp(k)*theta_m(k-1,iCell)) wdtz(k) = wdtz(k)+(rw_save(k,icell)-rw(k,icell))*(fzm(k)*theta_m_save(k,iCell)+fzp(k)*theta_m_save(k-1,iCell)) + + !$acc loop vector do k=3,nVertLevels-1 wdtz(k) = flux3( theta_m(k-2,iCell),theta_m(k-1,iCell),theta_m(k,iCell),theta_m(k+1,iCell), rw(k,iCell), coef_3rd_order ) wdtz(k) = wdtz(k) + (rw_save(k,icell)-rw(k,iCell))*(fzm(k)*theta_m_save(k,iCell)+fzp(k)*theta_m_save(k-1,iCell)) ! rtheta_pp redefinition end do + k = nVertLevels wdtz(k) = rw_save(k,icell)*(fzm(k)*theta_m(k,iCell)+fzp(k)*theta_m(k-1,iCell)) ! rtheta_pp redefinition wdtz(nVertLevels+1) = 0.0 !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels tend_theta(k,iCell) = tend_theta(k,iCell)*invAreaCell(iCell) -rdzw(k)*(wdtz(k+1)-wdtz(k)) rthdynten(k,iCell) = (tend_theta(k,iCell)-tend_rho(k,iCell)*theta_m(k,iCell))/rho_zz(k,iCell) tend_theta(k,iCell) = tend_theta(k,iCell) + rho_zz(k,iCell)*rt_diabatic_tend(k,iCell) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_theta, rthdynten) + !$acc exit data delete(rw, rw_save, theta_m, theta_m_save, tend_rho, rho_zz, rt_diabatic_tend) ! ! vertical mixing for theta - 2nd order @@ -6067,7 +6151,15 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (config_mix_full) then + !!! MGD UNTESTED !!! + + !$acc enter data copyin(tend_theta_euler, rho_zz, theta_m) + + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd + + !$acc loop vector do k=2,nVertLevels-1 z1 = zgrid(k-1,iCell) z2 = zgrid(k ,iCell) @@ -6083,9 +6175,17 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_theta_euler) + !$acc exit data delete(rho_zz, theta_m) + + else ! idealized cases where we mix on the perturbation from the initial 1-D state - else ! idealized cases where we mix on the perturbation from the initial 1-D state + !$acc enter data copyin(tend_theta_euler, rho_zz, theta_m, t_init) + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd do k=2,nVertLevels-1 z1 = zgrid(k-1,iCell) @@ -6102,6 +6202,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_theta_euler) + !$acc exit data delete(rho_zz, theta_m, t_init) end if @@ -6109,13 +6213,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if ! compute vertical theta mixing on first rk_step + !$acc enter data copyin(tend_theta, tend_theta_euler, tend_rtheta_physics) + + !$acc parallel default(present) + !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell) tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell) + tend_rtheta_physics(k,iCell) end do end do + !$acc end parallel + + !$acc exit data copyout(tend_theta) + !$acc exit data delete(tend_theta_euler, tend_rtheta_physics) end subroutine atm_compute_dyn_tend_work From 6e252100c50caf38efaa633e2693627f1d9400aa Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 16 Jan 2025 18:57:13 -0700 Subject: [PATCH 24/79] Fuse data movement for tend_u_euler --- .../dynamics/mpas_atm_time_integration.F | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 02ec8de467..c60b1ad0aa 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5053,6 +5053,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm flux4(q_im2, q_im1, q_i, q_ip1, ua) + & coef3*abs(ua)*((q_ip1 - q_im2)-3.*(q_i-q_im1))/12.0 + MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') + if (rk_step == 1) then + !$acc enter data create(tend_u_euler) + else + !$acc enter data copyin(tend_u_euler) + end if + MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') + prandtl_inv = 1.0_RKIND / prandtl invDt = 1.0_RKIND / dt inv_r_earth = 1.0_RKIND / r_earth @@ -5062,8 +5070,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - !$acc enter data create(tend_u_euler) - !$acc parallel !$acc loop gang worker do iEdge = edgeStart, edgeEnd @@ -5074,8 +5080,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_u_euler) - ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces). ! The integration coefficients were precomputed and stored in defc_a and defc_b @@ -5254,7 +5258,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! !$acc enter data copyin(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, h_divergence, rdzw) - !$acc enter data copyin(tend_u_euler) !$acc enter data create(tend_u) !$acc parallel default(present) @@ -5343,7 +5346,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data delete(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, h_divergence, rdzw) - !$acc exit data copyout(tend_u_euler, tend_u) + !$acc exit data copyout(tend_u) ! ! horizontal mixing for u @@ -5359,7 +5362,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! First, storage to hold the result from the first del^2 computation. !$acc enter data create(delsq_u) - !$acc enter data copyin(divergence, vorticity, kdiff, tend_u_euler, rho_edge) + !$acc enter data copyin(divergence, vorticity, kdiff, rho_edge) !$acc parallel default(present) !$acc loop gang worker @@ -5403,7 +5406,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel - !$acc exit data copyout(delsq_u, tend_u_euler) + !$acc exit data copyout(delsq_u) !$acc exit data delete(divergence, vorticity, kdiff, rho_edge) if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active @@ -5413,7 +5416,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! !$acc enter data create(delsq_vorticity, delsq_divergence) - !$acc enter data copyin(delsq_u, tend_u_euler, rho_edge) + !$acc enter data copyin(delsq_u, rho_edge) !$acc parallel default(present) @@ -5496,7 +5499,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel - !$acc exit data copyout(tend_u_euler) !$acc exit data delete(delsq_u, rho_edge, delsq_vorticity, delsq_divergence) end if ! 4th order mixing is active @@ -5510,7 +5512,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (config_mix_full) then ! mix full state - !$acc enter data copyin(tend_u_euler, rho_edge, u) + !$acc enter data copyin(rho_edge, u) !$acc parallel default(present) !$acc loop gang worker @@ -5538,13 +5540,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_u_euler) !$acc exit data delete(rho_edge, u) else ! idealized cases where we mix on the perturbation from the initial 1-D state - !$acc enter data copyin(tend_u_euler, u_init, v_init, rho_edge, u) + !$acc enter data copyin(u_init, v_init, rho_edge, u) !$acc parallel default(present) !$acc loop gang worker private(u_mix) @@ -5578,7 +5579,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_u_euler) !$acc exit data delete(rho_edge, u, u_init, v_init) end if ! mix perturbation state @@ -5623,7 +5623,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if - !$acc enter data copyin(tend_u, tend_u_euler, tend_ru_physics) + !$acc enter data copyin(tend_u, tend_ru_physics) !$acc parallel default(present) !$acc loop gang worker @@ -5638,7 +5638,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_u) - !$acc exit data delete(tend_u_euler, tend_ru_physics) + !$acc exit data delete(tend_ru_physics) !----------- rhs for w @@ -6230,6 +6230,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(tend_theta) !$acc exit data delete(tend_theta_euler, tend_rtheta_physics) + MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') + !$acc exit data copyout(tend_u_euler) + MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') + end subroutine atm_compute_dyn_tend_work From 0c8090930189d5dd44064b272e064fe7ad79b157 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 16 Jan 2025 19:11:01 -0700 Subject: [PATCH 25/79] Fuse data movement for kdiff --- .../dynamics/mpas_atm_time_integration.F | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c60b1ad0aa..1ccf93a15d 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5056,6 +5056,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then !$acc enter data create(tend_u_euler) + !$acc enter data create(kdiff) else !$acc enter data copyin(tend_u_euler) end if @@ -5085,7 +5086,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if(config_horiz_mixing == "2d_smagorinsky") then - !$acc enter data create(kdiff) !$acc enter data copyin(u, v) !$acc parallel default(present) @@ -5118,7 +5118,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(kdiff) !$acc exit data delete(u, v) h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 @@ -5128,8 +5127,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data create(kdiff) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellStart, cellEnd @@ -5140,8 +5137,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(kdiff) - h_mom_eddy_visc4 = config_h_mom_eddy_visc4 h_theta_eddy_visc4 = config_h_theta_eddy_visc4 @@ -5151,8 +5146,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(kdiff) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellStart,cellEnd @@ -5169,8 +5162,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(kdiff) - end if end if @@ -5362,7 +5353,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! First, storage to hold the result from the first del^2 computation. !$acc enter data create(delsq_u) - !$acc enter data copyin(divergence, vorticity, kdiff, rho_edge) + !$acc enter data copyin(divergence, vorticity, rho_edge) !$acc parallel default(present) !$acc loop gang worker @@ -5407,7 +5398,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_u) - !$acc exit data delete(divergence, vorticity, kdiff, rho_edge) + !$acc exit data delete(divergence, vorticity, rho_edge) if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active @@ -5743,7 +5734,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! we copied code from the theta mixing, hence the theta* names. !$acc enter data create(delsq_w, tend_w_euler) - !$acc enter data copyin(rho_edge, w, kdiff) + !$acc enter data copyin(rho_edge, w) !$acc parallel default(present) !$acc loop gang worker @@ -5785,7 +5776,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_w, tend_w_euler) - !$acc exit data delete(rho_edge, w, kdiff) + !$acc exit data delete(rho_edge, w) !$OMP BARRIER @@ -6016,7 +6007,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc enter data create(delsq_theta, tend_theta_euler) - !$acc enter data copyin(theta_m, rho_edge, kdiff) + !$acc enter data copyin(theta_m, rho_edge) !$acc parallel default(present) !$acc loop gang worker @@ -6058,7 +6049,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_theta, tend_theta_euler) - !$acc exit data delete(theta_m, rho_edge, kdiff) + !$acc exit data delete(theta_m, rho_edge) !$OMP BARRIER @@ -6232,6 +6223,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') !$acc exit data copyout(tend_u_euler) + !$acc exit data delete(kdiff) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From a92e78f23cc28cef830034293e18f611592c50e8 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 16 Jan 2025 19:21:20 -0700 Subject: [PATCH 26/79] Fuse data movement for u, v, h_divergence, ru --- .../dynamics/mpas_atm_time_integration.F | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 1ccf93a15d..0b45a6ce52 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5057,9 +5057,15 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc enter data create(tend_u_euler) !$acc enter data create(kdiff) + + if (config_horiz_mixing == '2d_smagorinsky') then + !$acc enter data copyin(u, v) + end if else !$acc enter data copyin(tend_u_euler) end if + !$acc enter data create(h_divergence) + !$acc enter data copyin(ru) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5086,8 +5092,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if(config_horiz_mixing == "2d_smagorinsky") then - !$acc enter data copyin(u, v) - !$acc parallel default(present) !$acc loop gang worker private(d_diag, d_off_diag) do iCell = cellStart,cellEnd @@ -5118,8 +5122,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(u, v) - h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 @@ -5171,9 +5173,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! accumulate horizontal mass-flux - !$acc enter data create(h_divergence) - !$acc enter data copyin(ru) - !$acc parallel default(present) !$acc loop gang worker do iCell=cellStart,cellEnd @@ -5209,9 +5208,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(h_divergence) - !$acc exit data delete(ru) - ! ! dp / dz and tend_rho ! @@ -5221,7 +5217,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rgas_cprcv = rgas*cp/cv - !$acc enter data copyin(h_divergence, tend_rho_physics, rdzw, rw, rb, qtot, rr_save) + !$acc enter data copyin(tend_rho_physics, rdzw, rw, rb, qtot, rr_save) !$acc enter data create(tend_rho, dpdz) !$acc parallel default(present) @@ -5237,7 +5233,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(h_divergence, tend_rho_physics, rdzw, rw, rb, qtot, rr_save) + !$acc exit data delete(tend_rho_physics, rdzw, rw, rb, qtot, rr_save) !$acc exit data copyout(tend_rho, dpdz) end if @@ -5248,7 +5244,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Compute u (normal) velocity tendency for each edge (cell face) ! - !$acc enter data copyin(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, h_divergence, rdzw) + !$acc enter data copyin(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, rdzw) !$acc enter data create(tend_u) !$acc parallel default(present) @@ -5336,7 +5332,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, h_divergence, rdzw) + !$acc exit data delete(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, rdzw) !$acc exit data copyout(tend_u) ! @@ -5640,7 +5636,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! !$acc enter data create(tend_w) - !$acc enter data copyin(ru, w) + !$acc enter data copyin(w) !$acc parallel default(present) !$acc loop gang worker private(ru_edge_w, flux_arr) @@ -5717,7 +5713,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm #endif !$acc exit data copyout(tend_w) - !$acc exit data delete(ru, w) + !$acc exit data delete(w) ! ! horizontal mixing for w - we could combine this with advection directly (i.e. as a turbulent flux), @@ -5925,7 +5921,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! !$acc enter data create(tend_theta) - !$acc enter data copyin(ru, theta_m) + !$acc enter data copyin(theta_m) !$acc parallel default(present) !$acc loop gang worker private(flux_arr) @@ -5967,13 +5963,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta) - !$acc exit data delete(ru, theta_m) + !$acc exit data delete(theta_m) ! addition to pick up perturbation flux for rtheta_pp equation if(rk_step > 1) then - !$acc enter data copyin(tend_theta, ru_save, ru, theta_m_save) + !$acc enter data copyin(tend_theta, ru_save, theta_m_save) !$acc parallel default(present) !$acc loop gang worker @@ -5995,7 +5991,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta) - !$acc exit data delete(ru_save, ru, theta_m_save) + !$acc exit data delete(ru_save, theta_m_save) end if @@ -6224,6 +6220,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') !$acc exit data copyout(tend_u_euler) !$acc exit data delete(kdiff) + if (rk_step == 1) then + if (config_horiz_mixing == '2d_smagorinsky') then + !$acc exit data delete(u, v) + end if + end if + !$acc exit data copyout(h_divergence) + !$acc exit data delete(ru) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From 44e84f0bfe1b03e307662c578b0358b2f22a87a6 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 16 Jan 2025 19:49:31 -0700 Subject: [PATCH 27/79] Fuse data movement for tend_rho and tend_rho_physics --- .../dynamics/mpas_atm_time_integration.F | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 0b45a6ce52..6c919212fc 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5057,12 +5057,15 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc enter data create(tend_u_euler) !$acc enter data create(kdiff) + !$acc enter data copyin(tend_rho_physics) + !$acc enter data create(tend_rho) if (config_horiz_mixing == '2d_smagorinsky') then !$acc enter data copyin(u, v) end if else !$acc enter data copyin(tend_u_euler) + !$acc enter data copyin(tend_rho) end if !$acc enter data create(h_divergence) !$acc enter data copyin(ru) @@ -5217,8 +5220,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rgas_cprcv = rgas*cp/cv - !$acc enter data copyin(tend_rho_physics, rdzw, rw, rb, qtot, rr_save) - !$acc enter data create(tend_rho, dpdz) + !$acc enter data copyin(rdzw, rw, rb, qtot, rr_save) + !$acc enter data create(dpdz) !$acc parallel default(present) !$acc loop gang worker @@ -5233,8 +5236,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(tend_rho_physics, rdzw, rw, rb, qtot, rr_save) - !$acc exit data copyout(tend_rho, dpdz) + !$acc exit data delete(rdzw, rw, rb, qtot, rr_save) + !$acc exit data copyout(dpdz) end if @@ -6091,7 +6094,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! - !$acc enter data copyin(tend_theta, rw, rw_save, theta_m, theta_m_save, tend_rho, rho_zz, rt_diabatic_tend) + !$acc enter data copyin(tend_theta, rw, rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc parallel default(present) @@ -6126,7 +6129,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta, rthdynten) - !$acc exit data delete(rw, rw_save, theta_m, theta_m_save, tend_rho, rho_zz, rt_diabatic_tend) + !$acc exit data delete(rw, rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) ! ! vertical mixing for theta - 2nd order @@ -6221,9 +6224,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(tend_u_euler) !$acc exit data delete(kdiff) if (rk_step == 1) then + !$acc exit data copyout(tend_rho) + !$acc exit data delete(tend_rho_physics) if (config_horiz_mixing == '2d_smagorinsky') then !$acc exit data delete(u, v) end if + else + !$acc exit data delete(tend_rho) end if !$acc exit data copyout(h_divergence) !$acc exit data delete(ru) From ad7c88c2db8c9f672d41991f2071e8b256865f89 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 17 Jan 2025 09:55:24 -0700 Subject: [PATCH 28/79] Fuse data movement for rw --- .../dynamics/mpas_atm_time_integration.F | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 6c919212fc..248d76a899 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5068,7 +5068,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(tend_rho) end if !$acc enter data create(h_divergence) - !$acc enter data copyin(ru) + !$acc enter data copyin(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5220,7 +5220,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rgas_cprcv = rgas*cp/cv - !$acc enter data copyin(rdzw, rw, rb, qtot, rr_save) + !$acc enter data copyin(rdzw, rb, qtot, rr_save) !$acc enter data create(dpdz) !$acc parallel default(present) @@ -5236,7 +5236,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(rdzw, rw, rb, qtot, rr_save) + !$acc exit data delete(rdzw, rb, qtot, rr_save) !$acc exit data copyout(dpdz) end if @@ -5247,7 +5247,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Compute u (normal) velocity tendency for each edge (cell face) ! - !$acc enter data copyin(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, rdzw) + !$acc enter data copyin(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke, rdzw) !$acc enter data create(tend_u) !$acc parallel default(present) @@ -5335,7 +5335,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(cqu, pp, dpdz, u, w, rw, pv_edge, rho_edge, ke, rdzw) + !$acc exit data delete(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke, rdzw) !$acc exit data copyout(tend_u) ! @@ -5826,7 +5826,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! vertical advection, pressure gradient and buoyancy for w ! - !$acc enter data copyin(tend_w, tend_w_euler, rw, w, cqw, pp, dpdz) + !$acc enter data copyin(tend_w, tend_w_euler, w, cqw, pp, dpdz) !$acc parallel default(present) !$acc loop gang worker private(wdwz) @@ -5869,7 +5869,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_w, tend_w_euler) - !$acc exit data delete(rw, w, cqw, pp, dpdz) + !$acc exit data delete(w, cqw, pp, dpdz) if (rk_step == 1) then @@ -6094,7 +6094,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! - !$acc enter data copyin(tend_theta, rw, rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) + !$acc enter data copyin(tend_theta, rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc parallel default(present) @@ -6129,7 +6129,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta, rthdynten) - !$acc exit data delete(rw, rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) + !$acc exit data delete(rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) ! ! vertical mixing for theta - 2nd order @@ -6233,7 +6233,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(tend_rho) end if !$acc exit data copyout(h_divergence) - !$acc exit data delete(ru) + !$acc exit data delete(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From ca613661089e214df1f292f9f2a5939e587fca23 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 17 Jan 2025 15:43:12 -0700 Subject: [PATCH 29/79] FIXUP for 167dd4de75 (remove redundant rdzu,rdzw) --- .../dynamics/mpas_atm_time_integration.F | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 248d76a899..0a0b53a4d7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -250,8 +250,6 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 - real (kind=RKIND), dimension(:), pointer :: rdzu - real (kind=RKIND), dimension(:), pointer :: rdzw #endif @@ -425,12 +423,6 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc enter data copyin(meshScalingDel4) - - call mpas_pool_get_array(mesh, 'rdzu', rdzu) - !$acc enter data copyin(rdzu) - - call mpas_pool_get_array(mesh, 'rdzw', rdzw) - !$acc enter data copyin(rdzw) #endif end subroutine mpas_atm_dynamics_init @@ -517,8 +509,6 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 - real (kind=RKIND), dimension(:), pointer :: rdzu - real (kind=RKIND), dimension(:), pointer :: rdzw #endif @@ -692,12 +682,6 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc exit data delete(meshScalingDel4) - - call mpas_pool_get_array(mesh, 'rdzu', rdzu) - !$acc exit data delete(rdzu) - - call mpas_pool_get_array(mesh, 'rdzw', rdzw) - !$acc exit data delete(rdzw) #endif end subroutine mpas_atm_dynamics_finalize From 32f71f56b2c181159e6fce8c4b17d97819fc99da Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Feb 2025 16:26:14 -0700 Subject: [PATCH 30/79] fixup: remove redundant (after rebase) code for moving 'zxu' --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 0a0b53a4d7..6668a3c074 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -245,7 +245,6 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell real (kind=RKIND), dimension(:,:), pointer :: defc_a real (kind=RKIND), dimension(:,:), pointer :: defc_b - real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:), pointer :: latEdge real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 @@ -409,9 +408,6 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'defc_b', defc_b) !$acc enter data copyin(defc_b) - call mpas_pool_get_array(mesh, 'zxu', zxu) - !$acc enter data copyin(zxu) - call mpas_pool_get_array(mesh, 'latEdge', latEdge) !$acc enter data copyin(latEdge) @@ -504,7 +500,6 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell real (kind=RKIND), dimension(:,:), pointer :: defc_a real (kind=RKIND), dimension(:,:), pointer :: defc_b - real (kind=RKIND), dimension(:,:), pointer :: zxu real (kind=RKIND), dimension(:), pointer :: latEdge real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 @@ -668,9 +663,6 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'defc_b', defc_b) !$acc exit data delete(defc_b) - call mpas_pool_get_array(mesh, 'zxu', zxu) - !$acc exit data delete(zxu) - call mpas_pool_get_array(mesh, 'latEdge', latEdge) !$acc exit data delete(latEdge) From 54fecf5593a7e7905ec4643bd594461bb458fd33 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Feb 2025 17:45:21 -0700 Subject: [PATCH 31/79] data movement: rdzw --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 6668a3c074..71a37c593c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5196,7 +5196,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rgas_cprcv = rgas*cp/cv - !$acc enter data copyin(rdzw, rb, qtot, rr_save) + !$acc enter data copyin(rb, qtot, rr_save) !$acc enter data create(dpdz) !$acc parallel default(present) @@ -5212,7 +5212,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(rdzw, rb, qtot, rr_save) + !$acc exit data delete(rb, qtot, rr_save) !$acc exit data copyout(dpdz) end if @@ -5223,7 +5223,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Compute u (normal) velocity tendency for each edge (cell face) ! - !$acc enter data copyin(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke, rdzw) + !$acc enter data copyin(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke) !$acc enter data create(tend_u) !$acc parallel default(present) @@ -5311,7 +5311,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke, rdzw) + !$acc exit data delete(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke) !$acc exit data copyout(tend_u) ! From 571c52d35cfd548ffdc35a651a4e7f1a189ebf3f Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Feb 2025 18:00:23 -0700 Subject: [PATCH 32/79] data merge: rb, qtot, rr_save --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 71a37c593c..b5ab52e70f 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5035,6 +5035,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data create(kdiff) !$acc enter data copyin(tend_rho_physics) !$acc enter data create(tend_rho) + !$acc enter data copyin(rb, qtot, rr_save) if (config_horiz_mixing == '2d_smagorinsky') then !$acc enter data copyin(u, v) @@ -5196,7 +5197,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rgas_cprcv = rgas*cp/cv - !$acc enter data copyin(rb, qtot, rr_save) !$acc enter data create(dpdz) !$acc parallel default(present) @@ -5212,7 +5212,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(rb, qtot, rr_save) !$acc exit data copyout(dpdz) end if @@ -6202,6 +6201,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc exit data copyout(tend_rho) !$acc exit data delete(tend_rho_physics) + !$acc exit data delete(rb, qtot, rr_save) if (config_horiz_mixing == '2d_smagorinsky') then !$acc exit data delete(u, v) end if From 5dc6856ee5a14ed6f9317e08a4916811a5980a6e Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Feb 2025 18:10:45 -0700 Subject: [PATCH 33/79] data movement: dpdz --- .../dynamics/mpas_atm_time_integration.F | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index b5ab52e70f..0b8a901cff 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5044,6 +5044,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(tend_u_euler) !$acc enter data copyin(tend_rho) end if + !$acc enter data create(dpdz) !$acc enter data create(h_divergence) !$acc enter data copyin(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -5197,8 +5198,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rgas_cprcv = rgas*cp/cv - !$acc enter data create(dpdz) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellStart,cellEnd @@ -5212,8 +5211,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(dpdz) - end if !$OMP BARRIER @@ -5222,7 +5219,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Compute u (normal) velocity tendency for each edge (cell face) ! - !$acc enter data copyin(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke) + !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc enter data create(tend_u) !$acc parallel default(present) @@ -5310,7 +5307,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(cqu, pp, dpdz, u, w, pv_edge, rho_edge, ke) + !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc exit data copyout(tend_u) ! @@ -5801,7 +5798,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! vertical advection, pressure gradient and buoyancy for w ! - !$acc enter data copyin(tend_w, tend_w_euler, w, cqw, pp, dpdz) + !$acc enter data copyin(tend_w, tend_w_euler, w, cqw, pp) !$acc parallel default(present) !$acc loop gang worker private(wdwz) @@ -5844,7 +5841,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_w, tend_w_euler) - !$acc exit data delete(w, cqw, pp, dpdz) + !$acc exit data delete(w, cqw, pp) if (rk_step == 1) then @@ -6208,6 +6205,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else !$acc exit data delete(tend_rho) end if + !$acc exit data delete(dpdz) !$acc exit data copyout(h_divergence) !$acc exit data delete(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') From 3ee06992cfdff62b89cb668b0156c3fe84f59ceb Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Feb 2025 18:27:21 -0700 Subject: [PATCH 34/79] data movement: tend_u --- .../dynamics/mpas_atm_time_integration.F | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 0b8a901cff..bd6a9e7dc9 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5045,6 +5045,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(tend_rho) end if !$acc enter data create(dpdz) + !$acc enter data create(tend_u) !$acc enter data create(h_divergence) !$acc enter data copyin(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -5220,7 +5221,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc enter data create(tend_u) !$acc parallel default(present) !$acc loop gang worker private(wduz, q) @@ -5308,7 +5308,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc exit data copyout(tend_u) ! ! horizontal mixing for u @@ -5565,7 +5564,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_u, rho_edge, rayleigh_damp_coef, u) + !$acc enter data copyin(rho_edge, rayleigh_damp_coef, u) !$acc parallel default(present) @@ -5580,12 +5579,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel - !$acc exit data copyout(tend_u) !$acc exit data delete(rho_edge, rayleigh_damp_coef, u) end if - !$acc enter data copyin(tend_u, tend_ru_physics) + !$acc enter data copyin(tend_ru_physics) !$acc parallel default(present) !$acc loop gang worker @@ -5599,7 +5597,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_u) !$acc exit data delete(tend_ru_physics) @@ -6206,6 +6203,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(tend_rho) end if !$acc exit data delete(dpdz) + !$acc exit data copyout(tend_u) !$acc exit data copyout(h_divergence) !$acc exit data delete(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') From 7e76e54c4c6eec20b0894e4a9e82e46304cb0e56 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Feb 2025 18:40:36 -0700 Subject: [PATCH 35/79] data movement: cqu, pp, u, w, pv_edge, rho_edge, ke --- .../dynamics/mpas_atm_time_integration.F | 41 ++++++++----------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index bd6a9e7dc9..c90a7fa898 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5046,6 +5046,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !$acc enter data create(dpdz) !$acc enter data create(tend_u) + !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc enter data create(h_divergence) !$acc enter data copyin(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -5220,8 +5221,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Compute u (normal) velocity tendency for each edge (cell face) ! - !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc parallel default(present) !$acc loop gang worker private(wduz, q) do iEdge=edgeSolveStart,edgeSolveEnd @@ -5307,8 +5306,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) - ! ! horizontal mixing for u ! mixing terms are integrated using forward-Euler, so this tendency is only computed in the @@ -5323,7 +5320,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! First, storage to hold the result from the first del^2 computation. !$acc enter data create(delsq_u) - !$acc enter data copyin(divergence, vorticity, rho_edge) + !$acc enter data copyin(divergence, vorticity) !$acc parallel default(present) !$acc loop gang worker @@ -5368,7 +5365,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_u) - !$acc exit data delete(divergence, vorticity, rho_edge) + !$acc exit data delete(divergence, vorticity) if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active @@ -5377,7 +5374,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! !$acc enter data create(delsq_vorticity, delsq_divergence) - !$acc enter data copyin(delsq_u, rho_edge) + !$acc enter data copyin(delsq_u) !$acc parallel default(present) @@ -5460,7 +5457,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel - !$acc exit data delete(delsq_u, rho_edge, delsq_vorticity, delsq_divergence) + !$acc exit data delete(delsq_u, delsq_vorticity, delsq_divergence) end if ! 4th order mixing is active @@ -5473,8 +5470,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (config_mix_full) then ! mix full state - !$acc enter data copyin(rho_edge, u) - !$acc parallel default(present) !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd @@ -5501,12 +5496,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(rho_edge, u) else ! idealized cases where we mix on the perturbation from the initial 1-D state - !$acc enter data copyin(u_init, v_init, rho_edge, u) + !$acc enter data copyin(u_init, v_init) !$acc parallel default(present) !$acc loop gang worker private(u_mix) @@ -5540,7 +5534,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(rho_edge, u, u_init, v_init) + !$acc exit data delete(u_init, v_init) end if ! mix perturbation state @@ -5564,7 +5558,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(rho_edge, rayleigh_damp_coef, u) + !$acc enter data copyin(rayleigh_damp_coef) !$acc parallel default(present) @@ -5579,7 +5573,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel - !$acc exit data delete(rho_edge, rayleigh_damp_coef, u) + !$acc exit data delete(rayleigh_damp_coef) end if @@ -5608,7 +5602,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! !$acc enter data create(tend_w) - !$acc enter data copyin(w) !$acc parallel default(present) !$acc loop gang worker private(ru_edge_w, flux_arr) @@ -5685,7 +5678,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm #endif !$acc exit data copyout(tend_w) - !$acc exit data delete(w) ! ! horizontal mixing for w - we could combine this with advection directly (i.e. as a turbulent flux), @@ -5702,7 +5694,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! we copied code from the theta mixing, hence the theta* names. !$acc enter data create(delsq_w, tend_w_euler) - !$acc enter data copyin(rho_edge, w) !$acc parallel default(present) !$acc loop gang worker @@ -5744,7 +5735,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_w, tend_w_euler) - !$acc exit data delete(rho_edge, w) !$OMP BARRIER @@ -5795,7 +5785,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! vertical advection, pressure gradient and buoyancy for w ! - !$acc enter data copyin(tend_w, tend_w_euler, w, cqw, pp) + !$acc enter data copyin(tend_w, tend_w_euler, cqw) !$acc parallel default(present) !$acc loop gang worker private(wdwz) @@ -5838,7 +5828,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_w, tend_w_euler) - !$acc exit data delete(w, cqw, pp) + !$acc exit data delete(cqw) if (rk_step == 1) then @@ -5846,7 +5836,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_w_euler, rho_zz, w) + !$acc enter data copyin(tend_w_euler, rho_zz) !$acc parallel default(present) !$acc loop gang worker @@ -5862,7 +5852,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_w_euler) - !$acc exit data delete(rho_zz, w) + !$acc exit data delete(rho_zz) end if @@ -5975,7 +5965,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc enter data create(delsq_theta, tend_theta_euler) - !$acc enter data copyin(theta_m, rho_edge) + !$acc enter data copyin(theta_m) !$acc parallel default(present) !$acc loop gang worker @@ -6017,7 +6007,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_theta, tend_theta_euler) - !$acc exit data delete(theta_m, rho_edge) + !$acc exit data delete(theta_m) !$OMP BARRIER @@ -6204,6 +6194,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !$acc exit data delete(dpdz) !$acc exit data copyout(tend_u) + !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc exit data copyout(h_divergence) !$acc exit data delete(ru, rw) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') From 8e2d056783b278dbbabf9eb86d0300bb55e8290a Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Mon, 3 Mar 2025 21:37:15 -0700 Subject: [PATCH 36/79] data movement: divergence, vorticity --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c90a7fa898..cf362a3a28 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5036,6 +5036,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(tend_rho_physics) !$acc enter data create(tend_rho) !$acc enter data copyin(rb, qtot, rr_save) + !$acc enter data copyin(divergence, vorticity) if (config_horiz_mixing == '2d_smagorinsky') then !$acc enter data copyin(u, v) @@ -5320,7 +5321,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! First, storage to hold the result from the first del^2 computation. !$acc enter data create(delsq_u) - !$acc enter data copyin(divergence, vorticity) !$acc parallel default(present) !$acc loop gang worker @@ -5365,7 +5365,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_u) - !$acc exit data delete(divergence, vorticity) if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active @@ -6186,6 +6185,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(tend_rho) !$acc exit data delete(tend_rho_physics) !$acc exit data delete(rb, qtot, rr_save) + !$acc exit data delete(divergence, vorticity) if (config_horiz_mixing == '2d_smagorinsky') then !$acc exit data delete(u, v) end if From b25cf7e722d9d42ccf768a35488cfa0cef51e9c6 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Mon, 3 Mar 2025 22:38:29 -0700 Subject: [PATCH 37/79] data movement: delsq_u, delsq_vorticity, delsq_divergence --- .../dynamics/mpas_atm_time_integration.F | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index cf362a3a28..cad93899fd 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5037,10 +5037,16 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data create(tend_rho) !$acc enter data copyin(rb, qtot, rr_save) !$acc enter data copyin(divergence, vorticity) + !$acc enter data create(delsq_u) if (config_horiz_mixing == '2d_smagorinsky') then !$acc enter data copyin(u, v) end if + + ! The following are only needed if h_mom_eddy_visc4 > 0.0, but the conditional check + ! would be messy, and these variables are created and deleted, rather than copied in + ! and out + !$acc enter data create(delsq_vorticity, delsq_divergence) else !$acc enter data copyin(tend_u_euler) !$acc enter data copyin(tend_rho) @@ -5320,8 +5326,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). ! First, storage to hold the result from the first del^2 computation. - !$acc enter data create(delsq_u) - !$acc parallel default(present) !$acc loop gang worker do iEdge = edgeStart, edgeEnd @@ -5364,17 +5368,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel - !$acc exit data copyout(delsq_u) - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active !$OMP BARRIER !!! MGD UNTESTED !!! - !$acc enter data create(delsq_vorticity, delsq_divergence) - !$acc enter data copyin(delsq_u) - !$acc parallel default(present) !$acc loop gang worker @@ -5455,8 +5454,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - - !$acc exit data delete(delsq_u, delsq_vorticity, delsq_divergence) end if ! 4th order mixing is active @@ -6186,9 +6183,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(tend_rho_physics) !$acc exit data delete(rb, qtot, rr_save) !$acc exit data delete(divergence, vorticity) + !$acc exit data copyout(delsq_u) + if (config_horiz_mixing == '2d_smagorinsky') then !$acc exit data delete(u, v) end if + + !$acc exit data delete(delsq_vorticity, delsq_divergence) else !$acc exit data delete(tend_rho) end if From acd3e521c4f3095220962e52d0115069acad38de Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Mon, 3 Mar 2025 22:58:40 -0700 Subject: [PATCH 38/79] data movement: u_init, v_init, rayleigh_damp_coef, tend_ru_physics --- .../dynamics/mpas_atm_time_integration.F | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index cad93899fd..f1678f0804 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5047,6 +5047,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! would be messy, and these variables are created and deleted, rather than copied in ! and out !$acc enter data create(delsq_vorticity, delsq_divergence) + + if (.not. config_mix_full) then + !$acc enter data copyin(u_init, v_init) + end if else !$acc enter data copyin(tend_u_euler) !$acc enter data copyin(tend_rho) @@ -5056,6 +5060,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc enter data create(h_divergence) !$acc enter data copyin(ru, rw) + if (config_rayleigh_damp_u) then + !$acc enter data copyin(rayleigh_damp_coef) + end if + !$acc enter data copyin(tend_ru_physics) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5495,8 +5503,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else ! idealized cases where we mix on the perturbation from the initial 1-D state - - !$acc enter data copyin(u_init, v_init) + !!! MGD UNTESTED !!! !$acc parallel default(present) !$acc loop gang worker private(u_mix) @@ -5530,8 +5537,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(u_init, v_init) - end if ! mix perturbation state end if ! vertical mixing of horizontal momentum @@ -5554,8 +5559,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(rayleigh_damp_coef) - !$acc parallel default(present) !$acc loop gang worker @@ -5569,12 +5572,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel - !$acc exit data delete(rayleigh_damp_coef) - end if - !$acc enter data copyin(tend_ru_physics) - !$acc parallel default(present) !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd @@ -5587,8 +5586,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(tend_ru_physics) - !----------- rhs for w @@ -6190,6 +6187,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !$acc exit data delete(delsq_vorticity, delsq_divergence) + + if (.not. config_mix_full) then + !$acc exit data delete(u_init, v_init) + end if else !$acc exit data delete(tend_rho) end if @@ -6198,6 +6199,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc exit data copyout(h_divergence) !$acc exit data delete(ru, rw) + if (config_rayleigh_damp_u) then + !$acc exit data delete(rayleigh_damp_coef) + end if + !$acc exit data delete(tend_ru_physics) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From cf7c2ff287ab162bb66125964309d57046ec11d6 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Mon, 3 Mar 2025 23:16:13 -0700 Subject: [PATCH 39/79] data movement: delsq_w, tend_w_euler, tend_w_euler, delsq_w, tend_w, rho_zz, ur_cell, vr_cell --- .../dynamics/mpas_atm_time_integration.F | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f1678f0804..043c7f6717 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5051,6 +5051,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (.not. config_mix_full) then !$acc enter data copyin(u_init, v_init) end if + !$acc enter data create(delsq_w, tend_w_euler) + + ! The following are only needed if h_mom_eddy_visc4 > 0.0, but the conditional check + ! would be messy, and these variables are created and deleted, rather than copied in + ! and out + !$acc enter data copyin(tend_w_euler, delsq_w) else !$acc enter data copyin(tend_u_euler) !$acc enter data copyin(tend_rho) @@ -5064,6 +5070,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(rayleigh_damp_coef) end if !$acc enter data copyin(tend_ru_physics) + !$acc enter data create(tend_w) +#ifdef CURVATURE + !$acc enter data copyin(rho_zz, ur_cell, vr_cell) +#endif MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5594,8 +5604,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! horizontal advection for w ! - !$acc enter data create(tend_w) - !$acc parallel default(present) !$acc loop gang worker private(ru_edge_w, flux_arr) do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... @@ -5648,8 +5656,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(rho_zz, ur_cell, vr_cell) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd @@ -5667,11 +5673,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(rho_zz, ur_cell, vr_cell) #endif - !$acc exit data copyout(tend_w) - ! ! horizontal mixing for w - we could combine this with advection directly (i.e. as a turbulent flux), ! but here we can also code in hyperdiffusion if we wish (2nd order at present) @@ -5686,8 +5689,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! First, storage to hold the result from the first del^2 computation. ! we copied code from the theta mixing, hence the theta* names. - !$acc enter data create(delsq_w, tend_w_euler) - !$acc parallel default(present) !$acc loop gang worker do iCell=cellStart,cellEnd @@ -5727,16 +5728,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(delsq_w, tend_w_euler) - !$OMP BARRIER if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_w_euler, delsq_w) - !$acc parallel default(present) !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... @@ -5760,9 +5757,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_w_euler) - !$acc exit data delete(delsq_w) - end if ! 4th order mixing is active end if ! horizontal mixing for w computed in first rk_step @@ -6191,6 +6185,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (.not. config_mix_full) then !$acc exit data delete(u_init, v_init) end if + + !$acc exit data copyout(delsq_w, tend_w_euler) + !$acc exit data copyout(tend_w_euler) + !$acc exit data delete(delsq_w) else !$acc exit data delete(tend_rho) end if @@ -6203,6 +6201,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(rayleigh_damp_coef) end if !$acc exit data delete(tend_ru_physics) + !$acc exit data copyout(tend_w) +#ifdef CURVATURE + !$acc exit data delete(rho_zz, ur_cell, vr_cell) +#endif MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From 4af60d9ec5ea6c4588d600071658caf5c9f3e3f3 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 7 Mar 2025 17:08:30 -0700 Subject: [PATCH 40/79] data movement: tend_w_euler --- .../dynamics/mpas_atm_time_integration.F | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 043c7f6717..f2571a93e4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5051,12 +5051,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (.not. config_mix_full) then !$acc enter data copyin(u_init, v_init) end if - !$acc enter data create(delsq_w, tend_w_euler) + !$acc enter data create(delsq_w) ! The following are only needed if h_mom_eddy_visc4 > 0.0, but the conditional check ! would be messy, and these variables are created and deleted, rather than copied in ! and out - !$acc enter data copyin(tend_w_euler, delsq_w) + !$acc enter data copyin(delsq_w) else !$acc enter data copyin(tend_u_euler) !$acc enter data copyin(tend_rho) @@ -5071,6 +5071,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !$acc enter data copyin(tend_ru_physics) !$acc enter data create(tend_w) + !$acc enter data copyin(tend_w_euler) #ifdef CURVATURE !$acc enter data copyin(rho_zz, ur_cell, vr_cell) #endif @@ -5772,7 +5773,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! vertical advection, pressure gradient and buoyancy for w ! - !$acc enter data copyin(tend_w, tend_w_euler, cqw) + !$acc enter data copyin(tend_w, cqw) !$acc parallel default(present) !$acc loop gang worker private(wdwz) @@ -5814,7 +5815,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_w, tend_w_euler) + !$acc exit data copyout(tend_w) !$acc exit data delete(cqw) if (rk_step == 1) then @@ -5823,7 +5824,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_w_euler, rho_zz) + !$acc enter data copyin(rho_zz) !$acc parallel default(present) !$acc loop gang worker @@ -5838,7 +5839,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_w_euler) !$acc exit data delete(rho_zz) end if @@ -5847,7 +5847,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! add in mixing terms for w - !$acc enter data copyin(tend_w, tend_w_euler) + !$acc enter data copyin(tend_w) !$acc parallel default(present) !$acc loop gang worker @@ -5861,7 +5861,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_w) - !$acc exit data delete(tend_w_euler) !----------- rhs for theta @@ -6186,8 +6185,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(u_init, v_init) end if - !$acc exit data copyout(delsq_w, tend_w_euler) - !$acc exit data copyout(tend_w_euler) + !$acc exit data copyout(delsq_w) !$acc exit data delete(delsq_w) else !$acc exit data delete(tend_rho) @@ -6202,6 +6200,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !$acc exit data delete(tend_ru_physics) !$acc exit data copyout(tend_w) + !$acc exit data copyout(tend_w_euler) #ifdef CURVATURE !$acc exit data delete(rho_zz, ur_cell, vr_cell) #endif From c8908f8015bcaa547cb116ffb2a2a3eba6c5a38f Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 7 Mar 2025 17:35:02 -0700 Subject: [PATCH 41/79] data movement: tend_w --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f2571a93e4..a448c49c4c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5773,7 +5773,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! vertical advection, pressure gradient and buoyancy for w ! - !$acc enter data copyin(tend_w, cqw) + !$acc enter data copyin(cqw) !$acc parallel default(present) !$acc loop gang worker private(wdwz) @@ -5815,7 +5815,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_w) !$acc exit data delete(cqw) if (rk_step == 1) then @@ -5847,8 +5846,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! add in mixing terms for w - !$acc enter data copyin(tend_w) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd @@ -5860,8 +5857,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_w) - !----------- rhs for theta ! From d422801c2a30843706eb340ad61006eaa8066d0e Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 7 Mar 2025 17:46:39 -0700 Subject: [PATCH 42/79] data movement: rho_zz --- .../dynamics/mpas_atm_time_integration.F | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a448c49c4c..d8c1efb3f1 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5073,8 +5073,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data create(tend_w) !$acc enter data copyin(tend_w_euler) #ifdef CURVATURE - !$acc enter data copyin(rho_zz, ur_cell, vr_cell) + !$acc enter data copyin(ur_cell, vr_cell) #endif + !$acc enter data copyin(rho_zz) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5823,8 +5824,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(rho_zz) - !$acc parallel default(present) !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd @@ -5838,8 +5837,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(rho_zz) - end if end if ! mixing term computed first rk_step @@ -6034,7 +6031,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! - !$acc enter data copyin(tend_theta, rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) + !$acc enter data copyin(tend_theta, rw_save, theta_m, theta_m_save, rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc parallel default(present) @@ -6069,7 +6066,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta, rthdynten) - !$acc exit data delete(rw_save, theta_m, theta_m_save, rho_zz, rt_diabatic_tend) + !$acc exit data delete(rw_save, theta_m, theta_m_save, rt_diabatic_tend) ! ! vertical mixing for theta - 2nd order @@ -6083,7 +6080,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_theta_euler, rho_zz, theta_m) + !$acc enter data copyin(tend_theta_euler, theta_m) !$acc parallel default(present) !$acc loop gang worker @@ -6108,11 +6105,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta_euler) - !$acc exit data delete(rho_zz, theta_m) + !$acc exit data delete(theta_m) else ! idealized cases where we mix on the perturbation from the initial 1-D state - !$acc enter data copyin(tend_theta_euler, rho_zz, theta_m, t_init) + !$acc enter data copyin(tend_theta_euler, theta_m, t_init) !$acc parallel default(present) !$acc loop gang worker @@ -6135,7 +6132,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta_euler) - !$acc exit data delete(rho_zz, theta_m, t_init) + !$acc exit data delete(theta_m, t_init) end if @@ -6197,8 +6194,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(tend_w) !$acc exit data copyout(tend_w_euler) #ifdef CURVATURE - !$acc exit data delete(rho_zz, ur_cell, vr_cell) + !$acc exit data delete(ur_cell, vr_cell) #endif + !$acc exit data delete(rho_zz) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From d8193248ea6e085cb358895fb004f30d825d37d9 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 7 Mar 2025 17:59:19 -0700 Subject: [PATCH 43/79] data movement: tend_theta --- .../dynamics/mpas_atm_time_integration.F | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index d8c1efb3f1..d59e14ef89 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5076,6 +5076,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(ur_cell, vr_cell) #endif !$acc enter data copyin(rho_zz) + !$acc enter data create(tend_theta) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5860,7 +5861,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! horizontal advection for theta ! - !$acc enter data create(tend_theta) !$acc enter data copyin(theta_m) !$acc parallel default(present) @@ -5902,14 +5902,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_theta) !$acc exit data delete(theta_m) ! addition to pick up perturbation flux for rtheta_pp equation if(rk_step > 1) then - !$acc enter data copyin(tend_theta, ru_save, theta_m_save) + !$acc enter data copyin(ru_save, theta_m_save) !$acc parallel default(present) !$acc loop gang worker @@ -5930,7 +5929,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_theta) !$acc exit data delete(ru_save, theta_m_save) end if @@ -6031,7 +6029,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! - !$acc enter data copyin(tend_theta, rw_save, theta_m, theta_m_save, rt_diabatic_tend) + !$acc enter data copyin(rw_save, theta_m, theta_m_save, rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc parallel default(present) @@ -6065,7 +6063,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_theta, rthdynten) + !$acc exit data copyout(rthdynten) !$acc exit data delete(rw_save, theta_m, theta_m_save, rt_diabatic_tend) ! @@ -6140,7 +6138,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if ! compute vertical theta mixing on first rk_step - !$acc enter data copyin(tend_theta, tend_theta_euler, tend_rtheta_physics) + !$acc enter data copyin(tend_theta_euler, tend_rtheta_physics) !$acc parallel default(present) !$acc loop gang worker @@ -6154,7 +6152,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_theta) !$acc exit data delete(tend_theta_euler, tend_rtheta_physics) MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -6197,6 +6194,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(ur_cell, vr_cell) #endif !$acc exit data delete(rho_zz) + !$acc exit data copyout(tend_theta) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From ad181fa8b7a1e3ee4e091ade66f2a27717892e98 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 7 Mar 2025 18:35:04 -0700 Subject: [PATCH 44/79] data movement: theta_m, ru_save, theta_m_save --- .../dynamics/mpas_atm_time_integration.F | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index d59e14ef89..cccb6ad435 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5077,6 +5077,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm #endif !$acc enter data copyin(rho_zz) !$acc enter data create(tend_theta) + !$acc enter data copyin(theta_m) + !$acc enter data copyin(ru_save, theta_m_save) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5861,8 +5863,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! horizontal advection for theta ! - !$acc enter data copyin(theta_m) - !$acc parallel default(present) !$acc loop gang worker private(flux_arr) do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... @@ -5902,14 +5902,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(theta_m) - ! addition to pick up perturbation flux for rtheta_pp equation if(rk_step > 1) then - !$acc enter data copyin(ru_save, theta_m_save) - !$acc parallel default(present) !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd @@ -5929,8 +5925,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(ru_save, theta_m_save) - end if ! @@ -5941,7 +5935,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc enter data create(delsq_theta, tend_theta_euler) - !$acc enter data copyin(theta_m) !$acc parallel default(present) !$acc loop gang worker @@ -5983,7 +5976,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(delsq_theta, tend_theta_euler) - !$acc exit data delete(theta_m) !$OMP BARRIER @@ -6029,7 +6021,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! - !$acc enter data copyin(rw_save, theta_m, theta_m_save, rt_diabatic_tend) + !$acc enter data copyin(rw_save, rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc parallel default(present) @@ -6064,7 +6056,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(rthdynten) - !$acc exit data delete(rw_save, theta_m, theta_m_save, rt_diabatic_tend) + !$acc exit data delete(rw_save, rt_diabatic_tend) ! ! vertical mixing for theta - 2nd order @@ -6078,7 +6070,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_theta_euler, theta_m) + !$acc enter data copyin(tend_theta_euler) !$acc parallel default(present) !$acc loop gang worker @@ -6103,11 +6095,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta_euler) - !$acc exit data delete(theta_m) else ! idealized cases where we mix on the perturbation from the initial 1-D state - !$acc enter data copyin(tend_theta_euler, theta_m, t_init) + !$acc enter data copyin(tend_theta_euler, t_init) !$acc parallel default(present) !$acc loop gang worker @@ -6130,7 +6121,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel !$acc exit data copyout(tend_theta_euler) - !$acc exit data delete(theta_m, t_init) + !$acc exit data delete(t_init) end if @@ -6195,6 +6186,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm #endif !$acc exit data delete(rho_zz) !$acc exit data copyout(tend_theta) + !$acc exit data delete(theta_m) + !$acc exit data delete(ru_save, theta_m_save) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From 93df11a9f6f49284013fe3132434fee4d3db6051 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 7 Mar 2025 18:47:53 -0700 Subject: [PATCH 45/79] data movement: tend_theta_euler --- .../dynamics/mpas_atm_time_integration.F | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index cccb6ad435..028813091a 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5057,9 +5057,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! would be messy, and these variables are created and deleted, rather than copied in ! and out !$acc enter data copyin(delsq_w) + !$acc enter data create(tend_theta_euler) else !$acc enter data copyin(tend_u_euler) !$acc enter data copyin(tend_rho) + !$acc enter data copyin(tend_theta_euler) end if !$acc enter data create(dpdz) !$acc enter data create(tend_u) @@ -5934,7 +5936,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - !$acc enter data create(delsq_theta, tend_theta_euler) + !$acc enter data create(delsq_theta) !$acc parallel default(present) !$acc loop gang worker @@ -5975,7 +5977,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(delsq_theta, tend_theta_euler) + !$acc exit data copyout(delsq_theta) !$OMP BARRIER @@ -5983,7 +5985,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_theta_euler, delsq_theta) + !$acc enter data copyin(delsq_theta) !$acc parallel default(present) !$acc loop gang worker @@ -6008,7 +6010,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_theta_euler) !$acc exit data delete(delsq_theta) end if ! 4th order mixing is active @@ -6070,8 +6071,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !!! MGD UNTESTED !!! - !$acc enter data copyin(tend_theta_euler) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd @@ -6094,11 +6093,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_theta_euler) - else ! idealized cases where we mix on the perturbation from the initial 1-D state - !$acc enter data copyin(tend_theta_euler, t_init) + !$acc enter data copyin(t_init) !$acc parallel default(present) !$acc loop gang worker @@ -6120,7 +6117,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(tend_theta_euler) !$acc exit data delete(t_init) end if @@ -6129,7 +6125,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if ! compute vertical theta mixing on first rk_step - !$acc enter data copyin(tend_theta_euler, tend_rtheta_physics) + !$acc enter data copyin(tend_rtheta_physics) !$acc parallel default(present) !$acc loop gang worker @@ -6143,7 +6139,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(tend_theta_euler, tend_rtheta_physics) + !$acc exit data delete(tend_rtheta_physics) MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') !$acc exit data copyout(tend_u_euler) @@ -6188,6 +6184,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(tend_theta) !$acc exit data delete(theta_m) !$acc exit data delete(ru_save, theta_m_save) + !$acc exit data copyout(tend_theta_euler) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From e623406513552760b67402c433f5970ecc53b542 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 12 Mar 2025 16:34:53 -0600 Subject: [PATCH 46/79] Dereference single-value integer pointers to integers in summarize_timestep These 'integer, pointer' variables are used as loop bounds and can cause issues with OpenACC. Since OpenACC will directly copy the pointers and may not preserve the association (may not also copy the value pointed to), odd behavior can result. --- .../dynamics/mpas_atm_time_integration.F | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5ea2ca1154..c383a7f664 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7242,7 +7242,8 @@ subroutine summarize_timestep(domain) logical, pointer :: config_print_global_minmax_sca integer :: iCell, k, iEdge, iScalar - integer, pointer :: num_scalars, nCellsSolve, nEdgesSolve, nVertLevels + integer, pointer :: num_scalars_ptr, nCellsSolve_ptr, nEdgesSolve_ptr, nVertLevels_ptr + integer :: num_scalars, nCellsSolve, nEdgesSolve, nVertLevels type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: diag @@ -7286,9 +7287,12 @@ subroutine summarize_timestep(domain) call mpas_pool_get_array(mesh, 'lonCell', lonCell) call mpas_pool_get_array(mesh, 'latEdge', latEdge) call mpas_pool_get_array(mesh, 'lonEdge', lonEdge) - call mpas_pool_get_dimension(state, 'nCellsSolve', nCellsSolve) - call mpas_pool_get_dimension(state, 'nEdgesSolve', nEdgesSolve) - call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels) + call mpas_pool_get_dimension(state, 'nCellsSolve', nCellsSolve_ptr) + call mpas_pool_get_dimension(state, 'nEdgesSolve', nEdgesSolve_ptr) + call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels_ptr) + nCellsSolve = nCellsSolve_ptr + nEdgesSolve = nEdgesSolve_ptr + nVertLevels = nVertLevels_ptr scalar_min = 1.0e20 indexMax = -1 @@ -7496,9 +7500,12 @@ subroutine summarize_timestep(domain) call mpas_pool_get_subpool(block % structs, 'state', state) call mpas_pool_get_array(state, 'w', w, 2) call mpas_pool_get_array(state, 'u', u, 2) - call mpas_pool_get_dimension(state, 'nCellsSolve', nCellsSolve) - call mpas_pool_get_dimension(state, 'nEdgesSolve', nEdgesSolve) - call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels) + call mpas_pool_get_dimension(state, 'nCellsSolve', nCellsSolve_ptr) + call mpas_pool_get_dimension(state, 'nEdgesSolve', nEdgesSolve_ptr) + call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels_ptr) + nCellsSolve = nCellsSolve_ptr + nEdgesSolve = nEdgesSolve_ptr + nVertLevels = nVertLevels_ptr scalar_min = 0.0 scalar_max = 0.0 @@ -7532,9 +7539,12 @@ subroutine summarize_timestep(domain) call mpas_pool_get_subpool(block % structs, 'state', state) call mpas_pool_get_array(state, 'scalars', scalars, 2) - call mpas_pool_get_dimension(state, 'nCellsSolve', nCellsSolve) - call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels) - call mpas_pool_get_dimension(state, 'num_scalars', num_scalars) + call mpas_pool_get_dimension(state, 'nCellsSolve', nCellsSolve_ptr) + call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels_ptr) + call mpas_pool_get_dimension(state, 'num_scalars', num_scalars_ptr) + nCellsSolve = nCellsSolve_ptr + nVertLevels = nVertLevels_ptr + num_scalars = num_scalars_ptr do iScalar = 1, num_scalars scalar_min = 0.0 From cb2ba6a97ad018e3d4dac4d60cbf84c1c1e964b0 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Thu, 20 Mar 2025 12:18:01 -0600 Subject: [PATCH 47/79] Move mpas_log_write outside Check for NaNs loops in summarize_timestep This prevents having to add OpenACC routine information to mpas_log_write and routine(s) it calls. --- .../dynamics/mpas_atm_time_integration.F | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c383a7f664..d9e4a52b00 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7268,6 +7268,8 @@ subroutine summarize_timestep(domain) real (kind=RKIND), dimension(:,:), pointer :: u, v, uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2 + logical found_NaN + call mpas_pool_get_config(block % configs, 'config_print_global_minmax_vel', config_print_global_minmax_vel) call mpas_pool_get_config(block % configs, 'config_print_detailed_minmax_vel', config_print_detailed_minmax_vel) call mpas_pool_get_config(block % configs, 'config_print_global_minmax_sca', config_print_global_minmax_sca) @@ -7478,21 +7480,30 @@ subroutine summarize_timestep(domain) ! ! Check for NaNs ! + found_NaN = .false. + do iCell = 1, nCellsSolve do k = 1, nVertLevels if (ieee_is_nan(w(k,iCell))) then - call mpas_log_write('NaN detected in ''w'' field.', messageType=MPAS_LOG_CRIT) + found_NaN = .true. end if end do end do + if (found_NaN) then + call mpas_log_write('NaN detected in ''w'' field.', messageType=MPAS_LOG_CRIT) + end if + do iEdge = 1, nEdgesSolve do k = 1, nVertLevels if (ieee_is_nan(u(k,iEdge))) then - call mpas_log_write('NaN detected in ''u'' field.', messageType=MPAS_LOG_CRIT) + found_NaN = .true. end if end do end do + if (found_NaN) then + call mpas_log_write('NaN detected in ''u'' field.', messageType=MPAS_LOG_CRIT) + end if else if (config_print_global_minmax_vel) then call mpas_log_write('') From 7c3b1fe360e79f3e0134717856c6064aa66d5f95 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Mon, 17 Mar 2025 12:41:31 -0600 Subject: [PATCH 48/79] Use 2 loop search for min/max with location in summarize_timestep Only for loops executed when 'config_print_detailed_minmax_vel = .true.' When running with OpenACC, it is difficult to get values to "come with" during a reduction. This means it's difficult to get both the minimum value of an array and the position (the array indices) it occurs at in one pass. Using 2 loops like this ensures the correct positions are found. Using a "linear index" to record the position of the minimum/maximum values helps in cases where the min/max occurs at multiple points in an array. The detailed prints will always use the first occurence of the min/max in the array. --- .../dynamics/mpas_atm_time_integration.F | 104 ++++++++++++------ 1 file changed, 72 insertions(+), 32 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index d9e4a52b00..3c6bc8734d 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7263,11 +7263,16 @@ subroutine summarize_timestep(domain) real (kind=RKIND) :: lonMax, lonMax_global real (kind=RKIND), dimension(5) :: localVals, globalVals - real (kind=RKIND) :: spd + real (kind=RKIND), dimension(:,:), allocatable :: spd real (kind=RKIND), dimension(:,:), pointer :: w real (kind=RKIND), dimension(:,:), pointer :: u, v, uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2 + integer :: lin_indexMax ! Position of min/max in some array as a single integer. + ! If multiple locations match the extrema, always pick the lowest lin_indexMax. + ! array(k,idx) -> lin_indexMax = k+(idx-1)*size(array,1) + ! array(5,4) with size(array,1) = 10 -> lin_indexMax = 35 + logical found_NaN call mpas_pool_get_config(block % configs, 'config_print_global_minmax_vel', config_print_global_minmax_vel) @@ -7296,22 +7301,31 @@ subroutine summarize_timestep(domain) nEdgesSolve = nEdgesSolve_ptr nVertLevels = nVertLevels_ptr + allocate(spd(nVertLevels,nEdgesSolve)) + scalar_min = 1.0e20 + lin_indexMax = huge(1) indexMax = -1 kMax = -1 latMax = 0.0 lonMax = 0.0 do iCell = 1, nCellsSolve do k = 1, nVertLevels - if (w(k,iCell) < scalar_min) then - scalar_min = w(k,iCell) - indexMax = iCell - kMax = k - latMax = latCell(iCell) - lonMax = lonCell(iCell) + scalar_min = min(scalar_min, w(k,iCell)) + end do + end do + do iCell = 1, nCellsSolve + do k = 1, nVertLevels + if (w(k,iCell) == scalar_min) then + ! In case 2 locations tie, only save the minimum value + lin_indexMax = min(lin_indexMax, k + size(w,1)*(iCell-1)) end if end do end do + kMax = modulo(lin_indexMax, size(w,1)) + indexMax = ((lin_indexMax - kMax) / size(w,1)) + 1 + latMax = latCell(indexMax) + lonMax = lonCell(indexMax) localVals(1) = scalar_min localVals(2) = real(indexMax,kind=RKIND) localVals(3) = real(kMax,kind=RKIND) @@ -7333,21 +7347,27 @@ subroutine summarize_timestep(domain) realArgs=(/global_scalar_min, latMax_global, lonMax_global/)) scalar_max = -1.0e20 + lin_indexMax = huge(1) indexMax = -1 kMax = -1 latMax = 0.0 lonMax = 0.0 do iCell = 1, nCellsSolve do k = 1, nVertLevels - if (w(k,iCell) > scalar_max) then - scalar_max = w(k,iCell) - indexMax = iCell - kMax = k - latMax = latCell(iCell) - lonMax = lonCell(iCell) + scalar_max = max(scalar_max, w(k,iCell)) + end do + end do + do iCell = 1, nCellsSolve + do k = 1, nVertLevels + if (w(k,iCell) == scalar_max) then + lin_indexMax = min(lin_indexMax, k + size(w,1)*(iCell-1)) end if end do end do + kMax = modulo(lin_indexMax, size(w,1)) + indexMax = ((lin_indexMax - kMax) / size(w,1)) + 1 + latMax = latCell(indexMax) + lonMax = lonCell(indexMax) localVals(1) = scalar_max localVals(2) = real(indexMax,kind=RKIND) localVals(3) = real(kMax,kind=RKIND) @@ -7369,21 +7389,27 @@ subroutine summarize_timestep(domain) realArgs=(/global_scalar_max, latMax_global, lonMax_global/)) scalar_min = 1.0e20 + lin_indexMax = huge(1) indexMax = -1 kMax = -1 latMax = 0.0 lonMax = 0.0 do iEdge = 1, nEdgesSolve do k = 1, nVertLevels - if (u(k,iEdge) < scalar_min) then - scalar_min = u(k,iEdge) - indexMax = iEdge - kMax = k - latMax = latEdge(iEdge) - lonMax = lonEdge(iEdge) + scalar_min = min(scalar_min, u(k,iEdge)) + end do + end do + do iEdge = 1, nEdgesSolve + do k = 1, nVertLevels + if (u(k,iEdge) == scalar_min) then + lin_indexMax = min(lin_indexMax, k + size(u,1)*(iEdge-1)) end if end do end do + kMax = modulo(lin_indexMax, size(u,1)) + indexMax = ((lin_indexMax - kMax) / size(u,1)) + 1 + latMax = latEdge(indexMax) + lonMax = lonEdge(indexMax) localVals(1) = scalar_min localVals(2) = real(indexMax,kind=RKIND) localVals(3) = real(kMax,kind=RKIND) @@ -7405,21 +7431,27 @@ subroutine summarize_timestep(domain) realArgs=(/global_scalar_min, latMax_global, lonMax_global/)) scalar_max = -1.0e20 + lin_indexMax = huge(1) indexMax = -1 kMax = -1 latMax = 0.0 lonMax = 0.0 do iEdge = 1, nEdgesSolve do k = 1, nVertLevels - if (u(k,iEdge) > scalar_max) then - scalar_max = u(k,iEdge) - indexMax = iEdge - kMax = k - latMax = latEdge(iEdge) - lonMax = lonEdge(iEdge) + scalar_max = max(scalar_max, u(k,iEdge)) + end do + end do + do iEdge = 1, nEdgesSolve + do k = 1, nVertLevels + if (u(k,iEdge) == scalar_max) then + lin_indexMax = min(lin_indexMax, k + size(u,1)*(iEdge-1)) end if end do end do + kMax = modulo(lin_indexMax, size(u,1)) + indexMax = ((lin_indexMax - kMax) / size(u,1)) + 1 + latMax = latEdge(indexMax) + lonMax = lonEdge(indexMax) localVals(1) = scalar_max localVals(2) = real(indexMax,kind=RKIND) localVals(3) = real(kMax,kind=RKIND) @@ -7441,22 +7473,28 @@ subroutine summarize_timestep(domain) realArgs=(/global_scalar_max, latMax_global, lonMax_global/)) scalar_max = -1.0e20 + lin_indexMax = huge(1) indexMax = -1 kMax = -1 latMax = 0.0 lonMax = 0.0 do iEdge = 1, nEdgesSolve do k = 1, nVertLevels - spd = sqrt(u(k,iEdge)*u(k,iEdge) + v(k,iEdge)*v(k,iEdge)) - if (spd > scalar_max) then - scalar_max = spd - indexMax = iEdge - kMax = k - latMax = latEdge(iEdge) - lonMax = lonEdge(iEdge) + spd(k,iEdge) = sqrt(u(k,iEdge)*u(k,iEdge) + v(k,iEdge)*v(k,iEdge)) + scalar_max = max(scalar_max, spd(k,iEdge)) + end do + end do + do iEdge = 1, nEdgesSolve + do k = 1, nVertLevels + if (spd(k,iEdge) == scalar_max) then + lin_indexMax = min(lin_indexMax, k + size(u,1)*(iEdge-1)) end if end do end do + kMax = modulo(lin_indexMax, size(u,1)) + indexMax = ((lin_indexMax - kMax) / size(u,1)) + 1 + latMax = latEdge(indexMax) + lonMax = lonEdge(indexMax) localVals(1) = scalar_max localVals(2) = real(indexMax,kind=RKIND) localVals(3) = real(kMax,kind=RKIND) @@ -7505,6 +7543,8 @@ subroutine summarize_timestep(domain) call mpas_log_write('NaN detected in ''u'' field.', messageType=MPAS_LOG_CRIT) end if + deallocate(spd) + else if (config_print_global_minmax_vel) then call mpas_log_write('') From 6802185c8629c6e1f3267f12f4d239028b04ed3c Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Thu, 20 Mar 2025 12:51:54 -0600 Subject: [PATCH 49/79] Add OpenACC for config_print_global_minmax_sca in summarize_timestep Adds timing information for the OpenACC data transfers in the 'summarize_timestep [ACC_data_xfer]' timer. This timer will be removed as data regions in the MPAS-A dycore are fused. --- .../dynamics/mpas_atm_time_integration.F | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 3c6bc8734d..40f7d3d426 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7597,19 +7597,30 @@ subroutine summarize_timestep(domain) nVertLevels = nVertLevels_ptr num_scalars = num_scalars_ptr + MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') + !$acc enter data copyin(scalars) + MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') + do iScalar = 1, num_scalars scalar_min = 0.0 scalar_max = 0.0 + !$acc parallel default(present) + !$acc loop gang vector collapse(2) reduction(min:scalar_min) reduction(max:scalar_max) do iCell = 1, nCellsSolve do k = 1, nVertLevels scalar_min = min(scalar_min, scalars(iScalar,k,iCell)) scalar_max = max(scalar_max, scalars(iScalar,k,iCell)) end do end do + !$acc end parallel call mpas_dmpar_min_real(domain % dminfo, scalar_min, global_scalar_min) call mpas_dmpar_max_real(domain % dminfo, scalar_max, global_scalar_max) call mpas_log_write(' global min, max scalar $i $r $r', intArgs=(/iScalar/), realArgs=(/global_scalar_min, global_scalar_max/)) end do + + MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') + !$acc exit data delete(scalars) + MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') end if end subroutine summarize_timestep From ee1d77bab47809164d6a411a85cac492bee2ef21 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 21 Mar 2025 16:59:23 -0600 Subject: [PATCH 50/79] broken wip: trying to develop checkpointing infrastructure --- src/core_atmosphere/Registry.xml | 5 ++ src/core_atmosphere/dynamics/Makefile | 2 +- .../dynamics/mpas_atm_time_integration.F | 68 +++++++++++-------- 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 6378596797..2b5e6a46a7 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -388,6 +388,11 @@ units="-" description="Method to use for exchanging halos" possible_values="`mpas_dmpar', `mpas_halo'"/> + + diff --git a/src/core_atmosphere/dynamics/Makefile b/src/core_atmosphere/dynamics/Makefile index 6892633c68..15a6411c99 100644 --- a/src/core_atmosphere/dynamics/Makefile +++ b/src/core_atmosphere/dynamics/Makefile @@ -5,7 +5,7 @@ OBJS = mpas_atm_time_integration.o \ all: $(OBJS) -mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o +mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o mpas_checkpointing.o mpas_atm_boundaries.o: diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 028813091a..0b6481ddc9 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -26,6 +26,7 @@ module atm_time_integration use mpas_derived_types, only : MPAS_Time_type, MPAS_TimeInterval_type, MPAS_NOW use mpas_timekeeping, only: mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+) use mpas_timer + use mpas_checkpointing #ifdef DO_PHYSICS use mpas_atmphys_driver_microphysics @@ -250,6 +251,12 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 #endif + logical, pointer :: config_create_checkpoint + + nullify(config_create_checkpoint) + call mpas_pool_get_config(domain % blocklist % configs, 'config_create_checkpoint', config_create_checkpoint) + + call mpas_checkpoint_init(domain % dminfo % my_proc_id, config_create_checkpoint) #ifdef MPAS_CAM_DYCORE @@ -676,6 +683,8 @@ subroutine mpas_atm_dynamics_finalize(domain) !$acc exit data delete(meshScalingDel4) #endif + call mpas_checkpoint_finalize() + end subroutine mpas_atm_dynamics_finalize @@ -5029,6 +5038,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm flux4(q_im2, q_im1, q_i, q_ip1, ua) + & coef3*abs(ua)*((q_ip1 - q_im2)-3.*(q_i-q_im1))/12.0 + call mpas_log_write('-- RK step $i --', intArgs=[rk_step]) + MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then !$acc enter data create(tend_u_euler) @@ -5058,6 +5069,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! and out !$acc enter data copyin(delsq_w) !$acc enter data create(tend_theta_euler) + !$acc enter data create(delsq_theta) else !$acc enter data copyin(tend_u_euler) !$acc enter data copyin(tend_rho) @@ -5090,6 +5102,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm v_mom_eddy_visc2 = config_v_mom_eddy_visc2 v_theta_eddy_visc2 = config_v_theta_eddy_visc2 +!MGD BROKEN + if (rk_step == 1) then !$acc parallel @@ -5102,11 +5116,15 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel + end if + +!MGD BROKEN + + if (rk_step == 1) then + ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces). ! The integration coefficients were precomputed and stored in defc_a and defc_b - if(config_horiz_mixing == "2d_smagorinsky") then - !$acc parallel default(present) !$acc loop gang worker private(d_diag, d_off_diag) do iCell = cellStart,cellEnd @@ -5140,24 +5158,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 - else if(config_horiz_mixing == "2d_fixed") then - - !!! MGD UNTESTED !!! - - !$acc parallel default(present) - !$acc loop gang worker - do iCell = cellStart, cellEnd - !$acc loop vector - do k = 1, nVertLevels - kdiff(k,iCell) = config_h_theta_eddy_visc2 - end do - end do - !$acc end parallel + end if - h_mom_eddy_visc4 = config_h_mom_eddy_visc4 - h_theta_eddy_visc4 = config_h_theta_eddy_visc4 +!MGD OK HERE + !$acc enter data copyin(cqw) - end if + if (rk_step == 1) then if (config_mpas_cam_coef > 0.0) then @@ -5183,6 +5189,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if +!MGD OK HERE + ! tendency for density. ! accumulate total water here for later use in w tendency calculation. @@ -5223,6 +5231,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel +!MGD WORKS HERE + ! ! dp / dz and tend_rho ! @@ -5344,6 +5354,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! first Runge-Kutta substep and saved for use in later RK substeps 2 and 3. ! +!MGD OK HERE? + if (rk_step == 1) then !$OMP BARRIER @@ -5606,6 +5618,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !----------- rhs for w +!MGD OK HERE ! ! horizontal advection for w @@ -5687,6 +5700,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! but here we can also code in hyperdiffusion if we wish (2nd order at present) ! +!MGD OK HERE + if (rk_step == 1) then ! !OMP BARRIER why is this openmp barrier here??? @@ -5779,8 +5794,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! vertical advection, pressure gradient and buoyancy for w ! - !$acc enter data copyin(cqw) - !$acc parallel default(present) !$acc loop gang worker private(wdwz) do iCell=cellSolveStart,cellSolveEnd @@ -5936,8 +5949,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - !$acc enter data create(delsq_theta) - !$acc parallel default(present) !$acc loop gang worker do iCell=cellStart,cellEnd @@ -5977,16 +5988,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(delsq_theta) - !$OMP BARRIER if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active !!! MGD UNTESTED !!! - !$acc enter data copyin(delsq_theta) - !$acc parallel default(present) !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... @@ -6010,21 +6017,24 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(delsq_theta) - end if ! 4th order mixing is active end if ! theta mixing calculated first rk_step - ! ! vertical advection plus diabatic term ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! !$acc enter data copyin(rw_save, rt_diabatic_tend) + + if (rk_step == 1) then + !$acc exit data delete(delsq_theta) + end if + !$acc enter data create(rthdynten) + !$acc parallel default(present) !$acc loop gang worker private(wdtz) do iCell = cellSolveStart,cellSolveEnd From d7d40bf380ff88e96a1596e582b1b76b576f179d Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 21 Mar 2025 20:02:50 -0600 Subject: [PATCH 51/79] WIP: just copy most fields to work around answer differences --- .../dynamics/mpas_atm_time_integration.F | 137 ++++++------------ 1 file changed, 48 insertions(+), 89 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 0b6481ddc9..fea3c9ac16 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5042,57 +5042,43 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then - !$acc enter data create(tend_u_euler) - !$acc enter data create(kdiff) - !$acc enter data copyin(tend_rho_physics) - !$acc enter data create(tend_rho) - !$acc enter data copyin(rb, qtot, rr_save) - !$acc enter data copyin(divergence, vorticity) - !$acc enter data create(delsq_u) - - if (config_horiz_mixing == '2d_smagorinsky') then - !$acc enter data copyin(u, v) - end if - - ! The following are only needed if h_mom_eddy_visc4 > 0.0, but the conditional check - ! would be messy, and these variables are created and deleted, rather than copied in - ! and out - !$acc enter data create(delsq_vorticity, delsq_divergence) - - if (.not. config_mix_full) then - !$acc enter data copyin(u_init, v_init) - end if - !$acc enter data create(delsq_w) - - ! The following are only needed if h_mom_eddy_visc4 > 0.0, but the conditional check - ! would be messy, and these variables are created and deleted, rather than copied in - ! and out - !$acc enter data copyin(delsq_w) - !$acc enter data create(tend_theta_euler) - !$acc enter data create(delsq_theta) + !$acc enter data create(tend_w_euler) + !$acc enter data create(tend_u_euler) + !$acc enter data create(tend_theta_euler) + !$acc enter data create(tend_rho) else - !$acc enter data copyin(tend_u_euler) - !$acc enter data copyin(tend_rho) - !$acc enter data copyin(tend_theta_euler) + !$acc enter data copyin(tend_w_euler) + !$acc enter data copyin(tend_u_euler) + !$acc enter data copyin(tend_theta_euler) + !$acc enter data copyin(tend_rho) end if + !$acc enter data create(kdiff) + !$acc enter data copyin(tend_rho_physics) + !$acc enter data copyin(rb, qtot, rr_save) + !$acc enter data copyin(divergence, vorticity) + !$acc enter data create(delsq_u) + !$acc enter data copyin(u, v) + !$acc enter data create(delsq_vorticity, delsq_divergence) + !$acc enter data copyin(u_init, v_init) + !$acc enter data copyin(delsq_w) !$acc enter data create(dpdz) !$acc enter data create(tend_u) !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc enter data create(h_divergence) !$acc enter data copyin(ru, rw) - if (config_rayleigh_damp_u) then - !$acc enter data copyin(rayleigh_damp_coef) - end if + !$acc enter data copyin(rayleigh_damp_coef) !$acc enter data copyin(tend_ru_physics) !$acc enter data create(tend_w) - !$acc enter data copyin(tend_w_euler) -#ifdef CURVATURE - !$acc enter data copyin(ur_cell, vr_cell) -#endif !$acc enter data copyin(rho_zz) !$acc enter data create(tend_theta) !$acc enter data copyin(theta_m) !$acc enter data copyin(ru_save, theta_m_save) + !$acc enter data create(delsq_theta) + !$acc enter data copyin(cqw) + !$acc enter data copyin(tend_rtheta_physics) + !$acc enter data copyin(rw_save, rt_diabatic_tend) + !$acc enter data create(rthdynten) + !$acc enter data copyin(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5161,7 +5147,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !MGD OK HERE - !$acc enter data copyin(cqw) if (rk_step == 1) then @@ -5834,8 +5819,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(cqw) - if (rk_step == 1) then if ( v_mom_eddy_visc2 > 0.0 ) then @@ -6026,14 +6009,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Note: we are also dividing through by the cell area after the horizontal flux divergence ! - !$acc enter data copyin(rw_save, rt_diabatic_tend) - - if (rk_step == 1) then - !$acc exit data delete(delsq_theta) - end if - - !$acc enter data create(rthdynten) - !$acc parallel default(present) !$acc loop gang worker private(wdtz) @@ -6066,9 +6041,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data copyout(rthdynten) - !$acc exit data delete(rw_save, rt_diabatic_tend) - ! ! vertical mixing for theta - 2nd order ! @@ -6105,8 +6077,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else ! idealized cases where we mix on the perturbation from the initial 1-D state - !$acc enter data copyin(t_init) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd @@ -6127,16 +6097,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(t_init) - end if end if end if ! compute vertical theta mixing on first rk_step - !$acc enter data copyin(tend_rtheta_physics) - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd @@ -6149,52 +6115,45 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - !$acc exit data delete(tend_rtheta_physics) - MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - !$acc exit data copyout(tend_u_euler) - !$acc exit data delete(kdiff) if (rk_step == 1) then - !$acc exit data copyout(tend_rho) - !$acc exit data delete(tend_rho_physics) - !$acc exit data delete(rb, qtot, rr_save) - !$acc exit data delete(divergence, vorticity) - !$acc exit data copyout(delsq_u) - - if (config_horiz_mixing == '2d_smagorinsky') then - !$acc exit data delete(u, v) - end if - - !$acc exit data delete(delsq_vorticity, delsq_divergence) - - if (.not. config_mix_full) then - !$acc exit data delete(u_init, v_init) - end if - - !$acc exit data copyout(delsq_w) - !$acc exit data delete(delsq_w) + !$acc exit data copyout(tend_w_euler) + !$acc exit data copyout(tend_u_euler) + !$acc exit data copyout(tend_theta_euler) + !$acc exit data copyout(tend_rho) else - !$acc exit data delete(tend_rho) + !$acc exit data delete(tend_w_euler) + !$acc exit data delete(tend_u_euler) + !$acc exit data delete(tend_theta_euler) + !$acc exit data delete(tend_rho) end if + !$acc exit data delete(kdiff) + !$acc exit data delete(tend_rho_physics) + !$acc exit data delete(rb, qtot, rr_save) + !$acc exit data delete(divergence, vorticity) + !$acc exit data copyout(delsq_u) + !$acc exit data delete(u, v) + !$acc exit data delete(delsq_vorticity, delsq_divergence) + !$acc exit data delete(u_init, v_init) + !$acc exit data copyout(delsq_w) !$acc exit data delete(dpdz) !$acc exit data copyout(tend_u) !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) !$acc exit data copyout(h_divergence) !$acc exit data delete(ru, rw) - if (config_rayleigh_damp_u) then - !$acc exit data delete(rayleigh_damp_coef) - end if + !$acc exit data delete(rayleigh_damp_coef) !$acc exit data delete(tend_ru_physics) !$acc exit data copyout(tend_w) - !$acc exit data copyout(tend_w_euler) -#ifdef CURVATURE - !$acc exit data delete(ur_cell, vr_cell) -#endif !$acc exit data delete(rho_zz) !$acc exit data copyout(tend_theta) !$acc exit data delete(theta_m) !$acc exit data delete(ru_save, theta_m_save) - !$acc exit data copyout(tend_theta_euler) + !$acc exit data delete(delsq_theta) + !$acc exit data delete(cqw) + !$acc exit data delete(tend_rtheta_physics) + !$acc exit data delete(rw_save, rt_diabatic_tend) + !$acc exit data copyout(rthdynten) + !$acc exit data delete(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From ed9b10ffd946b81e9137d2fcf8e1028fc25f4827 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 21 Mar 2025 20:23:12 -0600 Subject: [PATCH 52/79] WIP: add more data directives within (rk_step == 1) test --- .../dynamics/mpas_atm_time_integration.F | 70 ++++++++++--------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index fea3c9ac16..41f6f47688 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5042,25 +5042,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then - !$acc enter data create(tend_w_euler) - !$acc enter data create(tend_u_euler) - !$acc enter data create(tend_theta_euler) - !$acc enter data create(tend_rho) + !$acc enter data create(tend_w_euler) + !$acc enter data create(tend_u_euler) + !$acc enter data create(tend_theta_euler) + !$acc enter data create(tend_rho) + + !$acc enter data create(kdiff) + !$acc enter data copyin(tend_rho_physics) + !$acc enter data copyin(rb, qtot, rr_save) + !$acc enter data copyin(divergence, vorticity) + !$acc enter data create(delsq_u) + !$acc enter data copyin(v) + !$acc enter data create(delsq_vorticity, delsq_divergence) + !$acc enter data copyin(u_init, v_init) + !$acc enter data create(delsq_w) else - !$acc enter data copyin(tend_w_euler) - !$acc enter data copyin(tend_u_euler) - !$acc enter data copyin(tend_theta_euler) - !$acc enter data copyin(tend_rho) + !$acc enter data copyin(tend_w_euler) + !$acc enter data copyin(tend_u_euler) + !$acc enter data copyin(tend_theta_euler) + !$acc enter data copyin(tend_rho) end if - !$acc enter data create(kdiff) - !$acc enter data copyin(tend_rho_physics) - !$acc enter data copyin(rb, qtot, rr_save) - !$acc enter data copyin(divergence, vorticity) - !$acc enter data create(delsq_u) - !$acc enter data copyin(u, v) - !$acc enter data create(delsq_vorticity, delsq_divergence) - !$acc enter data copyin(u_init, v_init) - !$acc enter data copyin(delsq_w) !$acc enter data create(dpdz) !$acc enter data create(tend_u) !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) @@ -6117,25 +6118,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then - !$acc exit data copyout(tend_w_euler) - !$acc exit data copyout(tend_u_euler) - !$acc exit data copyout(tend_theta_euler) - !$acc exit data copyout(tend_rho) + !$acc exit data copyout(tend_w_euler) + !$acc exit data copyout(tend_u_euler) + !$acc exit data copyout(tend_theta_euler) + !$acc exit data copyout(tend_rho) + + !$acc exit data delete(kdiff) + !$acc exit data delete(tend_rho_physics) + !$acc exit data delete(rb, qtot, rr_save) + !$acc exit data delete(divergence, vorticity) + !$acc exit data copyout(delsq_u) + !$acc exit data delete(v) + !$acc exit data delete(delsq_vorticity, delsq_divergence) + !$acc exit data delete(u_init, v_init) + !$acc exit data copyout(delsq_w) else - !$acc exit data delete(tend_w_euler) - !$acc exit data delete(tend_u_euler) - !$acc exit data delete(tend_theta_euler) - !$acc exit data delete(tend_rho) + !$acc exit data delete(tend_w_euler) + !$acc exit data delete(tend_u_euler) + !$acc exit data delete(tend_theta_euler) + !$acc exit data delete(tend_rho) end if - !$acc exit data delete(kdiff) - !$acc exit data delete(tend_rho_physics) - !$acc exit data delete(rb, qtot, rr_save) - !$acc exit data delete(divergence, vorticity) - !$acc exit data copyout(delsq_u) - !$acc exit data delete(u, v) - !$acc exit data delete(delsq_vorticity, delsq_divergence) - !$acc exit data delete(u_init, v_init) - !$acc exit data copyout(delsq_w) !$acc exit data delete(dpdz) !$acc exit data copyout(tend_u) !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) From 46fd13f42bb84095a033bfd3fe4332fa3d2ca1c8 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Tue, 25 Mar 2025 16:41:07 -0600 Subject: [PATCH 53/79] Add OpenACC for config_print_global_minmax_vel in summarize_timestep Adds timing information for the OpenACC data transfers in the temporary 'summarize_timestep [ACC_data_xfer]' timer. --- .../dynamics/mpas_atm_time_integration.F | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 40f7d3d426..61d45cfe03 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7558,29 +7558,43 @@ subroutine summarize_timestep(domain) nEdgesSolve = nEdgesSolve_ptr nVertLevels = nVertLevels_ptr + MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') + !$acc enter data copyin(w,u) + MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') + scalar_min = 0.0 scalar_max = 0.0 + !$acc parallel default(present) + !$acc loop gang vector collapse(2) reduction(min:scalar_min) reduction(max:scalar_max) do iCell = 1, nCellsSolve do k = 1, nVertLevels scalar_min = min(scalar_min, w(k,iCell)) scalar_max = max(scalar_max, w(k,iCell)) end do end do + !$acc end parallel call mpas_dmpar_min_real(domain % dminfo, scalar_min, global_scalar_min) call mpas_dmpar_max_real(domain % dminfo, scalar_max, global_scalar_max) call mpas_log_write('global min, max w $r $r', realArgs=(/global_scalar_min, global_scalar_max/)) scalar_min = 0.0 scalar_max = 0.0 + !$acc parallel default(present) + !$acc loop gang vector collapse(2) reduction(min:scalar_min) reduction(max:scalar_max) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels scalar_min = min(scalar_min, u(k,iEdge)) scalar_max = max(scalar_max, u(k,iEdge)) end do end do + !$acc end parallel call mpas_dmpar_min_real(domain % dminfo, scalar_min, global_scalar_min) call mpas_dmpar_max_real(domain % dminfo, scalar_max, global_scalar_max) call mpas_log_write('global min, max u $r $r', realArgs=(/global_scalar_min, global_scalar_max/)) + + MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') + !$acc exit data delete(w,u) + MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') end if if (config_print_global_minmax_sca) then From a21db2c350dd41ac9cb0dbac6ca9306f3dc37896 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Tue, 25 Mar 2025 16:41:43 -0600 Subject: [PATCH 54/79] Add OpenACC for config_print_detailed_minmax_vel in summarize_timestep Adds timing information for the OpenACC data transfers in the temporary 'summarize_timestep [ACC_data_xfer]' timer. --- .../dynamics/mpas_atm_time_integration.F | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 61d45cfe03..7db718d6d2 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7264,6 +7264,7 @@ subroutine summarize_timestep(domain) real (kind=RKIND), dimension(5) :: localVals, globalVals real (kind=RKIND), dimension(:,:), allocatable :: spd + !$acc declare create(spd) real (kind=RKIND), dimension(:,:), pointer :: w real (kind=RKIND), dimension(:,:), pointer :: u, v, uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2 @@ -7303,17 +7304,26 @@ subroutine summarize_timestep(domain) allocate(spd(nVertLevels,nEdgesSolve)) + MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') + !$acc enter data copyin(w,u,v) + MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') + scalar_min = 1.0e20 lin_indexMax = huge(1) indexMax = -1 kMax = -1 latMax = 0.0 lonMax = 0.0 + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(min:scalar_min) do iCell = 1, nCellsSolve do k = 1, nVertLevels scalar_min = min(scalar_min, w(k,iCell)) end do end do + !$acc end parallel + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(min:lin_indexMax) do iCell = 1, nCellsSolve do k = 1, nVertLevels if (w(k,iCell) == scalar_min) then @@ -7322,6 +7332,7 @@ subroutine summarize_timestep(domain) end if end do end do + !$acc end parallel kMax = modulo(lin_indexMax, size(w,1)) indexMax = ((lin_indexMax - kMax) / size(w,1)) + 1 latMax = latCell(indexMax) @@ -7352,11 +7363,16 @@ subroutine summarize_timestep(domain) kMax = -1 latMax = 0.0 lonMax = 0.0 + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(max:scalar_max) do iCell = 1, nCellsSolve do k = 1, nVertLevels scalar_max = max(scalar_max, w(k,iCell)) end do end do + !$acc end parallel + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(min:lin_indexMax) do iCell = 1, nCellsSolve do k = 1, nVertLevels if (w(k,iCell) == scalar_max) then @@ -7364,6 +7380,7 @@ subroutine summarize_timestep(domain) end if end do end do + !$acc end parallel kMax = modulo(lin_indexMax, size(w,1)) indexMax = ((lin_indexMax - kMax) / size(w,1)) + 1 latMax = latCell(indexMax) @@ -7394,11 +7411,16 @@ subroutine summarize_timestep(domain) kMax = -1 latMax = 0.0 lonMax = 0.0 + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(min:scalar_min) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels scalar_min = min(scalar_min, u(k,iEdge)) end do end do + !$acc end parallel + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(min:lin_indexMax) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels if (u(k,iEdge) == scalar_min) then @@ -7406,6 +7428,7 @@ subroutine summarize_timestep(domain) end if end do end do + !$acc end parallel kMax = modulo(lin_indexMax, size(u,1)) indexMax = ((lin_indexMax - kMax) / size(u,1)) + 1 latMax = latEdge(indexMax) @@ -7436,11 +7459,16 @@ subroutine summarize_timestep(domain) kMax = -1 latMax = 0.0 lonMax = 0.0 + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(max:scalar_max) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels scalar_max = max(scalar_max, u(k,iEdge)) end do end do + !$acc end parallel + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(min:lin_indexMax) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels if (u(k,iEdge) == scalar_max) then @@ -7448,6 +7476,7 @@ subroutine summarize_timestep(domain) end if end do end do + !$acc end parallel kMax = modulo(lin_indexMax, size(u,1)) indexMax = ((lin_indexMax - kMax) / size(u,1)) + 1 latMax = latEdge(indexMax) @@ -7478,12 +7507,17 @@ subroutine summarize_timestep(domain) kMax = -1 latMax = 0.0 lonMax = 0.0 + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(max:scalar_max) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels spd(k,iEdge) = sqrt(u(k,iEdge)*u(k,iEdge) + v(k,iEdge)*v(k,iEdge)) scalar_max = max(scalar_max, spd(k,iEdge)) end do end do + !$acc end parallel + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(min:lin_indexMax) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels if (spd(k,iEdge) == scalar_max) then @@ -7491,6 +7525,7 @@ subroutine summarize_timestep(domain) end if end do end do + !$acc end parallel kMax = modulo(lin_indexMax, size(u,1)) indexMax = ((lin_indexMax - kMax) / size(u,1)) + 1 latMax = latEdge(indexMax) @@ -7520,6 +7555,8 @@ subroutine summarize_timestep(domain) ! found_NaN = .false. + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(.or.:found_NaN) do iCell = 1, nCellsSolve do k = 1, nVertLevels if (ieee_is_nan(w(k,iCell))) then @@ -7527,11 +7564,14 @@ subroutine summarize_timestep(domain) end if end do end do + !$acc end parallel if (found_NaN) then call mpas_log_write('NaN detected in ''w'' field.', messageType=MPAS_LOG_CRIT) end if + !$acc parallel default(present) + !$acc loop collapse(2) gang vector reduction(.or.:found_NaN) do iEdge = 1, nEdgesSolve do k = 1, nVertLevels if (ieee_is_nan(u(k,iEdge))) then @@ -7539,10 +7579,15 @@ subroutine summarize_timestep(domain) end if end do end do + !$acc end parallel if (found_NaN) then call mpas_log_write('NaN detected in ''u'' field.', messageType=MPAS_LOG_CRIT) end if + MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') + !$acc exit data delete(w,u,v) + MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') + deallocate(spd) else if (config_print_global_minmax_vel) then From 3f1965ee3b223197696a3631a3e0c69e11ff90fe Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Mar 2025 11:06:34 -0600 Subject: [PATCH 55/79] Clean up changes for initial profiling --- .../dynamics/mpas_atm_time_integration.F | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 41f6f47688..9d63d94bf6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5086,10 +5086,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm invDt = 1.0_RKIND / dt inv_r_earth = 1.0_RKIND / r_earth - v_mom_eddy_visc2 = config_v_mom_eddy_visc2 - v_theta_eddy_visc2 = config_v_theta_eddy_visc2 - -!MGD BROKEN + v_mom_eddy_visc2 = config_v_mom_eddy_visc2 + v_theta_eddy_visc2 = config_v_theta_eddy_visc2 if (rk_step == 1) then @@ -5103,15 +5101,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - end if - -!MGD BROKEN - - if (rk_step == 1) then - ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces). ! The integration coefficients were precomputed and stored in defc_a and defc_b + if(config_horiz_mixing == "2d_smagorinsky") then + !$acc parallel default(present) !$acc loop gang worker private(d_diag, d_off_diag) do iCell = cellStart,cellEnd @@ -5145,11 +5139,24 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 - end if + else if(config_horiz_mixing == "2d_fixed") then -!MGD OK HERE + !!! MGD UNTESTED !!! - if (rk_step == 1) then + !$acc parallel default(present) + !$acc loop gang worker + do iCell = cellStart, cellEnd + !$acc loop vector + do k = 1, nVertLevels + kdiff(k,iCell) = config_h_theta_eddy_visc2 + end do + end do + !$acc end parallel + + h_mom_eddy_visc4 = config_h_mom_eddy_visc4 + h_theta_eddy_visc4 = config_h_theta_eddy_visc4 + + end if if (config_mpas_cam_coef > 0.0) then @@ -5175,8 +5182,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if -!MGD OK HERE - ! tendency for density. ! accumulate total water here for later use in w tendency calculation. @@ -5217,8 +5222,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel -!MGD WORKS HERE - ! ! dp / dz and tend_rho ! @@ -5340,8 +5343,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! first Runge-Kutta substep and saved for use in later RK substeps 2 and 3. ! -!MGD OK HERE? - if (rk_step == 1) then !$OMP BARRIER @@ -5604,8 +5605,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !----------- rhs for w -!MGD OK HERE - ! ! horizontal advection for w ! @@ -5686,8 +5685,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! but here we can also code in hyperdiffusion if we wish (2nd order at present) ! -!MGD OK HERE - if (rk_step == 1) then ! !OMP BARRIER why is this openmp barrier here??? From 8414225dc636f135887b1e46f39d2d8d93ee1814 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 26 Mar 2025 12:07:43 -0600 Subject: [PATCH 56/79] Remove code related to checkpointing --- src/core_atmosphere/Registry.xml | 5 ----- src/core_atmosphere/dynamics/Makefile | 2 +- .../dynamics/mpas_atm_time_integration.F | 10 ---------- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 2b5e6a46a7..6378596797 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -388,11 +388,6 @@ units="-" description="Method to use for exchanging halos" possible_values="`mpas_dmpar', `mpas_halo'"/> - - diff --git a/src/core_atmosphere/dynamics/Makefile b/src/core_atmosphere/dynamics/Makefile index 15a6411c99..6892633c68 100644 --- a/src/core_atmosphere/dynamics/Makefile +++ b/src/core_atmosphere/dynamics/Makefile @@ -5,7 +5,7 @@ OBJS = mpas_atm_time_integration.o \ all: $(OBJS) -mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o mpas_checkpointing.o +mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o mpas_atm_boundaries.o: diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 9d63d94bf6..c4a5decec0 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -26,7 +26,6 @@ module atm_time_integration use mpas_derived_types, only : MPAS_Time_type, MPAS_TimeInterval_type, MPAS_NOW use mpas_timekeeping, only: mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+) use mpas_timer - use mpas_checkpointing #ifdef DO_PHYSICS use mpas_atmphys_driver_microphysics @@ -251,13 +250,6 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 #endif - logical, pointer :: config_create_checkpoint - - nullify(config_create_checkpoint) - call mpas_pool_get_config(domain % blocklist % configs, 'config_create_checkpoint', config_create_checkpoint) - - call mpas_checkpoint_init(domain % dminfo % my_proc_id, config_create_checkpoint) - #ifdef MPAS_CAM_DYCORE nullify(tend_physics) @@ -683,8 +675,6 @@ subroutine mpas_atm_dynamics_finalize(domain) !$acc exit data delete(meshScalingDel4) #endif - call mpas_checkpoint_finalize() - end subroutine mpas_atm_dynamics_finalize From b497a71b59788bb22571cbb7701f1892fe8fdb18 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 17 Apr 2025 15:10:37 -0600 Subject: [PATCH 57/79] first attempt --- .../dynamics/mpas_atm_time_integration.F | 503 ++++++++++++++---- src/core_atmosphere/mpas_atm_core.F | 6 +- src/operators/mpas_vector_reconstruction.F | 6 - 3 files changed, 397 insertions(+), 118 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 164509f2c9..658810b410 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -198,6 +198,8 @@ subroutine mpas_atm_dynamics_init(domain) #ifdef MPAS_OPENACC type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag real (kind=RKIND), dimension(:), pointer :: dvEdge integer, dimension(:,:), pointer :: cellsOnCell @@ -254,6 +256,22 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: latCell real (kind=RKIND), dimension(:), pointer :: lonCell real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + real (kind=RKIND), dimension(:,:), pointer :: ru + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split #endif #ifdef MPAS_CAM_DYCORE @@ -271,8 +289,322 @@ subroutine mpas_atm_dynamics_init(domain) #endif #ifdef MPAS_OPENACC + ! nullify(mesh) + ! call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + + ! call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + ! call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + + ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + ! !$acc enter data copyin(dvEdge) + + ! call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) + ! !$acc enter data copyin(cellsOnCell) + + ! call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + ! !$acc enter data copyin(cellsOnEdge) + + ! call mpas_pool_get_array(mesh, 'advCellsForEdge', advCellsForEdge) + ! !$acc enter data copyin(advCellsForEdge) + + ! call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + ! !$acc enter data copyin(edgesOnCell) + + ! call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) + ! !$acc enter data copyin(nAdvCellsForEdge) + + ! call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + ! !$acc enter data copyin(nEdgesOnCell) + + ! call mpas_pool_get_array(mesh, 'adv_coefs', adv_coefs) + ! !$acc enter data copyin(adv_coefs) + + ! call mpas_pool_get_array(mesh, 'adv_coefs_3rd', adv_coefs_3rd) + ! !$acc enter data copyin(adv_coefs_3rd) + + ! call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + ! !$acc enter data copyin(edgesOnCell_sign) + + ! call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + ! !$acc enter data copyin(invAreaCell) + + ! call mpas_pool_get_array(mesh, 'bdyMaskCell', bdyMaskCell) + ! !$acc enter data copyin(bdyMaskCell) + + ! call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) + ! !$acc enter data copyin(bdyMaskEdge) + + ! call mpas_pool_get_array(mesh, 'specZoneMaskEdge', specZoneMaskEdge) + ! !$acc enter data copyin(specZoneMaskEdge) + + ! call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + ! !$acc enter data copyin(invDvEdge) + + ! call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + ! !$acc enter data copyin(dcEdge) + + ! call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + ! !$acc enter data copyin(invDcEdge) + + ! call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + ! !$acc enter data copyin(edgesOnEdge) + + ! call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + ! !$acc enter data copyin(edgesOnVertex) + + ! call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + ! !$acc enter data copyin(edgesOnVertex_sign) + + ! call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + ! !$acc enter data copyin(nEdgesOnEdge) + + ! call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + ! !$acc enter data copyin(weightsOnEdge) + + ! call mpas_pool_get_array(mesh, 'cellsOnVertex', cellsOnVertex) + ! !$acc enter data copyin(cellsOnVertex) + + ! call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + ! !$acc enter data copyin(verticesOnCell) + + ! call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + ! !$acc enter data copyin(verticesOnEdge) + + ! call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + ! !$acc enter data copyin(invAreaTriangle) + + ! call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + ! !$acc enter data copyin(kiteForCell) + + ! call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + ! !$acc enter data copyin(kiteAreasOnVertex) + + ! call mpas_pool_get_array(mesh, 'fVertex', fVertex) + ! !$acc enter data copyin(fVertex) + + ! call mpas_pool_get_array(mesh, 'fEdge', fEdge) + ! !$acc enter data copyin(fEdge) + + ! call mpas_pool_get_array(mesh, 'zz', zz) + ! !$acc enter data copyin(zz) + + ! call mpas_pool_get_array(mesh, 'rdzw', rdzw) + ! !$acc enter data copyin(rdzw) + + ! call mpas_pool_get_array(mesh, 'rdzu', rdzu) + ! !$acc enter data copyin(rdzu) + + ! call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + ! !$acc enter data copyin(zb_cell) + + ! call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + ! !$acc enter data copyin(zb3_cell) + + ! call mpas_pool_get_array(mesh, 'fzm', fzm) + ! !$acc enter data copyin(fzm) + + ! call mpas_pool_get_array(mesh, 'fzp', fzp) + ! !$acc enter data copyin(fzp) + + ! call mpas_pool_get_array(mesh, 'zb', zb) + ! !$acc enter data copyin(zb) + + ! call mpas_pool_get_array(mesh, 'zb3', zb3) + ! !$acc enter data copyin(zb3) + + ! call mpas_pool_get_array(mesh, 'nearestRelaxationCell', nearestRelaxationCell) + ! !$acc enter data copyin(nearestRelaxationCell) + + ! call mpas_pool_get_array(mesh, 'zgrid', zgrid) + ! !$acc enter data copyin(zgrid) + + ! call mpas_pool_get_array(mesh, 'zxu', zxu) + ! !$acc enter data copyin(zxu) + + ! call mpas_pool_get_array(mesh, 'dss', dss) + ! !$acc enter data copyin(dss) + + ! call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) + ! !$acc enter data copyin(specZoneMaskCell) + + ! call mpas_pool_get_array(mesh, 'defc_a', defc_a) + ! !$acc enter data copyin(defc_a) + + ! call mpas_pool_get_array(mesh, 'defc_b', defc_b) + ! !$acc enter data copyin(defc_b) + + ! call mpas_pool_get_array(mesh, 'latEdge', latEdge) + ! !$acc enter data copyin(latEdge) + + ! call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + ! !$acc enter data copyin(angleEdge) + + ! call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) + ! !$acc enter data copyin(meshScalingDel2) + + ! call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) + ! !$acc enter data copyin(meshScalingDel4) + ! call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) + ! !$acc enter data copyin(meshScalingRegionalCell) + + ! call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) + ! !$acc enter data copyin(meshScalingRegionalEdge) + + ! call mpas_pool_get_array(mesh, 'latCell', latCell) + ! !$acc enter data copyin(latCell) + + ! call mpas_pool_get_array(mesh, 'lonCell', lonCell) + ! !$acc enter data copyin(lonCell) + + ! call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) + ! !$acc enter data copyin(coeffs_reconstruct) + +#endif + + end subroutine mpas_atm_dynamics_init + + subroutine mpas_atm_pre_init(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:), pointer :: rdzw + real (kind=RKIND), dimension(:), pointer :: rdzu + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + integer, dimension(:), pointer :: nearestRelaxationCell + real (kind=RKIND), dimension(:,:), pointer :: zgrid + real (kind=RKIND), dimension(:,:), pointer :: zxu + real (kind=RKIND), dimension(:,:), pointer :: dss + real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell + real (kind=RKIND), dimension(:,:), pointer :: defc_a + real (kind=RKIND), dimension(:,:), pointer :: defc_b + real (kind=RKIND), dimension(:), pointer :: latEdge + real (kind=RKIND), dimension(:), pointer :: angleEdge + real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 + real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell + real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalEdge + real (kind=RKIND), dimension(:), pointer :: latCell + real (kind=RKIND), dimension(:), pointer :: lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + + real (kind=RKIND), dimension(:,:), pointer :: ru + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + nullify(state) + nullify(diag) nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + + call mpas_pool_get_array(diag, 'ru', ru) + !$acc enter data copyin(ru) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc enter data copyin(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc enter data copyin(rw) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc enter data copyin(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc enter data copyin(rtheta_p) + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc enter data copyin(rtheta_p_save) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc enter data copyin(rho_p) + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc enter data copyin(rho_p_save) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc enter data copyin(rho_zz_old_split) + + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc enter data copyin(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc enter data copyin(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + !$acc enter data copyin(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc enter data copyin(wwAvg_split) + + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc enter data copyin(u_1) + call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc enter data copyin(u_2) + call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc enter data copyin(w_1) + call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc enter data copyin(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc enter data copyin(theta_m_1) + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc enter data copyin(theta_m_2) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc enter data copyin(rho_zz_1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc enter data copyin(rho_zz_2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc enter data copyin(scalars_1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc enter data copyin(scalars_2) call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -437,9 +769,11 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) !$acc enter data copyin(coeffs_reconstruct) + #endif - end subroutine mpas_atm_dynamics_init + end subroutine mpas_atm_pre_init + !---------------------------------------------------------------------------- @@ -1883,10 +2217,10 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_2, 2) MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) + ! $acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, & + ! $acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & + ! $acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & + ! $acc rho_zz_1, scalars_1) MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') !$acc kernels @@ -1936,10 +2270,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & !$acc end parallel MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc exit data copyout(ru_save, rw_save, rtheta_p_save, rho_p_save, u_2, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc delete(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') end subroutine atm_rk_integration_setup @@ -1992,8 +2322,8 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & moist_end = moist_end_ptr MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc enter data create(qtot, cqw, cqu) & - !$acc copyin(scalars) + !$acc enter data create(qtot, cqw, cqu) + ! $acc copyin(scalars) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') !$acc parallel default(present) @@ -2045,8 +2375,8 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc exit data copyout(cqw, cqu, qtot) & - !$acc delete(scalars) + !$acc exit data copyout(cqw, cqu, qtot) + ! $acc delete(scalars) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') end subroutine atm_compute_moist_coefficients @@ -2644,13 +2974,15 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') !$acc enter data copyin(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) + !$acc a_tri,alpha_tri,gamma_tri, & + !$acc tend_ru,tend_rho,tend_rt,tend_rw) !$acc enter data create(rtheta_pp_old) if(small_step == 1) then - !$acc enter data create(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) + !$acc enter data create(ru_p,rho_pp,rtheta_pp,rw_p) + ! $acc enter data create(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) else - !$acc enter data copyin(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) + !$acc enter data copyin(ru_p,rho_pp,rtheta_pp,rw_p) + ! $acc enter data copyin(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) end if MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') @@ -2881,10 +3213,10 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') !$acc exit data delete(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) - !$acc exit data copyout(rtheta_pp_old,ru_p,ruAvg,rho_pp, & - !$acc rtheta_pp,wwAvg,rw_p) + !$acc a_tri,alpha_tri,gamma_tri, & + !$acc tend_ru,tend_rho,tend_rt,tend_rw) + !$acc exit data copyout(rtheta_pp_old,ru_p,rho_pp, & + !$acc rtheta_pp,rw_p) MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') end subroutine atm_advance_acoustic_step_work @@ -2938,7 +3270,7 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart nVertLevels = nVertLevels_ptr MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc enter data copyin(ru_p, rtheta_pp, rtheta_pp_old, theta_m) + !$acc enter data copyin(ru_p, rtheta_pp, rtheta_pp_old) MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') !$acc parallel default(present) @@ -2974,7 +3306,7 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') !$acc exit data copyout(ru_p) & - !$acc delete(rtheta_pp, rtheta_pp_old, theta_m) + !$acc delete(rtheta_pp, rtheta_pp_old) MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') @@ -3167,11 +3499,9 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE real (kind=RKIND) :: invNs, rcv, p0, flux MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc enter data copyin(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p,wwAvg,ruAvg) & - !$acc create(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u) + !$acc enter data copyin(rho_pp,rho_base,rw_p, & + !$acc rtheta_pp,rtheta_base, & + !$acc ru_p) if (rk_step == 3) then !$acc enter data copyin(rt_diabatic_tend,exner_base) & !$acc create(exner,pressure_p) @@ -3323,11 +3653,9 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE !$acc end parallel MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc exit data delete(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & + !$acc exit data delete(rho_pp,rho_base,rw_p, & !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p) & - !$acc copyout(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u,wwAvg,ruAvg) + !$acc ru_p) if (rk_step == 3) then !$acc exit data delete(rt_diabatic_tend,exner_base) & !$acc copyout(exner,pressure_p) @@ -3569,7 +3897,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc enter data create(horiz_flux_arr) - !$acc enter data copyin(uhAvg, scalar_new) + !$acc enter data copyin(uhAvg) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') !$acc parallel async @@ -3671,7 +3999,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & #else !$acc enter data copyin(scalar_tend_save) #endif - !$acc enter data copyin(scalar_old, fnm, fnp, rdnw, wwAvg, rho_zz_old, rho_zz_new) + !$acc enter data copyin(scalar_old, fnm, fnp, rdnw) !$acc enter data create(scalar_tend_column, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') @@ -3754,9 +4082,8 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc exit data copyout(scalar_new) - !$acc exit data delete(scalar_tend_column, wdtn, uhAvg, wwAvg, scalar_old, fnm, fnp, & - !$acc rdnw, rho_zz_old, rho_zz_new, horiz_flux_arr, scalar_tend_save) + !$acc exit data delete(scalar_tend_column, wdtn, uhAvg, fnm, fnp, & + !$acc rdnw, horiz_flux_arr, scalar_tend_save) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') end subroutine atm_advance_scalars_work @@ -4027,7 +4354,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge if (local_advance_density) then !$acc enter data copyin(rho_zz_int) end if - !$acc enter data copyin(scalars_old, rho_zz_old, rdnw, uhAvg, wwAvg) + !$acc enter data copyin(rdnw, uhAvg) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel @@ -4117,11 +4444,8 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (.not. local_advance_density) then - !$acc enter data copyin(rho_zz_new) - end if - !$acc enter data copyin(scalars_new, fnm, fnp) - !$acc enter data create(scalar_old, scalar_new, scale_arr, s_min, s_max, & + !$acc enter data copyin(fnm, fnp) + !$acc enter data create(scale_arr, s_min, s_max, & !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -4584,7 +4908,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel #ifdef DEBUG_TRANSPORT - !$acc update self(scalar_new) !$acc update self(s_max) !$acc update self(s_min) @@ -4629,12 +4952,9 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') if (local_advance_density) then !$acc exit data copyout(rho_zz_int) - else - !$acc exit data delete(rho_zz_new) end if - !$acc exit data copyout(scalars_new) - !$acc exit data delete(scalars_old, scalar_old, scalar_new, scale_arr, s_min, s_max, & - !$acc rho_zz_old, flux_arr, flux_tmp, flux_upwind_tmp, wdtn, wwAvg, & + !$acc exit data delete(scale_arr, s_min, s_max, & + !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn, & !$acc uhAvg, fnm, fnp, rdnw) !$acc end data @@ -5095,20 +5415,18 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !$acc enter data create(dpdz) !$acc enter data create(tend_u) - !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) + !$acc enter data copyin(cqu, pp, pv_edge, rho_edge, ke) !$acc enter data create(h_divergence) !$acc enter data copyin(ru, rw) !$acc enter data copyin(rayleigh_damp_coef) !$acc enter data copyin(tend_ru_physics) !$acc enter data create(tend_w) - !$acc enter data copyin(rho_zz) !$acc enter data create(tend_theta) - !$acc enter data copyin(theta_m) - !$acc enter data copyin(ru_save, theta_m_save) + !$acc enter data copyin(theta_m_save) !$acc enter data create(delsq_theta) !$acc enter data copyin(cqw) !$acc enter data copyin(tend_rtheta_physics) - !$acc enter data copyin(rw_save, rt_diabatic_tend) + !$acc enter data copyin(rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc enter data copyin(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -6168,20 +6486,17 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if !$acc exit data delete(dpdz) !$acc exit data copyout(tend_u) - !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) + !$acc exit data delete(cqu, pp, pv_edge, rho_edge, ke) !$acc exit data copyout(h_divergence) - !$acc exit data delete(ru, rw) !$acc exit data delete(rayleigh_damp_coef) !$acc exit data delete(tend_ru_physics) !$acc exit data copyout(tend_w) - !$acc exit data delete(rho_zz) !$acc exit data copyout(tend_theta) - !$acc exit data delete(theta_m) - !$acc exit data delete(ru_save, theta_m_save) + !$acc exit data delete(theta_m_save) !$acc exit data delete(delsq_theta) !$acc exit data delete(cqw) !$acc exit data delete(tend_rtheta_physics) - !$acc exit data delete(rw_save, rt_diabatic_tend) + !$acc exit data delete(rt_diabatic_tend) !$acc exit data copyout(rthdynten) !$acc exit data delete(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -6353,17 +6668,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc fVertex, & - !$acc verticesOnEdge, & - !$acc invDvEdge,invDcEdge) - !$acc enter data copyin(u,h) + !$acc enter data copyin(h) MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') ! @@ -6708,16 +7013,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc verticesOnEdge, & - !$acc fVertex,invDvEdge,invDcEdge) - !$acc exit data delete(u,h) + !$acc exit data delete(h) !$acc exit data copyout(h_edge,ke_edge,vorticity,divergence, & !$acc ke, & !$acc v, & @@ -6819,11 +7115,11 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc zb_cell,zb3_cell) ! copyin the data that is only on the right-hand side - !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc enter data copyin(rho,theta, & !$acc rho_base,theta_base) ! copyin the data that will be modified in this routine - !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc enter data create(rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -6953,12 +7249,12 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc zb_cell,zb3_cell) ! delete the data that is only on the right-hand side - !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc exit data delete(rho,theta, & !$acc rho_base,theta_base) ! copyout the data that will be modified in this routine - !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & - !$acc rtheta_p,exner,exner_base,pressure_p, & + !$acc exit data copyout(rho_p,rtheta_base, & + !$acc exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -7025,10 +7321,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, & - !$acc w_1, rho_zz_1) & - !$acc copyin(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, ruAvg, wwAvg, ruAvg_split, wwAvg_split, rho_zz_old_split) MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') !$acc kernels @@ -7134,11 +7426,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su end if MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc exit data copyout(ru_save, u_1, rtheta_p_save, rho_p_save, rw_save, & - !$acc w_1, theta_m_1, rho_zz_1, ruAvg, wwAvg, ruAvg_split, & - !$acc wwAvg_split) & - !$acc delete(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, rho_zz_old_split) MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') end subroutine atm_rk_dynamics_substep_finish @@ -7195,7 +7482,7 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, integer :: iCell, k MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc enter data copyin(w) + ! $acc enter data copyin(w) MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') !$acc parallel default(present) @@ -7212,7 +7499,7 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, !$acc end parallel MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc exit data copyout(w) + ! $acc exit data copyout(w) MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') @@ -7380,8 +7667,8 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me vertexDegree = vertexDegree_ptr MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_rho, tend_rt, rho_zz, theta_m, rho_driving_values, & - !$acc rt_driving_values, tend_ru, ru, ru_driving_values) + !$acc enter data copyin(tend_rho, tend_rt, rho_driving_values, & + !$acc rt_driving_values, tend_ru, ru_driving_values) !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') @@ -7532,7 +7819,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') !$acc exit data copyout(tend_rho, tend_rt, tend_ru) - !$acc exit data delete(rho_zz, theta_m, ru, rho_driving_values, rt_driving_values, & + !$acc exit data delete(rho_driving_values, rt_driving_values, & !$acc ru_driving_values, divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') @@ -7682,7 +7969,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !--- MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') !$acc enter data create(scalars_tmp) & - !$acc copyin(scalars_driving, scalars_new) + !$acc copyin(scalars_driving) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') !$acc parallel default(present) @@ -7766,8 +8053,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc exit data delete(scalars_tmp, scalars_driving) & - !$acc copyout(scalars_new) + !$acc exit data delete(scalars_tmp, scalars_driving) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') end subroutine atm_bdy_adjust_scalars_work @@ -7839,7 +8125,7 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !--- MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc enter data copyin(scalars_new, scalars_driving) + !$acc enter data copyin(scalars_driving) MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') !$acc parallel default(present) @@ -7864,7 +8150,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc exit data copyout(scalars_new) !$acc exit data delete(scalars_driving) MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') @@ -7950,7 +8235,7 @@ subroutine summarize_timestep(domain) allocate(spd(nVertLevels,nEdgesSolve)) MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc enter data copyin(w,u,v) + !$acc enter data copyin(v) MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') scalar_min = 1.0e20 @@ -8230,7 +8515,7 @@ subroutine summarize_timestep(domain) end if MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc exit data delete(w,u,v) + !$acc exit data delete(v) MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') deallocate(spd) @@ -8249,7 +8534,6 @@ subroutine summarize_timestep(domain) nVertLevels = nVertLevels_ptr MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc enter data copyin(w,u) MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') scalar_min = 0.0 @@ -8283,7 +8567,6 @@ subroutine summarize_timestep(domain) call mpas_log_write('global min, max u $r $r', realArgs=(/global_scalar_min, global_scalar_max/)) MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc exit data delete(w,u) MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') end if @@ -8302,7 +8585,6 @@ subroutine summarize_timestep(domain) num_scalars = num_scalars_ptr MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc enter data copyin(scalars) MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') do iScalar = 1, num_scalars @@ -8323,7 +8605,6 @@ subroutine summarize_timestep(domain) end do MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc exit data delete(scalars) MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') end if diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index 997d7ca8ba..149775e85d 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -43,7 +43,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) use mpas_atm_dimensions, only : mpas_atm_set_dims use mpas_atm_diagnostics_manager, only : mpas_atm_diag_setup use mpas_atm_threading, only : mpas_atm_threading_init - use atm_time_integration, only : mpas_atm_dynamics_init + use atm_time_integration, only : mpas_atm_dynamics_init, mpas_atm_pre_init use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_attlist, only : mpas_modify_att use mpas_string_utils, only : mpas_string_replace @@ -264,6 +264,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) do while (associated(block)) call mpas_pool_get_subpool(block % structs, 'mesh', mesh) call mpas_pool_get_subpool(block % structs, 'state', state) + call mpas_atm_pre_init(domain) call atm_mpas_init_block(domain % dminfo, domain % streamManager, block, mesh, dt) @@ -1000,8 +1001,11 @@ subroutine atm_do_timestep(domain, dt, itimestep) #ifdef DO_PHYSICS !proceed with physics if moist_physics is set to true: if(moist_physics) then + call mpas_log_write('call physics timetracker') call physics_timetracker(domain,dt,clock,itimestep,xtime_s) + call mpas_log_write('call physics driver') call physics_driver(domain,itimestep,xtime_s) + call mpas_log_write('end physics timetracker') endif #endif diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 6bc3a3d804..7be0820a9e 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -260,9 +260,6 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') ! Only use sections needed, nCells may be all cells or only non-halo cells - !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & - !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) - !$acc enter data copyin(u(:,:)) !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & !$acc uReconstructMeridional(:,1:nCells)) @@ -338,9 +335,6 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon end if MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') - !$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & - !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) - !$acc exit data delete(u(:,:)) !$acc exit data copyout(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & !$acc uReconstructZ(:,1:nCells), uReconstructZonal(:,1:nCells), & !$acc uReconstructMeridional(:,1:nCells)) From b9de95409557f13c4e0ab4c94fd1ed766caea1e4 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 17 Apr 2025 22:01:46 -0600 Subject: [PATCH 58/79] first working --- .../dynamics/mpas_atm_time_integration.F | 273 ++++++++---------- src/core_atmosphere/mpas_atm_core.F | 3 +- src/operators/mpas_vector_reconstruction.F | 6 + 3 files changed, 136 insertions(+), 146 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 658810b410..c43ad63d54 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -475,6 +475,8 @@ subroutine mpas_atm_pre_init(domain) type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics real (kind=RKIND), dimension(:), pointer :: dvEdge @@ -533,15 +535,19 @@ subroutine mpas_atm_pre_init(domain) real (kind=RKIND), dimension(:), pointer :: lonCell real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct - real (kind=RKIND), dimension(:,:), pointer :: ru + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p real (kind=RKIND), dimension(:,:), pointer :: ru_save - real (kind=RKIND), dimension(:,:), pointer :: rw + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p real (kind=RKIND), dimension(:,:), pointer :: rw_save real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save - real (kind=RKIND), dimension(:,:), pointer :: rho_p + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base real (kind=RKIND), dimension(:,:), pointer :: rho_p_save real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, v real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 @@ -550,31 +556,76 @@ subroutine mpas_atm_pre_init(domain) real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + + + nullify(state) nullify(diag) nullify(mesh) + nullify(tend) + nullify(tend_physics) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) call mpas_pool_get_array(diag, 'ru', ru) !$acc enter data copyin(ru) + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc enter data copyin(ru_p) call mpas_pool_get_array(diag, 'ru_save', ru_save) !$acc enter data copyin(ru_save) call mpas_pool_get_array(diag, 'rw', rw) !$acc enter data copyin(rw) + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc enter data copyin(rw_p) call mpas_pool_get_array(diag, 'rw_save', rw_save) !$acc enter data copyin(rw_save) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) !$acc enter data copyin(rtheta_p) call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) !$acc enter data copyin(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc enter data copyin(exner) + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc enter data copyin(exner_base) + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc enter data copyin(rtheta_base) + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc enter data copyin(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc enter data copyin(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc enter data copyin(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc enter data copyin(theta_base) call mpas_pool_get_array(diag, 'rho_p', rho_p) !$acc enter data copyin(rho_p) call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) !$acc enter data copyin(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc enter data copyin(rho_pp) call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) !$acc enter data copyin(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc enter data copyin(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc enter data copyin(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc enter data copyin(pressure_p) + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc enter data copyin(pressure_base) + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc enter data copyin(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc enter data copyin(rtheta_pp_old) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) !$acc enter data copyin(ruAvg) @@ -606,6 +657,29 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc enter data copyin(scalars_2) + + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc enter data copyin(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc enter data copyin(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc enter data copyin(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc enter data copyin(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc enter data copyin(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc enter data copyin(tend_u_euler) + call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc enter data copyin(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc enter data copyin(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc enter data copyin(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc enter data copyin(tend_w_buoy) + + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -2216,12 +2290,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_1, 1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - ! $acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, & - ! $acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - ! $acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - ! $acc rho_zz_1, scalars_1) - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') !$acc kernels theta_m_2(:,cellEnd+1) = 0.0_RKIND @@ -2269,8 +2337,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') end subroutine atm_rk_integration_setup @@ -2322,8 +2388,7 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & moist_end = moist_end_ptr MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc enter data create(qtot, cqw, cqu) - ! $acc copyin(scalars) + !$acc enter data create(qtot) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') !$acc parallel default(present) @@ -2375,8 +2440,7 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc exit data copyout(cqw, cqu, qtot) - ! $acc delete(scalars) + !$acc exit data copyout(qtot) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') end subroutine atm_compute_moist_coefficients @@ -2510,7 +2574,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, real (kind=RKIND), dimension( nVertLevels ) :: b_tri, c_tri MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc enter data copyin(cqw, p, t, qtot, rb, rtb, rt, pb) + !$acc enter data copyin(qtot) !$acc enter data create(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & !$acc c_tri, alpha_tri, gamma_tri) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') @@ -2596,7 +2660,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') !$acc exit data copyout(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & !$acc c_tri, alpha_tri, gamma_tri) - !$acc exit data delete(cqw, p, t, qtot, rb, rtb, rt, pb) + !$acc exit data delete(qtot) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') end subroutine atm_compute_vert_imp_coefs_work @@ -2701,9 +2765,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & integer :: iCell, iEdge, i, k real (kind=RKIND) :: flux - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc enter data copyin(u_tend, w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') ! we solve for omega instead of w (see Klemp et al MWR 2007), ! so here we change the w_p tendency to an omega_p tendency @@ -2736,10 +2797,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc exit data delete(u_tend) - !$acc exit data copyout(w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') end subroutine atm_set_smlstep_pert_variables_work @@ -2973,17 +3030,8 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart rdts = 1./dts MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc enter data copyin(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw) - !$acc enter data create(rtheta_pp_old) - if(small_step == 1) then - !$acc enter data create(ru_p,rho_pp,rtheta_pp,rw_p) - ! $acc enter data create(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - else - !$acc enter data copyin(ru_p,rho_pp,rtheta_pp,rw_p) - ! $acc enter data copyin(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - end if + !$acc enter data copyin(cofwt,coftz,cofrz,cofwr,cofwz, & + !$acc a_tri,alpha_tri,gamma_tri) MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') if(small_step /= 1) then ! not needed on first small step @@ -3212,11 +3260,8 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc exit data delete(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw) - !$acc exit data copyout(rtheta_pp_old,ru_p,rho_pp, & - !$acc rtheta_pp,rw_p) + !$acc exit data delete(cofwt,coftz,cofrz,cofwr,cofwz, & + !$acc a_tri,alpha_tri,gamma_tri) MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') end subroutine atm_advance_acoustic_step_work @@ -3269,9 +3314,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart nCellsSolve = nCellsSolve_ptr nVertLevels = nVertLevels_ptr - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc enter data copyin(ru_p, rtheta_pp, rtheta_pp_old) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -3304,10 +3346,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart end do ! end loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc exit data copyout(ru_p) & - !$acc delete(rtheta_pp, rtheta_pp_old) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') end subroutine atm_divergence_damping_3d @@ -3499,12 +3537,8 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE real (kind=RKIND) :: invNs, rcv, p0, flux MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc enter data copyin(rho_pp,rho_base,rw_p, & - !$acc rtheta_pp,rtheta_base, & - !$acc ru_p) if (rk_step == 3) then - !$acc enter data copyin(rt_diabatic_tend,exner_base) & - !$acc create(exner,pressure_p) + !$acc enter data copyin(rt_diabatic_tend) end if MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') @@ -3653,12 +3687,8 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE !$acc end parallel MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc exit data delete(rho_pp,rho_base,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_p) if (rk_step == 3) then - !$acc exit data delete(rt_diabatic_tend,exner_base) & - !$acc copyout(exner,pressure_p) + !$acc exit data delete(rt_diabatic_tend) end if MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') @@ -4342,9 +4372,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc data present(nEdgesOnCell, edgesOnCell, edgesOnCell_sign, & - !$acc invAreaCell, cellsOnCell, cellsOnEdge, nAdvCellsForEdge, & - !$acc advCellsForEdge, adv_coefs, adv_coefs_3rd, dvEdge, bdyMaskCell) #ifdef DO_PHYSICS !$acc enter data copyin(scalar_tend) @@ -4443,9 +4470,11 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if + ! AG: Note that scalar_old, scalar_new in this subroutine are not the same + ! variables as 'scalars' obtained from the state pool. MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') !$acc enter data copyin(fnm, fnp) - !$acc enter data create(scale_arr, s_min, s_max, & + !$acc enter data create(scalar_old, scalar_new, scale_arr, s_min, s_max, & !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -4953,11 +4982,9 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge if (local_advance_density) then !$acc exit data copyout(rho_zz_int) end if - !$acc exit data delete(scale_arr, s_min, s_max, & + !$acc exit data delete(scalar_old, scalar_new, scale_arr, s_min, s_max, & !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn, & !$acc uhAvg, fnm, fnp, rdnw) - - !$acc end data MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') end subroutine atm_advance_scalars_mono_work @@ -5393,40 +5420,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then - !$acc enter data create(tend_w_euler) - !$acc enter data create(tend_u_euler) - !$acc enter data create(tend_theta_euler) - !$acc enter data create(tend_rho) - !$acc enter data create(kdiff) !$acc enter data copyin(tend_rho_physics) - !$acc enter data copyin(rb, qtot, rr_save) + !$acc enter data copyin(qtot) !$acc enter data copyin(divergence, vorticity) !$acc enter data create(delsq_u) - !$acc enter data copyin(v) !$acc enter data create(delsq_vorticity, delsq_divergence) !$acc enter data copyin(u_init, v_init) !$acc enter data create(delsq_w) - else - !$acc enter data copyin(tend_w_euler) - !$acc enter data copyin(tend_u_euler) - !$acc enter data copyin(tend_theta_euler) - !$acc enter data copyin(tend_rho) end if !$acc enter data create(dpdz) - !$acc enter data create(tend_u) - !$acc enter data copyin(cqu, pp, pv_edge, rho_edge, ke) + !$acc enter data copyin(pv_edge, rho_edge, ke) !$acc enter data create(h_divergence) - !$acc enter data copyin(ru, rw) !$acc enter data copyin(rayleigh_damp_coef) !$acc enter data copyin(tend_ru_physics) - !$acc enter data create(tend_w) - !$acc enter data create(tend_theta) - !$acc enter data copyin(theta_m_save) !$acc enter data create(delsq_theta) - !$acc enter data copyin(cqw) !$acc enter data copyin(tend_rtheta_physics) - !$acc enter data copyin(rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc enter data copyin(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -6464,39 +6473,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then - !$acc exit data copyout(tend_w_euler) - !$acc exit data copyout(tend_u_euler) - !$acc exit data copyout(tend_theta_euler) - !$acc exit data copyout(tend_rho) - !$acc exit data delete(kdiff) !$acc exit data delete(tend_rho_physics) - !$acc exit data delete(rb, qtot, rr_save) + !$acc exit data delete(qtot) !$acc exit data delete(divergence, vorticity) !$acc exit data copyout(delsq_u) - !$acc exit data delete(v) !$acc exit data delete(delsq_vorticity, delsq_divergence) !$acc exit data delete(u_init, v_init) !$acc exit data copyout(delsq_w) - else - !$acc exit data delete(tend_w_euler) - !$acc exit data delete(tend_u_euler) - !$acc exit data delete(tend_theta_euler) - !$acc exit data delete(tend_rho) end if !$acc exit data delete(dpdz) - !$acc exit data copyout(tend_u) - !$acc exit data delete(cqu, pp, pv_edge, rho_edge, ke) + !$acc exit data delete(pv_edge, rho_edge, ke) !$acc exit data copyout(h_divergence) !$acc exit data delete(rayleigh_damp_coef) !$acc exit data delete(tend_ru_physics) - !$acc exit data copyout(tend_w) - !$acc exit data copyout(tend_theta) - !$acc exit data delete(theta_m_save) !$acc exit data delete(delsq_theta) - !$acc exit data delete(cqw) !$acc exit data delete(tend_rtheta_physics) - !$acc exit data delete(rt_diabatic_tend) !$acc exit data copyout(rthdynten) !$acc exit data delete(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -6666,9 +6658,18 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & real (kind=RKIND) :: ke_fact, efac logical :: reconstruct_v - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data copyin(h) + !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, & + !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & + !$acc nEdgesOnCell,edgesOnCell, & + !$acc edgesOnCell_sign,invAreaCell, & + !$acc invAreaTriangle,edgesOnVertex, & + !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & + !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & + !$acc fVertex, & + !$acc verticesOnEdge, & + !$acc invDvEdge,invDcEdge) + !$acc enter data copyin(u,h) MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') ! @@ -7013,7 +7014,16 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(h) + !$acc exit data delete(cellsOnEdge,dcEdge,dvEdge, & + !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & + !$acc nEdgesOnCell,edgesOnCell, & + !$acc edgesOnCell_sign,invAreaCell, & + !$acc invAreaTriangle,edgesOnVertex, & + !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & + !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & + !$acc verticesOnEdge, & + !$acc fVertex,invDvEdge,invDcEdge) + !$acc exit data delete(u,h) !$acc exit data copyout(h_edge,ke_edge,vorticity,divergence, & !$acc ke, & !$acc v, & @@ -7115,17 +7125,15 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc zb_cell,zb3_cell) ! copyin the data that is only on the right-hand side - !$acc enter data copyin(rho,theta, & + !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & !$acc rho_base,theta_base) ! copyin the data that will be modified in this routine - !$acc enter data create(rho_p,rtheta_base, & + !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') - - rcv = rgas / (cp-rgas) p0 = 1.e5 ! this should come from somewhere else... @@ -7249,12 +7257,12 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc zb_cell,zb3_cell) ! delete the data that is only on the right-hand side - !$acc exit data delete(rho,theta, & + !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & !$acc rho_base,theta_base) ! copyout the data that will be modified in this routine - !$acc exit data copyout(rho_p,rtheta_base, & - !$acc exner,exner_base,pressure_p, & + !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -7320,8 +7328,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') !$acc kernels theta_m_1(:,cellEnd+1) = 0.0_RKIND @@ -7425,8 +7431,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su !$acc end parallel end if - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') end subroutine atm_rk_dynamics_substep_finish @@ -7481,9 +7485,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, integer :: iCell, k - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - ! $acc enter data copyin(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -7498,10 +7499,7 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - ! $acc exit data copyout(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - + end subroutine atm_zero_gradient_w_bdy_work @@ -7542,9 +7540,7 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_ru,tend_rho,tend_rt,tend_rw, & - !$acc rt_diabatic_tend) & - !$acc copyin(rho_driving_tend,rt_driving_tend, & + !$acc enter data copyin(rho_driving_tend,rt_driving_tend, & !$acc ru_driving_tend) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') @@ -7576,9 +7572,7 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_ru,tend_rho,tend_rt, & - !$acc tend_rw,rt_diabatic_tend) & - !$acc delete(rho_driving_tend,rt_driving_tend, & + !$acc exit data delete(rho_driving_tend,rt_driving_tend, & !$acc ru_driving_tend) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') @@ -7667,8 +7661,8 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me vertexDegree = vertexDegree_ptr MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_rho, tend_rt, rho_driving_values, & - !$acc rt_driving_values, tend_ru, ru_driving_values) + !$acc enter data copyin(rho_driving_values, & + !$acc rt_driving_values, ru_driving_values) !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') @@ -7818,7 +7812,6 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_rho, tend_rt, tend_ru) !$acc exit data delete(rho_driving_values, rt_driving_values, & !$acc ru_driving_values, divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') @@ -7857,8 +7850,7 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc enter data copyin(rt_driving_values, rho_driving_values, rtheta_base, & - !$acc theta_m, rtheta_p) + !$acc enter data copyin(rt_driving_values, rho_driving_values) MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') !$acc parallel default(present) @@ -7875,8 +7867,7 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc exit data copyout(theta_m, rtheta_p) & - !$acc delete(rt_driving_values, rho_driving_values, rtheta_base) + !$acc exit data delete(rt_driving_values, rho_driving_values) MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') end subroutine atm_bdy_reset_speczone_values @@ -8234,9 +8225,6 @@ subroutine summarize_timestep(domain) allocate(spd(nVertLevels,nEdgesSolve)) - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc enter data copyin(v) - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') scalar_min = 1.0e20 lin_indexMax = huge(1) @@ -8514,9 +8502,6 @@ subroutine summarize_timestep(domain) call mpas_log_write('NaN detected in ''u'' field.', messageType=MPAS_LOG_CRIT) end if - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - !$acc exit data delete(v) - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') deallocate(spd) @@ -8604,8 +8589,6 @@ subroutine summarize_timestep(domain) call mpas_log_write(' global min, max scalar $i $r $r', intArgs=(/iScalar/), realArgs=(/global_scalar_min, global_scalar_max/)) end do - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') end if end subroutine summarize_timestep diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index 149775e85d..d2a6eeb3e5 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -264,7 +264,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) do while (associated(block)) call mpas_pool_get_subpool(block % structs, 'mesh', mesh) call mpas_pool_get_subpool(block % structs, 'state', state) - call mpas_atm_pre_init(domain) + !call mpas_atm_pre_init(domain) call atm_mpas_init_block(domain % dminfo, domain % streamManager, block, mesh, dt) @@ -294,6 +294,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) ! ! Prepare the dynamics for integration ! + call mpas_atm_pre_init(domain) call mpas_atm_dynamics_init(domain) end function atm_core_init diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 7be0820a9e..6bc3a3d804 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -260,6 +260,9 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') ! Only use sections needed, nCells may be all cells or only non-halo cells + !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & + !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) + !$acc enter data copyin(u(:,:)) !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & !$acc uReconstructMeridional(:,1:nCells)) @@ -335,6 +338,9 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon end if MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') + !$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & + !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) + !$acc exit data delete(u(:,:)) !$acc exit data copyout(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & !$acc uReconstructZ(:,1:nCells), uReconstructZonal(:,1:nCells), & !$acc uReconstructMeridional(:,1:nCells)) From 08b212f77d9c20ad98fe1d80b7bbe17c00e730d3 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 17 Apr 2025 22:29:16 -0600 Subject: [PATCH 59/79] mesh fields need to be uploaded to device after atm_mpas_init_block --- .../dynamics/mpas_atm_time_integration.F | 448 +++++++++--------- src/core_atmosphere/mpas_atm_core.F | 4 +- 2 files changed, 226 insertions(+), 226 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c43ad63d54..e68c901fff 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -289,175 +289,175 @@ subroutine mpas_atm_dynamics_init(domain) #endif #ifdef MPAS_OPENACC - ! nullify(mesh) - ! call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + nullify(mesh) + call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) - ! call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) - ! call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) - ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) - ! !$acc enter data copyin(dvEdge) + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc enter data copyin(dvEdge) - ! call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) - ! !$acc enter data copyin(cellsOnCell) + call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) + !$acc enter data copyin(cellsOnCell) - ! call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) - ! !$acc enter data copyin(cellsOnEdge) + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc enter data copyin(cellsOnEdge) - ! call mpas_pool_get_array(mesh, 'advCellsForEdge', advCellsForEdge) - ! !$acc enter data copyin(advCellsForEdge) + call mpas_pool_get_array(mesh, 'advCellsForEdge', advCellsForEdge) + !$acc enter data copyin(advCellsForEdge) - ! call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) - ! !$acc enter data copyin(edgesOnCell) + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc enter data copyin(edgesOnCell) - ! call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) - ! !$acc enter data copyin(nAdvCellsForEdge) + call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) + !$acc enter data copyin(nAdvCellsForEdge) - ! call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) - ! !$acc enter data copyin(nEdgesOnCell) + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc enter data copyin(nEdgesOnCell) - ! call mpas_pool_get_array(mesh, 'adv_coefs', adv_coefs) - ! !$acc enter data copyin(adv_coefs) + call mpas_pool_get_array(mesh, 'adv_coefs', adv_coefs) + !$acc enter data copyin(adv_coefs) - ! call mpas_pool_get_array(mesh, 'adv_coefs_3rd', adv_coefs_3rd) - ! !$acc enter data copyin(adv_coefs_3rd) + call mpas_pool_get_array(mesh, 'adv_coefs_3rd', adv_coefs_3rd) + !$acc enter data copyin(adv_coefs_3rd) - ! call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) - ! !$acc enter data copyin(edgesOnCell_sign) + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc enter data copyin(edgesOnCell_sign) - ! call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) - ! !$acc enter data copyin(invAreaCell) + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc enter data copyin(invAreaCell) - ! call mpas_pool_get_array(mesh, 'bdyMaskCell', bdyMaskCell) - ! !$acc enter data copyin(bdyMaskCell) + call mpas_pool_get_array(mesh, 'bdyMaskCell', bdyMaskCell) + !$acc enter data copyin(bdyMaskCell) - ! call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) - ! !$acc enter data copyin(bdyMaskEdge) + call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) + !$acc enter data copyin(bdyMaskEdge) - ! call mpas_pool_get_array(mesh, 'specZoneMaskEdge', specZoneMaskEdge) - ! !$acc enter data copyin(specZoneMaskEdge) + call mpas_pool_get_array(mesh, 'specZoneMaskEdge', specZoneMaskEdge) + !$acc enter data copyin(specZoneMaskEdge) - ! call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) - ! !$acc enter data copyin(invDvEdge) + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc enter data copyin(invDvEdge) - ! call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) - ! !$acc enter data copyin(dcEdge) + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc enter data copyin(dcEdge) - ! call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) - ! !$acc enter data copyin(invDcEdge) + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc enter data copyin(invDcEdge) - ! call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) - ! !$acc enter data copyin(edgesOnEdge) + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc enter data copyin(edgesOnEdge) - ! call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) - ! !$acc enter data copyin(edgesOnVertex) + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc enter data copyin(edgesOnVertex) - ! call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) - ! !$acc enter data copyin(edgesOnVertex_sign) + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc enter data copyin(edgesOnVertex_sign) - ! call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) - ! !$acc enter data copyin(nEdgesOnEdge) + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc enter data copyin(nEdgesOnEdge) - ! call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) - ! !$acc enter data copyin(weightsOnEdge) + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc enter data copyin(weightsOnEdge) - ! call mpas_pool_get_array(mesh, 'cellsOnVertex', cellsOnVertex) - ! !$acc enter data copyin(cellsOnVertex) + call mpas_pool_get_array(mesh, 'cellsOnVertex', cellsOnVertex) + !$acc enter data copyin(cellsOnVertex) - ! call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) - ! !$acc enter data copyin(verticesOnCell) + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc enter data copyin(verticesOnCell) - ! call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) - ! !$acc enter data copyin(verticesOnEdge) + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc enter data copyin(verticesOnEdge) - ! call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) - ! !$acc enter data copyin(invAreaTriangle) + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc enter data copyin(invAreaTriangle) - ! call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) - ! !$acc enter data copyin(kiteForCell) + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc enter data copyin(kiteForCell) - ! call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) - ! !$acc enter data copyin(kiteAreasOnVertex) + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc enter data copyin(kiteAreasOnVertex) - ! call mpas_pool_get_array(mesh, 'fVertex', fVertex) - ! !$acc enter data copyin(fVertex) + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc enter data copyin(fVertex) - ! call mpas_pool_get_array(mesh, 'fEdge', fEdge) - ! !$acc enter data copyin(fEdge) + call mpas_pool_get_array(mesh, 'fEdge', fEdge) + !$acc enter data copyin(fEdge) - ! call mpas_pool_get_array(mesh, 'zz', zz) - ! !$acc enter data copyin(zz) + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc enter data copyin(zz) - ! call mpas_pool_get_array(mesh, 'rdzw', rdzw) - ! !$acc enter data copyin(rdzw) + call mpas_pool_get_array(mesh, 'rdzw', rdzw) + !$acc enter data copyin(rdzw) - ! call mpas_pool_get_array(mesh, 'rdzu', rdzu) - ! !$acc enter data copyin(rdzu) + call mpas_pool_get_array(mesh, 'rdzu', rdzu) + !$acc enter data copyin(rdzu) - ! call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) - ! !$acc enter data copyin(zb_cell) + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc enter data copyin(zb_cell) - ! call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) - ! !$acc enter data copyin(zb3_cell) + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc enter data copyin(zb3_cell) - ! call mpas_pool_get_array(mesh, 'fzm', fzm) - ! !$acc enter data copyin(fzm) + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc enter data copyin(fzm) - ! call mpas_pool_get_array(mesh, 'fzp', fzp) - ! !$acc enter data copyin(fzp) + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc enter data copyin(fzp) - ! call mpas_pool_get_array(mesh, 'zb', zb) - ! !$acc enter data copyin(zb) + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc enter data copyin(zb) - ! call mpas_pool_get_array(mesh, 'zb3', zb3) - ! !$acc enter data copyin(zb3) + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc enter data copyin(zb3) - ! call mpas_pool_get_array(mesh, 'nearestRelaxationCell', nearestRelaxationCell) - ! !$acc enter data copyin(nearestRelaxationCell) + call mpas_pool_get_array(mesh, 'nearestRelaxationCell', nearestRelaxationCell) + !$acc enter data copyin(nearestRelaxationCell) - ! call mpas_pool_get_array(mesh, 'zgrid', zgrid) - ! !$acc enter data copyin(zgrid) + call mpas_pool_get_array(mesh, 'zgrid', zgrid) + !$acc enter data copyin(zgrid) - ! call mpas_pool_get_array(mesh, 'zxu', zxu) - ! !$acc enter data copyin(zxu) + call mpas_pool_get_array(mesh, 'zxu', zxu) + !$acc enter data copyin(zxu) - ! call mpas_pool_get_array(mesh, 'dss', dss) - ! !$acc enter data copyin(dss) + call mpas_pool_get_array(mesh, 'dss', dss) + !$acc enter data copyin(dss) - ! call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) - ! !$acc enter data copyin(specZoneMaskCell) + call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) + !$acc enter data copyin(specZoneMaskCell) - ! call mpas_pool_get_array(mesh, 'defc_a', defc_a) - ! !$acc enter data copyin(defc_a) + call mpas_pool_get_array(mesh, 'defc_a', defc_a) + !$acc enter data copyin(defc_a) - ! call mpas_pool_get_array(mesh, 'defc_b', defc_b) - ! !$acc enter data copyin(defc_b) + call mpas_pool_get_array(mesh, 'defc_b', defc_b) + !$acc enter data copyin(defc_b) - ! call mpas_pool_get_array(mesh, 'latEdge', latEdge) - ! !$acc enter data copyin(latEdge) + call mpas_pool_get_array(mesh, 'latEdge', latEdge) + !$acc enter data copyin(latEdge) - ! call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) - ! !$acc enter data copyin(angleEdge) + call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + !$acc enter data copyin(angleEdge) - ! call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) - ! !$acc enter data copyin(meshScalingDel2) + call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) + !$acc enter data copyin(meshScalingDel2) - ! call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) - ! !$acc enter data copyin(meshScalingDel4) - ! call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) - ! !$acc enter data copyin(meshScalingRegionalCell) + call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) + !$acc enter data copyin(meshScalingDel4) + call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) + !$acc enter data copyin(meshScalingRegionalCell) - ! call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) - ! !$acc enter data copyin(meshScalingRegionalEdge) + call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) + !$acc enter data copyin(meshScalingRegionalEdge) - ! call mpas_pool_get_array(mesh, 'latCell', latCell) - ! !$acc enter data copyin(latCell) + call mpas_pool_get_array(mesh, 'latCell', latCell) + !$acc enter data copyin(latCell) - ! call mpas_pool_get_array(mesh, 'lonCell', lonCell) - ! !$acc enter data copyin(lonCell) + call mpas_pool_get_array(mesh, 'lonCell', lonCell) + !$acc enter data copyin(lonCell) - ! call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) - ! !$acc enter data copyin(coeffs_reconstruct) + call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) + !$acc enter data copyin(coeffs_reconstruct) #endif @@ -680,169 +680,169 @@ subroutine mpas_atm_pre_init(domain) !$acc enter data copyin(tend_w_buoy) - call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) - !$acc enter data copyin(dvEdge) + ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + ! !$acc enter data copyin(dvEdge) - call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) - !$acc enter data copyin(cellsOnCell) + ! call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) + ! !$acc enter data copyin(cellsOnCell) - call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) - !$acc enter data copyin(cellsOnEdge) + ! call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + ! !$acc enter data copyin(cellsOnEdge) - call mpas_pool_get_array(mesh, 'advCellsForEdge', advCellsForEdge) - !$acc enter data copyin(advCellsForEdge) + ! call mpas_pool_get_array(mesh, 'advCellsForEdge', advCellsForEdge) + ! !$acc enter data copyin(advCellsForEdge) - call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) - !$acc enter data copyin(edgesOnCell) + ! call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + ! !$acc enter data copyin(edgesOnCell) - call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) - !$acc enter data copyin(nAdvCellsForEdge) + ! call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) + ! !$acc enter data copyin(nAdvCellsForEdge) - call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) - !$acc enter data copyin(nEdgesOnCell) + ! call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + ! !$acc enter data copyin(nEdgesOnCell) - call mpas_pool_get_array(mesh, 'adv_coefs', adv_coefs) - !$acc enter data copyin(adv_coefs) + ! call mpas_pool_get_array(mesh, 'adv_coefs', adv_coefs) + ! !$acc enter data copyin(adv_coefs) - call mpas_pool_get_array(mesh, 'adv_coefs_3rd', adv_coefs_3rd) - !$acc enter data copyin(adv_coefs_3rd) + ! call mpas_pool_get_array(mesh, 'adv_coefs_3rd', adv_coefs_3rd) + ! !$acc enter data copyin(adv_coefs_3rd) - call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) - !$acc enter data copyin(edgesOnCell_sign) + ! call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + ! !$acc enter data copyin(edgesOnCell_sign) - call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) - !$acc enter data copyin(invAreaCell) + ! call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + ! !$acc enter data copyin(invAreaCell) - call mpas_pool_get_array(mesh, 'bdyMaskCell', bdyMaskCell) - !$acc enter data copyin(bdyMaskCell) + ! call mpas_pool_get_array(mesh, 'bdyMaskCell', bdyMaskCell) + ! !$acc enter data copyin(bdyMaskCell) - call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) - !$acc enter data copyin(bdyMaskEdge) + ! call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) + ! !$acc enter data copyin(bdyMaskEdge) - call mpas_pool_get_array(mesh, 'specZoneMaskEdge', specZoneMaskEdge) - !$acc enter data copyin(specZoneMaskEdge) + ! call mpas_pool_get_array(mesh, 'specZoneMaskEdge', specZoneMaskEdge) + ! !$acc enter data copyin(specZoneMaskEdge) - call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) - !$acc enter data copyin(invDvEdge) + ! call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + ! !$acc enter data copyin(invDvEdge) - call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) - !$acc enter data copyin(dcEdge) + ! call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + ! !$acc enter data copyin(dcEdge) - call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) - !$acc enter data copyin(invDcEdge) + ! call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + ! !$acc enter data copyin(invDcEdge) - call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) - !$acc enter data copyin(edgesOnEdge) + ! call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + ! !$acc enter data copyin(edgesOnEdge) - call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) - !$acc enter data copyin(edgesOnVertex) + ! call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + ! !$acc enter data copyin(edgesOnVertex) - call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) - !$acc enter data copyin(edgesOnVertex_sign) + ! call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + ! !$acc enter data copyin(edgesOnVertex_sign) - call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) - !$acc enter data copyin(nEdgesOnEdge) + ! call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + ! !$acc enter data copyin(nEdgesOnEdge) - call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) - !$acc enter data copyin(weightsOnEdge) + ! call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + ! !$acc enter data copyin(weightsOnEdge) - call mpas_pool_get_array(mesh, 'cellsOnVertex', cellsOnVertex) - !$acc enter data copyin(cellsOnVertex) + ! call mpas_pool_get_array(mesh, 'cellsOnVertex', cellsOnVertex) + ! !$acc enter data copyin(cellsOnVertex) - call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) - !$acc enter data copyin(verticesOnCell) + ! call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + ! !$acc enter data copyin(verticesOnCell) - call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) - !$acc enter data copyin(verticesOnEdge) + ! call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + ! !$acc enter data copyin(verticesOnEdge) - call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) - !$acc enter data copyin(invAreaTriangle) + ! call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + ! !$acc enter data copyin(invAreaTriangle) - call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) - !$acc enter data copyin(kiteForCell) + ! call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + ! !$acc enter data copyin(kiteForCell) - call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) - !$acc enter data copyin(kiteAreasOnVertex) + ! call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + ! !$acc enter data copyin(kiteAreasOnVertex) - call mpas_pool_get_array(mesh, 'fVertex', fVertex) - !$acc enter data copyin(fVertex) + ! call mpas_pool_get_array(mesh, 'fVertex', fVertex) + ! !$acc enter data copyin(fVertex) - call mpas_pool_get_array(mesh, 'fEdge', fEdge) - !$acc enter data copyin(fEdge) + ! call mpas_pool_get_array(mesh, 'fEdge', fEdge) + ! !$acc enter data copyin(fEdge) - call mpas_pool_get_array(mesh, 'zz', zz) - !$acc enter data copyin(zz) + ! call mpas_pool_get_array(mesh, 'zz', zz) + ! !$acc enter data copyin(zz) - call mpas_pool_get_array(mesh, 'rdzw', rdzw) - !$acc enter data copyin(rdzw) + ! call mpas_pool_get_array(mesh, 'rdzw', rdzw) + ! !$acc enter data copyin(rdzw) - call mpas_pool_get_array(mesh, 'rdzu', rdzu) - !$acc enter data copyin(rdzu) + ! call mpas_pool_get_array(mesh, 'rdzu', rdzu) + ! !$acc enter data copyin(rdzu) - call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) - !$acc enter data copyin(zb_cell) + ! call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + ! !$acc enter data copyin(zb_cell) - call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) - !$acc enter data copyin(zb3_cell) + ! call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + ! !$acc enter data copyin(zb3_cell) - call mpas_pool_get_array(mesh, 'fzm', fzm) - !$acc enter data copyin(fzm) + ! call mpas_pool_get_array(mesh, 'fzm', fzm) + ! !$acc enter data copyin(fzm) - call mpas_pool_get_array(mesh, 'fzp', fzp) - !$acc enter data copyin(fzp) + ! call mpas_pool_get_array(mesh, 'fzp', fzp) + ! !$acc enter data copyin(fzp) - call mpas_pool_get_array(mesh, 'zb', zb) - !$acc enter data copyin(zb) + ! call mpas_pool_get_array(mesh, 'zb', zb) + ! !$acc enter data copyin(zb) - call mpas_pool_get_array(mesh, 'zb3', zb3) - !$acc enter data copyin(zb3) + ! call mpas_pool_get_array(mesh, 'zb3', zb3) + ! !$acc enter data copyin(zb3) - call mpas_pool_get_array(mesh, 'nearestRelaxationCell', nearestRelaxationCell) - !$acc enter data copyin(nearestRelaxationCell) + ! call mpas_pool_get_array(mesh, 'nearestRelaxationCell', nearestRelaxationCell) + ! !$acc enter data copyin(nearestRelaxationCell) - call mpas_pool_get_array(mesh, 'zgrid', zgrid) - !$acc enter data copyin(zgrid) + ! call mpas_pool_get_array(mesh, 'zgrid', zgrid) + ! !$acc enter data copyin(zgrid) - call mpas_pool_get_array(mesh, 'zxu', zxu) - !$acc enter data copyin(zxu) + ! call mpas_pool_get_array(mesh, 'zxu', zxu) + ! !$acc enter data copyin(zxu) - call mpas_pool_get_array(mesh, 'dss', dss) - !$acc enter data copyin(dss) + ! call mpas_pool_get_array(mesh, 'dss', dss) + ! !$acc enter data copyin(dss) - call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) - !$acc enter data copyin(specZoneMaskCell) + ! call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) + ! !$acc enter data copyin(specZoneMaskCell) - call mpas_pool_get_array(mesh, 'defc_a', defc_a) - !$acc enter data copyin(defc_a) + ! call mpas_pool_get_array(mesh, 'defc_a', defc_a) + ! !$acc enter data copyin(defc_a) - call mpas_pool_get_array(mesh, 'defc_b', defc_b) - !$acc enter data copyin(defc_b) + ! call mpas_pool_get_array(mesh, 'defc_b', defc_b) + ! !$acc enter data copyin(defc_b) - call mpas_pool_get_array(mesh, 'latEdge', latEdge) - !$acc enter data copyin(latEdge) + ! call mpas_pool_get_array(mesh, 'latEdge', latEdge) + ! !$acc enter data copyin(latEdge) - call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) - !$acc enter data copyin(angleEdge) + ! call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + ! !$acc enter data copyin(angleEdge) - call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) - !$acc enter data copyin(meshScalingDel2) + ! call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) + ! !$acc enter data copyin(meshScalingDel2) - call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) - !$acc enter data copyin(meshScalingDel4) - call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) - !$acc enter data copyin(meshScalingRegionalCell) + ! call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) + ! !$acc enter data copyin(meshScalingDel4) + ! call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) + ! !$acc enter data copyin(meshScalingRegionalCell) - call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) - !$acc enter data copyin(meshScalingRegionalEdge) + ! call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) + ! !$acc enter data copyin(meshScalingRegionalEdge) - call mpas_pool_get_array(mesh, 'latCell', latCell) - !$acc enter data copyin(latCell) + ! call mpas_pool_get_array(mesh, 'latCell', latCell) + ! !$acc enter data copyin(latCell) - call mpas_pool_get_array(mesh, 'lonCell', lonCell) - !$acc enter data copyin(lonCell) + ! call mpas_pool_get_array(mesh, 'lonCell', lonCell) + ! !$acc enter data copyin(lonCell) - call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) - !$acc enter data copyin(coeffs_reconstruct) + ! call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) + ! !$acc enter data copyin(coeffs_reconstruct) #endif diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index d2a6eeb3e5..c724c9183c 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -264,7 +264,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) do while (associated(block)) call mpas_pool_get_subpool(block % structs, 'mesh', mesh) call mpas_pool_get_subpool(block % structs, 'state', state) - !call mpas_atm_pre_init(domain) + call mpas_atm_pre_init(domain) call atm_mpas_init_block(domain % dminfo, domain % streamManager, block, mesh, dt) @@ -294,7 +294,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) ! ! Prepare the dynamics for integration ! - call mpas_atm_pre_init(domain) + !call mpas_atm_pre_init(domain) call mpas_atm_dynamics_init(domain) end function atm_core_init From 2654783147e4a98938588be5b119575d18e95a67 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 18 Apr 2025 11:16:48 -0600 Subject: [PATCH 60/79] second working --- .../dynamics/mpas_atm_time_integration.F | 113 +++++++++++------- 1 file changed, 67 insertions(+), 46 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index e68c901fff..5e08fdafd6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -200,6 +200,7 @@ subroutine mpas_atm_dynamics_init(domain) type (mpas_pool_type), pointer :: mesh type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend_physics real (kind=RKIND), dimension(:), pointer :: dvEdge integer, dimension(:,:), pointer :: cellsOnCell @@ -272,6 +273,9 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + #endif #ifdef MPAS_CAM_DYCORE @@ -291,6 +295,9 @@ subroutine mpas_atm_dynamics_init(domain) #ifdef MPAS_OPENACC nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + nullify(tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) @@ -459,6 +466,17 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) !$acc enter data copyin(coeffs_reconstruct) + + ! Pointers for these are declared as module level variables + ! call mpas_pool_get_array(tend_physics, 'tend_rho_physics', tend_rho_physics) + ! !$acc enter data copyin(tend_rho_physics) + ! call mpas_pool_get_array(tend_physics, 'tend_ru_physics', tend_ru_physics) + ! !$acc enter data copyin(tend_ru_physics) + ! call mpas_pool_get_array(tend_physics, 'tend_rtheta_physics', tend_rtheta_physics) + ! !$acc enter data copyin(tend_rtheta_physics) + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc enter data copyin(rthdynten) + #endif end subroutine mpas_atm_dynamics_init @@ -548,6 +566,10 @@ subroutine mpas_atm_pre_init(domain) real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 @@ -559,8 +581,9 @@ subroutine mpas_atm_pre_init(domain) real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save - + real (kind=RKIND), dimension(:,:), pointer :: rthdynten nullify(state) nullify(diag) @@ -625,6 +648,33 @@ subroutine mpas_atm_pre_init(domain) !$acc enter data copyin(rtheta_pp) call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) !$acc enter data copyin(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc enter data copyin(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc enter data copyin(rho_edge) + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc enter data copyin(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) + + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc enter data copyin(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc enter data copyin(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc enter data copyin(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc enter data copyin(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc enter data copyin(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc enter data copyin(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc enter data copyin(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc enter data copyin(cofrz) call mpas_pool_get_array(diag, 'ruAvg', ruAvg) @@ -678,6 +728,9 @@ subroutine mpas_atm_pre_init(domain) !$acc enter data copyin(tend_w_pgf) call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) !$acc enter data copyin(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc enter data copyin(scalar_tend_save) + ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) @@ -2575,8 +2628,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') !$acc enter data copyin(qtot) - !$acc enter data create(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) + !$acc enter data create(b_tri, c_tri) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') ! set coefficients @@ -2658,8 +2710,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc exit data copyout(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) + !$acc exit data copyout( b_tri, c_tri) !$acc exit data delete(qtot) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') @@ -3029,10 +3080,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart resm = (1.0 - epssm) / (1.0 + epssm) rdts = 1./dts - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc enter data copyin(cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri) - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') if(small_step /= 1) then ! not needed on first small step @@ -3259,10 +3306,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart end do ! end of loop over cells !$acc end parallel - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc exit data delete(cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri) - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') end subroutine atm_advance_acoustic_step_work @@ -3536,11 +3579,7 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE integer :: i, iCell, iEdge, k, cell1, cell2 real (kind=RKIND) :: invNs, rcv, p0, flux - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - if (rk_step == 3) then - !$acc enter data copyin(rt_diabatic_tend) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') + rcv = rgas/(cp-rgas) p0 = 1.0e+05 ! this should come from somewhere else... @@ -3686,11 +3725,7 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - if (rk_step == 3) then - !$acc exit data delete(rt_diabatic_tend) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') + end subroutine atm_recover_large_step_variables_work @@ -3927,7 +3962,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc enter data create(horiz_flux_arr) - !$acc enter data copyin(uhAvg) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') !$acc parallel async @@ -4024,12 +4058,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & ! MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') -#ifndef DO_PHYSICS - !$acc enter data create(scalar_tend_save) -#else - !$acc enter data copyin(scalar_tend_save) -#endif - !$acc enter data copyin(scalar_old, fnm, fnp, rdnw) + !$acc enter data copyin(scalar_old) !$acc enter data create(scalar_tend_column, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') @@ -4112,8 +4141,8 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc exit data delete(scalar_tend_column, wdtn, uhAvg, fnm, fnp, & - !$acc rdnw, horiz_flux_arr, scalar_tend_save) + !$acc exit data delete(scalar_tend_column, wdtn, & + !$acc horiz_flux_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') end subroutine atm_advance_scalars_work @@ -4381,7 +4410,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge if (local_advance_density) then !$acc enter data copyin(rho_zz_int) end if - !$acc enter data copyin(rdnw, uhAvg) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel @@ -4473,7 +4501,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! AG: Note that scalar_old, scalar_new in this subroutine are not the same ! variables as 'scalars' obtained from the state pool. MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc enter data copyin(fnm, fnp) !$acc enter data create(scalar_old, scalar_new, scale_arr, s_min, s_max, & !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -4983,8 +5010,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc exit data copyout(rho_zz_int) end if !$acc exit data delete(scalar_old, scalar_new, scale_arr, s_min, s_max, & - !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn, & - !$acc uhAvg, fnm, fnp, rdnw) + !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') end subroutine atm_advance_scalars_mono_work @@ -5420,7 +5446,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then - !$acc enter data create(kdiff) !$acc enter data copyin(tend_rho_physics) !$acc enter data copyin(qtot) !$acc enter data copyin(divergence, vorticity) @@ -5430,13 +5455,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data create(delsq_w) end if !$acc enter data create(dpdz) - !$acc enter data copyin(pv_edge, rho_edge, ke) - !$acc enter data create(h_divergence) !$acc enter data copyin(rayleigh_damp_coef) !$acc enter data copyin(tend_ru_physics) - !$acc enter data create(delsq_theta) !$acc enter data copyin(tend_rtheta_physics) - !$acc enter data create(rthdynten) + + + !$acc enter data create(delsq_theta) !$acc enter data copyin(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') @@ -6473,7 +6497,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then - !$acc exit data delete(kdiff) !$acc exit data delete(tend_rho_physics) !$acc exit data delete(qtot) !$acc exit data delete(divergence, vorticity) @@ -6483,13 +6506,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(delsq_w) end if !$acc exit data delete(dpdz) - !$acc exit data delete(pv_edge, rho_edge, ke) - !$acc exit data copyout(h_divergence) + !$acc exit data delete(rayleigh_damp_coef) !$acc exit data delete(tend_ru_physics) - !$acc exit data delete(delsq_theta) !$acc exit data delete(tend_rtheta_physics) - !$acc exit data copyout(rthdynten) + !$acc exit data delete(delsq_theta) !$acc exit data delete(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') From 4416beefd19fda03fd6eec4d53984179b43f7d6a Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 18 Apr 2025 14:17:32 -0600 Subject: [PATCH 61/79] still working jw --- .../dynamics/mpas_atm_time_integration.F | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5e08fdafd6..606b546bc4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -276,6 +276,10 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:,:), pointer :: rthdynten + real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init + real (kind=RKIND), dimension(:,:), pointer :: t_init + + #endif #ifdef MPAS_CAM_DYCORE @@ -466,6 +470,15 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) !$acc enter data copyin(coeffs_reconstruct) + call mpas_pool_get_array(mesh, 'u_init', u_init) + !$acc enter data copyin(u_init) + call mpas_pool_get_array(mesh, 'v_init', v_init) + !$acc enter data copyin(v_init) + call mpas_pool_get_array(mesh, 't_init', t_init) + !$acc enter data copyin(t_init) + call mpas_pool_get_array(mesh, 'qv_init', qv_init) + !$acc enter data copyin(qv_init) + ! Pointers for these are declared as module level variables ! call mpas_pool_get_array(tend_physics, 'tend_rho_physics', tend_rho_physics) @@ -732,7 +745,6 @@ subroutine mpas_atm_pre_init(domain) !$acc enter data copyin(scalar_tend_save) - ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) ! !$acc enter data copyin(dvEdge) @@ -4058,7 +4070,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & ! MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc enter data copyin(scalar_old) !$acc enter data create(scalar_tend_column, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') @@ -4501,7 +4512,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! AG: Note that scalar_old, scalar_new in this subroutine are not the same ! variables as 'scalars' obtained from the state pool. MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc enter data create(scalar_old, scalar_new, scale_arr, s_min, s_max, & + !$acc enter data create(scale_arr, s_min, s_max, & !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -5009,7 +5020,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge if (local_advance_density) then !$acc exit data copyout(rho_zz_int) end if - !$acc exit data delete(scalar_old, scalar_new, scale_arr, s_min, s_max, & + !$acc exit data delete(scale_arr, s_min, s_max, & !$acc flux_arr, flux_tmp, flux_upwind_tmp, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -5451,17 +5462,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(divergence, vorticity) !$acc enter data create(delsq_u) !$acc enter data create(delsq_vorticity, delsq_divergence) - !$acc enter data copyin(u_init, v_init) !$acc enter data create(delsq_w) end if !$acc enter data create(dpdz) !$acc enter data copyin(rayleigh_damp_coef) !$acc enter data copyin(tend_ru_physics) !$acc enter data copyin(tend_rtheta_physics) - - !$acc enter data create(delsq_theta) - !$acc enter data copyin(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -6502,7 +6509,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(divergence, vorticity) !$acc exit data copyout(delsq_u) !$acc exit data delete(delsq_vorticity, delsq_divergence) - !$acc exit data delete(u_init, v_init) !$acc exit data copyout(delsq_w) end if !$acc exit data delete(dpdz) @@ -6511,7 +6517,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(tend_ru_physics) !$acc exit data delete(tend_rtheta_physics) !$acc exit data delete(delsq_theta) - !$acc exit data delete(t_init) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From 01001e499293da6da1e548a77a3f6801aec61ec5 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 22 Apr 2025 19:00:19 -0600 Subject: [PATCH 62/79] some changes --- .../dynamics/mpas_atm_time_integration.F | 254 +++++++++++------- 1 file changed, 155 insertions(+), 99 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 606b546bc4..15e659d4b0 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -20,6 +20,11 @@ module atm_time_integration use mpas_pool_routines use mpas_kind_types use mpas_constants + !USE m_ser, ONLY: ser_init, ser_array + USE m_serialize, ONLY: fs_add_savepoint_metainfo, fs_read_field, fs_create_savepoint, fs_write_field + USE utils_ppser, ONLY: ppser_set_mode, ppser_initialize, ppser_get_mode, ppser_savepoint, & + ppser_serializer, ppser_serializer_ref, ppser_intlength, ppser_reallength, & + ppser_realtype, ppser_zrperturb use mpas_dmpar use mpas_vector_reconstruction ! Added only clause to keep xlf90 from getting confused from the overloaded abs intrinsic in mpas_timekeeping @@ -579,7 +584,7 @@ subroutine mpas_atm_pre_init(domain) real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, v - real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri real (kind=RKIND), dimension(:), pointer :: cofrz @@ -596,7 +601,7 @@ subroutine mpas_atm_pre_init(domain) real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save - real (kind=RKIND), dimension(:,:), pointer :: rthdynten + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity nullify(state) nullify(diag) @@ -610,27 +615,27 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) call mpas_pool_get_array(diag, 'ru', ru) - !$acc enter data copyin(ru) + !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'ru_p', ru_p) - !$acc enter data copyin(ru_p) + !$acc enter data create(ru_p) call mpas_pool_get_array(diag, 'ru_save', ru_save) - !$acc enter data copyin(ru_save) + !$acc enter data create(ru_save) call mpas_pool_get_array(diag, 'rw', rw) - !$acc enter data copyin(rw) + !$acc enter data copyin(rw) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rw_p', rw_p) - !$acc enter data copyin(rw_p) + !$acc enter data create(rw_p) call mpas_pool_get_array(diag, 'rw_save', rw_save) - !$acc enter data copyin(rw_save) + !$acc enter data create(rw_save) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - !$acc enter data copyin(rtheta_p) + !$acc enter data copyin(rtheta_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) - !$acc enter data copyin(rtheta_p_save) + !$acc enter data create(rtheta_p_save) call mpas_pool_get_array(diag, 'exner', exner) - !$acc enter data copyin(exner) + !$acc enter data copyin(exner) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'exner_base', exner_base) - !$acc enter data copyin(exner_base) + !$acc enter data copyin(exner_base) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) - !$acc enter data copyin(rtheta_base) + !$acc enter data copyin(rtheta_base) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rho_base', rho_base) !$acc enter data copyin(rho_base) call mpas_pool_get_array(diag, 'rho', rho) @@ -640,91 +645,102 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_array(diag, 'theta_base', theta_base) !$acc enter data copyin(theta_base) call mpas_pool_get_array(diag, 'rho_p', rho_p) - !$acc enter data copyin(rho_p) + !$acc enter data copyin(rho_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) - !$acc enter data copyin(rho_p_save) + !$acc enter data create(rho_p_save) call mpas_pool_get_array(diag, 'rho_pp', rho_pp) - !$acc enter data copyin(rho_pp) + !$acc enter data create(rho_pp) call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) - !$acc enter data copyin(rho_zz_old_split) + !$acc enter data create(rho_zz_old_split) call mpas_pool_get_array(diag, 'cqw', cqw) - !$acc enter data copyin(cqw) + !$acc enter data create(cqw) call mpas_pool_get_array(diag, 'cqu', cqu) - !$acc enter data copyin(cqu) + !$acc enter data create(cqu) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) - !$acc enter data copyin(pressure_p) + !$acc enter data copyin(pressure_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'pressure_base', pressure_base) - !$acc enter data copyin(pressure_base) + !$acc enter data copyin(pressure_base) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'v', v) - !$acc enter data copyin(v) + !$acc enter data copyin(v) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !$acc enter data copyin(rtheta_pp) + !$acc enter data create(rtheta_pp) call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) - !$acc enter data copyin(rtheta_pp_old) + !$acc enter data create(rtheta_pp_old) call mpas_pool_get_array(diag, 'kdiff', kdiff) !$acc enter data copyin(kdiff) call mpas_pool_get_array(diag, 'pv_edge', pv_edge) - !$acc enter data copyin(pv_edge) + !$acc enter data create(pv_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data create(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data create(pv_cell) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) - !$acc enter data copyin(rho_edge) + !$acc enter data copyin(rho_edge) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'h_divergence', h_divergence) !$acc enter data copyin(h_divergence) call mpas_pool_get_array(diag, 'ke', ke) - !$acc enter data copyin(ke) + !$acc enter data copyin(ke) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) - !$acc enter data copyin(alpha_tri) + !$acc enter data create(alpha_tri) call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) - !$acc enter data copyin(gamma_tri) + !$acc enter data create(gamma_tri) call mpas_pool_get_array(diag, 'a_tri', a_tri) - !$acc enter data copyin(a_tri) + !$acc enter data create(a_tri) call mpas_pool_get_array(diag, 'cofwr', cofwr) - !$acc enter data copyin(cofwr) + !$acc enter data create(cofwr) call mpas_pool_get_array(diag, 'cofwz', cofwz) - !$acc enter data copyin(cofwz) + !$acc enter data create(cofwz) call mpas_pool_get_array(diag, 'coftz', coftz) - !$acc enter data copyin(coftz) + !$acc enter data create(coftz) call mpas_pool_get_array(diag, 'cofwt', cofwt) - !$acc enter data copyin(cofwt) + !$acc enter data create(cofwt) call mpas_pool_get_array(diag, 'cofrz', cofrz) - !$acc enter data copyin(cofrz) + !$acc enter data create(cofrz) + + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc enter data copyin(vorticity) + + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) - !$acc enter data copyin(ruAvg) + !$acc enter data create(ruAvg) call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) !$acc enter data copyin(ruAvg_split) call mpas_pool_get_array(diag, 'wwAvg', wwAvg) - !$acc enter data copyin(wwAvg) + !$acc enter data create(wwAvg) call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) !$acc enter data copyin(wwAvg_split) call mpas_pool_get_array(state, 'u', u_1, 1) !$acc enter data copyin(u_1) call mpas_pool_get_array(state, 'u', u_2, 2) - !$acc enter data copyin(u_2) + !$acc enter data create(u_2) call mpas_pool_get_array(state, 'w', w_1, 1) !$acc enter data copyin(w_1) call mpas_pool_get_array(state, 'w', w_2, 2) - !$acc enter data copyin(w_2) - call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) - !$acc enter data copyin(theta_m_1) + !$acc enter data create(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc enter data copyin(theta_m_1) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) - !$acc enter data copyin(theta_m_2) + !$acc enter data create(theta_m_2) call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) !$acc enter data copyin(rho_zz_1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - !$acc enter data copyin(rho_zz_2) + !$acc enter data create(rho_zz_2) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) !$acc enter data copyin(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc enter data copyin(scalars_2) + !$acc enter data create(scalars_2) call mpas_pool_get_array(tend, 'u', tend_ru) !$acc enter data copyin(tend_ru) call mpas_pool_get_array(tend, 'rho_zz', tend_rho) - !$acc enter data copyin(tend_rho) + !$acc enter data create(tend_rho) call mpas_pool_get_array(tend, 'theta_m', tend_rt) !$acc enter data copyin(tend_rt) call mpas_pool_get_array(tend, 'w', tend_rw) @@ -732,17 +748,17 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) !$acc enter data copyin(rt_diabatic_tend) call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) - !$acc enter data copyin(tend_u_euler) + !$acc enter data create(tend_u_euler) call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) - !$acc enter data copyin(tend_theta_euler) + !$acc enter data create(tend_theta_euler) call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) - !$acc enter data copyin(tend_w_euler) + !$acc enter data create(tend_w_euler) call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) !$acc enter data copyin(tend_w_pgf) call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) !$acc enter data copyin(tend_w_buoy) call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) - !$acc enter data copyin(scalar_tend_save) + !$acc enter data create(scalar_tend_save) ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) @@ -1217,6 +1233,8 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) real (kind=RKIND) :: Time_new type (mpas_pool_type), pointer :: state character (len=StrKIND), pointer :: config_time_integration + character(len=StrKIND) :: timeStamp + character(len=StrKIND) :: savepoint_name clock => domain % clock @@ -1225,8 +1243,12 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) call mpas_pool_get_config(block % configs, 'config_time_integration', config_time_integration) call mpas_pool_get_config(block % configs, 'config_apply_lbcs', config_apply_lbcs) + call mpas_get_time(nowTime, dateTimeString=xtime_new) + !$ser init directory='./ser_data' prefix='mpas_dycore' prefix_ref='mpas_dycore-mem' + !$ser savepoint atm_srk3 datetime=xtime_new + if (trim(config_time_integration) == 'SRK3') then - call atm_srk3(domain, dt, itimestep, exchange_halo_group) + call atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) else call mpas_log_write('Unknown time integration option '//trim(config_time_integration), messageType=MPAS_LOG_ERR) call mpas_log_write('Currently, only ''SRK3'' is supported.', messageType=MPAS_LOG_CRIT) @@ -1251,7 +1273,7 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) end subroutine atm_timestep - subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) + subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Advance model state forward in time by the specified time step using ! time-split RK3 scheme @@ -1271,6 +1293,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) real (kind=RKIND), intent(in) :: dt integer, intent(in) :: itimestep procedure (halo_exchange_routine) :: exchange_halo_group + type (MPAS_Time_type), intent(in) :: nowTime integer :: thread integer :: iCell, k, iEdge @@ -1286,6 +1309,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) integer :: rk_step, number_of_sub_steps integer :: iScalar + character (len=StrKIND) :: xtime_new + + real (kind=RKIND), dimension(3) :: rk_timestep, rk_sub_timestep integer, dimension(3) :: number_sub_steps integer :: small_step @@ -1333,6 +1359,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) logical, parameter :: debug = .false. + call mpas_get_time(nowTime, dateTimeString=xtime_new) + ! ! Retrieve configuration options ! @@ -1478,6 +1506,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') call mpas_timer_start('atm_rk_integration_setup') + + !$ser savepoint rk_integration_setup datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1494,6 +1524,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_stop('atm_rk_integration_setup') call mpas_timer_start('atm_compute_moist_coefficients') + !$ser savepoint moist_coefficients datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1545,6 +1576,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_start('atm_compute_vert_imp_coefs') rk_step = 1 + !$ser savepoint vert_imp_coefs datetime=xtime_new dyn_substep=dynamics_substep + !$OMP PARALLEL DO do thread=1,nThreads call atm_compute_vert_imp_coefs( state, mesh, diag, block % configs, nVertLevels, rk_sub_timestep(rk_step), & @@ -1597,6 +1630,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) delsq_vorticity(:,nVertices+1) = 0.0_RKIND allocate(dpdz(nVertLevels,nCells+1)) dpdz(:,nCells+1) = 0.0_RKIND + !$ser savepoint compute_dyn_tend datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step !$OMP PARALLEL DO do thread=1,nThreads @@ -1645,7 +1679,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !------------------------------------------------------------------------------------------------------------------------ if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS dry dynamics in the specified zone - + !$ser savepoint apply_lbc datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + allocate(ru_driving_tend(nVertLevels,nEdges+1)) allocate(rt_driving_tend(nVertLevels,nCells+1)) allocate(rho_driving_tend(nVertLevels,nCells+1)) @@ -1819,6 +1854,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then + !$ser savepoint advance_scalars1 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + + call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) @@ -1963,6 +2001,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) RK3_SPLIT_TRANSPORT : do rk_step = 1, 3 ! Runge-Kutta loop + !$ser savepoint advance_scalars2 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) @@ -2356,6 +2395,14 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$ser mode write + !$ser accdata rtheta_p=rtheta_p + !$ser accdata rho_p=rho_p + !$ser accdata theta_m_1=theta_m_1 + !$ser accdata scalars_1=scalars_1 + !$ser accdata rho_zz_1=rho_zz_1 + + !$acc kernels theta_m_2(:,cellEnd+1) = 0.0_RKIND !$acc end kernels @@ -2402,6 +2449,8 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel + !$ser accdata scalars_2=scalars_2 + end subroutine atm_rk_integration_setup @@ -2504,6 +2553,10 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel + !$ser mode write + !$ser accdata cqu=cqu + !$ser accdata cqw=cqw + MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') !$acc exit data copyout(qtot) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') @@ -2643,6 +2696,12 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, !$acc enter data create(b_tri, c_tri) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') + + !$ser mode write + !$ser accdata p=p + !$ser accdata t=t + + ! set coefficients dtseps = .5*dts*(1.+epssm) rcv = rgas/(cp-rgas) @@ -2721,6 +2780,9 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, end do ! loop over cells !$acc end parallel + !$ser accdata cofrz=cofrz + !$ser accdata alpha_tri=alpha_tri + MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') !$acc exit data copyout( b_tri, c_tri) !$acc exit data delete(qtot) @@ -3976,7 +4038,12 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc enter data create(horiz_flux_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - !$acc parallel async + + !$ser accdata scalar_new=scalar_new + !$ser accdata uhAvg=uhAvg + + + !$acc parallel !$acc loop gang worker private(scalar_weight2, ica) do iEdge=edgeStart,edgeEnd @@ -4073,6 +4140,10 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc enter data create(scalar_tend_column, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') + + !$ser accdata scalar_old=scalar_old + + !$acc parallel wait !$acc loop gang worker private(scalar_tend_column, wdtn) do iCell=cellSolveStart,cellSolveEnd @@ -4151,6 +4222,10 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & end do !$acc end parallel + !$ser accdata scalar_tend_save=scalar_tend_save + !$ser accdata scalar_tend_column=scalar_tend_column + !$ser accdata scalar_new2=scalar_new + MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc exit data delete(scalar_tend_column, wdtn, & !$acc horiz_flux_arr) @@ -4423,6 +4498,9 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') + + !$ser accdata scalars_old_mono=scalars_old + !$acc parallel !$acc loop gang worker @@ -5016,6 +5094,10 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! loop over scalars + + !$ser accdata scalars_new_mono2=scalars_new + + MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') if (local_advance_density) then !$acc exit data copyout(rho_zz_int) @@ -5459,7 +5541,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc enter data copyin(tend_rho_physics) !$acc enter data copyin(qtot) - !$acc enter data copyin(divergence, vorticity) !$acc enter data create(delsq_u) !$acc enter data create(delsq_vorticity, delsq_divergence) !$acc enter data create(delsq_w) @@ -6506,7 +6587,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then !$acc exit data delete(tend_rho_physics) !$acc exit data delete(qtot) - !$acc exit data delete(divergence, vorticity) !$acc exit data copyout(delsq_u) !$acc exit data delete(delsq_vorticity, delsq_divergence) !$acc exit data copyout(delsq_w) @@ -6702,7 +6782,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Compute height on cell edges at velocity locations ! MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(h_edge,ke_edge,vorticity,divergence) + !$acc enter data create(ke_edge) ! local variable MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang @@ -6788,9 +6868,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Replace 2.0 with 2 in exponentiation to avoid outside chance that ! compiler will actually allow "float raised to float" operation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(ke) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') + !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -6891,12 +6969,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & if(rk_step /= 3) reconstruct_v = .false. end if - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then - !$acc enter data create(v) - else - !$acc enter data copyin(v) - end if + MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') if (reconstruct_v) then @@ -6925,10 +6998,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ( this computes pv_vertex at all vertices bounding real cells ) ! ! Avoid dividing h_vertex by areaTriangle and move areaTriangle into - ! numerator for the pv_vertex calculation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_vertex) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') + ! numerator for the pv_vertex calculation !$acc parallel default(present) !$acc loop collapse(2) do iVertex = vertexStart,vertexEnd @@ -6952,9 +7022,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Compute pv at the edges ! ( this computes pv_edge at all edges bounding real cells ) ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop collapse(2) do iEdge = edgeStart,edgeEnd @@ -6972,9 +7039,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ( this computes pv_cell for all real cells ) ! only needed for APVM upwinding ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_cell) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') + !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -7033,7 +7098,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(pv_cell,gradPVt,gradPVn) + !$acc exit data delete(gradPVt,gradPVn) MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') end if ! apvm upwinding @@ -7050,11 +7115,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & !$acc verticesOnEdge, & !$acc fVertex,invDvEdge,invDcEdge) !$acc exit data delete(u,h) - !$acc exit data copyout(h_edge,ke_edge,vorticity,divergence, & - !$acc ke, & - !$acc v, & - !$acc pv_vertex, & - !$acc pv_edge) + !$acc exit data copyout(ke_edge) MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') end subroutine atm_compute_solve_diagnostics_work @@ -7149,15 +7210,6 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc enter data copyin(cellsOnEdge,nEdgesOnCell,edgesOnCell, & !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & !$acc zb_cell,zb3_cell) - - ! copyin the data that is only on the right-hand side - !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & - !$acc rho_base,theta_base) - - ! copyin the data that will be modified in this routine - !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & - !$acc rtheta_p,exner,exner_base,pressure_p, & - !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') rcv = rgas / (cp-rgas) @@ -7281,15 +7333,6 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc exit data delete(cellsOnEdge,nEdgesOnCell,edgesOnCell, & !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & !$acc zb_cell,zb3_cell) - - ! delete the data that is only on the right-hand side - !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & - !$acc rho_base,theta_base) - - ! copyout the data that will be modified in this routine - !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & - !$acc rtheta_p,exner,exner_base,pressure_p, & - !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') end subroutine atm_init_coupled_diagnostics @@ -7597,6 +7640,13 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end do !$acc end parallel + !$ser mode write + !$ser accdata tend_ru=tend_ru + !$ser accdata tend_rt=tend_rt + !$ser accdata tend_rw=tend_rw + !$ser accdata tend_rho=tend_rho + + MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') !$acc exit data delete(rho_driving_tend,rt_driving_tend, & !$acc ru_driving_tend) @@ -7837,6 +7887,12 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel + !$ser mode write + !$ser accdata divergence1=divergence1 + !$ser accdata divergence2=divergence2 + !$ser accdata vorticity1=vorticity1 + !$ser accdata vorticity2=vorticity2 + MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') !$acc exit data delete(rho_driving_values, rt_driving_values, & !$acc ru_driving_values, divergence1, divergence2, vorticity1, vorticity2) From e8d480baf71f2596b41bbe369554655c26e01327 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 23 Apr 2025 17:13:05 -0600 Subject: [PATCH 63/79] Lam no physics is correct on 1mpi rank --- .../dynamics/mpas_atm_time_integration.F | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 15e659d4b0..556fc57472 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1687,6 +1687,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ru_driving_tend(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nEdges, 'ru', 0.0_RKIND ) rt_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rtheta_m', 0.0_RKIND ) rho_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rho_zz', 0.0_RKIND ) + + + !$ser mode write + !$ser data ru_driving_tend=ru_driving_tend + !$ser data tert_driving_tendnd_rt=rt_driving_tend + !$ser data rho_driving_tend=rho_driving_tend + !$OMP PARALLEL DO do thread=1,nThreads call atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, block % configs, nVertLevels, & @@ -1713,6 +1720,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) rt_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step ) rho_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step ) + + !$ser mode write + !$ser data ru_driving_values=ru_driving_values + !$ser data rt_driving_values=rt_driving_values + !$ser data rho_driving_values=rho_driving_values + call mpas_timer_start('atm_bdy_adjust_dynamics_relaxzone_tend') !$OMP PARALLEL DO do thread=1,nThreads @@ -1816,26 +1829,44 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) time_dyn_step = dt_dynamics*real(dynamics_substep-1) + rk_timestep(rk_step) ru_driving_values(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nEdges, 'u', time_dyn_step ) + + !$acc enter data copyin(ru_driving_values) + ! do this inline at present - it is simple enough + !$acc parallel + !$acc loop gang do iEdge = 1, nEdgesSolve if(bdyMaskEdge(iEdge) > nRelaxZone) then + !$acc loop vector do k = 1, nVertLevels u(k,iEdge) = ru_driving_values(k,iEdge) end do end if end do + !$acc end parallel + + ru_driving_values(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nEdges, 'ru', time_dyn_step ) + + !$acc update device(ru_driving_values) + call mpas_pool_get_array(diag, 'ru', u) ! do this inline at present - it is simple enough + !$acc parallel + !$acc loop gang do iEdge = 1, nEdges if(bdyMaskEdge(iEdge) > nRelaxZone) then + !$acc loop vector do k = 1, nVertLevels u(k,iEdge) = ru_driving_values(k,iEdge) end do end if end do + !$acc end parallel + !$acc exit data delete(ru_driving_values) + deallocate(ru_driving_values) end if ! regional_MPAS addition @@ -7757,6 +7788,11 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel + !$ser mode write + !$ser accdata tend_rho1=tend_rho + !$ser accdata tend_rt1=tend_rt + + !$acc parallel default(present) !$acc loop gang worker do iEdge = edgeStart, edgeEnd @@ -7770,6 +7806,8 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel + !$ser accdata tend_ru1=tend_ru + ! Second, the horizontal filter for rtheta_m and rho_zz !$acc parallel default(present) !$acc loop gang worker @@ -7802,6 +7840,10 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel + !$ser accdata tend_rho2=tend_rho + !$ser accdata tend_rt2=tend_rt + + ! Third (and last), the horizontal filter for ru !$acc parallel default(present) !$acc loop gang worker private(divergence1, divergence2, vorticity1, vorticity2) @@ -7888,10 +7930,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me !$acc end parallel !$ser mode write - !$ser accdata divergence1=divergence1 - !$ser accdata divergence2=divergence2 - !$ser accdata vorticity1=vorticity1 - !$ser accdata vorticity2=vorticity2 + !$ser accdata tend_ru_fin=tend_ru MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') !$acc exit data delete(rho_driving_values, rt_driving_values, & From 3f314195195aa87cd2282a70665cf67adfe15454 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 24 Apr 2025 14:56:48 -0600 Subject: [PATCH 64/79] data movement around halo exchanges --- .../dynamics/mpas_atm_time_integration.F | 224 +++++++++++++----- src/core_atmosphere/mpas_atm_core.F | 10 +- 2 files changed, 176 insertions(+), 58 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 556fc57472..597a917f4c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1245,7 +1245,6 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) call mpas_get_time(nowTime, dateTimeString=xtime_new) !$ser init directory='./ser_data' prefix='mpas_dycore' prefix_ref='mpas_dycore-mem' - !$ser savepoint atm_srk3 datetime=xtime_new if (trim(config_time_integration) == 'SRK3') then call atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) @@ -1348,6 +1347,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: rqvdynten, rthdynten, theta_m + real (kind=RKIND), dimension(:,:), pointer :: pressure_p, rtheta_p, exner, tend_u + real (kind=RKIND), dimension(:,:), pointer :: rho_pp, rtheta_pp, ru_p, rw_p, pv_edge, rho_edge real (kind=RKIND) :: theta_local, fac_m #ifndef MPAS_CAM_DYCORE @@ -1360,6 +1361,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_get_time(nowTime, dateTimeString=xtime_new) + !$ser savepoint atm_srk3 datetime=xtime_new rank=domain%dm_info%my_proc_id ! ! Retrieve configuration options @@ -1503,11 +1505,26 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! ! Communicate halos for theta_m, scalars, pressure_p, and rtheta_p ! + call mpas_pool_get_array(state, 'theta_m', theta_m, 1) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) + !$ser mode write + !$ser data theta_m_1_pre=theta_m + !$ser data scalars_1_pre=scalars_1 + !$ser data pressure_p_pre=pressure_p + !$ser data rtheta_p_pre=rtheta_p call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') - + !$ser data theta_m_1_post=theta_m + !$ser data scalars_1_post=scalars_1 + !$ser data pressure_p_post=pressure_p + !$ser data rtheta_p_post=rtheta_p + !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) + call mpas_timer_start('atm_rk_integration_setup') - !$ser savepoint rk_integration_setup datetime=xtime_new + ! $ser savepoint rk_integration_setup datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1524,7 +1541,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_stop('atm_rk_integration_setup') call mpas_timer_start('atm_compute_moist_coefficients') - !$ser savepoint moist_coefficients datetime=xtime_new + ! $ser savepoint moist_coefficients datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1576,7 +1593,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_start('atm_compute_vert_imp_coefs') rk_step = 1 - !$ser savepoint vert_imp_coefs datetime=xtime_new dyn_substep=dynamics_substep + ! $ser savepoint vert_imp_coefs datetime=xtime_new dyn_substep=dynamics_substep !$OMP PARALLEL DO do thread=1,nThreads @@ -1589,8 +1606,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !$OMP END PARALLEL DO call mpas_timer_stop('atm_compute_vert_imp_coefs') + + call mpas_pool_get_array(diag, 'exner', exner) + + !$acc update self(exner) + !$ser data exner_pre=exner call exchange_halo_group(domain, 'dynamics:exner') - + !$ser data exner_post=exner + !$acc update device(exner) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! BEGIN Runge-Kutta loop @@ -1630,7 +1653,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) delsq_vorticity(:,nVertices+1) = 0.0_RKIND allocate(dpdz(nVertLevels,nCells+1)) dpdz(:,nCells+1) = 0.0_RKIND - !$ser savepoint compute_dyn_tend datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + ! $ser savepoint compute_dyn_tend datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step !$OMP PARALLEL DO do thread=1,nThreads @@ -1661,9 +1684,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! we are solving for all edges of owned cells to minimize communications ! during the acoustic substeps !*********************************** + !$ser savepoint rk_loop datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step ! tend_u + call mpas_pool_get_array(tend, 'u', tend_u) + !$acc update self(tend_u) + !$ser data tend_u_pre=tend_u call exchange_halo_group(domain, 'dynamics:tend_u') + !$ser data tend_u_post=tend_u + !$acc update device(tend_u) call mpas_timer_start('small_step_prep') @@ -1679,7 +1708,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !------------------------------------------------------------------------------------------------------------------------ if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS dry dynamics in the specified zone - !$ser savepoint apply_lbc datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + ! $ser savepoint apply_lbc datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step allocate(ru_driving_tend(nVertLevels,nEdges+1)) allocate(rt_driving_tend(nVertLevels,nCells+1)) @@ -1689,10 +1718,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) rho_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rho_zz', 0.0_RKIND ) - !$ser mode write - !$ser data ru_driving_tend=ru_driving_tend - !$ser data tert_driving_tendnd_rt=rt_driving_tend - !$ser data rho_driving_tend=rho_driving_tend + ! $ser mode write + ! $ser data ru_driving_tend=ru_driving_tend + ! $ser data tert_driving_tendnd_rt=rt_driving_tend + ! $ser data rho_driving_tend=rho_driving_tend !$OMP PARALLEL DO do thread=1,nThreads @@ -1721,10 +1750,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) rho_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step ) - !$ser mode write - !$ser data ru_driving_values=ru_driving_values - !$ser data rt_driving_values=rt_driving_values - !$ser data rho_driving_values=rho_driving_values + ! $ser mode write + ! $ser data ru_driving_values=ru_driving_values + ! $ser data rt_driving_values=rt_driving_values + ! $ser data rho_driving_values=rho_driving_values call mpas_timer_start('atm_bdy_adjust_dynamics_relaxzone_tend') !$OMP PARALLEL DO @@ -1752,8 +1781,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! do small_step = 1, number_sub_steps(rk_step) + !$ser savepoint acoustic datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step small_step=small_step + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc update self(rho_pp) + !$ser data rho_pp_pre=rho_pp call exchange_halo_group(domain, 'dynamics:rho_pp') + !$ser data rho_pp_post=rho_pp + !$acc update device(rho_pp) call mpas_timer_start('atm_advance_acoustic_step') @@ -1776,7 +1811,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! rtheta_pp ! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rtheta_pp) + !$ser data rtheta_pp_pre=rtheta_pp call exchange_halo_group(domain, 'dynamics:rtheta_pp') + !$ser data rtheta_pp_post=rtheta_pp + !$acc update device(rtheta_pp) ! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -1796,7 +1836,23 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! ! Communicate halos for rw_p[1,2], ru_p[1,2], rho_pp[1,2], rtheta_pp[2] ! + !$ser savepoint rk_loop2 datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step + + call mpas_pool_get_array(diag, 'ru_p', ru_p) + call mpas_pool_get_array(diag, 'rw_p', rw_p) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) + !$ser data rw_p_pre=rw_p + !$ser data ru_p_pre=ru_p + !$ser data rho_pp_pre=rho_pp + !$ser data rtheta_pp_pre=rtheta_pp call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp') + !$ser data rw_p_post=rw_p + !$ser data ru_p_post=ru_p + !$ser data rho_pp_post=rho_pp + !$ser data rtheta_pp_post=rtheta_pp + !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) call mpas_timer_start('atm_recover_large_step_variables') @@ -1872,20 +1928,24 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) end if ! regional_MPAS addition !------------------------------------------------------------------- - + call mpas_pool_get_array(state, 'u', u, 2) + !$acc update self(u) + !$ser data u_pre=u ! u if (config_apply_lbcs) then call exchange_halo_group(domain, 'dynamics:u_123') else call exchange_halo_group(domain, 'dynamics:u_3') end if + !$ser data u_post=u + !$acc update device(u) ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). ! PD or monotonicity constraints applied only on the final Runge-Kutta substep. if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then - !$ser savepoint advance_scalars1 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + ! $ser savepoint advance_scalars1 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & @@ -1893,7 +1953,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + !$ser data scalars_2_pre_1=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') + !$ser data scalars_2_post_1=scalars_2 + !$acc update device(scalars_2) allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1938,18 +2003,33 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) deallocate(ke_edge) call mpas_timer_stop('atm_compute_solve_diagnostics') - + call mpas_pool_get_array(state, 'w', w, 2) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc update self(w,pv_edge,rho_edge) + !$ser data w_pre=w + !$ser data pv_edge_pre=pv_edge + !$ser data rho_edge_pre=rho_edge if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + !$ser data scalars_2_pre_2=scalars_2 call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars') + !$ser data scalars_2_post_2=scalars_2 + !$acc update device(scalars_2) else ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2] ! call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge') end if + !$ser data w_post=w + !$ser data pv_edge_post=pv_edge + !$ser data rho_edge_post=rho_edge + !$acc update device(w,pv_edge,rho_edge) ! set the zero-gradient condition on w for regional_MPAS @@ -1963,7 +2043,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !$OMP END PARALLEL DO ! w halo values needs resetting after regional boundary update + call mpas_pool_get_array(state, 'w', w, 2) + !$acc update self(w) + !$ser data w_pre=w call exchange_halo_group(domain, 'dynamics:w') + !$ser data w_post=w + !$acc update device(w) end if ! end of regional_MPAS addition @@ -1974,7 +2059,18 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! ! Communicate halos for theta_m[1,2], pressure_p[1,2], and rtheta_p[1,2] ! + call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,pressure_p,rtheta_p) + !$ser data theta_m_pre=theta_m + !$ser data pressure_p_pre=pressure_p + !$ser data rtheta_p_pre=rtheta_p call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p') + !$ser data theta_m_post=theta_m + !$ser data pressure_p_post=pressure_p + !$ser data rtheta_p_post=rtheta_p + !$acc update device(theta_m,pressure_p,rtheta_p) ! ! Note: A halo exchange for 'exner' here as well as after the call @@ -2032,7 +2128,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) RK3_SPLIT_TRANSPORT : do rk_step = 1, 3 ! Runge-Kutta loop - !$ser savepoint advance_scalars2 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + ! $ser savepoint advance_scalars2 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) @@ -2040,7 +2136,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport ! need to fill halo for horizontal filter + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + !$ser data scalars_2_pre=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') + !$ser data scalars_2_post=scalars_2 + !$acc update device(scalars_2) allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2066,7 +2167,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !------------------------------------------------------------------------------------------------------------------------ if (rk_step < 3) then + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + !$ser data scalars_2_pre=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') + !$ser data scalars_2_post=scalars_2 + !$acc update device(scalars_2) end if end do RK3_SPLIT_TRANSPORT @@ -2179,7 +2285,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) if (config_apply_lbcs) then ! adjust boundary values for regional_MPAS scalar transport + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + !$ser data scalars_2_pre=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') + !$ser data scalars_2_post=scalars_2 + !$acc update device(scalars_2) + allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2426,12 +2538,12 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$ser mode write - !$ser accdata rtheta_p=rtheta_p - !$ser accdata rho_p=rho_p - !$ser accdata theta_m_1=theta_m_1 - !$ser accdata scalars_1=scalars_1 - !$ser accdata rho_zz_1=rho_zz_1 + ! $ser mode write + ! $ser accdata rtheta_p=rtheta_p + ! $ser accdata rho_p=rho_p + ! $ser accdata theta_m_1=theta_m_1 + ! $ser accdata scalars_1=scalars_1 + ! $ser accdata rho_zz_1=rho_zz_1 !$acc kernels @@ -2480,7 +2592,7 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel - !$ser accdata scalars_2=scalars_2 + ! $ser accdata scalars_2=scalars_2 end subroutine atm_rk_integration_setup @@ -2584,9 +2696,9 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel - !$ser mode write - !$ser accdata cqu=cqu - !$ser accdata cqw=cqw + ! $ser mode write + ! $ser accdata cqu=cqu + ! $ser accdata cqw=cqw MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') !$acc exit data copyout(qtot) @@ -2728,9 +2840,9 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$ser mode write - !$ser accdata p=p - !$ser accdata t=t + ! $ser mode write + ! $ser accdata p=p + ! $ser accdata t=t ! set coefficients @@ -2811,8 +2923,8 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, end do ! loop over cells !$acc end parallel - !$ser accdata cofrz=cofrz - !$ser accdata alpha_tri=alpha_tri + ! $ser accdata cofrz=cofrz + ! $ser accdata alpha_tri=alpha_tri MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') !$acc exit data copyout( b_tri, c_tri) @@ -4070,8 +4182,8 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - !$ser accdata scalar_new=scalar_new - !$ser accdata uhAvg=uhAvg + ! $ser accdata scalar_new=scalar_new + ! $ser accdata uhAvg=uhAvg !$acc parallel @@ -4172,7 +4284,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - !$ser accdata scalar_old=scalar_old + ! $ser accdata scalar_old=scalar_old !$acc parallel wait @@ -4253,9 +4365,9 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & end do !$acc end parallel - !$ser accdata scalar_tend_save=scalar_tend_save - !$ser accdata scalar_tend_column=scalar_tend_column - !$ser accdata scalar_new2=scalar_new + ! $ser accdata scalar_tend_save=scalar_tend_save + ! $ser accdata scalar_tend_column=scalar_tend_column + ! $ser accdata scalar_new2=scalar_new MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc exit data delete(scalar_tend_column, wdtn, & @@ -4530,7 +4642,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') - !$ser accdata scalars_old_mono=scalars_old + ! $ser accdata scalars_old_mono=scalars_old !$acc parallel @@ -5126,7 +5238,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! loop over scalars - !$ser accdata scalars_new_mono2=scalars_new + ! $ser accdata scalars_new_mono2=scalars_new MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') @@ -7671,11 +7783,11 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end do !$acc end parallel - !$ser mode write - !$ser accdata tend_ru=tend_ru - !$ser accdata tend_rt=tend_rt - !$ser accdata tend_rw=tend_rw - !$ser accdata tend_rho=tend_rho + ! $ser mode write + ! $ser accdata tend_ru=tend_ru + ! $ser accdata tend_rt=tend_rt + ! $ser accdata tend_rw=tend_rw + ! $ser accdata tend_rho=tend_rho MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') @@ -7788,9 +7900,9 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel - !$ser mode write - !$ser accdata tend_rho1=tend_rho - !$ser accdata tend_rt1=tend_rt + ! $ser mode write + ! $ser accdata tend_rho1=tend_rho + ! $ser accdata tend_rt1=tend_rt !$acc parallel default(present) @@ -7806,7 +7918,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel - !$ser accdata tend_ru1=tend_ru + ! $ser accdata tend_ru1=tend_ru ! Second, the horizontal filter for rtheta_m and rho_zz !$acc parallel default(present) @@ -7840,8 +7952,8 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel - !$ser accdata tend_rho2=tend_rho - !$ser accdata tend_rt2=tend_rt + ! $ser accdata tend_rho2=tend_rho + ! $ser accdata tend_rt2=tend_rt ! Third (and last), the horizontal filter for ru @@ -7929,8 +8041,8 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel - !$ser mode write - !$ser accdata tend_ru_fin=tend_ru + ! $ser mode write + ! $ser accdata tend_ru_fin=tend_ru MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') !$acc exit data delete(rho_driving_values, rt_driving_values, & diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index c724c9183c..cd1b0da545 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -76,7 +76,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) integer, pointer :: nVertLevels, maxEdges, maxEdges2, num_scalars character (len=ShortStrKIND) :: init_stream_name real (kind=R8KIND) :: input_start_time, input_stop_time - + real (kind=RKIND), dimension(:,:), pointer :: pv_edge, ru, rw ierr = 0 @@ -94,6 +94,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) ! Set up inner dimensions used by arrays in optimized dynamics routines ! call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels) call mpas_pool_get_dimension(state, 'maxEdges', maxEdges) call mpas_pool_get_dimension(state, 'maxEdges2', maxEdges2) @@ -285,8 +286,13 @@ function atm_core_init(domain, startTimeStamp) result(ierr) block => block % next end do - + + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + call mpas_pool_get_array(diag, 'ru', ru) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc update self(pv_edge,ru,rw) call exchange_halo_group(domain, 'initialization:pv_edge,ru,rw') + !$acc update device(pv_edge,ru,rw) call mpas_atm_diag_setup(domain % streamManager, domain % blocklist % configs, & domain % blocklist % structs, domain % clock, domain % dminfo) From 63a88b33d495c5f59d3ff427dcaed69099ecaa61 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 24 Apr 2025 16:49:44 -0600 Subject: [PATCH 65/79] restart files producing correct results for no_physics case with 1 rank --- .../dynamics/mpas_atm_time_integration.F | 70 ++++++++++++++++++- src/core_atmosphere/mpas_atm_core.F | 6 +- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 597a917f4c..1a1db29870 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -583,7 +583,7 @@ subroutine mpas_atm_pre_init(domain) real (kind=RKIND), dimension(:,:), pointer :: rho_p_save real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp - real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, v + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri real (kind=RKIND), dimension(:), pointer :: cofrz @@ -660,6 +660,8 @@ subroutine mpas_atm_pre_init(domain) !$acc enter data copyin(pressure_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'pressure_base', pressure_base) !$acc enter data copyin(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc enter data copyin(pressure) call mpas_pool_get_array(diag, 'v', v) !$acc enter data copyin(v) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) @@ -930,6 +932,72 @@ subroutine mpas_atm_pre_init(domain) end subroutine mpas_atm_pre_init + subroutine mpas_post_step_d2h(inPool) + + implicit none + + TYPE(mpas_pool_type), INTENT(IN) :: inPool + + +#ifdef MPAS_OPENACC + + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: theta, rho, pressure_base, pressure_p, pressure + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base, rtheta_p, rtheta_base, rho_p, ru, rw + + + nullify(state) + nullify(diag) + + + call mpas_pool_get_subpool(inPool, 'state', state) + call mpas_pool_get_subpool(inPool, 'diag', diag) + + + call mpas_pool_get_array(state, 'u', u_1, 1) + call mpas_pool_get_array(state, 'u', u_2, 2) + call mpas_pool_get_array(state, 'w', w_1, 1) + call mpas_pool_get_array(state, 'w', w_2, 2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + + !$acc update self(u_1, u_2, w_1, w_2, theta_m_1, theta_m_2, & + !$acc rho_zz_1, rho_zz_2, scalars_1, scalars_2) + + call mpas_pool_get_array(diag, 'theta', theta) + call mpas_pool_get_array(diag, 'rho', rho) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc update self(theta, rho, pressure_p, pressure_base, pressure) + + call mpas_pool_get_array(diag, 'exner', exner) + call mpas_pool_get_array(diag, 'exner_base', exner_base) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + call mpas_pool_get_array(diag, 'ru', ru) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc update self(exner, exner_base, rtheta_p, rtheta_base, rho_p, ru, rw) + + +#endif + + end subroutine mpas_post_step_d2h + !---------------------------------------------------------------------------- ! routine MPAS_atm_dynamics_finalize diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index cd1b0da545..af45103337 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -603,6 +603,7 @@ function atm_core_run(domain) result(ierr) use mpas_derived_types, only : MPAS_STREAM_LATEST_BEFORE, MPAS_STREAM_INPUT, MPAS_STREAM_INPUT_OUTPUT use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_atm_boundaries, only : mpas_atm_update_bdy_tend + use atm_time_integration, only : mpas_post_step_d2h use mpas_atm_diagnostics_manager, only : mpas_atm_diag_update, mpas_atm_diag_compute, mpas_atm_diag_reset implicit none @@ -825,7 +826,7 @@ function atm_core_run(domain) result(ierr) call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh) call mpas_pool_get_subpool(block_ptr % structs, 'tend', tend) call atm_compute_output_diagnostics(state, 1, diag, mesh) - + call mpas_post_step_d2h(block_ptr % structs) block_ptr => block_ptr % next end do end if @@ -933,6 +934,8 @@ subroutine atm_compute_output_diagnostics(state, time_lev, diag, mesh) call mpas_pool_get_array(mesh, 'zz', zz) + !$acc parallel + !$acc loop gang vector collapse(2) do iCell=1,nCells do k=1,nVertLevels theta(k,iCell) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) @@ -940,6 +943,7 @@ subroutine atm_compute_output_diagnostics(state, time_lev, diag, mesh) pressure(k,iCell) = pressure_base(k,iCell) + pressure_p(k,iCell) end do end do + !$acc end parallel end subroutine atm_compute_output_diagnostics From b3bc23dd469db9902b1b253460b033810ad249e9 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 29 Apr 2025 10:46:10 -0600 Subject: [PATCH 66/79] Initial OpenACC port of mpas_atm_update_bdy_tend --- .../dynamics/mpas_atm_boundaries.F | 154 +++++++++++++++--- 1 file changed, 134 insertions(+), 20 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index fca1734138..a947b1bfae 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -99,9 +99,12 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (mpas_pool_type), pointer :: lbc real (kind=RKIND) :: dt - integer, pointer :: nCells - integer, pointer :: nEdges - integer, pointer :: index_qv + integer, pointer :: nCells_ptr + integer, pointer :: nEdges_ptr + integer, pointer :: nVertLevels_ptr + integer, pointer :: index_qv_ptr + integer, pointer :: nScalars_ptr + integer :: nCells, nEdges, nVertLevels, index_qv, nScalars real (kind=RKIND), dimension(:,:), pointer :: u real (kind=RKIND), dimension(:,:), pointer :: ru @@ -129,7 +132,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (MPAS_Time_Type) :: currTime type (MPAS_TimeInterval_Type) :: lbc_interval character(len=StrKIND) :: read_time - integer :: iEdge + integer :: iEdge, iCell, k, j integer :: cell1, cell2 @@ -167,6 +170,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr ! Compute any derived fields from those that were read from the lbc_in stream ! call mpas_pool_get_array(lbc, 'lbc_u', u, 2) + call mpas_pool_get_array(lbc, 'lbc_w', w, 2) call mpas_pool_get_array(lbc, 'lbc_ru', ru, 2) call mpas_pool_get_array(lbc, 'lbc_rho_edge', rho_edge, 2) call mpas_pool_get_array(lbc, 'lbc_theta', theta, 2) @@ -176,26 +180,84 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_array(lbc, 'lbc_scalars', scalars, 2) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) - call mpas_pool_get_dimension(mesh, 'nCells', nCells) - call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) - call mpas_pool_get_dimension(lbc, 'index_qv', index_qv) + call mpas_pool_get_dimension(mesh, 'nCells', nCells_ptr) + call mpas_pool_get_dimension(mesh, 'nEdges', nEdges_ptr) + call mpas_pool_get_dimension(mesh, 'nVertLevels', nVertLevels_ptr) + call mpas_pool_get_dimension(state, 'num_scalars', nScalars_ptr) + call mpas_pool_get_dimension(lbc, 'index_qv', index_qv_ptr) call mpas_pool_get_array(mesh, 'zz', zz) + if (.not. firstCall) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + + !$acc enter data copyin(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & + !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & + !$acc lbc_tend_rho, lbc_tend_scalars) + end if + !$acc enter data copyin(u, w, theta, rho, scalars) + !$acc enter data create(ru, rho_edge, rtheta_m, rho_zz) + + ! Dereference the pointers to avoid non-array pointer for OpenACC + nCells = nCells_ptr + nEdges = nEdges_ptr + nVertLevels = nVertLevels_ptr + nScalars = nScalars_ptr + index_qv = index_qv_ptr + ! Compute lbc_rho_zz + + !$acc kernels zz(:,nCells+1) = 1.0_RKIND ! Avoid potential division by zero in the following line - rho_zz(:,:) = rho(:,:) / zz(:,:) + !$acc end kernels + + !$acc parallel + ! Compute lbc_rho_zz + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + rho_zz(k,iCell) = rho(k,iCell) / zz(k,iCell) + end do + end do + !$acc end parallel ! Average lbc_rho_zz to edges + !$acc parallel + !$acc loop gang worker do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) if (cell1 > 0 .and. cell2 > 0) then - rho_edge(:,iEdge) = 0.5_RKIND * (rho_zz(:,cell1) + rho_zz(:,cell2)) + !$acc loop vector + do k = 1, nVertLevels + rho_edge(k,iEdge) = 0.5_RKIND * (rho_zz(k,cell1) + rho_zz(k,cell2)) + end do end if end do + !$acc end parallel + + !$acc parallel + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + ru(k,iEdge) = u(k,iEdge) * rho_edge(k,iEdge) + end do + end do - ru(:,:) = u(:,:) * rho_edge(:,:) - rtheta_m(:,:) = theta(:,:) * rho_zz(:,:) * (1.0_RKIND + rvord * scalars(index_qv,:,:)) + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + rtheta_m(k,iCell) = theta(k,iCell) * rho_zz(k,iCell) * (1.0_RKIND + rvord * scalars(index_qv,k,iCell)) + end do + end do + !$acc end parallel if (.not. firstCall) then lbc_interval = currTime - LBC_intv_end @@ -225,15 +287,58 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr dt = 1.0_RKIND / dt - lbc_tend_u(:,:) = (u(:,:) - lbc_tend_u(:,:)) * dt - lbc_tend_ru(:,:) = (ru(:,:) - lbc_tend_ru(:,:)) * dt - lbc_tend_rho_edge(:,:) = (rho_edge(:,:) - lbc_tend_rho_edge(:,:)) * dt - lbc_tend_w(:,:) = (w(:,:) - lbc_tend_w(:,:)) * dt - lbc_tend_theta(:,:) = (theta(:,:) - lbc_tend_theta(:,:)) * dt - lbc_tend_rtheta_m(:,:) = (rtheta_m(:,:) - lbc_tend_rtheta_m(:,:)) * dt - lbc_tend_rho_zz(:,:) = (rho_zz(:,:) - lbc_tend_rho_zz(:,:)) * dt - lbc_tend_rho(:,:) = (rho(:,:) - lbc_tend_rho(:,:)) * dt - lbc_tend_scalars(:,:,:) = (scalars(:,:,:) - lbc_tend_scalars(:,:,:)) * dt + + !$acc parallel default(present) + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + lbc_tend_u(k,iEdge) = (u(k,iEdge) - lbc_tend_u(k,iEdge)) * dt + lbc_tend_ru(k,iEdge) = (ru(k,iEdge) - lbc_tend_ru(k,iEdge)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + lbc_tend_rho_edge(k,iEdge) = (rho_edge(k,iEdge) - lbc_tend_rho_edge(k,iEdge)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels+1 + lbc_tend_w(k,iCell) = (w(k,iCell) - lbc_tend_w(k,iCell)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + lbc_tend_theta(k,iCell) = (theta(k,iCell) - lbc_tend_theta(k,iCell)) * dt + lbc_tend_rtheta_m(k,iCell) = (rtheta_m(k,iCell) - lbc_tend_rtheta_m(k,iCell)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + lbc_tend_rho_zz(k,iCell) = (rho_zz(k,iCell) - lbc_tend_rho_zz(k,iCell)) * dt + lbc_tend_rho(k,iCell) = (rho(k,iCell) - lbc_tend_rho(k,iCell)) * dt + end do + end do + !$acc end parallel + + !$acc parallel default(present) + !$acc loop gang + do iCell=1,nCells+1 + !$acc loop vector collapse(2) + do k=1,nVertLevels + do j = 1,nScalars + lbc_tend_scalars(j,k,iCell) = (scalars(j,k,iCell) - lbc_tend_scalars(j,k,iCell)) * dt + end do + end do + end do + !$acc end parallel ! ! Logging the lbc start and end times appears to be backwards, but @@ -249,6 +354,15 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if + if (.not. firstCall) then + !$acc exit data copyout(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & + !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & + !$acc lbc_tend_rho, lbc_tend_scalars) + end if + + !$acc exit data copyout(ru, rho_edge, rtheta_m, rho_zz) + !$acc exit data delete(u, w, theta, rho, scalars) + LBC_intv_end = currTime end subroutine mpas_atm_update_bdy_tend From 8bc1e966fa2c783a12913fe0607b9dae514c3ef1 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 29 Apr 2025 10:56:53 -0600 Subject: [PATCH 67/79] Adding timers mpas_atm_update_bdy_tend [ACC_data_xfer] --- src/core_atmosphere/dynamics/mpas_atm_boundaries.F | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index a947b1bfae..0af66b8a20 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -187,6 +187,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_dimension(lbc, 'index_qv', index_qv_ptr) call mpas_pool_get_array(mesh, 'zz', zz) + MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') if (.not. firstCall) then call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) @@ -204,6 +205,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if !$acc enter data copyin(u, w, theta, rho, scalars) !$acc enter data create(ru, rho_edge, rtheta_m, rho_zz) + MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') ! Dereference the pointers to avoid non-array pointer for OpenACC nCells = nCells_ptr @@ -354,6 +356,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if + MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') if (.not. firstCall) then !$acc exit data copyout(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & @@ -362,6 +365,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr !$acc exit data copyout(ru, rho_edge, rtheta_m, rho_zz) !$acc exit data delete(u, w, theta, rho, scalars) + MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') LBC_intv_end = currTime From 2720fcc614a3096ceeb35f78395997e686d8b520 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 30 Apr 2025 21:59:08 -0600 Subject: [PATCH 68/79] consolidating data movements used in mpas_atm_boundaries --- .../dynamics/mpas_atm_boundaries.F | 64 +++++-------------- .../dynamics/mpas_atm_time_integration.F | 50 +++++++++++++++ 2 files changed, 65 insertions(+), 49 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index a746d8b2c4..784f523edb 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -197,14 +197,9 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) - call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) - - !$acc enter data copyin(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & - !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & - !$acc lbc_tend_rho, lbc_tend_scalars) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) end if - !$acc enter data copyin(u, w, theta, rho, scalars) - !$acc enter data create(ru, rho_edge, rtheta_m, rho_zz) + !$acc update device(u, w, theta, rho, scalars) MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') ! Dereference the pointers to avoid non-array pointer for OpenACC @@ -216,11 +211,11 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr ! Compute lbc_rho_zz - !$acc kernels + !$acc kernels default(present) zz(:,nCells+1) = 1.0_RKIND ! Avoid potential division by zero in the following line !$acc end kernels - !$acc parallel + !$acc parallel default(present) ! Compute lbc_rho_zz !$acc loop gang vector collapse(2) do iCell=1,nCells+1 @@ -231,7 +226,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr !$acc end parallel ! Average lbc_rho_zz to edges - !$acc parallel + !$acc parallel default(present) !$acc loop gang worker do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) @@ -245,7 +240,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end do !$acc end parallel - !$acc parallel + !$acc parallel default(present) !$acc loop gang vector collapse(2) do iEdge=1,nEdges+1 do k=1,nVertLevels @@ -356,17 +351,6 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if - MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') - if (.not. firstCall) then - !$acc exit data copyout(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & - !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & - !$acc lbc_tend_rho, lbc_tend_scalars) - end if - - !$acc exit data copyout(ru, rho_edge, rtheta_m, rho_zz) - !$acc exit data delete(u, w, theta, rho, scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') - LBC_intv_end = currTime end subroutine mpas_atm_update_bdy_tend @@ -428,19 +412,10 @@ function mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t) nullify(tend) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) + MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc enter data create(return_tend) - if (associated(tend)) then - !$acc enter data copyin(tend) - else - call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) - !$acc enter data copyin(tend_scalars) - - ! Ensure the integer pointed to by idx_ptr is copied to the gpu device - call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr) - idx = idx_ptr - end if MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc parallel default(present) @@ -463,11 +438,6 @@ function mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t) MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc exit data copyout(return_tend) - if (associated(tend)) then - !$acc exit data delete(tend) - else - !$acc exit data delete(tend_scalars) - end if MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') end function mpas_atm_get_bdy_tend @@ -510,6 +480,8 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta use mpas_pool_routines, only : mpas_pool_get_error_level, mpas_pool_set_error_level use mpas_derived_types, only : MPAS_POOL_SILENT + use mpas_log, only : mpas_log_write + implicit none @@ -571,8 +543,7 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta ! if (associated(tend) .and. associated(state)) then MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data create(return_state) & - !$acc copyin(tend, state) + !$acc enter data create(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) @@ -585,8 +556,7 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta !$acc end parallel MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data copyout(return_state) & - !$acc delete(tend, state) + !$acc exit data copyout(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) @@ -596,8 +566,7 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta idx=idx_ptr ! Avoid non-array pointer for OpenACC MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data create(return_state) & - !$acc copyin(tend_scalars, state_scalars) + !$acc enter data create(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) @@ -610,8 +579,7 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta !$acc end parallel MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data copyout(return_state) & - !$acc delete(tend_scalars, state_scalars) + !$acc exit data copyout(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') end if @@ -694,8 +662,7 @@ function mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, fi call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2) MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc enter data create(return_state) & - !$acc copyin(tend, state) + !$acc enter data create(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') !$acc parallel default(present) @@ -710,8 +677,7 @@ function mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, fi !$acc end parallel MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc exit data copyout(return_state) & - !$acc delete(tend, state) + !$acc exit data copyout(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') end function mpas_atm_get_bdy_state_3d diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 4579ada876..2731467569 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -513,6 +513,7 @@ subroutine mpas_atm_pre_init(domain) type (mpas_pool_type), pointer :: diag type (mpas_pool_type), pointer :: tend type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc real (kind=RKIND), dimension(:), pointer :: dvEdge @@ -603,16 +604,24 @@ subroutine mpas_atm_pre_init(domain) real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + nullify(state) nullify(diag) nullify(mesh) nullify(tend) nullify(tend_physics) + nullify(lbc) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) call mpas_pool_get_array(diag, 'ru', ru) !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics @@ -763,6 +772,47 @@ subroutine mpas_atm_pre_init(domain) !$acc enter data create(scalar_tend_save) + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc enter data create(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc enter data create(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc enter data create(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc enter data create(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc enter data create(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc enter data create(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc enter data create(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc enter data create(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc enter data create(lbc_scalars) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc enter data create(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc enter data create(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc enter data create(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc enter data create(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc enter data create(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc enter data create(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc enter data create(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc enter data create(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc enter data create(lbc_tend_scalars) + + + ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) ! !$acc enter data copyin(dvEdge) From 508f551cad891a083a214aa572ccab1238b750a2 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 5 May 2025 23:23:33 -0600 Subject: [PATCH 69/79] working lam with physics, need to check radiation --- .../dynamics/mpas_atm_boundaries.F | 152 +-- src/core_atmosphere/dynamics/mpas_atm_iau.F | 4 + .../dynamics/mpas_atm_time_integration.F | 872 +++++++++--------- src/core_atmosphere/mpas_atm_core.F | 24 +- .../physics/mpas_atmphys_interface.F | 7 +- .../physics/mpas_atmphys_todynamics.F | 9 + 6 files changed, 503 insertions(+), 565 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index 784f523edb..cf76aa5c4a 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -99,12 +99,9 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (mpas_pool_type), pointer :: lbc real (kind=RKIND) :: dt - integer, pointer :: nCells_ptr - integer, pointer :: nEdges_ptr - integer, pointer :: nVertLevels_ptr - integer, pointer :: index_qv_ptr - integer, pointer :: nScalars_ptr - integer :: nCells, nEdges, nVertLevels, index_qv, nScalars + integer, pointer :: nCells + integer, pointer :: nEdges + integer, pointer :: index_qv real (kind=RKIND), dimension(:,:), pointer :: u real (kind=RKIND), dimension(:,:), pointer :: ru @@ -132,7 +129,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (MPAS_Time_Type) :: currTime type (MPAS_TimeInterval_Type) :: lbc_interval character(len=StrKIND) :: read_time - integer :: iEdge, iCell, k, j + integer :: iEdge integer :: cell1, cell2 @@ -170,7 +167,6 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr ! Compute any derived fields from those that were read from the lbc_in stream ! call mpas_pool_get_array(lbc, 'lbc_u', u, 2) - call mpas_pool_get_array(lbc, 'lbc_w', w, 2) call mpas_pool_get_array(lbc, 'lbc_ru', ru, 2) call mpas_pool_get_array(lbc, 'lbc_rho_edge', rho_edge, 2) call mpas_pool_get_array(lbc, 'lbc_theta', theta, 2) @@ -180,81 +176,26 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_array(lbc, 'lbc_scalars', scalars, 2) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) - call mpas_pool_get_dimension(mesh, 'nCells', nCells_ptr) - call mpas_pool_get_dimension(mesh, 'nEdges', nEdges_ptr) - call mpas_pool_get_dimension(mesh, 'nVertLevels', nVertLevels_ptr) - call mpas_pool_get_dimension(state, 'num_scalars', nScalars_ptr) - call mpas_pool_get_dimension(lbc, 'index_qv', index_qv_ptr) + call mpas_pool_get_dimension(mesh, 'nCells', nCells) + call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) + call mpas_pool_get_dimension(lbc, 'index_qv', index_qv) call mpas_pool_get_array(mesh, 'zz', zz) - MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') - if (.not. firstCall) then - call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) - call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) - call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) - call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) - call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) - call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) - call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) - call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) - call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) - end if - !$acc update device(u, w, theta, rho, scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') - - ! Dereference the pointers to avoid non-array pointer for OpenACC - nCells = nCells_ptr - nEdges = nEdges_ptr - nVertLevels = nVertLevels_ptr - nScalars = nScalars_ptr - index_qv = index_qv_ptr - ! Compute lbc_rho_zz - - !$acc kernels default(present) zz(:,nCells+1) = 1.0_RKIND ! Avoid potential division by zero in the following line - !$acc end kernels - - !$acc parallel default(present) - ! Compute lbc_rho_zz - !$acc loop gang vector collapse(2) - do iCell=1,nCells+1 - do k=1,nVertLevels - rho_zz(k,iCell) = rho(k,iCell) / zz(k,iCell) - end do - end do - !$acc end parallel + rho_zz(:,:) = rho(:,:) / zz(:,:) ! Average lbc_rho_zz to edges - !$acc parallel default(present) - !$acc loop gang worker do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) if (cell1 > 0 .and. cell2 > 0) then - !$acc loop vector - do k = 1, nVertLevels - rho_edge(k,iEdge) = 0.5_RKIND * (rho_zz(k,cell1) + rho_zz(k,cell2)) - end do + rho_edge(:,iEdge) = 0.5_RKIND * (rho_zz(:,cell1) + rho_zz(:,cell2)) end if end do - !$acc end parallel - - !$acc parallel default(present) - !$acc loop gang vector collapse(2) - do iEdge=1,nEdges+1 - do k=1,nVertLevels - ru(k,iEdge) = u(k,iEdge) * rho_edge(k,iEdge) - end do - end do - !$acc loop gang vector collapse(2) - do iCell=1,nCells+1 - do k=1,nVertLevels - rtheta_m(k,iCell) = theta(k,iCell) * rho_zz(k,iCell) * (1.0_RKIND + rvord * scalars(index_qv,k,iCell)) - end do - end do - !$acc end parallel + ru(:,:) = u(:,:) * rho_edge(:,:) + rtheta_m(:,:) = theta(:,:) * rho_zz(:,:) * (1.0_RKIND + rvord * scalars(index_qv,:,:)) if (.not. firstCall) then lbc_interval = currTime - LBC_intv_end @@ -284,58 +225,15 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr dt = 1.0_RKIND / dt - - !$acc parallel default(present) - !$acc loop gang vector collapse(2) - do iEdge=1,nEdges+1 - do k=1,nVertLevels - lbc_tend_u(k,iEdge) = (u(k,iEdge) - lbc_tend_u(k,iEdge)) * dt - lbc_tend_ru(k,iEdge) = (ru(k,iEdge) - lbc_tend_ru(k,iEdge)) * dt - end do - end do - - !$acc loop gang vector collapse(2) - do iEdge=1,nEdges+1 - do k=1,nVertLevels - lbc_tend_rho_edge(k,iEdge) = (rho_edge(k,iEdge) - lbc_tend_rho_edge(k,iEdge)) * dt - end do - end do - - !$acc loop gang vector collapse(2) - do iCell=1,nCells+1 - do k=1,nVertLevels+1 - lbc_tend_w(k,iCell) = (w(k,iCell) - lbc_tend_w(k,iCell)) * dt - end do - end do - - !$acc loop gang vector collapse(2) - do iCell=1,nCells+1 - do k=1,nVertLevels - lbc_tend_theta(k,iCell) = (theta(k,iCell) - lbc_tend_theta(k,iCell)) * dt - lbc_tend_rtheta_m(k,iCell) = (rtheta_m(k,iCell) - lbc_tend_rtheta_m(k,iCell)) * dt - end do - end do - - !$acc loop gang vector collapse(2) - do iCell=1,nCells+1 - do k=1,nVertLevels - lbc_tend_rho_zz(k,iCell) = (rho_zz(k,iCell) - lbc_tend_rho_zz(k,iCell)) * dt - lbc_tend_rho(k,iCell) = (rho(k,iCell) - lbc_tend_rho(k,iCell)) * dt - end do - end do - !$acc end parallel - - !$acc parallel default(present) - !$acc loop gang - do iCell=1,nCells+1 - !$acc loop vector collapse(2) - do k=1,nVertLevels - do j = 1,nScalars - lbc_tend_scalars(j,k,iCell) = (scalars(j,k,iCell) - lbc_tend_scalars(j,k,iCell)) * dt - end do - end do - end do - !$acc end parallel + lbc_tend_u(:,:) = (u(:,:) - lbc_tend_u(:,:)) * dt + lbc_tend_ru(:,:) = (ru(:,:) - lbc_tend_ru(:,:)) * dt + lbc_tend_rho_edge(:,:) = (rho_edge(:,:) - lbc_tend_rho_edge(:,:)) * dt + lbc_tend_w(:,:) = (w(:,:) - lbc_tend_w(:,:)) * dt + lbc_tend_theta(:,:) = (theta(:,:) - lbc_tend_theta(:,:)) * dt + lbc_tend_rtheta_m(:,:) = (rtheta_m(:,:) - lbc_tend_rtheta_m(:,:)) * dt + lbc_tend_rho_zz(:,:) = (rho_zz(:,:) - lbc_tend_rho_zz(:,:)) * dt + lbc_tend_rho(:,:) = (rho(:,:) - lbc_tend_rho(:,:)) * dt + lbc_tend_scalars(:,:,:) = (scalars(:,:,:) - lbc_tend_scalars(:,:,:)) * dt ! ! Logging the lbc start and end times appears to be backwards, but @@ -413,9 +311,16 @@ function mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t) nullify(tend) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc enter data create(return_tend) + if (associated(tend)) then + else + call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) + + ! Ensure the integer pointed to by idx_ptr is copied to the gpu device + call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr) + idx = idx_ptr + end if MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc parallel default(present) @@ -482,7 +387,6 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta use mpas_derived_types, only : MPAS_POOL_SILENT use mpas_log, only : mpas_log_write - implicit none type (mpas_clock_type), intent(in) :: clock diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F index 654fd3ae82..e1b5480eac 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_iau.F +++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F @@ -137,6 +137,7 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten call mpas_pool_get_array(state, 'scalars', scalars, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2) call mpas_pool_get_array(diag , 'rho_edge', rho_edge) + !$acc update self(theta_m, scalars, rho_zz, rho_edge) call mpas_pool_get_dimension(state, 'moist_start', moist_start) call mpas_pool_get_dimension(state, 'moist_end', moist_end) @@ -149,6 +150,8 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten ! call mpas_pool_get_array(tend, 'rho_zz', tend_rho) ! call mpas_pool_get_array(tend, 'theta_m', tend_theta) call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) + !$acc update self(tend_scalars) + call mpas_pool_get_array(tend_iau, 'theta', theta_amb) call mpas_pool_get_array(tend_iau, 'rho', rho_amb) @@ -209,6 +212,7 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten tend_rtheta(k,i) = tend_rtheta(k,i) + tend_th(k,i) enddo enddo + deallocate(theta) deallocate(tend_th) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 2731467569..e78dd2fcc1 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -203,10 +203,7 @@ subroutine mpas_atm_dynamics_init(domain) #ifdef MPAS_OPENACC type (mpas_pool_type), pointer :: mesh - type (mpas_pool_type), pointer :: state - type (mpas_pool_type), pointer :: diag - type (mpas_pool_type), pointer :: tend_physics - + real (kind=RKIND), dimension(:), pointer :: dvEdge integer, dimension(:,:), pointer :: cellsOnCell integer, dimension(:,:), pointer :: cellsOnEdge @@ -262,23 +259,7 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: latCell real (kind=RKIND), dimension(:), pointer :: lonCell real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct - real (kind=RKIND), dimension(:,:), pointer :: ru - real (kind=RKIND), dimension(:,:), pointer :: ru_save - real (kind=RKIND), dimension(:,:), pointer :: rw - real (kind=RKIND), dimension(:,:), pointer :: rw_save - real (kind=RKIND), dimension(:,:), pointer :: rtheta_p - real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save - real (kind=RKIND), dimension(:,:), pointer :: rho_p - real (kind=RKIND), dimension(:,:), pointer :: rho_p_save - real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split - - real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 - real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 - real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 - real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 - real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 - real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split - + real (kind=RKIND), dimension(:,:), pointer :: rthdynten real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init @@ -304,12 +285,9 @@ subroutine mpas_atm_dynamics_init(domain) #ifdef MPAS_OPENACC nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) - nullify(tend_physics) - call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + ! nullify(tend_physics) + ! call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) - - call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) - call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -492,23 +470,114 @@ subroutine mpas_atm_dynamics_init(domain) ! !$acc enter data copyin(tend_ru_physics) ! call mpas_pool_get_array(tend_physics, 'tend_rtheta_physics', tend_rtheta_physics) ! !$acc enter data copyin(tend_rtheta_physics) - call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) - !$acc enter data copyin(rthdynten) + !call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !!$acc enter data copyin(rthdynten) #endif end subroutine mpas_atm_dynamics_init - subroutine mpas_atm_pre_init(domain) + subroutine mpas_atm_pre_computesolvediag_h2d(block) implicit none - type (domain_type), intent(inout) :: domain + type (block_type), intent(inout) :: block #ifdef MPAS_OPENACC - type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, h, u, v, & + vorticity, ke, pv_edge, pv_vertex, pv_cell, gradPVn, gradPVt, & + divergence + + + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + nullify(tend_physics) + call mpas_pool_get_subpool(block % structs, 'tend_physics', tend_physics) + + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc enter data copyin(h_edge) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc enter data copyin(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data copyin(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data copyin(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) +#endif + + end subroutine mpas_atm_pre_computesolvediag_h2d + + + + subroutine mpas_atm_post_computesolvediag_d2h(block) + + implicit none + + type (block_type), intent(inout) :: block + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, h, u, v, & + vorticity, ke, pv_edge, pv_vertex, pv_cell, gradPVn, gradPVt, & + divergence + + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + nullify(tend_physics) + call mpas_pool_get_subpool(block % structs, 'tend_physics', tend_physics) + + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc exit data copyout(h_edge) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc exit data copyout(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data copyout(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data copyout(gradPVt) +#endif + + end subroutine mpas_atm_post_computesolvediag_d2h + + subroutine mpas_atm_pre_dynamics_h2d(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + +#ifdef MPAS_OPENACC type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: diag type (mpas_pool_type), pointer :: tend @@ -516,62 +585,6 @@ subroutine mpas_atm_pre_init(domain) type (mpas_pool_type), pointer :: lbc - real (kind=RKIND), dimension(:), pointer :: dvEdge - integer, dimension(:,:), pointer :: cellsOnCell - integer, dimension(:,:), pointer :: cellsOnEdge - integer, dimension(:,:), pointer :: advCellsForEdge - integer, dimension(:,:), pointer :: edgesOnCell - integer, dimension(:), pointer :: nAdvCellsForEdge - integer, dimension(:), pointer :: nEdgesOnCell - real (kind=RKIND), dimension(:,:), pointer :: adv_coefs - real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd - real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign - real (kind=RKIND), dimension(:), pointer :: invAreaCell - integer, dimension(:), pointer :: bdyMaskCell - integer, dimension(:), pointer :: bdyMaskEdge - real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge - real (kind=RKIND), dimension(:), pointer :: invDvEdge - real (kind=RKIND), dimension(:), pointer :: dcEdge - real (kind=RKIND), dimension(:), pointer :: invDcEdge - integer, dimension(:,:), pointer :: edgesOnEdge - integer, dimension(:,:), pointer :: edgesOnVertex - real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign - integer, dimension(:), pointer :: nEdgesOnEdge - real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge - integer, dimension(:,:), pointer :: cellsOnVertex - integer, dimension(:,:), pointer :: verticesOnCell - integer, dimension(:,:), pointer :: verticesOnEdge - real (kind=RKIND), dimension(:), pointer :: invAreaTriangle - integer, dimension(:,:), pointer :: kiteForCell - real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex - real (kind=RKIND), dimension(:), pointer :: fEdge - real (kind=RKIND), dimension(:), pointer :: fVertex - real (kind=RKIND), dimension(:,:), pointer :: zz - real (kind=RKIND), dimension(:), pointer :: rdzw - real (kind=RKIND), dimension(:), pointer :: rdzu - real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell - real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell - real (kind=RKIND), dimension(:), pointer :: fzm - real (kind=RKIND), dimension(:), pointer :: fzp - real (kind=RKIND), dimension(:,:,:), pointer :: zb - real (kind=RKIND), dimension(:,:,:), pointer :: zb3 - integer, dimension(:), pointer :: nearestRelaxationCell - real (kind=RKIND), dimension(:,:), pointer :: zgrid - real (kind=RKIND), dimension(:,:), pointer :: zxu - real (kind=RKIND), dimension(:,:), pointer :: dss - real (kind=RKIND), dimension(:), pointer :: specZoneMaskCell - real (kind=RKIND), dimension(:,:), pointer :: defc_a - real (kind=RKIND), dimension(:,:), pointer :: defc_b - real (kind=RKIND), dimension(:), pointer :: latEdge - real (kind=RKIND), dimension(:), pointer :: angleEdge - real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 - real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 - real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalCell - real (kind=RKIND), dimension(:), pointer :: meshScalingRegionalEdge - real (kind=RKIND), dimension(:), pointer :: latCell - real (kind=RKIND), dimension(:), pointer :: lonCell - real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct - real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p real (kind=RKIND), dimension(:,:), pointer :: ru_save real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p @@ -612,11 +625,9 @@ subroutine mpas_atm_pre_init(domain) nullify(state) nullify(diag) - nullify(mesh) nullify(tend) nullify(tend_physics) nullify(lbc) - call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) @@ -626,19 +637,19 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_array(diag, 'ru', ru) !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'ru_p', ru_p) - !$acc enter data create(ru_p) + !$acc enter data copyin(ru_p) call mpas_pool_get_array(diag, 'ru_save', ru_save) - !$acc enter data create(ru_save) + !$acc enter data copyin(ru_save) call mpas_pool_get_array(diag, 'rw', rw) !$acc enter data copyin(rw) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rw_p', rw_p) - !$acc enter data create(rw_p) + !$acc enter data copyin(rw_p) call mpas_pool_get_array(diag, 'rw_save', rw_save) - !$acc enter data create(rw_save) + !$acc enter data copyin(rw_save) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) !$acc enter data copyin(rtheta_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) - !$acc enter data create(rtheta_p_save) + !$acc enter data copyin(rtheta_p_save) call mpas_pool_get_array(diag, 'exner', exner) !$acc enter data copyin(exner) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'exner_base', exner_base) @@ -656,15 +667,15 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_array(diag, 'rho_p', rho_p) !$acc enter data copyin(rho_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) - !$acc enter data create(rho_p_save) + !$acc enter data copyin(rho_p_save) call mpas_pool_get_array(diag, 'rho_pp', rho_pp) - !$acc enter data create(rho_pp) + !$acc enter data copyin(rho_pp) call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) - !$acc enter data create(rho_zz_old_split) + !$acc enter data copyin(rho_zz_old_split) call mpas_pool_get_array(diag, 'cqw', cqw) - !$acc enter data create(cqw) + !$acc enter data copyin(cqw) call mpas_pool_get_array(diag, 'cqu', cqu) - !$acc enter data create(cqu) + !$acc enter data copyin(cqu) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) !$acc enter data copyin(pressure_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'pressure_base', pressure_base) @@ -674,17 +685,17 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_array(diag, 'v', v) !$acc enter data copyin(v) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !$acc enter data create(rtheta_pp) + !$acc enter data copyin(rtheta_pp) call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) - !$acc enter data create(rtheta_pp_old) + !$acc enter data copyin(rtheta_pp_old) call mpas_pool_get_array(diag, 'kdiff', kdiff) !$acc enter data copyin(kdiff) call mpas_pool_get_array(diag, 'pv_edge', pv_edge) - !$acc enter data create(pv_edge) ! use values from atm_compute_solve_diagnostics + !$acc enter data copyin(pv_edge) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) - !$acc enter data create(pv_vertex) + !$acc enter data copyin(pv_vertex) call mpas_pool_get_array(diag, 'pv_cell', pv_cell) - !$acc enter data create(pv_cell) + !$acc enter data copyin(pv_cell) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) !$acc enter data copyin(rho_edge) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'h_divergence', h_divergence) @@ -693,65 +704,60 @@ subroutine mpas_atm_pre_init(domain) !$acc enter data copyin(ke) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) - !$acc enter data create(alpha_tri) + !$acc enter data copyin(alpha_tri) call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) - !$acc enter data create(gamma_tri) + !$acc enter data copyin(gamma_tri) call mpas_pool_get_array(diag, 'a_tri', a_tri) - !$acc enter data create(a_tri) + !$acc enter data copyin(a_tri) call mpas_pool_get_array(diag, 'cofwr', cofwr) - !$acc enter data create(cofwr) + !$acc enter data copyin(cofwr) call mpas_pool_get_array(diag, 'cofwz', cofwz) - !$acc enter data create(cofwz) + !$acc enter data copyin(cofwz) call mpas_pool_get_array(diag, 'coftz', coftz) - !$acc enter data create(coftz) + !$acc enter data copyin(coftz) call mpas_pool_get_array(diag, 'cofwt', cofwt) - !$acc enter data create(cofwt) + !$acc enter data copyin(cofwt) call mpas_pool_get_array(diag, 'cofrz', cofrz) - !$acc enter data create(cofrz) - + !$acc enter data copyin(cofrz) call mpas_pool_get_array(diag, 'vorticity', vorticity) !$acc enter data copyin(vorticity) - call mpas_pool_get_array(diag, 'divergence', divergence) !$acc enter data copyin(divergence) - - - call mpas_pool_get_array(diag, 'ruAvg', ruAvg) - !$acc enter data create(ruAvg) + !$acc enter data copyin(ruAvg) call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) !$acc enter data copyin(ruAvg_split) call mpas_pool_get_array(diag, 'wwAvg', wwAvg) - !$acc enter data create(wwAvg) + !$acc enter data copyin(wwAvg) call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) !$acc enter data copyin(wwAvg_split) call mpas_pool_get_array(state, 'u', u_1, 1) !$acc enter data copyin(u_1) call mpas_pool_get_array(state, 'u', u_2, 2) - !$acc enter data create(u_2) + !$acc enter data copyin(u_2) call mpas_pool_get_array(state, 'w', w_1, 1) !$acc enter data copyin(w_1) call mpas_pool_get_array(state, 'w', w_2, 2) - !$acc enter data create(w_2) + !$acc enter data copyin(w_2) call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) !$acc enter data copyin(theta_m_1) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) - !$acc enter data create(theta_m_2) + !$acc enter data copyin(theta_m_2) call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) !$acc enter data copyin(rho_zz_1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - !$acc enter data create(rho_zz_2) + !$acc enter data copyin(rho_zz_2) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) !$acc enter data copyin(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc enter data create(scalars_2) + !$acc enter data copyin(scalars_2) call mpas_pool_get_array(tend, 'u', tend_ru) !$acc enter data copyin(tend_ru) call mpas_pool_get_array(tend, 'rho_zz', tend_rho) - !$acc enter data create(tend_rho) + !$acc enter data copyin(tend_rho) call mpas_pool_get_array(tend, 'theta_m', tend_rt) !$acc enter data copyin(tend_rt) call mpas_pool_get_array(tend, 'w', tend_rw) @@ -759,294 +765,317 @@ subroutine mpas_atm_pre_init(domain) call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) !$acc enter data copyin(rt_diabatic_tend) call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) - !$acc enter data create(tend_u_euler) + !$acc enter data copyin(tend_u_euler) call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) - !$acc enter data create(tend_theta_euler) + !$acc enter data copyin(tend_theta_euler) call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) - !$acc enter data create(tend_w_euler) + !$acc enter data copyin(tend_w_euler) call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) !$acc enter data copyin(tend_w_pgf) call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) !$acc enter data copyin(tend_w_buoy) call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) - !$acc enter data create(scalar_tend_save) + !$acc enter data copyin(scalar_tend_save) call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) - !$acc enter data create(lbc_u) + !$acc enter data copyin(lbc_u) call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) - !$acc enter data create(lbc_w) + !$acc enter data copyin(lbc_w) call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) - !$acc enter data create(lbc_ru) + !$acc enter data copyin(lbc_ru) call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) - !$acc enter data create(lbc_rho_edge) + !$acc enter data copyin(lbc_rho_edge) call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) - !$acc enter data create(lbc_theta) + !$acc enter data copyin(lbc_theta) call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) - !$acc enter data create(lbc_rtheta_m) + !$acc enter data copyin(lbc_rtheta_m) call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) - !$acc enter data create(lbc_rho_zz) + !$acc enter data copyin(lbc_rho_zz) call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) - !$acc enter data create(lbc_rho) + !$acc enter data copyin(lbc_rho) call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) - !$acc enter data create(lbc_scalars) + !$acc enter data copyin(lbc_scalars) call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) - !$acc enter data create(lbc_tend_u) + !$acc enter data copyin(lbc_tend_u) call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) - !$acc enter data create(lbc_tend_ru) + !$acc enter data copyin(lbc_tend_ru) call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) - !$acc enter data create(lbc_tend_rho_edge) + !$acc enter data copyin(lbc_tend_rho_edge) call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) - !$acc enter data create(lbc_tend_w) + !$acc enter data copyin(lbc_tend_w) call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) - !$acc enter data create(lbc_tend_theta) + !$acc enter data copyin(lbc_tend_theta) call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) - !$acc enter data create(lbc_tend_rtheta_m) + !$acc enter data copyin(lbc_tend_rtheta_m) call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) - !$acc enter data create(lbc_tend_rho_zz) + !$acc enter data copyin(lbc_tend_rho_zz) call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) - !$acc enter data create(lbc_tend_rho) + !$acc enter data copyin(lbc_tend_rho) call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) - !$acc enter data create(lbc_tend_scalars) - - - - ! call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) - ! !$acc enter data copyin(dvEdge) - - ! call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) - ! !$acc enter data copyin(cellsOnCell) - - ! call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) - ! !$acc enter data copyin(cellsOnEdge) - - ! call mpas_pool_get_array(mesh, 'advCellsForEdge', advCellsForEdge) - ! !$acc enter data copyin(advCellsForEdge) - - ! call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) - ! !$acc enter data copyin(edgesOnCell) - - ! call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) - ! !$acc enter data copyin(nAdvCellsForEdge) - - ! call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) - ! !$acc enter data copyin(nEdgesOnCell) - - ! call mpas_pool_get_array(mesh, 'adv_coefs', adv_coefs) - ! !$acc enter data copyin(adv_coefs) - - ! call mpas_pool_get_array(mesh, 'adv_coefs_3rd', adv_coefs_3rd) - ! !$acc enter data copyin(adv_coefs_3rd) - - ! call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) - ! !$acc enter data copyin(edgesOnCell_sign) - - ! call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) - ! !$acc enter data copyin(invAreaCell) - - ! call mpas_pool_get_array(mesh, 'bdyMaskCell', bdyMaskCell) - ! !$acc enter data copyin(bdyMaskCell) - - ! call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) - ! !$acc enter data copyin(bdyMaskEdge) - - ! call mpas_pool_get_array(mesh, 'specZoneMaskEdge', specZoneMaskEdge) - ! !$acc enter data copyin(specZoneMaskEdge) - - ! call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) - ! !$acc enter data copyin(invDvEdge) - - ! call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) - ! !$acc enter data copyin(dcEdge) - - ! call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) - ! !$acc enter data copyin(invDcEdge) - - ! call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) - ! !$acc enter data copyin(edgesOnEdge) - - ! call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) - ! !$acc enter data copyin(edgesOnVertex) - - ! call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) - ! !$acc enter data copyin(edgesOnVertex_sign) - - ! call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) - ! !$acc enter data copyin(nEdgesOnEdge) - - ! call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) - ! !$acc enter data copyin(weightsOnEdge) - - ! call mpas_pool_get_array(mesh, 'cellsOnVertex', cellsOnVertex) - ! !$acc enter data copyin(cellsOnVertex) - - ! call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) - ! !$acc enter data copyin(verticesOnCell) - - ! call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) - ! !$acc enter data copyin(verticesOnEdge) - - ! call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) - ! !$acc enter data copyin(invAreaTriangle) - - ! call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) - ! !$acc enter data copyin(kiteForCell) - - ! call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) - ! !$acc enter data copyin(kiteAreasOnVertex) - - ! call mpas_pool_get_array(mesh, 'fVertex', fVertex) - ! !$acc enter data copyin(fVertex) - - ! call mpas_pool_get_array(mesh, 'fEdge', fEdge) - ! !$acc enter data copyin(fEdge) - - ! call mpas_pool_get_array(mesh, 'zz', zz) - ! !$acc enter data copyin(zz) - - ! call mpas_pool_get_array(mesh, 'rdzw', rdzw) - ! !$acc enter data copyin(rdzw) - - ! call mpas_pool_get_array(mesh, 'rdzu', rdzu) - ! !$acc enter data copyin(rdzu) - - ! call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) - ! !$acc enter data copyin(zb_cell) - - ! call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) - ! !$acc enter data copyin(zb3_cell) - - ! call mpas_pool_get_array(mesh, 'fzm', fzm) - ! !$acc enter data copyin(fzm) - - ! call mpas_pool_get_array(mesh, 'fzp', fzp) - ! !$acc enter data copyin(fzp) - - ! call mpas_pool_get_array(mesh, 'zb', zb) - ! !$acc enter data copyin(zb) - - ! call mpas_pool_get_array(mesh, 'zb3', zb3) - ! !$acc enter data copyin(zb3) - - ! call mpas_pool_get_array(mesh, 'nearestRelaxationCell', nearestRelaxationCell) - ! !$acc enter data copyin(nearestRelaxationCell) - - ! call mpas_pool_get_array(mesh, 'zgrid', zgrid) - ! !$acc enter data copyin(zgrid) - - ! call mpas_pool_get_array(mesh, 'zxu', zxu) - ! !$acc enter data copyin(zxu) - - ! call mpas_pool_get_array(mesh, 'dss', dss) - ! !$acc enter data copyin(dss) - - ! call mpas_pool_get_array(mesh, 'specZoneMaskCell', specZoneMaskCell) - ! !$acc enter data copyin(specZoneMaskCell) - - ! call mpas_pool_get_array(mesh, 'defc_a', defc_a) - ! !$acc enter data copyin(defc_a) - - ! call mpas_pool_get_array(mesh, 'defc_b', defc_b) - ! !$acc enter data copyin(defc_b) - - ! call mpas_pool_get_array(mesh, 'latEdge', latEdge) - ! !$acc enter data copyin(latEdge) - - ! call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) - ! !$acc enter data copyin(angleEdge) - - ! call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) - ! !$acc enter data copyin(meshScalingDel2) - - ! call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) - ! !$acc enter data copyin(meshScalingDel4) - ! call mpas_pool_get_array(mesh, 'meshScalingRegionalCell', meshScalingRegionalCell) - ! !$acc enter data copyin(meshScalingRegionalCell) - - ! call mpas_pool_get_array(mesh, 'meshScalingRegionalEdge', meshScalingRegionalEdge) - ! !$acc enter data copyin(meshScalingRegionalEdge) - - ! call mpas_pool_get_array(mesh, 'latCell', latCell) - ! !$acc enter data copyin(latCell) - - ! call mpas_pool_get_array(mesh, 'lonCell', lonCell) - ! !$acc enter data copyin(lonCell) - - ! call mpas_pool_get_array(mesh, 'coeffs_reconstruct', coeffs_reconstruct) - ! !$acc enter data copyin(coeffs_reconstruct) + !$acc enter data copyin(lbc_tend_scalars) + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc enter data copyin(rthdynten) #endif - end subroutine mpas_atm_pre_init + end subroutine mpas_atm_pre_dynamics_h2d - subroutine mpas_post_step_d2h(inPool) + subroutine mpas_atm_post_dynamics_d2h(domain) implicit none - TYPE(mpas_pool_type), INTENT(IN) :: inPool + type (domain_type), intent(inout) :: domain #ifdef MPAS_OPENACC - type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: diag type (mpas_pool_type), pointer :: tend type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc + + + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 - real (kind=RKIND), dimension(:,:), pointer :: theta, rho, pressure_base, pressure_p, pressure - real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base, rtheta_p, rtheta_base, rho_p, ru, rw + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars nullify(state) nullify(diag) - - - call mpas_pool_get_subpool(inPool, 'state', state) - call mpas_pool_get_subpool(inPool, 'diag', diag) - - + nullify(tend) + nullify(tend_physics) + nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + + call mpas_pool_get_array(diag, 'ru', ru) + !$acc exit data copyout(ru) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc exit data copyout(ru_p) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc exit data copyout(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc exit data copyout(rw) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc exit data copyout(rw_p) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc exit data copyout(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc exit data copyout(rtheta_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc exit data copyout(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc exit data copyout(exner) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc exit data copyout(exner_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc exit data copyout(rtheta_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc exit data copyout(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc exit data copyout(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc exit data copyout(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc exit data copyout(theta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc exit data copyout(rho_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc exit data copyout(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc exit data copyout(rho_pp) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc exit data copyout(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc exit data copyout(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc exit data copyout(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc exit data copyout(pressure_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc exit data copyout(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc exit data copyout(pressure) + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc exit data copyout(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc exit data copyout(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc exit data copyout(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc exit data copyout(pv_cell) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc exit data copyout(rho_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc exit data copyout(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) ! use values from atm_compute_solve_diagnostics + + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc exit data copyout(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc exit data copyout(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc exit data copyout(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc exit data copyout(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc exit data copyout(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc exit data copyout(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc exit data copyout(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc exit data copyout(cofrz) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc exit data copyout(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc exit data copyout(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + !$acc exit data copyout(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc exit data copyout(wwAvg_split) + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc exit data copyout(u_1) call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc exit data copyout(u_2) call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc exit data copyout(w_1) call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc exit data copyout(w_2) call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc exit data copyout(theta_m_1) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc exit data copyout(theta_m_2) call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc exit data copyout(rho_zz_1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc exit data copyout(rho_zz_2) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc exit data copyout(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc exit data copyout(scalars_2) - !$acc update self(u_1, u_2, w_1, w_2, theta_m_1, theta_m_2, & - !$acc rho_zz_1, rho_zz_2, scalars_1, scalars_2) - call mpas_pool_get_array(diag, 'theta', theta) - call mpas_pool_get_array(diag, 'rho', rho) - call mpas_pool_get_array(diag, 'pressure_p', pressure_p) - call mpas_pool_get_array(diag, 'pressure_base', pressure_base) - call mpas_pool_get_array(diag, 'pressure', pressure) - !$acc update self(theta, rho, pressure_p, pressure_base, pressure) + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc exit data copyout(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc exit data copyout(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc exit data copyout(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc exit data copyout(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc exit data copyout(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc exit data copyout(tend_u_euler) + call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc exit data copyout(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc exit data copyout(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc exit data copyout(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc exit data copyout(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc exit data copyout(scalar_tend_save) - call mpas_pool_get_array(diag, 'exner', exner) - call mpas_pool_get_array(diag, 'exner_base', exner_base) - call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) - call mpas_pool_get_array(diag, 'rho_p', rho_p) - call mpas_pool_get_array(diag, 'ru', ru) - call mpas_pool_get_array(diag, 'rw', rw) - !$acc update self(exner, exner_base, rtheta_p, rtheta_base, rho_p, ru, rw) + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc exit data copyout(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc exit data copyout(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc exit data copyout(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc exit data copyout(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc exit data copyout(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc exit data copyout(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc exit data copyout(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc exit data copyout(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc exit data copyout(lbc_scalars) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc exit data copyout(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc exit data copyout(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc exit data copyout(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc exit data copyout(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc exit data copyout(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc exit data copyout(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc exit data copyout(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc exit data copyout(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc exit data copyout(lbc_tend_scalars) + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc exit data copyout(rthdynten) #endif - end subroutine mpas_post_step_d2h + end subroutine mpas_atm_post_dynamics_d2h !---------------------------------------------------------------------------- @@ -1364,12 +1393,14 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) call mpas_get_time(nowTime, dateTimeString=xtime_new) !$ser init directory='./ser_data' prefix='mpas_dycore' prefix_ref='mpas_dycore-mem' + call mpas_atm_pre_dynamics_h2d(domain) if (trim(config_time_integration) == 'SRK3') then call atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) else call mpas_log_write('Unknown time integration option '//trim(config_time_integration), messageType=MPAS_LOG_ERR) call mpas_log_write('Currently, only ''SRK3'' is supported.', messageType=MPAS_LOG_CRIT) end if + call mpas_atm_post_dynamics_d2h(domain) call mpas_set_timeInterval(dtInterval, dt=dt) currTime = nowTime + dtInterval @@ -1479,7 +1510,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_get_time(nowTime, dateTimeString=xtime_new) - !$ser savepoint atm_srk3 datetime=xtime_new rank=domain%dm_info%my_proc_id + ! !!!$ser savepoint atm_srk3 datetime=xtime_new rank=domain%dm_info%my_proc_id + !$ser savepoint atm_srk3 datetime=xtime_new ! ! Retrieve configuration options @@ -1629,20 +1661,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) !$ser mode write - !$ser data theta_m_1_pre=theta_m - !$ser data scalars_1_pre=scalars_1 - !$ser data pressure_p_pre=pressure_p - !$ser data rtheta_p_pre=rtheta_p call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') - !$ser data theta_m_1_post=theta_m - !$ser data scalars_1_post=scalars_1 - !$ser data pressure_p_post=pressure_p - !$ser data rtheta_p_post=rtheta_p + ! $ser data theta_m_1_post=theta_m + ! $ser data scalars_1_post=scalars_1 + ! $ser data pressure_p_post=pressure_p + ! $ser data rtheta_p_post=rtheta_p !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) call mpas_timer_start('atm_rk_integration_setup') - - ! $ser savepoint rk_integration_setup datetime=xtime_new + !$ser savepoint rk_integration_setup datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1659,7 +1686,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_stop('atm_rk_integration_setup') call mpas_timer_start('atm_compute_moist_coefficients') - ! $ser savepoint moist_coefficients datetime=xtime_new + !$ser savepoint moist_coefficients datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1711,7 +1738,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_start('atm_compute_vert_imp_coefs') rk_step = 1 - ! $ser savepoint vert_imp_coefs datetime=xtime_new dyn_substep=dynamics_substep + !$ser savepoint vert_imp_coefs datetime=xtime_new dyn_substep=dynamics_substep !$OMP PARALLEL DO do thread=1,nThreads @@ -1728,9 +1755,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'exner', exner) !$acc update self(exner) - !$ser data exner_pre=exner call exchange_halo_group(domain, 'dynamics:exner') - !$ser data exner_post=exner + ! $ser data exner_post=exner !$acc update device(exner) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -1771,7 +1797,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) delsq_vorticity(:,nVertices+1) = 0.0_RKIND allocate(dpdz(nVertLevels,nCells+1)) dpdz(:,nCells+1) = 0.0_RKIND - ! $ser savepoint compute_dyn_tend datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + !$ser savepoint compute_dyn_tend datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step !$OMP PARALLEL DO do thread=1,nThreads @@ -1802,14 +1828,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! we are solving for all edges of owned cells to minimize communications ! during the acoustic substeps !*********************************** - !$ser savepoint rk_loop datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step + ! !$ser savepoint rk_loop datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step + !$ser savepoint rk_loop datetime=xtime_new rk_step=rk_step ! tend_u call mpas_pool_get_array(tend, 'u', tend_u) !$acc update self(tend_u) - !$ser data tend_u_pre=tend_u call exchange_halo_group(domain, 'dynamics:tend_u') - !$ser data tend_u_post=tend_u + ! $ser data tend_u_post=tend_u !$acc update device(tend_u) call mpas_timer_start('small_step_prep') @@ -1826,7 +1852,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !------------------------------------------------------------------------------------------------------------------------ if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS dry dynamics in the specified zone - ! $ser savepoint apply_lbc datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + !$ser savepoint apply_lbc datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step allocate(ru_driving_tend(nVertLevels,nEdges+1)) allocate(rt_driving_tend(nVertLevels,nCells+1)) @@ -1834,13 +1860,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ru_driving_tend(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nEdges, 'ru', 0.0_RKIND ) rt_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rtheta_m', 0.0_RKIND ) rho_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rho_zz', 0.0_RKIND ) - - - ! $ser mode write + !$ser mode write ! $ser data ru_driving_tend=ru_driving_tend ! $ser data tert_driving_tendnd_rt=rt_driving_tend - ! $ser data rho_driving_tend=rho_driving_tend - + ! $ser data rho_driving_tend=rho_driving_tend !$OMP PARALLEL DO do thread=1,nThreads call atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, block % configs, nVertLevels, & @@ -1867,8 +1890,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) rt_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step ) rho_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step ) - - ! $ser mode write + !$ser mode write ! $ser data ru_driving_values=ru_driving_values ! $ser data rt_driving_values=rt_driving_values ! $ser data rho_driving_values=rho_driving_values @@ -1899,13 +1921,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! do small_step = 1, number_sub_steps(rk_step) - !$ser savepoint acoustic datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step small_step=small_step + ! !$ser savepoint acoustic datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step small_step=small_step + !$ser savepoint acoustic datetime=xtime_new rk_step=rk_step small_step=small_step call mpas_pool_get_array(diag, 'rho_pp', rho_pp) !$acc update self(rho_pp) - !$ser data rho_pp_pre=rho_pp call exchange_halo_group(domain, 'dynamics:rho_pp') - !$ser data rho_pp_post=rho_pp + ! $ser data rho_pp_post=rho_pp !$acc update device(rho_pp) call mpas_timer_start('atm_advance_acoustic_step') @@ -1931,9 +1953,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) !$acc update self(rtheta_pp) - !$ser data rtheta_pp_pre=rtheta_pp call exchange_halo_group(domain, 'dynamics:rtheta_pp') - !$ser data rtheta_pp_post=rtheta_pp + ! $ser data rtheta_pp_post=rtheta_pp !$acc update device(rtheta_pp) ! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -1950,26 +1971,21 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_stop('atm_divergence_damping_3d') end do ! end of acoustic steps loop + !$ser savepoint rk_loop2 datetime=xtime_new rk_step=rk_step ! ! Communicate halos for rw_p[1,2], ru_p[1,2], rho_pp[1,2], rtheta_pp[2] ! - !$ser savepoint rk_loop2 datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step - call mpas_pool_get_array(diag, 'ru_p', ru_p) call mpas_pool_get_array(diag, 'rw_p', rw_p) call mpas_pool_get_array(diag, 'rho_pp', rho_pp) call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) - !$ser data rw_p_pre=rw_p - !$ser data ru_p_pre=ru_p - !$ser data rho_pp_pre=rho_pp - !$ser data rtheta_pp_pre=rtheta_pp call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp') - !$ser data rw_p_post=rw_p - !$ser data ru_p_post=ru_p - !$ser data rho_pp_post=rho_pp - !$ser data rtheta_pp_post=rtheta_pp + ! $ser data rw_p_post=rw_p + ! $ser data ru_p_post=ru_p + ! $ser data rho_pp_post=rho_pp + ! $ser data rtheta_pp_post=rtheta_pp !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) call mpas_timer_start('atm_recover_large_step_variables') @@ -2048,14 +2064,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !------------------------------------------------------------------- call mpas_pool_get_array(state, 'u', u, 2) !$acc update self(u) - !$ser data u_pre=u ! u if (config_apply_lbcs) then call exchange_halo_group(domain, 'dynamics:u_123') else call exchange_halo_group(domain, 'dynamics:u_3') end if - !$ser data u_post=u + ! $ser data u_post=u !$acc update device(u) ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). @@ -2063,7 +2078,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then - ! $ser savepoint advance_scalars1 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + !$ser savepoint advance_scalars1 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & @@ -2073,7 +2088,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) - !$ser data scalars_2_pre_1=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') !$ser data scalars_2_post_1=scalars_2 !$acc update device(scalars_2) @@ -2125,16 +2139,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'pv_edge', pv_edge) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) !$acc update self(w,pv_edge,rho_edge) - !$ser data w_pre=w - !$ser data pv_edge_pre=pv_edge - !$ser data rho_edge_pre=rho_edge if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) - !$ser data scalars_2_pre_2=scalars_2 call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars') !$ser data scalars_2_post_2=scalars_2 !$acc update device(scalars_2) @@ -2163,7 +2173,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! w halo values needs resetting after regional boundary update call mpas_pool_get_array(state, 'w', w, 2) !$acc update self(w) - !$ser data w_pre=w call exchange_halo_group(domain, 'dynamics:w') !$ser data w_post=w !$acc update device(w) @@ -2181,13 +2190,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) !$acc update self(theta_m,pressure_p,rtheta_p) - !$ser data theta_m_pre=theta_m - !$ser data pressure_p_pre=pressure_p - !$ser data rtheta_p_pre=rtheta_p call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p') - !$ser data theta_m_post=theta_m - !$ser data pressure_p_post=pressure_p - !$ser data rtheta_p_post=rtheta_p + ! $ser data theta_m_post=theta_m + ! $ser data pressure_p_post=pressure_p + ! $ser data rtheta_p_post=rtheta_p !$acc update device(theta_m,pressure_p,rtheta_p) ! @@ -2246,7 +2252,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) RK3_SPLIT_TRANSPORT : do rk_step = 1, 3 ! Runge-Kutta loop - ! $ser savepoint advance_scalars2 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + !$ser savepoint advance_scalars2 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) @@ -2256,7 +2262,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! need to fill halo for horizontal filter call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) - !$ser data scalars_2_pre=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') !$ser data scalars_2_post=scalars_2 !$acc update device(scalars_2) @@ -2287,9 +2292,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) if (rk_step < 3) then call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) - !$ser data scalars_2_pre=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') - !$ser data scalars_2_post=scalars_2 + !$ser data scalars_2_post=scalars_2 !$acc update device(scalars_2) end if @@ -2323,15 +2327,20 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) #ifdef DO_PHYSICS call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc update self(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_1) if(config_convection_scheme == 'cu_grell_freitas' .or. & config_convection_scheme == 'cu_ntiedtke') then call mpas_pool_get_array(tend_physics, 'rqvdynten', rqvdynten) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + !$acc update self(theta_m) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc update self(rthdynten) + !NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio !requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo @@ -2356,6 +2365,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) where ( scalars_2(:,:,:) < 0.0) & scalars_2(:,:,:) = 0.0 + !$acc update device(scalars_2, rthdynten) !call microphysics schemes: if (trim(config_microp_scheme) /= 'off') then call mpas_timer_start('microphysics') @@ -2405,7 +2415,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) - !$ser data scalars_2_pre=scalars_2 call exchange_halo_group(domain, 'dynamics:scalars') !$ser data scalars_2_post=scalars_2 !$acc update device(scalars_2) @@ -2657,6 +2666,7 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & ! $ser mode write + !$ser mode write ! $ser accdata rtheta_p=rtheta_p ! $ser accdata rho_p=rho_p ! $ser accdata theta_m_1=theta_m_1 @@ -2710,7 +2720,7 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel - ! $ser accdata scalars_2=scalars_2 + !$ser accdata scalars_2=scalars_2 end subroutine atm_rk_integration_setup @@ -2814,7 +2824,7 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel - ! $ser mode write + !$ser mode write ! $ser accdata cqu=cqu ! $ser accdata cqw=cqw @@ -2958,7 +2968,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - ! $ser mode write + !$ser mode write ! $ser accdata p=p ! $ser accdata t=t @@ -3041,7 +3051,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, end do ! loop over cells !$acc end parallel - ! $ser accdata cofrz=cofrz + ! $ser accdata cofrz=cofrz ! $ser accdata alpha_tri=alpha_tri MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') @@ -4300,8 +4310,8 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - ! $ser accdata scalar_new=scalar_new - ! $ser accdata uhAvg=uhAvg + !$ser accdata scalar_new=scalar_new + !$ser accdata uhAvg=uhAvg !$acc parallel @@ -4402,7 +4412,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - ! $ser accdata scalar_old=scalar_old + !$ser accdata scalar_old=scalar_old !$acc parallel wait @@ -4483,9 +4493,9 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & end do !$acc end parallel - ! $ser accdata scalar_tend_save=scalar_tend_save - ! $ser accdata scalar_tend_column=scalar_tend_column - ! $ser accdata scalar_new2=scalar_new + !$ser accdata scalar_tend_save=scalar_tend_save + !$ser accdata scalar_tend_column=scalar_tend_column + !$ser accdata scalar_new2=scalar_new MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc exit data delete(scalar_tend_column, wdtn, & @@ -4760,7 +4770,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') - ! $ser accdata scalars_old_mono=scalars_old + !$ser accdata scalars_old_mono=scalars_old !$acc parallel @@ -5356,7 +5366,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! loop over scalars - ! $ser accdata scalars_new_mono2=scalars_new + !$ser accdata scalars_new_mono2=scalars_new MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') @@ -7025,6 +7035,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & real (kind=RKIND) :: ke_fact, efac logical :: reconstruct_v + MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, & !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & @@ -7358,9 +7369,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') end if ! apvm upwinding @@ -7471,8 +7479,19 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc enter data copyin(cellsOnEdge,nEdgesOnCell,edgesOnCell, & !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & !$acc zb_cell,zb3_cell) + + ! copyin the data that is only on the right-hand side + !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc rho_base,theta_base) + + ! copyin the data that will be modified in this routine + !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc rtheta_p,exner,exner_base,pressure_p, & + !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') + + rcv = rgas / (cp-rgas) p0 = 1.e5 ! this should come from somewhere else... @@ -7594,6 +7613,15 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc exit data delete(cellsOnEdge,nEdgesOnCell,edgesOnCell, & !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & !$acc zb_cell,zb3_cell) + + ! delete the data that is only on the right-hand side + !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc rho_base,theta_base) + + ! copyout the data that will be modified in this routine + !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc rtheta_p,exner,exner_base,pressure_p, & + !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') end subroutine atm_init_coupled_diagnostics @@ -7901,7 +7929,7 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end do !$acc end parallel - ! $ser mode write + !$ser mode write ! $ser accdata tend_ru=tend_ru ! $ser accdata tend_rt=tend_rt ! $ser accdata tend_rw=tend_rw @@ -8018,7 +8046,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel - ! $ser mode write + !$ser mode write ! $ser accdata tend_rho1=tend_rho ! $ser accdata tend_rt1=tend_rt @@ -8159,7 +8187,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel - ! $ser mode write + !$ser mode write ! $ser accdata tend_ru_fin=tend_ru MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index af45103337..d7766d6af9 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -43,7 +43,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) use mpas_atm_dimensions, only : mpas_atm_set_dims use mpas_atm_diagnostics_manager, only : mpas_atm_diag_setup use mpas_atm_threading, only : mpas_atm_threading_init - use atm_time_integration, only : mpas_atm_dynamics_init, mpas_atm_pre_init + use atm_time_integration, only : mpas_atm_dynamics_init, mpas_atm_pre_dynamics_h2d, mpas_atm_post_dynamics_d2h use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_attlist, only : mpas_modify_att use mpas_string_utils, only : mpas_string_replace @@ -76,7 +76,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) integer, pointer :: nVertLevels, maxEdges, maxEdges2, num_scalars character (len=ShortStrKIND) :: init_stream_name real (kind=R8KIND) :: input_start_time, input_stop_time - real (kind=RKIND), dimension(:,:), pointer :: pv_edge, ru, rw + ierr = 0 @@ -94,7 +94,6 @@ function atm_core_init(domain, startTimeStamp) result(ierr) ! Set up inner dimensions used by arrays in optimized dynamics routines ! call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) - call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) call mpas_pool_get_dimension(state, 'nVertLevels', nVertLevels) call mpas_pool_get_dimension(state, 'maxEdges', maxEdges) call mpas_pool_get_dimension(state, 'maxEdges2', maxEdges2) @@ -265,7 +264,6 @@ function atm_core_init(domain, startTimeStamp) result(ierr) do while (associated(block)) call mpas_pool_get_subpool(block % structs, 'mesh', mesh) call mpas_pool_get_subpool(block % structs, 'state', state) - call mpas_atm_pre_init(domain) call atm_mpas_init_block(domain % dminfo, domain % streamManager, block, mesh, dt) @@ -287,12 +285,7 @@ function atm_core_init(domain, startTimeStamp) result(ierr) block => block % next end do - call mpas_pool_get_array(diag, 'pv_edge', pv_edge) - call mpas_pool_get_array(diag, 'ru', ru) - call mpas_pool_get_array(diag, 'rw', rw) - !$acc update self(pv_edge,ru,rw) call exchange_halo_group(domain, 'initialization:pv_edge,ru,rw') - !$acc update device(pv_edge,ru,rw) call mpas_atm_diag_setup(domain % streamManager, domain % blocklist % configs, & domain % blocklist % structs, domain % clock, domain % dminfo) @@ -300,7 +293,6 @@ function atm_core_init(domain, startTimeStamp) result(ierr) ! ! Prepare the dynamics for integration ! - !call mpas_atm_pre_init(domain) call mpas_atm_dynamics_init(domain) end function atm_core_init @@ -503,6 +495,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadStart', edgeSolveThreadStart) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadEnd', edgeSolveThreadEnd) + call mpas_atm_pre_computesolvediag_h2d(block) !$OMP PARALLEL DO do thread=1,nThreads if (.not. config_do_restart .or. (config_do_restart .and. config_do_DAcycling)) then @@ -521,6 +514,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) edgeThreadStart(thread), edgeThreadEnd(thread)) end do !$OMP END PARALLEL DO + call mpas_atm_post_computesolvediag_d2h(block) deallocate(ke_vertex) deallocate(ke_edge) @@ -603,7 +597,7 @@ function atm_core_run(domain) result(ierr) use mpas_derived_types, only : MPAS_STREAM_LATEST_BEFORE, MPAS_STREAM_INPUT, MPAS_STREAM_INPUT_OUTPUT use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_atm_boundaries, only : mpas_atm_update_bdy_tend - use atm_time_integration, only : mpas_post_step_d2h + use atm_time_integration, only : mpas_atm_pre_dynamics_h2d, mpas_atm_post_dynamics_d2h use mpas_atm_diagnostics_manager, only : mpas_atm_diag_update, mpas_atm_diag_compute, mpas_atm_diag_reset implicit none @@ -826,7 +820,7 @@ function atm_core_run(domain) result(ierr) call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh) call mpas_pool_get_subpool(block_ptr % structs, 'tend', tend) call atm_compute_output_diagnostics(state, 1, diag, mesh) - call mpas_post_step_d2h(block_ptr % structs) + block_ptr => block_ptr % next end do end if @@ -934,8 +928,6 @@ subroutine atm_compute_output_diagnostics(state, time_lev, diag, mesh) call mpas_pool_get_array(mesh, 'zz', zz) - !$acc parallel - !$acc loop gang vector collapse(2) do iCell=1,nCells do k=1,nVertLevels theta(k,iCell) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) @@ -943,7 +935,6 @@ subroutine atm_compute_output_diagnostics(state, time_lev, diag, mesh) pressure(k,iCell) = pressure_base(k,iCell) + pressure_p(k,iCell) end do end do - !$acc end parallel end subroutine atm_compute_output_diagnostics @@ -1012,11 +1003,8 @@ subroutine atm_do_timestep(domain, dt, itimestep) #ifdef DO_PHYSICS !proceed with physics if moist_physics is set to true: if(moist_physics) then - call mpas_log_write('call physics timetracker') call physics_timetracker(domain,dt,clock,itimestep,xtime_s) - call mpas_log_write('call physics driver') call physics_driver(domain,itimestep,xtime_s) - call mpas_log_write('end physics timetracker') endif #endif diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F index 40125972ab..56361e8b98 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_interface.F +++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F @@ -595,11 +595,13 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics, call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) call mpas_pool_get_array(state,'w' ,w ,time_lev) + !$acc update host(exner, pressure_b, pressure_p, rho_zz, theta_m, w) call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) call mpas_pool_get_array(state,'scalars',scalars,time_lev) + !$acc update host(scalars) qv => scalars(index_qv,:,:) qc => scalars(index_qc,:,:) qr => scalars(index_qr,:,:) @@ -831,7 +833,6 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te call mpas_pool_get_array(diag,'rtheta_p' ,rtheta_p ) call mpas_pool_get_array(diag,'surface_pressure',surface_pressure) call mpas_pool_get_array(diag,'dtheta_dt_mp' ,dtheta_dt_mp ) - call mpas_pool_get_array(tend,'tend_sfc_pressure',tend_sfc_pressure) call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) @@ -1039,6 +1040,10 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te case default end select mp_tend_select + !$acc update device(exner, exner_b, pressure_b, pressure_p, rtheta_b) + !$acc update device(rtheta_p, rho_zz, theta_m, scalars) + !$acc update device(rt_diabatic_tend) + end subroutine microphysics_to_MPAS !================================================================================================================= diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F index 284b072851..76a8a6c280 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F +++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F @@ -133,6 +133,9 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s call mpas_pool_get_array(diag ,'rho_edge',mass_edge) call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys) + !$acc update self(theta_m, scalars, mass, mass_edge) + !! $acc tend_ru_physics,tend_rtheta_physics,tend_rho_physics) + call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) @@ -170,6 +173,8 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s call mpas_pool_get_array(tend_physics,'rthratensw',rthratensw) call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) +!$acc update self(tend_scalars) ! Probably not needed + !initialize the tendency for the potential temperature and all scalars due to PBL, convection, @@ -219,6 +224,10 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s tend_th,tend_rtheta_physics,tend_scalars,tend_ru_physics,tend_u_phys, & exchange_halo_group) +!$acc update device(tend_scalars) + +!!$acc update device(tend_ru_physics,tend_rtheta_physics,tend_rho_physics) + !clean up any pointers that were allocated with zero size before the call to physics_get_tend_work: if(size(rucuten) == 0 ) deallocate(rucuten ) if(size(rvcuten) == 0 ) deallocate(rvcuten ) From e7a9142058ccdd0ad0a18dd2a42c845ad0f78fa0 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 6 May 2025 16:17:55 -0600 Subject: [PATCH 70/79] some more fixes + cleanup --- .../dynamics/mpas_atm_time_integration.F | 165 +----------------- .../physics/mpas_atmphys_interface.F | 1 - .../physics/mpas_atmphys_todynamics.F | 8 +- 3 files changed, 9 insertions(+), 165 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index e78dd2fcc1..2ca2a25979 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -20,11 +20,6 @@ module atm_time_integration use mpas_pool_routines use mpas_kind_types use mpas_constants - !USE m_ser, ONLY: ser_init, ser_array - USE m_serialize, ONLY: fs_add_savepoint_metainfo, fs_read_field, fs_create_savepoint, fs_write_field - USE utils_ppser, ONLY: ppser_set_mode, ppser_initialize, ppser_get_mode, ppser_savepoint, & - ppser_serializer, ppser_serializer_ref, ppser_intlength, ppser_reallength, & - ppser_realtype, ppser_zrperturb use mpas_dmpar use mpas_vector_reconstruction ! Added only clause to keep xlf90 from getting confused from the overloaded abs intrinsic in mpas_timekeeping @@ -1380,8 +1375,6 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) real (kind=RKIND) :: Time_new type (mpas_pool_type), pointer :: state character (len=StrKIND), pointer :: config_time_integration - character(len=StrKIND) :: timeStamp - character(len=StrKIND) :: savepoint_name clock => domain % clock @@ -1390,12 +1383,9 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) call mpas_pool_get_config(block % configs, 'config_time_integration', config_time_integration) call mpas_pool_get_config(block % configs, 'config_apply_lbcs', config_apply_lbcs) - call mpas_get_time(nowTime, dateTimeString=xtime_new) - !$ser init directory='./ser_data' prefix='mpas_dycore' prefix_ref='mpas_dycore-mem' - call mpas_atm_pre_dynamics_h2d(domain) if (trim(config_time_integration) == 'SRK3') then - call atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) + call atm_srk3(domain, dt, itimestep, exchange_halo_group) else call mpas_log_write('Unknown time integration option '//trim(config_time_integration), messageType=MPAS_LOG_ERR) call mpas_log_write('Currently, only ''SRK3'' is supported.', messageType=MPAS_LOG_CRIT) @@ -1421,7 +1411,7 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) end subroutine atm_timestep - subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) + subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Advance model state forward in time by the specified time step using ! time-split RK3 scheme @@ -1441,7 +1431,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) real (kind=RKIND), intent(in) :: dt integer, intent(in) :: itimestep procedure (halo_exchange_routine) :: exchange_halo_group - type (MPAS_Time_type), intent(in) :: nowTime integer :: thread integer :: iCell, k, iEdge @@ -1457,9 +1446,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) integer :: rk_step, number_of_sub_steps integer :: iScalar - character (len=StrKIND) :: xtime_new - - real (kind=RKIND), dimension(3) :: rk_timestep, rk_sub_timestep integer, dimension(3) :: number_sub_steps integer :: small_step @@ -1509,10 +1495,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) logical, parameter :: debug = .false. - call mpas_get_time(nowTime, dateTimeString=xtime_new) - ! !!!$ser savepoint atm_srk3 datetime=xtime_new rank=domain%dm_info%my_proc_id - !$ser savepoint atm_srk3 datetime=xtime_new - ! ! Retrieve configuration options ! @@ -1660,16 +1642,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) - !$ser mode write call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') - ! $ser data theta_m_1_post=theta_m - ! $ser data scalars_1_post=scalars_1 - ! $ser data pressure_p_post=pressure_p - ! $ser data rtheta_p_post=rtheta_p !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) call mpas_timer_start('atm_rk_integration_setup') - !$ser savepoint rk_integration_setup datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1686,7 +1662,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_stop('atm_rk_integration_setup') call mpas_timer_start('atm_compute_moist_coefficients') - !$ser savepoint moist_coefficients datetime=xtime_new !$OMP PARALLEL DO do thread=1,nThreads @@ -1738,8 +1713,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_start('atm_compute_vert_imp_coefs') rk_step = 1 - !$ser savepoint vert_imp_coefs datetime=xtime_new dyn_substep=dynamics_substep - !$OMP PARALLEL DO do thread=1,nThreads call atm_compute_vert_imp_coefs( state, mesh, diag, block % configs, nVertLevels, rk_sub_timestep(rk_step), & @@ -1750,13 +1723,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) end do !$OMP END PARALLEL DO call mpas_timer_stop('atm_compute_vert_imp_coefs') - - call mpas_pool_get_array(diag, 'exner', exner) - + call mpas_pool_get_array(diag, 'exner', exner) !$acc update self(exner) call exchange_halo_group(domain, 'dynamics:exner') - ! $ser data exner_post=exner !$acc update device(exner) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -1797,7 +1767,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) delsq_vorticity(:,nVertices+1) = 0.0_RKIND allocate(dpdz(nVertLevels,nCells+1)) dpdz(:,nCells+1) = 0.0_RKIND - !$ser savepoint compute_dyn_tend datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step !$OMP PARALLEL DO do thread=1,nThreads @@ -1828,14 +1797,11 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! we are solving for all edges of owned cells to minimize communications ! during the acoustic substeps !*********************************** - ! !$ser savepoint rk_loop datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step - !$ser savepoint rk_loop datetime=xtime_new rk_step=rk_step ! tend_u call mpas_pool_get_array(tend, 'u', tend_u) !$acc update self(tend_u) call exchange_halo_group(domain, 'dynamics:tend_u') - ! $ser data tend_u_post=tend_u !$acc update device(tend_u) call mpas_timer_start('small_step_prep') @@ -1852,7 +1818,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !------------------------------------------------------------------------------------------------------------------------ if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS dry dynamics in the specified zone - !$ser savepoint apply_lbc datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step allocate(ru_driving_tend(nVertLevels,nEdges+1)) allocate(rt_driving_tend(nVertLevels,nCells+1)) @@ -1860,10 +1825,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ru_driving_tend(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nEdges, 'ru', 0.0_RKIND ) rt_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rtheta_m', 0.0_RKIND ) rho_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rho_zz', 0.0_RKIND ) - !$ser mode write - ! $ser data ru_driving_tend=ru_driving_tend - ! $ser data tert_driving_tendnd_rt=rt_driving_tend - ! $ser data rho_driving_tend=rho_driving_tend !$OMP PARALLEL DO do thread=1,nThreads call atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, block % configs, nVertLevels, & @@ -1890,11 +1851,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) rt_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step ) rho_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step ) - !$ser mode write - ! $ser data ru_driving_values=ru_driving_values - ! $ser data rt_driving_values=rt_driving_values - ! $ser data rho_driving_values=rho_driving_values - call mpas_timer_start('atm_bdy_adjust_dynamics_relaxzone_tend') !$OMP PARALLEL DO do thread=1,nThreads @@ -1921,13 +1877,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! do small_step = 1, number_sub_steps(rk_step) - ! !$ser savepoint acoustic datetime=xtime_new rank=domain%dm_info%my_proc_id rk_step=rk_step small_step=small_step - !$ser savepoint acoustic datetime=xtime_new rk_step=rk_step small_step=small_step - + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) !$acc update self(rho_pp) call exchange_halo_group(domain, 'dynamics:rho_pp') - ! $ser data rho_pp_post=rho_pp !$acc update device(rho_pp) call mpas_timer_start('atm_advance_acoustic_step') @@ -1954,7 +1907,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) !$acc update self(rtheta_pp) call exchange_halo_group(domain, 'dynamics:rtheta_pp') - ! $ser data rtheta_pp_post=rtheta_pp !$acc update device(rtheta_pp) ! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -1971,7 +1923,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_timer_stop('atm_divergence_damping_3d') end do ! end of acoustic steps loop - !$ser savepoint rk_loop2 datetime=xtime_new rk_step=rk_step ! ! Communicate halos for rw_p[1,2], ru_p[1,2], rho_pp[1,2], rtheta_pp[2] @@ -1982,10 +1933,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp') - ! $ser data rw_p_post=rw_p - ! $ser data ru_p_post=ru_p - ! $ser data rho_pp_post=rho_pp - ! $ser data rtheta_pp_post=rtheta_pp !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) call mpas_timer_start('atm_recover_large_step_variables') @@ -2019,9 +1966,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) time_dyn_step = dt_dynamics*real(dynamics_substep-1) + rk_timestep(rk_step) ru_driving_values(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nEdges, 'u', time_dyn_step ) - !$acc enter data copyin(ru_driving_values) - ! do this inline at present - it is simple enough !$acc parallel !$acc loop gang @@ -2035,12 +1980,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) end do !$acc end parallel - - ru_driving_values(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nEdges, 'ru', time_dyn_step ) - !$acc update device(ru_driving_values) - call mpas_pool_get_array(diag, 'ru', u) ! do this inline at present - it is simple enough !$acc parallel @@ -2054,7 +1995,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) end if end do !$acc end parallel - !$acc exit data delete(ru_driving_values) deallocate(ru_driving_values) @@ -2070,7 +2010,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) else call exchange_halo_group(domain, 'dynamics:u_3') end if - ! $ser data u_post=u !$acc update device(u) ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). @@ -2078,9 +2017,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then - !$ser savepoint advance_scalars1 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step - - call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) @@ -2089,7 +2025,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') - !$ser data scalars_2_post_1=scalars_2 !$acc update device(scalars_2) allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2146,7 +2081,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars') - !$ser data scalars_2_post_2=scalars_2 !$acc update device(scalars_2) else ! @@ -2154,9 +2088,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) ! call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge') end if - !$ser data w_post=w - !$ser data pv_edge_post=pv_edge - !$ser data rho_edge_post=rho_edge !$acc update device(w,pv_edge,rho_edge) ! set the zero-gradient condition on w for regional_MPAS @@ -2174,7 +2105,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'w', w, 2) !$acc update self(w) call exchange_halo_group(domain, 'dynamics:w') - !$ser data w_post=w !$acc update device(w) end if ! end of regional_MPAS addition @@ -2191,9 +2121,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) !$acc update self(theta_m,pressure_p,rtheta_p) call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p') - ! $ser data theta_m_post=theta_m - ! $ser data pressure_p_post=pressure_p - ! $ser data rtheta_p_post=rtheta_p !$acc update device(theta_m,pressure_p,rtheta_p) ! @@ -2251,8 +2178,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) if(config_time_integration_order == 2) rk_timestep(1) = dt/2. RK3_SPLIT_TRANSPORT : do rk_step = 1, 3 ! Runge-Kutta loop - - !$ser savepoint advance_scalars2 datetime=xtime_new dyn_substep=dynamics_substep rk_step=rk_step + call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) @@ -2263,7 +2189,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') - !$ser data scalars_2_post=scalars_2 !$acc update device(scalars_2) allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2293,7 +2218,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') - !$ser data scalars_2_post=scalars_2 !$acc update device(scalars_2) end if @@ -2329,7 +2253,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) !$acc update self(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_1) + !$acc update self(scalars_2) if(config_convection_scheme == 'cu_grell_freitas' .or. & config_convection_scheme == 'cu_ntiedtke') then @@ -2341,7 +2265,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) !$acc update self(rthdynten) - !NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio !requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo !update for the scalars at time_levs(1) is applied. A halo update for the scalars at time_levs(2) is done above. @@ -2416,10 +2339,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group, nowTime) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') - !$ser data scalars_2_post=scalars_2 !$acc update device(scalars_2) - allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) ! @@ -2665,15 +2586,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - ! $ser mode write - !$ser mode write - ! $ser accdata rtheta_p=rtheta_p - ! $ser accdata rho_p=rho_p - ! $ser accdata theta_m_1=theta_m_1 - ! $ser accdata scalars_1=scalars_1 - ! $ser accdata rho_zz_1=rho_zz_1 - - !$acc kernels theta_m_2(:,cellEnd+1) = 0.0_RKIND !$acc end kernels @@ -2720,9 +2632,7 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel - !$ser accdata scalars_2=scalars_2 - - + end subroutine atm_rk_integration_setup @@ -2824,10 +2734,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel - !$ser mode write - ! $ser accdata cqu=cqu - ! $ser accdata cqw=cqw - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') !$acc exit data copyout(qtot) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') @@ -2967,12 +2873,6 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, !$acc enter data create(b_tri, c_tri) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - - !$ser mode write - ! $ser accdata p=p - ! $ser accdata t=t - - ! set coefficients dtseps = .5*dts*(1.+epssm) rcv = rgas/(cp-rgas) @@ -3051,9 +2951,6 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, end do ! loop over cells !$acc end parallel - ! $ser accdata cofrz=cofrz - ! $ser accdata alpha_tri=alpha_tri - MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') !$acc exit data copyout( b_tri, c_tri) !$acc exit data delete(qtot) @@ -3924,7 +3821,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE integer :: i, iCell, iEdge, k, cell1, cell2 real (kind=RKIND) :: invNs, rcv, p0, flux - rcv = rgas/(cp-rgas) p0 = 1.0e+05 ! this should come from somewhere else... @@ -4070,7 +3966,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE end do !$acc end parallel - end subroutine atm_recover_large_step_variables_work @@ -4309,11 +4204,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc enter data create(horiz_flux_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - - !$ser accdata scalar_new=scalar_new - !$ser accdata uhAvg=uhAvg - - !$acc parallel !$acc loop gang worker private(scalar_weight2, ica) do iEdge=edgeStart,edgeEnd @@ -4411,10 +4301,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc enter data create(scalar_tend_column, wdtn) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - - !$ser accdata scalar_old=scalar_old - - !$acc parallel wait !$acc loop gang worker private(scalar_tend_column, wdtn) do iCell=cellSolveStart,cellSolveEnd @@ -4493,10 +4379,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & end do !$acc end parallel - !$ser accdata scalar_tend_save=scalar_tend_save - !$ser accdata scalar_tend_column=scalar_tend_column - !$ser accdata scalar_new2=scalar_new - MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc exit data delete(scalar_tend_column, wdtn, & !$acc horiz_flux_arr) @@ -4769,9 +4651,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') - - !$ser accdata scalars_old_mono=scalars_old - !$acc parallel !$acc loop gang worker @@ -5365,10 +5244,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! loop over scalars - - !$ser accdata scalars_new_mono2=scalars_new - - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') if (local_advance_density) then !$acc exit data copyout(rho_zz_int) @@ -6863,7 +6738,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(delsq_w) end if !$acc exit data delete(dpdz) - !$acc exit data delete(rayleigh_damp_coef) !$acc exit data delete(tend_ru_physics) !$acc exit data delete(tend_rtheta_physics) @@ -7140,7 +7014,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Replace 2.0 with 2 in exponentiation to avoid outside chance that ! compiler will actually allow "float raised to float" operation - !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -7310,8 +7183,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Compute pv at cell centers ! ( this computes pv_cell for all real cells ) ! only needed for APVM upwinding - ! - + ! !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -7929,13 +7801,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end do !$acc end parallel - !$ser mode write - ! $ser accdata tend_ru=tend_ru - ! $ser accdata tend_rt=tend_rt - ! $ser accdata tend_rw=tend_rw - ! $ser accdata tend_rho=tend_rho - - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') !$acc exit data delete(rho_driving_tend,rt_driving_tend, & !$acc ru_driving_tend) @@ -8046,11 +7911,6 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel - !$ser mode write - ! $ser accdata tend_rho1=tend_rho - ! $ser accdata tend_rt1=tend_rt - - !$acc parallel default(present) !$acc loop gang worker do iEdge = edgeStart, edgeEnd @@ -8064,8 +7924,6 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel - ! $ser accdata tend_ru1=tend_ru - ! Second, the horizontal filter for rtheta_m and rho_zz !$acc parallel default(present) !$acc loop gang worker @@ -8098,10 +7956,6 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do !$acc end parallel - ! $ser accdata tend_rho2=tend_rho - ! $ser accdata tend_rt2=tend_rt - - ! Third (and last), the horizontal filter for ru !$acc parallel default(present) !$acc loop gang worker private(divergence1, divergence2, vorticity1, vorticity2) @@ -8187,9 +8041,6 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel - !$ser mode write - ! $ser accdata tend_ru_fin=tend_ru - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') !$acc exit data delete(rho_driving_values, rt_driving_values, & !$acc ru_driving_values, divergence1, divergence2, vorticity1, vorticity2) diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F index 56361e8b98..3aba022e47 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_interface.F +++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F @@ -1039,7 +1039,6 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te case default end select mp_tend_select - !$acc update device(exner, exner_b, pressure_b, pressure_p, rtheta_b) !$acc update device(rtheta_p, rho_zz, theta_m, scalars) !$acc update device(rt_diabatic_tend) diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F index 76a8a6c280..e51331e99e 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F +++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F @@ -133,9 +133,7 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s call mpas_pool_get_array(diag ,'rho_edge',mass_edge) call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys) - !$acc update self(theta_m, scalars, mass, mass_edge) - !! $acc tend_ru_physics,tend_rtheta_physics,tend_rho_physics) - + !$acc update self(theta_m, scalars, mass, mass_edge) call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) @@ -176,7 +174,6 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s !$acc update self(tend_scalars) ! Probably not needed - !initialize the tendency for the potential temperature and all scalars due to PBL, convection, !and longwave and shortwave radiation: allocate(tend_th(nVertLevels,nCellsSolve)) @@ -225,9 +222,6 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s exchange_halo_group) !$acc update device(tend_scalars) - -!!$acc update device(tend_ru_physics,tend_rtheta_physics,tend_rho_physics) - !clean up any pointers that were allocated with zero size before the call to physics_get_tend_work: if(size(rucuten) == 0 ) deallocate(rucuten ) if(size(rvcuten) == 0 ) deallocate(rvcuten ) From 936765b30890b8cd0bf705566f7ddecead9b86df Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 6 May 2025 18:04:00 -0600 Subject: [PATCH 71/79] more cleanup --- src/core_atmosphere/dynamics/mpas_atm_iau.F | 2 -- .../dynamics/mpas_atm_time_integration.F | 19 +++---------------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F index e1b5480eac..c0b9fab65e 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_iau.F +++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F @@ -152,7 +152,6 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) !$acc update self(tend_scalars) - call mpas_pool_get_array(tend_iau, 'theta', theta_amb) call mpas_pool_get_array(tend_iau, 'rho', rho_amb) call mpas_pool_get_array(tend_iau, 'u', u_amb) @@ -212,7 +211,6 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten tend_rtheta(k,i) = tend_rtheta(k,i) + tend_th(k,i) enddo enddo - deallocate(theta) deallocate(tend_th) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 2ca2a25979..3a180894c7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -280,9 +280,6 @@ subroutine mpas_atm_dynamics_init(domain) #ifdef MPAS_OPENACC nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) - ! nullify(tend_physics) - ! call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) - call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -456,22 +453,11 @@ subroutine mpas_atm_dynamics_init(domain) !$acc enter data copyin(t_init) call mpas_pool_get_array(mesh, 'qv_init', qv_init) !$acc enter data copyin(qv_init) - - - ! Pointers for these are declared as module level variables - ! call mpas_pool_get_array(tend_physics, 'tend_rho_physics', tend_rho_physics) - ! !$acc enter data copyin(tend_rho_physics) - ! call mpas_pool_get_array(tend_physics, 'tend_ru_physics', tend_ru_physics) - ! !$acc enter data copyin(tend_ru_physics) - ! call mpas_pool_get_array(tend_physics, 'tend_rtheta_physics', tend_rtheta_physics) - ! !$acc enter data copyin(tend_rtheta_physics) - !call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) - !!$acc enter data copyin(rthdynten) - #endif end subroutine mpas_atm_dynamics_init + subroutine mpas_atm_pre_computesolvediag_h2d(block) implicit none @@ -519,7 +505,6 @@ subroutine mpas_atm_pre_computesolvediag_h2d(block) end subroutine mpas_atm_pre_computesolvediag_h2d - subroutine mpas_atm_post_computesolvediag_d2h(block) implicit none @@ -2002,6 +1987,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if ! regional_MPAS addition !------------------------------------------------------------------- + call mpas_pool_get_array(state, 'u', u, 2) !$acc update self(u) ! u @@ -2070,6 +2056,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) deallocate(ke_edge) call mpas_timer_stop('atm_compute_solve_diagnostics') + call mpas_pool_get_array(state, 'w', w, 2) call mpas_pool_get_array(diag, 'pv_edge', pv_edge) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) From e99150c36002b9a6e75ebdff8ad5819be1932596 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 6 May 2025 22:05:48 -0600 Subject: [PATCH 72/79] functions to subroutines --- .../dynamics/mpas_atm_boundaries.F | 29 +++----- .../dynamics/mpas_atm_time_integration.F | 67 +++++++++---------- 2 files changed, 40 insertions(+), 56 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index cf76aa5c4a..442b29b972 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -287,7 +287,7 @@ end subroutine mpas_atm_update_bdy_tend !> tend_scalars(1,:,:) = mpas_atm_get_bdy_tend(clock, domain % blocklist, nVertLevels, nCells, 'qv', 0.0_RKIND) ! !----------------------------------------------------------------------- - function mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t) result(return_tend) + subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t, return_tend) implicit none @@ -296,8 +296,7 @@ function mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t) integer, intent(in) :: vertDim, horizDim character(len=*), intent(in) :: field real (kind=RKIND), intent(in) :: delta_t - - real (kind=RKIND), dimension(vertDim,horizDim+1) :: return_tend + real (kind=RKIND), dimension(vertDim,horizDim+1), intent(out) :: return_tend type (mpas_pool_type), pointer :: lbc integer, pointer :: idx_ptr @@ -312,7 +311,6 @@ function mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') - !$acc enter data create(return_tend) if (associated(tend)) then else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) @@ -342,10 +340,9 @@ function mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t) !$acc end parallel MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') - !$acc exit data copyout(return_tend) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') - end function mpas_atm_get_bdy_tend + end subroutine mpas_atm_get_bdy_tend !*********************************************************************** @@ -381,7 +378,7 @@ end function mpas_atm_get_bdy_tend !> scalars(1,:,:) = mpas_atm_get_bdy_state(clock, domain % blocklist, nVertLevels, nCells, 'qv', 0.0_RKIND) ! !----------------------------------------------------------------------- - function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta_t) result(return_state) + subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta_t, return_state) use mpas_pool_routines, only : mpas_pool_get_error_level, mpas_pool_set_error_level use mpas_derived_types, only : MPAS_POOL_SILENT @@ -394,8 +391,7 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta integer, intent(in) :: vertDim, horizDim character(len=*), intent(in) :: field real (kind=RKIND), intent(in) :: delta_t - - real (kind=RKIND), dimension(vertDim,horizDim+1) :: return_state + real (kind=RKIND), dimension(vertDim,horizDim+1), intent(out) :: return_state type (mpas_pool_type), pointer :: lbc integer, pointer :: idx_ptr @@ -447,7 +443,6 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta ! if (associated(tend) .and. associated(state)) then MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data create(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) @@ -460,7 +455,6 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta !$acc end parallel MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data copyout(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) @@ -470,7 +464,6 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta idx=idx_ptr ! Avoid non-array pointer for OpenACC MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data create(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) @@ -483,11 +476,10 @@ function mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, delta !$acc end parallel MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data copyout(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') end if - end function mpas_atm_get_bdy_state_2d + end subroutine mpas_atm_get_bdy_state_2d !*********************************************************************** @@ -520,7 +512,7 @@ end function mpas_atm_get_bdy_state_2d !> num_scalars, nVertLevels, nCells, 'scalars', 0.0_RKIND) ! !----------------------------------------------------------------------- - function mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, field, delta_t) result(return_state) + subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, field, delta_t, return_state) use mpas_pool_routines, only : mpas_pool_get_error_level, mpas_pool_set_error_level use mpas_derived_types, only : MPAS_POOL_SILENT @@ -532,8 +524,7 @@ function mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, fi integer, intent(in) :: innerDim, vertDim, horizDim character(len=*), intent(in) :: field real (kind=RKIND), intent(in) :: delta_t - - real (kind=RKIND), dimension(innerDim,vertDim,horizDim+1) :: return_state + real (kind=RKIND), dimension(innerDim,vertDim,horizDim+1), intent(out) :: return_state type (mpas_pool_type), pointer :: lbc real (kind=RKIND), dimension(:,:,:), pointer :: tend @@ -566,7 +557,6 @@ function mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, fi call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2) MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc enter data create(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') !$acc parallel default(present) @@ -581,10 +571,9 @@ function mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, fi !$acc end parallel MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc exit data copyout(return_state) MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - end function mpas_atm_get_bdy_state_3d + end subroutine mpas_atm_get_bdy_state_3d !*********************************************************************** diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 3a180894c7..f629674971 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -78,13 +78,23 @@ end subroutine halo_exchange_routine real (kind=RKIND), dimension(:,:), allocatable :: wdtn_arr real (kind=RKIND), dimension(:,:), allocatable :: rho_zz_int - real (kind=RKIND), dimension(:,:,:), allocatable :: scalars_driving ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: ru_driving_tend ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: rt_driving_tend ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: rho_driving_tend ! regional_MPAS addition + !$acc declare create(ru_driving_tend) + !$acc declare create(rt_driving_tend) + !$acc declare create(rho_driving_tend) + + real (kind=RKIND), dimension(:,:,:), allocatable :: scalars_driving ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: ru_driving_values ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: rt_driving_values ! regional_MPAS addition - real (kind=RKIND), dimension(:,:), allocatable :: rho_driving_values ! regional_MPAS addition + real (kind=RKIND), dimension(:,:), allocatable :: rho_driving_values ! regional_MPAS addition + + !$acc declare create(scalars_driving) + !$acc declare create(ru_driving_values) + !$acc declare create(rt_driving_values) + !$acc declare create(rho_driving_values) + integer, dimension(:), pointer :: bdyMaskEdge ! regional_MPAS addition logical, pointer :: config_apply_lbcs @@ -1807,9 +1817,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) allocate(ru_driving_tend(nVertLevels,nEdges+1)) allocate(rt_driving_tend(nVertLevels,nCells+1)) allocate(rho_driving_tend(nVertLevels,nCells+1)) - ru_driving_tend(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nEdges, 'ru', 0.0_RKIND ) - rt_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rtheta_m', 0.0_RKIND ) - rho_driving_tend(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rho_zz', 0.0_RKIND ) + call mpas_atm_get_bdy_tend( clock, block, nVertLevels, nEdges, 'ru', 0.0_RKIND, ru_driving_tend) + call mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rtheta_m', 0.0_RKIND, rt_driving_tend) + call mpas_atm_get_bdy_tend( clock, block, nVertLevels, nCells, 'rho_zz', 0.0_RKIND, rho_driving_tend) !$OMP PARALLEL DO do thread=1,nThreads call atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, block % configs, nVertLevels, & @@ -1832,9 +1842,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) allocate(rho_driving_values(nVertLevels,nCells+1)) time_dyn_step = dt_dynamics*real(dynamics_substep-1) + rk_timestep(rk_step) - ru_driving_values(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nEdges, 'ru', time_dyn_step ) - rt_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step ) - rho_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step ) + call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'ru', time_dyn_step, ru_driving_values) + call mpas_atm_get_bdy_state(clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step, rt_driving_values) + call mpas_atm_get_bdy_state(clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step, rho_driving_values) call mpas_timer_start('atm_bdy_adjust_dynamics_relaxzone_tend') !$OMP PARALLEL DO @@ -1950,8 +1960,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) time_dyn_step = dt_dynamics*real(dynamics_substep-1) + rk_timestep(rk_step) - ru_driving_values(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nEdges, 'u', time_dyn_step ) - !$acc enter data copyin(ru_driving_values) + call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'u', time_dyn_step, ru_driving_values) ! do this inline at present - it is simple enough !$acc parallel !$acc loop gang @@ -1965,8 +1974,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end do !$acc end parallel - ru_driving_values(1:nVertLevels,1:nEdges+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nEdges, 'ru', time_dyn_step ) - !$acc update device(ru_driving_values) + call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'ru', time_dyn_step, ru_driving_values) call mpas_pool_get_array(diag, 'ru', u) ! do this inline at present - it is simple enough !$acc parallel @@ -1980,7 +1988,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if end do !$acc end parallel - !$acc exit data delete(ru_driving_values) deallocate(ru_driving_values) @@ -2018,8 +2025,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! get the scalar values driving the regional boundary conditions ! - scalars_driving(:,:,:) = mpas_atm_get_bdy_state(clock, block, num_scalars, nVertLevels, nCells, & - 'scalars', rk_timestep(rk_step)) + call mpas_atm_get_bdy_state(clock, block, num_scalars, nVertLevels, nCells, & + 'scalars', rk_timestep(rk_step), scalars_driving) !$OMP PARALLEL DO do thread=1,nThreads @@ -2183,8 +2190,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! get the scalar values driving the regional boundary conditions ! - scalars_driving(:,:,:) = mpas_atm_get_bdy_state(clock, block, num_scalars, nVertLevels, nCells, & - 'scalars', rk_timestep(rk_step)) + call mpas_atm_get_bdy_state(clock, block, num_scalars, nVertLevels, nCells, & + 'scalars', rk_timestep(rk_step), scalars_driving) !$OMP PARALLEL DO do thread=1,nThreads @@ -2303,8 +2310,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) allocate(rho_driving_values(nVertLevels,nCells+1)) time_dyn_step = dt ! end of full timestep values - rt_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step ) - rho_driving_values(1:nVertLevels,1:nCells+1) = mpas_atm_get_bdy_state( clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step ) + call mpas_atm_get_bdy_state(clock, block, nVertLevels, nCells, 'rtheta_m', time_dyn_step, rt_driving_values) + call mpas_atm_get_bdy_state(clock, block, nVertLevels, nCells, 'rho_zz', time_dyn_step, rho_driving_values) !$OMP PARALLEL DO do thread=1,nThreads @@ -2333,7 +2340,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! get the scalar values driving the regional boundary conditions ! - scalars_driving(:,:,:) = mpas_atm_get_bdy_state(clock, block, num_scalars, nVertLevels, nCells, 'scalars', dt) + call mpas_atm_get_bdy_state(clock, block, num_scalars, nVertLevels, nCells, 'scalars', dt, scalars_driving) !$OMP PARALLEL DO do thread=1,nThreads @@ -7757,8 +7764,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc enter data copyin(rho_driving_tend,rt_driving_tend, & - !$acc ru_driving_tend) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') !$acc parallel default(present) @@ -7789,8 +7794,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc exit data delete(rho_driving_tend,rt_driving_tend, & - !$acc ru_driving_tend) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_speczone_tend @@ -7878,8 +7881,6 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me vertexDegree = vertexDegree_ptr MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc enter data copyin(rho_driving_values, & - !$acc rt_driving_values, ru_driving_values) !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') @@ -8028,9 +8029,8 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc exit data delete(rho_driving_values, rt_driving_values, & - !$acc ru_driving_values, divergence1, divergence2, vorticity1, vorticity2) + MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') + !$acc exit data delete(divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_relaxzone_tend @@ -8067,7 +8067,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc enter data copyin(rt_driving_values, rho_driving_values) MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') !$acc parallel default(present) @@ -8084,7 +8083,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc exit data delete(rt_driving_values, rho_driving_values) MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') end subroutine atm_bdy_reset_speczone_values @@ -8176,8 +8174,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !--- MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc enter data create(scalars_tmp) & - !$acc copyin(scalars_driving) + !$acc enter data create(scalars_tmp) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') !$acc parallel default(present) @@ -8261,7 +8258,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc exit data delete(scalars_tmp, scalars_driving) + !$acc exit data delete(scalars_tmp) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') end subroutine atm_bdy_adjust_scalars_work @@ -8333,7 +8330,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !--- MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc enter data copyin(scalars_driving) MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') !$acc parallel default(present) @@ -8358,7 +8354,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc exit data delete(scalars_driving) MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') end subroutine atm_bdy_set_scalars_work From aa7aa85e08fa718042f592394b4065c6c4d452f9 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 6 May 2025 22:59:23 -0600 Subject: [PATCH 73/79] data creates for some module variables --- .../dynamics/mpas_atm_time_integration.F | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f629674971..e679245b4c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -65,6 +65,9 @@ end subroutine halo_exchange_routine ! real (kind=RKIND), allocatable, dimension(:,:) :: delsq_circulation ! no longer used -> removed real (kind=RKIND), allocatable, dimension(:,:) :: delsq_vorticity real (kind=RKIND), allocatable, dimension(:,:) :: dpdz + !$acc declare create(qtot) + !$acc declare create(delsq_theta, delsq_w, delsq_divergence) + !$acc declare create(delsq_u, delsq_vorticity, dpdz) ! Used in atm_advance_scalars real (kind=RKIND), dimension(:,:,:), allocatable :: horiz_flux_array @@ -1562,7 +1565,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! allocate storage for physics tendency save ! allocate(qtot(nVertLevels,nCells+1)) + !$acc kernels qtot(:,nCells+1) = 0.0_RKIND + !$acc end kernels #ifndef MPAS_CAM_DYCORE call mpas_pool_get_field(tend_physics, 'tend_rtheta_physics', tend_rtheta_physicsField) @@ -1749,19 +1754,31 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_start('atm_compute_dyn_tend') allocate(delsq_theta(nVertLevels,nCells+1)) + !$acc kernels delsq_theta(:,nCells+1) = 0.0_RKIND + !$acc end kernels allocate(delsq_w(nVertLevels,nCells+1)) + !$acc kernels delsq_w(:,nCells+1) = 0.0_RKIND + !$acc end kernels !! allocate(qtot(nVertLevels,nCells+1)) ! initializing this earlier in solution sequence allocate(delsq_divergence(nVertLevels,nCells+1)) + !$acc kernels delsq_divergence(:,nCells+1) = 0.0_RKIND + !$acc end kernels allocate(delsq_u(nVertLevels,nEdges+1)) + !$acc kernels delsq_u(:,nEdges+1) = 0.0_RKIND + !$acc end kernels !! allocate(delsq_circulation(nVertLevels,nVertices+1)) ! no longer used -> removed allocate(delsq_vorticity(nVertLevels,nVertices+1)) + !$acc kernels delsq_vorticity(:,nVertices+1) = 0.0_RKIND + !$acc end kernels allocate(dpdz(nVertLevels,nCells+1)) + !$acc kernels dpdz(:,nCells+1) = 0.0_RKIND + !$acc end kernels !$OMP PARALLEL DO do thread=1,nThreads @@ -2677,7 +2694,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & moist_end = moist_end_ptr MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc enter data create(qtot) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') !$acc parallel default(present) @@ -2729,7 +2745,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc exit data copyout(qtot) MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') end subroutine atm_compute_moist_coefficients @@ -2863,7 +2878,6 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, real (kind=RKIND), dimension( nVertLevels ) :: b_tri, c_tri MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc enter data copyin(qtot) !$acc enter data create(b_tri, c_tri) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') @@ -2947,7 +2961,6 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') !$acc exit data copyout( b_tri, c_tri) - !$acc exit data delete(qtot) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') end subroutine atm_compute_vert_imp_coefs_work @@ -5680,16 +5693,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then !$acc enter data copyin(tend_rho_physics) - !$acc enter data copyin(qtot) - !$acc enter data create(delsq_u) - !$acc enter data create(delsq_vorticity, delsq_divergence) - !$acc enter data create(delsq_w) end if - !$acc enter data create(dpdz) - !$acc enter data copyin(rayleigh_damp_coef) + !$acc enter data create(rayleigh_damp_coef) !$acc enter data copyin(tend_ru_physics) !$acc enter data copyin(tend_rtheta_physics) - !$acc enter data create(delsq_theta) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -6726,16 +6733,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (rk_step == 1) then !$acc exit data delete(tend_rho_physics) - !$acc exit data delete(qtot) - !$acc exit data copyout(delsq_u) - !$acc exit data delete(delsq_vorticity, delsq_divergence) - !$acc exit data copyout(delsq_w) end if - !$acc exit data delete(dpdz) !$acc exit data delete(rayleigh_damp_coef) !$acc exit data delete(tend_ru_physics) !$acc exit data delete(tend_rtheta_physics) - !$acc exit data delete(delsq_theta) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From 2bdcee7c07be14946415390120892c49c9386ff4 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 7 May 2025 10:33:29 -0600 Subject: [PATCH 74/79] more data movements around compute solve diagnostics --- .../dynamics/mpas_atm_time_integration.F | 346 ++++++++++++++---- src/core_atmosphere/mpas_atm_core.F | 3 +- 2 files changed, 286 insertions(+), 63 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index e679245b4c..7dbab64405 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -104,6 +104,9 @@ end subroutine halo_exchange_routine ! Used in compute_solve_diagnostics real (kind=RKIND), allocatable, dimension(:,:) :: ke_vertex real (kind=RKIND), allocatable, dimension(:,:) :: ke_edge + !$acc declare create(ke_edge) + + type (MPAS_Clock_type), pointer, private :: clock type (block_type), pointer, private :: block @@ -479,19 +482,67 @@ subroutine mpas_atm_pre_computesolvediag_h2d(block) #ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: tend_physics real (kind=RKIND), dimension(:,:), pointer :: rthdynten - real (kind=RKIND), dimension(:,:), pointer :: h_edge, h, u, v, & - vorticity, ke, pv_edge, pv_vertex, pv_cell, gradPVn, gradPVt, & - divergence + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) nullify(diag) call mpas_pool_get_subpool(block % structs, 'diag', diag) - nullify(tend_physics) - call mpas_pool_get_subpool(block % structs, 'tend_physics', tend_physics) + + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc enter data create(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc enter data copyin(u) call mpas_pool_get_array(diag, 'v', v) !$acc enter data copyin(v) @@ -513,6 +564,89 @@ subroutine mpas_atm_pre_computesolvediag_h2d(block) !$acc enter data copyin(gradPVn) call mpas_pool_get_array(diag, 'gradPVt', gradPVt) !$acc enter data copyin(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc enter data copyin(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc enter data copyin(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc enter data copyin(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc enter data copyin(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc enter data copyin(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc enter data copyin(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc enter data copyin(zb3) + + ! Required by atm_compute_solve_diagnostics + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc enter data copyin(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc enter data copyin(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc enter data copyin(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc enter data copyin(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc enter data copyin(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc enter data copyin(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc enter data copyin(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc enter data copyin(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc enter data copyin(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc enter data copyin(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc enter data copyin(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc enter data copyin(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc enter data copyin(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc enter data copyin(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc enter data copyin(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc enter data copyin(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc enter data copyin(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc enter data copyin(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc enter data copyin(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc enter data copyin(fVertex) #endif end subroutine mpas_atm_pre_computesolvediag_h2d @@ -526,18 +660,67 @@ subroutine mpas_atm_post_computesolvediag_d2h(block) #ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: tend_physics real (kind=RKIND), dimension(:,:), pointer :: rthdynten - real (kind=RKIND), dimension(:,:), pointer :: h_edge, h, u, v, & - vorticity, ke, pv_edge, pv_vertex, pv_cell, gradPVn, gradPVt, & - divergence + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) nullify(diag) call mpas_pool_get_subpool(block % structs, 'diag', diag) - nullify(tend_physics) - call mpas_pool_get_subpool(block % structs, 'tend_physics', tend_physics) + + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc exit data copyout(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc exit data copyout(u) call mpas_pool_get_array(diag, 'v', v) !$acc exit data copyout(v) @@ -559,6 +742,89 @@ subroutine mpas_atm_post_computesolvediag_d2h(block) !$acc exit data copyout(gradPVn) call mpas_pool_get_array(diag, 'gradPVt', gradPVt) !$acc exit data copyout(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc exit data delete(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc exit data delete(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc exit data delete(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc exit data delete(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc exit data delete(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc exit data delete(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc exit data delete(zb3) + + + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc exit data delete(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc exit data delete(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc exit data delete(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc exit data delete(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc exit data delete(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc exit data delete(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc exit data delete(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc exit data delete(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc exit data delete(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc exit data delete(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc exit data delete(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc exit data delete(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc exit data delete(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc exit data delete(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc exit data delete(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc exit data delete(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc exit data delete(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc exit data delete(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc exit data delete(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc exit data delete(fVertex) #endif end subroutine mpas_atm_post_computesolvediag_d2h @@ -2065,7 +2331,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) allocate(ke_vertex(nVertLevels,nVertices+1)) ke_vertex(:,nVertices+1) = 0.0_RKIND allocate(ke_edge(nVertLevels,nEdges+1)) + !$acc kernels ke_edge(:,nEdges+1) = 0.0_RKIND + !$acc end kernels !$OMP PARALLEL DO do thread=1,nThreads @@ -4648,11 +4916,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') -#ifdef DO_PHYSICS - !$acc enter data copyin(scalar_tend) -#else - !$acc enter data create(scalar_tend) -#endif + if (local_advance_density) then !$acc enter data copyin(rho_zz_int) end if @@ -4680,7 +4944,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc exit data copyout(scalar_tend) !$acc update self(scalars_old) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -6905,26 +7168,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & logical :: reconstruct_v - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc fVertex, & - !$acc verticesOnEdge, & - !$acc invDvEdge,invDcEdge) - !$acc enter data copyin(u,h) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') ! ! Compute height on cell edges at velocity locations ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(ke_edge) ! local variable - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iEdge=edgeStart,edgeEnd @@ -7217,9 +7464,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Merged loops for calculating gradPVt, gradPVn and pv_edge ! Also precomputed inverses of dvEdge and dcEdge to avoid repeated divisions ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') r = config_apvm_upwinding * dt !$acc parallel default(present) !$acc loop gang @@ -7240,19 +7484,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & end if ! apvm upwinding - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc verticesOnEdge, & - !$acc fVertex,invDvEdge,invDcEdge) - !$acc exit data delete(u,h) - !$acc exit data copyout(ke_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') end subroutine atm_compute_solve_diagnostics_work @@ -7342,17 +7573,13 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! copyin invariant fields - !$acc enter data copyin(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) - + ! copyin the data that is only on the right-hand side - !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc enter data copyin(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! copyin the data that will be modified in this routine - !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc enter data create(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -7476,17 +7703,12 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc end parallel MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! delete invariant fields - !$acc exit data delete(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) - ! delete the data that is only on the right-hand side - !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc exit data delete(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! copyout the data that will be modified in this routine - !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc exit data copyout(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index d7766d6af9..de5b2d0d00 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -476,7 +476,9 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) allocate(ke_vertex(nVertLevels,nVertices+1)) ! ke_vertex is a module variable defined in mpas_atm_time_integration.F ke_vertex(:,nVertices+1) = 0.0_RKIND allocate(ke_edge(nVertLevels,nEdges+1)) ! ke_edge is a module variable defined in mpas_atm_time_integration.F + !$acc kernels ke_edge(:,nEdges+1) = 0.0_RKIND + !$acc end kernels call mpas_pool_get_dimension(block % dimensions, 'nThreads', nThreads) @@ -507,7 +509,6 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) vertexSolveThreadStart(thread), vertexSolveThreadEnd(thread), & edgeSolveThreadStart(thread), edgeSolveThreadEnd(thread)) end if - call atm_compute_solve_diagnostics(dt, state, 1, diag, mesh, block % configs, & cellThreadStart(thread), cellThreadEnd(thread), & vertexThreadStart(thread), vertexThreadEnd(thread), & From 6c3963c8b8fa45413106404fff8ebba19642f59e Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 7 May 2025 21:14:34 -0600 Subject: [PATCH 75/79] moving copies of tendency terms consumed in atm_compute_dyn_tend --- .../dynamics/mpas_atm_time_integration.F | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 7dbab64405..f46589fa86 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -860,6 +860,7 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 @@ -961,6 +962,10 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) !$acc enter data copyin(h_divergence) call mpas_pool_get_array(diag, 'ke', ke) !$acc enter data copyin(ke) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) !$acc enter data copyin(alpha_tri) @@ -1114,6 +1119,7 @@ subroutine mpas_atm_post_dynamics_d2h(domain) real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 @@ -1215,6 +1221,10 @@ subroutine mpas_atm_post_dynamics_d2h(domain) !$acc exit data copyout(h_divergence) call mpas_pool_get_array(diag, 'ke', ke) !$acc exit data copyout(ke) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data copyout(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data copyout(gradPVt) call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) !$acc exit data copyout(alpha_tri) @@ -1971,6 +1981,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) tend_ru_physics, tend_rtheta_physics, tend_rho_physics) end if + !$acc enter data copyin(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) + DYNAMICS_SUBSTEPS : do dynamics_substep = 1, dynamics_split @@ -2437,6 +2449,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) deallocate(qtot) ! we are finished with these now + !$acc exit data delete(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) #ifndef MPAS_CAM_DYCORE call mpas_deallocate_scratch_field(tend_rtheta_physicsField) call mpas_deallocate_scratch_field(tend_rho_physicsField) @@ -5954,12 +5967,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm call mpas_log_write('-- RK step $i --', intArgs=[rk_step]) MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (rk_step == 1) then - !$acc enter data copyin(tend_rho_physics) - end if !$acc enter data create(rayleigh_damp_coef) - !$acc enter data copyin(tend_ru_physics) - !$acc enter data copyin(tend_rtheta_physics) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -6994,12 +7002,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (rk_step == 1) then - !$acc exit data delete(tend_rho_physics) - end if !$acc exit data delete(rayleigh_damp_coef) - !$acc exit data delete(tend_ru_physics) - !$acc exit data delete(tend_rtheta_physics) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From ca1d38e83695c50fba18dc742fd9a4d01778e8bf Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 7 May 2025 23:31:41 -0600 Subject: [PATCH 76/79] using delete for lbc state and tend in post --- .../dynamics/mpas_atm_time_integration.F | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f46589fa86..372ab599bf 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1302,43 +1302,43 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) - !$acc exit data copyout(lbc_u) + !$acc exit data delete(lbc_u) call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) - !$acc exit data copyout(lbc_w) + !$acc exit data delete(lbc_w) call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) - !$acc exit data copyout(lbc_ru) + !$acc exit data delete(lbc_ru) call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) - !$acc exit data copyout(lbc_rho_edge) + !$acc exit data delete(lbc_rho_edge) call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) - !$acc exit data copyout(lbc_theta) + !$acc exit data delete(lbc_theta) call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) - !$acc exit data copyout(lbc_rtheta_m) + !$acc exit data delete(lbc_rtheta_m) call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) - !$acc exit data copyout(lbc_rho_zz) + !$acc exit data delete(lbc_rho_zz) call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) - !$acc exit data copyout(lbc_rho) + !$acc exit data delete(lbc_rho) call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) - !$acc exit data copyout(lbc_scalars) + !$acc exit data delete(lbc_scalars) call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) - !$acc exit data copyout(lbc_tend_u) + !$acc exit data delete(lbc_tend_u) call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) - !$acc exit data copyout(lbc_tend_ru) + !$acc exit data delete(lbc_tend_ru) call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) - !$acc exit data copyout(lbc_tend_rho_edge) + !$acc exit data delete(lbc_tend_rho_edge) call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) - !$acc exit data copyout(lbc_tend_w) + !$acc exit data delete(lbc_tend_w) call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) - !$acc exit data copyout(lbc_tend_theta) + !$acc exit data delete(lbc_tend_theta) call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) - !$acc exit data copyout(lbc_tend_rtheta_m) + !$acc exit data delete(lbc_tend_rtheta_m) call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) - !$acc exit data copyout(lbc_tend_rho_zz) + !$acc exit data delete(lbc_tend_rho_zz) call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) - !$acc exit data copyout(lbc_tend_rho) + !$acc exit data delete(lbc_tend_rho) call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) - !$acc exit data copyout(lbc_tend_scalars) + !$acc exit data delete(lbc_tend_scalars) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) !$acc exit data copyout(rthdynten) From d4de3f203e13c28881d7e4f9b366c72a33417060 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 8 May 2025 00:01:30 -0600 Subject: [PATCH 77/79] using delete instead of copyout - 1 --- .../dynamics/mpas_atm_time_integration.F | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 372ab599bf..56327449a6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1192,7 +1192,7 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) !$acc exit data copyout(rho_zz_old_split) call mpas_pool_get_array(diag, 'cqw', cqw) - !$acc exit data copyout(cqw) + !$acc exit data delete(cqw) call mpas_pool_get_array(diag, 'cqu', cqu) !$acc exit data copyout(cqu) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) @@ -1227,21 +1227,21 @@ subroutine mpas_atm_post_dynamics_d2h(domain) !$acc exit data copyout(gradPVt) call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) - !$acc exit data copyout(alpha_tri) + !$acc exit data delete(alpha_tri) call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) - !$acc exit data copyout(gamma_tri) + !$acc exit data delete(gamma_tri) call mpas_pool_get_array(diag, 'a_tri', a_tri) - !$acc exit data copyout(a_tri) + !$acc exit data delete(a_tri) call mpas_pool_get_array(diag, 'cofwr', cofwr) - !$acc exit data copyout(cofwr) + !$acc exit data delete(cofwr) call mpas_pool_get_array(diag, 'cofwz', cofwz) - !$acc exit data copyout(cofwz) + !$acc exit data delete(cofwz) call mpas_pool_get_array(diag, 'coftz', coftz) - !$acc exit data copyout(coftz) + !$acc exit data delete(coftz) call mpas_pool_get_array(diag, 'cofwt', cofwt) - !$acc exit data copyout(cofwt) + !$acc exit data delete(cofwt) call mpas_pool_get_array(diag, 'cofrz', cofrz) - !$acc exit data copyout(cofrz) + !$acc exit data delete(cofrz) call mpas_pool_get_array(diag, 'vorticity', vorticity) !$acc exit data copyout(vorticity) call mpas_pool_get_array(diag, 'divergence', divergence) From a8561ccf9f64a93b531145010e00a70c576f561e Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 8 May 2025 14:43:05 -0600 Subject: [PATCH 78/79] using delete instead of copyout - 2 --- .../dynamics/mpas_atm_time_integration.F | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 56327449a6..54abbdc368 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1158,17 +1158,17 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(diag, 'ru_p', ru_p) !$acc exit data copyout(ru_p) call mpas_pool_get_array(diag, 'ru_save', ru_save) - !$acc exit data copyout(ru_save) + !$acc exit data delete(ru_save) call mpas_pool_get_array(diag, 'rw', rw) !$acc exit data copyout(rw) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rw_p', rw_p) !$acc exit data copyout(rw_p) call mpas_pool_get_array(diag, 'rw_save', rw_save) - !$acc exit data copyout(rw_save) + !$acc exit data delete(rw_save) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) !$acc exit data copyout(rtheta_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) - !$acc exit data copyout(rtheta_p_save) + !$acc exit data delete(rtheta_p_save) call mpas_pool_get_array(diag, 'exner', exner) !$acc exit data copyout(exner) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'exner_base', exner_base) @@ -1186,11 +1186,11 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(diag, 'rho_p', rho_p) !$acc exit data copyout(rho_p) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) - !$acc exit data copyout(rho_p_save) + !$acc exit data delete(rho_p_save) call mpas_pool_get_array(diag, 'rho_pp', rho_pp) !$acc exit data copyout(rho_pp) call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) - !$acc exit data copyout(rho_zz_old_split) + !$acc exit data delete(rho_zz_old_split) call mpas_pool_get_array(diag, 'cqw', cqw) !$acc exit data delete(cqw) call mpas_pool_get_array(diag, 'cqu', cqu) @@ -1214,7 +1214,7 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) !$acc exit data copyout(pv_vertex) call mpas_pool_get_array(diag, 'pv_cell', pv_cell) - !$acc exit data copyout(pv_cell) + !$acc exit data delete(pv_cell) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) !$acc exit data copyout(rho_edge) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'h_divergence', h_divergence) @@ -1222,9 +1222,9 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(diag, 'ke', ke) !$acc exit data copyout(ke) ! use values from atm_compute_solve_diagnostics call mpas_pool_get_array(diag, 'gradPVn', gradPVn) - !$acc exit data copyout(gradPVn) + !$acc exit data delete(gradPVn) call mpas_pool_get_array(diag, 'gradPVt', gradPVt) - !$acc exit data copyout(gradPVt) + !$acc exit data delete(gradPVt) call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) !$acc exit data delete(alpha_tri) @@ -1258,23 +1258,23 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(state, 'u', u_1, 1) !$acc exit data copyout(u_1) call mpas_pool_get_array(state, 'u', u_2, 2) - !$acc exit data copyout(u_2) + !$acc exit data delete(u_2) call mpas_pool_get_array(state, 'w', w_1, 1) !$acc exit data copyout(w_1) call mpas_pool_get_array(state, 'w', w_2, 2) - !$acc exit data copyout(w_2) + !$acc exit data delete(w_2) call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) !$acc exit data copyout(theta_m_1) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) - !$acc exit data copyout(theta_m_2) + !$acc exit data copyout(theta_m_2) ! Delete gives incorrect results call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) !$acc exit data copyout(rho_zz_1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - !$acc exit data copyout(rho_zz_2) + !$acc exit data delete(rho_zz_2) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) !$acc exit data copyout(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc exit data copyout(scalars_2) + !$acc exit data copyout(scalars_2) ! Delete gives incorrect results call mpas_pool_get_array(tend, 'u', tend_ru) From 81815ba55a04039d09c94f3b2fabaf3aa300b0e3 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 8 May 2025 15:32:22 -0600 Subject: [PATCH 79/79] add new ACC_xfer timers and remove empty ones --- .../dynamics/mpas_atm_boundaries.F | 19 ------- .../dynamics/mpas_atm_time_integration.F | 51 ++++++------------- 2 files changed, 15 insertions(+), 55 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index 442b29b972..ed2e9a492e 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -310,7 +310,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t nullify(tend) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') if (associated(tend)) then else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) @@ -319,7 +318,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr) idx = idx_ptr end if - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc parallel default(present) if (associated(tend)) then @@ -339,8 +337,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t end if !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') end subroutine mpas_atm_get_bdy_tend @@ -442,8 +438,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del ! query the field as a scalar constituent ! if (associated(tend) .and. associated(state)) then - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang vector collapse(2) @@ -454,8 +448,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2) @@ -463,9 +455,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del idx=idx_ptr ! Avoid non-array pointer for OpenACC - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(2) do i=1, horizDim+1 @@ -475,8 +464,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') end if end subroutine mpas_atm_get_bdy_state_2d @@ -556,9 +543,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(3) do i=1, horizDim+1 @@ -570,9 +554,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - end subroutine mpas_atm_get_bdy_state_3d diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 54abbdc368..1b6c79a9c6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -297,6 +297,7 @@ subroutine mpas_atm_dynamics_init(domain) nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + MPAS_ACC_TIMER_START('mpas_dynamics_init [ACC_data_xfer]') call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -469,6 +470,8 @@ subroutine mpas_atm_dynamics_init(domain) !$acc enter data copyin(t_init) call mpas_pool_get_array(mesh, 'qv_init', qv_init) !$acc enter data copyin(qv_init) + + MPAS_ACC_TIMER_STOP('mpas_dynamics_init [ACC_data_xfer]') #endif end subroutine mpas_atm_dynamics_init @@ -539,6 +542,7 @@ subroutine mpas_atm_pre_computesolvediag_h2d(block) nullify(diag) call mpas_pool_get_subpool(block % structs, 'diag', diag) + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') call mpas_pool_get_array(state, 'rho_zz', h, 1) !$acc enter data create(h) call mpas_pool_get_array(state, 'u', u, 1) @@ -647,6 +651,8 @@ subroutine mpas_atm_pre_computesolvediag_h2d(block) call mpas_pool_get_array(mesh, 'fVertex', fVertex) !$acc enter data copyin(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') #endif end subroutine mpas_atm_pre_computesolvediag_h2d @@ -717,6 +723,8 @@ subroutine mpas_atm_post_computesolvediag_d2h(block) nullify(diag) call mpas_pool_get_subpool(block % structs, 'diag', diag) + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') + call mpas_pool_get_array(state, 'rho_zz', h, 1) !$acc exit data copyout(h) call mpas_pool_get_array(state, 'u', u, 1) @@ -825,6 +833,8 @@ subroutine mpas_atm_post_computesolvediag_d2h(block) call mpas_pool_get_array(mesh, 'fVertex', fVertex) !$acc exit data delete(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') #endif end subroutine mpas_atm_post_computesolvediag_d2h @@ -894,6 +904,7 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') call mpas_pool_get_array(diag, 'ru', ru) !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'ru_p', ru_p) @@ -1083,6 +1094,8 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) !$acc enter data copyin(rthdynten) + + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') #endif end subroutine mpas_atm_pre_dynamics_h2d @@ -1153,6 +1166,7 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') call mpas_pool_get_array(diag, 'ru', ru) !$acc exit data copyout(ru) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(diag, 'ru_p', ru_p) @@ -1342,6 +1356,7 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) !$acc exit data copyout(rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') #endif end subroutine mpas_atm_post_dynamics_d2h @@ -2974,8 +2989,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & moist_start = moist_start_ptr moist_end = moist_end_ptr - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -3025,8 +3038,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') end subroutine atm_compute_moist_coefficients @@ -4928,8 +4939,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - - if (local_advance_density) then !$acc enter data copyin(rho_zz_int) end if @@ -4957,7 +4966,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update self(scalars_old) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -7359,9 +7367,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & if(rk_step /= 3) reconstruct_v = .false. end if - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then !$acc parallel default(present) !$acc loop gang @@ -7989,9 +7994,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd @@ -8018,9 +8020,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end if end do !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_speczone_tend @@ -8291,9 +8290,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) - - MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -8308,9 +8304,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') - end subroutine atm_bdy_reset_speczone_values !------------------------------------------------------------------------- @@ -8555,9 +8548,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !--- - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells @@ -8578,9 +8568,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & end do ! updates now in temp storage !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') end subroutine atm_bdy_set_scalars_work @@ -8956,9 +8943,6 @@ subroutine summarize_timestep(domain) nEdgesSolve = nEdgesSolve_ptr nVertLevels = nVertLevels_ptr - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') - scalar_min = 0.0 scalar_max = 0.0 !$acc parallel default(present) @@ -8989,8 +8973,6 @@ subroutine summarize_timestep(domain) call mpas_dmpar_max_real(domain % dminfo, scalar_max, global_scalar_max) call mpas_log_write('global min, max u $r $r', realArgs=(/global_scalar_min, global_scalar_max/)) - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') end if if (config_print_global_minmax_sca) then @@ -9007,9 +8989,6 @@ subroutine summarize_timestep(domain) nVertLevels = nVertLevels_ptr num_scalars = num_scalars_ptr - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') - do iScalar = 1, num_scalars scalar_min = 0.0 scalar_max = 0.0