Conversation
[title fragments truncated in extraction — presumably the PR title; reconstruction to confirm:]
r.param.scale: OpenMP parallelization — benchmarked on Apple M-series
1.7x speedup on 100M cell raster
| ncols = Rast_window_cols(); | ||
| if ((region.ew_res / region.ns_res >= 1.01) || | ||
| (region.ns_res / region.ew_res >= 1.01)) { | ||
| G_warning(_("E-W and N-S grid resolutions are different. Taking average.")); |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| G_warning(_("E-W and N-S grid resolutions are different. Taking average.")); | |
| G_warning( | |
| _("E-W and N-S grid resolutions are different. Taking average.")); |
| */ | ||
| } | ||
| else | ||
| G_ludcmp(normal_ptr, 6, index_ptr,&temp); |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| G_ludcmp(normal_ptr, 6, index_ptr,&temp); | |
| G_ludcmp(normal_ptr, 6, index_ptr, &temp); |
| /*-----------------------------------------------------------------------*/ | ||
| /* PROCESS INPUT RASTER AND WRITE OUT RASTER LINE BY LINE */ | ||
|
|
||
| /* Parallel row loop. Each thread gets its own local computation */ | ||
| /* buffers (t_window, t_obs) to avoid write contention. */ | ||
| /*-----------------------------------------------------------------------*/ | ||
|
|
||
| if (mparam != FEATURE) | ||
| for (wind_row = 0; wind_row < EDGE; wind_row++) | ||
| Rast_put_row(fd_out, row_out, | ||
| DCELL_TYPE); /* Write out the edge cells as NULL. */ | ||
| else | ||
| for (wind_row = 0; wind_row < EDGE; wind_row++) | ||
| Rast_put_row(fd_out, featrow_out, | ||
| CELL_TYPE); /* Write out the edge cells as NULL. */ | ||
|
|
||
| for (wind_row = 0; wind_row < wsize - 1; wind_row++) | ||
| Rast_get_row(fd_in, row_in + (wind_row * ncols), wind_row, DCELL_TYPE); | ||
| /* Read in enough of the first rows to */ | ||
| /* allow window to be examined. */ | ||
|
|
||
| #pragma omp parallel for schedule(dynamic) private(row, col, wind_row) |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| /*-----------------------------------------------------------------------*/ | |
| /* PROCESS INPUT RASTER AND WRITE OUT RASTER LINE BY LINE */ | |
| /* Parallel row loop. Each thread gets its own local computation */ | |
| /* buffers (t_window, t_obs) to avoid write contention. */ | |
| /*-----------------------------------------------------------------------*/ | |
| if (mparam != FEATURE) | |
| for (wind_row = 0; wind_row < EDGE; wind_row++) | |
| Rast_put_row(fd_out, row_out, | |
| DCELL_TYPE); /* Write out the edge cells as NULL. */ | |
| else | |
| for (wind_row = 0; wind_row < EDGE; wind_row++) | |
| Rast_put_row(fd_out, featrow_out, | |
| CELL_TYPE); /* Write out the edge cells as NULL. */ | |
| for (wind_row = 0; wind_row < wsize - 1; wind_row++) | |
| Rast_get_row(fd_in, row_in + (wind_row * ncols), wind_row, DCELL_TYPE); | |
| /* Read in enough of the first rows to */ | |
| /* allow window to be examined. */ | |
| #pragma omp parallel for schedule(dynamic) private(row, col, wind_row) | |
| /*-----------------------------------------------------------------------*/ | |
| /* Parallel row loop. Each thread gets its own local computation */ | |
| /* buffers (t_window, t_obs) to avoid write contention. */ | |
| /*-----------------------------------------------------------------------*/ | |
| #pragma omp parallel for schedule(dynamic) private(row, col, wind_row) |
| (size_t)(row + wind_row - EDGE) * ncols + | ||
| col + wind_col - EDGE; |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| (size_t)(row + wind_row - EDGE) * ncols + | |
| col + wind_col - EDGE; | |
| (size_t)(row + wind_row - EDGE) * ncols + | |
| col + wind_col - EDGE; |
| Rast_set_d_null_value(row_out + col, 1); | ||
| } | ||
| found_null = TRUE; | ||
| Rast_set_c_null_value((CELL *)row_buffers[row] + col, 1); |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| Rast_set_c_null_value((CELL *)row_buffers[row] + col, 1); | |
| Rast_set_c_null_value( | |
| (CELL *)row_buffers[row] + col, 1); |
|
|
||
| /* row index in input matrix */ | ||
| double row_idx = (incellhd.north - ycoord1) / incellhd.ns_res; | ||
| if (GPJ_transform(&oproj, &iproj, &tproj, PJ_FWD, &x1, &y1, NULL) < 0) { |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| if (GPJ_transform(&oproj, &iproj, &tproj, PJ_FWD, &x1, &y1, NULL) < 0) { | |
| if (GPJ_transform(&oproj, &iproj, &tproj, PJ_FWD, &x1, &y1, NULL) < | |
| 0) { |
| double row_idx = (incellhd.north - ycoord1) / incellhd.ns_res; | ||
| if (GPJ_transform(&oproj, &iproj, &tproj, PJ_FWD, &x1, &y1, NULL) < 0) { | ||
| Rast_set_null_value(obufptr, 1, cell_type); | ||
| } else { |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| } else { | |
| } | |
| else { |
| interpolate(ibuffer, obufptr, cell_type, col_idx, row_idx, | ||
| &incellhd); | ||
| /* CALL OUR LOCK-FREE RAM INTERPOLATOR */ | ||
| interpolate_ram(full_map_array, obufptr, cell_type, c_idx, r_idx, &incellhd); |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| interpolate_ram(full_map_array, obufptr, cell_type, c_idx, r_idx, &incellhd); | |
| interpolate_ram(full_map_array, obufptr, cell_type, c_idx, | |
| r_idx, &incellhd); |
|
|
||
| xcoord2 = outcellhd.west + (outcellhd.ew_res / 2); | ||
| ycoord2 -= outcellhd.ns_res; | ||
| #pragma omp critical |
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
| #pragma omp critical | |
| #pragma omp critical |
|
|
||
| return buf; | ||
| } | ||
|
|
There was a problem hiding this comment.
[pre-commit] reported by reviewdog 🐶
This draft PR demonstrates a proof of concept parallelization of
r.param.scale using OpenMP, submitted as part of my GSoC 2026 proposal for "Parallelization of existing tools."

Benchmark Results
100M cell raster, Apple M4, 8 cores — 3 runs each:
1.7x wall-time speedup on a 100M cell raster.
Technical Approach
The core blocker for parallelizing
r.param.scale is the sequential sliding buffer in process(). The original implementation shuffles rows down after each row. Each row depends on the state left by the previous row, making direct row-level parallelism impossible without reading each row's neighborhood from disk multiple times, which would be slower than serial. The solution is the same RAM preload pattern used in the r.proj parallelization: load the entire input raster into a flat 2D array before the parallel region. With the full map in RAM, each output row's neighborhood window can be accessed independently. The outer row loop is parallelized with #pragma omp parallel for schedule(dynamic), with per-thread t_window and t_obs buffers to avoid write contention.

Known Limitations
- Build flags are currently macOS-specific (-Xclang -fopenmp). The full implementation will wire HAVE_OPENMP into the configure system for Linux/Windows portability, and will make the memory parameter consistent with other GRASS modules.

This PR is not intended for merge. It demonstrates that the surface parameter computation is parallelizable and the performance gain is real.