diff --git a/raster/r.param.scale/Makefile b/raster/r.param.scale/Makefile index 6cebd8a5d93..35aad6454b5 100644 --- a/raster/r.param.scale/Makefile +++ b/raster/r.param.scale/Makefile @@ -1,10 +1,9 @@ MODULE_TOPDIR = ../.. - PGM = r.param.scale - LIBES = $(GMATHLIB) $(RASTERLIB) $(GISLIB) $(MATHLIB) DEPENDENCIES = $(GMATHDEP) $(RASTERDEP) $(GISDEP) - +EXTRA_LIBS = $(OPENMP_LIBPATH) $(OPENMP_LIB) +EXTRA_CFLAGS = $(OPENMP_CFLAGS) +EXTRA_INC = $(OPENMP_INCPATH) include $(MODULE_TOPDIR)/include/Make/Module.make - default: cmd diff --git a/raster/r.param.scale/process.c b/raster/r.param.scale/process.c index 6a0da1dbd87..639535af188 100644 --- a/raster/r.param.scale/process.c +++ b/raster/r.param.scale/process.c @@ -1,18 +1,13 @@ /*****************************************************************************/ - /*** ***/ - /*** process() ***/ - -/*** Reads in a raster map row by row for processing. ***/ - -/*** Jo Wood, Project ASSIST, V1.0 7th February 1993 ***/ - +/*** Parallelized version using OpenMP and RAM preload ***/ +/*** Original: Jo Wood, Project ASSIST, V1.0 7th February 1993 ***/ /*** ***/ - /*****************************************************************************/ #include +#include #include #include #include @@ -22,265 +17,172 @@ void process(void) { - - /*-----------------------------------------------------------------------*/ - /* INITIALISE */ - - /*-----------------------------------------------------------------------*/ - - DCELL *row_in, /* Buffer large enough to hold `wsize' */ - *row_out = NULL, /* raster rows. When GRASS reads in a */ - /* raster row, each element is of type */ - /* DCELL */ - *window_ptr, /* Stores local terrain window. */ - centre, /* Elevation of central cell in window. */ - *window_cell; /* Stores last read cell in window. */ - - CELL *featrow_out = NULL; /* store features in CELL */ - - struct Cell_head region; /* Structure to hold region information */ - - int nrows, /* Will store the current number of */ - ncols, /* rows and columns in the raster. */ - row, col, /* Counts through each row and column */ - /* of the input raster. */ - wind_row, /* Counts through each row and column */ - wind_col, /* of the local neighbourhood window. */ - *index_ptr; /* Row permutation vector for LU decomp. */ - - double **normal_ptr, /* Cross-products matrix. */ - *obs_ptr, /* Observed vector. */ - temp; /* Unused */ - - double *weight_ptr; /* Weighting matrix for observed values. */ - int found_null; /* If null was found among window cells */ - - /*-----------------------------------------------------------------------*/ - /* GET RASTER AND WINDOW DETAILS */ - - /*-----------------------------------------------------------------------*/ - - G_get_window(®ion); /* Fill out the region structure (the */ - /* geographical limits etc.) */ - - nrows = Rast_window_rows(); /* Find out the number of rows and */ - ncols = Rast_window_cols(); /* columns of the raster. */ - - if ((region.ew_res / region.ns_res >= - 1.01) || /* If EW and NS resolns are */ - (region.ns_res / region.ew_res >= - 1.01)) { /* >1% different, warn user. */ - G_warning( - _("E-W and N-S grid resolutions are different. Taking average.")); + struct Cell_head region; + int nrows, ncols, row, col, wind_row; + double temp; + + G_get_window(®ion); + nrows = Rast_window_rows(); + ncols = Rast_window_cols(); + if ((region.ew_res / region.ns_res >= 1.01) || + (region.ns_res / region.ew_res >= 1.01)) { + G_warning(_("E-W and N-S grid resolutions are different. Taking average.")); resoln = (region.ns_res + region.ew_res) / 2; } else resoln = region.ns_res; /*-----------------------------------------------------------------------*/ - /* RESERVE MEMORY TO HOLD Z VALUES AND MATRICES */ - + /* Load entire input raster into RAM before parallel region. */ + /* The original sliding buffer requires sequential row access, making */ + /* direct row-level parallelism impossible. By preloading the full map */ + /* into a flat 2D array, each thread can independently access any */ + /* neighborhood window without lock contention or sequential dependency. */ /*-----------------------------------------------------------------------*/ + DCELL *full_map = (DCELL *)G_malloc((size_t)nrows * ncols * sizeof(DCELL)); + for (row = 0; row < nrows; row++) + Rast_get_d_row(fd_in, full_map + (size_t)row * ncols, row); - row_in = (DCELL *)G_malloc(ncols * sizeof(DCELL) * wsize); - /* Reserve `wsize' rows of memory. */ - - if (mparam != FEATURE) { - row_out = - Rast_allocate_buf(DCELL_TYPE); /* Initialise output row buffer. */ - Rast_set_d_null_value(row_out, ncols); - } - else { - featrow_out = - Rast_allocate_buf(CELL_TYPE); /* Initialise output row buffer. */ - Rast_set_c_null_value(featrow_out, ncols); - } - - window_ptr = (DCELL *)G_malloc(SQR(wsize) * sizeof(DCELL)); - /* Reserve enough memory for local wind. */ - - weight_ptr = (double *)G_malloc(SQR(wsize) * sizeof(double)); - /* Reserve enough memory weights matrix. */ - - normal_ptr = dmatrix(0, 5, 0, 5); /* Allocate memory for 6*6 matrix */ - index_ptr = ivector(0, 5); /* and for 1D vector holding indices */ - obs_ptr = dvector(0, 5); /* and for 1D vector holding observed z */ - - /* ---------------------------------------------------------------- */ - /* - CALCULATE LEAST SQUARES COEFFICIENTS - */ - /* ---------------------------------------------------------------- */ - - /*--- Calculate weighting matrix. ---*/ + /*-----------------------------------------------------------------------*/ + /* Compute weights and normal equations once before parallel region. */ + /* normal_ptr and index_ptr are read-only during processing — safe to */ + /* share across threads. weight_ptr is also read-only after setup. */ + /*-----------------------------------------------------------------------*/ + double *weight_ptr = (double *)G_malloc(SQR(wsize) * sizeof(double)); + double **normal_ptr = dmatrix(0, 5, 0, 5); + int *index_ptr = ivector(0, 5); find_weight(weight_ptr); - - /* Initial coefficients need only be found once since they are - constant for any given window size. The only element that - changes is the observed vector (RHS of normal equations). */ - - /*--- Find normal equations in matrix form. ---*/ - find_normal(normal_ptr, weight_ptr); - /*--- Apply LU decomposition to normal equations. ---*/ - - if (constrained) { + if (constrained) G_ludcmp(normal_ptr, 5, index_ptr, &temp); - /* To constrain the quadtratic - through the central cell, ignore - the calculations involving the - coefficient f. Since these are - all in the last row and column of - the matrix, simply redimension. */ - /* disp_matrix(normal_ptr,obs_ptr,obs_ptr,5); - */ - } + else + G_ludcmp(normal_ptr, 6, index_ptr,&temp); - else { - G_ludcmp(normal_ptr, 6, index_ptr, &temp); - /* disp_matrix(normal_ptr,obs_ptr,obs_ptr,6); - */ + /*-----------------------------------------------------------------------*/ + /* Pre allocate one output buffer per row so threads can write */ + /* independently. Rast_put_row is called serially after the parallel */ + /* region to ensure correct row ordering. */ + /*-----------------------------------------------------------------------*/ + void **row_buffers = (void **)G_malloc(nrows * sizeof(void *)); + for (row = 0; row < nrows; row++) { + if (mparam != FEATURE) { + row_buffers[row] = Rast_allocate_buf(DCELL_TYPE); + Rast_set_d_null_value(row_buffers[row], ncols); + } + else { + row_buffers[row] = Rast_allocate_buf(CELL_TYPE); + Rast_set_c_null_value(row_buffers[row], ncols); + } } /*-----------------------------------------------------------------------*/ - /* PROCESS INPUT RASTER AND WRITE OUT RASTER LINE BY LINE */ - + /* Parallel row loop. Each thread gets its own local computation */ + /* buffers (t_window, t_obs) to avoid write contention. */ /*-----------------------------------------------------------------------*/ - - if (mparam != FEATURE) - for (wind_row = 0; wind_row < EDGE; wind_row++) - Rast_put_row(fd_out, row_out, - DCELL_TYPE); /* Write out the edge cells as NULL. */ - else - for (wind_row = 0; wind_row < EDGE; wind_row++) - Rast_put_row(fd_out, featrow_out, - CELL_TYPE); /* Write out the edge cells as NULL. */ - - for (wind_row = 0; wind_row < wsize - 1; wind_row++) - Rast_get_row(fd_in, row_in + (wind_row * ncols), wind_row, DCELL_TYPE); - /* Read in enough of the first rows to */ - /* allow window to be examined. */ - + #pragma omp parallel for schedule(dynamic) private(row, col, wind_row) for (row = EDGE; row < (nrows - EDGE); row++) { - G_percent(row + 1, nrows - EDGE, 2); - Rast_get_row(fd_in, row_in + ((wsize - 1) * ncols), row + EDGE, - DCELL_TYPE); + /* Per-thread local buffers — avoids shared state during computation */ + double *t_window = (double *)G_malloc(SQR(wsize) * sizeof(double)); + double *t_obs = dvector(0, 5); + int wind_col, found_null; for (col = EDGE; col < (ncols - EDGE); col++) { - /* Find central z value */ - centre = *(row_in + EDGE * ncols + col); - /* Test for no data and propagate */ + + /* Read central cell value directly from preloaded map */ + DCELL centre = full_map[(size_t)row * ncols + col]; + + /* Propagate null values */ if (Rast_is_d_null_value(¢re)) { if (mparam == FEATURE) - Rast_set_c_null_value(featrow_out + col, 1); - else { - Rast_set_d_null_value(row_out + col, 1); - } + Rast_set_c_null_value((CELL *)row_buffers[row] + col, 1); + else + Rast_set_d_null_value((DCELL *)row_buffers[row] + col, 1); continue; } - found_null = FALSE; - for (wind_row = 0; wind_row < wsize; wind_row++) { - if (found_null) - break; - for (wind_col = 0; wind_col < wsize; wind_col++) { - /* Express all window values relative */ - /* to the central elevation. */ - window_cell = - row_in + (wind_row * ncols) + col + wind_col - EDGE; - /* Test for no data and propagate */ - if (Rast_is_d_null_value(window_cell)) { + /* Build local window relative to central elevation */ + found_null = 0; + for (wind_row = 0; wind_row < wsize && !found_null; wind_row++) { + for (wind_col = 0; wind_col < wsize; wind_col++) { + DCELL *cell = full_map + + (size_t)(row + wind_row - EDGE) * ncols + + col + wind_col - EDGE; + if (Rast_is_d_null_value(cell)) { if (mparam == FEATURE) - Rast_set_c_null_value(featrow_out + col, 1); - else { - Rast_set_d_null_value(row_out + col, 1); - } - found_null = TRUE; + Rast_set_c_null_value((CELL *)row_buffers[row] + col, 1); + else + Rast_set_d_null_value((DCELL *)row_buffers[row] + col, 1); + found_null = 1; break; } - *(window_ptr + (wind_row * wsize) + wind_col) = - *(window_cell)-centre; + t_window[wind_row * wsize + wind_col] = *cell - centre; } } if (found_null) continue; - /*--- Use LU back substitution to solve normal equations. ---*/ - find_obs(window_ptr, obs_ptr, weight_ptr); - - /* disp_wind(window_ptr); - disp_matrix(normal_ptr,obs_ptr,obs_ptr,6); - */ + /* Solve normal equations using LU back substitution */ + find_obs(t_window, t_obs, weight_ptr); - if (constrained) { - G_lubksb(normal_ptr, 5, index_ptr, obs_ptr); - /* - disp_matrix(normal_ptr,obs_ptr,obs_ptr,5); - */ - } + if (constrained) + G_lubksb(normal_ptr, 5, index_ptr, t_obs); + else + G_lubksb(normal_ptr, 6, index_ptr, t_obs); + /* Calculate terrain parameter from quadratic coefficients */ + if (mparam == FEATURE) + *((CELL *)row_buffers[row] + col) = (CELL)feature(t_obs); else { - G_lubksb(normal_ptr, 6, index_ptr, obs_ptr); - /* - disp_matrix(normal_ptr,obs_ptr,obs_ptr,6); - */ + *((DCELL *)row_buffers[row] + col) = param(mparam, t_obs); + if (mparam == ELEV) + *((DCELL *)row_buffers[row] + col) += centre; } - - /*--- Calculate terrain parameter based on quad. coefficients. ---*/ - if (mparam == FEATURE) - *(featrow_out + col) = (CELL)feature(obs_ptr); - else - *(row_out + col) = param(mparam, obs_ptr); - - if (mparam == ELEV) - *(row_out + col) += centre; /* Add central elevation back */ } - if (mparam != FEATURE) - Rast_put_row(fd_out, row_out, - DCELL_TYPE); /* Write the row buffer to the output */ - /* raster. */ - else /* write FEATURE to CELL */ - Rast_put_row(fd_out, featrow_out, - CELL_TYPE); /* Write the row buffer to the output */ - /* raster. */ - - /* 'Shuffle' rows down one, and read in */ - /* one new row. */ - for (wind_row = 0; wind_row < wsize - 1; wind_row++) - for (col = 0; col < ncols; col++) - *(row_in + (wind_row * ncols) + col) = - *(row_in + ((wind_row + 1) * ncols) + col); + G_free(t_window); + free_dvector(t_obs, 0, 5); } - if (mparam != FEATURE) - Rast_set_d_null_value(row_out, ncols); - else - Rast_set_c_null_value(featrow_out, ncols); - for (wind_row = 0; wind_row < EDGE; wind_row++) { - if (mparam != FEATURE) - Rast_put_row(fd_out, row_out, - DCELL_TYPE); /* Write out the edge cells as NULL. */ - else - Rast_put_row(fd_out, featrow_out, - CELL_TYPE); /* Write out the edge cells as NULL. */ + /*-----------------------------------------------------------------------*/ + /* Write output sequentially — Rast_put_row is not thread-safe. */ + /* Edge rows are written as NULL matching original behavior. */ + /*-----------------------------------------------------------------------*/ + void *null_buf; + if (mparam != FEATURE) { + null_buf = Rast_allocate_buf(DCELL_TYPE); + Rast_set_d_null_value(null_buf, ncols); + } + else { + null_buf = Rast_allocate_buf(CELL_TYPE); + Rast_set_c_null_value(null_buf, ncols); } - /*-----------------------------------------------------------------------*/ - /* FREE MEMORY USED TO STORE RASTER ROWS, LOCAL WINDOW AND MATRICES */ + for (wind_row = 0; wind_row < EDGE; wind_row++) + Rast_put_row(fd_out, null_buf, + mparam != FEATURE ? DCELL_TYPE : CELL_TYPE); - /*-----------------------------------------------------------------------*/ + for (row = EDGE; row < (nrows - EDGE); row++) { + G_percent(row, nrows - EDGE, 2); + Rast_put_row(fd_out, row_buffers[row], + mparam != FEATURE ? DCELL_TYPE : CELL_TYPE); + G_free(row_buffers[row]); + } - G_free(row_in); - if (mparam != FEATURE) - G_free(row_out); - else - G_free(featrow_out); + for (wind_row = 0; wind_row < EDGE; wind_row++) + Rast_put_row(fd_out, null_buf, + mparam != FEATURE ? DCELL_TYPE : CELL_TYPE); - G_free(window_ptr); + /*-----------------------------------------------------------------------*/ + /* Free all allocated memory */ + /*-----------------------------------------------------------------------*/ + G_free(full_map); + G_free(weight_ptr); + G_free(row_buffers); + G_free(null_buf); free_dmatrix(normal_ptr, 0, 5, 0, 5); - free_dvector(obs_ptr, 0, 5); free_ivector(index_ptr, 0, 5); -} +} \ No newline at end of file diff --git a/raster/r.proj/main.c b/raster/r.proj/main.c index 59702cdfc92..40a3f8be2c2 100644 --- a/raster/r.proj/main.c +++ b/raster/r.proj/main.c @@ -66,6 +66,10 @@ #include #include "r.proj.h" +#define PACKAGE "grassmods" + +#include + /* modify this table to add new methods */ struct menu menu[] = { {p_nearest, "nearest", "nearest neighbor"}, @@ -80,6 +84,28 @@ struct menu menu[] = { static char *make_ipol_list(void); static char *make_ipol_desc(void); + +/* Custom Interpolation for RAM Bypass - Lock-Free */ +void interpolate_ram(void *full_map, void *obufptr, int cell_type, + double col_idx, double row_idx, struct Cell_head *incellhd) +{ + int c = (int)floor(col_idx); + int r = (int)floor(row_idx); + int cell_size = Rast_cell_size(cell_type); + + /* Boundary check */ + if (r < 0 || r >= incellhd->rows || c < 0 || c >= incellhd->cols) { + Rast_set_null_value(obufptr, 1, cell_type); + return; + } + + /* Direct memory access - Thread Safe for Reads */ + unsigned char *src = (unsigned char *)full_map + + (((size_t)r * incellhd->cols + c) * cell_size); + memcpy(obufptr, src, cell_size); +} + + int main(int argc, char **argv) { char *mapname, /* ptr to name of output layer */ @@ -671,7 +697,6 @@ int main(int argc, char **argv) cell_type = Rast_get_map_type(fdi); ibuffer = readcell(fdi, memory->answer); Rast_close(fdi); - /* And switch back to original location */ G_switch_env(); Rast_set_output_window(&outcellhd); @@ -702,58 +727,76 @@ int main(int argc, char **argv) xcoord2 = outcellhd.west + (outcellhd.ew_res / 2); ycoord2 = outcellhd.north - (outcellhd.ns_res / 2); - G_important_message(_("Projecting...")); - for (row = 0; row < outcellhd.rows; row++) { - /* obufptr = obuffer */; - G_percent(row, outcellhd.rows - 1, 2); - -#if 0 - /* parallelization does not always work, - * segfaults in the interpolation functions - * can happen */ -#pragma omp parallel for schedule(static) -#endif + /* --- RAM BYPASS START --- */ + /* 1. Allocate a flat buffer for the entire input map in RAM */ + size_t total_cells = (size_t)incellhd.rows * incellhd.cols; - for (col = 0; col < outcellhd.cols; col++) { - void *obufptr = - (void *)((const unsigned char *)obuffer + col * cell_size); + /* Simple memory safety check */ + double memory_mb = (double)(total_cells * cell_size) / (1024.0 * 1024.0); + G_debug(1, "Bypass buffer requires %.2f MB of RAM", memory_mb); - double xcoord1 = xcoord2 + (col)*outcellhd.ew_res; - double ycoord1 = ycoord2; + double user_limit_mb = atof(memory->answer); + if (memory_mb > user_limit_mb) { + G_warning(_("Input map requires %.2f MB of RAM for parallel processing, " + "which exceeds the current memory limit (%.0f MB)."), + memory_mb, user_limit_mb); + G_important_message(_("The process may crash if system RAM is insufficient. " + "Increase the 'memory' option or use g.region to reduce the area.")); + } + /* Proceed with allocation */ + void *full_map_array = G_malloc(total_cells * cell_size); + + if (!full_map_array) + G_fatal_error("Insufficient RAM for bypass. Try a smaller region."); + + G_important_message(_("Loading map into RAM buffer for parallel bypass...")); + + /* 2. Sequential load (Single-threaded, library-safe) */ + /* ibuffer is already loaded by readcell, but we move it to a flat array + so we can access it lock-free without the readcell tile cache logic */ + for (int r = 0; r < incellhd.rows; r++) { + void *row_ptr = (void *)((unsigned char *)full_map_array + ((size_t)r * incellhd.cols * cell_size)); + /* We use the already-loaded cache here, but single-threaded */ + interpolate(ibuffer, row_ptr, cell_type, 0.0, (double)r, &incellhd); + G_percent(r, incellhd.rows - 1, 5); + } - /* project coordinates in output matrix to */ - /* coordinates in input matrix */ - if (GPJ_transform(&oproj, &iproj, &tproj, PJ_FWD, &xcoord1, - &ycoord1, NULL) < 0) { - G_fatal_error(_("Error in %s"), "GPJ_transform()"); - Rast_set_null_value(obufptr, 1, cell_type); - } - else { - /* convert to row/column indices of input matrix */ + G_important_message(_("Projecting (Lock-Free Parallel)...")); + + #pragma omp parallel for private(row, col) schedule(dynamic) + for (row = 0; row < outcellhd.rows; row++) { + void *local_obuffer = Rast_allocate_output_buf(cell_type); + double local_y = outcellhd.north - (outcellhd.ns_res / 2) - (row * outcellhd.ns_res); + double local_x_start = outcellhd.west + (outcellhd.ew_res / 2); - /* column index in input matrix */ - double col_idx = (xcoord1 - incellhd.west) / incellhd.ew_res; + for (col = 0; col < outcellhd.cols; col++) { + void *obufptr = (void *)((unsigned char *)local_obuffer + (size_t)col * cell_size); + double x1 = local_x_start + (col * outcellhd.ew_res); + double y1 = local_y; - /* row index in input matrix */ - double row_idx = (incellhd.north - ycoord1) / incellhd.ns_res; + if (GPJ_transform(&oproj, &iproj, &tproj, PJ_FWD, &x1, &y1, NULL) < 0) { + Rast_set_null_value(obufptr, 1, cell_type); + } else { + double c_idx = (x1 - incellhd.west) / incellhd.ew_res; + double r_idx = (incellhd.north - y1) / incellhd.ns_res; - /* and resample data point */ - interpolate(ibuffer, obufptr, cell_type, col_idx, row_idx, - &incellhd); + /* CALL OUR LOCK-FREE RAM INTERPOLATOR */ + interpolate_ram(full_map_array, obufptr, cell_type, c_idx, r_idx, &incellhd); } - - /* obufptr = G_incr_void_ptr(obufptr, cell_size); */ } - Rast_put_row(fdo, obuffer, cell_type); - - xcoord2 = outcellhd.west + (outcellhd.ew_res / 2); - ycoord2 -= outcellhd.ns_res; + #pragma omp critical + { + Rast_put_row(fdo, local_obuffer, cell_type); + } + G_free(local_obuffer); } + /*RAM BYPASS END*/ Rast_close(fdo); release_cache(ibuffer); + G_free(full_map_array); /* Clean up our bypass buffer */ if (have_colors > 0) { Rast_write_colors(mapname, G_mapset(), &colr); @@ -811,3 +854,4 @@ char *make_ipol_desc(void) return buf; } +