diff --git a/dev-doc/IOP_Module_API.md b/dev-doc/IOP_Module_API.md index cab13a5a93cf..3972e2b5b988 100644 --- a/dev-doc/IOP_Module_API.md +++ b/dev-doc/IOP_Module_API.md @@ -465,7 +465,14 @@ If the module can use the GPU, implement `process_cl()` wrapped in `#ifdef HAVE_ ## Tiling Support -If `IOP_FLAGS_ALLOW_TILING` is set, implement `tiling_callback()` to report memory requirements: +If `IOP_FLAGS_ALLOW_TILING` is set, the pixelpipe is allowed to process a piece in tiling mode. If some parameters don't allow tiling, override this in `commit_params()`. + +For calculating memory requirements and tile alignment we have `tiling_callback()`; if it is not provided, defaults are used as in `default_tiling_callback()`. + +Whenever a module might exceed the requirements defined in `default_tiling_callback()` or requires special alignment, a specific `tiling_callback()` should be provided, for three reasons: +a) the tiling process will not allocate more memory than granted +b) the OpenCL code path will not be tried if requirements are too high, thus avoiding costly late fallbacks to the CPU path. 
+c) tile stitching will be correct with respect to alignment | Field | Purpose | |-------|---------| @@ -475,13 +482,15 @@ If `IOP_FLAGS_ALLOW_TILING` is set, implement `tiling_callback()` to report memo | `overlap` | Pixels of overlap between adjacent tiles (for spatial filters) | | `align` | Tile origin alignment (1 = none, other values only for special algorithms) | + +An example: ```c void tiling_callback(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out, dt_develop_tiling_t *tiling) { - tiling->factor = 2.5f; // input + 1.5× temp buffers - tiling->factor_cl = 2.5f; + tiling->factor = 2.5f; // input + output + 2 single channel temp buffers + tiling->factor_cl = 3.75f; // as above but we need an additional rgb buffer plus a single channel buffer for a mask tiling->maxbuf = 1.0f; tiling->maxbuf_cl = 1.0f; tiling->overhead = 0; diff --git a/src/common/opencl.c b/src/common/opencl.c index c0be06a61ca1..03036f45fa0d 100644 --- a/src/common/opencl.c +++ b/src/common/opencl.c @@ -325,6 +325,22 @@ gboolean dt_opencl_use_pinned_memory(const int devid) return (!_cldev_running(devid)) ? FALSE : cl->dev[devid].pinned_memory; } +gboolean dt_opencl_unified_memory(const int devid) +{ + dt_opencl_t *cl = darktable.opencl; + return (!_cldev_running(devid)) ? FALSE : cl->dev[devid].unified_memory; +} + +/* this defines an additional alignment requirement for opencl image width. + It can have strong effects on processing speed. Reasonable values are a + power of 2. Set to 1 for no effect. 
+ FIXME we can possibly fix this per device +*/ +unsigned int dt_opencl_tiling_align(const int devid) +{ + return 4; +} + void dt_opencl_write_device_config(const int devid) { if(devid <= DT_DEVICE_CPU) return; diff --git a/src/common/opencl.h b/src/common/opencl.h index 29a1abffdced..1cc1e58d782c 100644 --- a/src/common/opencl.h +++ b/src/common/opencl.h @@ -607,6 +607,8 @@ gboolean dt_opencl_read_device_config(const int devid); gboolean dt_opencl_avoid_atomics(const int devid); void dt_opencl_micro_nap(const int devid); gboolean dt_opencl_use_pinned_memory(const int devid); +gboolean dt_opencl_unified_memory(const int devid); +unsigned int dt_opencl_tiling_align(const int devid); G_END_DECLS @@ -644,7 +646,7 @@ static inline void dt_opencl_cleanup(dt_opencl_t *cl) } static inline gboolean dt_opencl_finish(const int devid) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline gboolean dt_opencl_finish_sync_pipe(const int devid, const int pipetype) @@ -653,7 +655,7 @@ static inline gboolean dt_opencl_finish_sync_pipe(const int devid, } static inline int dt_opencl_lock_device(const int pipetype) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline void dt_opencl_unlock_device(const int dev) { @@ -661,7 +663,7 @@ static inline void dt_opencl_unlock_device(const int dev) static inline int dt_opencl_create_kernel(const int program, const char *name) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline void dt_opencl_free_kernel(const int kernel) { @@ -669,33 +671,33 @@ static inline void dt_opencl_free_kernel(const int kernel) static inline int dt_opencl_get_max_work_item_sizes(const int dev, size_t *sizes) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline int dt_opencl_get_work_group_limits(const int dev, size_t *sizes, size_t *workgroupsize, unsigned long *localmemsize) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline int dt_opencl_get_kernel_work_group_size(const int dev, const int kernel, size_t 
*kernelworkgroupsize) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } static inline gboolean dt_opencl_is_enabled(void) { diff --git a/src/develop/tiling.c b/src/develop/tiling.c index 09fdfec3ee38..295ec1842719 100644 --- a/src/develop/tiling.c +++ b/src/develop/tiling.c @@ -30,13 +30,6 @@ #include -/* this defines an additional alignment requirement for opencl image width. - It can have strong effects on processing speed. Reasonable values are a - power of 2. set to 1 for no effect. - FIXME we can possibly fix this per device -*/ -#define CL_ALIGNMENT 4 - /* parameter RESERVE for extended roi_in sizes due to inaccuracies when doing roi_out -> roi_in estimations. Needs to be increased if tiling fails due to insufficient buffer sizes. */ @@ -500,11 +493,11 @@ static int _simplex(double (*objfunc)(double[], void *[]), } -static int _nm_fit_output_to_input_roi(dt_iop_module_t *self, - dt_dev_pixelpipe_iop_t *piece, - const dt_iop_roi_t *iroi, - dt_iop_roi_t *oroi, - int delta) +static gboolean _nm_fit_output_to_input_roi(const dt_iop_module_t *self, + const dt_dev_pixelpipe_iop_t *piece, + const dt_iop_roi_t *iroi, + dt_iop_roi_t *oroi, + const int delta) { void *rest[4] = { (void *)self, (void *)piece, (void *)iroi, (void *)oroi }; double start[4] = { (float)oroi->x / piece->iwidth, (float)oroi->y / piece->iheight, @@ -531,12 +524,12 @@ static int _nm_fit_output_to_input_roi(dt_iop_module_t *self, /* find a matching oroi_full by probing start value of oroi and get corresponding input roi into iroi_probe. We search in two steps. first by a simplicistic iterative search which will succeed in most cases. 
If this does not converge, we do a downhill simplex (nelder-mead) fitting */ -static int _fit_output_to_input_roi(dt_iop_module_t *self, - dt_dev_pixelpipe_iop_t *piece, - const dt_iop_roi_t *iroi, - dt_iop_roi_t *oroi, - int delta, - int iter) +static gboolean _fit_output_to_input_roi(dt_iop_module_t *self, + dt_dev_pixelpipe_iop_t *piece, + dt_iop_roi_t *iroi, + dt_iop_roi_t *oroi, + const int delta, + int iter) { dt_iop_roi_t iroi_probe = *iroi; dt_iop_roi_t save_oroi = *oroi; @@ -570,8 +563,7 @@ static int _fit_output_to_input_roi(dt_iop_module_t *self, // try simplex downhill fitting now. // it's crucial that we have a good starting point in oroi, else this // will not converge as well. - int fit = _nm_fit_output_to_input_roi(self, piece, iroi, oroi, delta); - return fit; + return _nm_fit_output_to_input_roi(self, piece, iroi, oroi, delta); } @@ -1189,8 +1181,8 @@ void default_process_tiling(dt_iop_module_t *self, return; } -float dt_tiling_estimate_cpumem(dt_develop_tiling_t *tiling, - dt_dev_pixelpipe_iop_t *piece, +float dt_tiling_estimate_cpumem(const dt_develop_tiling_t *tiling, + const dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, const int max_bpp) @@ -1250,8 +1242,8 @@ float dt_tiling_estimate_cpumem(dt_develop_tiling_t *tiling, } #ifdef HAVE_OPENCL -float dt_tiling_estimate_clmem(dt_develop_tiling_t *tiling, - dt_dev_pixelpipe_iop_t *piece, +float dt_tiling_estimate_clmem(const dt_develop_tiling_t *tiling, + const dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, const int max_bpp) @@ -1260,7 +1252,10 @@ float dt_tiling_estimate_clmem(dt_develop_tiling_t *tiling, const float fullscale = fmaxf(roi_in->scale / roi_out->scale, sqrtf(((float)roi_in->width * roi_in->height) / ((float)roi_out->width * roi_out->height))); const gboolean use_pinned_memory = dt_opencl_use_pinned_memory(devid); - const float pinned_buffer_overhead = use_pinned_memory ? 
2.0f : 0.0f; + /* With pinned transfers on devices with dedicated GPU memory there is additional + memory pressure, as the pinned buffers are also allocated on the device as a cache for performance + */ + const float pinned_buffer_overhead = use_pinned_memory && !dt_opencl_unified_memory(devid) ? 2.0f : 0.0f; const float pinned_buffer_slack = use_pinned_memory ? 0.85f : 1.0f; const float available = (float)dt_opencl_get_device_available(devid); const float factor = fmaxf(tiling->factor_cl + pinned_buffer_overhead, 1.0f); @@ -1271,7 +1266,7 @@ float dt_tiling_estimate_clmem(dt_develop_tiling_t *tiling, int width = MIN(MAX(roi_in->width, roi_out->width), darktable.opencl->dev[devid].max_image_width); int height = MIN(MAX(roi_in->height, roi_out->height), darktable.opencl->dev[devid].max_image_height); - const unsigned int align = _lcm(tiling->align, CL_ALIGNMENT); + const unsigned int align = _lcm(tiling->align, dt_opencl_tiling_align(devid)); if((float)width * height * max_bpp * maxbuf > singlebuffer) { @@ -1347,9 +1342,11 @@ static int _default_process_tiling_cl_ptp(dt_iop_module_t *self, /* shall we use pinned memory transfers? */ gboolean use_pinned_memory = dt_opencl_use_pinned_memory(devid); - const float pinned_buffer_overhead = use_pinned_memory ? 2.0f : 0.0f; // add two additional pinned memory buffers - // which seemingly get allocated not only on - // host but also on device (why???) + /* With pinned transfers on devices with dedicated GPU memory there is additional + memory pressure, as the pinned buffers are also allocated on the device as a cache for performance + */ + const float pinned_buffer_overhead = use_pinned_memory && !dt_opencl_unified_memory(devid) ? 2.0f : 0.0f; + // avoid problems when pinned buffer size gets too close to max_mem_alloc size const float pinned_buffer_slack = use_pinned_memory ? 
0.85f : 1.0f; const float available = (float)dt_opencl_get_device_available(devid); @@ -1394,16 +1391,13 @@ static int _default_process_tiling_cl_ptp(dt_iop_module_t *self, /* Alignment rules: we need to make sure that alignment requirements of module are fulfilled. Modules will report alignment requirements via align within tiling_callback(). - Additional alignment requirements are set via definition of CL_ALIGNMENT. + Additional alignment requirements are set via dt_opencl_tiling_align(). We guarantee alignment by selecting image width/height and overlap accordingly. For a tile width/height that is identical to image width/height no special alignment is done. */ const unsigned int align = tiling.align; - /* determining alignment requirement for tile width/height. - in case of tile width also align according to definition of CL_ALIGNMENT - */ - const unsigned int walign = _lcm(align, CL_ALIGNMENT); + const unsigned int walign = _lcm(align, dt_opencl_tiling_align(devid)); const unsigned int halign = align; assert(align != 0 && walign != 0 && halign != 0); @@ -1442,22 +1436,11 @@ static int _default_process_tiling_cl_ptp(dt_iop_module_t *self, /* reserve pinned input and output memory for host<->device data transfer */ if(use_pinned_memory) { - pinned_input = dt_opencl_alloc_device_buffer_with_flags(devid, (size_t)width * height * in_bpp, - CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR); - if(pinned_input == NULL) - { - dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, - "[default_process_tiling_cl_ptp] could not alloc pinned " - "input buffer for module '%s%s'", - self->op, dt_iop_get_instance_id(self)); - use_pinned_memory = FALSE; - } - } + const size_t bsize = (size_t)width * height * in_bpp; + pinned_input = dt_opencl_alloc_device_buffer_with_flags(devid, bsize, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR); - if(use_pinned_memory) - { - input_buffer = dt_opencl_map_buffer(devid, pinned_input, TRUE, CL_MAP_WRITE, 0, - (size_t)width * height * in_bpp); + if(pinned_input) + 
input_buffer = dt_opencl_map_buffer(devid, pinned_input, TRUE, CL_MAP_WRITE, 0, bsize); if(input_buffer == NULL) { dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, @@ -1470,22 +1453,11 @@ static int _default_process_tiling_cl_ptp(dt_iop_module_t *self, if(use_pinned_memory) { - pinned_output = dt_opencl_alloc_device_buffer_with_flags(devid, (size_t)width * height * out_bpp, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); - if(pinned_output == NULL) - { - dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, - "[default_process_tiling_cl_ptp] could not alloc pinned output " - "buffer for module '%s%s'", - self->op, dt_iop_get_instance_id(self)); - use_pinned_memory = FALSE; - } - } + const size_t bsize = (size_t)width * height * out_bpp; + pinned_output = dt_opencl_alloc_device_buffer_with_flags(devid, bsize, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); + if(pinned_output) + output_buffer = dt_opencl_map_buffer(devid, pinned_output, TRUE, CL_MAP_READ, 0, bsize); - if(use_pinned_memory) - { - output_buffer = dt_opencl_map_buffer(devid, pinned_output, TRUE, CL_MAP_READ, 0, - (size_t)width * height * out_bpp); if(output_buffer == NULL) { dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, @@ -1517,8 +1489,8 @@ static int _default_process_tiling_cl_ptp(dt_iop_module_t *self, size_t region[2] = { wd, ht }; /* roi_in and roi_out for process_cl on subbuffer */ - dt_iop_roi_t iroi = { roi_in->x + tx * tile_wd, roi_in->y + ty * tile_ht, wd, ht, roi_in->scale }; - dt_iop_roi_t oroi = { roi_out->x + tx * tile_wd, roi_out->y + ty * tile_ht, wd, ht, roi_out->scale }; + const dt_iop_roi_t iroi = { roi_in->x + tx * tile_wd, roi_in->y + ty * tile_ht, wd, ht, roi_in->scale }; + const dt_iop_roi_t oroi = { roi_out->x + tx * tile_wd, roi_out->y + ty * tile_ht, wd, ht, roi_out->scale }; /* offsets of tile into ivoid and ovoid */ @@ -1717,9 +1689,11 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, /* shall we use pinned memory transfers? 
*/ gboolean use_pinned_memory = dt_opencl_use_pinned_memory(devid); - const float pinned_buffer_overhead = use_pinned_memory ? 2.0f : 0.0f; // add two additional pinned memory buffers - // which seemingly get allocated not only on - // host but also on device (why???) + + /* With pinned transfers on devices with dedicated GPU memory there is additional + memory pressure, as the pinned buffers are also allocated on the device as a cache for performance + */ + const float pinned_buffer_overhead = use_pinned_memory && !dt_opencl_unified_memory(devid) ? 2.0f : 0.0f; // avoid problems when pinned buffer size gets too close to max_mem_alloc size const float pinned_buffer_slack = use_pinned_memory ? 0.85f : 1.0f; const float available = (float)dt_opencl_get_device_available(devid); @@ -1731,10 +1705,7 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, int width = MIN(MAX(roi_in->width, roi_out->width), darktable.opencl->dev[devid].max_image_width); int height = MIN(MAX(roi_in->height, roi_out->height), darktable.opencl->dev[devid].max_image_height); - /* Alignment rules: we need to make sure that alignment requirements of module are fulfilled. - Modules will report alignment requirements via align within tiling_callback(). 
- */ - const unsigned int align = _lcm(tiling.align, CL_ALIGNMENT); + const unsigned int align = _lcm(tiling.align, dt_opencl_tiling_align(devid)); assert(align != 0); /* shrink tile size in case it would exceed singlebuffer size */ @@ -1755,7 +1726,7 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, width = _align_down((int)floorf(width * sqrtf(scale)), align); height = _align_down((int)floorf(height * sqrtf(scale)), align); } - dt_print(DT_DEBUG_TILING | DT_DEBUG_VERBOSE, + dt_print(DT_DEBUG_TILING | DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE, "[default_process_tiling_cl_roi] [%s] buffer exceeds singlebuffer, corrected to %dx%d", dt_dev_pixelpipe_type_to_str(piece->pipe->type), width, height); } @@ -1824,22 +1795,10 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, /* reserve pinned input and output memory for host<->device data transfer */ if(use_pinned_memory) { - pinned_input = dt_opencl_alloc_device_buffer_with_flags(devid, (size_t)width * height * in_bpp, - CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR); - if(pinned_input == NULL) - { - dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, - "[default_process_tiling_cl_roi] [%s] could not alloc pinned input buffer for module '%s%s'", - dt_dev_pixelpipe_type_to_str(piece->pipe->type), self->op, dt_iop_get_instance_id(self)); - use_pinned_memory = FALSE; - } - } - - if(use_pinned_memory) - { - - input_buffer = dt_opencl_map_buffer(devid, pinned_input, TRUE, CL_MAP_WRITE, 0, - (size_t)width * height * in_bpp); + const size_t bsize = (size_t)width * height * in_bpp; + pinned_input = dt_opencl_alloc_device_buffer_with_flags(devid, bsize, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR); + if(pinned_input) + input_buffer = dt_opencl_map_buffer(devid, pinned_input, TRUE, CL_MAP_WRITE, 0, bsize); if(input_buffer == NULL) { dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, @@ -1852,23 +1811,10 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, if(use_pinned_memory) { - - pinned_output = 
dt_opencl_alloc_device_buffer_with_flags(devid, (size_t)width * height * out_bpp, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); - if(pinned_output == NULL) - { - dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, - "[default_process_tiling_cl_roi] [%s] could not alloc pinned output buffer for module '%s%s'", - dt_dev_pixelpipe_type_to_str(piece->pipe->type), self->op, dt_iop_get_instance_id(self)); - use_pinned_memory = FALSE; - } - } - - if(use_pinned_memory) - { - - output_buffer = dt_opencl_map_buffer(devid, pinned_output, TRUE, CL_MAP_READ, 0, - (size_t)width * height * out_bpp); + const size_t bsize = (size_t)width * height * out_bpp; + pinned_output = dt_opencl_alloc_device_buffer_with_flags(devid, bsize, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); + if(pinned_output) + output_buffer = dt_opencl_map_buffer(devid, pinned_output, TRUE, CL_MAP_READ, 0, bsize); if(output_buffer == NULL) { dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING, @@ -1976,17 +1922,15 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, const size_t ioffs = (size_t)(in_dy * ipitch) + (size_t)(in_dx * in_bpp); const size_t ooffs = (size_t)(out_dy * opitch) + (size_t)(out_dx * out_bpp); - /* origin and region of full input tile */ - size_t iorigin[2] = { 0, 0 }; - size_t iregion[2] = { iroi_full.width, iroi_full.height }; + /* region of full input tile */ + const size_t iregion[2] = { iroi_full.width, iroi_full.height }; - /* origin and region of full output tile */ - size_t oforigin[2] = { 0, 0 }; - size_t ofregion[2] = { oroi_full.width, oroi_full.height }; + /* region of full output tile */ + const size_t ofregion[2] = { oroi_full.width, oroi_full.height }; /* origin and region of good part of output tile */ - size_t oorigin[2] = { oroi_good.x - oroi_full.x, oroi_good.y - oroi_full.y }; - size_t oregion[2] = { oroi_good.width, oroi_good.height }; + const size_t oorigin[2] = { oroi_good.x - oroi_full.x, oroi_good.y - oroi_full.y }; + const size_t oregion[2] = { oroi_good.width, 
oroi_good.height }; dt_print_pipe(DT_DEBUG_TILING, " tile cl_roi", piece->pipe, piece->module, devid, &iroi_full, &oroi_full, @@ -2027,14 +1971,14 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, (size_t)iroi_full.width * in_bpp); /* blocking memory transfer: pinned host input buffer -> opencl/device tile */ - err = dt_opencl_write_host_to_device_raw(devid, (char *)input_buffer, input, iorigin, iregion, + err = dt_opencl_write_host_to_device_raw(devid, (char *)input_buffer, input, CLIMG_ORIGIN, iregion, (size_t)iroi_full.width * in_bpp, TRUE); if(err != CL_SUCCESS) use_pinned_memory = FALSE; } else { /* blocking direct memory transfer: host input image -> opencl/device tile */ - err = dt_opencl_write_host_to_device_raw(devid, (char *)ivoid + ioffs, input, iorigin, iregion, + err = dt_opencl_write_host_to_device_raw(devid, (char *)ivoid + ioffs, input, CLIMG_ORIGIN, iregion, ipitch, TRUE); } if(err != CL_SUCCESS) goto error; @@ -2064,7 +2008,7 @@ static int _default_process_tiling_cl_roi(dt_iop_module_t *self, if(use_pinned_memory) { /* blocking memory transfer: complete opencl/device tile -> pinned host output buffer */ - err = dt_opencl_read_host_from_device_raw(devid, (char *)output_buffer, output, oforigin, ofregion, + err = dt_opencl_read_host_from_device_raw(devid, (char *)output_buffer, output, CLIMG_ORIGIN, ofregion, (size_t)oroi_full.width * out_bpp, TRUE); if(err != CL_SUCCESS) { @@ -2149,7 +2093,7 @@ int default_process_tiling_cl(dt_iop_module_t *self, const dt_iop_roi_t *const roi_out, const int in_bpp) { - return -1; + return DT_OPENCL_DEFAULT_ERROR; } #endif @@ -2159,7 +2103,7 @@ int default_process_tiling_cl(dt_iop_module_t *self, no overlap between tiles, and an pixel alignment of 1 in x and y direction, i.e. no special alignment required. Simple pixel to pixel modules (take tonecurve as an example) can happily live with that. - (1) Small overhead like look-up-tables in tonecurve can be ignored safely. 
*/ + (1) Small overhead like look-up-tables can be ignored safely. */ void default_tiling_callback(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *roi_in, diff --git a/src/develop/tiling.h b/src/develop/tiling.h index 936ac43c6458..8e5100f3fe2b 100644 --- a/src/develop/tiling.h +++ b/src/develop/tiling.h @@ -71,12 +71,12 @@ void tiling_callback(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t gboolean dt_tiling_piece_fits_host_memory(const struct dt_dev_pixelpipe_iop_t *piece, const size_t width, const size_t height, const unsigned bpp, const float factor, const size_t overhead); -float dt_tiling_estimate_cpumem(struct dt_develop_tiling_t *tiling, struct dt_dev_pixelpipe_iop_t *piece, +float dt_tiling_estimate_cpumem(const dt_develop_tiling_t *tiling, const struct dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, const int max_bpp); #ifdef HAVE_OPENCL -float dt_tiling_estimate_clmem(struct dt_develop_tiling_t *tiling, struct dt_dev_pixelpipe_iop_t *piece, +float dt_tiling_estimate_clmem(const dt_develop_tiling_t *tiling, const struct dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, const int max_bpp); #endif