diff --git a/pyproject.toml b/pyproject.toml index a62b19480a..6b95abdc7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dace-cartesian = [ 'dace>=1.0.2' # refined in [tool.uv.sources] ] dace-next = [ - 'dace==43!2026.04.20' # uses custom index at 'https://github.com/GridTools/pypi' + 'dace>=1.0.2' # refined in [tool.uv.sources] ] dev = [ {include-group = 'build'}, @@ -485,7 +485,7 @@ url = 'https://gridtools.github.io/pypi/' atlas4py = {index = "test.pypi"} dace = [ {git = "https://github.com/GridTools/dace", branch = "romanc/stree-v2", group = "dace-cartesian"}, - {index = "gridtools", group = "dace-next"} + {git = "https://github.com/GridTools/dace", branch = "min_warps_per_eu_next", group = "dace-next"} ] # -- versioningit -- diff --git a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py index 799e8ad228..9e13624ed3 100644 --- a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py +++ b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py @@ -117,6 +117,7 @@ def gt_auto_optimize( gpu_block_size_1d: Optional[Sequence[int | str] | str] = (64, 1, 1), gpu_block_size_2d: Optional[Sequence[int | str] | str] = None, gpu_block_size_3d: Optional[Sequence[int | str] | str] = None, + gpu_min_warps_per_eu: Optional[int] = None, gpu_maxnreg: Optional[int] = None, blocking_dim: Optional[gtx_common.Dimension] = None, blocking_size: int = 10, @@ -379,6 +380,7 @@ def gt_auto_optimize( gpu_maxnreg=gpu_maxnreg, optimization_hooks=optimization_hooks, gpu_block_size_spec=gpu_block_size_spec if gpu_block_size_spec else None, + gpu_min_warps_per_eu=gpu_min_warps_per_eu, validate_all=validate_all, ) @@ -815,6 +817,7 @@ def _gt_auto_configure_maps_and_strides( gpu_maxnreg: Optional[int], optimization_hooks: dict[GT4PyAutoOptHook, GT4PyAutoOptHookFun], gpu_block_size_spec: Optional[dict[str, Sequence[int | str] | str]], + gpu_min_warps_per_eu: Optional[int], validate_all: bool, ) -> dace.SDFG: """Configure the Maps and the strides of the SDFG inplace. @@ -889,6 +892,7 @@ def _gt_auto_configure_maps_and_strides( gpu_launch_bounds=gpu_launch_bounds, gpu_launch_factor=gpu_launch_factor, gpu_block_size_spec=gpu_block_size_spec, + gpu_min_warps_per_eu=gpu_min_warps_per_eu, gpu_maxnreg=gpu_maxnreg, validate=False, validate_all=validate_all, diff --git a/src/gt4py/next/program_processors/runners/dace/transformations/gpu_utils.py b/src/gt4py/next/program_processors/runners/dace/transformations/gpu_utils.py index aa34736c8a..2b6d4889a4 100644 --- a/src/gt4py/next/program_processors/runners/dace/transformations/gpu_utils.py +++ b/src/gt4py/next/program_processors/runners/dace/transformations/gpu_utils.py @@ -34,6 +34,7 @@ def gt_gpu_transformation( gpu_launch_bounds: Optional[int | str] = None, gpu_launch_factor: Optional[int] = None, gpu_block_size_spec: Optional[dict[str, Sequence[int | str] | str]] = None, + gpu_min_warps_per_eu: Optional[int] = None, gpu_maxnreg: Optional[int] = None, validate: bool = True, validate_all: bool = False, @@ -124,6 +125,7 @@ def gt_gpu_transformation( launch_bounds=gpu_launch_bounds, launch_factor=gpu_launch_factor, **gpu_block_size_spec, + gpu_min_warps_per_eu=gpu_min_warps_per_eu, gpu_maxnreg=gpu_maxnreg, validate=False, validate_all=validate_all, @@ -365,6 +367,7 @@ def gt_set_gpu_blocksize( block_size: Optional[Sequence[int | str] | str], launch_bounds: Optional[int | str] = None, launch_factor: Optional[int] = None, + gpu_min_warps_per_eu: Optional[int] = None, gpu_maxnreg: Optional[int] = None, validate: bool = True, validate_all: bool = False, @@ -397,6 +400,7 @@ def gt_set_gpu_blocksize( }.items(): if f"{arg}_{dim}d" not in kwargs: kwargs[f"{arg}_{dim}d"] = val + kwargs["gpu_min_warps_per_eu"] = gpu_min_warps_per_eu kwargs["maxnreg"] = gpu_maxnreg setter = GPUSetBlockSize(**kwargs) @@ -595,6 +599,12 @@ class GPUSetBlockSize(dace_transformation.SingleStateTransformation): default=None, desc="Set the launch bound property for 3 dimensional map.", ) + min_warps_per_eu = dace_properties.Property( + dtype=int, + allow_none=True, + default=None, + desc="Set the minimum number of warps per EU for the GPU maps.", + ) maxnreg = dace_properties.Property( dtype=int, allow_none=True, @@ -616,6 +626,7 @@ def __init__( launch_factor_1d: int | None = None, launch_factor_2d: int | None = None, launch_factor_3d: int | None = None, + gpu_min_warps_per_eu: int | None = None, maxnreg: int | None = None, ) -> None: super().__init__() @@ -644,6 +655,8 @@ def __init__( self.launch_bounds_3d = _gpu_launch_bound_parser( self.block_size_3d, launch_bounds_3d, launch_factor_3d ) + if gpu_min_warps_per_eu is not None: + self.min_warps_per_eu = gpu_min_warps_per_eu if maxnreg is not None: self.maxnreg = maxnreg @@ -767,6 +780,8 @@ def apply( elif launch_bounds is not None: # Note: empty string has a meaning in DaCe gpu_map.gpu_launch_bounds = launch_bounds + if self.min_warps_per_eu: + gpu_map.gpu_min_warps_per_eu = self.min_warps_per_eu def gt_remove_trivial_gpu_maps( sdfg: dace.SDFG, diff --git a/uv.lock b/uv.lock index 449acb09ef..4667295dd8 100644 --- a/uv.lock +++ b/uv.lock @@ -1245,7 +1245,7 @@ dependencies = [ [[package]] name = "dace" version = "43!2026.4.20" -source = { registry = "https://gridtools.github.io/pypi/" } +source = { git = "https://github.com/GridTools/dace?branch=min_warps_per_eu_next#eda19870248afba0b104fcad1899b6108ebe5064" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", "python_full_version >= '3.14' and sys_platform == 'emscripten'", @@ -1276,9 +1276,6 @@ dependencies = [ { name = "sympy" }, { name = "typing-extensions" }, ] -wheels = [ - { url = "https://gridtools.github.io/pypi/dace/dace-43!2026.4.20-py3-none-any.whl", hash = "sha256:04e79fa42d57d3a192f7164c4402007f18afc336f5051bdb2cdcb7080310552f" }, -] [[package]] name = "debugpy" @@ -1792,7 +1789,7 @@ dace-cartesian = [ { name = "dace", version = "1.0.0", source = { git = "https://github.com/GridTools/dace?branch=romanc%2Fstree-v2#43811b298769a626085d8917e5d9516060af8ec5" } }, ] dace-next = [ - { name = "dace", version = "43!2026.4.20", source = { registry = "https://gridtools.github.io/pypi/" } }, + { name = "dace", version = "43!2026.4.20", source = { git = "https://github.com/GridTools/dace?branch=min_warps_per_eu_next#eda19870248afba0b104fcad1899b6108ebe5064" } }, ] dev = [ { name = "atlas4py" }, @@ -1962,7 +1959,7 @@ build = [ { name = "wheel", specifier = ">=0.33.6" }, ] dace-cartesian = [{ name = "dace", git = "https://github.com/GridTools/dace?branch=romanc%2Fstree-v2" }] -dace-next = [{ name = "dace", specifier = "==43!2026.4.20", index = "https://gridtools.github.io/pypi/", conflict = { package = "gt4py", group = "dace-next" } }] +dace-next = [{ name = "dace", git = "https://github.com/GridTools/dace?branch=min_warps_per_eu_next" }] dev = [ { name = "atlas4py", specifier = ">=0.41", index = "https://test.pypi.org/simple" }, { name = "coverage", extras = ["toml"], specifier = ">=7.6.1" },