Commit f8aab31

wip elastic

1 parent fbea584 commit f8aab31

2 files changed: 122 additions, 0 deletions

test/test_transforms_v2.py

Lines changed: 16 additions & 0 deletions
@@ -3355,6 +3355,9 @@ def test_kernel_video(self):
             make_segmentation_mask,
             make_video,
             make_keypoints,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available")
+            ),
         ],
     )
     def test_functional(self, make_input):
@@ -3370,9 +3373,16 @@ def test_functional(self, make_input):
             (F.elastic_mask, tv_tensors.Mask),
             (F.elastic_video, tv_tensors.Video),
             (F.elastic_keypoints, tv_tensors.KeyPoints),
+            pytest.param(
+                F._geometry._elastic_cvcuda,
+                "cvcuda.Tensor",
+                marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available"),
+            ),
         ],
     )
     def test_functional_signature(self, kernel, input_type):
+        if input_type == "cvcuda.Tensor":
+            input_type = _import_cvcuda().Tensor
         check_functional_kernel_signature_match(F.elastic, kernel=kernel, input_type=input_type)

     @pytest.mark.parametrize(
@@ -3385,6 +3395,9 @@ def test_functional_signature(self, kernel, input_type):
             make_segmentation_mask,
             make_video,
             make_keypoints,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available")
+            ),
         ],
     )
     def test_displacement_error(self, make_input):
@@ -3406,6 +3419,9 @@ def test_displacement_error(self, make_input):
             make_segmentation_mask,
             make_video,
             make_keypoints,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available")
+            ),
         ],
     )
     # ElasticTransform needs larger images to avoid the needed internal padding being larger than the actual image
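
Note: the new parametrize entries rely on two helpers that already exist in the test module but are not part of this diff: the CVCUDA_AVAILABLE flag used in the skipif marks, and _import_cvcuda(), which lets the signature test pass "cvcuda.Tensor" as a placeholder string and only resolve the real type inside the test body. Their actual implementation is not shown in this commit; the following is only a rough sketch of the lazy-import pattern they presumably follow (the names are real, the bodies are assumptions):

# Sketch only -- the real helpers live elsewhere in torchvision; bodies are assumed.
import importlib.util

# True when the cvcuda package can be located, without importing it at collection time.
CVCUDA_AVAILABLE = importlib.util.find_spec("cvcuda") is not None


def _import_cvcuda():
    # Deferred import: only executed inside tests that are not skipped.
    import cvcuda

    return cvcuda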

torchvision/transforms/v2/functional/_geometry.py

Lines changed: 106 additions & 0 deletions
@@ -4,6 +4,7 @@
 from collections.abc import Sequence
 from typing import Any, Optional, TYPE_CHECKING, Union

+import numpy as np
 import PIL.Image
 import torch
 from torch.nn.functional import grid_sample, interpolate, pad as torch_pad
@@ -2529,6 +2530,111 @@ def elastic_video(
     return elastic_image(video, displacement, interpolation=interpolation, fill=fill)


+if CVCUDA_AVAILABLE:
+    _cvcuda_interp = {
+        InterpolationMode.BILINEAR: cvcuda.Interp.LINEAR,
+        "bilinear": cvcuda.Interp.LINEAR,
+        "linear": cvcuda.Interp.LINEAR,
+        2: cvcuda.Interp.LINEAR,
+        InterpolationMode.BICUBIC: cvcuda.Interp.CUBIC,
+        "bicubic": cvcuda.Interp.CUBIC,
+        3: cvcuda.Interp.CUBIC,
+        InterpolationMode.NEAREST: cvcuda.Interp.NEAREST,
+        "nearest": cvcuda.Interp.NEAREST,
+        0: cvcuda.Interp.NEAREST,
+        InterpolationMode.BOX: cvcuda.Interp.BOX,
+        "box": cvcuda.Interp.BOX,
+        4: cvcuda.Interp.BOX,
+        InterpolationMode.HAMMING: cvcuda.Interp.HAMMING,
+        "hamming": cvcuda.Interp.HAMMING,
+        5: cvcuda.Interp.HAMMING,
+        InterpolationMode.LANCZOS: cvcuda.Interp.LANCZOS,
+        "lanczos": cvcuda.Interp.LANCZOS,
+        1: cvcuda.Interp.LANCZOS,
+    }
+
+
+def _elastic_cvcuda(
+    image: "cvcuda.Tensor",
+    displacement: torch.Tensor,
+    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
+    fill: _FillTypeJIT = None,
+) -> "cvcuda.Tensor":
+    if not isinstance(displacement, torch.Tensor):
+        raise TypeError("Argument displacement should be a Tensor")
+
+    # Input image is NHWC format: (N, H, W, C)
+    batch_size, height, width, num_channels = image.shape
+    device = torch.device("cuda")
+    dtype = torch.float32
+
+    expected_shape = (1, height, width, 2)
+    if expected_shape != displacement.shape:
+        raise ValueError(f"Argument displacement shape should be {expected_shape}, but given {displacement.shape}")
+
+    # cvcuda.remap only supports uint8 for 3-channel images, float32 for 1-channel
+    input_dtype = image.dtype
+    if num_channels == 3 and input_dtype != cvcuda.Type.U8:
+        raise ValueError(f"cvcuda.remap requires uint8 dtype for 3-channel images, but got {input_dtype}")
+    elif num_channels == 1 and input_dtype != cvcuda.Type.F32:
+        raise ValueError(f"cvcuda.remap requires float32 dtype for 1-channel images, but got {input_dtype}")
+
+    # Build normalized grid: identity + displacement
+    # _create_identity_grid returns (1, H, W, 2) with values in [-1, 1]
+    identity_grid = _create_identity_grid((height, width), device=device, dtype=dtype)
+    grid = identity_grid.add_(displacement.to(dtype=dtype, device=device))
+
+    # Convert normalized grid [-1, 1] to absolute pixel coordinates [0, width-1], [0, height-1]
+    # grid[..., 0] is x (horizontal), grid[..., 1] is y (vertical)
+    map_x = (grid[..., 0] + 1) * (width - 1) / 2.0
+    map_y = (grid[..., 1] + 1) * (height - 1) / 2.0
+
+    # Stack into (1, H, W, 2) map tensor
+    pixel_map = torch.stack([map_x, map_y], dim=-1)
+
+    # Expand map for batch if needed
+    if batch_size > 1:
+        pixel_map = pixel_map.expand(batch_size, -1, -1, -1)
+
+    # Create cvcuda map tensor (NHWC layout with 2 channels for x,y)
+    cv_map = cvcuda.as_tensor(pixel_map.contiguous(), "NHWC")
+
+    # Resolve interpolation
+    src_interp = _cvcuda_interp.get(interpolation, cvcuda.Interp.LINEAR)
+
+    # Resolve border mode and value
+    if fill is None:
+        border_mode = cvcuda.Border.CONSTANT
+        border_value = np.array([], dtype=np.float32)
+    elif isinstance(fill, (int, float)):
+        border_mode = cvcuda.Border.CONSTANT
+        border_value = np.array([fill], dtype=np.float32)
+    elif isinstance(fill, (list, tuple)):
+        border_mode = cvcuda.Border.CONSTANT
+        border_value = np.array(fill, dtype=np.float32)
+    else:
+        border_mode = cvcuda.Border.CONSTANT
+        border_value = np.array([], dtype=np.float32)
+
+    # Call cvcuda.remap
+    output = cvcuda.remap(
+        image,
+        cv_map,
+        src_interp=src_interp,
+        map_interp=cvcuda.Interp.LINEAR,
+        map_type=cvcuda.Remap.ABSOLUTE,
+        align_corners=False,
+        border=border_mode,
+        border_value=border_value,
+    )
+
+    return output
+
+
+if CVCUDA_AVAILABLE:
+    _elastic_cvcuda = _register_kernel_internal(elastic, cvcuda.Tensor)(_elastic_cvcuda)
+
+
 def center_crop(inpt: torch.Tensor, output_size: list[int]) -> torch.Tensor:
     """See :class:`~torchvision.transforms.v2.RandomCrop` for details."""
     if torch.jit.is_scripting():
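
For orientation, here is a rough end-to-end sketch of how the newly registered kernel would be exercised. Shapes, sizes, and the displacement magnitude are illustrative only; the sketch assumes a CUDA device with CV-CUDA installed and uses only calls that appear in the diff above (cvcuda.as_tensor, the F.elastic dispatcher, and the uint8/NHWC/3-channel requirement enforced by _elastic_cvcuda):

# Illustrative usage sketch, not part of the commit.
import torch
import cvcuda
import torchvision.transforms.v2.functional as F

# 3-channel uint8 NHWC batch -- the dtype/channel combination _elastic_cvcuda accepts.
image_torch = torch.randint(0, 256, (1, 64, 64, 3), dtype=torch.uint8, device="cuda")
image_cv = cvcuda.as_tensor(image_torch, "NHWC")

# Displacement field in normalized [-1, 1] coordinates, shape (1, H, W, 2) as the kernel requires.
displacement = 0.05 * torch.randn(1, 64, 64, 2, device="cuda")

# With the registration above, F.elastic should dispatch cvcuda.Tensor inputs to _elastic_cvcuda.
out = F.elastic(image_cv, displacement)
print(out.shape, out.dtype)  # cvcuda.Tensor with the input's NHWC shape and dtype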
