fix: move normalize_cvcuda to the correct patterns for tests/exporting

justincdavis · justincdavis · commit dbf4a5c12462 · 2025-11-20T11:23:20.000-08:00
diff --git a/test/common_utils.py b/test/common_utils.py
@@ -400,8 +400,9 @@ def make_image_pil(*args, **kwargs):
     return to_pil_image(make_image(*args, **kwargs))
 
 
-def make_image_cvcuda(*args, **kwargs):
-    return to_cvcuda_tensor(make_image(*args, **kwargs))
+def make_image_cvcuda(*args, batch_dims=(1,), **kwargs):
+    # explicitly default batch_dims to (1,) since to_cvcuda_tensor requires a batch dimension (ndims == 4)
+    return to_cvcuda_tensor(make_image(*args, batch_dims=batch_dims, **kwargs))
 
 
 def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"):
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -5517,7 +5517,17 @@ def test_kernel_image_inplace(self, device):
     def test_kernel_video(self):
         check_kernel(F.normalize_video, make_video(dtype=torch.float32), mean=self.MEAN, std=self.STD)
 
-    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
+    @pytest.mark.parametrize(
+        "make_input",
+        [
+            make_image_tensor,
+            make_image,
+            make_video,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
+            ),
+        ],
+    )
     def test_functional(self, make_input):
         check_functional(F.normalize, make_input(dtype=torch.float32), mean=self.MEAN, std=self.STD)
 
@@ -5527,6 +5537,11 @@ def test_functional(self, make_input):
             (F.normalize_image, torch.Tensor),
             (F.normalize_image, tv_tensors.Image),
             (F.normalize_video, tv_tensors.Video),
+            pytest.param(
+                F._misc._normalize_cvcuda,
+                _import_cvcuda().Tensor,
+                marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA"),
+            ),
         ],
     )
     def test_functional_signature(self, kernel, input_type):
@@ -5555,7 +5570,17 @@ def _sample_input_adapter(self, transform, input, device):
             adapted_input[key] = value
         return adapted_input
 
-    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
+    @pytest.mark.parametrize(
+        "make_input",
+        [
+            make_image_tensor,
+            make_image,
+            make_video,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
+            ),
+        ],
+    )
     def test_transform(self, make_input):
         check_transform(
             transforms.Normalize(mean=self.MEAN, std=self.STD),
@@ -5579,78 +5604,16 @@ def test_correctness_image(self, mean, std, dtype, fn):
 
         assert_equal(actual, expected)
 
-
-@pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
-@needs_cuda
-class TestNormalizeCVCUDA:
-    MEANS_STDS = {
-        "RGB": TestNormalize.MEANS_STDS,
-        "GRAY": [([0.5], [2.0])],
-    }
-    MEAN_STD = {
-        "RGB": MEANS_STDS["RGB"][0],
-        "GRAY": MEANS_STDS["GRAY"][0],
-    }
-
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
-    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
-    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
-    def test_functional(self, color_space, batch_dims, dtype):
-        means_stds = self.MEANS_STDS[color_space]
-        for mean, std in means_stds:
-            image = make_image_cvcuda(color_space=color_space, dtype=dtype, batch_dims=batch_dims)
-            check_functional(F.normalize, image, mean=mean, std=std)
-
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
-    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
-    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
-    def test_functional_scalar(self, color_space, batch_dims, dtype):
-        image = make_image_cvcuda(color_space=color_space, dtype=dtype, batch_dims=batch_dims)
-        check_functional(F.normalize, image, mean=0.5, std=2.0)
-
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
-    @pytest.mark.parametrize("batch_dims", [(1,)])
-    def test_functional_error(self, dtype, batch_dims):
-        rgb_mean, rgb_std = self.MEAN_STD["RGB"]
-        gray_mean, gray_std = self.MEAN_STD["GRAY"]
-
-        with pytest.raises(ValueError, match="Inplace normalization is not supported for CVCUDA."):
-            F.normalize(make_image_cvcuda(batch_dims=batch_dims, dtype=dtype), mean=rgb_mean, std=rgb_std, inplace=True)
-
-        with pytest.raises(ValueError, match="Mean should have 3 elements. Got 1."):
-            F.normalize(make_image_cvcuda(batch_dims=batch_dims, color_space="RGB", dtype=dtype), mean=gray_mean, std=rgb_std)
-
-        with pytest.raises(ValueError, match="Std should have 3 elements. Got 1."):
-            F.normalize(make_image_cvcuda(batch_dims=batch_dims, color_space="RGB", dtype=dtype), mean=rgb_mean, std=gray_std)
-
-        with pytest.raises(ValueError, match="Mean should have 1 elements. Got 3."):
-            F.normalize(make_image_cvcuda(batch_dims=batch_dims, color_space="GRAY", dtype=dtype), mean=rgb_mean, std=gray_std)
-
-        with pytest.raises(ValueError, match="Std should have 1 elements. Got 3."):
-            F.normalize(make_image_cvcuda(batch_dims=batch_dims, color_space="GRAY", dtype=dtype), mean=gray_mean, std=rgb_std)
-
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
-    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
-    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
-    def test_transform(self, dtype, color_space, batch_dims):
-        means_stds = self.MEANS_STDS[color_space]
-        for mean, std in means_stds:
-            check_transform(
-                transforms.Normalize(mean=mean, std=std),
-                make_image_cvcuda(color_space=color_space, dtype=dtype, batch_dims=batch_dims),
-            )
-
-    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
-    def test_correctness_image(self, batch_dims):
-        mean, std = self.MEAN_STD["RGB"]
-        torch_image = make_image(batch_dims=batch_dims, dtype=torch.float32, device="cuda")
-        cvc_image = F.to_cvcuda_tensor(torch_image)
-
-        gold = F.normalize(torch_image, mean=mean, std=std)
-        image = F.normalize(cvc_image, mean=mean, std=std)
-        image = F.cvcuda_to_tensor(image)
-
-        assert_close(image, gold, rtol=1e-7, atol=1e-7)
+    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
+    @pytest.mark.parametrize(("mean", "std"), MEANS_STDS)
+    @pytest.mark.parametrize("dtype", [torch.float32])
+    @pytest.mark.parametrize("fn", [F.normalize, transform_cls_to_functional(transforms.Normalize)])
+    def test_correctness_cvcuda(self, mean, std, dtype, fn):
+        image = make_image(batch_dims=(1,), dtype=dtype, device="cuda")
+        cvc_image = F.to_cvcuda_tensor(image)
+        actual = F._misc._normalize_cvcuda(cvc_image, mean=mean, std=std)
+        expected = fn(image, mean=mean, std=std)
+        torch.testing.assert_close(F.cvcuda_to_tensor(actual), expected, rtol=1e-7, atol=1e-7)
 
 
 class TestClampBoundingBoxes:
diff --git a/torchvision/transforms/v2/functional/__init__.py b/torchvision/transforms/v2/functional/__init__.py
@@ -153,7 +153,6 @@
     gaussian_noise_image,
     gaussian_noise_video,
     normalize,
-    normalize_cvcuda,
     normalize_image,
     normalize_video,
     sanitize_bounding_boxes,
diff --git a/torchvision/transforms/v2/functional/_misc.py b/torchvision/transforms/v2/functional/_misc.py
@@ -1,5 +1,5 @@
 import math
-from typing import Optional, Sequence, TYPE_CHECKING
+from typing import Optional, TYPE_CHECKING
 
 import PIL.Image
 import torch
@@ -79,15 +79,22 @@ def normalize_video(video: torch.Tensor, mean: list[float], std: list[float], in
     return normalize_image(video, mean, std, inplace=inplace)
 
 
-def normalize_cvcuda(
+def _normalize_cvcuda(
     image: "cvcuda.Tensor",
-    mean: Sequence[float | int] | float | int,
-    std: Sequence[float | int] | float | int,
+    mean: list[float],
+    std: list[float],
     inplace: bool = False,
 ) -> "cvcuda.Tensor":
+    cvcuda = _import_cvcuda()
     if inplace:
         raise ValueError("Inplace normalization is not supported for CVCUDA.")
 
+    # CV-CUDA supports signed int and float tensors, while torchvision only supports uint and float.
+    # CV-CUDA doesn't currently expose float16, so only float32 is checked here.
+    # In the future, add float16 once it is exposed in CV-CUDA.
+    if not (image.dtype == cvcuda.Type.F32):
+        raise ValueError(f"Input tensor should be a float tensor. Got {image.dtype}.")
+
     channels = image.shape[3]
     if isinstance(mean, float | int):
         mean = [mean] * channels
@@ -115,7 +122,7 @@ def normalize_cvcuda(
 
 
 if CVCUDA_AVAILABLE:
-    _normalize_cvcuda = _register_kernel_internal(normalize, cvcuda.Tensor)(normalize_cvcuda)
+    _normalize_cvcuda_registered = _register_kernel_internal(normalize, _import_cvcuda().Tensor)(_normalize_cvcuda)
 
 
 def gaussian_blur(inpt: torch.Tensor, kernel_size: list[int], sigma: Optional[list[float]] = None) -> torch.Tensor: