Skip to content

Commit 6423a64

Browse files
committed
merge with remote main for needs_cvcuda
2 parents aa35ca1 + 1fd3632 commit 6423a64

File tree

3 files changed

+198
-15
lines changed

3 files changed

+198
-15
lines changed

test/test_transforms_v2.py

Lines changed: 86 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import torchvision.transforms.v2 as transforms
2222

2323
from common_utils import (
24+
assert_close,
2425
assert_equal,
2526
cache,
2627
cpu_and_cuda,
@@ -42,7 +43,6 @@
4243
)
4344

4445
from torch import nn
45-
from torch.testing import assert_close
4646
from torch.utils._pytree import tree_flatten, tree_map
4747
from torch.utils.data import DataLoader, default_collate
4848
from torchvision import tv_tensors
@@ -2619,7 +2619,32 @@ def test_kernel(self, kernel, make_input, input_dtype, output_dtype, device, sca
26192619
scale=scale,
26202620
)
26212621

2622-
@pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
2622+
@pytest.mark.parametrize(
    ("kernel", "input_type"),
    [
        (F.to_dtype_image, torch.Tensor),
        (F.to_dtype_video, tv_tensors.Video),
        # The input type for the CV-CUDA kernel is a placeholder here; it is resolved inside the test
        # body, which only runs when the ``needs_cvcuda`` mark allows it.
        pytest.param(F._misc._to_dtype_image_cvcuda, None, marks=pytest.mark.needs_cvcuda),
    ],
)
def test_functional_signature(self, kernel, input_type):
    """Run ``check_functional_kernel_signature_match`` for ``F.to_dtype`` against each registered kernel."""
    is_cvcuda_kernel = kernel is F._misc._to_dtype_image_cvcuda
    # ``cvcuda.Tensor`` cannot be named in the parametrization, so look it up lazily.
    resolved_type = _import_cvcuda().Tensor if is_cvcuda_kernel else input_type
    check_functional_kernel_signature_match(F.to_dtype, kernel=kernel, input_type=resolved_type)
2638+
2639+
@pytest.mark.parametrize(
2640+
"make_input",
2641+
[
2642+
make_image_tensor,
2643+
make_image,
2644+
make_video,
2645+
pytest.param(make_image_cvcuda, marks=pytest.mark.needs_cvcuda),
2646+
],
2647+
)
26232648
@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
26242649
@pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
26252650
@pytest.mark.parametrize("device", cpu_and_cuda())
@@ -2634,7 +2659,14 @@ def test_functional(self, make_input, input_dtype, output_dtype, device, scale):
26342659

26352660
@pytest.mark.parametrize(
26362661
"make_input",
2637-
[make_image_tensor, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
2662+
[
2663+
make_image_tensor,
2664+
make_image,
2665+
make_bounding_boxes,
2666+
make_segmentation_mask,
2667+
make_video,
2668+
pytest.param(make_image_cvcuda, marks=pytest.mark.needs_cvcuda),
2669+
],
26382670
)
26392671
@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
26402672
@pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
@@ -2680,25 +2712,69 @@ def fn(value):
26802712

26812713
return torch.tensor(tree_map(fn, image.tolist())).to(dtype=output_dtype, device=image.device)
26822714

2715+
def _get_dtype_conversion_atol_cvcuda(self, input_dtype, output_dtype):
2716+
in_bits = torch.iinfo(input_dtype).bits if not input_dtype.is_floating_point else None
2717+
out_bits = torch.iinfo(output_dtype).bits if not output_dtype.is_floating_point else None
2718+
narrows_bits = in_bits is not None and out_bits is not None and out_bits < in_bits
2719+
2720+
# int->int with narrowing bits, allow atol=1 for rounding diffs
2721+
if narrows_bits:
2722+
atol = 1
2723+
# float->int check for same diff, rounding error on float
2724+
elif input_dtype.is_floating_point and not output_dtype.is_floating_point:
2725+
atol = 1
2726+
# if generating a float value from an int, allow small rounding error
2727+
elif not input_dtype.is_floating_point and output_dtype.is_floating_point:
2728+
atol = 1e-7
2729+
# all other cases, should be exact
2730+
# uint8 -> uint16 promotion would be here
2731+
else:
2732+
atol = 0
2733+
2734+
return atol
2735+
26832736
@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16])
26842737
@pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16])
26852738
@pytest.mark.parametrize("device", cpu_and_cuda())
26862739
@pytest.mark.parametrize("scale", (True, False))
2687-
def test_image_correctness(self, input_dtype, output_dtype, device, scale):
2740+
@pytest.mark.parametrize(
2741+
"make_input",
2742+
[
2743+
make_image,
2744+
pytest.param(make_image_cvcuda, marks=pytest.mark.needs_cvcuda),
2745+
],
2746+
)
2747+
@pytest.mark.parametrize("fn", [F.to_dtype, transform_cls_to_functional(transforms.ToDtype)])
2748+
def test_image_correctness(self, input_dtype, output_dtype, device, scale, make_input, fn):
26882749
if input_dtype.is_floating_point and output_dtype == torch.int64:
26892750
pytest.xfail("float to int64 conversion is not supported")
26902751
if input_dtype == torch.uint8 and output_dtype == torch.uint16 and device == "cuda":
26912752
pytest.xfail("uint8 to uint16 conversion is not supported on cuda")
2753+
if (
2754+
input_dtype == torch.uint16
2755+
and output_dtype == torch.uint8
2756+
and not scale
2757+
and make_input is make_image_cvcuda
2758+
):
2759+
pytest.xfail("uint16 to uint8 conversion without scale is not supported for CV-CUDA.")
26922760

2693-
input = make_image(dtype=input_dtype, device=device)
2761+
input = make_input(dtype=input_dtype, device=device)
2762+
out = fn(input, dtype=output_dtype, scale=scale)
2763+
2764+
if make_input is make_image_cvcuda:
2765+
input = F.cvcuda_to_tensor(input)
2766+
out = F.cvcuda_to_tensor(out)
26942767

2695-
out = F.to_dtype(input, dtype=output_dtype, scale=scale)
26962768
expected = self.reference_convert_dtype_image_tensor(input, dtype=output_dtype, scale=scale)
26972769

2698-
if input_dtype.is_floating_point and not output_dtype.is_floating_point and scale:
2699-
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2700-
else:
2701-
torch.testing.assert_close(out, expected)
2770+
atol, rtol = None, None
2771+
if make_input is make_image_cvcuda:
2772+
atol = self._get_dtype_conversion_atol_cvcuda(input_dtype, output_dtype)
2773+
rtol = 0
2774+
elif input_dtype.is_floating_point and not output_dtype.is_floating_point and scale:
2775+
atol, rtol = 1, 0
2776+
2777+
torch.testing.assert_close(out, expected, atol=atol, rtol=rtol)
27022778

27032779
def was_scaled(self, inpt):
27042780
# this assumes the target dtype is float

torchvision/transforms/v2/_misc.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from torchvision import transforms as _transforms, tv_tensors
1111
from torchvision.transforms.v2 import functional as F, Transform
12+
from torchvision.transforms.v2.functional._utils import _is_cvcuda_tensor
1213

1314
from ._utils import (
1415
_parse_labels_getter,
@@ -267,7 +268,7 @@ class ToDtype(Transform):
267268
Default: ``False``.
268269
"""
269270

270-
_transformed_types = (torch.Tensor,)
271+
_transformed_types = Transform._transformed_types + (_is_cvcuda_tensor,)
271272

272273
def __init__(
273274
self, dtype: Union[torch.dtype, dict[Union[type, str], Optional[torch.dtype]]], scale: bool = False
@@ -294,7 +295,11 @@ def transform(self, inpt: Any, params: dict[str, Any]) -> Any:
294295
if isinstance(self.dtype, torch.dtype):
295296
# For consistency / BC with ConvertImageDtype, we only care about images or videos when dtype
296297
# is a simple torch.dtype
297-
if not is_pure_tensor(inpt) and not isinstance(inpt, (tv_tensors.Image, tv_tensors.Video)):
298+
if (
299+
not is_pure_tensor(inpt)
300+
and not isinstance(inpt, (tv_tensors.Image, tv_tensors.Video))
301+
and not _is_cvcuda_tensor(inpt)
302+
):
298303
return inpt
299304

300305
dtype: Optional[torch.dtype] = self.dtype
@@ -311,7 +316,9 @@ def transform(self, inpt: Any, params: dict[str, Any]) -> Any:
311316
'e.g. dtype={tv_tensors.Mask: torch.int64, "others": None} to pass-through the rest of the inputs.'
312317
)
313318

314-
supports_scaling = is_pure_tensor(inpt) or isinstance(inpt, (tv_tensors.Image, tv_tensors.Video))
319+
supports_scaling = (
320+
is_pure_tensor(inpt) or isinstance(inpt, (tv_tensors.Image, tv_tensors.Video)) or _is_cvcuda_tensor(inpt)
321+
)
315322
if dtype is None:
316323
if self.scale and supports_scaling:
317324
warnings.warn(

torchvision/transforms/v2/functional/_misc.py

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import math
2-
from typing import Optional
2+
from typing import Optional, TYPE_CHECKING
33

44
import PIL.Image
55
import torch
@@ -13,7 +13,12 @@
1313

1414
from ._meta import _convert_bounding_box_format
1515

16-
from ._utils import _get_kernel, _register_kernel_internal, is_pure_tensor
16+
from ._utils import _get_kernel, _import_cvcuda, _is_cvcuda_available, _register_kernel_internal, is_pure_tensor
17+
18+
CVCUDA_AVAILABLE = _is_cvcuda_available()
19+
20+
if TYPE_CHECKING:
21+
import cvcuda # type: ignore[import-not-found]
1722

1823

1924
def normalize(
@@ -340,6 +345,101 @@ def _to_dtype_tensor_dispatch(inpt: torch.Tensor, dtype: torch.dtype, scale: boo
340345
return inpt.to(dtype)
341346

342347

348+
# cvcuda is only used if it is installed, so we can simply define empty mappings
349+
_torch_to_cvcuda_dtypes: dict[torch.dtype, "cvcuda.Type"] = {}
350+
_cvcuda_to_torch_dtypes: dict["cvcuda.Type", torch.dtype] = {}
351+
352+
353+
def _to_dtype_image_cvcuda(
    inpt: "cvcuda.Tensor",
    dtype: torch.dtype = torch.float,
    scale: bool = False,
) -> "cvcuda.Tensor":
    """Convert a CV-CUDA tensor to the CV-CUDA type matching a ``torch.dtype``.

    The conversion is a single ``cvcuda.convertto`` call with ``offset=0.0``; only the
    multiplicative factor depends on the arguments.

    Args:
        inpt: The CV-CUDA tensor to convert.
        dtype: The target dtype, given as a ``torch.dtype``.
        scale: Whether to rescale the values to the range of the new dtype.
            If ``False``, the factor is ``1.0``. If ``True``, it depends on the kind of conversion:

            1. float -> float: ``1.0``.
            2. int -> int: ``2 ** (out_value_bits - in_value_bits)``, where the value bits of a
               signed dtype exclude its sign bit.
            3. float -> int: ``_max_value(dtype) + 1.0 - 1e-3``.
            4. int -> float: ``1.0 / _max_value(input dtype)``.

    Returns:
        out (cvcuda.Tensor): The CV-CUDA tensor with the converted dtype.

    Raises:
        ValueError: If the input dtype or ``dtype`` has no entry in the torch <-> CV-CUDA dtype
            mappings, or for a uint16 -> uint8 conversion with ``scale=False``.
    """
    cvcuda = _import_cvcuda()

    # The mappings need the cvcuda module, so they are filled on first use instead of at import time.
    # ``update`` with a fully built dict keeps the mapping empty if any ``cvcuda.Type`` lookup fails.
    if not _torch_to_cvcuda_dtypes:
        _torch_to_cvcuda_dtypes.update(
            {
                torch.uint8: cvcuda.Type.U8,
                torch.uint16: cvcuda.Type.U16,
                torch.uint32: cvcuda.Type.U32,
                torch.uint64: cvcuda.Type.U64,
                torch.int8: cvcuda.Type.S8,
                torch.int16: cvcuda.Type.S16,
                torch.int32: cvcuda.Type.S32,
                torch.int64: cvcuda.Type.S64,
                torch.float32: cvcuda.Type.F32,
                torch.float64: cvcuda.Type.F64,
            }
        )

    if not _cvcuda_to_torch_dtypes:
        _cvcuda_to_torch_dtypes.update(
            {cvc_type: torch_type for torch_type, cvc_type in _torch_to_cvcuda_dtypes.items()}
        )

    dtype_in = _cvcuda_to_torch_dtypes.get(inpt.dtype)
    cvc_dtype = _torch_to_cvcuda_dtypes.get(dtype)
    if dtype_in is None or cvc_dtype is None:
        raise ValueError(f"No torch or cvcuda dtype found for dtype {dtype} or {inpt.dtype}")

    # torchvision will overflow the values of uint16 when converting down to uint8 without scale
    # example: 300 -> 255 (cvcuda) vs 300 mod 256 = 44 (torchvision)
    # since it is not equivalent, raise an error for unsupported behavior
    # the workaround could be using torch for dtype conversion directly via zero-copy
    if dtype_in == torch.uint16 and dtype == torch.uint8 and not scale:
        raise ValueError("uint16 to uint8 conversion without scale is not supported for CV-CUDA.")

    # Without scaling (and for float -> float) the values are passed through unchanged.
    scale_val = 1.0
    if scale:
        in_is_float = dtype_in.is_floating_point
        out_is_float = dtype.is_floating_point

        if in_is_float and not out_is_float:
            # Mirror the scaling factor which torchvision uses
            eps = 1e-3
            scale_val = float(_max_value(dtype)) + 1.0 - eps
        elif not in_is_float and out_is_float:
            scale_val = 1.0 / float(_max_value(dtype_in))
        elif not in_is_float:
            # int -> int: count value bits, i.e. leave out the sign bit of signed dtypes. Using the
            # raw storage width is off by a factor of two whenever the signedness differs, e.g.
            # uint8 -> int16 would scale by 2**8 and push 255 to 65280, past int16's maximum 32767.
            # NOTE(review): intended to match the int -> int scaling of the torch kernel - confirm.
            in_info = torch.iinfo(dtype_in)
            out_info = torch.iinfo(dtype)
            in_bits = in_info.bits - 1 if in_info.min < 0 else in_info.bits
            out_bits = out_info.bits - 1 if out_info.min < 0 else out_info.bits
            scale_val = float(2 ** (out_bits - in_bits))

    return cvcuda.convertto(
        inpt,
        dtype=cvc_dtype,
        scale=scale_val,
        offset=0.0,
    )
437+
438+
439+
if CVCUDA_AVAILABLE:
440+
_register_kernel_internal(to_dtype, _import_cvcuda().Tensor)(_to_dtype_image_cvcuda)
441+
442+
343443
def sanitize_bounding_boxes(
344444
bounding_boxes: torch.Tensor,
345445
format: Optional[tv_tensors.BoundingBoxFormat] = None,

0 commit comments

Comments
 (0)