Add 'crop' option to RandomRotation that centre-crops the image to remove any padding regions introduced by the rotation

sg3-141-592 · sg3-141-592 · commit dc34375f81e6 · 2026-03-29T15:38:52.000+01:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -2435,6 +2435,62 @@ def test_functional_image_fast_path_correctness(self, size, angle, expand):
 
         torch.testing.assert_close(actual, expected)
 
+    @pytest.mark.parametrize("size", [(100, 100), (120, 80)])
+    @pytest.mark.parametrize("angle", [15.0, 30.0, 45.0])
+    def test_transform_crop_removes_fill(self, size, angle):
+        # A constant non-zero image rotated with fill=0 and crop=True must come back without a single zero pixel
+        height, width = size
+        constant = tv_tensors.Image(torch.full((3, height, width), 200, dtype=torch.uint8))
+        rotated = transforms.RandomRotation((angle, angle), fill=0, crop=True)(constant)
+        out_height, out_width = rotated.shape[-2:]
+        assert rotated.min().item() > 0, "crop=True output should have no fill pixels"
+        assert out_height < height or out_width < width, "crop=True should reduce at least one dimension"
+
+    @pytest.mark.parametrize("size", [(100, 100), (120, 80)])
+    @pytest.mark.parametrize("angle", [15.0, 30.0, 45.0])
+    def test_transform_crop_consistent_across_inputs(self, size, angle):
+        # One rotate-and-crop call must leave the image, the mask and the boxes on the same canvas
+        height, width = size
+        sample = (
+            tv_tensors.Image(torch.full((3, height, width), 200, dtype=torch.uint8)),
+            tv_tensors.Mask(torch.ones(1, height, width, dtype=torch.uint8)),
+            tv_tensors.BoundingBoxes(
+                torch.tensor([[10.0, 10.0, 50.0, 50.0]]), format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=size
+            ),
+        )
+        rotated_image, rotated_mask, rotated_boxes = transforms.RandomRotation((angle, angle), crop=True)(*sample)
+        canvas = tuple(rotated_image.shape[-2:])
+        assert tuple(rotated_mask.shape[-2:]) == canvas
+        assert rotated_boxes.canvas_size == canvas
+
+    def test_transform_crop_and_expand_mutually_exclusive(self):
+        with pytest.raises(ValueError, match="crop and expand are mutually exclusive"):
+            transforms.RandomRotation(degrees=30, crop=True, expand=True)  # rejected at construction time
+
+    @pytest.mark.parametrize("angle", [0.0, 90.0, 180.0, 270.0])
+    def test_transform_crop_zero_angle_preserves_size(self, angle):
+        # Right-angle rotations of a square image introduce no fill, so nothing should be cropped away
+        square = tv_tensors.Image(torch.zeros(3, 100, 100, dtype=torch.uint8))
+        rotate_and_crop = transforms.RandomRotation((angle, angle), crop=True)
+        cropped = rotate_and_crop(square)
+        assert cropped.shape == square.shape
+
+    def test_largest_inscribed_crop_size(self):
+        from torchvision.transforms.v2.functional._geometry import _largest_inscribed_crop_size as crop_size
+
+        # Zero rotation keeps the full image; arguments are (width, height), the result is (height, width)
+        assert crop_size(100, 100, 0) == (100, 100)
+        assert crop_size(200, 100, 0) == (100, 200)
+
+        # Square at 45°: the inscribed square has side 100 / sqrt(2) ≈ 70.71, floored to 70
+        side_h, side_w = crop_size(100, 100, 45)
+        assert (side_h, side_w) == (70, 70)
+
+        # For generic angles the crop never exceeds the original height or width
+        for width, height, degrees in [(200, 100, 20), (640, 480, 15), (50, 50, 37)]:
+            crop_h, crop_w = crop_size(width, height, degrees)
+            assert crop_h <= height and crop_w <= width
+
 
 class TestContainerTransforms:
     class BuiltinTransform(transforms.Transform):
diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py
@@ -606,6 +606,9 @@ class RandomRotation(Transform):
             Fill value can be also a dictionary mapping data type to the fill value, e.g.
             ``fill={tv_tensors.Image: 127, tv_tensors.Mask: 0}`` where ``Image`` will be filled with 127 and
             ``Mask`` will be filled with 0.
+        crop (bool, optional): If ``True``, the rotated output is center-cropped to the largest axis-aligned
+            rectangle that fits entirely within the rotated image, removing any fill/padding regions introduced
+            by the rotation. Mutually exclusive with ``expand``. Default is ``False``.
 
     .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
 
@@ -620,11 +623,15 @@ def __init__(
         expand: bool = False,
         center: Optional[list[float]] = None,
         fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0,
+        crop: bool = False,
     ) -> None:
         super().__init__()
+        if crop and expand:
+            raise ValueError("crop and expand are mutually exclusive")
         self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,))
         self.interpolation = interpolation
         self.expand = expand
+        self.crop = crop
 
         self.fill = fill
         self._fill = _setup_fill_arg(fill)
@@ -634,21 +641,37 @@ def __init__(
 
         self.center = center
 
+    def _extract_params_for_v1_transform(self) -> dict[str, Any]:
+        # ``crop`` is not supported by the v1 transform: always drop it, and refuse to convert when it is set.
+        v1_kwargs = super()._extract_params_for_v1_transform()
+        crop_requested = v1_kwargs.pop("crop")
+        if not crop_requested:
+            return v1_kwargs
+        unsupported = "as this feature is not supported by the v1 transform."
+        raise ValueError(f"{type(self).__name__}() cannot be scripted when crop=True, " + unsupported)
+
     def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]:
         angle = torch.empty(1).uniform_(self.degrees[0], self.degrees[1]).item()
-        return dict(angle=angle)
+        if not self.crop:
+            return dict(angle=angle)
+        # Size the crop from the queried (height, width) and the sampled angle.
+        height, width = query_size(flat_inputs)
+        return dict(angle=angle, crop_hw=F._geometry._largest_inscribed_crop_size(width, height, angle))
 
     def transform(self, inpt: Any, params: dict[str, Any]) -> Any:
         fill = _get_fill(self._fill, type(inpt))
-        return self._call_kernel(
+        rotated = self._call_kernel(
             F.rotate,
             inpt,
-            **params,
+            angle=params["angle"],  # "crop_hw", when present, is consumed by the centre crop below
             interpolation=self.interpolation,
             expand=self.expand,
             center=self.center,
             fill=fill,
         )
+        if not self.crop:
+            return rotated
+        return self._call_kernel(F.center_crop, rotated, output_size=list(params["crop_hw"]))
 
 
 class RandomAffine(Transform):
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
@@ -1335,6 +1335,46 @@ def affine_video(
     )
 
 
+def _largest_inscribed_crop_size(width: int, height: int, angle: float) -> tuple[int, int]:
+    """Compute the largest axis-aligned, fill-free crop of a rotated width x height image.
+
+    The crop is the largest axis-aligned rectangle, sharing the image centre, that fits inside the image
+    rotated by ``angle`` about that centre. It is floored to whole pixels and clamped to the original
+    ``height`` x ``width`` canvas, i.e. it targets rotations that do not expand the canvas.
+
+    Args:
+        width (int): Width of the image before rotation.
+        height (int): Height of the image before rotation.
+        angle (float): Rotation angle in degrees. The sign does not affect the result.
+
+    Returns:
+        tuple[int, int]: ``(crop_height, crop_width)``.
+    """
+    import math
+
+    angle_rad = math.radians(angle)
+    sin_a, cos_a = abs(math.sin(angle_rad)), abs(math.cos(angle_rad))
+    if sin_a < 1e-10:  # 0°/180° up to numerical noise (sin(180°) ≈ 1.2e-16): nothing to remove
+        return height, width
+    if cos_a < 1e-10:
+        # 90°/270° swaps the sides, but the canvas keeps its shape: only a min(width, height) square remains.
+        return min(width, height), min(width, height)
+
+    side_long, side_short = max(width, height), min(width, height)
+    if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10:
+        # Half-constrained (also the degenerate 45° case): two crop corners touch the longer side.
+        x = 0.5 * side_short
+        crop_w, crop_h = (x / sin_a, x / cos_a) if width >= height else (x / cos_a, x / sin_a)
+    else:
+        # Fully constrained: the crop touches all four sides.
+        cos_2a = cos_a * cos_a - sin_a * sin_a
+        crop_w = (width * cos_a - height * sin_a) / cos_2a
+        crop_h = (height * cos_a - width * sin_a) / cos_2a
+
+    # Floor so that no fill pixel can enter the crop; clamp for wide images rotated near 90°.
+    return min(int(crop_h), height), min(int(crop_w), width)
+
+
 def rotate(
     inpt: torch.Tensor,
     angle: float,