From d3f473bf439eaf1c5b872f932584359046364727 Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Wed, 8 Apr 2026 14:39:07 +0200
Subject: [PATCH 01/11] Torchvision API RandomApply implementation

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../dali/experimental/torchvision/__init__.py |   2 +
 .../experimental/torchvision/v2/rand_apply.py |  58 ++++++++
 .../python/torchvision/test_tv_rand_apply.py  | 135 ++++++++++++++++++
 3 files changed, 195 insertions(+)
 create mode 100644 dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py
 create mode 100644 dali/test/python/torchvision/test_tv_rand_apply.py

diff --git a/dali/python/nvidia/dali/experimental/torchvision/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/__init__.py
index a87fc3f47f8..de4d4c47977 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/__init__.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/__init__.py
@@ -19,6 +19,7 @@
 from .v2.gaussian_blur import GaussianBlur
 from .v2.normalize import Normalize
 from .v2.pad import Pad
+from .v2.rand_apply import RandomApply
 from .v2.resize import Resize
 from .v2.totensor import ToPureTensor, PILToTensor, ToPILImage
 
@@ -31,6 +32,7 @@
     "Normalize",
     "Pad",
     "PILToTensor",
+    "RandomApply",
     "RandomGrayscale",
     "RandomHorizontalFlip",
     "RandomVerticalFlip",
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py b/dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py
new file mode 100644
index 00000000000..cb09dadede6
--- /dev/null
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Callable, Literal, Sequence
+
+from .operator import Operator, _ValidateIfZeroOneRange
+import nvidia.dali.fn as fn
+import nvidia.dali as dali
+
+
+class RandomApply(Operator):
+    """
+    Randomly apply a list of transformations with a given probability.
+
+    Parameters
+    ----------
+    op_list : Sequence[Callable]
+        List of transformations to apply.
+    p : float, optional, default = 0.5
+        Probability of applying the transformations.
+    device : Literal["cpu", "gpu"], optional, default = "cpu"
+        Device to use for the operator. Can be ``"cpu"`` or ``"gpu"``.
+    """
+
+    arg_rules = [_ValidateIfZeroOneRange]
+
+    def __init__(
+        self,
+        op_list: Sequence[Callable],
+        p: float = 0.5,
+        device: Literal["cpu", "gpu"] = "cpu",
+    ):
+        super().__init__(device=device, p=p)
+        self.p = p
+        self.op_list = op_list
+
+    def _kernel(self, data_input):
+        """
+        Flips one coin with probability p; on success applies all listed operators in order.
+        """
+        output = data_input
+        convert = fn.random.coin_flip(dtype=dali.types.DALIDataType.BOOL, probability=self.p)
+        if convert:
+            for op in self.op_list:
+                output = op(output)
+
+        return output
diff --git a/dali/test/python/torchvision/test_tv_rand_apply.py b/dali/test/python/torchvision/test_tv_rand_apply.py
new file mode 100644
index 00000000000..cbf0367ee3b
--- /dev/null
+++ b/dali/test/python/torchvision/test_tv_rand_apply.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from nose2.tools import params
+from nose_utils import assert_raises
+from PIL import Image
+import torch
+import torchvision.transforms.v2 as transforms
+
+from nvidia.dali.experimental.torchvision import (
+    Compose,
+    Grayscale,
+    RandomApply,
+    RandomHorizontalFlip,
+)
+
+
+def verify_non_one_off(t1: torch.Tensor, t2: torch.Tensor):
+    if t1.dtype == torch.uint8:
+        t1 = t1.to(torch.int16)
+        t2 = t2.to(torch.int16)
+
+    diff = (t1 - t2).abs()
+    more_than_one_mask = diff > 1
+
+    return more_than_one_mask.sum().item() == 0
+
+
+dali_extra = os.environ["DALI_EXTRA_PATH"]
+jpeg = os.path.join(dali_extra, "db", "single", "jpeg")
+jpeg_113 = os.path.join(jpeg, "113")
+test_files = [
+    os.path.join(jpeg_113, f)
+    for f in ["snail-4291306_1280.jpg", "snail-4345504_1280.jpg", "snail-4368154_1280.jpg"]
+]
+
+
+@params("cpu", "gpu")
+def test_random_apply_p1(device):
+    """p=1.0: transformations always applied — output must match torchvision."""
+    td = Compose(
+        [RandomApply([Grayscale(num_output_channels=3, device=device)], p=1.0, device=device)]
+    )
+    t = transforms.Compose(
+        [transforms.RandomApply([transforms.Grayscale(num_output_channels=3)], p=1.0)]
+    )
+
+    for fn in test_files:
+        img = Image.open(fn)
+        out_tv = transforms.functional.pil_to_tensor(t(img))
+        out_dali = transforms.functional.pil_to_tensor(td(img))
+        assert verify_non_one_off(out_tv, out_dali), f"Images differ: {fn}"
+
+
+@params("cpu", "gpu")
+def test_random_apply_p0(device):
+    """p=0.0: transformations never applied — output must equal input."""
+    td = Compose(
+        [RandomApply([Grayscale(num_output_channels=3, device=device)], p=0.0, device=device)]
+    )
+    t = transforms.Compose(
+        [transforms.RandomApply([transforms.Grayscale(num_output_channels=3)], p=0.0)]
+    )
+
+    for fn in test_files:
+        img = Image.open(fn)
+        out_tv = transforms.functional.pil_to_tensor(t(img))
+        out_dali = transforms.functional.pil_to_tensor(td(img))
+        assert verify_non_one_off(out_tv, out_dali), f"Images differ: {fn}"
+
+
+@params(-0.1, 2.0, [0.0, 0.8])
+def test_invalid_random_apply_probability(p):
+    with assert_raises(ValueError):
+        RandomApply([Grayscale(num_output_channels=3)], p=p)
+
+
+@params("cpu", "gpu")
+def test_random_apply_multi_ops(device):
+    """p=1.0 with multiple operators — all applied in sequence."""
+    td = Compose(
+        [
+            RandomApply(
+                [
+                    RandomHorizontalFlip(p=1.0, device=device),
+                    Grayscale(num_output_channels=3, device=device),
+                ],
+                p=1.0,
+                device=device,
+            )
+        ]
+    )
+    t = transforms.Compose(
+        [
+            transforms.RandomApply(
+                [
+                    transforms.RandomHorizontalFlip(p=1.0),
+                    transforms.Grayscale(num_output_channels=3),
+                ],
+                p=1.0,
+            )
+        ]
+    )
+
+    for fn in test_files:
+        img = Image.open(fn)
+        out_tv = transforms.functional.pil_to_tensor(t(img))
+        out_dali = transforms.functional.pil_to_tensor(td(img))
+        assert verify_non_one_off(out_tv, out_dali), f"Images differ: {fn}"
+
+
+def test_random_apply_preserves_shape():
+    """Output shape must match input shape regardless of p."""
+    td_apply = Compose([RandomApply([RandomHorizontalFlip(p=1.0)], p=1.0)])
+    td_skip = Compose([RandomApply([RandomHorizontalFlip(p=1.0)], p=0.0)])
+
+    for fn in test_files:
+        img = Image.open(fn)
+        out_apply = td_apply(img)
+        out_skip = td_skip(img)
+        assert out_apply.size == img.size, f"Shape mismatch after apply: {fn}"
+        assert out_skip.size == img.size, f"Shape mismatch after skip: {fn}"

From b0c754b9cc12d77f1775e736025aab9dac953ee9 Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Fri, 8 May 2026 14:06:32 +0200
Subject: [PATCH 02/11] Greptile review fixes

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../dali/experimental/torchvision/v2/rand_apply.py  |  4 ++--
 dali/test/python/torchvision/test_tv_rand_apply.py  | 13 +++++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py b/dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py
index cb09dadede6..75890376ecc 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/rand_apply.py
@@ -43,7 +43,7 @@ def __init__(
     ):
         super().__init__(device=device, p=p)
         self.p = p
-        self.op_list = op_list
+        self._op_list = op_list
 
     def _kernel(self, data_input):
         """
@@ -52,7 +52,7 @@ def _kernel(self, data_input):
         output = data_input
         convert = fn.random.coin_flip(dtype=dali.types.DALIDataType.BOOL, probability=self.p)
         if convert:
-            for op in self.op_list:
+            for op in self._op_list:
                 output = op(output)
 
         return output
diff --git a/dali/test/python/torchvision/test_tv_rand_apply.py b/dali/test/python/torchvision/test_tv_rand_apply.py
index cbf0367ee3b..fb5aee450b9 100644
--- a/dali/test/python/torchvision/test_tv_rand_apply.py
+++ b/dali/test/python/torchvision/test_tv_rand_apply.py
@@ -84,7 +84,7 @@ def test_random_apply_p0(device):
 
 @params(-0.1, 2.0, [0.0, 0.8])
 def test_invalid_random_apply_probability(p):
-    with assert_raises(ValueError):
+    with assert_raises(ValueError, regex="p should be a floating point value in the interval"):
         RandomApply([Grayscale(num_output_channels=3)], p=p)
 
 
@@ -122,10 +122,15 @@ def test_random_apply_multi_ops(device):
         assert verify_non_one_off(out_tv, out_dali), f"Images differ: {fn}"
 
 
-def test_random_apply_preserves_shape():
+@params("cpu", "gpu")
+def test_random_apply_preserves_shape(device):
     """Output shape must match input shape regardless of p."""
-    td_apply = Compose([RandomApply([RandomHorizontalFlip(p=1.0)], p=1.0)])
-    td_skip = Compose([RandomApply([RandomHorizontalFlip(p=1.0)], p=0.0)])
+    td_apply = Compose(
+        [RandomApply([RandomHorizontalFlip(p=1.0, device=device)], p=1.0, device=device)]
+    )
+    td_skip = Compose(
+        [RandomApply([RandomHorizontalFlip(p=1.0, device=device)], p=0.0, device=device)]
+    )
 
     for fn in test_files:
         img = Image.open(fn)

From 57302e8a0a2a185de7f29e7ba6b50ebf67a25c91 Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Wed, 13 May 2026 09:54:26 +0200
Subject: [PATCH 03/11] Adding 0 < p < 1 tests

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../python/torchvision/test_tv_rand_apply.py  | 31 ++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/dali/test/python/torchvision/test_tv_rand_apply.py b/dali/test/python/torchvision/test_tv_rand_apply.py
index fb5aee450b9..a1c7e68c00d 100644
--- a/dali/test/python/torchvision/test_tv_rand_apply.py
+++ b/dali/test/python/torchvision/test_tv_rand_apply.py
@@ -14,7 +14,7 @@
 
 import os
 
-from nose2.tools import params
+from nose2.tools import params, cartesian_params
 from nose_utils import assert_raises
 from PIL import Image
 import torch
@@ -138,3 +138,32 @@ def test_random_apply_preserves_shape(device):
         out_skip = td_skip(img)
         assert out_apply.size == img.size, f"Shape mismatch after apply: {fn}"
         assert out_skip.size == img.size, f"Shape mismatch after skip: {fn}"
+
+
+@cartesian_params((0.01, 0.1, 0.25, 0.3, 0.8, 0.99), ("cpu", "gpu"))
+def test_random_apply_p_sanity(p, device):
+    """Smoke test: RandomApply must run without raising for 0 < p < 1."""
+    td = Compose(
+        [RandomApply([Grayscale(num_output_channels=3, device=device)], p=p, device=device)]
+    )
+    for fn in test_files:
+        img = Image.open(fn)
+        _ = td(img)
+
+
+@cartesian_params((0.3, 0.5, 0.8), ("cpu", "gpu"))
+def test_random_apply_p(p, device):
+    """Sanity test to verify if p value varies application."""
+    td = Compose(
+        [RandomApply([Grayscale(num_output_channels=3, device=device)], p=p, device=device)]
+    )
+    reps = 10
+    for fn in test_files:
+        img = Image.open(fn)
+        tensor_img = transforms.functional.pil_to_tensor(img)
+        proc = 0
+        for i in range(reps):
+            out_dali = transforms.functional.pil_to_tensor(td(img))
+            if not verify_non_one_off(out_dali, tensor_img):
+                proc += 1
+        assert proc > 0, f"RandomApply did not apply any operation in {reps} runs"

From a4d62091033cc1cdd0d5e226fd54c00c0b33ea0b Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Wed, 13 May 2026 14:11:23 +0200
Subject: [PATCH 04/11] Review fixes

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 dali/test/python/torchvision/test_tv_rand_apply.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dali/test/python/torchvision/test_tv_rand_apply.py b/dali/test/python/torchvision/test_tv_rand_apply.py
index a1c7e68c00d..23377711491 100644
--- a/dali/test/python/torchvision/test_tv_rand_apply.py
+++ b/dali/test/python/torchvision/test_tv_rand_apply.py
@@ -155,15 +155,15 @@ def test_random_apply_p_sanity(p, device):
 def test_random_apply_p(p, device):
     """Sanity test to verify if p value varies application."""
     td = Compose(
-        [RandomApply([Grayscale(num_output_channels=3, device=device)], p=p, device=device)]
+        [RandomApply([Grayscale(num_output_channels=1, device=device)], p=p, device=device)]
     )
     reps = 10
     for fn in test_files:
         img = Image.open(fn)
-        tensor_img = transforms.functional.pil_to_tensor(img)
         proc = 0
         for i in range(reps):
             out_dali = transforms.functional.pil_to_tensor(td(img))
-            if not verify_non_one_off(out_dali, tensor_img):
+            # If grayscale was applied it will result in a single channel image
+            if out_dali.shape[0] == 1:
                 proc += 1
         assert proc > 0, f"RandomApply did not apply any operation in {reps} runs"

From 693e9affe4da9af93312e44257d157b14fdff2c4 Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Mon, 18 May 2026 18:45:20 +0200
Subject: [PATCH 05/11] Torchvision API RandomCrop and crop operators

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../dali/experimental/torchvision/__init__.py |   2 +
 .../torchvision/v2/functional/__init__.py     |   2 +
 .../torchvision/v2/functional/crop.py         |  69 ++++
 .../experimental/torchvision/v2/randomcrop.py | 283 ++++++++++++++++
 dali/test/python/torchvision/test_tv_crop.py  | 157 +++++++++
 .../python/torchvision/test_tv_randomcrop.py  | 304 ++++++++++++++++++
 6 files changed, 817 insertions(+)
 create mode 100644 dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py
 create mode 100644 dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
 create mode 100644 dali/test/python/torchvision/test_tv_crop.py
 create mode 100644 dali/test/python/torchvision/test_tv_randomcrop.py

diff --git a/dali/python/nvidia/dali/experimental/torchvision/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/__init__.py
index de4d4c47977..550dfd57bc5 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/__init__.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/__init__.py
@@ -20,6 +20,7 @@
 from .v2.normalize import Normalize
 from .v2.pad import Pad
 from .v2.rand_apply import RandomApply
+from .v2.randomcrop import RandomCrop
 from .v2.resize import Resize
 from .v2.totensor import ToPureTensor, PILToTensor, ToPILImage
 
@@ -33,6 +34,7 @@
     "Pad",
     "PILToTensor",
     "RandomApply",
+    "RandomCrop",
     "RandomGrayscale",
     "RandomHorizontalFlip",
     "RandomVerticalFlip",
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
index ec19014a2d7..18003740b00 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
@@ -14,6 +14,7 @@
 
 from .centercrop import center_crop
 from .color import to_grayscale, rgb_to_grayscale
+from .crop import crop
 from .flips import horizontal_flip, vertical_flip
 from .gaussian_blur import gaussian_blur
 from .normalize import normalize
@@ -23,6 +24,7 @@
 
 __all__ = [
     "center_crop",
+    "crop",
     "gaussian_blur",
     "horizontal_flip",
     "normalize",
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py
new file mode 100644
index 00000000000..d084b6aee0c
--- /dev/null
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import nvidia.dali.experimental.dynamic as ndd
+from nvidia.dali._typing import TensorLike
+from nvidia.dali.experimental.dynamic._device import DeviceLike
+
+from ..operator import adjust_input
+from ..randomcrop import RandomCrop
+
+
+def _get_crop_axes(inpt: TensorLike | ndd.Batch) -> list[int]:
+    layout = inpt.layout[-3:]
+    if layout == "HWC":
+        return [-3, -2]
+    if layout == "CHW":
+        return [-2, -1]
+    if inpt.layout[-2:] == "HW":
+        return [-2, -1]
+    raise ValueError(f"Unsupported layout: {inpt.layout!r}. Expected one of HWC, CHW, HW.")
+
+
+def _verify_crop_coordinate(value, name: str) -> None:
+    if not isinstance(value, int):
+        raise TypeError(f"{name} must be int, got {type(value)}")
+
+
+@adjust_input
+def crop(
+    inpt: TensorLike | ndd.Batch,
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+    device: DeviceLike = "cpu",
+) -> ndd.Tensor | ndd.Batch:
+    """
+    Please refer to the ``RandomCrop`` operator for more details.
+    """
+    _verify_crop_coordinate(top, "top")
+    _verify_crop_coordinate(left, "left")
+    RandomCrop.verify_args(
+        size=(height, width),
+        padding=None,
+        pad_if_needed=False,
+        padding_mode="constant",
+        fill=0,
+    )
+
+    return ndd.slice(
+        inpt,
+        (top, left),
+        (height, width),
+        axes=_get_crop_axes(inpt),
+        out_of_bounds_policy="pad",
+        fill_values=0,
+        device=device,
+    )
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
new file mode 100644
index 00000000000..9fb673d7de7
--- /dev/null
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
@@ -0,0 +1,283 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import numbers
+from types import NoneType
+from typing import Literal, Sequence, Union
+
+from PIL import Image
+import nvidia.dali as dali
+import nvidia.dali.fn as fn
+import numpy as np
+import torch
+
+from .centercrop import CenterCrop
+from .operator import (
+    Operator,
+    _ArgumentValidateRule,
+    _ValidateIfNonNegative,
+    _ValidateSizeDescriptor,
+    get_HWC_from_layout_pipeline,
+)
+from .pad import PADDING_CLASS, _ValidatePaddingMode
+
+
+class _ValidateCropSize(_ArgumentValidateRule):
+    """
+    Verify RandomCrop size values.
+    """
+
+    @classmethod
+    def verify(cls, *, size, **_) -> None:
+        if isinstance(size, (list, tuple)) and any(not isinstance(value, int) for value in size):
+            raise ValueError(f"Size values must be integers, got {size}")
+
+
+class _ValidatePadding(_ArgumentValidateRule):
+    """
+    Verify RandomCrop padding arguments.
+    """
+
+    @classmethod
+    def verify(cls, *, padding, pad_if_needed, padding_mode, **_) -> None:
+        if not isinstance(pad_if_needed, bool):
+            raise TypeError(f"pad_if_needed must be bool, got {type(pad_if_needed)}")
+
+        if padding is not None:
+            if not isinstance(padding, (int, list, tuple)):
+                raise TypeError(
+                    f"Padding must be an int or a sequence of length 1, 2 or 4, "
+                    f"got {type(padding)}"
+                )
+            if isinstance(padding, (list, tuple)) and len(padding) not in (1, 2, 4):
+                raise ValueError(f"Padding sequence must have length 1, 2 or 4, got {len(padding)}")
+            if isinstance(padding, (list, tuple)) and any(
+                not isinstance(value, int) for value in padding
+            ):
+                raise ValueError(f"Padding values must be integers, got {padding}")
+            _ValidateIfNonNegative.verify(values=padding, name="padding")
+
+        if pad_if_needed or padding is not None:
+            _ValidatePaddingMode.verify(padding_mode=padding_mode)
+
+
+class _ValidateFill(_ArgumentValidateRule):
+    """
+    Verify RandomCrop fill argument.
+    """
+
+    @classmethod
+    def _verify_fill_value(cls, fill) -> None:
+        if fill is None or isinstance(fill, numbers.Number):
+            return
+        if isinstance(fill, (list, tuple)) and all(
+            isinstance(value, numbers.Number) for value in fill
+        ):
+            return
+        raise TypeError(f"fill must be a number, sequence of numbers, None or a dict, got {fill!r}")
+
+    @classmethod
+    def verify(cls, *, fill, **_) -> None:
+        if isinstance(fill, dict):
+            for key, value in fill.items():
+                if not isinstance(key, (type, str)):
+                    raise TypeError(f"fill dictionary keys must be types or strings, got {key!r}")
+                cls._verify_fill_value(value)
+        else:
+            cls._verify_fill_value(fill)
+
+
+class RandomCrop(Operator):
+    """
+    Crop the input at a random location.
+
+    If the input is a ``torch.Tensor`` it can have an arbitrary number of leading batch dimensions.
+    For example, the image tensor can have [..., C, H, W] shape.
+
+    Parameters
+    ----------
+    size : sequence or int
+        Desired output size of the crop. If size is an int instead of sequence like (h, w),
+        a square crop (size, size) is made. If provided a sequence of length 1, it will be
+        interpreted as (size[0], size[0]).
+    padding : int or sequence, optional, default = None
+        Optional padding on each border of the image, applied before cropping. If a single int
+        or a sequence of length 1 is provided this is used to pad all borders. If sequence of
+        length 2 is provided this is the padding on left/right and top/bottom respectively. If
+        a sequence of length 4 is provided this is the padding for the left, top, right and
+        bottom borders respectively.
+    pad_if_needed : bool, optional, default = False
+        Pad the image if it is smaller than the desired size.
+    fill : number or tuple or dict, optional, default = 0
+        Pixel fill value used when the padding_mode is constant.
+    padding_mode : Literal["constant", "edge", "reflect", "symmetric"], optional
+        Type of padding. Should be: constant (the default), edge, reflect or symmetric.
+    device : Literal["cpu", "gpu"], optional, default = "cpu"
+        Device to use for the crop. Can be ``"cpu"`` or ``"gpu"``.
+    """
+
+    arg_rules = [_ValidateSizeDescriptor, _ValidateCropSize, _ValidatePadding, _ValidateFill]
+    preprocess_data = get_HWC_from_layout_pipeline
+
+    @classmethod
+    def adjust_size(cls, size: int | Sequence[int]) -> Sequence[int]:
+        return CenterCrop.adjust_size(size)
+
+    @classmethod
+    def adjust_padding(cls, padding: None | int | Sequence[int]) -> tuple[int, int, int, int]:
+        if padding is None:
+            return 0, 0, 0, 0
+        if isinstance(padding, int):
+            return padding, padding, padding, padding
+        if isinstance(padding, (list, tuple)):
+            if len(padding) == 1:
+                return padding[0], padding[0], padding[0], padding[0]
+            if len(padding) == 2:
+                return padding[0], padding[1], padding[0], padding[1]
+            if len(padding) == 4:
+                return tuple(padding)
+
+        raise TypeError(
+            f"Padding must be an int or a sequence of length 1, 2 or 4, got {type(padding)}"
+        )
+
+    @staticmethod
+    def adjust_fill(fill):
+        if isinstance(fill, dict):
+            return {key: RandomCrop.adjust_fill(value) for key, value in fill.items()}
+        if fill is None:
+            return 0
+        if isinstance(fill, numbers.Number):
+            return fill
+        return tuple(fill)
+
+    @staticmethod
+    def _get_input_type(tensor):
+        layout = tensor.property("layout")[0]
+        if layout == np.frombuffer(bytes("F", "utf-8"), dtype=np.uint8)[0]:
+            layout = tensor.property("layout")[1]
+        if layout == np.frombuffer(bytes("C", "utf-8"), dtype=np.uint8)[0]:
+            return torch.Tensor
+        return Image.Image
+
+    @staticmethod
+    def _get_fill(fill, tensor):
+        if not isinstance(fill, dict):
+            return fill
+
+        input_type = RandomCrop._get_input_type(tensor)
+        string_keys = (input_type.__name__, f"{input_type.__module__}.{input_type.__name__}")
+        for key in (input_type, *string_keys):
+            if key in fill:
+                return fill[key]
+        if "others" in fill:
+            return fill["others"]
+        raise ValueError(f"fill dictionary does not contain a value for {input_type}")
+
+    @staticmethod
+    def _randint(max_value):
+        range_start = fn.cast(max_value * 0, dtype=dali.types.FLOAT)
+        range_end = fn.cast(max_value + 1, dtype=dali.types.FLOAT)
+        value = dali.math.floor(fn.random.uniform(range=fn.stack(range_start, range_end)))
+        return fn.cast(value, dtype=dali.types.INT32)
+
+    def __init__(
+        self,
+        size: int | Sequence[int],
+        padding: None | int | Sequence[int] = None,
+        pad_if_needed: bool = False,
+        fill: Union[
+            int,
+            float,
+            Sequence[int],
+            Sequence[float],
+            None,
+            dict[
+                type | str,
+                int
+                | float
+                | collections.abc.Sequence[int]
+                | collections.abc.Sequence[float]
+                | NoneType,
+            ],
+        ] = 0,
+        padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant",
+        device: Literal["cpu", "gpu"] = "cpu",
+    ):
+        super().__init__(
+            device=device,
+            size=size,
+            padding=padding,
+            pad_if_needed=pad_if_needed,
+            padding_mode=padding_mode,
+            fill=fill,
+        )
+
+        self.size = RandomCrop.adjust_size(size)
+        self.padding = RandomCrop.adjust_padding(padding)
+        self.pad_if_needed = pad_if_needed
+        self.fill = RandomCrop.adjust_fill(fill)
+        self.padding_mode = padding_mode
+        self.needs_padding = pad_if_needed or any(self.padding)
+
+    def _kernel(self, data_input):
+        """
+        Applies the random crop to the input data.
+        """
+        in_h, in_w, _, tensor = data_input
+        crop_h, crop_w = self.size
+        pad_left, pad_top, pad_right, pad_bottom = self.padding
+
+        if self.needs_padding:
+            padded_h = in_h + pad_top + pad_bottom
+            padded_w = in_w + pad_left + pad_right
+
+            if self.pad_if_needed:
+                pad_h = dali.math.max(crop_h - padded_h, 0)
+                pad_w = dali.math.max(crop_w - padded_w, 0)
+                pad_top = pad_top + pad_h
+                pad_bottom = pad_bottom + pad_h
+                pad_left = pad_left + pad_w
+                pad_right = pad_right + pad_w
+
+            tensor = fn.slice(
+                tensor,
+                fn.stack(
+                    fn.cast(-pad_left, dtype=dali.types.INT64),
+                    fn.cast(-pad_top, dtype=dali.types.INT64),
+                ),
+                fn.stack(in_w + pad_left + pad_right, in_h + pad_top + pad_bottom),
+                out_of_bounds_policy=PADDING_CLASS[self.padding_mode].border_type,
+                fill_values=self.fill,
+                device=self.device,
+                axis_names="WH",
+            )
+
+            in_h = in_h + pad_top + pad_bottom
+            in_w = in_w + pad_left + pad_right
+
+        max_top = fn.cast(in_h, dtype=dali.types.INT32) - crop_h
+        max_left = fn.cast(in_w, dtype=dali.types.INT32) - crop_w
+
+        top = RandomCrop._randint(max_top)
+        left = RandomCrop._randint(max_left)
+
+        return fn.slice(
+            tensor,
+            fn.stack(left, top),
+            fn.stack(crop_w, crop_h),
+            device=self.device,
+            axis_names="WH",
+        )
diff --git a/dali/test/python/torchvision/test_tv_crop.py b/dali/test/python/torchvision/test_tv_crop.py
new file mode 100644
index 00000000000..07d45d0ed2e
--- /dev/null
+++ b/dali/test/python/torchvision/test_tv_crop.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import unittest
+
+from nose2.tools import params
+from nose_utils import assert_raises
+import numpy as np
+from PIL import Image
+import torch
+import torchvision.transforms.v2.functional as tv_fn
+
+from nvidia.dali.experimental.torchvision.v2.functional import crop
+
+
+def make_test_tensor(shape=(3, 8, 10), dtype=torch.uint8):
+    return torch.arange(math.prod(shape), dtype=dtype).reshape(shape)
+
+
+def _make_pil_image(mode, h=8, w=10, seed=42):
+    rng = np.random.default_rng(seed)
+    if mode == "L":
+        data = rng.integers(0, 256, (h, w), dtype=np.uint8)
+    elif mode == "RGB":
+        data = rng.integers(0, 256, (h, w, 3), dtype=np.uint8)
+    elif mode == "RGBA":
+        data = rng.integers(0, 256, (h, w, 4), dtype=np.uint8)
+    else:
+        raise ValueError(f"Unsupported mode: {mode}")
+    return Image.fromarray(data, mode=mode)
+
+
+def _assert_crop_matches_torchvision(inpt, top, left, height, width, device="cpu"):
+    dali_out = crop(inpt, top, left, height, width, device=device)
+    tv_out = tv_fn.crop(inpt, top, left, height, width)
+
+    if device == "gpu" and not isinstance(dali_out, Image.Image):
+        dali_out = dali_out.cpu()
+        if isinstance(tv_out, torch.Tensor):
+            tv_out = tv_out.cpu()
+
+    if isinstance(inpt, Image.Image):
+        assert isinstance(dali_out, Image.Image), f"Expected PIL Image, got {type(dali_out)}"
+        assert dali_out.mode == tv_out.mode, f"Expected mode {tv_out.mode}, got {dali_out.mode}"
+        dali_out = tv_fn.pil_to_tensor(dali_out)
+        tv_out = tv_fn.pil_to_tensor(tv_out)
+
+    assert dali_out.shape == tv_out.shape, f"Shape mismatch: {dali_out.shape} != {tv_out.shape}"
+    assert torch.equal(dali_out, tv_out), "DALI crop output differs from torchvision"
+
+
+@params(
+    (1, 2, 4, 5),
+    (0, 0, 8, 10),
+    (3, 4, 2, 3),
+)
+def test_crop_tensor_cpu(top, left, height, width):
+    _assert_crop_matches_torchvision(make_test_tensor(), top, left, height, width)
+
+
+@params("L", "RGB", "RGBA")
+def test_crop_pil_cpu(mode):
+    _assert_crop_matches_torchvision(_make_pil_image(mode), top=1, left=2, height=4, width=5)
+
+
+@params(
+    (-1, -2, 6, 8),
+    (6, 8, 5, 6),
+    (0, 0, 12, 14),
+)
+def test_crop_out_of_bounds_tensor_cpu(top, left, height, width):
+    _assert_crop_matches_torchvision(make_test_tensor(), top, left, height, width)
+
+
+@params("L", "RGB", "RGBA")
+def test_crop_out_of_bounds_pil_cpu(mode):
+    _assert_crop_matches_torchvision(_make_pil_image(mode), top=-2, left=-3, height=12, width=14)
+
+
+def test_crop_batched_tensor_cpu():
+    tensor = make_test_tensor(shape=(4, 3, 8, 10))
+    _assert_crop_matches_torchvision(tensor, top=2, left=3, height=4, width=5)
+
+
+@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
+def test_crop_batched_tensor_gpu():
+    tensor = make_test_tensor(shape=(4, 3, 8, 10)).cuda()
+    _assert_crop_matches_torchvision(tensor, top=2, left=3, height=4, width=5, device="gpu")
+
+
+@params(torch.float32, torch.int16, torch.int32)
+def test_crop_preserves_tensor_dtype_cpu(dtype):
+    tensor = make_test_tensor(dtype=dtype)
+    dali_out = crop(tensor, top=1, left=1, height=4, width=5)
+    tv_out = tv_fn.crop(tensor, top=1, left=1, height=4, width=5)
+
+    assert dali_out.dtype == tv_out.dtype, f"Expected dtype {tv_out.dtype}, got {dali_out.dtype}"
+    assert torch.equal(dali_out, tv_out), "DALI crop output differs from torchvision"
+
+
+def test_crop_invalid_input_type():
+    with assert_raises(TypeError):
+        _ = crop([1, 2, 3], top=0, left=0, height=1, width=1)
+
+
+@params(
+    (0, 1),
+    (1, 0),
+    (-1, 1),
+    (1, -1),
+    (1.0, 1),
+    (1, 1.0),
+)
+def test_crop_invalid_output_size(height, width):
+    with assert_raises((TypeError, ValueError)):
+        _ = crop(make_test_tensor(), top=0, left=0, height=height, width=width)
+
+
+@params(
+    (0.5, 0),
+    ("0", 0),
+    (0, 0.5),
+    (0, "0"),
+)
+def test_crop_invalid_coordinates(top, left):
+    with assert_raises(TypeError):
+        _ = crop(make_test_tensor(), top=top, left=left, height=1, width=1)
+
+
+@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
+@params(
+    (1, 2, 4, 5),
+    (-1, -2, 6, 8),
+)
+def test_crop_tensor_gpu(top, left, height, width):
+    tensor = make_test_tensor().cuda()
+    _assert_crop_matches_torchvision(tensor, top, left, height, width, device="gpu")
+
+
+@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
+@params("L", "RGB", "RGBA")
+def test_crop_pil_gpu(mode):
+    _assert_crop_matches_torchvision(
+        _make_pil_image(mode), top=-2, left=-3, height=12, width=14, device="gpu"
+    )
diff --git a/dali/test/python/torchvision/test_tv_randomcrop.py b/dali/test/python/torchvision/test_tv_randomcrop.py
new file mode 100644
index 00000000000..df1129a7dea
--- /dev/null
+++ b/dali/test/python/torchvision/test_tv_randomcrop.py
@@ -0,0 +1,304 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import unittest
+
+from nose2.tools import params
+from nose_utils import assert_raises
+import numpy as np
+from PIL import Image
+import torch
+import torchvision.transforms.v2 as transforms
+import torchvision.transforms.v2.functional as tv_fn
+
+from nvidia.dali.experimental.torchvision import Compose, RandomCrop
+from nvidia.dali.experimental.torchvision.v2.operator import Operator
+
+
+def make_tensor(shape=(3, 8, 10), dtype=torch.uint8):
+    return torch.arange(math.prod(shape), dtype=dtype).reshape(shape)
+
+
+def make_pil_image(mode="RGB", h=8, w=10, seed=42):
+    rng = np.random.default_rng(seed)
+    if mode == "L":
+        data = rng.integers(0, 256, (h, w), dtype=np.uint8)
+    elif mode == "RGB":
+        data = rng.integers(0, 256, (h, w, 3), dtype=np.uint8)
+    elif mode == "RGBA":
+        data = rng.integers(0, 256, (h, w, 4), dtype=np.uint8)
+    else:
+        raise ValueError(f"Unsupported mode: {mode}")
+    return Image.fromarray(data, mode=mode)
+
+
+def _to_tensor(inpt):
+    if isinstance(inpt, Image.Image):
+        return tv_fn.pil_to_tensor(inpt)
+    return inpt
+
+
+def _assert_equal_to_torchvision(inpt, dali_transform, tv_transform, device="cpu"):
+    out = dali_transform(inpt)
+    tv_out = tv_transform(inpt)
+
+    out = _to_tensor(out)
+    tv_out = _to_tensor(tv_out)
+    if device == "gpu":
+        out = out.cpu()
+        if isinstance(tv_out, torch.Tensor):
+            tv_out = tv_out.cpu()
+
+    assert out.shape == tv_out.shape, f"Shape mismatch: {out.shape} != {tv_out.shape}"
+    assert torch.equal(out, tv_out), "DALI RandomCrop output differs from torchvision"
+
+
+def _build_dali_random_crop(**kwargs):
+    batch_size = kwargs.pop("batch_size", 1)
+    return Compose([RandomCrop(**kwargs)], batch_size=batch_size)
+
+
+def test_random_crop_is_operator():
+    assert issubclass(RandomCrop, Operator)
+
+
+@params(
+    (make_tensor(), (8, 10)),
+    (make_tensor(shape=(4, 3, 8, 10)), (8, 10)),
+    (make_pil_image("L"), (8, 10)),
+    (make_pil_image("RGB"), (8, 10)),
+    (make_pil_image("RGBA"), (8, 10)),
+)
+def test_random_crop_identity_matches_torchvision_cpu(inpt, size):
+    batch_size = inpt.shape[0] if isinstance(inpt, torch.Tensor) and inpt.ndim > 3 else 1
+    _assert_equal_to_torchvision(
+        inpt,
+        _build_dali_random_crop(size=size, batch_size=batch_size),
+        transforms.RandomCrop(size=size),
+    )
+
+
+@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
+@params(
+    ("tensor", (3, 8, 10), (8, 10)),
+    ("tensor", (4, 3, 8, 10), (8, 10)),
+    ("pil", "RGB", (8, 10)),
+)
+def test_random_crop_identity_matches_torchvision_gpu(input_type, input_arg, size):
+    inpt = make_pil_image(input_arg) if input_type == "pil" else make_tensor(shape=input_arg).cuda()
+    batch_size = inpt.shape[0] if isinstance(inpt, torch.Tensor) and inpt.ndim > 3 else 1
+    _assert_equal_to_torchvision(
+        inpt,
+        _build_dali_random_crop(size=size, device="gpu", batch_size=batch_size),
+        transforms.RandomCrop(size=size),
+        device="gpu",
+    )
+
+
+@params(
+    (None, 0, "constant"),
+    (1, 0, "constant"),
+    ([1], 0, "constant"),
+    ([1, 1], 0, "constant"),
+    ([1, 1, 1, 1], 0, "constant"),
+    (1, 7, "constant"),
+    (1, (1, 2, 3), "constant"),
+    (1, None, "constant"),
+    (1, 0, "edge"),
+    (1, 0, "reflect"),
+    (1, 0, "symmetric"),
+)
+def test_random_crop_padding_matches_torchvision_tensor_cpu(padding, fill, padding_mode):
+    tensor = make_tensor(shape=(3, 4, 5))
+    size = (4, 5) if padding is None else (6, 7)
+
+    _assert_equal_to_torchvision(
+        tensor,
+        _build_dali_random_crop(
+            size=size,
+            padding=padding,
+            fill=fill,
+            padding_mode=padding_mode,
+        ),
+        transforms.RandomCrop(
+            size=size,
+            padding=padding,
+            fill=fill,
+            padding_mode=padding_mode,
+        ),
+    )
+
+
+@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
+@params(
+    (1, 7, "constant"),
+    (1, 0, "edge"),
+    (1, 0, "reflect"),
+    (1, 0, "symmetric"),
+)
+def test_random_crop_padding_matches_torchvision_tensor_gpu(padding, fill, padding_mode):
+    tensor = make_tensor(shape=(3, 4, 5)).cuda()
+    _assert_equal_to_torchvision(
+        tensor,
+        _build_dali_random_crop(
+            size=(6, 7),
+            padding=padding,
+            fill=fill,
+            padding_mode=padding_mode,
+            device="gpu",
+        ),
+        transforms.RandomCrop(
+            size=(6, 7),
+            padding=padding,
+            fill=fill,
+            padding_mode=padding_mode,
+        ),
+        device="gpu",
+    )
+
+
+@params("L", "RGB", "RGBA")
+def test_random_crop_padding_matches_torchvision_pil_cpu(mode):
+    img = make_pil_image(mode=mode, h=4, w=5)
+    _assert_equal_to_torchvision(
+        img,
+        _build_dali_random_crop(size=(6, 7), padding=1, fill=3),
+        transforms.RandomCrop(size=(6, 7), padding=1, fill=3),
+    )
+
+
+@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
+@params("L", "RGB", "RGBA")
+def test_random_crop_padding_matches_torchvision_pil_gpu(mode):
+    img = make_pil_image(mode=mode, h=4, w=5)
+    _assert_equal_to_torchvision(
+        img,
+        _build_dali_random_crop(size=(6, 7), padding=1, fill=3, device="gpu"),
+        transforms.RandomCrop(size=(6, 7), padding=1, fill=3),
+        device="gpu",
+    )
+
+
+"""
+# TODO: Tensor fill pattern is not currently supported
+def test_random_crop_fill_dict_matches_torchvision_tensor_cpu():
+    tensor = make_tensor(shape=(3, 4, 5))
+    fill = {torch.Tensor: 9}
+    _assert_equal_to_torchvision(
+        tensor,
+        _build_dali_random_crop(size=(6, 7), padding=1, fill=fill),
+        transforms.RandomCrop(size=(6, 7), padding=1, fill=fill),
+    )
+
+# TODO: fill pattern as tensor is not currently supported
+def test_random_crop_fill_dict_matches_torchvision_pil_cpu():
+    img = make_pil_image(mode="RGB", h=4, w=5)
+    fill = {Image.Image: (1, 2, 3)}
+    _assert_equal_to_torchvision(
+        img,
+        _build_dali_random_crop(size=(6, 7), padding=1, fill=fill),
+        transforms.RandomCrop(size=(6, 7), padding=1, fill=fill),
+    )
+"""
+
+
+@params(
+    (4, (4, 4)),
+    ([4, 5], (4, 5)),
+)
+def test_random_crop_tensor_shape_cpu(size, expected_hw):
+    tensor = make_tensor()
+    out = _build_dali_random_crop(size=size)(tensor)
+
+    assert out.shape == (3, *expected_hw)
+
+
+@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
+@params(
+    (4, (4, 4)),
+    ([4, 5], (4, 5)),
+)
+def test_random_crop_tensor_shape_gpu(size, expected_hw):
+    tensor = make_tensor().cuda()
+    out = _build_dali_random_crop(size=size, device="gpu")(tensor)
+
+    assert out.shape == (3, *expected_hw)
+
+
+@params("cpu", "gpu")
+def test_random_crop_pad_if_needed_shape(device):
+    if device == "gpu" and not torch.cuda.is_available():
+        raise unittest.SkipTest("CUDA is not available")
+
+    tensor = make_tensor(shape=(3, 4, 5))
+    if device == "gpu":
+        tensor = tensor.cuda()
+    out = _build_dali_random_crop(size=(6, 7), pad_if_needed=True, device=device)(tensor)
+
+    assert out.shape == (3, 6, 7)
+
+
+@params(
+    [],
+    [0, 5],
+    [5, 0],
+    [1.0, 2],
+    [1, 2, 3],
+    -1,
+    1.0,
+    {"bad": "value"},
+)
+def test_random_crop_invalid_size(size):
+    with assert_raises((TypeError, ValueError)):
+        _ = RandomCrop(size=size)
+
+
+@params(
+    -1,
+    [1, -1],
+    [1, 2, 3],
+    [1.0],
+    "bad",
+)
+def test_random_crop_invalid_padding(padding):
+    with assert_raises((TypeError, ValueError)):
+        _ = RandomCrop(size=3, padding=padding)
+
+
+def test_random_crop_invalid_pad_if_needed():
+    with assert_raises(TypeError):
+        _ = RandomCrop(size=3, pad_if_needed="yes")
+
+
+@params(
+    object(),
+    "bad",
+    [1, object()],
+    {object(): 1},
+    {torch.Tensor: object()},
+)
+def test_random_crop_invalid_fill(fill):
+    with assert_raises(TypeError):
+        _ = RandomCrop(size=3, padding=1, fill=fill)
+
+
+def test_random_crop_invalid_padding_mode_when_padding_is_used():
+    with assert_raises(ValueError):
+        _ = RandomCrop(size=3, padding=1, padding_mode="bad")
+
+
+def test_random_crop_invalid_padding_mode_when_pad_if_needed_is_used():
+    with assert_raises(ValueError):
+        _ = RandomCrop(size=3, pad_if_needed=True, padding_mode="bad")

From 2c7e9ef4332954420b539fa53d4f87ec86a4a859 Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Tue, 19 May 2026 13:18:58 +0200
Subject: [PATCH 06/11] Greptile review comments and "cpu"/"gpu" unit tests

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../experimental/torchvision/v2/randomcrop.py |  25 +--
 .../python/torchvision/test_tv_randomcrop.py  | 170 +++++++-----------
 2 files changed, 68 insertions(+), 127 deletions(-)

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
index 9fb673d7de7..55f8fed339c 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
@@ -163,32 +163,9 @@ def adjust_fill(fill):
             return fill
         return tuple(fill)
 
-    @staticmethod
-    def _get_input_type(tensor):
-        layout = tensor.property("layout")[0]
-        if layout == np.frombuffer(bytes("F", "utf-8"), dtype=np.uint8)[0]:
-            layout = tensor.property("layout")[1]
-        if layout == np.frombuffer(bytes("C", "utf-8"), dtype=np.uint8)[0]:
-            return torch.Tensor
-        return Image.Image
-
-    @staticmethod
-    def _get_fill(fill, tensor):
-        if not isinstance(fill, dict):
-            return fill
-
-        input_type = RandomCrop._get_input_type(tensor)
-        string_keys = (input_type.__name__, f"{input_type.__module__}.{input_type.__name__}")
-        for key in (input_type, *string_keys):
-            if key in fill:
-                return fill[key]
-        if "others" in fill:
-            return fill["others"]
-        raise ValueError(f"fill dictionary does not contain a value for {input_type}")
-
     @staticmethod
     def _randint(max_value):
-        range_start = fn.cast(max_value * 0, dtype=dali.types.FLOAT)
+        range_start = fn.cast(0, dtype=dali.types.FLOAT)
         range_end = fn.cast(max_value + 1, dtype=dali.types.FLOAT)
         value = dali.math.floor(fn.random.uniform(range=fn.stack(range_start, range_end)))
         return fn.cast(value, dtype=dali.types.INT32)
diff --git a/dali/test/python/torchvision/test_tv_randomcrop.py b/dali/test/python/torchvision/test_tv_randomcrop.py
index df1129a7dea..0fac3565916 100644
--- a/dali/test/python/torchvision/test_tv_randomcrop.py
+++ b/dali/test/python/torchvision/test_tv_randomcrop.py
@@ -15,7 +15,7 @@
 import math
 import unittest
 
-from nose2.tools import params
+from nose2.tools import cartesian_params, params
 from nose_utils import assert_raises
 import numpy as np
 from PIL import Image
@@ -70,58 +70,65 @@ def _build_dali_random_crop(**kwargs):
     return Compose([RandomCrop(**kwargs)], batch_size=batch_size)
 
 
-def test_random_crop_is_operator():
-    assert issubclass(RandomCrop, Operator)
+def _skip_if_gpu_unavailable(device):
+    if device == "gpu" and not torch.cuda.is_available():
+        raise unittest.SkipTest("CUDA is not available")
 
 
-@params(
-    (make_tensor(), (8, 10)),
-    (make_tensor(shape=(4, 3, 8, 10)), (8, 10)),
-    (make_pil_image("L"), (8, 10)),
-    (make_pil_image("RGB"), (8, 10)),
-    (make_pil_image("RGBA"), (8, 10)),
-)
-def test_random_crop_identity_matches_torchvision_cpu(inpt, size):
-    batch_size = inpt.shape[0] if isinstance(inpt, torch.Tensor) and inpt.ndim > 3 else 1
-    _assert_equal_to_torchvision(
-        inpt,
-        _build_dali_random_crop(size=size, batch_size=batch_size),
-        transforms.RandomCrop(size=size),
-    )
+def _move_tensor_to_device(inpt, device):
+    if device == "gpu" and isinstance(inpt, torch.Tensor):
+        return inpt.cuda()
+    return inpt
 
 
-@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
-@params(
-    ("tensor", (3, 8, 10), (8, 10)),
-    ("tensor", (4, 3, 8, 10), (8, 10)),
-    ("pil", "RGB", (8, 10)),
+def test_random_crop_is_operator():
+    assert issubclass(RandomCrop, Operator)
+
+
+@cartesian_params(
+    ("cpu", "gpu"),
+    (
+        ("tensor", (3, 8, 10), (8, 10)),
+        ("tensor", (4, 3, 8, 10), (8, 10)),
+        ("pil", "L", (8, 10)),
+        ("pil", "RGB", (8, 10)),
+        ("pil", "RGBA", (8, 10)),
+    ),
 )
-def test_random_crop_identity_matches_torchvision_gpu(input_type, input_arg, size):
-    inpt = make_pil_image(input_arg) if input_type == "pil" else make_tensor(shape=input_arg).cuda()
+def test_random_crop_identity_matches_torchvision(device, input_case):
+    _skip_if_gpu_unavailable(device)
+    input_type, input_arg, size = input_case
+    inpt = make_pil_image(input_arg) if input_type == "pil" else make_tensor(shape=input_arg)
+    inpt = _move_tensor_to_device(inpt, device)
     batch_size = inpt.shape[0] if isinstance(inpt, torch.Tensor) and inpt.ndim > 3 else 1
     _assert_equal_to_torchvision(
         inpt,
-        _build_dali_random_crop(size=size, device="gpu", batch_size=batch_size),
+        _build_dali_random_crop(size=size, device=device, batch_size=batch_size),
         transforms.RandomCrop(size=size),
-        device="gpu",
+        device=device,
     )
 
 
-@params(
-    (None, 0, "constant"),
-    (1, 0, "constant"),
-    ([1], 0, "constant"),
-    ([1, 1], 0, "constant"),
-    ([1, 1, 1, 1], 0, "constant"),
-    (1, 7, "constant"),
-    (1, (1, 2, 3), "constant"),
-    (1, None, "constant"),
-    (1, 0, "edge"),
-    (1, 0, "reflect"),
-    (1, 0, "symmetric"),
+@cartesian_params(
+    ("cpu", "gpu"),
+    (
+        (None, 0, "constant"),
+        (1, 0, "constant"),
+        ([1], 0, "constant"),
+        ([1, 1], 0, "constant"),
+        ([1, 1, 1, 1], 0, "constant"),
+        (1, 7, "constant"),
+        (1, (1, 2, 3), "constant"),
+        (1, None, "constant"),
+        (1, 0, "edge"),
+        (1, 0, "reflect"),
+        (1, 0, "symmetric"),
+    ),
 )
-def test_random_crop_padding_matches_torchvision_tensor_cpu(padding, fill, padding_mode):
-    tensor = make_tensor(shape=(3, 4, 5))
+def test_random_crop_padding_matches_torchvision_tensor(device, padding_case):
+    _skip_if_gpu_unavailable(device)
+    padding, fill, padding_mode = padding_case
+    tensor = _move_tensor_to_device(make_tensor(shape=(3, 4, 5)), device)
     size = (4, 5) if padding is None else (6, 7)
 
     _assert_equal_to_torchvision(
@@ -131,6 +138,7 @@ def test_random_crop_padding_matches_torchvision_tensor_cpu(padding, fill, paddi
             padding=padding,
             fill=fill,
             padding_mode=padding_mode,
+            device=device,
         ),
         transforms.RandomCrop(
             size=size,
@@ -138,62 +146,25 @@ def test_random_crop_padding_matches_torchvision_tensor_cpu(padding, fill, paddi
             fill=fill,
             padding_mode=padding_mode,
         ),
+        device=device,
     )
 
 
-@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
-@params(
-    (1, 7, "constant"),
-    (1, 0, "edge"),
-    (1, 0, "reflect"),
-    (1, 0, "symmetric"),
-)
-def test_random_crop_padding_matches_torchvision_tensor_gpu(padding, fill, padding_mode):
-    tensor = make_tensor(shape=(3, 4, 5)).cuda()
-    _assert_equal_to_torchvision(
-        tensor,
-        _build_dali_random_crop(
-            size=(6, 7),
-            padding=padding,
-            fill=fill,
-            padding_mode=padding_mode,
-            device="gpu",
-        ),
-        transforms.RandomCrop(
-            size=(6, 7),
-            padding=padding,
-            fill=fill,
-            padding_mode=padding_mode,
-        ),
-        device="gpu",
-    )
-
-
-@params("L", "RGB", "RGBA")
-def test_random_crop_padding_matches_torchvision_pil_cpu(mode):
+@cartesian_params(("cpu", "gpu"), ("L", "RGB", "RGBA"))
+def test_random_crop_padding_matches_torchvision_pil(device, mode):
+    _skip_if_gpu_unavailable(device)
     img = make_pil_image(mode=mode, h=4, w=5)
     _assert_equal_to_torchvision(
         img,
-        _build_dali_random_crop(size=(6, 7), padding=1, fill=3),
+        _build_dali_random_crop(size=(6, 7), padding=1, fill=3, device=device),
         transforms.RandomCrop(size=(6, 7), padding=1, fill=3),
-    )
-
-
-@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
-@params("L", "RGB", "RGBA")
-def test_random_crop_padding_matches_torchvision_pil_gpu(mode):
-    img = make_pil_image(mode=mode, h=4, w=5)
-    _assert_equal_to_torchvision(
-        img,
-        _build_dali_random_crop(size=(6, 7), padding=1, fill=3, device="gpu"),
-        transforms.RandomCrop(size=(6, 7), padding=1, fill=3),
-        device="gpu",
+        device=device,
     )
 
 
 """
 # TODO: Tensor fill pattern is not currently supported
-def test_random_crop_fill_dict_matches_torchvision_tensor_cpu():
+def test_random_crop_fill_dict_matches_torchvision_tensor():
     tensor = make_tensor(shape=(3, 4, 5))
     fill = {torch.Tensor: 9}
     _assert_equal_to_torchvision(
@@ -203,7 +174,7 @@ def test_random_crop_fill_dict_matches_torchvision_tensor_cpu():
     )
 
 # TODO: fill pattern as tensor is not currently supported
-def test_random_crop_fill_dict_matches_torchvision_pil_cpu():
+def test_random_crop_fill_dict_matches_torchvision_pil():
     img = make_pil_image(mode="RGB", h=4, w=5)
     fill = {Image.Image: (1, 2, 3)}
     _assert_equal_to_torchvision(
@@ -214,25 +185,18 @@ def test_random_crop_fill_dict_matches_torchvision_pil_cpu():
 """
 
 
-@params(
-    (4, (4, 4)),
-    ([4, 5], (4, 5)),
-)
-def test_random_crop_tensor_shape_cpu(size, expected_hw):
-    tensor = make_tensor()
-    out = _build_dali_random_crop(size=size)(tensor)
-
-    assert out.shape == (3, *expected_hw)
-
-
-@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
-@params(
-    (4, (4, 4)),
-    ([4, 5], (4, 5)),
+@cartesian_params(
+    ("cpu", "gpu"),
+    (
+        (4, (4, 4)),
+        ([4, 5], (4, 5)),
+    ),
 )
-def test_random_crop_tensor_shape_gpu(size, expected_hw):
-    tensor = make_tensor().cuda()
-    out = _build_dali_random_crop(size=size, device="gpu")(tensor)
+def test_random_crop_tensor_shape(device, shape_case):
+    _skip_if_gpu_unavailable(device)
+    size, expected_hw = shape_case
+    tensor = _move_tensor_to_device(make_tensor(), device)
+    out = _build_dali_random_crop(size=size, device=device)(tensor)
 
     assert out.shape == (3, *expected_hw)
 

From 08ebc425711921de449878f17cc25fb0fd380a82 Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Tue, 19 May 2026 14:43:56 +0200
Subject: [PATCH 07/11] Lint fixes: remove unused imports, correct TODO wording

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../nvidia/dali/experimental/torchvision/v2/randomcrop.py      | 3 ---
 dali/test/python/torchvision/test_tv_randomcrop.py             | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
index 55f8fed339c..01bf27b4037 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
@@ -17,11 +17,8 @@
 from types import NoneType
 from typing import Literal, Sequence, Union
 
-from PIL import Image
 import nvidia.dali as dali
 import nvidia.dali.fn as fn
-import numpy as np
-import torch
 
 from .centercrop import CenterCrop
 from .operator import (
diff --git a/dali/test/python/torchvision/test_tv_randomcrop.py b/dali/test/python/torchvision/test_tv_randomcrop.py
index 0fac3565916..9ac5425dd45 100644
--- a/dali/test/python/torchvision/test_tv_randomcrop.py
+++ b/dali/test/python/torchvision/test_tv_randomcrop.py
@@ -163,7 +163,7 @@ def test_random_crop_padding_matches_torchvision_pil(device, mode):
 
 
 """
-# TODO: Tensor fill pattern is not currently supported
+# TODO: Fill using dictionary pattern is currently not supported
 def test_random_crop_fill_dict_matches_torchvision_tensor():
     tensor = make_tensor(shape=(3, 4, 5))
     fill = {torch.Tensor: 9}
@@ -173,7 +173,6 @@ def test_random_crop_fill_dict_matches_torchvision_tensor():
         transforms.RandomCrop(size=(6, 7), padding=1, fill=fill),
     )
 
-# TODO: fill pattern as tensor is not currently supported
 def test_random_crop_fill_dict_matches_torchvision_pil():
     img = make_pil_image(mode="RGB", h=4, w=5)
     fill = {Image.Image: (1, 2, 3)}

From 12dddd3b3362a6d274ffe5e4a75b27fd519ae9ac Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Tue, 19 May 2026 15:36:25 +0200
Subject: [PATCH 08/11] Parametrize crop tests by device, add RandomCrop padding tests

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 dali/test/python/torchvision/test_tv_crop.py  |  91 +++++++--------
 .../python/torchvision/test_tv_randomcrop.py  | 106 ++++++++++++++++++
 2 files changed, 147 insertions(+), 50 deletions(-)

diff --git a/dali/test/python/torchvision/test_tv_crop.py b/dali/test/python/torchvision/test_tv_crop.py
index 07d45d0ed2e..3648c6c07c7 100644
--- a/dali/test/python/torchvision/test_tv_crop.py
+++ b/dali/test/python/torchvision/test_tv_crop.py
@@ -15,7 +15,7 @@
 import math
 import unittest
 
-from nose2.tools import params
+from nose2.tools import cartesian_params, params
 from nose_utils import assert_raises
 import numpy as np
 from PIL import Image
@@ -61,47 +61,56 @@ def _assert_crop_matches_torchvision(inpt, top, left, height, width, device="cpu
     assert torch.equal(dali_out, tv_out), "DALI crop output differs from torchvision"
 
 
-@params(
-    (1, 2, 4, 5),
-    (0, 0, 8, 10),
-    (3, 4, 2, 3),
-)
-def test_crop_tensor_cpu(top, left, height, width):
-    _assert_crop_matches_torchvision(make_test_tensor(), top, left, height, width)
+def _skip_if_gpu_unavailable(device):
+    if device == "gpu" and not torch.cuda.is_available():
+        raise unittest.SkipTest("CUDA is not available")
 
 
-@params("L", "RGB", "RGBA")
-def test_crop_pil_cpu(mode):
-    _assert_crop_matches_torchvision(_make_pil_image(mode), top=1, left=2, height=4, width=5)
+def _move_tensor_to_device(tensor, device):
+    if device == "gpu":
+        return tensor.cuda()
+    return tensor
 
 
-@params(
-    (-1, -2, 6, 8),
-    (6, 8, 5, 6),
-    (0, 0, 12, 14),
+@cartesian_params(
+    ("cpu", "gpu"),
+    (
+        (1, 2, 4, 5),
+        (0, 0, 8, 10),
+        (3, 4, 2, 3),
+        (-1, -2, 6, 8),
+        (6, 8, 5, 6),
+        (0, 0, 12, 14),
+    ),
 )
-def test_crop_out_of_bounds_tensor_cpu(top, left, height, width):
-    _assert_crop_matches_torchvision(make_test_tensor(), top, left, height, width)
-
-
-@params("L", "RGB", "RGBA")
-def test_crop_out_of_bounds_pil_cpu(mode):
-    _assert_crop_matches_torchvision(_make_pil_image(mode), top=-2, left=-3, height=12, width=14)
-
-
-def test_crop_batched_tensor_cpu():
-    tensor = make_test_tensor(shape=(4, 3, 8, 10))
-    _assert_crop_matches_torchvision(tensor, top=2, left=3, height=4, width=5)
+def test_crop_tensor(device, crop_case):
+    _skip_if_gpu_unavailable(device)
+    tensor = _move_tensor_to_device(make_test_tensor(), device)
+    _assert_crop_matches_torchvision(tensor, *crop_case, device=device)
+
+
+@cartesian_params(
+    ("cpu", "gpu"),
+    ("L", "RGB", "RGBA"),
+    (
+        (1, 2, 4, 5),
+        (-2, -3, 12, 14),
+    ),
+)
+def test_crop_pil(device, mode, crop_case):
+    _skip_if_gpu_unavailable(device)
+    _assert_crop_matches_torchvision(_make_pil_image(mode), *crop_case, device=device)
 
 
-@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
-def test_crop_batched_tensor_gpu():
-    tensor = make_test_tensor(shape=(4, 3, 8, 10)).cuda()
-    _assert_crop_matches_torchvision(tensor, top=2, left=3, height=4, width=5, device="gpu")
+@cartesian_params(("cpu", "gpu"), ((2, 3, 4, 5),))
+def test_crop_batched_tensor(device, crop_case):
+    _skip_if_gpu_unavailable(device)
+    tensor = _move_tensor_to_device(make_test_tensor(shape=(4, 3, 8, 10)), device)
+    _assert_crop_matches_torchvision(tensor, *crop_case, device=device)
 
 
 @params(torch.float32, torch.int16, torch.int32)
-def test_crop_preserves_tensor_dtype_cpu(dtype):
+def test_crop_preserves_tensor_dtype(dtype):
     tensor = make_test_tensor(dtype=dtype)
     dali_out = crop(tensor, top=1, left=1, height=4, width=5)
     tv_out = tv_fn.crop(tensor, top=1, left=1, height=4, width=5)
@@ -137,21 +146,3 @@ def test_crop_invalid_output_size(height, width):
 def test_crop_invalid_coordinates(top, left):
     with assert_raises(TypeError):
         _ = crop(make_test_tensor(), top=top, left=left, height=1, width=1)
-
-
-@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
-@params(
-    (1, 2, 4, 5),
-    (-1, -2, 6, 8),
-)
-def test_crop_tensor_gpu(top, left, height, width):
-    tensor = make_test_tensor().cuda()
-    _assert_crop_matches_torchvision(tensor, top, left, height, width, device="gpu")
-
-
-@unittest.skipUnless(torch.cuda.is_available(), "CUDA is not available")
-@params("L", "RGB", "RGBA")
-def test_crop_pil_gpu(mode):
-    _assert_crop_matches_torchvision(
-        _make_pil_image(mode), top=-2, left=-3, height=12, width=14, device="gpu"
-    )
diff --git a/dali/test/python/torchvision/test_tv_randomcrop.py b/dali/test/python/torchvision/test_tv_randomcrop.py
index 9ac5425dd45..618125215e1 100644
--- a/dali/test/python/torchvision/test_tv_randomcrop.py
+++ b/dali/test/python/torchvision/test_tv_randomcrop.py
@@ -81,6 +81,42 @@ def _move_tensor_to_device(inpt, device):
     return inpt
 
 
+def _possible_torchvision_random_crop_outputs(inpt, size, padding, fill=0, padding_mode="constant"):
+    """Return every crop torchvision's RandomCrop could yield with pad_if_needed.
+
+    ``padding`` is applied first; an axis that is still shorter than ``size`` is
+    then grown by the shortfall on both of its sides. One crop is produced per
+    valid ``(top, left)`` offset inside the padded image.
+    """
+    target_h, target_w = RandomCrop.adjust_size(size)
+    left_pad, top_pad, right_pad, bottom_pad = RandomCrop.adjust_padding(padding)
+
+    # Shortfall left after the explicit padding; zero when the crop already fits.
+    extra_h = max(target_h - (inpt.shape[-2] + top_pad + bottom_pad), 0)
+    extra_w = max(target_w - (inpt.shape[-1] + left_pad + right_pad), 0)
+    top_pad, bottom_pad = top_pad + extra_h, bottom_pad + extra_h
+    left_pad, right_pad = left_pad + extra_w, right_pad + extra_w
+
+    canvas_h = inpt.shape[-2] + top_pad + bottom_pad
+    canvas_w = inpt.shape[-1] + left_pad + right_pad
+    canvas = tv_fn.pad(
+        inpt,
+        padding=[left_pad, top_pad, right_pad, bottom_pad],
+        fill=fill,
+        padding_mode=padding_mode,
+    )
+
+    # canvas_h >= target_h and canvas_w >= target_w by now, so each range below
+    # contains at least offset 0.
+    crops = []
+    for top in range(canvas_h - target_h + 1):
+        for left in range(canvas_w - target_w + 1):
+            crop = tv_fn.crop(canvas, top=top, left=left, height=target_h, width=target_w)
+            crops.append(crop)
+
+    return crops
+
+
 def test_random_crop_is_operator():
     assert issubclass(RandomCrop, Operator)
 
@@ -162,6 +198,65 @@ def test_random_crop_padding_matches_torchvision_pil(device, mode):
     )
 
 
+@cartesian_params(
+    ("cpu", "gpu"),
+    (
+        ([0, 1, 2, 0], (7, 8)),
+        ([2, 0, 0, 1], (6, 9)),
+        ([1, 2, 0, 3], (10, 7)),
+    ),
+)
+def test_random_crop_asymmetric_padding_with_pad_if_needed(device, padding_case):
+    """Asymmetric padding combined with pad_if_needed must give a torchvision-valid crop."""
+    _skip_if_gpu_unavailable(device)
+    padding, size = padding_case
+    source = make_tensor(shape=(3, 4, 5))
+    # Reference set: one crop per valid offset for this padding and size.
+    candidates = _possible_torchvision_random_crop_outputs(source, size=size, padding=padding)
+
+    random_crop = _build_dali_random_crop(
+        size=size,
+        padding=padding,
+        pad_if_needed=True,
+        device=device,
+    )
+    actual = random_crop(_move_tensor_to_device(source, device)).cpu()
+
+    # The sampled offset is unknown here, so the output only has to equal one of
+    # the candidate crops.
+    matched = any(torch.equal(actual, candidate) for candidate in candidates)
+    assert matched, "DALI RandomCrop output is not a valid torchvision crop"
+
+
+@cartesian_params(("cpu", "gpu"))
+def test_random_crop_pad_if_needed_matches_torchvision_random_offsets(device):
+    """Torchvision and DALI pad_if_needed crops must both come from the valid offsets."""
+    _skip_if_gpu_unavailable(device)
+    source = make_tensor(shape=(3, 4, 5))
+    size = (6, 7)
+
+    # Raw bytes of every crop reachable from a valid (top, left) offset.
+    candidates = _possible_torchvision_random_crop_outputs(source, size=size, padding=None)
+    expected_outputs = {candidate.numpy().tobytes() for candidate in candidates}
+
+    tv_transform = transforms.RandomCrop(size=size, pad_if_needed=True)
+    tv_outputs = set()
+    for _ in range(100):
+        tv_outputs.add(tv_transform(source).numpy().tobytes())
+
+    assert len(tv_outputs) > 1, "Torchvision RandomCrop did not sample multiple offsets"
+    assert tv_outputs <= expected_outputs, "Torchvision produced an unexpected pad_if_needed crop"
+
+    dali_input = _move_tensor_to_device(source, device)
+    dali_transform = _build_dali_random_crop(size=size, pad_if_needed=True, device=device)
+    dali_outputs = set()
+    for _ in range(20):
+        dali_outputs.add(dali_transform(dali_input).cpu().numpy().tobytes())
+    assert (
+        dali_outputs <= expected_outputs
+    ), "DALI RandomCrop produced an invalid pad_if_needed crop"
+
+
 """
 # TODO: Fill using dictionary pattern is currently not supported
 def test_random_crop_fill_dict_matches_torchvision_tensor():
@@ -200,6 +295,17 @@ def test_random_crop_tensor_shape(device, shape_case):
     assert out.shape == (3, *expected_hw)
 
 
+@cartesian_params(("cpu", "gpu"))
+def test_random_crop_samples_different_offsets(device):
+    """Repeated DALI RandomCrop runs must not all return the same crop."""
+    _skip_if_gpu_unavailable(device)
+    source = _move_tensor_to_device(make_tensor(), device)
+    random_crop = _build_dali_random_crop(size=(4, 5), device=device)
+
+    outputs = {random_crop(source).cpu().numpy().tobytes() for _ in range(20)}
+    assert len(outputs) > 1, "RandomCrop produced the same crop for every run"
+
+
 @params("cpu", "gpu")
 def test_random_crop_pad_if_needed_shape(device):
     if device == "gpu" and not torch.cuda.is_available():

From 5c32f8fb4a7aab15ae3c49f7dd720d895a407b92 Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Fri, 22 May 2026 19:35:05 +0200
Subject: [PATCH 09/11] Review comments

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../experimental/torchvision/v2/randomcrop.py | 100 ++++++------------
 .../python/torchvision/test_tv_randomcrop.py  |  69 ++++++++++--
 2 files changed, 96 insertions(+), 73 deletions(-)

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
index 01bf27b4037..957029b9685 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
@@ -12,10 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import collections
 import numbers
-from types import NoneType
-from typing import Literal, Sequence, Union
+from typing import Literal, Sequence
 
 import nvidia.dali as dali
 import nvidia.dali.fn as fn
@@ -76,24 +74,16 @@ class _ValidateFill(_ArgumentValidateRule):
     """
 
     @classmethod
-    def _verify_fill_value(cls, fill) -> None:
+    def verify(cls, *, fill, **_) -> None:
         if fill is None or isinstance(fill, numbers.Number):
             return
         if isinstance(fill, (list, tuple)) and all(
             isinstance(value, numbers.Number) for value in fill
         ):
+            if len(fill) == 0:
+                raise ValueError("fill sequence must be non-empty")
             return
-        raise TypeError(f"fill must be a number, sequence of numbers, None or a dict, got {fill!r}")
-
-    @classmethod
-    def verify(cls, *, fill, **_) -> None:
-        if isinstance(fill, dict):
-            for key, value in fill.items():
-                if not isinstance(key, (type, str)):
-                    raise TypeError(f"fill dictionary keys must be types or strings, got {key!r}")
-                cls._verify_fill_value(value)
-        else:
-            cls._verify_fill_value(fill)
+        raise TypeError(f"fill must be a number, sequence of numbers, or None, got {fill!r}")
 
 
 class RandomCrop(Operator):
@@ -117,9 +107,9 @@ class RandomCrop(Operator):
         bottom borders respectively.
     pad_if_needed : bool, optional, default = False
         Pad the image if it is smaller than the desired size.
-    fill : number or tuple or dict, optional, default = 0
+    fill : number or tuple, optional, default = 0
         Pixel fill value used when the padding_mode is constant.
-    padding_mode : Literal["constant", "edge", "reflect", "symmetric"], optional,
+    padding_mode : Literal["constant", "edge", "reflect", "symmetric"], optional, default="constant"
         Type of padding. Should be: constant, edge, reflect or symmetric.
     device : Literal["cpu", "gpu"], optional, default = "cpu"
         Device to use for the crop. Can be ``"cpu"`` or ``"gpu"``.
@@ -152,8 +142,6 @@ def adjust_padding(cls, padding: None | int | Sequence[int]) -> tuple[int, int,
 
     @staticmethod
     def adjust_fill(fill):
-        if isinstance(fill, dict):
-            return {key: RandomCrop.adjust_fill(value) for key, value in fill.items()}
         if fill is None:
             return 0
         if isinstance(fill, numbers.Number):
@@ -172,21 +160,7 @@ def __init__(
         size: int | Sequence[int],
         padding: None | int | Sequence[int] = None,
         pad_if_needed: bool = False,
-        fill: Union[
-            int,
-            float,
-            Sequence[int],
-            Sequence[float],
-            None,
-            dict[
-                type | str,
-                int
-                | float
-                | collections.abc.Sequence[int]
-                | collections.abc.Sequence[float]
-                | NoneType,
-            ],
-        ] = 0,
+        fill: int | float | Sequence[int] | Sequence[float] | None = 0,
         padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant",
         device: Literal["cpu", "gpu"] = "cpu",
     ):
@@ -214,44 +188,40 @@ def _kernel(self, data_input):
         crop_h, crop_w = self.size
         pad_left, pad_top, pad_right, pad_bottom = self.padding
 
-        if self.needs_padding:
-            padded_h = in_h + pad_top + pad_bottom
-            padded_w = in_w + pad_left + pad_right
-
-            if self.pad_if_needed:
-                pad_h = dali.math.max(crop_h - padded_h, 0)
-                pad_w = dali.math.max(crop_w - padded_w, 0)
-                pad_top = pad_top + pad_h
-                pad_bottom = pad_bottom + pad_h
-                pad_left = pad_left + pad_w
-                pad_right = pad_right + pad_w
-
-            tensor = fn.slice(
-                tensor,
-                fn.stack(
-                    fn.cast(-pad_left, dtype=dali.types.INT64),
-                    fn.cast(-pad_top, dtype=dali.types.INT64),
-                ),
-                fn.stack(in_w + pad_left + pad_right, in_h + pad_top + pad_bottom),
-                out_of_bounds_policy=PADDING_CLASS[self.padding_mode].border_type,
-                fill_values=self.fill,
-                device=self.device,
-                axis_names="WH",
-            )
+        padded_h = in_h + pad_top + pad_bottom
+        padded_w = in_w + pad_left + pad_right
 
-            in_h = in_h + pad_top + pad_bottom
-            in_w = in_w + pad_left + pad_right
+        if self.pad_if_needed:
+            pad_h = dali.math.max(crop_h - padded_h, 0)
+            pad_w = dali.math.max(crop_w - padded_w, 0)
+            pad_top = pad_top + pad_h
+            pad_left = pad_left + pad_w
+            # Only pad_top / pad_left are read below; pad_bottom / pad_right are dropped.
+            padded_h = padded_h + 2 * pad_h
+            padded_w = padded_w + 2 * pad_w
 
-        max_top = fn.cast(in_h, dtype=dali.types.INT32) - crop_h
-        max_left = fn.cast(in_w, dtype=dali.types.INT32) - crop_w
+        max_top = fn.cast(padded_h, dtype=dali.types.INT32) - crop_h
+        max_left = fn.cast(padded_w, dtype=dali.types.INT32) - crop_w
 
         top = RandomCrop._randint(max_top)
         left = RandomCrop._randint(max_left)
 
+        slice_kwargs = {
+            "device": self.device,
+            "axis_names": "WH",
+        }
+        if self.needs_padding:
+            slice_kwargs.update(
+                out_of_bounds_policy=PADDING_CLASS[self.padding_mode].border_type,
+                fill_values=self.fill,
+            )
+
         return fn.slice(
             tensor,
-            fn.stack(left, top),
+            fn.stack(
+                fn.cast(left - pad_left, dtype=dali.types.INT32),
+                fn.cast(top - pad_top, dtype=dali.types.INT32),
+            ),
             fn.stack(crop_w, crop_h),
-            device=self.device,
-            axis_names="WH",
+            **slice_kwargs,
         )
diff --git a/dali/test/python/torchvision/test_tv_randomcrop.py b/dali/test/python/torchvision/test_tv_randomcrop.py
index 618125215e1..e2201841526 100644
--- a/dali/test/python/torchvision/test_tv_randomcrop.py
+++ b/dali/test/python/torchvision/test_tv_randomcrop.py
@@ -23,6 +23,8 @@
 import torchvision.transforms.v2 as transforms
 import torchvision.transforms.v2.functional as tv_fn
 
+import nvidia.dali as dali
+import nvidia.dali.experimental.torchvision.v2.randomcrop as randomcrop_module
 from nvidia.dali.experimental.torchvision import Compose, RandomCrop
 from nvidia.dali.experimental.torchvision.v2.operator import Operator
 
@@ -121,6 +123,47 @@ def test_random_crop_is_operator():
     assert issubclass(RandomCrop, Operator)
 
 
+def test_random_crop_fuses_padding_into_crop_slice():
+    transform = RandomCrop(size=(4, 5), padding=1)
+    slice_calls = []
+    cast_calls = []
+
+    def fake_slice(tensor, anchor, shape, **kwargs):
+        slice_calls.append((tensor, anchor, shape, kwargs))
+        return "cropped"
+
+    def fake_cast(value, dtype):
+        cast_calls.append((value, dtype))
+        return value
+
+    old_slice = randomcrop_module.fn.slice
+    old_stack = randomcrop_module.fn.stack
+    old_cast = randomcrop_module.fn.cast
+    old_randint = RandomCrop._randint
+    try:
+        randomcrop_module.fn.slice = fake_slice
+        randomcrop_module.fn.stack = lambda *args: args
+        randomcrop_module.fn.cast = fake_cast
+        RandomCrop._randint = staticmethod(lambda max_value: 0)
+        # _kernel input: presumably (height, width, channels, tensor) -- TODO confirm.
+        out = transform._kernel((4, 5, 3, "input"))
+    finally:
+        randomcrop_module.fn.slice = old_slice
+        randomcrop_module.fn.stack = old_stack
+        randomcrop_module.fn.cast = old_cast
+        RandomCrop._randint = staticmethod(old_randint)
+    # _randint is pinned to 0 and padding is 1, so the single slice must be anchored at (-1, -1).
+    assert out == "cropped"
+    assert len(slice_calls) == 1
+    tensor, anchor, shape, kwargs = slice_calls[0]
+    assert tensor == "input"
+    assert anchor == (-1, -1)
+    assert shape == (5, 4)
+    assert kwargs["out_of_bounds_policy"] == "pad"
+    assert kwargs["fill_values"] == 0
+    assert cast_calls[-2:] == [(-1, dali.types.INT32), (-1, dali.types.INT32)]
+
+
 @cartesian_params(
     ("cpu", "gpu"),
     (
@@ -257,8 +300,8 @@ def test_random_crop_pad_if_needed_matches_torchvision_random_offsets(device):
     ), "DALI RandomCrop produced an invalid pad_if_needed crop"
 
 
-"""
 # TODO: Fill using dictionary pattern is currently not supported
+@unittest.skip("dict fill not supported")
 def test_random_crop_fill_dict_matches_torchvision_tensor():
     tensor = make_tensor(shape=(3, 4, 5))
     fill = {torch.Tensor: 9}
@@ -268,6 +311,9 @@ def test_random_crop_fill_dict_matches_torchvision_tensor():
         transforms.RandomCrop(size=(6, 7), padding=1, fill=fill),
     )
 
+
+# TODO: Fill using dictionary pattern is currently not supported
+@unittest.skip("dict fill not supported")
 def test_random_crop_fill_dict_matches_torchvision_pil():
     img = make_pil_image(mode="RGB", h=4, w=5)
     fill = {Image.Image: (1, 2, 3)}
@@ -276,7 +322,6 @@ def test_random_crop_fill_dict_matches_torchvision_pil():
         _build_dali_random_crop(size=(6, 7), padding=1, fill=fill),
         transforms.RandomCrop(size=(6, 7), padding=1, fill=fill),
     )
-"""
 
 
 @cartesian_params(
@@ -330,7 +375,7 @@ def test_random_crop_pad_if_needed_shape(device):
     {"bad": "value"},
 )
 def test_random_crop_invalid_size(size):
-    with assert_raises((TypeError, ValueError)):
+    with assert_raises((TypeError, ValueError), glob="*size*"):
         _ = RandomCrop(size=size)
 
 
@@ -342,12 +387,12 @@ def test_random_crop_invalid_size(size):
     "bad",
 )
 def test_random_crop_invalid_padding(padding):
-    with assert_raises((TypeError, ValueError)):
+    with assert_raises((TypeError, ValueError), glob="*padding*"):
         _ = RandomCrop(size=3, padding=padding)
 
 
 def test_random_crop_invalid_pad_if_needed():
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*pad_if_needed must be bool*"):
         _ = RandomCrop(size=3, pad_if_needed="yes")
 
 
@@ -357,17 +402,25 @@ def test_random_crop_invalid_pad_if_needed():
     [1, object()],
     {object(): 1},
     {torch.Tensor: object()},
+    {Image.Image: (1, 2, 3)},  # TODO: dict fill patterns are not supported
+    {torch.Tensor: 9},  # TODO: dict fill patterns are not supported
 )
 def test_random_crop_invalid_fill(fill):
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*fill must be*"):
+        _ = RandomCrop(size=3, padding=1, fill=fill)
+
+
+@params(([],), ((),))
+def test_random_crop_empty_fill_sequence(fill):
+    with assert_raises(ValueError, glob="*fill sequence must be non-empty*"):
         _ = RandomCrop(size=3, padding=1, fill=fill)
 
 
 def test_random_crop_invalid_padding_mode_when_padding_is_used():
-    with assert_raises(ValueError):
+    with assert_raises(ValueError, glob="*Invalid padding mode*"):
         _ = RandomCrop(size=3, padding=1, padding_mode="bad")
 
 
 def test_random_crop_invalid_padding_mode_when_pad_if_needed_is_used():
-    with assert_raises(ValueError):
+    with assert_raises(ValueError, glob="*Invalid padding mode*"):
         _ = RandomCrop(size=3, pad_if_needed=True, padding_mode="bad")

From 4fd51eb091da31f2ef0503575062401d24571f5b Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Thu, 9 Apr 2026 13:30:38 +0200
Subject: [PATCH 10/11] Image information Torchvision's functional API

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../torchvision/v2/functional/__init__.py     |   3 +
 .../v2/functional/image_metadata.py           |  82 ++++++++
 .../torchvision/test_tv_image_metadata.py     | 186 ++++++++++++++++++
 3 files changed, 271 insertions(+)
 create mode 100644 dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
 create mode 100644 dali/test/python/torchvision/test_tv_image_metadata.py

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
index 18003740b00..6064709cd61 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
@@ -17,6 +17,7 @@
 from .crop import crop
 from .flips import horizontal_flip, vertical_flip
 from .gaussian_blur import gaussian_blur
+from .image_metadata import get_dimensions, get_image_size
 from .normalize import normalize
 from .pad import pad
 from .resize import resize
@@ -26,6 +27,8 @@
     "center_crop",
     "crop",
     "gaussian_blur",
+    "get_dimensions",
+    "get_image_size",
     "horizontal_flip",
     "normalize",
     "pad",
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
new file mode 100644
index 00000000000..4b62db09f20
--- /dev/null
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from PIL import Image
+import torch
+
+
+def get_image_size(inpt: Image.Image | torch.Tensor) -> List[int]:
+    """
+    Return the spatial size of an image as ``[width, height]``.
+
+    Mirrors ``torchvision.transforms.v2.functional.get_image_size``.
+
+    .. note::
+        This function is provided for compatibility.  The torchvision successor
+        ``get_size`` returns ``[height, width]`` instead.
+
+    Parameters
+    ----------
+    inpt : PIL Image or torch.Tensor
+        Input image.  Tensors are expected in ``[…, H, W]`` layout (leading
+        channel / batch dimensions are ignored).
+
+    Returns
+    -------
+    List[int]
+        ``[width, height]``
+    """
+    if isinstance(inpt, Image.Image):
+        return list(inpt.size)  # PIL .size is (W, H)
+    elif isinstance(inpt, torch.Tensor):
+        if inpt.ndim < 2:
+            raise TypeError(
+                f"get_image_size requires a tensor with at least 2 dimensions, got {inpt.ndim}"
+            )
+        return [inpt.shape[-1], inpt.shape[-2]]  # [W, H]
+    raise TypeError(f"Unsupported input type: {type(inpt)}")
+
+
+def get_dimensions(inpt: Image.Image | torch.Tensor) -> List[int]:
+    """
+    Return the number of channels, height, and width of an image as
+    ``[channels, height, width]``.
+
+    Mirrors ``torchvision.transforms.v2.functional.get_dimensions``.
+
+    Parameters
+    ----------
+    inpt : PIL Image or torch.Tensor
+        Input image.  Tensors are expected in ``[H, W]`` or ``[…, C, H, W]`` layout
+        (leading batch dimensions are ignored).
+
+    Returns
+    -------
+    List[int]
+        ``[channels, height, width]``
+    """
+    if isinstance(inpt, Image.Image):
+        w, h = inpt.size
+        return [len(inpt.getbands()), h, w]
+    elif isinstance(inpt, torch.Tensor):
+        if inpt.ndim < 2:
+            raise TypeError(
+                f"get_dimensions requires a tensor with at least 2 dimensions, got {inpt.ndim}"
+            )
+        if inpt.ndim == 2:
+            return [1, inpt.shape[-2], inpt.shape[-1]]
+        return [inpt.shape[-3], inpt.shape[-2], inpt.shape[-1]]  # [C, H, W]
+    raise TypeError(f"Unsupported input type: {type(inpt)}")
diff --git a/dali/test/python/torchvision/test_tv_image_metadata.py b/dali/test/python/torchvision/test_tv_image_metadata.py
new file mode 100644
index 00000000000..3f24ebcd359
--- /dev/null
+++ b/dali/test/python/torchvision/test_tv_image_metadata.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+import unittest
+
+from nose2.tools import cartesian_params, params
+from nose_utils import assert_raises
+from PIL import Image
+import torch
+from torchvision import tv_tensors
+import torchvision.transforms.v2.functional as fn_tv
+
+from nvidia.dali.experimental.torchvision.v2.functional import get_image_size, get_dimensions
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _tv_get_image_size(inpt):
+    """Return torchvision's ``get_image_size`` result with UserWarnings silenced."""
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        return fn_tv.get_image_size(inpt)
+
+
+def _skip_if_gpu_unavailable(device):
+    if device == "gpu" and not torch.cuda.is_available():
+        raise unittest.SkipTest("CUDA is not available")
+
+
+def _move_tensor_to_device(tensor, device):
+    """Return ``tensor`` on CUDA when ``device`` is "gpu"; otherwise return it unchanged."""
+    on_gpu = device == "gpu"
+    return tensor.cuda() if on_gpu else tensor
+
+
+def _make_compatibility_input(input_kind, shape):
+    """Build an all-zero input of ``shape`` as a plain tensor or ``tv_tensors.Image``."""
+    zeros = torch.zeros(*shape)
+    if input_kind in ("tensor", "tv_image"):
+        return zeros if input_kind == "tensor" else tv_tensors.Image(zeros)
+    # Any other kind is rejected rather than silently treated as a tensor.
+    raise ValueError(f"Unsupported input kind: {input_kind}")
+
+
+# PIL images with known exact dimensions (W x H)
+PIL_CASES = [
+    Image.new("RGB", (320, 240)),  # 3 channels
+    Image.new("L", (100, 50)),  # 1 channel, non-square
+    Image.new("RGBA", (64, 32)),  # 4 channels
+    Image.new("RGB", (1, 1)),  # minimal
+    Image.new("L", (512, 1)),  # extreme aspect ratio
+]
+
+# Tensors in CHW / NCHW layout — deliberately use H≠W to catch W/H swap bugs
+TENSOR_CASES = [
+    torch.zeros(3, 240, 320),  # CHW
+    torch.zeros(1, 3, 240, 320),  # NCHW, N=1
+    torch.zeros(8, 3, 240, 320),  # NCHW, N=8
+    torch.zeros(1, 50, 100),  # CHW, 1 channel
+    torch.zeros(4, 32, 64),  # CHW, 4 channels
+    torch.zeros(10, 11, 12, 8, 3, 240, 320),  # ...NCHW, N=8
+]
+
+TORCHVISION_COMPATIBILITY_CASES = [
+    ("tensor", (240, 320)),  # HW, implicit single channel
+    ("tensor", (3, 240, 320)),  # CHW
+    ("tensor", (8, 3, 240, 320)),  # NCHW
+    ("tv_image", (240, 320)),  # torchvision Image converts HW to 1HW
+    ("tv_image", (3, 240, 320)),  # torchvision Image, CHW
+]
+
+
+# ---------------------------------------------------------------------------
+# get_image_size — PIL
+# ---------------------------------------------------------------------------
+
+
+@params(*PIL_CASES)
+def test_get_image_size_pil(img):
+    """DALI ``get_image_size`` must agree with torchvision for PIL inputs."""
+    expected = _tv_get_image_size(img)
+    actual = get_image_size(img)
+    assert actual == expected, f"mode={img.mode} size={img.size}: got {actual}, expected {expected}"
+
+
+# ---------------------------------------------------------------------------
+# get_image_size — tensors
+# ---------------------------------------------------------------------------
+
+
+@cartesian_params(("cpu", "gpu"), TENSOR_CASES)
+def test_get_image_size_tensor(device, t):
+    """DALI ``get_image_size`` must agree with torchvision for tensor inputs."""
+    _skip_if_gpu_unavailable(device)
+    t = _move_tensor_to_device(t, device)
+    expected = _tv_get_image_size(t)
+    actual = get_image_size(t)
+    assert actual == expected, f"device={device} shape={t.shape}: got {actual}, expected {expected}"
+
+
+# ---------------------------------------------------------------------------
+# get_dimensions — PIL
+# ---------------------------------------------------------------------------
+
+
+@params(*PIL_CASES)
+def test_get_dimensions_pil(img):
+    """DALI ``get_dimensions`` must agree with torchvision for PIL inputs."""
+    expected = fn_tv.get_dimensions(img)
+    actual = get_dimensions(img)
+    assert actual == expected, f"mode={img.mode} size={img.size}: got {actual}, expected {expected}"
+
+
+# ---------------------------------------------------------------------------
+# get_dimensions — tensors
+# ---------------------------------------------------------------------------
+
+
+@cartesian_params(("cpu", "gpu"), TENSOR_CASES)
+def test_get_dimensions_tensor(device, t):
+    """DALI ``get_dimensions`` must agree with torchvision for tensor inputs."""
+    _skip_if_gpu_unavailable(device)
+    t = _move_tensor_to_device(t, device)
+    expected = fn_tv.get_dimensions(t)
+    actual = get_dimensions(t)
+    assert actual == expected, f"device={device} shape={t.shape}: got {actual}, expected {expected}"
+
+
+# ---------------------------------------------------------------------------
+# Torchvision compatibility
+# ---------------------------------------------------------------------------
+
+
+@params(*PIL_CASES)
+def test_image_metadata_pil_matches_torchvision(img):
+    assert get_image_size(img) == _tv_get_image_size(img)
+    assert get_dimensions(img) == fn_tv.get_dimensions(img)
+
+
+@cartesian_params(("cpu", "gpu"), TORCHVISION_COMPATIBILITY_CASES)
+def test_image_metadata_tensor_inputs_match_torchvision(device, input_case):
+    """Plain tensors and ``tv_tensors.Image`` inputs must match torchvision's metadata."""
+    _skip_if_gpu_unavailable(device)
+    sample = _make_compatibility_input(*input_case)
+    sample = _move_tensor_to_device(sample, device)
+    assert get_image_size(sample) == _tv_get_image_size(sample)
+    assert get_dimensions(sample) == fn_tv.get_dimensions(sample)
+
+
+# ---------------------------------------------------------------------------
+# Error cases
+# ---------------------------------------------------------------------------
+
+
+def test_get_image_size_1d_tensor_raises():
+    with assert_raises(TypeError):
+        get_image_size(torch.zeros(10))
+
+
+def test_get_dimensions_1d_tensor_raises():
+    with assert_raises(TypeError):
+        get_dimensions(torch.zeros(10))
+
+
+def test_get_image_size_unsupported_type_raises():
+    with assert_raises(TypeError):
+        get_image_size("not_an_image")
+
+
+def test_get_dimensions_unsupported_type_raises():
+    with assert_raises(TypeError):
+        get_dimensions("not_an_image")

From c024bac1d22cca60d7895ed8f63386a7f02231eb Mon Sep 17 00:00:00 2001
From: Marek Dabek <mdabek@nvidia.com>
Date: Mon, 25 May 2026 10:37:11 +0200
Subject: [PATCH 11/11] Review fixes

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
---
 .../torchvision/v2/functional/image_metadata.py           | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
index 4b62db09f20..9ec4c85891d 100644
--- a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
+++ b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
@@ -44,10 +44,10 @@ def get_image_size(inpt: Image.Image | torch.Tensor) -> List[int]:
     elif isinstance(inpt, torch.Tensor):
         if inpt.ndim < 2:
             raise TypeError(
-                f"get_image_size requires a tensor with at least 2 dimensions, got {inpt.ndim}"
+                f"get_image_size requires a tensor with at least 2 dimensions, got {inpt.ndim}."
             )
         return [inpt.shape[-1], inpt.shape[-2]]  # [W, H]
-    raise TypeError(f"Unsupported input type: {type(inpt)}")
+    raise TypeError(f"Unsupported input type: {type(inpt)}.")
 
 
 def get_dimensions(inpt: Image.Image | torch.Tensor) -> List[int]:
@@ -74,9 +74,9 @@ def get_dimensions(inpt: Image.Image | torch.Tensor) -> List[int]:
     elif isinstance(inpt, torch.Tensor):
         if inpt.ndim < 2:
             raise TypeError(
-                f"get_dimensions requires a tensor with at least 2 dimensions, got {inpt.ndim}"
+                f"get_dimensions requires a tensor with at least 2 dimensions, got {inpt.ndim}."
             )
         if inpt.ndim == 2:
             return [1, inpt.shape[-2], inpt.shape[-1]]
         return [inpt.shape[-3], inpt.shape[-2], inpt.shape[-1]]  # [C, H, W]
-    raise TypeError(f"Unsupported input type: {type(inpt)}")
+    raise TypeError(f"Unsupported input type: {type(inpt)}.")