From 0ddbf574b6f5ff0630a60d42e022eb8b77b6046e Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Tue, 5 May 2026 10:13:03 -0400 Subject: [PATCH 01/13] Add ImageData/ImageBuffer class and tests --- include/core/image_buffer.hpp | 81 +++++++ include/core/image_data.hpp | 178 ++++++++++++++ src/core/image_data.cpp | 88 +++++++ .../src/tests/core/image/test_image_data.cpp | 225 ++++++++++++++++++ 4 files changed, 572 insertions(+) create mode 100644 include/core/image_buffer.hpp create mode 100644 include/core/image_data.hpp create mode 100644 src/core/image_data.cpp create mode 100644 tests/roccv/cpp/src/tests/core/image/test_image_data.cpp diff --git a/include/core/image_buffer.hpp b/include/core/image_buffer.hpp new file mode 100644 index 00000000..aea93c2d --- /dev/null +++ b/include/core/image_buffer.hpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <type_traits>
+
+/** Maximum number of data planes an image can have. */
+#define ROCCV_MAX_IMAGE_PLANES (6)
+
+namespace roccv {
+
+/**
+ * @brief Describes a single pitch-linear image plane.
+ *
+ * For interleaved-channel formats there is exactly one plane covering the whole
+ * image. For planar formats (e.g. NV12, YUV420) each channel/plane carries its
+ * own width, height, and row stride and lives in its own buffer.
+ */
+struct ImagePlaneStrided {
+    /** Width of this plane in pixels. Must be >= 1. */
+    int32_t width;
+
+    /** Height of this plane in pixels. Must be >= 1. */
+    int32_t height;
+
+    /** Distance in bytes between the start of consecutive rows. Must be at
+     * least `(width * bits-per-pixel + 7) / 8`. */
+    int64_t rowStride;
+
+    /** Pointer to the first byte of plane data. Validity (device vs host) is
+     * determined by the enclosing data type. */
+    void* basePtr;
+};
+
+/**
+ * @brief A pitch-linear image buffer: one or more `ImagePlaneStrided` entries.
+ *
+ * Only the first `numPlanes` entries carry valid data; the remainder of the
+ * fixed-size `planes` array is unused. Capping the array size keeps the buffer
+ * trivially copyable so it can ride inside `ImageBuffer` without an
+ * allocation.
+ */
+struct ImageBufferStrided {
+    /** Number of valid planes. Must be >= 1. */
+    int32_t numPlanes;
+
+    /** Per-plane descriptors. Only the first `numPlanes` are valid. */
+    ImagePlaneStrided planes[ROCCV_MAX_IMAGE_PLANES];
+};
+
+/**
+ * @brief An image buffer. Currently only the strided variant is supported.
+ * Mirrors the role `TensorBuffer` plays for tensors and is intentionally
+ * shaped as a tagged-union-style aggregate so additional buffer kinds can be
+ * added later (e.g. HIP textures) without changing the public type.
+ */
+struct ImageBuffer {
+    ImageBufferStrided strided;
+};
+
+// The buffer is copied by value into every ImageData snapshot; enforce the
+// trivially-copyable property the documentation above relies on.
+static_assert(std::is_trivially_copyable<ImageBuffer>::value, "ImageBuffer must remain trivially copyable.");
+
+}  // namespace roccv
diff --git a/include/core/image_data.hpp b/include/core/image_data.hpp
new file mode 100644
index 00000000..cf45e71c
--- /dev/null
+++ b/include/core/image_data.hpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+// NOTE(review): the three angle-bracket include targets below were missing
+// from this copy of the patch and are inferred from the names this header
+// uses (fixed-width integers, std::optional, std::is_base_of) — confirm
+// against the repository.
+#include <stdint.h>
+
+#include <optional>
+#include <type_traits>
+
+#include "core/image_buffer.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+#include "operator_types.h"
+
+namespace roccv {
+
+/**
+ * @brief Discriminator for the kind of buffer an ImageData carries.
Used by
+ * IsCompatibleKind() / cast<>() to perform safe runtime down-casting through
+ * the ImageData hierarchy.
+ */
+enum class ImageBufferType {
+    IMAGE_BUFFER_NONE,         // Default/invalid buffer type. Used when no buffer type is specified.
+    IMAGE_BUFFER_STRIDED_HIP,  // GPU-accessible buffer with strided access.
+    IMAGE_BUFFER_STRIDED_HOST  // Host-accessible buffer with strided access.
+};
+
+/**
+ * @brief Holds the underlying image data alongside metadata (format, buffer
+ * kind). Non-strided image data is not supported for use right now; use
+ * ImageDataStrided to access strided image data instead.
+ *
+ * ImageData is the interchange type for a single variable-sized image. It
+ * does not own the underlying pixel buffer — it is a metadata snapshot, valid
+ * only as long as the producing buffer outlives it.
+ */
+class ImageData {
+   public:
+    ImageData() = delete;
+    virtual ~ImageData() = default;
+
+    /**
+     * @brief Returns the pixel format of the image.
+     */
+    virtual const ImageFormat &format() const;
+
+    /**
+     * @brief Returns the device the image data resides on.
+     */
+    virtual eDeviceType device() const;
+
+    /**
+     * @brief Attempts to down-cast this ImageData to a more specific subclass.
+     * Returns the casted value if the underlying buffer kind matches what
+     * Derived expects, or std::nullopt otherwise.
+     *
+     * The returned object carries the same format, buffer, device and buffer
+     * kind as this object, so it can itself be cast again.
+     *
+     * @tparam Derived The target subclass to cast to. Must derive from
+     * ImageData, add no data members, and be constructible from
+     * (const ImageFormat &, const ImageBuffer &).
+     * @return The converted value, or std::nullopt when
+     * Derived::IsCompatibleKind() rejects this object's buffer kind.
+     */
+    template <typename Derived>
+    std::optional<Derived> cast() const {
+        static_assert(std::is_base_of<ImageData, Derived>::value, "Cannot cast ImageData to an unrelated type.");
+        static_assert(sizeof(Derived) == sizeof(ImageData), "Derived type must not add any additional data members.");
+
+        if (!Derived::IsCompatibleKind(m_bufferType)) {
+            return std::nullopt;
+        }
+
+        // Derived's constructor stamps its own tags, and intermediate types
+        // such as ImageDataStrided leave the base defaults (NONE / GPU) in
+        // place. Copy the source's real tags across so that, for example,
+        // host data cast to ImageDataStrided still reports CPU residency.
+        Derived result(m_format, m_buffer);
+        result.m_deviceType = m_deviceType;
+        result.m_bufferType = m_bufferType;
+        return std::optional<Derived>(result);
+    }
+
+    /**
+     * @brief Returns true when `bufferType` may be viewed as a plain
+     * ImageData, i.e. for every kind except IMAGE_BUFFER_NONE.
+     */
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+
+   protected:
+    /**
+     * @brief Stores `format` and `buffer`. The buffer kind starts as
+     * IMAGE_BUFFER_NONE and the device as GPU; leaf subclasses overwrite both.
+     */
+    ImageData(const ImageFormat &format, const ImageBuffer &buffer);
+
+    ImageFormat m_format;
+    eDeviceType m_deviceType;
+    ImageBufferType m_bufferType;
+    ImageBuffer m_buffer;
+};
+
+/**
+ * @brief Image data backed by one or more pitch-linear planes. Adds typed
+ * accessors for plane descriptors on top of the base ImageData. Sub-classed
+ * by ImageDataStridedHip and ImageDataStridedHost to discriminate device vs
+ * host residency.
+ */
+class ImageDataStrided : public ImageData {
+   public:
+    using Buffer = ImageBufferStrided;
+
+    /**
+     * @brief Wraps `buffer` without assigning a residency tag; the leaf
+     * subclasses set the buffer kind and device.
+     */
+    ImageDataStrided(const ImageFormat &format, const ImageBuffer &buffer);
+
+    /**
+     * @brief Returns true for both strided kinds (HIP and host).
+     */
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+
+    /**
+     * @brief Returns the logical image dimensions, taken from plane 0. For
+     * planar formats, individual planes may have smaller dimensions (e.g.
+     * chroma sub-sampling); use plane(p) to inspect each plane directly.
+     */
+    Size2D size() const;
+
+    /**
+     * @brief Returns the number of valid planes in the buffer.
+     */
+    int32_t numPlanes() const;
+
+    /**
+     * @brief Returns the descriptor for the requested plane.
+     *
+     * @param[in] p The plane index. Must satisfy `0 <= p < numPlanes()`.
+     */
+    const ImagePlaneStrided &plane(int32_t p) const;
+};
+
+/**
+ * @brief GPU-accessible strided image data.
+ */
+class ImageDataStridedHip : public ImageDataStrided {
+   public:
+    using Buffer = ImageBufferStrided;
+
+    /**
+     * @brief Returns true only for IMAGE_BUFFER_STRIDED_HIP.
+     */
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+
+    /**
+     * @brief Constructs GPU-accessible strided image data from a generic
+     * image buffer.
+     *
+     * @param[in] format The pixel format.
+     * @param[in] buffer An image buffer whose strided planes are allocated on the GPU.
+     */
+    ImageDataStridedHip(const ImageFormat &format, const ImageBuffer &buffer);
+
+    /**
+     * @brief Constructs GPU-accessible strided image data from a strided
+     * image buffer directly.
+     *
+     * @param[in] format The pixel format.
+     * @param[in] buffer A strided image buffer with planes allocated on the GPU.
+     */
+    ImageDataStridedHip(const ImageFormat &format, const Buffer &buffer);
+};
+
+/**
+ * @brief Host-accessible strided image data.
+ */
+class ImageDataStridedHost : public ImageDataStrided {
+   public:
+    using Buffer = ImageBufferStrided;
+
+    /**
+     * @brief Returns true only for IMAGE_BUFFER_STRIDED_HOST.
+     */
+    static bool IsCompatibleKind(ImageBufferType bufferType);
+
+    /**
+     * @brief Constructs host-accessible strided image data from a generic
+     * image buffer.
+     *
+     * @param[in] format The pixel format.
+     * @param[in] buffer An image buffer whose strided planes are allocated on the host.
+     */
+    ImageDataStridedHost(const ImageFormat &format, const ImageBuffer &buffer);
+
+    /**
+     * @brief Constructs host-accessible strided image data from a strided
+     * image buffer directly.
+     *
+     * @param[in] format The pixel format.
+     * @param[in] buffer A strided image buffer with planes allocated on the host.
+     */
+    ImageDataStridedHost(const ImageFormat &format, const Buffer &buffer);
+};
+
+}  // namespace roccv
diff --git a/src/core/image_data.cpp b/src/core/image_data.cpp
new file mode 100644
index 00000000..6fb0fc83
--- /dev/null
+++ b/src/core/image_data.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include "core/image_data.hpp"
+
+#include "core/exception.hpp"
+#include "core/image_buffer.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+
+namespace roccv {
+
+const ImageFormat& ImageData::format() const { return m_format; }
+
+eDeviceType ImageData::device() const { return m_deviceType; }
+
+// Starts untagged (IMAGE_BUFFER_NONE) with a GPU device placeholder; the leaf
+// constructors below overwrite both tags with their real values.
+ImageData::ImageData(const ImageFormat& format, const ImageBuffer& buffer)
+    : m_format(format),
+      m_deviceType(eDeviceType::GPU),
+      m_bufferType(ImageBufferType::IMAGE_BUFFER_NONE),
+      m_buffer(buffer) {}
+
+bool ImageData::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType != ImageBufferType::IMAGE_BUFFER_NONE;
+}
+
+ImageDataStrided::ImageDataStrided(const ImageFormat& format, const ImageBuffer& buffer)
+    : ImageData(format, buffer) {}
+
+bool ImageDataStrided::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HIP ||
+           bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HOST;
+}
+
+// The logical image size is, by convention, the size of plane 0.
+Size2D ImageDataStrided::size() const {
+    const ImagePlaneStrided& p0 = m_buffer.strided.planes[0];
+    return Size2D{p0.width, p0.height};
+}
+
+int32_t ImageDataStrided::numPlanes() const { return m_buffer.strided.numPlanes; }
+
+// Throws Exception (INVALID_VALUE) when `p` is not a valid plane index.
+const ImagePlaneStrided& ImageDataStrided::plane(int32_t p) const {
+    // `planes` is a fixed ROCCV_MAX_IMAGE_PLANES-slot array of which only the
+    // first numPlanes entries are meaningful. Reject anything else rather
+    // than returning an unused slot or reading past the end of the array.
+    if (p < 0 || p >= m_buffer.strided.numPlanes || p >= ROCCV_MAX_IMAGE_PLANES) {
+        throw Exception("Plane index out of range in ImageDataStrided::plane.", eStatusType::INVALID_VALUE);
+    }
+    return m_buffer.strided.planes[p];
+}
+
+ImageDataStridedHip::ImageDataStridedHip(const ImageFormat& format, const ImageBuffer& buffer)
+    : ImageDataStrided(format, buffer) {
+    m_bufferType = ImageBufferType::IMAGE_BUFFER_STRIDED_HIP;
+    m_deviceType = eDeviceType::GPU;
+}
+
+// Convenience overload: wraps the strided buffer and delegates to the ctor above.
+ImageDataStridedHip::ImageDataStridedHip(const ImageFormat& format, const ImageDataStridedHip::Buffer& buffer)
+    : ImageDataStridedHip(format, ImageBuffer{.strided = buffer}) {}
+
+bool ImageDataStridedHip::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HIP;
+}
+
+ImageDataStridedHost::ImageDataStridedHost(const ImageFormat& format, const ImageBuffer& buffer)
+    : ImageDataStrided(format, buffer) {
+    m_bufferType = ImageBufferType::IMAGE_BUFFER_STRIDED_HOST;
+    m_deviceType = eDeviceType::CPU;
+}
+
+// Convenience overload: wraps the strided buffer and delegates to the ctor above.
+ImageDataStridedHost::ImageDataStridedHost(const ImageFormat& format, const ImageDataStridedHost::Buffer& buffer)
+    : ImageDataStridedHost(format, ImageBuffer{.strided = buffer}) {}
+
+bool ImageDataStridedHost::IsCompatibleKind(ImageBufferType bufferType) {
+    return bufferType == ImageBufferType::IMAGE_BUFFER_STRIDED_HOST;
+}
+
+}  // namespace roccv
diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp
new file mode 100644
index 00000000..885b1aa5
--- /dev/null
+++ b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// NOTE(review): the four angle-bracket include targets below were missing
+// from this copy of the patch and are inferred from the names the tests use
+// (fixed-width integers, ImageBufferStrided, ImageDataStridedHip/Host, FMT_*)
+// — confirm against the repository.
+#include <stdint.h>
+
+#include <core/image_buffer.hpp>
+#include <core/image_data.hpp>
+#include <core/image_format.hpp>
+
+#include "test_helpers.hpp"
+
+using namespace roccv;
+using namespace roccv::tests;
+
+namespace {
+
+// EXPECT_EQ feeds both sides through std::to_string, which only accepts
+// numeric types. Wrap enum/pointer/bool comparisons in these casts.
+auto AsInt = [](auto v) { return static_cast<int>(v); };
+auto AsAddr = [](void* p) { return reinterpret_cast<uintptr_t>(p); };
+
+// ImageData carries pointers but never dereferences them; the buffer is a
+// metadata snapshot. Use opaque sentinel pointers in tests so we can verify
+// values flow through without needing real allocations.
+void* const FAKE_PTR_A = reinterpret_cast<void*>(0xAAAAAAAAull);
+void* const FAKE_PTR_B = reinterpret_cast<void*>(0xBBBBBBBBull);
+void* const FAKE_PTR_C = reinterpret_cast<void*>(0xCCCCCCCCull);
+
+// Builds a one-plane strided buffer; the unused plane slots stay
+// value-initialized.
+ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, int64_t rowStride, void* basePtr) {
+    ImageBufferStrided buf{};
+    buf.numPlanes = 1;
+    buf.planes[0] = {width, height, rowStride, basePtr};
+    return buf;
+}
+
+ImageBufferStrided MakeThreePlaneBuffer() {
+    // Mimics a planar layout (e.g. YUV420-style) with sub-sampled chroma — three
+    // planes of differing dimensions and strides backed by distinct buffers.
+    ImageBufferStrided buf{};
+    buf.numPlanes = 3;
+    buf.planes[0] = {1920, 1080, 1920, FAKE_PTR_A};  // Y full-resolution
+    buf.planes[1] = {960, 540, 960, FAKE_PTR_B};     // U sub-sampled
+    buf.planes[2] = {960, 540, 960, FAKE_PTR_C};     // V sub-sampled
+    return buf;
+}
+
+/**
+ * @brief Verifies HIP-strided construction populates all observable state and
+ * tags itself as GPU-resident.
+ */
+void TestImageDataStridedHipConstruction() {
+    // Tightly packed RGB8: three bytes per pixel, no row padding.
+    auto buf = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A);
+    ImageDataStridedHip data(FMT_RGB8, buf);
+
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU));
+    EXPECT_EQ(data.numPlanes(), 1);
+    EXPECT_EQ(data.size().w, 640);
+    EXPECT_EQ(data.size().h, 480);
+    EXPECT_EQ(data.plane(0).width, 640);
+    EXPECT_EQ(data.plane(0).height, 480);
+    // rowStride is int64_t; widen the expected value so both sides agree in type.
+    EXPECT_EQ(data.plane(0).rowStride, static_cast<int64_t>(640 * 3));
+    EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_A));
+    EXPECT_EQ(data.format().channels(), 3);
+}
+
+/**
+ * @brief Same shape as the Hip test but for Host-resident strided data.
+ */
+void TestImageDataStridedHostConstruction() {
+    auto buf = MakeSinglePlaneBuffer(320, 240, 320, FAKE_PTR_B);
+    ImageDataStridedHost data(FMT_U8, buf);
+
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU));
+    EXPECT_EQ(data.numPlanes(), 1);
+    EXPECT_EQ(data.size().w, 320);
+    EXPECT_EQ(data.size().h, 240);
+    EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_B));
+    EXPECT_EQ(data.format().channels(), 1);
+}
+
+/**
+ * @brief Multi-plane buffers must round-trip per-plane dimensions and pointers
+ * unchanged. size() reports plane 0 by convention; planes 1..N may be smaller.
+ */
+void TestImageDataStridedMultiPlane() {
+    auto buf = MakeThreePlaneBuffer();
+    ImageDataStridedHip data(FMT_U8, buf);
+
+    // size() mirrors plane 0, the full-resolution plane of the fixture.
+    EXPECT_EQ(data.numPlanes(), 3);
+    EXPECT_EQ(data.size().w, 1920);
+    EXPECT_EQ(data.size().h, 1080);
+
+    // Each plane must report the geometry and pointer it was built with.
+    EXPECT_EQ(data.plane(0).width, 1920);
+    EXPECT_EQ(data.plane(0).height, 1080);
+    EXPECT_EQ(AsAddr(data.plane(0).basePtr), AsAddr(FAKE_PTR_A));
+
+    EXPECT_EQ(data.plane(1).width, 960);
+    EXPECT_EQ(data.plane(1).height, 540);
+    EXPECT_EQ(AsAddr(data.plane(1).basePtr), AsAddr(FAKE_PTR_B));
+
+    EXPECT_EQ(data.plane(2).width, 960);
+    EXPECT_EQ(data.plane(2).height, 540);
+    EXPECT_EQ(AsAddr(data.plane(2).basePtr), AsAddr(FAKE_PTR_C));
+}
+
+/**
+ * @brief The two leaf ctors (taking ImageBuffer vs ImageBufferStrided directly)
+ * must produce observably identical state.
+ */
+void TestImageDataStridedSugarCtor() {
+    auto buf = MakeSinglePlaneBuffer(100, 200, 400, FAKE_PTR_A);
+
+    // "wide" goes through the generic ImageBuffer wrapper; "sugar" passes the
+    // strided buffer straight in.
+    ImageDataStridedHip wide(FMT_RGBA8, ImageBuffer{.strided = buf});
+    ImageDataStridedHip sugar(FMT_RGBA8, buf);
+
+    EXPECT_EQ(AsInt(wide.device()), AsInt(sugar.device()));
+    EXPECT_EQ(wide.numPlanes(), sugar.numPlanes());
+    EXPECT_EQ(AsAddr(wide.plane(0).basePtr), AsAddr(sugar.plane(0).basePtr));
+    EXPECT_EQ(wide.plane(0).rowStride, sugar.plane(0).rowStride);
+
+    // Same equivalence check for the host-resident leaf type.
+    ImageDataStridedHost wideHost(FMT_U8, ImageBuffer{.strided = buf});
+    ImageDataStridedHost sugarHost(FMT_U8, buf);
+    EXPECT_EQ(AsInt(wideHost.device()), AsInt(sugarHost.device()));
+    EXPECT_EQ(AsAddr(wideHost.plane(0).basePtr), AsAddr(sugarHost.plane(0).basePtr));
+}
+
+/**
+ * @brief IsCompatibleKind on each level discriminates the buffer kinds it
+ * accepts. Base accepts anything-but-NONE; Strided accepts both Hip and Host;
+ * leaves accept only their own.
+ */
+void TestImageDataIsCompatibleKind() {
+    EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1);
+    EXPECT_EQ(AsInt(ImageData::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1);
+
+    EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1);
+    EXPECT_EQ(AsInt(ImageDataStrided::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1);
+
+    EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 1);
+    EXPECT_EQ(AsInt(ImageDataStridedHip::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 0);
+
+    EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HIP)), 0);
+    EXPECT_EQ(AsInt(ImageDataStridedHost::IsCompatibleKind(ImageBufferType::IMAGE_BUFFER_STRIDED_HOST)), 1);
+}
+
+/**
+ * @brief Round-trip a derived ImageData through the base reference and back
+ * via cast<>(). Successful casts must preserve every observable field; casts
+ * to incompatible kinds must return std::nullopt.
+ */
+void TestImageDataCast() {
+    auto buf = MakeSinglePlaneBuffer(800, 600, 800 * 4, FAKE_PTR_A);
+
+    // Hip → base → Hip should round-trip, Hip → Host should fail.
+    {
+        ImageDataStridedHip hip(FMT_RGBA8, buf);
+        const ImageData& base = hip;
+
+        auto asHip = base.cast<ImageDataStridedHip>();
+        EXPECT_EQ(AsInt(asHip.has_value()), 1);
+        // Never dereference an empty optional, whatever EXPECT_EQ does on failure.
+        if (!asHip.has_value()) return;
+        EXPECT_EQ(AsInt(asHip->device()), AsInt(eDeviceType::GPU));
+        EXPECT_EQ(AsAddr(asHip->plane(0).basePtr), AsAddr(FAKE_PTR_A));
+        EXPECT_EQ(asHip->plane(0).width, 800);
+
+        auto asStrided = base.cast<ImageDataStrided>();
+        EXPECT_EQ(AsInt(asStrided.has_value()), 1);
+        if (!asStrided.has_value()) return;
+        EXPECT_EQ(AsInt(asStrided->device()), AsInt(eDeviceType::GPU));
+
+        auto asHost = base.cast<ImageDataStridedHost>();
+        EXPECT_EQ(AsInt(asHost.has_value()), 0);
+    }
+
+    // Symmetrically: Host → base → Host succeeds, Host → Hip fails.
+    {
+        ImageDataStridedHost host(FMT_RGBA8, buf);
+        const ImageData& base = host;
+
+        auto asHost = base.cast<ImageDataStridedHost>();
+        EXPECT_EQ(AsInt(asHost.has_value()), 1);
+        if (!asHost.has_value()) return;
+        EXPECT_EQ(AsInt(asHost->device()), AsInt(eDeviceType::CPU));
+
+        auto asHip = base.cast<ImageDataStridedHip>();
+        EXPECT_EQ(AsInt(asHip.has_value()), 0);
+    }
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+    TEST_CASES_BEGIN();
+
+    TEST_CASE(TestImageDataStridedHipConstruction());
+    TEST_CASE(TestImageDataStridedHostConstruction());
+    TEST_CASE(TestImageDataStridedMultiPlane());
+    TEST_CASE(TestImageDataStridedSugarCtor());
+    TEST_CASE(TestImageDataIsCompatibleKind());
+    TEST_CASE(TestImageDataCast());
+
+    TEST_CASES_END();
+}
From 689a94de1987bd7465a52f09ac3d0b9f6ca70d2d Mon Sep 17 00:00:00 2001
From: Zach Vincze
Date: Tue, 5 May 2026 16:04:56 -0400
Subject: [PATCH 02/13] Add Image/ImageStorage implementations

---
 include/core/image.hpp         | 183 +++++++++++++++++++++++++++++++++
 include/core/image_storage.hpp |  56 ++++++++++
 src/core/image.cpp             | 168 ++++++++++++++++++++++++++++++
 3 files changed, 407 insertions(+)
 create mode 100644 include/core/image.hpp
 create mode 100644 include/core/image_storage.hpp
 create mode 100644 src/core/image.cpp

diff --git a/include/core/image.hpp b/include/core/image.hpp
new file mode 100644
index 00000000..ecde3b51
--- /dev/null
+++ b/include/core/image.hpp
@@ -0,0
+1,183 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+// NOTE(review): the four angle-bracket include targets below were missing
+// from this copy of the patch and are inferred from the names this header
+// uses (std::function, std::shared_ptr, std::optional, std::bad_cast) —
+// confirm against the repository.
+#include <functional>
+#include <memory>
+#include <optional>
+#include <typeinfo>
+
+#include "core/detail/allocators/i_allocator.hpp"
+#include "core/image_buffer.hpp"
+#include "core/image_data.hpp"
+#include "core/image_format.hpp"
+#include "core/util_enums.h"
+#include "operator_types.h"
+
+namespace roccv {
+
+class ImageStorage;
+
+/**
+ * @brief Cleanup callback signature for ImageWrapData. Invoked when the last
+ * Image handle referencing the wrapped buffer is destroyed. Receives the
+ * ImageData snapshot that was originally wrapped, so callbacks can free
+ * multi-plane buffers or dispatch on format.
+ */
+// NOTE(review): the callable signature was missing from this copy of the
+// patch; `void(const ImageData&)` is inferred from the description above —
+// confirm against the repository.
+using ImageDataCleanupFunc = std::function<void(const ImageData&)>;
+
+/**
+ * @brief Per-image allocation spec describing what to allocate for a single
+ * variable-sized image.
Mirrors NVCVImageRequirements: size, format, per-plane
+ * row strides, and base-address alignment. Used as the input to Image's
+ * allocating constructors and as the output of CalcRequirements; not stored
+ * on the Image instance after construction (m_metadata holds the runtime
+ * descriptor in ImageData form).
+ *
+ * Per-plane row strides are populated only for planes 0..numPlanes(format)-1;
+ * remaining slots are unused and default to zero. Today's interleaved-only
+ * ImageFormat means only planeRowStride[0] is populated in practice.
+ */
+struct ImageRequirements {
+    Size2D size;                                          // Width and height in pixels.
+    ImageFormat format;                                   // Pixel format (dtype + channel count + swizzle).
+    int64_t planeRowStride[ROCCV_MAX_IMAGE_PLANES] = {};  // Per-plane row stride in bytes.
+    int32_t alignBytes = 0;                               // Required base-address alignment, in bytes.
+};
+
+/**
+ * @brief A single variable-sized image whose pixel data lives on the GPU or
+ * on the host.
+ *
+ * Image is the per-element type held by ImageBatchVarShape. It is a handle
+ * over a refcounted ImageStorage: copying an Image bumps the refcount and
+ * leaves both handles pointing at the same underlying buffer. The buffer is
+ * freed when the last handle is destroyed (for owning Images) or when the
+ * cleanup callback fires (for ImageWrapData with a callback).
+ */
+class Image {
+   public:
+    using Requirements = ImageRequirements;
+
+    /**
+     * @brief Compute the requirements (row stride, etc.) for an image of the
+     * given dimensions and format.
+     */
+    static Requirements CalcRequirements(Size2D size, ImageFormat format);
+
+    /**
+     * @brief Allocate a new buffer on `device` for an image of the given
+     * dimensions and format using the global default allocator.
+     */
+    explicit Image(Size2D size, ImageFormat format, eDeviceType device = eDeviceType::GPU);
+
+    /**
+     * @brief Allocate a new buffer on `device` using a caller-supplied allocator.
+     */
+    explicit Image(Size2D size, ImageFormat format, const IAllocator& alloc, eDeviceType device = eDeviceType::GPU);
+
+    /**
+     * @brief Allocate a new buffer on `device` from precomputed requirements.
+     */
+    explicit Image(const Requirements& reqs, eDeviceType device = eDeviceType::GPU);
+    explicit Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType device = eDeviceType::GPU);
+
+    Image(const Image&) = default;  // refcount bump
+    Image(Image&&) noexcept = default;
+    Image& operator=(const Image&) = default;  // refcount bump
+    Image& operator=(Image&&) noexcept = default;
+    ~Image() = default;
+
+    /**
+     * @brief Image dimensions in pixels.
+     */
+    Size2D size() const noexcept;
+
+    /**
+     * @brief Pixel format.
+     */
+    ImageFormat format() const noexcept;
+
+    /**
+     * @brief Device the underlying buffer resides on.
+     */
+    eDeviceType device() const noexcept;
+
+    /**
+     * @brief Snapshot of the image's data buffer (pointer, stride, format).
+     *
+     * The returned ImageData references the same underlying buffer; lifetime
+     * is controlled by this Image's refcount, not by the snapshot.
+     */
+    ImageData exportData() const;
+
+    /**
+     * @brief Exports the image's data buffer and casts it to a specified image data object.
+     *
+     * Throws std::bad_cast if the underlying buffer kind does not match what
+     * `Derived` expects (e.g. exportData<ImageDataStridedHip>() on a host-resident
+     * image throws std::bad_cast). Convenience wrapper around ImageData::cast<>.
+     *
+     * @tparam Derived The ImageData subclass to cast to.
+     * @return The image data casted to the image data object specified
+     */
+    template <typename Derived>
+    Derived exportData() const {
+        ImageData data = exportData();
+        std::optional<Derived> derived_data = data.cast<Derived>();
+        if (!derived_data.has_value()) {
+            throw std::bad_cast();
+        }
+
+        return derived_data.value();
+    }
+
+   private:
+    // Internal ctor used by ImageWrapData and the allocating public ctors via
+    // delegation. Stores `metadata` and `storage` verbatim — no allocation.
+    Image(ImageData metadata, std::shared_ptr<ImageStorage> storage);
+
+    friend Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup);
+
+    // m_data is declared first so the allocating ctor can initialize it
+    // (allocating the buffer) before m_metadata reads back the pointer.
+    std::shared_ptr<ImageStorage> m_data;
+    ImageData m_metadata;
+};
+
+/**
+ * @brief Wrap an externally-owned buffer as an Image without allocating.
+ *
+ * View-only by default: the wrapped buffer is NOT freed when the returned
+ * Image (and any copies) go out of scope. The caller is responsible for
+ * keeping the underlying memory alive for as long as any handle survives.
+ *
+ * Pass a non-null cleanup callback to opt into ownership transfer; the
+ * callback runs exactly once, when the last handle is destroyed.
+ *
+ * @param[in] data Pre-existing image data (pointer, layout, device).
+ * @param[in] cleanup Optional callback to free the buffer on last destruction.
+ * @return An Image referencing the wrapped buffer.
+ */
+extern Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup = nullptr);
+
+}  // namespace roccv
diff --git a/include/core/image_storage.hpp b/include/core/image_storage.hpp
new file mode 100644
index 00000000..69b9d3c0
--- /dev/null
+++ b/include/core/image_storage.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+namespace roccv {
+
+/**
+ * @brief Holds the raw data pointer for a single Image and serves as the
+ * refcount target shared between Image handles.
+ *
+ * ImageStorage carries no lifecycle logic of its own: freeing the underlying
+ * buffer is the responsibility of the shared_ptr<ImageStorage> deleter
+ * installed at the Image construction site. The allocating Image ctor
+ * captures the allocator + device into its deleter; ImageWrapData captures
+ * the user's cleanup callback (or installs none for the view-only case).
+ *
+ * As a result, ImageStorage is held only by shared_ptr — never by value, never
+ * copied. Move/copy are deleted to enforce that.
+ */
+class ImageStorage {
+   public:
+    /**
+     * @brief Records `data` as the tracked pointer. The storage itself never
+     * frees it.
+     */
+    explicit ImageStorage(void* data) : m_data(data) {}
+
+    ImageStorage(const ImageStorage&) = delete;
+    ImageStorage& operator=(const ImageStorage&) = delete;
+    // Moves were already unavailable (the deleted copy operations suppress
+    // them); spelling them out makes the contract documented above explicit.
+    ImageStorage(ImageStorage&&) = delete;
+    ImageStorage& operator=(ImageStorage&&) = delete;
+
+    /**
+     * @brief Returns the raw data pointer this storage is tracking.
+     */
+    void* data() const noexcept { return m_data; }
+
+   private:
+    void* m_data;
+};
+
+}  // namespace roccv
diff --git a/src/core/image.cpp b/src/core/image.cpp
new file mode 100644
index 00000000..995fd4a1
--- /dev/null
+++ b/src/core/image.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "core/image.hpp" + +#include "core/data_type.hpp" +#include "core/detail/context.hpp" +#include "core/exception.hpp" +#include "core/image_storage.hpp" + +namespace roccv { + +namespace { + +// Allocates a buffer through `alloc` for the requested device and wraps it +// in an ImageStorage whose shared_ptr deleter frees through the same allocator. +// The allocator reference is captured by reference; callers must ensure it +// outlives every Image (and any handle copied from it) it creates. 
+std::shared_ptr makeStorage(const ImageRequirements& reqs, const IAllocator& alloc, eDeviceType device) { + const size_t bytes = static_cast(reqs.planeRowStride[0]) * reqs.size.h; + + void* buf = nullptr; + switch (device) { + case eDeviceType::GPU: + buf = alloc.allocHipMem(bytes); + break; + case eDeviceType::CPU: + buf = alloc.allocHostMem(bytes); + break; + } + + return std::shared_ptr(new ImageStorage(buf), [&alloc, device](ImageStorage* s) { + switch (device) { + case eDeviceType::GPU: + alloc.freeHipMem(s->data()); + break; + case eDeviceType::CPU: + alloc.freeHostMem(s->data()); + break; + } + delete s; + }); +} + +// Builds the canonical ImageData stored on Image from a freshly-allocated +// (or wrapped) buffer plus its layout description. Single-plane today — +// ImageFormat is interleaved-only, so only planes[0] is populated. +ImageData makeImageData(const ImageRequirements& reqs, void* buf, eDeviceType device) { + ImageBufferStrided strided{}; + strided.numPlanes = 1; + strided.planes[0].width = reqs.size.w; + strided.planes[0].height = reqs.size.h; + strided.planes[0].rowStride = reqs.planeRowStride[0]; + strided.planes[0].basePtr = buf; + + switch (device) { + case eDeviceType::GPU: + return ImageDataStridedHip(reqs.format, strided); + case eDeviceType::CPU: + return ImageDataStridedHost(reqs.format, strided); + } + + throw Exception("Unsupported device type in Image::makeImageData.", eStatusType::INVALID_VALUE); +} + +} // namespace + +// ----------------------------------------------------------------------------- +// CalcRequirements +// ----------------------------------------------------------------------------- + +Image::Requirements Image::CalcRequirements(Size2D size, ImageFormat format) { + if (size.w < 1 || size.h < 1) { + throw Exception("Image dimensions must be >= 1.", eStatusType::INVALID_VALUE); + } + + ImageRequirements reqs; + reqs.size = size; + reqs.format = format; + + const int64_t bytesPerPixel = 
static_cast(DataType(format.dtype()).size()) * format.channels(); + reqs.planeRowStride[0] = bytesPerPixel * size.w; // packed; no row padding while alignBytes is unused. + + // TODO: derive a sensible default base/row alignment from device attributes. + reqs.alignBytes = 0; + + return reqs; +} + +// ----------------------------------------------------------------------------- +// Constructors +// ----------------------------------------------------------------------------- + +Image::Image(Size2D size, ImageFormat format, eDeviceType device) + : Image(size, format, GlobalContext().getDefaultAllocator(), device) {} + +Image::Image(Size2D size, ImageFormat format, const IAllocator& alloc, eDeviceType device) + : Image(CalcRequirements(size, format), alloc, device) {} + +Image::Image(const Requirements& reqs, eDeviceType device) + : Image(reqs, GlobalContext().getDefaultAllocator(), device) {} + +Image::Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType device) + : m_data(makeStorage(reqs, alloc, device)), m_metadata(makeImageData(reqs, m_data->data(), device)) {} + +Image::Image(ImageData metadata, std::shared_ptr storage) + : m_data(std::move(storage)), m_metadata(std::move(metadata)) {} + +// ----------------------------------------------------------------------------- +// Accessors +// ----------------------------------------------------------------------------- + +Size2D Image::size() const noexcept { return m_metadata.cast()->size(); } + +ImageFormat Image::format() const noexcept { return m_metadata.format(); } + +eDeviceType Image::device() const noexcept { return m_metadata.device(); } + +ImageData Image::exportData() const { return m_metadata; } + +// ----------------------------------------------------------------------------- +// ImageWrapData +// ----------------------------------------------------------------------------- + +Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup) { + auto strided = data.cast(); + if 
(!strided.has_value()) { + throw Exception("ImageWrapData requires strided image data.", eStatusType::INVALID_VALUE); + } + + // Storage tracks plane(0)'s base pointer. Single-plane today; multi-plane + // wraps would need a richer storage shape (or to abandon storing the + // pointer here at all). + void* basePtr = strided->plane(0).basePtr; + + // Deleter captures both the original ImageData snapshot and the user's + // cleanup callback. View-only (cleanup == nullptr) means the deleter + // touches nothing but the storage object itself. + auto storage = std::shared_ptr(new ImageStorage(basePtr), [data, cleanup](ImageStorage* s) { + if (cleanup) { + cleanup(data); + } + delete s; + }); + + return Image(data, std::move(storage)); +} + +} // namespace roccv From 72f5218adcfb6b6da8e510e09858a4cfeca31d7e Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Tue, 5 May 2026 16:16:22 -0400 Subject: [PATCH 03/13] Add Image tests and fix zero-initialization bug in Image::CalcRequirements --- src/core/image.cpp | 18 +- .../cpp/src/tests/core/image/test_image.cpp | 441 ++++++++++++++++++ 2 files changed, 451 insertions(+), 8 deletions(-) create mode 100644 tests/roccv/cpp/src/tests/core/image/test_image.cpp diff --git a/src/core/image.cpp b/src/core/image.cpp index 995fd4a1..4319eb20 100644 --- a/src/core/image.cpp +++ b/src/core/image.cpp @@ -93,17 +93,19 @@ Image::Requirements Image::CalcRequirements(Size2D size, ImageFormat format) { throw Exception("Image dimensions must be >= 1.", eStatusType::INVALID_VALUE); } - ImageRequirements reqs; - reqs.size = size; - reqs.format = format; - const int64_t bytesPerPixel = static_cast(DataType(format.dtype()).size()) * format.channels(); - reqs.planeRowStride[0] = bytesPerPixel * size.w; // packed; no row padding while alignBytes is unused. 
+ // Designated aggregate init: planeRowStride[0] is set explicitly; the + // remaining ROCCV_MAX_IMAGE_PLANES-1 slots are zeroed by the trailing- + // elements rule for brace-enclosed array initializers. alignBytes stays at + // 0 since CalcRequirements always produces packed rows for now. // TODO: derive a sensible default base/row alignment from device attributes. - reqs.alignBytes = 0; - - return reqs; + return ImageRequirements{ + .size = size, + .format = format, + .planeRowStride = {bytesPerPixel * size.w}, + .alignBytes = 0, + }; } // ----------------------------------------------------------------------------- diff --git a/tests/roccv/cpp/src/tests/core/image/test_image.cpp b/tests/roccv/cpp/src/tests/core/image/test_image.cpp new file mode 100644 index 00000000..879dd68c --- /dev/null +++ b/tests/roccv/cpp/src/tests/core/image/test_image.cpp @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +// EXPECT_EQ pipes through std::to_string, so wrap enums/pointers/bools. +auto AsInt = [](auto v) { return static_cast(v); }; +auto AsAddr = [](void* p) { return reinterpret_cast(p); }; +auto AsSize = [](auto v) { return static_cast(v); }; + +void* const FAKE_PTR_A = reinterpret_cast(0xAAAAAAAAull); + +/** + * @brief Test allocator that backs allocations with malloc and tallies how + * many times each entry point is invoked. Pure host-side; no GPU dependency. + * + * The Hip path returns malloc'd memory because no test dereferences it — we + * only care that ptr round-trips through Image and that free is called the + * right number of times. + */ +class CountingAllocator : public IAllocator { + public: + mutable int hipAllocs = 0; + mutable int hipFrees = 0; + mutable int hostAllocs = 0; + mutable int hostFrees = 0; + mutable size_t lastAllocBytes = 0; + + void* allocHipMem(size_t size) const override { + ++hipAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHipMem(void* ptr) const noexcept override { + ++hipFrees; + std::free(ptr); + } + + void* allocHostMem(size_t size, int32_t /*alignment*/ = 0) const override { + ++hostAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHostMem(void* ptr) const noexcept override { + ++hostFrees; + std::free(ptr); + } + + // Unused by the Image paths under test. Trip loudly if invoked unexpectedly. 
+ void* allocHostPinnedMem(size_t) const override { throw std::runtime_error("unused in tests"); } + void freeHostPinnedMem(void*) const noexcept override { std::abort(); } +}; + +// Build a single-plane ImageData snapshot referencing a sentinel pointer. Used +// for ImageWrapData tests where we never dereference the buffer. +ImageDataStridedHip MakeFakeHipData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + ImageBufferStrided buf{}; + buf.numPlanes = 1; + buf.planes[0] = {width, height, static_cast(width * fmt.channels()), basePtr}; + return ImageDataStridedHip(fmt, buf); +} + +// ============================================================================= +// CalcRequirements +// ============================================================================= + +/** + * @brief Packed-row stride for a typical 3-channel uint8 image. Other fields + * propagate unchanged; remaining plane slots stay zeroed. + */ +void TestCalcRequirementsRgb8() { + auto reqs = Image::CalcRequirements({320, 240}, FMT_RGB8); + + EXPECT_EQ(reqs.size.w, 320); + EXPECT_EQ(reqs.size.h, 240); + EXPECT_EQ(reqs.format.channels(), 3); + EXPECT_EQ(reqs.planeRowStride[0], static_cast(320 * 3)); + EXPECT_EQ(reqs.planeRowStride[1], 0); + EXPECT_EQ(reqs.planeRowStride[5], 0); + EXPECT_EQ(reqs.alignBytes, 0); +} + +/** + * @brief Multi-byte dtype is reflected in the per-pixel byte count. + */ +void TestCalcRequirementsF32() { + auto reqs = Image::CalcRequirements({64, 64}, FMT_F32); + EXPECT_EQ(reqs.planeRowStride[0], static_cast(64 * 4)); +} + +/** + * @brief Single-channel U8 → row stride equals width. + */ +void TestCalcRequirementsU8() { + auto reqs = Image::CalcRequirements({100, 50}, FMT_U8); + EXPECT_EQ(reqs.planeRowStride[0], 100); +} + +/** + * @brief Width or height < 1 must throw INVALID_VALUE. 
+ */ +void TestCalcRequirementsRejectsInvalidDims() { + EXPECT_EXCEPTION(Image::CalcRequirements({0, 100}, FMT_RGB8), eStatusType::INVALID_VALUE); + EXPECT_EXCEPTION(Image::CalcRequirements({100, 0}, FMT_RGB8), eStatusType::INVALID_VALUE); + EXPECT_EXCEPTION(Image::CalcRequirements({-5, 100}, FMT_RGB8), eStatusType::INVALID_VALUE); + EXPECT_EXCEPTION(Image::CalcRequirements({100, -5}, FMT_RGB8), eStatusType::INVALID_VALUE); +} + +/** + * @brief Large widths must not overflow during stride math; row stride must + * fit in int64. + */ +void TestCalcRequirementsLargeDims() { + // 8K image, RGBA8 (4 channels * 1 byte = 4 B/pixel) → 8192 * 4 = 32768 B/row. + auto reqs = Image::CalcRequirements({8192, 4320}, FMT_RGBA8); + EXPECT_EQ(reqs.planeRowStride[0], static_cast(8192 * 4)); +} + +// ============================================================================= +// Allocating constructors +// ============================================================================= + +/** + * @brief GPU-device ctor routes allocation through allocHipMem with the + * computed byte count. + */ +void TestImageHipAllocation() { + CountingAllocator alloc; + { + Image img({320, 240}, FMT_RGB8, alloc, eDeviceType::GPU); + + EXPECT_EQ(alloc.hipAllocs, 1); + EXPECT_EQ(alloc.hostAllocs, 0); + EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(320 * 3 * 240)); + + EXPECT_EQ(img.size().w, 320); + EXPECT_EQ(img.size().h, 240); + EXPECT_EQ(AsInt(img.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(img.format().channels(), 3); + + // Image is still alive — buffer not yet freed. + EXPECT_EQ(alloc.hipFrees, 0); + } + // Image dropped — buffer freed exactly once via the matching allocator. + EXPECT_EQ(alloc.hipFrees, 1); +} + +/** + * @brief Same shape as the Hip test but for CPU residency.
+ */ +void TestImageHostAllocation() { + CountingAllocator alloc; + { + Image img({100, 50}, FMT_U8, alloc, eDeviceType::CPU); + + EXPECT_EQ(alloc.hostAllocs, 1); + EXPECT_EQ(alloc.hipAllocs, 0); + EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(100 * 50)); + EXPECT_EQ(AsInt(img.device()), AsInt(eDeviceType::CPU)); + } + EXPECT_EQ(alloc.hostFrees, 1); +} + +/** + * @brief Constructing from precomputed Requirements yields observably + * identical state to the (Size2D, ImageFormat) sugar form. + */ +void TestImageRequirementsCtor() { + CountingAllocator alloc; + auto reqs = Image::CalcRequirements({64, 32}, FMT_RGBA8); + + Image img(reqs, alloc, eDeviceType::GPU); + + EXPECT_EQ(img.size().w, 64); + EXPECT_EQ(img.size().h, 32); + EXPECT_EQ(img.format().channels(), 4); + EXPECT_EQ(AsSize(alloc.lastAllocBytes), AsSize(64 * 4 * 32)); +} + +// ============================================================================= +// Refcount / lifecycle +// ============================================================================= + +/** + * @brief Copying an Image bumps the refcount: both handles see the same + * underlying buffer, and free is deferred until the LAST handle drops. + */ +void TestImageCopySharesBuffer() { + CountingAllocator alloc; + void* buf = nullptr; + { + Image first({16, 16}, FMT_U8, alloc, eDeviceType::GPU); + buf = first.exportData().cast()->plane(0).basePtr; + + Image second = first; // refcount bump + EXPECT_EQ(alloc.hipAllocs, 1); // No new allocation. + EXPECT_EQ(AsAddr(second.exportData().cast()->plane(0).basePtr), AsAddr(buf)); + + // Drop `first`; buffer must NOT be freed yet — `second` still holds it. + { + Image consumed = std::move(first); + (void)consumed; + } + EXPECT_EQ(alloc.hipFrees, 0); + } + // All handles dropped — exactly one free. + EXPECT_EQ(alloc.hipFrees, 1); +} + +/** + * @brief Move-construction transfers the buffer; the source is left empty. + * The buffer must still free exactly once (when the destination drops). 
+ */ +void TestImageMoveSemantics() { + CountingAllocator alloc; + { + Image src({8, 8}, FMT_U8, alloc, eDeviceType::CPU); + void* srcBuf = src.exportData().cast()->plane(0).basePtr; + + Image dst = std::move(src); + EXPECT_EQ(AsAddr(dst.exportData().cast()->plane(0).basePtr), AsAddr(srcBuf)); + EXPECT_EQ(alloc.hostFrees, 0); + } + EXPECT_EQ(alloc.hostFrees, 1); +} + +// ============================================================================= +// exportData / exportData() +// ============================================================================= + +/** + * @brief exportData() returns an ImageData snapshot that mirrors the Image's + * size, format, device, and base pointer. + */ +void TestImageExportData() { + CountingAllocator alloc; + Image img({80, 60}, FMT_RGBA8, alloc, eDeviceType::GPU); + ImageData data = img.exportData(); + + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(data.format().channels(), 4); + + auto strided = data.cast(); + EXPECT_EQ(AsInt(strided.has_value()), 1); + EXPECT_EQ(strided->plane(0).width, 80); + EXPECT_EQ(strided->plane(0).height, 60); + EXPECT_EQ(strided->plane(0).rowStride, static_cast(80 * 4)); +} + +/** + * @brief Templated exportData() returns the matching subclass directly. + */ +void TestImageExportDataTypedSuccess() { + CountingAllocator alloc; + Image img({4, 4}, FMT_U8, alloc, eDeviceType::GPU); + + auto hip = img.exportData(); + EXPECT_EQ(AsInt(hip.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(hip.plane(0).width, 4); +} + +/** + * @brief Templated exportData() throws std::bad_cast when the requested + * subclass does not match the underlying buffer kind. 
+ */ +void TestImageExportDataTypedMismatch() { + CountingAllocator alloc; + Image img({4, 4}, FMT_U8, alloc, eDeviceType::GPU); + + bool threw = false; + try { + (void)img.exportData(); + } catch (const std::bad_cast&) { + threw = true; + } + EXPECT_EQ(AsInt(threw), 1); +} + +// ============================================================================= +// ImageWrapData +// ============================================================================= + +/** + * @brief View-only wrap (no cleanup callback) must not free the wrapped + * buffer when the Image is destroyed. + */ +void TestImageWrapDataViewOnly() { + int frees = 0; + { + Image wrapped = ImageWrapData(MakeFakeHipData(640, 480, FAKE_PTR_A)); + EXPECT_EQ(wrapped.size().w, 640); + EXPECT_EQ(wrapped.size().h, 480); + EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(AsAddr(wrapped.exportData().cast()->plane(0).basePtr), AsAddr(FAKE_PTR_A)); + } + // No callback was registered — nothing observable should have changed. + EXPECT_EQ(frees, 0); +} + +/** + * @brief Wrap with a cleanup callback: the callback fires exactly once when + * the last Image handle goes out of scope. + */ +void TestImageWrapDataCleanupFires() { + int callbackInvocations = 0; + { + Image wrapped = + ImageWrapData(MakeFakeHipData(100, 100, FAKE_PTR_A), [&](const ImageData&) { ++callbackInvocations; }); + EXPECT_EQ(callbackInvocations, 0); // Not fired during normal use. + } + EXPECT_EQ(callbackInvocations, 1); +} + +/** + * @brief Cleanup callback receives the original wrapped ImageData snapshot — + * the captured base pointer must match what was passed to ImageWrapData. 
+ */ +void TestImageWrapDataCleanupReceivesData() { + void* receivedBasePtr = nullptr; + { + Image wrapped = ImageWrapData(MakeFakeHipData(50, 50, FAKE_PTR_A), [&](const ImageData& d) { + receivedBasePtr = d.cast()->plane(0).basePtr; + }); + } + EXPECT_EQ(AsAddr(receivedBasePtr), AsAddr(FAKE_PTR_A)); +} + +/** + * @brief Cleanup must fire only on LAST handle drop — copies bump the + * refcount, intermediate drops do nothing. + */ +void TestImageWrapDataCleanupFiresOnce() { + int callbackInvocations = 0; + { + Image first = + ImageWrapData(MakeFakeHipData(10, 10, FAKE_PTR_A), [&](const ImageData&) { ++callbackInvocations; }); + Image second = first; // refcount = 2 + Image third = first; // refcount = 3 + { + Image fourth = third; + (void)fourth; + } // dropped → refcount = 3 + EXPECT_EQ(callbackInvocations, 0); + // first, second, third still alive at scope exit + } + EXPECT_EQ(callbackInvocations, 1); +} + +/** + * @brief Wrapped Image's accessors mirror the wrapped ImageData verbatim — + * size, format, device, and base pointer all round-trip unchanged. 
+ */ +void TestImageWrapDataAccessors() { + auto fake = MakeFakeHipData(123, 45, FAKE_PTR_A, FMT_RGBA8); + Image wrapped = ImageWrapData(fake); + + EXPECT_EQ(wrapped.size().w, 123); + EXPECT_EQ(wrapped.size().h, 45); + EXPECT_EQ(wrapped.format().channels(), 4); + EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU)); + + auto strided = wrapped.exportData().cast(); + EXPECT_EQ(AsInt(strided.has_value()), 1); + EXPECT_EQ(strided->plane(0).width, 123); + EXPECT_EQ(strided->plane(0).height, 45); + EXPECT_EQ(AsAddr(strided->plane(0).basePtr), AsAddr(FAKE_PTR_A)); +} + +} // namespace + +int main(int argc, char** argv) { + (void)argc; + (void)argv; + TEST_CASES_BEGIN(); + + // CalcRequirements + TEST_CASE(TestCalcRequirementsRgb8()); + TEST_CASE(TestCalcRequirementsF32()); + TEST_CASE(TestCalcRequirementsU8()); + TEST_CASE(TestCalcRequirementsRejectsInvalidDims()); + TEST_CASE(TestCalcRequirementsLargeDims()); + + // Allocating constructors + TEST_CASE(TestImageHipAllocation()); + TEST_CASE(TestImageHostAllocation()); + TEST_CASE(TestImageRequirementsCtor()); + + // Refcount / lifecycle + TEST_CASE(TestImageCopySharesBuffer()); + TEST_CASE(TestImageMoveSemantics()); + + // exportData + TEST_CASE(TestImageExportData()); + TEST_CASE(TestImageExportDataTypedSuccess()); + TEST_CASE(TestImageExportDataTypedMismatch()); + + // ImageWrapData + TEST_CASE(TestImageWrapDataViewOnly()); + TEST_CASE(TestImageWrapDataCleanupFires()); + TEST_CASE(TestImageWrapDataCleanupReceivesData()); + TEST_CASE(TestImageWrapDataCleanupFiresOnce()); + TEST_CASE(TestImageWrapDataAccessors()); + + TEST_CASES_END(); +} From 554473fa89139b7569d6254080004d1aaefe8579 Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Tue, 5 May 2026 16:28:14 -0400 Subject: [PATCH 04/13] General cleanup and redundant comment removal --- include/core/image.hpp | 42 ++++--------------- include/core/image_storage.hpp | 3 -- src/core/image.cpp | 14 +------ tests/roccv/cpp/include/test_helpers.hpp | 6 +++ 
.../cpp/src/tests/core/image/test_image.cpp | 29 ++++--------- .../src/tests/core/image/test_image_data.cpp | 5 --- 6 files changed, 23 insertions(+), 76 deletions(-) diff --git a/include/core/image.hpp b/include/core/image.hpp index ecde3b51..7b057851 100644 --- a/include/core/image.hpp +++ b/include/core/image.hpp @@ -107,53 +107,25 @@ class Image { Image& operator=(Image&&) noexcept = default; ~Image() = default; - /** - * @brief Image dimensions in pixels. - */ Size2D size() const noexcept; - - /** - * @brief Pixel format. - */ ImageFormat format() const noexcept; - - /** - * @brief Device the underlying buffer resides on. - */ eDeviceType device() const noexcept; - /** - * @brief Snapshot of the image's data buffer (pointer, stride, format). - * - * The returned ImageData references the same underlying buffer; lifetime - * is controlled by this Image's refcount, not by the snapshot. - */ - ImageData exportData() const; + // Reference into m_metadata; valid as long as this Image (or any handle + // sharing its storage) is alive. + const ImageData& exportData() const noexcept { return m_metadata; } - /** - * @brief Exports the image's data buffer and casts it to a specified image data object. - * - * Throws std::bad_cast if the underlying buffer kind does not match what - * `Derived` expects (e.g. exportData() on a host-resident - * image throws std::bad_cast). Convenience wrapper around ImageData::cast<>. - * - * @tparam Derived The ImageData subclass to cast to. - * @return The image data casted to the image data object specified - */ + // Throws std::bad_cast if the underlying buffer kind doesn't match Derived. 
template Derived exportData() const { - ImageData data = exportData(); - std::optional derived_data = data.cast(); - if (!derived_data.has_value()) { + auto derived = m_metadata.cast(); + if (!derived.has_value()) { throw std::bad_cast(); } - - return derived_data.value(); + return derived.value(); } private: - // Internal ctor used by ImageWrapData and the allocating public ctors via - // delegation. Stores `metadata` and `storage` verbatim — no allocation. Image(ImageData metadata, std::shared_ptr storage); friend Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup); diff --git a/include/core/image_storage.hpp b/include/core/image_storage.hpp index 69b9d3c0..70984742 100644 --- a/include/core/image_storage.hpp +++ b/include/core/image_storage.hpp @@ -44,9 +44,6 @@ class ImageStorage { ImageStorage(const ImageStorage&) = delete; ImageStorage& operator=(const ImageStorage&) = delete; - /** - * @brief Returns the raw data pointer this storage is tracking. - */ void* data() const noexcept { return m_data; } private: diff --git a/src/core/image.cpp b/src/core/image.cpp index 4319eb20..04e08d2c 100644 --- a/src/core/image.cpp +++ b/src/core/image.cpp @@ -95,10 +95,6 @@ Image::Requirements Image::CalcRequirements(Size2D size, ImageFormat format) { const int64_t bytesPerPixel = static_cast(DataType(format.dtype()).size()) * format.channels(); - // Designated aggregate init: planeRowStride[0] is set explicitly; the - // remaining ROCCV_MAX_IMAGE_PLANES-1 slots are zeroed by the trailing- - // elements rule for brace-enclosed array initializers. alignBytes stays at - // 0 since CalcRequirements always produces packed rows for now. // TODO: derive a sensible default base/row alignment from device attributes. 
return ImageRequirements{ .size = size, @@ -137,8 +133,6 @@ ImageFormat Image::format() const noexcept { return m_metadata.format(); } eDeviceType Image::device() const noexcept { return m_metadata.device(); } -ImageData Image::exportData() const { return m_metadata; } - // ----------------------------------------------------------------------------- // ImageWrapData // ----------------------------------------------------------------------------- @@ -149,14 +143,10 @@ Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup) { throw Exception("ImageWrapData requires strided image data.", eStatusType::INVALID_VALUE); } - // Storage tracks plane(0)'s base pointer. Single-plane today; multi-plane - // wraps would need a richer storage shape (or to abandon storing the - // pointer here at all). + // Single-plane assumption: storage tracks plane(0). Multi-plane wraps will + // need a richer storage shape. void* basePtr = strided->plane(0).basePtr; - // Deleter captures both the original ImageData snapshot and the user's - // cleanup callback. View-only (cleanup == nullptr) means the deleter - // touches nothing but the storage object itself. auto storage = std::shared_ptr(new ImageStorage(basePtr), [data, cleanup](ImageStorage* s) { if (cleanup) { cleanup(data); diff --git a/tests/roccv/cpp/include/test_helpers.hpp b/tests/roccv/cpp/include/test_helpers.hpp index 6c43053b..df6840f5 100644 --- a/tests/roccv/cpp/include/test_helpers.hpp +++ b/tests/roccv/cpp/include/test_helpers.hpp @@ -198,6 +198,12 @@ namespace tests { ". Expected no exceptions, but received the following exception: " + e.what()); \ } +// EXPECT_EQ pipes through std::to_string, so wrap enums/pointers/bools through +// these casts before comparing. 
+inline auto AsInt = [](auto v) { return static_cast(v); }; +inline auto AsAddr = [](void* p) { return reinterpret_cast(p); }; +inline auto AsSize = [](auto v) { return static_cast(v); }; + /** * @brief Creates a NHWC tensor which contains data loaded from an image. * diff --git a/tests/roccv/cpp/src/tests/core/image/test_image.cpp b/tests/roccv/cpp/src/tests/core/image/test_image.cpp index 879dd68c..13937dff 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image.cpp @@ -39,11 +39,6 @@ using namespace roccv::tests; namespace { -// EXPECT_EQ pipes through std::to_string, so wrap enums/pointers/bools. -auto AsInt = [](auto v) { return static_cast(v); }; -auto AsAddr = [](void* p) { return reinterpret_cast(p); }; -auto AsSize = [](auto v) { return static_cast(v); }; - void* const FAKE_PTR_A = reinterpret_cast(0xAAAAAAAAull); /** @@ -233,10 +228,7 @@ void TestImageCopySharesBuffer() { EXPECT_EQ(AsAddr(second.exportData().cast()->plane(0).basePtr), AsAddr(buf)); // Drop `first`; buffer must NOT be freed yet — `second` still holds it. - { - Image consumed = std::move(first); - (void)consumed; - } + { Image sink = std::move(first); } EXPECT_EQ(alloc.hipFrees, 0); } // All handles dropped — exactly one free. @@ -317,20 +309,15 @@ void TestImageExportDataTypedMismatch() { // ============================================================================= /** - * @brief View-only wrap (no cleanup callback) must not free the wrapped - * buffer when the Image is destroyed. + * @brief View-only wrap (no cleanup callback) round-trips metadata and must + * not crash when the Image is destroyed (no free attempt on the sentinel ptr). 
*/ void TestImageWrapDataViewOnly() { - int frees = 0; - { - Image wrapped = ImageWrapData(MakeFakeHipData(640, 480, FAKE_PTR_A)); - EXPECT_EQ(wrapped.size().w, 640); - EXPECT_EQ(wrapped.size().h, 480); - EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU)); - EXPECT_EQ(AsAddr(wrapped.exportData().cast()->plane(0).basePtr), AsAddr(FAKE_PTR_A)); - } - // No callback was registered — nothing observable should have changed. - EXPECT_EQ(frees, 0); + Image wrapped = ImageWrapData(MakeFakeHipData(640, 480, FAKE_PTR_A)); + EXPECT_EQ(wrapped.size().w, 640); + EXPECT_EQ(wrapped.size().h, 480); + EXPECT_EQ(AsInt(wrapped.device()), AsInt(eDeviceType::GPU)); + EXPECT_EQ(AsAddr(wrapped.exportData().cast()->plane(0).basePtr), AsAddr(FAKE_PTR_A)); } /** diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp index 885b1aa5..bea99d59 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp @@ -33,11 +33,6 @@ using namespace roccv::tests; namespace { -// EXPECT_EQ feeds both sides through std::to_string, which only accepts -// numeric types. Wrap enum/pointer/bool comparisons in these casts. -auto AsInt = [](auto v) { return static_cast(v); }; -auto AsAddr = [](void* p) { return reinterpret_cast(p); }; - // ImageData carries pointers but never dereferences them; the buffer is a // metadata snapshot. Use opaque sentinel pointers in tests so we can verify // values flow through without needing real allocations. 
From 8bebbfb284a982ca7e1949732104712ac903be68 Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Wed, 6 May 2026 12:54:32 -0400 Subject: [PATCH 05/13] Remove ImageData from Image private member --- include/core/image.hpp | 51 +++++++++++++++------- src/core/image.cpp | 97 +++++++++++++++++++++++++----------------- 2 files changed, 92 insertions(+), 56 deletions(-) diff --git a/include/core/image.hpp b/include/core/image.hpp index 7b057851..7a42d51a 100644 --- a/include/core/image.hpp +++ b/include/core/image.hpp @@ -22,10 +22,11 @@ #pragma once +#include #include #include #include -#include +#include #include "core/detail/allocators/i_allocator.hpp" #include "core/image_buffer.hpp" @@ -50,9 +51,9 @@ using ImageDataCleanupFunc = std::function; * @brief Per-image allocation spec describing what to allocate for a single * variable-sized image. Mirrors NVCVImageRequirements: size, format, per-plane * row strides, and base-address alignment. Used as the input to Image's - * allocating constructors and as the output of CalcRequirements; not stored - * on the Image instance after construction (m_metadata holds the runtime - * descriptor in ImageData form). + * allocating constructors and as the output of CalcRequirements; also + * preserved on the Image itself as the source of truth from which exportData() + * rebuilds an ImageData snapshot on demand. * * Per-plane row strides are populated only for planes 0..numPlanes(format)-1; * remaining slots are unused. Today's interleaved-only ImageFormat means only @@ -73,6 +74,13 @@ struct ImageRequirements { * leaves both handles pointing at the same underlying buffer. The buffer is * freed when the last handle is destroyed (for owning Images) or when the * cleanup callback fires (for ImageWrapData with a callback). + * + * Storage shape: Image holds the buffer pointer (via ImageStorage) plus the + * "ingredients" describing it (size, format, device, per-plane row strides). 
+ * It does NOT hold a precomputed ImageData snapshot — exportData() rebuilds + * one on demand from the ingredients. This keeps a single source of truth for + * the buffer pointer and aligns with how ImageBatchVarShape produces its + * own snapshots. */ class Image { public: @@ -107,18 +115,28 @@ class Image { Image& operator=(Image&&) noexcept = default; ~Image() = default; - Size2D size() const noexcept; - ImageFormat format() const noexcept; - eDeviceType device() const noexcept; + Size2D size() const noexcept { return m_size; } + ImageFormat format() const noexcept { return m_format; } + eDeviceType device() const noexcept { return m_device; } - // Reference into m_metadata; valid as long as this Image (or any handle - // sharing its storage) is alive. - const ImageData& exportData() const noexcept { return m_metadata; } + /** + * @brief Build and return an ImageData snapshot describing this image. + * + * Returned by value (not by reference) — Image stores ingredients, not a + * precomputed snapshot, so each call constructs a fresh ImageData. The + * snapshot's plane descriptors point into this Image's buffer; it remains + * valid as long as any handle to this storage is alive. + */ + ImageData exportData() const; - // Throws std::bad_cast if the underlying buffer kind doesn't match Derived. + /** + * @brief Build a snapshot and down-cast it to a specific subclass. Throws + * std::bad_cast if the underlying buffer kind doesn't match Derived. 
+ */ template Derived exportData() const { - auto derived = m_metadata.cast(); + ImageData data = exportData(); + auto derived = data.cast(); if (!derived.has_value()) { throw std::bad_cast(); } @@ -126,14 +144,15 @@ class Image { } private: - Image(ImageData metadata, std::shared_ptr storage); + Image(const Requirements& reqs, eDeviceType device, std::shared_ptr storage); friend Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup); - // m_data is declared first so the allocating ctor can initialize it - // (allocating the buffer) before m_metadata reads back the pointer. std::shared_ptr m_data; - ImageData m_metadata; + Size2D m_size; + ImageFormat m_format; + eDeviceType m_device; + std::array m_planeRowStride; }; /** diff --git a/src/core/image.cpp b/src/core/image.cpp index 04e08d2c..12a20d9a 100644 --- a/src/core/image.cpp +++ b/src/core/image.cpp @@ -22,6 +22,9 @@ #include "core/image.hpp" +#include +#include + #include "core/data_type.hpp" #include "core/detail/context.hpp" #include "core/exception.hpp" @@ -61,27 +64,6 @@ std::shared_ptr makeStorage(const ImageRequirements& reqs, const I }); } -// Builds the canonical ImageData stored on Image from a freshly-allocated -// (or wrapped) buffer plus its layout description. Single-plane today — -// ImageFormat is interleaved-only, so only planes[0] is populated. 
-ImageData makeImageData(const ImageRequirements& reqs, void* buf, eDeviceType device) { - ImageBufferStrided strided{}; - strided.numPlanes = 1; - strided.planes[0].width = reqs.size.w; - strided.planes[0].height = reqs.size.h; - strided.planes[0].rowStride = reqs.planeRowStride[0]; - strided.planes[0].basePtr = buf; - - switch (device) { - case eDeviceType::GPU: - return ImageDataStridedHip(reqs.format, strided); - case eDeviceType::CPU: - return ImageDataStridedHost(reqs.format, strided); - } - - throw Exception("Unsupported device type in Image::makeImageData.", eStatusType::INVALID_VALUE); -} - } // namespace // ----------------------------------------------------------------------------- @@ -118,20 +100,41 @@ Image::Image(const Requirements& reqs, eDeviceType device) : Image(reqs, GlobalContext().getDefaultAllocator(), device) {} Image::Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType device) - : m_data(makeStorage(reqs, alloc, device)), m_metadata(makeImageData(reqs, m_data->data(), device)) {} - -Image::Image(ImageData metadata, std::shared_ptr storage) - : m_data(std::move(storage)), m_metadata(std::move(metadata)) {} + : Image(reqs, device, makeStorage(reqs, alloc, device)) {} + +Image::Image(const Requirements& reqs, eDeviceType device, std::shared_ptr storage) + : m_data(std::move(storage)), + m_size(reqs.size), + m_format(reqs.format), + m_device(device), + m_planeRowStride{} { + std::copy(std::begin(reqs.planeRowStride), std::end(reqs.planeRowStride), m_planeRowStride.begin()); +} // ----------------------------------------------------------------------------- -// Accessors +// exportData // ----------------------------------------------------------------------------- -Size2D Image::size() const noexcept { return m_metadata.cast()->size(); } +ImageData Image::exportData() const { + // TODO: derive numPlanes from m_format when planar formats land. 
Today's + // ImageFormat is interleaved-only, so plane 0 covers the whole image and + // its dimensions match m_size verbatim. + ImageBufferStrided strided{}; + strided.numPlanes = 1; + strided.planes[0].width = m_size.w; + strided.planes[0].height = m_size.h; + strided.planes[0].rowStride = m_planeRowStride[0]; + strided.planes[0].basePtr = m_data->data(); -ImageFormat Image::format() const noexcept { return m_metadata.format(); } + switch (m_device) { + case eDeviceType::GPU: + return ImageDataStridedHip(m_format, strided); + case eDeviceType::CPU: + return ImageDataStridedHost(m_format, strided); + } -eDeviceType Image::device() const noexcept { return m_metadata.device(); } + throw Exception("Unsupported device type in Image::exportData.", eStatusType::INVALID_VALUE); +} // ----------------------------------------------------------------------------- // ImageWrapData @@ -143,18 +146,32 @@ Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup) { throw Exception("ImageWrapData requires strided image data.", eStatusType::INVALID_VALUE); } - // Single-plane assumption: storage tracks plane(0). Multi-plane wraps will - // need a richer storage shape. - void* basePtr = strided->plane(0).basePtr; - - auto storage = std::shared_ptr(new ImageStorage(basePtr), [data, cleanup](ImageStorage* s) { - if (cleanup) { - cleanup(data); - } - delete s; - }); + // Single-plane assumption: storage tracks plane(0) and Requirements only + // populates planeRowStride[0]. Multi-plane wraps will need to copy each + // plane's stride and either store per-plane base pointers or derive them + // from a single owning allocation. + const ImagePlaneStrided& plane0 = strided->plane(0); + + // Designated initializers to avoid value-initializing ImageFormat through + // its explicit default ctor (which copy-list-init refuses). 
+ Image::Requirements reqs{ + .size = Size2D{plane0.width, plane0.height}, + .format = data.format(), + .planeRowStride = {plane0.rowStride}, + .alignBytes = 0, + }; - return Image(data, std::move(storage)); + // The deleter captures `data` by value so the original snapshot survives + // long enough to be passed to the cleanup callback on last-handle drop. + auto storage = std::shared_ptr(new ImageStorage(plane0.basePtr), + [data, cleanup](ImageStorage* s) { + if (cleanup) { + cleanup(data); + } + delete s; + }); + + return Image(reqs, data.device(), std::move(storage)); } } // namespace roccv From b93d304b830ccfd13a385d31878a90d14df71262 Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Wed, 6 May 2026 12:55:27 -0400 Subject: [PATCH 06/13] Add ImageBatchBuffer/ImageBatchData initial implementation --- include/core/image_batch_buffer.hpp | 96 +++++++++++ include/core/image_batch_data.hpp | 245 ++++++++++++++++++++++++++++ 2 files changed, 341 insertions(+) create mode 100644 include/core/image_batch_buffer.hpp create mode 100644 include/core/image_batch_data.hpp diff --git a/include/core/image_batch_buffer.hpp b/include/core/image_batch_buffer.hpp new file mode 100644 index 00000000..884bfc6a --- /dev/null +++ b/include/core/image_batch_buffer.hpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include + +#include "core/image_buffer.hpp" +#include "core/image_format.hpp" + +namespace roccv { + +/** + * @brief Pitch-linear descriptor table for a variable-shape image batch. + * + * Each entry of `imageList` is a full per-image strided buffer descriptor — + * reusing `ImageBufferStrided` keeps the per-image shape (multi-plane-capable, + * one base pointer per plane, per-plane row stride) identical to what a single + * `Image` carries today. + * + * Pointer residency: + * - `imageList` is the descriptor table read by GPU kernels. For a GPU-resident + * batch this points into device memory; for a hypothetical CPU-resident + * batch it would point into host memory. The producing batch class owns the + * allocation and decides residency. + * - `formatList` mirrors `imageList`'s residency and holds one ImageFormat per + * image (so kernels can branch on per-image format without dereferencing the + * descriptor table). + * - `hostFormatList` is always host-resident. It exists so host-side validation + * code can read per-image formats without paying a D->H copy. For a + * CPU-resident batch this MAY alias `formatList`; for a GPU-resident batch + * it is a separate host mirror kept in sync by the producer. + * + * `uniqueFormat` is the common ImageFormat across all images, or a default- + * constructed (0-channel) ImageFormat sentinel if formats are heterogeneous or + * the batch is empty. Cached to fast-path the homogeneous case. 
+ * + * `maxWidth` / `maxHeight` are the bounding box across all images. Used by + * operators to size launch grids. Both are 0 when the batch is empty. + * + * The struct is intentionally trivially copyable so it can ride inside + * `ImageBatchBuffer` without an allocation, mirroring `ImageBufferStrided`'s + * relationship to `ImageBuffer`. + */ +struct ImageBatchVarShapeBufferStrided { + /** Common format across all images in the batch, or a default-constructed + * ImageFormat if formats are heterogeneous or the batch is empty. */ + ImageFormat uniqueFormat; + + /** Bounding box across all images, in pixels. Both 0 when empty. */ + int32_t maxWidth; + int32_t maxHeight; + + /** Per-image format array, length == numImages. Residency matches + * `imageList` (device for GPU batches, host for CPU batches). */ + ImageFormat* formatList; + + /** Host-resident mirror of `formatList`. May alias `formatList` for + * CPU-resident batches. Length == numImages. */ + const ImageFormat* hostFormatList; + + /** Per-image descriptor table, length == numImages. The kernel-facing + * pointer; residency determines which device the batch lives on. */ + ImageBufferStrided* imageList; +}; + +/** + * @brief An image-batch buffer. Currently only the variable-shape strided + * variant is supported. Shaped as a tagged-union-style aggregate so additional + * batch buffer kinds can be added later (e.g. tensor-backed batches) without + * changing the public type. + */ +struct ImageBatchBuffer { + ImageBatchVarShapeBufferStrided varShapeStrided; +}; + +} // namespace roccv diff --git a/include/core/image_batch_data.hpp b/include/core/image_batch_data.hpp new file mode 100644 index 00000000..7d6638d4 --- /dev/null +++ b/include/core/image_batch_data.hpp @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include + +#include +#include + +#include "core/image_batch_buffer.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" +#include "operator_types.h" + +namespace roccv { + +/** + * @brief Discriminator for the kind of buffer an ImageBatchData carries. Used + * by IsCompatibleKind() / cast<>() to perform safe runtime down-casting through + * the ImageBatchData hierarchy. + * + * The hierarchy currently exposes only one concrete buffer kind + * (variable-shape, strided, GPU-resident); the enum is shaped to grow into + * additional kinds (e.g. tensor-backed batches, host-resident varshape) without + * breaking the existing buffer kind values. + */ +enum class ImageBatchBufferType { + IMAGE_BATCH_BUFFER_NONE, // Default/invalid buffer type. + IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP, // GPU-accessible varshape descriptor table. 
+ IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST, // Host-accessible varshape descriptor table. +}; + +/** + * @brief Holds the underlying image-batch data alongside metadata + * (numImages, buffer kind). Non-strided batch data is not supported for use + * right now; use ImageBatchVarShapeDataStrided to access strided varshape data + * instead. + * + * ImageBatchData is the interchange type for a batch of variable-sized images. + * It does not own any of the underlying buffers (the descriptor table, the + * format arrays, or the per-image pixel buffers) — it is a metadata snapshot, + * valid only as long as the producing batch outlives it. + * + * Lazy-sync note: for a GPU-resident batch the producer (ImageBatchVarShape) + * is responsible for ensuring the device-side descriptor table is up to date + * with any pushBack/popBack edits before handing out an ImageBatchData. The + * snapshot itself carries no synchronization state. + */ +class ImageBatchData { + public: + ImageBatchData() = delete; + virtual ~ImageBatchData() = default; + + /** + * @brief Returns the number of images currently in the batch. + */ + virtual int32_t numImages() const; + + /** + * @brief Returns the device the descriptor table (and per-image pixel + * buffers) reside on. + */ + virtual eDeviceType device() const; + + /** + * @brief Attempts to down-cast this ImageBatchData to a more specific + * subclass. Returns the casted value if the underlying buffer kind matches + * what Derived expects, or std::nullopt otherwise. + * + * @tparam Derived The target subclass to cast to. 
+ */ + template + std::optional cast() const { + static_assert(std::is_base_of::value, + "Cannot cast ImageBatchData to an unrelated type."); + static_assert(sizeof(Derived) == sizeof(ImageBatchData), + "Derived type must not add any additional data members."); + + if (!Derived::IsCompatibleKind(m_bufferType)) { + return std::nullopt; + } + + return std::make_optional(m_numImages, m_buffer); + } + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); + + protected: + ImageBatchData(int32_t numImages, const ImageBatchBuffer& buffer); + + int32_t m_numImages; + eDeviceType m_deviceType; + ImageBatchBufferType m_bufferType; + ImageBatchBuffer m_buffer; +}; + +/** + * @brief Image-batch data backed by a variable-shape descriptor table. Adds + * typed accessors for the per-image format arrays and the bounding box across + * the batch. Sub-classed by ImageBatchVarShapeDataStrided to discriminate + * pitch-linear storage; further sub-classed by ImageBatchVarShapeDataStridedHip + * to tag device residency. + */ +class ImageBatchVarShapeData : public ImageBatchData { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeData(int32_t numImages, const ImageBatchBuffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); + + /** + * @brief Bounding box across all images in the batch, in pixels. Both + * dimensions are 0 when the batch is empty. Used by operators to size + * launch grids without iterating the descriptor table. + */ + Size2D maxSize() const; + + /** + * @brief Returns the common ImageFormat across all images, or a + * default-constructed (0-channel) ImageFormat sentinel if formats are + * heterogeneous or the batch is empty. + */ + ImageFormat uniqueFormat() const; + + /** + * @brief Per-image format array. Residency matches the descriptor table + * (device for GPU batches). Length == numImages(). + * + * Prefer hostFormatList() for host-side validation paths to avoid a D->H + * copy. 
+ */ + const ImageFormat* formatList() const; + + /** + * @brief Host-resident mirror of formatList(). Always safe to dereference + * from host code. Length == numImages(). + */ + const ImageFormat* hostFormatList() const; +}; + +/** + * @brief Variable-shape image-batch data backed by a pitch-linear descriptor + * table. Adds the per-image descriptor accessor on top of + * ImageBatchVarShapeData. + */ +class ImageBatchVarShapeDataStrided : public ImageBatchVarShapeData { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeDataStrided(int32_t numImages, const ImageBatchBuffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); + + /** + * @brief Per-image descriptor table. Length == numImages(). Residency + * matches the enclosing data type — for ImageBatchVarShapeDataStridedHip + * this is a device pointer; kernels read it directly. + * + * Each entry is a full ImageBufferStrided so the per-image shape + * (multi-plane-capable, per-plane stride and base pointer) matches what a + * single Image carries. + */ + const ImageBufferStrided* imageList() const; +}; + +/** + * @brief GPU-accessible variable-shape image-batch data. + */ +class ImageBatchVarShapeDataStridedHip : public ImageBatchVarShapeDataStrided { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeDataStridedHip(int32_t numImages, const ImageBatchBuffer& buffer); + + /** + * @brief Constructs GPU-accessible varshape image-batch data from the + * concrete strided buffer directly. + * + * @param[in] numImages Number of images currently in the batch. + * @param[in] buffer Descriptor table + per-image format arrays. The + * descriptor table and `formatList` must point to GPU + * memory; `hostFormatList` to host memory. 
+ */ + ImageBatchVarShapeDataStridedHip(int32_t numImages, const Buffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); +}; + +/** + * @brief Host-accessible variable-shape image-batch data. + * + * The host-resident counterpart to ImageBatchVarShapeDataStridedHip. The + * descriptor table, `formatList`, and `hostFormatList` all point to host + * memory; `formatList` and `hostFormatList` MAY alias the same allocation + * since no D->H sync is required. + * + * The lazy host->device descriptor sync that the GPU producer needs is not + * applicable here — host-only varshape batches can edit the descriptor table + * in place and hand it straight to host kernels. The matching producer-side + * design (whether host batches are a separate type, a runtime-tagged variant + * of ImageBatchVarShape, or skipped entirely in favor of CPU-side per-image + * loops) is still open. + */ +class ImageBatchVarShapeDataStridedHost : public ImageBatchVarShapeDataStrided { + public: + using Buffer = ImageBatchVarShapeBufferStrided; + + ImageBatchVarShapeDataStridedHost(int32_t numImages, const ImageBatchBuffer& buffer); + + /** + * @brief Constructs host-accessible varshape image-batch data from the + * concrete strided buffer directly. + * + * @param[in] numImages Number of images currently in the batch. + * @param[in] buffer Descriptor table + per-image format arrays. All + * pointers must reference host memory; `formatList` + * and `hostFormatList` may alias. 
+ */ + ImageBatchVarShapeDataStridedHost(int32_t numImages, const Buffer& buffer); + + static bool IsCompatibleKind(ImageBatchBufferType bufferType); +}; + +} // namespace roccv From defdcd44af9ec66c30dc42a78f9e2e0c5a3250ed Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Wed, 6 May 2026 14:55:30 -0400 Subject: [PATCH 07/13] Add ImageBatchData tests --- include/core/image_batch_buffer.hpp | 6 +- include/core/image_batch_data.hpp | 5 +- include/core/image_format.hpp | 13 +- src/core/image_batch_data.cpp | 103 ++++++ .../core/image/test_image_batch_data.cpp | 297 ++++++++++++++++++ 5 files changed, 417 insertions(+), 7 deletions(-) create mode 100644 src/core/image_batch_data.cpp create mode 100644 tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp diff --git a/include/core/image_batch_buffer.hpp b/include/core/image_batch_buffer.hpp index 884bfc6a..06f0c4b3 100644 --- a/include/core/image_batch_buffer.hpp +++ b/include/core/image_batch_buffer.hpp @@ -50,9 +50,9 @@ namespace roccv { * CPU-resident batch this MAY alias `formatList`; for a GPU-resident batch * it is a separate host mirror kept in sync by the producer. * - * `uniqueFormat` is the common ImageFormat across all images, or a default- - * constructed (0-channel) ImageFormat sentinel if formats are heterogeneous or - * the batch is empty. Cached to fast-path the homogeneous case. + * `uniqueFormat` is the common ImageFormat across all images, or FMT_NONE if + * formats are heterogeneous or the batch is empty. Cached to fast-path the + * homogeneous case. * * `maxWidth` / `maxHeight` are the bounding box across all images. Used by * operators to size launch grids. Both are 0 when the batch is empty. 
diff --git a/include/core/image_batch_data.hpp b/include/core/image_batch_data.hpp index 7d6638d4..c50de010 100644 --- a/include/core/image_batch_data.hpp +++ b/include/core/image_batch_data.hpp @@ -137,9 +137,8 @@ class ImageBatchVarShapeData : public ImageBatchData { Size2D maxSize() const; /** - * @brief Returns the common ImageFormat across all images, or a - * default-constructed (0-channel) ImageFormat sentinel if formats are - * heterogeneous or the batch is empty. + * @brief Returns the common ImageFormat across all images, or FMT_NONE if + * formats are heterogeneous or the batch is empty. */ ImageFormat uniqueFormat() const; diff --git a/include/core/image_format.hpp b/include/core/image_format.hpp index 7dd891f3..ddb1d100 100644 --- a/include/core/image_format.hpp +++ b/include/core/image_format.hpp @@ -40,7 +40,10 @@ enum class eSwizzle { */ class ImageFormat { public: - explicit ImageFormat() {} + /** + * @brief Default-constructs to FMT_NONE. + */ + constexpr ImageFormat() : m_dtype(eDataType::DATA_TYPE_U8), m_numChannels(0), m_swizzle(eSwizzle::XYZW) {} explicit constexpr ImageFormat(eDataType dtype, int32_t numChannels, eSwizzle swizzle = eSwizzle::XYZW) : m_dtype(dtype), m_numChannels(numChannels), m_swizzle(swizzle) {} @@ -48,12 +51,20 @@ class ImageFormat { int32_t channels() const noexcept; eSwizzle swizzle() const noexcept; + constexpr bool operator==(const ImageFormat& other) const noexcept { + return m_dtype == other.m_dtype && m_numChannels == other.m_numChannels && m_swizzle == other.m_swizzle; + } + constexpr bool operator!=(const ImageFormat& other) const noexcept { return !(*this == other); } + private: eDataType m_dtype; int32_t m_numChannels; eSwizzle m_swizzle; }; +// Undefined format. Used to represent an uninitialized or invalid format. +constexpr ImageFormat FMT_NONE{eDataType::DATA_TYPE_U8, 0, eSwizzle::XYZW}; + // Single plane with one 8-bit unsigned integer channel. 
constexpr ImageFormat FMT_U8(eDataType::DATA_TYPE_U8, 1, eSwizzle::XYZW); diff --git a/src/core/image_batch_data.cpp b/src/core/image_batch_data.cpp new file mode 100644 index 00000000..a8ce07ba --- /dev/null +++ b/src/core/image_batch_data.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "core/image_batch_data.hpp" + +#include "core/image_batch_buffer.hpp" +#include "core/image_format.hpp" +#include "core/util_enums.h" + +namespace roccv { + +int32_t ImageBatchData::numImages() const { return m_numImages; } + +eDeviceType ImageBatchData::device() const { return m_deviceType; } + +ImageBatchData::ImageBatchData(int32_t numImages, const ImageBatchBuffer& buffer) + : m_numImages(numImages), + m_deviceType(eDeviceType::GPU), + m_bufferType(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE), + m_buffer(buffer) {} + +bool ImageBatchData::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType != ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE; +} + +ImageBatchVarShapeData::ImageBatchVarShapeData(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchData(numImages, buffer) {} + +bool ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP || + bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; +} + +Size2D ImageBatchVarShapeData::maxSize() const { + return Size2D{m_buffer.varShapeStrided.maxWidth, m_buffer.varShapeStrided.maxHeight}; +} + +ImageFormat ImageBatchVarShapeData::uniqueFormat() const { return m_buffer.varShapeStrided.uniqueFormat; } + +const ImageFormat* ImageBatchVarShapeData::formatList() const { return m_buffer.varShapeStrided.formatList; } + +const ImageFormat* ImageBatchVarShapeData::hostFormatList() const { return m_buffer.varShapeStrided.hostFormatList; } + +ImageBatchVarShapeDataStrided::ImageBatchVarShapeDataStrided(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchVarShapeData(numImages, buffer) {} + +bool ImageBatchVarShapeDataStrided::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP || + bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; 
+} + +const ImageBufferStrided* ImageBatchVarShapeDataStrided::imageList() const { + return m_buffer.varShapeStrided.imageList; +} + +ImageBatchVarShapeDataStridedHip::ImageBatchVarShapeDataStridedHip(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchVarShapeDataStrided(numImages, buffer) { + m_bufferType = ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP; + m_deviceType = eDeviceType::GPU; +} + +ImageBatchVarShapeDataStridedHip::ImageBatchVarShapeDataStridedHip( + int32_t numImages, const ImageBatchVarShapeDataStridedHip::Buffer& buffer) + : ImageBatchVarShapeDataStridedHip(numImages, ImageBatchBuffer{.varShapeStrided = buffer}) {} + +bool ImageBatchVarShapeDataStridedHip::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP; +} + +ImageBatchVarShapeDataStridedHost::ImageBatchVarShapeDataStridedHost(int32_t numImages, const ImageBatchBuffer& buffer) + : ImageBatchVarShapeDataStrided(numImages, buffer) { + m_bufferType = ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; + m_deviceType = eDeviceType::CPU; +} + +ImageBatchVarShapeDataStridedHost::ImageBatchVarShapeDataStridedHost( + int32_t numImages, const ImageBatchVarShapeDataStridedHost::Buffer& buffer) + : ImageBatchVarShapeDataStridedHost(numImages, ImageBatchBuffer{.varShapeStrided = buffer}) {} + +bool ImageBatchVarShapeDataStridedHost::IsCompatibleKind(ImageBatchBufferType bufferType) { + return bufferType == ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST; +} + +} // namespace roccv diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp new file mode 100644 index 00000000..f402d261 --- /dev/null +++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include + +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +// ImageBatchData carries pointers but never dereferences them; the buffer is a +// metadata snapshot. Use opaque sentinel pointers so we can verify values flow +// through the hierarchy without needing real allocations. +void* const FAKE_IMG_PTR_A = reinterpret_cast(0xA0A0A0A0ull); +void* const FAKE_IMG_PTR_B = reinterpret_cast(0xB0B0B0B0ull); + +// Static descriptor/format storage for the batch buffer. These are real host +// allocations (so the pointers are valid) but the batch tests only read +// metadata back out of them; nothing dereferences the per-image basePtr fields. 
+ImageBufferStrided g_imageList[2];
+ImageFormat g_formatList[2] = {FMT_RGB8, FMT_RGB8};
+ImageFormat g_hostFormatList[2] = {FMT_RGB8, FMT_RGB8};
+
+// Returns a value-initialised descriptor whose only valid plane (index 0)
+// carries the supplied geometry and base pointer.
+ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, int64_t rowStride, void* basePtr) {
+    ImageBufferStrided result{};
+    result.numPlanes = 1;
+
+    ImagePlaneStrided& plane = result.planes[0];
+    plane.width = width;
+    plane.height = height;
+    plane.rowStride = rowStride;
+    plane.basePtr = basePtr;
+    return result;
+}
+
+// Produces the canonical two-image RGB8 descriptor used by most tests: a
+// 640x480 image and a 320x240 image, bounding box 640x480. The module-static
+// tables are rewritten on every call (an earlier test may have left other
+// values in them) and the returned struct points straight at those tables, so
+// the addresses are the same from one call to the next.
+ImageBatchVarShapeBufferStrided MakeHomogeneousBuffer() {
+    for (int i = 0; i < 2; ++i) {
+        g_formatList[i] = FMT_RGB8;
+        g_hostFormatList[i] = FMT_RGB8;
+    }
+    g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_IMG_PTR_A);
+    g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 3, FAKE_IMG_PTR_B);
+
+    ImageBatchVarShapeBufferStrided descriptor{};
+    descriptor.imageList = g_imageList;
+    descriptor.formatList = g_formatList;
+    descriptor.hostFormatList = g_hostFormatList;
+    descriptor.maxWidth = 640;
+    descriptor.maxHeight = 480;
+    descriptor.uniqueFormat = FMT_RGB8;
+    return descriptor;
+}
+
+/**
+ * @brief Verifies HIP-strided varshape construction populates all observable
+ * state and tags itself as GPU-resident.
+ */
+void TestImageBatchVarShapeDataStridedHipConstruction() {
+    auto buf = MakeHomogeneousBuffer();
+    ImageBatchVarShapeDataStridedHip data(2, buf);
+
+    // Expected values mirror what MakeHomogeneousBuffer() writes: two RGB8
+    // images (3 channels) with a 640x480 bounding box.
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU));
+    EXPECT_EQ(data.numImages(), 2);
+    EXPECT_EQ(data.maxSize().w, 640);
+    EXPECT_EQ(data.maxSize().h, 480);
+    EXPECT_EQ(data.uniqueFormat().channels(), 3);
+    // The accessors must return the very pointers stored in the descriptor.
+    EXPECT_EQ(AsAddr(const_cast(data.formatList())), AsAddr(g_formatList));
+    EXPECT_EQ(AsAddr(const_cast(data.hostFormatList())), AsAddr(g_hostFormatList));
+    EXPECT_EQ(AsAddr(const_cast(data.imageList())), AsAddr(g_imageList));
+    // g_imageList is ordinary host memory in this test, so reading it back
+    // through the snapshot is safe.
+    EXPECT_EQ(data.imageList()[0].planes[0].width, 640);
+    EXPECT_EQ(data.imageList()[1].planes[0].width, 320);
+}
+
+/**
+ * @brief Same shape as the Hip test but for Host-resident varshape data.
+ */
+void TestImageBatchVarShapeDataStridedHostConstruction() {
+    auto buf = MakeHomogeneousBuffer();
+    ImageBatchVarShapeDataStridedHost data(2, buf);
+
+    // Same descriptor as the Hip case; only the device tag is expected to differ.
+    EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::CPU));
+    EXPECT_EQ(data.numImages(), 2);
+    EXPECT_EQ(data.maxSize().w, 640);
+    EXPECT_EQ(data.maxSize().h, 480);
+    EXPECT_EQ(data.uniqueFormat().channels(), 3);
+    EXPECT_EQ(AsAddr(const_cast(data.imageList())), AsAddr(g_imageList));
+}
+
+/**
+ * @brief Empty batch: maxSize collapses to 0x0 and uniqueFormat is FMT_NONE.
+ * Producers signal "no images" via numImages == 0; the buffer fields stay
+ * valid pointers but get ignored.
+ */
+void TestImageBatchVarShapeDataEmpty() {
+    // Descriptor of a zero-image batch: the table pointers are still set, but
+    // the summary fields carry the "nothing here" values.
+    ImageBatchVarShapeBufferStrided descriptor{};
+    descriptor.imageList = g_imageList;
+    descriptor.formatList = g_formatList;
+    descriptor.hostFormatList = g_hostFormatList;
+    descriptor.maxWidth = 0;
+    descriptor.maxHeight = 0;
+    descriptor.uniqueFormat = FMT_NONE;
+
+    ImageBatchVarShapeDataStridedHip snapshot(0, descriptor);
+
+    EXPECT_EQ(snapshot.numImages(), 0);
+    EXPECT_EQ(snapshot.maxSize().w, 0);
+    EXPECT_EQ(snapshot.maxSize().h, 0);
+    EXPECT_EQ(AsInt(snapshot.uniqueFormat() == FMT_NONE), 1);
+}
+
+/**
+ * @brief Heterogeneous formats: per-image formatList carries each entry
+ * verbatim; uniqueFormat is FMT_NONE since no single format spans the batch.
+ */
+void TestImageBatchVarShapeDataHeterogeneousFormats() {
+    // Slot 0 is RGB8 (3 bytes per pixel), slot 1 is RGBA8 (4 bytes per pixel).
+    g_formatList[0] = FMT_RGB8;
+    g_hostFormatList[0] = FMT_RGB8;
+    g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_IMG_PTR_A);
+
+    g_formatList[1] = FMT_RGBA8;
+    g_hostFormatList[1] = FMT_RGBA8;
+    g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 4, FAKE_IMG_PTR_B);
+
+    ImageBatchVarShapeBufferStrided descriptor{};
+    descriptor.imageList = g_imageList;
+    descriptor.formatList = g_formatList;
+    descriptor.hostFormatList = g_hostFormatList;
+    descriptor.maxWidth = 640;
+    descriptor.maxHeight = 480;
+    descriptor.uniqueFormat = FMT_NONE;
+
+    ImageBatchVarShapeDataStridedHip snapshot(2, descriptor);
+
+    EXPECT_EQ(AsInt(snapshot.uniqueFormat() == FMT_NONE), 1);
+    EXPECT_EQ(AsInt(snapshot.hostFormatList()[0] == FMT_RGB8), 1);
+    EXPECT_EQ(AsInt(snapshot.hostFormatList()[1] == FMT_RGBA8), 1);
+}
+
+/**
+ * @brief The two leaf ctors (taking ImageBatchBuffer vs the concrete strided
+ * buffer directly) must produce observably identical state.
+ */
+void TestImageBatchVarShapeDataSugarCtor() {
+    auto buf = MakeHomogeneousBuffer();
+
+    // "wide" goes through the generic ImageBatchBuffer overload; "sugar" uses
+    // the overload that accepts the concrete strided buffer.
+    ImageBatchVarShapeDataStridedHip wide(2, ImageBatchBuffer{.varShapeStrided = buf});
+    ImageBatchVarShapeDataStridedHip sugar(2, buf);
+
+    EXPECT_EQ(AsInt(wide.device()), AsInt(sugar.device()));
+    EXPECT_EQ(wide.numImages(), sugar.numImages());
+    EXPECT_EQ(wide.maxSize().w, sugar.maxSize().w);
+    EXPECT_EQ(wide.maxSize().h, sugar.maxSize().h);
+    EXPECT_EQ(AsAddr(const_cast(wide.imageList())),
+              AsAddr(const_cast(sugar.imageList())));
+
+    // Same pairing for the Host leaf.
+    ImageBatchVarShapeDataStridedHost wideHost(2, ImageBatchBuffer{.varShapeStrided = buf});
+    ImageBatchVarShapeDataStridedHost sugarHost(2, buf);
+    EXPECT_EQ(AsInt(wideHost.device()), AsInt(sugarHost.device()));
+    EXPECT_EQ(AsAddr(const_cast(wideHost.imageList())),
+              AsAddr(const_cast(sugarHost.imageList())));
+}
+
+/**
+ * @brief IsCompatibleKind on each level discriminates the buffer kinds it
+ * accepts. Base accepts anything-but-NONE; VarShape and VarShapeStrided accept
+ * both Hip and Host varshape; leaves accept only their own.
+ */
+void TestImageBatchDataIsCompatibleKind() {
+    // Each group below probes one class with NONE, HIP, HOST (in that order);
+    // the trailing literal is the expected boolean as an int.
+
+    // ImageBatchData
+    EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              1);
+    EXPECT_EQ(AsInt(ImageBatchData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              1);
+
+    // ImageBatchVarShapeData
+    EXPECT_EQ(AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0);
+    EXPECT_EQ(
+        AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+        1);
+    EXPECT_EQ(
+        AsInt(ImageBatchVarShapeData::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+        1);
+
+    // ImageBatchVarShapeDataStrided
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)), 0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              1);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStrided::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              1);
+
+    // ImageBatchVarShapeDataStridedHip: only its own kind.
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)),
+              0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              1);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHip::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              0);
+
+    // ImageBatchVarShapeDataStridedHost: only its own kind.
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind(ImageBatchBufferType::IMAGE_BATCH_BUFFER_NONE)),
+              0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HIP)),
+              0);
+    EXPECT_EQ(AsInt(ImageBatchVarShapeDataStridedHost::IsCompatibleKind(
+                  ImageBatchBufferType::IMAGE_BATCH_VARSHAPE_BUFFER_STRIDED_HOST)),
+              1);
+}
+
+/**
+ * @brief Round-trip a derived ImageBatchData through the base reference and
+ * back via cast<>(). Successful casts must preserve every observable field;
+ * casts to incompatible kinds must return std::nullopt.
+ */
+void TestImageBatchDataCast() {
+    auto buf = MakeHomogeneousBuffer();
+
+    // Hip → base → Hip should round-trip; intermediate VarShape/Strided also
+    // succeed; Hip → Host fails.
+    {
+        ImageBatchVarShapeDataStridedHip hip(2, buf);
+        const ImageBatchData& base = hip;
+
+        // Expected values (GPU, 2 images, 640x480) are those written by
+        // MakeHomogeneousBuffer().
+        auto asHip = base.cast();
+        EXPECT_EQ(AsInt(asHip.has_value()), 1);
+        EXPECT_EQ(AsInt(asHip->device()), AsInt(eDeviceType::GPU));
+        EXPECT_EQ(asHip->numImages(), 2);
+        EXPECT_EQ(asHip->maxSize().w, 640);
+        EXPECT_EQ(AsAddr(const_cast(asHip->imageList())), AsAddr(g_imageList));
+
+        auto asStrided = base.cast();
+        EXPECT_EQ(AsInt(asStrided.has_value()), 1);
+        EXPECT_EQ(AsInt(asStrided->device()), AsInt(eDeviceType::GPU));
+
+        auto asVar = base.cast();
+        EXPECT_EQ(AsInt(asVar.has_value()), 1);
+        EXPECT_EQ(asVar->maxSize().h, 480);
+
+        // Incompatible leaf: the optional must come back empty.
+        auto asHost = base.cast();
+        EXPECT_EQ(AsInt(asHost.has_value()), 0);
+    }
+
+    // Symmetrically: Host → base → Host succeeds, Host → Hip fails.
+    {
+        ImageBatchVarShapeDataStridedHost host(2, buf);
+        const ImageBatchData& base = host;
+
+        auto asHost = base.cast();
+        EXPECT_EQ(AsInt(asHost.has_value()), 1);
+        EXPECT_EQ(AsInt(asHost->device()), AsInt(eDeviceType::CPU));
+        EXPECT_EQ(asHost->numImages(), 2);
+
+        auto asHip = base.cast();
+        EXPECT_EQ(AsInt(asHip.has_value()), 0);
+    }
+}
+
+} // namespace
+
+// Test driver: runs every case in this file. argc/argv are unused.
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+    TEST_CASES_BEGIN();
+
+    TEST_CASE(TestImageBatchVarShapeDataStridedHipConstruction());
+    TEST_CASE(TestImageBatchVarShapeDataStridedHostConstruction());
+    TEST_CASE(TestImageBatchVarShapeDataEmpty());
+    TEST_CASE(TestImageBatchVarShapeDataHeterogeneousFormats());
+    TEST_CASE(TestImageBatchVarShapeDataSugarCtor());
+    TEST_CASE(TestImageBatchDataIsCompatibleKind());
+    TEST_CASE(TestImageBatchDataCast());
+
+    TEST_CASES_END();
+}
From 1977fe682fa15959ef13a07529c1ec898a087b27 Mon Sep 17 00:00:00 2001
From: Zach Vincze
Date: Wed, 6 May 2026 16:31:33 -0400
Subject: [PATCH 08/13] Initial ImageBatchVarShape implementation

---
 include/core/image_batch_var_shape.hpp | 206 ++++++++
 src/core/image_batch_var_shape.cpp | 244 ++++++++++
 .../core/image/test_image_batch_var_shape.cpp | 460 ++++++++++++++++++
 3 files changed, 910 insertions(+)
 create mode 100644 include/core/image_batch_var_shape.hpp
 create mode 100644 src/core/image_batch_var_shape.cpp
 create mode 100644 tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp

diff --git a/include/core/image_batch_var_shape.hpp b/include/core/image_batch_var_shape.hpp
new file mode 100644
index 00000000..a57f355a
--- /dev/null
+++ b/include/core/image_batch_var_shape.hpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#pragma once + +#include +#include + +#include +#include + +#include "core/detail/allocators/i_allocator.hpp" +#include "core/image.hpp" +#include "core/image_batch_data.hpp" +#include "core/image_format.hpp" +#include "exception.hpp" +#include "operator_types.h" + +namespace roccv { + +/** + * @brief Producer-side container for a batch of variable-sized images that + * share a single GPU-resident descriptor table. + * + * Holds up to `capacity()` Image handles and maintains a parallel descriptor + * table that operators can dispatch over without iterating Image-by-Image. + * Capacity is fixed at construction; pushBack/popBack move within it. + * + * The host descriptor mirrors are pinned so the H2D copy in exportData() is a + * true DMA (no runtime bounce buffer) and so the snapshot can expose the same + * pinned pointer as both `formatList`'s host shadow and `hostFormatList`. 
+ *
+ * Sync model: pushBack writes the host mirrors only (popBack/clear just shrink
+ * the image list and pull the dirty index back); the device descriptor table
+ * is brought up to date lazily inside exportData(stream), which copies just
+ * the dirty suffix `[dirtyStart, numImages)`. A hipEvent (`m_postFence`)
+ * guards the host buffers — if a previous exportData's H2D is still in
+ * flight, pushBack hipEventSynchronize's on the CPU before mutating, so the
+ * snapshot a consumer is reading never tears.
+ *
+ * GPU-only in v1. CPU-resident images are rejected on push.
+ */
+class ImageBatchVarShape {
+ public:
+    using const_iterator = std::vector::const_iterator;
+
+    /**
+     * @brief Construct an empty batch with `capacity` slots, using the global
+     * default allocator. Throws if `capacity` is not positive.
+     */
+    explicit ImageBatchVarShape(int32_t capacity);
+
+    /**
+     * @brief Construct an empty batch with `capacity` slots, using the supplied
+     * allocator. The allocator must outlive the batch (it is held by
+     * reference). Throws if `capacity` is not positive.
+     */
+    explicit ImageBatchVarShape(int32_t capacity, const IAllocator &alloc);
+
+    ~ImageBatchVarShape();
+
+    // Move-constructible only: copying and both assignments are deleted.
+    ImageBatchVarShape(const ImageBatchVarShape &) = delete;
+    ImageBatchVarShape &operator=(const ImageBatchVarShape &) = delete;
+    ImageBatchVarShape(ImageBatchVarShape &&) noexcept;
+    ImageBatchVarShape &operator=(ImageBatchVarShape &&) = delete;
+
+    /** @brief Number of slots fixed at construction. */
+    int32_t capacity() const noexcept { return m_capacity; }
+    /** @brief Number of images currently held. */
+    int32_t numImages() const noexcept { return static_cast(m_images.size()); }
+
+    /**
+     * @brief Append an image to the batch. Throws if capacity would be
+     * exceeded, the image is CPU-resident, or the image has more than one
+     * plane (rocCV is single-plane today).
+     */
+    void pushBack(const Image &img);
+
+    /**
+     * @brief Append a range of images. Strong exception guarantee — if any
+     * image fails validation, the batch is rolled back to its pre-call state
+     * and the exception is rethrown.
+     *
+     * The range is traversed twice (once by std::distance for the capacity
+     * pre-check, once to push), so single-pass input iterators are not
+     * suitable.
+     */
+    template
+    void pushBack(It begin, It end);
+
+    /**
+     * @brief Remove the trailing `count` images. Throws if `count` is
+     * negative or exceeds numImages().
+     */
+    void popBack(int32_t count = 1);
+
+    /**
+     * @brief Drop all images. Buffers are kept; the batch is reusable.
+     */
+    void clear();
+
+    /** @brief Unchecked access to image `i`; `i` must be in [0, numImages()). */
+    const Image &operator[](int32_t i) const { return m_images[i]; }
+
+    const_iterator begin() const noexcept { return m_images.cbegin(); }
+    const_iterator end() const noexcept { return m_images.cend(); }
+
+    /**
+     * @brief Bounding box across all images, in pixels. Returns Size2D{0, 0}
+     * for an empty batch.
+     */
+    Size2D maxSize() const;
+
+    /**
+     * @brief The common ImageFormat across all images, or FMT_NONE if formats
+     * are heterogeneous or the batch is empty.
+     *
+     * Contract: after popping the only image with a given heterogenizing
+     * format, the value is allowed to stay FMT_NONE until the next emptying
+     * operation — conservative, never wrong. The implementation in
+     * image_batch_var_shape.cpp is stricter than that: popBack() drops the
+     * cached bounding box, which makes the next query rescan both cached
+     * values.
+     */
+    ImageFormat uniqueFormat() const;
+
+    /**
+     * @brief Build (and return by value) a GPU-resident snapshot of the batch.
+     *
+     * Synchronizes the dirty suffix of the host mirrors to the device
+     * descriptor table on the supplied stream before returning. The returned
+     * snapshot's `imageList` and `formatList` are device pointers safe for
+     * kernels enqueued on the same stream; `hostFormatList` aliases the pinned
+     * host format mirror and is safe to read from host code. The snapshot is
+     * a metadata view valid as long as this batch outlives it.
+     */
+    ImageBatchVarShapeDataStridedHip exportData(hipStream_t stream);
+
+    /**
+     * @brief Build a snapshot and down-cast it to a specific subclass. Throws
+     * std::bad_cast if the underlying buffer kind doesn't match Derived.
+     */
+    template
+    Derived exportData(hipStream_t stream);
+
+ private:
+    // Uploads host mirror entries [m_dirtyStartingFromIndex, numImages()) on `stream`.
+    void doSyncDirtySuffix(hipStream_t stream);
+    // Recomputes the cached bounding box / unique format when either is missing.
+    void doUpdateCache() const;
+
+    int32_t m_capacity;
+    // First descriptor slot whose device copy is out of date.
+    int32_t m_dirtyStartingFromIndex = 0;
+    // True while m_postFence has been recorded and not yet waited on by the host.
+    bool m_fencePending = false;
+
+    const IAllocator &m_allocator;
+    std::vector m_images;
+
+    // Device-side descriptor tables and their host mirrors, `m_capacity` entries each.
+    ImageBufferStrided *m_devImagesBuffer = nullptr;
+    ImageFormat *m_devFormatsBuffer = nullptr;
+    ImageBufferStrided *m_hostImagesBuffer = nullptr;
+    ImageFormat *m_hostFormatsBuffer = nullptr;
+
+    // Recorded after each descriptor upload; guards the host mirrors.
+    hipEvent_t m_postFence = nullptr;
+
+    // Lazily computed summaries; an empty optional means "recompute on next query".
+    mutable std::optional m_cacheMaxSize;
+    mutable std::optional m_cacheUniqueFormat;
+};
+
+template
+void ImageBatchVarShape::pushBack(It begin, It end) {
+    // Pre-check the whole range so an overflow is rejected before anything is pushed.
+    const int32_t incoming = static_cast(std::distance(begin, end));
+    if (incoming + numImages() > m_capacity) {
+        throw Exception("ImageBatchVarShape::pushBack range would exceed capacity", eStatusType::OUT_OF_BOUNDS);
+    }
+
+    // Snapshot everything the single-image pushBack may touch, for rollback.
+    const int32_t oldNumImages = numImages();
+    const auto oldMaxSize = m_cacheMaxSize;
+    const auto oldUniqueFormat = m_cacheUniqueFormat;
+
+    try {
+        for (auto it = begin; it != end; ++it) {
+            pushBack(*it);
+        }
+    } catch (...) {
+        // Drop the partially appended images and restore the cached summaries.
+        // Host mirror slots past oldNumImages may hold leftover values, but
+        // nothing reads slots at or beyond numImages().
+        m_images.erase(m_images.begin() + oldNumImages, m_images.end());
+        m_cacheMaxSize = oldMaxSize;
+        m_cacheUniqueFormat = oldUniqueFormat;
+        throw;
+    }
+}
+
+template
+Derived ImageBatchVarShape::exportData(hipStream_t stream) {
+    ImageBatchVarShapeDataStridedHip data = exportData(stream);
+    // cast() yields an empty optional when Derived is not compatible with the
+    // snapshot's buffer kind; surface that as std::bad_cast.
+    auto derived = data.cast();
+    if (!derived.has_value()) {
+        throw std::bad_cast();
+    }
+    return derived.value();
+}
+
+} // namespace roccv
diff --git a/src/core/image_batch_var_shape.cpp b/src/core/image_batch_var_shape.cpp
new file mode 100644
index 00000000..d522e336
--- /dev/null
+++ b/src/core/image_batch_var_shape.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include "core/image_batch_var_shape.hpp"
+
+#include <algorithm>
+
+#include "core/detail/context.hpp"
+#include "core/exception.hpp"
+#include "core/hip_assert.h"
+#include "core/image_batch_buffer.hpp"
+#include "core/image_buffer.hpp"
+
+namespace roccv {
+
+namespace {
+
+/**
+ * @brief Destroys the fence event and hands the four descriptor buffers back
+ * to `allocator`. Null handles/pointers are skipped, so this is safe on a
+ * partially constructed or moved-from batch.
+ *
+ * Shared by the destructor and the constructor's failure path so both release
+ * resources in the same order.
+ */
+void ReleaseDescriptorStorage(const IAllocator& allocator, hipEvent_t fence, ImageBufferStrided* devImages,
+                              ImageFormat* devFormats, ImageBufferStrided* hostImages,
+                              ImageFormat* hostFormats) noexcept {
+    if (fence != nullptr) {
+        // Result deliberately ignored: this only runs on teardown/unwind paths.
+        (void)hipEventDestroy(fence);
+    }
+    if (hostFormats != nullptr) allocator.freeHostPinnedMem(hostFormats);
+    if (hostImages != nullptr) allocator.freeHostPinnedMem(hostImages);
+    if (devFormats != nullptr) allocator.freeHipMem(devFormats);
+    if (devImages != nullptr) allocator.freeHipMem(devImages);
+}
+
+}  // namespace
+
+/** @brief Delegates to the allocator-taking constructor with the global default allocator. */
+ImageBatchVarShape::ImageBatchVarShape(int32_t capacity)
+    : ImageBatchVarShape(capacity, GlobalContext().getDefaultAllocator()) {}
+
+/**
+ * @brief Allocates the device descriptor tables, their host mirrors and the
+ * fence event for a batch of up to `capacity` images.
+ *
+ * @throws Exception (INVALID_VALUE) if `capacity` is not positive. Anything
+ * thrown while acquiring resources is rethrown after the resources acquired so
+ * far have been released.
+ */
+ImageBatchVarShape::ImageBatchVarShape(int32_t capacity, const IAllocator& alloc)
+    : m_capacity(capacity), m_allocator(alloc) {
+    if (capacity <= 0) {
+        throw Exception("ImageBatchVarShape capacity must be positive", eStatusType::INVALID_VALUE);
+    }
+
+    m_images.reserve(capacity);
+
+    const size_t imagesBytes = sizeof(ImageBufferStrided) * capacity;
+    const size_t formatsBytes = sizeof(ImageFormat) * capacity;
+
+    // The destructor does not run when a constructor exits by exception, so a
+    // failure part-way through would otherwise leak whatever was acquired
+    // before it. All handles start out null (in-class initialisers), which
+    // lets the cleanup skip the ones never reached.
+    try {
+        m_devImagesBuffer = static_cast<ImageBufferStrided*>(m_allocator.allocHipMem(imagesBytes));
+        m_devFormatsBuffer = static_cast<ImageFormat*>(m_allocator.allocHipMem(formatsBytes));
+        m_hostImagesBuffer = static_cast<ImageBufferStrided*>(m_allocator.allocHostPinnedMem(imagesBytes));
+        m_hostFormatsBuffer = static_cast<ImageFormat*>(m_allocator.allocHostPinnedMem(formatsBytes));
+
+        HIP_VALIDATE_NO_ERRORS(hipEventCreateWithFlags(&m_postFence, hipEventDisableTiming));
+    } catch (...) {
+        ReleaseDescriptorStorage(m_allocator, m_postFence, m_devImagesBuffer, m_devFormatsBuffer, m_hostImagesBuffer,
+                                 m_hostFormatsBuffer);
+        throw;
+    }
+}
+
+ImageBatchVarShape::~ImageBatchVarShape() {
+    if (m_fencePending && m_postFence != nullptr) {
+        // Drain any in-flight H2D copy before freeing the host mirrors it
+        // reads from. (void) — destructors must not throw.
+        (void)hipEventSynchronize(m_postFence);
+    }
+    ReleaseDescriptorStorage(m_allocator, m_postFence, m_devImagesBuffer, m_devFormatsBuffer, m_hostImagesBuffer,
+                             m_hostFormatsBuffer);
+}
+
+/**
+ * @brief Takes over every resource of `other` and leaves it with zero
+ * capacity and null handles, so its destructor has nothing to release.
+ */
+ImageBatchVarShape::ImageBatchVarShape(ImageBatchVarShape&& other) noexcept
+    : m_capacity(other.m_capacity),
+      m_dirtyStartingFromIndex(other.m_dirtyStartingFromIndex),
+      m_fencePending(other.m_fencePending),
+      m_allocator(other.m_allocator),
+      m_images(std::move(other.m_images)),
+      m_devImagesBuffer(other.m_devImagesBuffer),
+      m_devFormatsBuffer(other.m_devFormatsBuffer),
+      m_hostImagesBuffer(other.m_hostImagesBuffer),
+      m_hostFormatsBuffer(other.m_hostFormatsBuffer),
+      m_postFence(other.m_postFence),
+      m_cacheMaxSize(other.m_cacheMaxSize),
+      m_cacheUniqueFormat(other.m_cacheUniqueFormat) {
+    other.m_capacity = 0;
+    other.m_dirtyStartingFromIndex = 0;
+    other.m_fencePending = false;
+    other.m_devImagesBuffer = nullptr;
+    other.m_devFormatsBuffer = nullptr;
+    other.m_hostImagesBuffer = nullptr;
+    other.m_hostFormatsBuffer = nullptr;
+    other.m_postFence = nullptr;
+    other.m_cacheMaxSize.reset();
+    other.m_cacheUniqueFormat.reset();
+}
+
+/**
+ * @brief Appends `img`: validates it, writes its descriptor and format into
+ * the host mirrors at slot numImages(), and folds it into the cached
+ * bounding box / unique format.
+ *
+ * @throws Exception OUT_OF_BOUNDS when the batch is full; INVALID_VALUE for a
+ * non-GPU image or an image with a plane count other than 1.
+ */
+void ImageBatchVarShape::pushBack(const Image& img) {
+    const int32_t n = numImages();
+    if (n >= m_capacity) {
+        throw Exception("ImageBatchVarShape::pushBack would exceed capacity", eStatusType::OUT_OF_BOUNDS);
+    }
+    if (img.device() != eDeviceType::GPU) {
+        throw Exception("ImageBatchVarShape only accepts GPU-resident images", eStatusType::INVALID_VALUE);
+    }
+
+    ImageDataStridedHip data = img.exportData();
+    if (data.numPlanes() != 1) {
+        throw Exception("ImageBatchVarShape only supports single-plane images", eStatusType::INVALID_VALUE);
+    }
+
+    // A previous exportData() may still be copying out of the host mirrors;
+    // wait for it on the CPU before overwriting them.
+    if (m_fencePending) {
+        HIP_VALIDATE_NO_ERRORS(hipEventSynchronize(m_postFence));
+        m_fencePending = false;
+    }
+
+    ImageBufferStrided slot{};
+    slot.numPlanes = 1;
+    slot.planes[0] = data.plane(0);
+    m_hostImagesBuffer[n] = slot;
+    m_hostFormatsBuffer[n] = img.format();
+
+    const Size2D imgSize = img.size();
+    if (n == 0) {
+        // Seed from scratch: an empty-batch query may have populated the
+        // cache with sentinels (FMT_NONE, 0x0); replacing avoids merging the
+        // first real image into them.
+        m_cacheMaxSize = imgSize;
+        m_cacheUniqueFormat = img.format();
+    } else {
+        // popBack invalidates m_cacheMaxSize without rescanning, so make sure
+        // both halves of the cache are populated before merging in.
+        doUpdateCache();
+        m_cacheMaxSize->w = std::max(m_cacheMaxSize->w, imgSize.w);
+        m_cacheMaxSize->h = std::max(m_cacheMaxSize->h, imgSize.h);
+        if (*m_cacheUniqueFormat != img.format()) {
+            m_cacheUniqueFormat = FMT_NONE;
+        }
+    }
+
+    m_images.push_back(img);
+}
+
+/**
+ * @brief Removes the last `count` images.
+ *
+ * @throws Exception INVALID_VALUE for a negative count, OUT_OF_BOUNDS when
+ * `count` exceeds numImages(). The batch is unchanged in both cases.
+ */
+void ImageBatchVarShape::popBack(int32_t count) {
+    if (count < 0) {
+        throw Exception("ImageBatchVarShape::popBack count must be non-negative", eStatusType::INVALID_VALUE);
+    }
+    if (count > numImages()) {
+        throw Exception("ImageBatchVarShape::popBack count exceeds numImages", eStatusType::OUT_OF_BOUNDS);
+    }
+
+    m_images.erase(m_images.end() - count, m_images.end());
+    // Slots at or beyond the new size get rewritten by later pushes, so they
+    // must be re-uploaded: pull the dirty index back if it sat past the end.
+    m_dirtyStartingFromIndex = std::min(m_dirtyStartingFromIndex, numImages());
+
+    // maxSize can only shrink on pop; force a rescan on next query. The rescan
+    // in doUpdateCache() recomputes uniqueFormat as well.
+    m_cacheMaxSize.reset();
+    if (numImages() == 0) {
+        m_cacheUniqueFormat.reset();
+    }
+}
+
+/** @brief Drops every image and invalidates the cache; buffers are kept. */
+void ImageBatchVarShape::clear() {
+    m_images.clear();
+    m_dirtyStartingFromIndex = 0;
+    m_cacheMaxSize.reset();
+    m_cacheUniqueFormat.reset();
+}
+
+/** @brief Bounding box over all images, {0, 0} when the batch is empty. */
+Size2D ImageBatchVarShape::maxSize() const {
+    doUpdateCache();
+    return m_cacheMaxSize.value_or(Size2D{0, 0});
+}
+
+/** @brief Format shared by all images, FMT_NONE when empty or mixed. */
+ImageFormat ImageBatchVarShape::uniqueFormat() const {
+    doUpdateCache();
+    return m_cacheUniqueFormat.value_or(FMT_NONE);
+}
+
+/**
+ * @brief Fills in the cached bounding box and unique format from the host
+ * mirrors. No-op when both are already present.
+ */
+void ImageBatchVarShape::doUpdateCache() const {
+    if (m_cacheMaxSize.has_value() && m_cacheUniqueFormat.has_value()) {
+        return;
+    }
+    const int32_t n = static_cast<int32_t>(m_images.size());
+    if (n == 0) {
+        m_cacheMaxSize = Size2D{0, 0};
+        m_cacheUniqueFormat = FMT_NONE;
+        return;
+    }
+
+    Size2D maxSz{0, 0};
+    ImageFormat unique = m_hostFormatsBuffer[0];
+    bool heterogeneous = false;
+    for (int32_t i = 0; i < n; ++i) {
+        const ImagePlaneStrided& p0 = m_hostImagesBuffer[i].planes[0];
+        maxSz.w = std::max(maxSz.w, p0.width);
+        maxSz.h = std::max(maxSz.h, p0.height);
+        if (!heterogeneous && m_hostFormatsBuffer[i] != unique) {
+            heterogeneous = true;
+        }
+    }
+    m_cacheMaxSize = maxSz;
+    m_cacheUniqueFormat = heterogeneous ? FMT_NONE : unique;
+}
+
+/**
+ * @brief Brings the device descriptor tables up to date on `stream`.
+ *
+ * Orders `stream` after any earlier descriptor upload that may still be in
+ * flight, then copies host mirror entries [m_dirtyStartingFromIndex,
+ * numImages()) and records the fence behind the copies.
+ */
+void ImageBatchVarShape::doSyncDirtySuffix(hipStream_t stream) {
+    // This wait must happen even when nothing is dirty: the previous upload
+    // may have been enqueued on a different stream, and work the caller puts
+    // on `stream` next reads the device tables.
+    if (m_fencePending) {
+        HIP_VALIDATE_NO_ERRORS(hipStreamWaitEvent(stream, m_postFence, /*flags=*/0));
+    }
+
+    const int32_t n = numImages();
+    if (m_dirtyStartingFromIndex >= n) {
+        return;
+    }
+    const int32_t dirtyCount = n - m_dirtyStartingFromIndex;
+
+    HIP_VALIDATE_NO_ERRORS(hipMemcpyAsync(m_devImagesBuffer + m_dirtyStartingFromIndex,
+                                          m_hostImagesBuffer + m_dirtyStartingFromIndex,
+                                          sizeof(ImageBufferStrided) * dirtyCount, hipMemcpyHostToDevice, stream));
+    HIP_VALIDATE_NO_ERRORS(hipMemcpyAsync(m_devFormatsBuffer + m_dirtyStartingFromIndex,
+                                          m_hostFormatsBuffer + m_dirtyStartingFromIndex,
+                                          sizeof(ImageFormat) * dirtyCount, hipMemcpyHostToDevice, stream));
+
+    HIP_VALIDATE_NO_ERRORS(hipEventRecord(m_postFence, stream));
+    m_fencePending = true;
+    m_dirtyStartingFromIndex = n;
+}
+
+/**
+ * @brief Uploads pending descriptor changes on `stream` and returns a snapshot
+ * pointing at the device tables (plus the host format mirror).
+ */
+ImageBatchVarShapeDataStridedHip ImageBatchVarShape::exportData(hipStream_t stream) {
+    doSyncDirtySuffix(stream);
+    doUpdateCache();
+
+    const Size2D maxSz = m_cacheMaxSize.value();
+    ImageBatchVarShapeBufferStrided buffer{};
+    buffer.uniqueFormat = m_cacheUniqueFormat.value();
+    buffer.maxWidth = maxSz.w;
+    buffer.maxHeight = maxSz.h;
+    buffer.formatList = m_devFormatsBuffer;
+    buffer.hostFormatList = m_hostFormatsBuffer;
+    buffer.imageList = m_devImagesBuffer;
+
+    return ImageBatchVarShapeDataStridedHip(numImages(), buffer);
+}
+
+} // namespace roccv
diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp
new file mode 100644
index 00000000..dea3c0c0
--- /dev/null
+++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp
@@ -0,0 +1,460 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +namespace { + +/** + * @brief Test allocator that distinguishes pinned-host from regular-host + * allocations and tallies each entry-point. Pure host-backed; no actual GPU + * dependency on the descriptor buffers — tests verify metadata round-trip and + * pointer identity, never dereference device memory through these. 
+ */
+class CountingAllocator : public IAllocator {
+ public:
+    // One tally per entry point. `mutable` because the allocator hooks below
+    // are all const member functions.
+    mutable int hipAllocs = 0;
+    mutable int hipFrees = 0;
+    mutable int hostAllocs = 0;
+    mutable int hostFrees = 0;
+    mutable int pinnedAllocs = 0;
+    mutable int pinnedFrees = 0;
+
+    void* allocHipMem(size_t size) const override { return Acquire(hipAllocs, size); }
+    void freeHipMem(void* ptr) const noexcept override { Release(hipFrees, ptr); }
+
+    void* allocHostMem(size_t size, int32_t /*alignment*/ = 0) const override { return Acquire(hostAllocs, size); }
+    void freeHostMem(void* ptr) const noexcept override { Release(hostFrees, ptr); }
+
+    void* allocHostPinnedMem(size_t size) const override { return Acquire(pinnedAllocs, size); }
+    void freeHostPinnedMem(void* ptr) const noexcept override { Release(pinnedFrees, ptr); }
+
+ private:
+    // Every flavour is backed by plain malloc/free; only the tally differs.
+    static void* Acquire(int& tally, size_t size) {
+        ++tally;
+        return std::malloc(size);
+    }
+    static void Release(int& tally, void* ptr) noexcept {
+        ++tally;
+        std::free(ptr);
+    }
+};
+
+// Build a single-plane GPU-resident image wrapper around a sentinel pointer.
+// The pointer is never dereferenced — pushBack only reads the descriptor.
+Image MakeFakeGpuImage(int32_t w, int32_t h, ImageFormat fmt, void* basePtr) {
+    ImageBufferStrided buf{};
+    buf.numPlanes = 1;
+    // Row stride is w * channels; presumably one byte per channel for the
+    // formats used here — TODO confirm if other formats are added.
+    buf.planes[0] = {w, h, static_cast(w * fmt.channels()), basePtr};
+    return ImageWrapData(ImageDataStridedHip(fmt, buf));
+}
+
+// Host-resident counterpart of MakeFakeGpuImage; used to exercise the
+// "CPU images are rejected" path.
+Image MakeFakeHostImage(int32_t w, int32_t h, ImageFormat fmt, void* basePtr) {
+    ImageBufferStrided buf{};
+    buf.numPlanes = 1;
+    buf.planes[0] = {w, h, static_cast(w * fmt.channels()), basePtr};
+    return ImageWrapData(ImageDataStridedHost(fmt, buf));
+}
+
+// Distinct sentinel addresses so images can be told apart.
+void* const FAKE_A = reinterpret_cast(0xA0000000ull);
+void* const FAKE_B = reinterpret_cast(0xB0000000ull);
+void* const FAKE_C = reinterpret_cast(0xC0000000ull);
+
+// =============================================================================
+// Construction
+// =============================================================================
+
+// A fresh batch reports its capacity, holds no images, and acquires/releases
+// exactly two device and two pinned-host buffers over its lifetime.
+void TestConstruction() {
+    CountingAllocator alloc;
+    {
+        ImageBatchVarShape batch(8, alloc);
+        EXPECT_EQ(batch.capacity(), 8);
+        EXPECT_EQ(batch.numImages(), 0);
+        EXPECT_EQ(AsInt(batch.begin() == batch.end()), 1);
+    }
+    // The batch went out of scope above, so the frees must have happened too.
+    EXPECT_EQ(alloc.hipAllocs, 2);
+    EXPECT_EQ(alloc.pinnedAllocs, 2);
+    EXPECT_EQ(alloc.hipFrees, 2);
+    EXPECT_EQ(alloc.pinnedFrees, 2);
+}
+
+// Zero and negative capacities are rejected with INVALID_VALUE.
+void TestConstructionRejectsBadCapacity() {
+    CountingAllocator alloc;
+    EXPECT_EXCEPTION(ImageBatchVarShape(0, alloc), eStatusType::INVALID_VALUE);
+    EXPECT_EXCEPTION(ImageBatchVarShape(-3, alloc), eStatusType::INVALID_VALUE);
+}
+
+// =============================================================================
+// pushBack — basic
+// =============================================================================
+
+// A single push is visible through numImages() and operator[].
+void TestPushBackSingle() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    Image img = MakeFakeGpuImage(640, 480, FMT_RGB8, FAKE_A);
+    batch.pushBack(img);
+
+    EXPECT_EQ(batch.numImages(), 1);
+    EXPECT_EQ(batch[0].size().w, 640);
+    EXPECT_EQ(batch[0].size().h, 480);
+    EXPECT_EQ(AsInt(batch[0].format() == FMT_RGB8), 1);
+}
+
+// maxSize() is the per-axis maximum: width comes from the 800x200 image and
+// height from the 640x480 one.
+void TestPushBackMultipleHeterogeneousSizes() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    batch.pushBack(MakeFakeGpuImage(640, 480, FMT_RGB8, FAKE_A));
+    batch.pushBack(MakeFakeGpuImage(320, 240, FMT_RGB8, FAKE_B));
+    batch.pushBack(MakeFakeGpuImage(800, 200, FMT_RGB8, FAKE_C));
+
+    EXPECT_EQ(batch.numImages(), 3);
+    EXPECT_EQ(batch.maxSize().w, 800);
+    EXPECT_EQ(batch.maxSize().h, 480);
+    EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1);
+}
+
+// The iterator-range overload appends every element of the range.
+void TestPushBackIteratorRange() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(8, alloc);
+
+    std::vector imgs;
+    imgs.push_back(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A));
+    imgs.push_back(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B));
+    imgs.push_back(MakeFakeGpuImage(300, 300, FMT_RGB8, FAKE_C));
+
+    batch.pushBack(imgs.begin(), imgs.end());
+
+    EXPECT_EQ(batch.numImages(), 3);
+    EXPECT_EQ(batch.maxSize().w, 300);
+}
+
+// =============================================================================
+// pushBack — validation
+// =============================================================================
+
+// Pushing into a full batch throws OUT_OF_BOUNDS.
+void TestPushBackCapacityOverflow() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(2, alloc);
+
+    batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A));
+    batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_B));
+
+    EXPECT_EXCEPTION(batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_C)), eStatusType::OUT_OF_BOUNDS);
+}
+
+// Host-resident images are refused with INVALID_VALUE.
+void TestPushBackHostImageRejected() {
+    CountingAllocator alloc;
+    ImageBatchVarShape batch(4, alloc);
+
+    Image cpuImg = MakeFakeHostImage(64, 64, FMT_U8, FAKE_A);
+    EXPECT_EXCEPTION(batch.pushBack(cpuImg), eStatusType::INVALID_VALUE);
+}
+
+// Note: pushBack's single-plane validation is defense-in-depth — Image's own
+// exportData() (image.cpp:118) currently hardcodes numPlanes=1 regardless of
+// the underlying buffer, so the public API can't construct a multi-plane Image
+// for this guard to fire on.
The test would need to be revisited when planar +// formats land in Image itself. + +void TestPushBackRangeRollbackOnFailure() { + CountingAllocator alloc; + ImageBatchVarShape batch(8, alloc); + + // Pre-populate so we can confirm the rollback restores exactly the + // pre-call state, not just back to zero. + batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + EXPECT_EQ(batch.numImages(), 1); + + // Mid-range CPU image — should rollback the partially-pushed entries. + std::vector imgs; + imgs.push_back(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + imgs.push_back(MakeFakeHostImage(300, 300, FMT_RGB8, FAKE_C)); // Will throw. + + EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::INVALID_VALUE); + + // Pre-call state is intact: 1 image, original maxSize. + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(batch.maxSize().w, 100); +} + +void TestPushBackRangeOverflowPrechecked() { + CountingAllocator alloc; + ImageBatchVarShape batch(2, alloc); + + std::vector imgs; + imgs.push_back(MakeFakeGpuImage(10, 10, FMT_RGB8, FAKE_A)); + imgs.push_back(MakeFakeGpuImage(20, 20, FMT_RGB8, FAKE_B)); + imgs.push_back(MakeFakeGpuImage(30, 30, FMT_RGB8, FAKE_C)); // 3rd overflows capacity 2. + + EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::OUT_OF_BOUNDS); + // Pre-checked: nothing was pushed. + EXPECT_EQ(batch.numImages(), 0); +} + +// ============================================================================= +// popBack / clear +// ============================================================================= + +void TestPopBack() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + batch.popBack(); + + EXPECT_EQ(batch.numImages(), 1); + // maxSize was reset on pop; the rescan should drop back to 100. 
+ EXPECT_EQ(batch.maxSize().w, 100); +} + +void TestPopBackMultiple() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(300, 300, FMT_RGB8, FAKE_C)); + batch.popBack(2); + + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(batch.maxSize().w, 100); +} + +void TestPopBackUnderflow() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + + EXPECT_EXCEPTION(batch.popBack(2), eStatusType::OUT_OF_BOUNDS); + // State preserved. + EXPECT_EQ(batch.numImages(), 1); +} + +void TestClearAndReuse() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + + batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + batch.clear(); + + EXPECT_EQ(batch.numImages(), 0); + EXPECT_EQ(batch.maxSize().w, 0); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); + + // Reuse after clear. 
+ batch.pushBack(MakeFakeGpuImage(50, 50, FMT_U8, FAKE_C)); + EXPECT_EQ(batch.numImages(), 1); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_U8), 1); +} + +// ============================================================================= +// uniqueFormat / maxSize cache +// ============================================================================= + +void TestUniqueFormatHomogeneous() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(128, 128, FMT_RGB8, FAKE_B)); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1); +} + +void TestUniqueFormatHeterogeneous() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGBA8, FAKE_B)); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); +} + +void TestUniqueFormatEmptyBatch() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); + EXPECT_EQ(batch.maxSize().w, 0); + EXPECT_EQ(batch.maxSize().h, 0); +} + +// ============================================================================= +// exportData +// ============================================================================= + +// exportData tests use the default allocator instead of CountingAllocator +// because they exercise the real H2D hipMemcpyAsync, which requires the +// device-side buffer to be a real hipMalloc'd pointer. 
+ +void TestExportDataEmpty() { + ImageBatchVarShape batch(4); + + auto data = batch.exportData(0); + EXPECT_EQ(data.numImages(), 0); + EXPECT_EQ(data.maxSize().w, 0); + EXPECT_EQ(data.maxSize().h, 0); + EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_NONE), 1); + EXPECT_EQ(AsInt(data.device()), AsInt(eDeviceType::GPU)); +} + +void TestExportDataMetadata() { + ImageBatchVarShape batch(4); + batch.pushBack(MakeFakeGpuImage(640, 480, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(320, 240, FMT_RGB8, FAKE_B)); + + auto data = batch.exportData(0); + EXPECT_EQ(data.numImages(), 2); + EXPECT_EQ(data.maxSize().w, 640); + EXPECT_EQ(data.maxSize().h, 480); + EXPECT_EQ(AsInt(data.uniqueFormat() == FMT_RGB8), 1); + EXPECT_EQ(AsInt(data.imageList() != nullptr), 1); + EXPECT_EQ(AsInt(data.formatList() != nullptr), 1); + EXPECT_EQ(AsInt(data.hostFormatList() != nullptr), 1); + // Pinned host mirror format entries are immediately host-readable. + EXPECT_EQ(AsInt(data.hostFormatList()[0] == FMT_RGB8), 1); + EXPECT_EQ(AsInt(data.hostFormatList()[1] == FMT_RGB8), 1); +} + +void TestExportDataCastRoundTrip() { + ImageBatchVarShape batch(4); + batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A)); + + auto hipData = batch.exportData(0); + EXPECT_EQ(hipData.numImages(), 1); + EXPECT_EQ(AsInt(hipData.device()), AsInt(eDeviceType::GPU)); + + // Cast through the base reference: succeeds for compatible kinds, nullopt + // for the host-resident leaf. 
+ const ImageBatchData& base = hipData; + EXPECT_EQ(AsInt(base.cast().has_value()), 1); + EXPECT_EQ(AsInt(base.cast().has_value()), 0); +} + +// ============================================================================= +// Move semantics +// ============================================================================= + +void TestMoveConstruction() { + CountingAllocator alloc; + { + ImageBatchVarShape src(4, alloc); + src.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + src.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + + ImageBatchVarShape dst(std::move(src)); + EXPECT_EQ(dst.numImages(), 2); + EXPECT_EQ(dst.maxSize().w, 200); + + // Source is valid-but-empty; destructor must not double-free. + EXPECT_EQ(src.numImages(), 0); + EXPECT_EQ(src.capacity(), 0); + } + // Exactly one set of allocations should have been freed. + EXPECT_EQ(alloc.hipAllocs, alloc.hipFrees); + EXPECT_EQ(alloc.pinnedAllocs, alloc.pinnedFrees); +} + +// ============================================================================= +// Iterator +// ============================================================================= + +void TestIteratorRangeFor() { + CountingAllocator alloc; + ImageBatchVarShape batch(4, alloc); + batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(300, 300, FMT_RGB8, FAKE_C)); + + int32_t expectedW = 100; + int32_t count = 0; + for (const Image& img : batch) { + EXPECT_EQ(img.size().w, expectedW); + expectedW += 100; + ++count; + } + EXPECT_EQ(count, 3); +} + +} // namespace + +int main(int argc, char** argv) { + (void)argc; + (void)argv; + TEST_CASES_BEGIN(); + + TEST_CASE(TestConstruction()); + TEST_CASE(TestConstructionRejectsBadCapacity()); + + TEST_CASE(TestPushBackSingle()); + TEST_CASE(TestPushBackMultipleHeterogeneousSizes()); + TEST_CASE(TestPushBackIteratorRange()); + + TEST_CASE(TestPushBackCapacityOverflow()); + 
TEST_CASE(TestPushBackHostImageRejected()); + TEST_CASE(TestPushBackRangeRollbackOnFailure()); + TEST_CASE(TestPushBackRangeOverflowPrechecked()); + + TEST_CASE(TestPopBack()); + TEST_CASE(TestPopBackMultiple()); + TEST_CASE(TestPopBackUnderflow()); + TEST_CASE(TestClearAndReuse()); + + TEST_CASE(TestUniqueFormatHomogeneous()); + TEST_CASE(TestUniqueFormatHeterogeneous()); + TEST_CASE(TestUniqueFormatEmptyBatch()); + + TEST_CASE(TestExportDataEmpty()); + TEST_CASE(TestExportDataMetadata()); + TEST_CASE(TestExportDataCastRoundTrip()); + + TEST_CASE(TestMoveConstruction()); + + TEST_CASE(TestIteratorRangeFor()); + + TEST_CASES_END(); +} From a289c2d6606107b04e4d72f79b3d93d83280df6a Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Thu, 14 May 2026 16:45:07 -0400 Subject: [PATCH 09/13] Move image exportData implementation outside of class definition --- include/core/image.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/include/core/image.hpp b/include/core/image.hpp index 7a42d51a..ef7707a8 100644 --- a/include/core/image.hpp +++ b/include/core/image.hpp @@ -134,14 +134,7 @@ class Image { * std::bad_cast if the underlying buffer kind doesn't match Derived. */ template - Derived exportData() const { - ImageData data = exportData(); - auto derived = data.cast(); - if (!derived.has_value()) { - throw std::bad_cast(); - } - return derived.value(); - } + Derived exportData() const; private: Image(const Requirements& reqs, eDeviceType device, std::shared_ptr storage); @@ -155,6 +148,16 @@ class Image { std::array m_planeRowStride; }; +template +Derived Image::exportData() const { + ImageData data = exportData(); + auto derived = data.cast(); + if (!derived.has_value()) { + throw std::bad_cast(); + } + return derived.value(); +} + /** * @brief Wrap an externally-owned buffer as an Image without allocating. 
* From 9055ed0b5b3c3a6c6858029b17bedacb6ab9751e Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Fri, 15 May 2026 14:24:11 -0400 Subject: [PATCH 10/13] Support batch indexing in width and height methods for kernel wrappers --- include/core/wrappers/border_wrapper.hpp | 55 ++-- include/core/wrappers/image_wrapper.hpp | 8 +- .../core/wrappers/interpolation_wrapper.hpp | 47 ++-- src/op_bilateral_filter.cpp | 12 +- src/op_copy_make_border.cpp | 5 +- src/op_remap.cpp | 6 +- src/op_resize.cpp | 22 +- src/op_rotate.cpp | 11 +- src/op_warp_perspective.cpp | 4 +- .../core/wrappers/test_border_wrapper.cpp | 7 +- .../wrappers/test_interpolation_wrapper.cpp | 33 +-- .../operators/test_op_bilateral_filter.cpp | 15 +- .../operators/test_op_copy_make_border.cpp | 3 +- .../cpp/src/tests/operators/test_op_remap.cpp | 234 +++++++++++------- .../src/tests/operators/test_op_resize.cpp | 4 +- .../src/tests/operators/test_op_rotate.cpp | 6 +- .../tests/operators/test_op_warp_affine.cpp | 2 +- .../operators/test_op_warp_perspective.cpp | 2 +- 18 files changed, 266 insertions(+), 210 deletions(-) diff --git a/include/core/wrappers/border_wrapper.hpp b/include/core/wrappers/border_wrapper.hpp index f55493a5..08e21a3c 100644 --- a/include/core/wrappers/border_wrapper.hpp +++ b/include/core/wrappers/border_wrapper.hpp @@ -23,38 +23,37 @@ #include +#include +#include + #include "core/wrappers/image_wrapper.hpp" #include "operator_types.h" namespace roccv { /** - * @brief Wrapper class for ImageWrapper. This extends the descriptors by defining behaviors for when tensor - * coordinates go out of scope. + * @brief Wrapper class which adds border-handling behavior on top of an underlying image wrapper. + * + * Templated on the wrapper type W (e.g. ImageWrapper, VarShapeImageWrapper) so that the same + * border math is shared between uniform-shape and variable-shape image batches. The pixel value + * type T is recovered from W::ValueType. * - * @tparam T The underlying data type of the tensor. 
* @tparam BorderType The border type to use when coordinates are out of bounds. + * @tparam W The underlying image wrapper type. Must expose ValueType, at(n,h,w,c), width(n), and height(n). */ -template +template class BorderWrapper { public: - /** - * @brief Wraps an ImageWrapper and extends its capabilities to handle out of bounds coordinates. - * - * @param tensor The tensor to wrap. - * @param border_value The fallback border color to use when using a constant border mode. - */ - BorderWrapper(const Tensor& tensor, T border_value) : m_desc(tensor), m_border_value(border_value) {} + using ValueType = typename W::ValueType; /** - * @brief Constructs a BorderWrapper from an existing ImageWrapper. Extends its capabilities to handle out of bound - * coordinates. + * @brief Constructs a BorderWrapper from an existing image wrapper. Extends its capabilities to handle out + * of bound coordinates. * - * @param image_wrapper The ImageWrapper to wrap around the BorderWrapper. + * @param image_wrapper The image wrapper to wrap around the BorderWrapper. * @param border_value The fallback border color to use when using a constant border mode. */ - BorderWrapper(ImageWrapper image_wrapper, T border_value) - : m_desc(image_wrapper), m_border_value(border_value) {} + BorderWrapper(W image_wrapper, ValueType border_value) : m_desc(image_wrapper), m_border_value(border_value) {} /** * @brief Returns a reference to the underlying data given image coordinates. If the coordinates fall out of bounds, @@ -66,11 +65,14 @@ class BorderWrapper { * @param c The channel index. * @return A reference to the underlying data or a fallback border value of type T. */ - __device__ __host__ const T at(int64_t n, int64_t h, int64_t w, int64_t c) const { + __device__ __host__ const ValueType at(int64_t n, int64_t h, int64_t w, int64_t c) const { + const int64_t imgWidth = width(n); + const int64_t imgHeight = height(n); + // Constant border type implementation. 
This is a special case which doesn't remap values, but rather returns // the provided constant value. if constexpr (BorderType == eBorderType::BORDER_TYPE_CONSTANT) { - if (w < 0 || w >= width() || h < 0 || h >= height()) + if (w < 0 || w >= imgWidth || h < 0 || h >= imgHeight) return m_border_value; else return m_desc.at(n, h, w, c); @@ -80,13 +82,12 @@ class BorderWrapper { // required at image borders. While this may cause branch divergence, a good bulk of the pixels should fall // within image bounds and will take the same branch. This is preferred over having to do expensive calculations // for EVERY pixel in the image (most of which do not require said calculations). - if (w >= 0 && w < width() && h >= 0 && h < height()) { + if (w >= 0 && w < imgWidth && h >= 0 && h < imgHeight) { return m_desc.at(n, h, w, c); } // Otherwise, do some additional calculations to map the provided x and y coordinates to be within bounds. int64_t x = w, y = h; - int64_t imgWidth = width(), imgHeight = height(); // Reflect border type implementation. (Note: This is NOT REFLECT101, pixels at the border will be duplicated as // is the intended behavior for this border mode.) @@ -139,18 +140,20 @@ class BorderWrapper { } /** - * @brief Retrives the height of the images. + * @brief Retrieves the height of the image at batch index n. * + * @param n Batch index. Ignored when W is a uniform-shape wrapper. * @return Image height. */ - __device__ __host__ inline int64_t height() const { return m_desc.height(); } + __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); } /** - * @brief Retrieves the width of the image. + * @brief Retrieves the width of the image at batch index n. * + * @param n Batch index. Ignored when W is a uniform-shape wrapper. * @return Image width.
*/ - __device__ __host__ inline int64_t width() const { return m_desc.width(); } + __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); } /** * @brief Retrieves the number of batches in the image tensor. @@ -167,7 +170,7 @@ class BorderWrapper { __device__ __host__ inline int64_t channels() const { return m_desc.channels(); } private: - ImageWrapper m_desc; - T m_border_value; + W m_desc; + ValueType m_border_value; }; } // namespace roccv \ No newline at end of file diff --git a/include/core/wrappers/image_wrapper.hpp b/include/core/wrappers/image_wrapper.hpp index e174c64a..4f1835f5 100644 --- a/include/core/wrappers/image_wrapper.hpp +++ b/include/core/wrappers/image_wrapper.hpp @@ -139,16 +139,20 @@ class ImageWrapper { /** * @brief Retrives the height of the images. * + * @param n Batch index. Ignored for uniform-shape ImageWrapper; included so the signature + * matches VarShapeImageWrapper for use as a template parameter to BorderWrapper et al. * @return Image height. */ - __device__ __host__ inline int64_t height() const { return shape.h; } + __device__ __host__ inline int64_t height(int64_t /*n*/ = 0) const { return shape.h; } /** * @brief Retrieves the width of the image. * + * @param n Batch index. Ignored for uniform-shape ImageWrapper; included so the signature + * matches VarShapeImageWrapper for use as a template parameter to BorderWrapper et al. * @return Image width. */ - __device__ __host__ inline int64_t width() const { return shape.w; } + __device__ __host__ inline int64_t width(int64_t /*n*/ = 0) const { return shape.w; } /** * @brief Retrieves the number of batches in the image tensor. 
diff --git a/include/core/wrappers/interpolation_wrapper.hpp b/include/core/wrappers/interpolation_wrapper.hpp index 7adb8cb6..68daaad8 100644 --- a/include/core/wrappers/interpolation_wrapper.hpp +++ b/include/core/wrappers/interpolation_wrapper.hpp @@ -23,41 +23,35 @@ #include "core/detail/casting.hpp" #include "core/detail/math/vectorized_type_math.hpp" -#include "core/wrappers/border_wrapper.hpp" #include "core/detail/vector_utils.hpp" +#include "core/wrappers/border_wrapper.hpp" #include "operator_types.h" namespace roccv { /** - * @brief A kernel-friendly wrapper which provides interpolation logic based on the given - * coordinates. This tensor wrapper is typically only used for input tensors and does not provide write access to its - * underlying data. + * @brief A kernel-friendly wrapper which provides interpolation logic on top of an underlying image wrapper. + * + * Templated on the wrapper type W (e.g. ImageWrapper, VarShapeImageWrapper) so that the same + * interpolation math is shared between uniform-shape and variable-shape image batches. The pixel value + * type T is recovered from W::ValueType. Read-only access; do not use for output tensors. * - * @tparam T Underlying data type of the tensor data. - * @tparam C Number of channels in data type. * @tparam B Border type to use for interpolation. * @tparam I Interpolation type to use. + * @tparam W The underlying image wrapper type. Must expose ValueType, at(n,h,w,c), width(n), and height(n). */ -template +template class InterpolationWrapper { public: - /** - * @brief Wraps a roccv::Tensor in an InterpolationWrapper to provide pixel interpolation when accessing - * non-integer coordinate mappings. - * - * @param tensor The tensor to wrap. - * @param border_value A fallback border value to use in the case of a constant border mode. 
- */ - InterpolationWrapper(const Tensor& tensor, T border_value) : m_desc(tensor, border_value) {} + using ValueType = typename W::ValueType; /** - * @brief Wraps a BorderWrapper in an Interpolation wrapper. Extends capabilities to interpolate pixel values when + * @brief Wraps a BorderWrapper in an InterpolationWrapper. Extends capabilities to interpolate pixel values when * given non-integer coordinates. * * @param borderWrapper The BorderWrapper to wrap. */ - InterpolationWrapper(BorderWrapper borderWrapper) : m_desc(borderWrapper) {} + InterpolationWrapper(BorderWrapper borderWrapper) : m_desc(borderWrapper) {} /** * @brief This function calculates the weighting coefficients for the Catmull-Rom cubic interpolation. @@ -92,7 +86,7 @@ class InterpolationWrapper { * @param w Width coordinates. * @return An interpolated value. */ - inline __device__ __host__ const T at(int64_t n, float h, float w, int64_t c) const { + inline __device__ __host__ const ValueType at(int64_t n, float h, float w, int64_t c) const { if constexpr (I == eInterpolationType::INTERP_TYPE_NEAREST) { // Nearest neighbor interpolation implementation return m_desc.at(n, lroundf(h), lroundf(w), c); @@ -102,7 +96,7 @@ class InterpolationWrapper { // - - // v3 -- v4 - using WorkType = detail::MakeType>; + using WorkType = detail::MakeType>; int64_t x0 = static_cast(floorf(w)); int64_t x1 = x0 + 1; @@ -118,10 +112,10 @@ class InterpolationWrapper { auto q2 = v3 * (x1 - w) + v4 * (w - x0); auto q = q1 * (y1 - h) + q2 * (h - y0); - return detail::RangeCast(q); + return detail::RangeCast(q); } else if constexpr (I == eInterpolationType::INTERP_TYPE_CUBIC) { using namespace roccv::detail; - using WorkType = detail::MakeType>; + using WorkType = detail::MakeType>; // Integer coordinates for pixel (x, y) int64_t int_x = static_cast(floorf(w)); @@ -136,20 +130,21 @@ class InterpolationWrapper { WorkType sum = SetAll(0.0f); for (int index_y = -1; index_y <= 2; index_y++) { for (int index_x = -1; index_x <= 
2; index_x++) { - sum += detail::RangeCast(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) * (weight_x[index_x + 1] * weight_y[index_y + 1]); + sum += detail::RangeCast(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) * + (weight_x[index_x + 1] * weight_y[index_y + 1]); } } - return detail::RangeCast(sum); + return detail::RangeCast(sum); } } - __device__ __host__ inline int64_t height() const { return m_desc.height(); } - __device__ __host__ inline int64_t width() const { return m_desc.width(); } + __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); } + __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); } __device__ __host__ inline int64_t batches() const { return m_desc.batches(); } __device__ __host__ inline int64_t channels() const { return m_desc.channels(); } private: - BorderWrapper m_desc; + BorderWrapper m_desc; }; } // namespace roccv \ No newline at end of file diff --git a/src/op_bilateral_filter.cpp b/src/op_bilateral_filter.cpp index dffba8ae..c4adfb25 100644 --- a/src/op_bilateral_filter.cpp +++ b/src/op_bilateral_filter.cpp @@ -43,7 +43,7 @@ BilateralFilter::~BilateralFilter() {} template void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &input, const Tensor &output, int diameter, float sigmaColor, float sigmaSpace, T borderValue, eDeviceType device) { - BorderWrapper inputWrapper(input, borderValue); + BorderWrapper> inputWrapper(ImageWrapper(input), borderValue); ImageWrapper outputWrapper(output); if (outputWrapper.channels() > 4 || outputWrapper.channels() < 1) { @@ -61,8 +61,7 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp sigmaSpace = 1.0f; } - const int radius = - (diameter <= 0) ? static_cast(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1); + const int radius = (diameter <= 0) ? 
static_cast(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1); float spaceCoeff = -1 / (2 * sigmaSpace * sigmaSpace); float colorCoeff = -1 / (2 * sigmaColor * sigmaColor); @@ -89,9 +88,10 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp for (int j = 0; j < divisor; j++) { for (int i = 0; i < dividend; i++) { - threads.push_back(std::thread(Kernels::Host::bilateral_filter, ImageWrapper>, - inputWrapper, outputWrapper, radius, rollingHeight, rollingWidth, - prevHeight, prevWidth, spaceCoeff, colorCoeff)); + threads.push_back( + std::thread(Kernels::Host::bilateral_filter>, ImageWrapper>, + inputWrapper, outputWrapper, radius, rollingHeight, rollingWidth, prevHeight, prevWidth, + spaceCoeff, colorCoeff)); prevWidth = rollingWidth; rollingWidth += factorW; } diff --git a/src/op_copy_make_border.cpp b/src/op_copy_make_border.cpp index feacfbd9..32e4ad7a 100644 --- a/src/op_copy_make_border.cpp +++ b/src/op_copy_make_border.cpp @@ -38,7 +38,7 @@ namespace roccv { template void dispatch_copy_make_border_border_mode(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top, int32_t left, T border_value, eDeviceType device) { - BorderWrapper in_desc(input, border_value); + BorderWrapper> in_desc(ImageWrapper(input), border_value); ImageWrapper out_desc(output); switch (device) { @@ -83,8 +83,7 @@ void dispatch_copy_make_border(hipStream_t stream, const Tensor& input, const Te } void CopyMakeBorder::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top, - int32_t left, eBorderType border_mode, float4 border_value, - eDeviceType device) const { + int32_t left, eBorderType border_mode, float4 border_value, eDeviceType device) const { CHECK_TENSOR_DEVICE(input, device); CHECK_TENSOR_LAYOUT(input, eTensorLayout::TENSOR_LAYOUT_NHWC, eTensorLayout::TENSOR_LAYOUT_HWC); CHECK_TENSOR_DATATYPES(input, eDataType::DATA_TYPE_U8, eDataType::DATA_TYPE_S8, eDataType::DATA_TYPE_U16, diff --git 
a/src/op_remap.cpp b/src/op_remap.cpp index 0992cf44..5b01f4dc 100644 --- a/src/op_remap.cpp +++ b/src/op_remap.cpp @@ -77,8 +77,10 @@ void dispatch_remap_mapInterp(hipStream_t stream, const Tensor &input, const Ten const eRemapType mapValueType, const bool alignCorners, const T borderValue, const eDeviceType device) { ImageWrapper outputWrapper(output); - InterpolationWrapper wrappedMapTensor(map, make_float2(0, 0)); - InterpolationWrapper inputWrapper(input, borderValue); + BorderWrapper> mapBorder(ImageWrapper(map), make_float2(0, 0)); + InterpolationWrapper> wrappedMapTensor(mapBorder); + BorderWrapper> inputBorder(ImageWrapper(input), borderValue); + InterpolationWrapper> inputWrapper(inputBorder); int mapBatchSize = wrappedMapTensor.batches(); diff --git a/src/op_resize.cpp b/src/op_resize.cpp index d7cd0b61..71fe6889 100644 --- a/src/op_resize.cpp +++ b/src/op_resize.cpp @@ -25,7 +25,6 @@ THE SOFTWARE. #include #include "common/validation_helpers.hpp" -#include "core/detail/casting.hpp" #include "core/exception.hpp" #include "core/status_type.h" #include "core/wrappers/interpolation_wrapper.hpp" @@ -38,7 +37,8 @@ template void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) { ImageWrapper outputWrapper(output); // Resize operation should clamp values at the border (REPLICATE border mode) - InterpolationWrapper inputWrapper(input, T{}); + BorderWrapper> inputBorder(ImageWrapper(input), T{}); + InterpolationWrapper> inputWrapper(inputBorder); float scaleX = inputWrapper.width() / static_cast(outputWrapper.width()); float scaleY = inputWrapper.height() / static_cast(outputWrapper.height()); @@ -62,13 +62,13 @@ void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tenso template void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor& output, eInterpolationType interpolation, eDeviceType device) { - static const std::unordered_map< - eInterpolationType, - 
std::function> - funcs = {{eInterpolationType::INTERP_TYPE_NEAREST, dispatch_resize_interp}, - {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp}, - {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp} - }; + static const std::unordered_map> + funcs = { + {eInterpolationType::INTERP_TYPE_NEAREST, + dispatch_resize_interp}, + {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp}, + {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp}}; if (!funcs.contains(interpolation)) { throw Exception("Operation does not support the given interpolation mode.", eStatusType::NOT_IMPLEMENTED); @@ -78,8 +78,8 @@ void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor func(stream, input, output, device); } -void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, - eInterpolationType interpolation, eDeviceType device) const { +void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, eInterpolationType interpolation, + eDeviceType device) const { CHECK_TENSOR_DEVICE(input, device); CHECK_TENSOR_DEVICE(output, device); diff --git a/src/op_rotate.cpp b/src/op_rotate.cpp index 28806779..c1eea211 100644 --- a/src/op_rotate.cpp +++ b/src/op_rotate.cpp @@ -55,7 +55,8 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso T borderVal = detail::SaturateCast(make_float4(0.0f, 0.0f, 0.0f, 0.0f)); ImageWrapper outputWrap(output); - InterpolationWrapper inputWrap(input, borderVal); + BorderWrapper> inputBorder(ImageWrapper(input), borderVal); + InterpolationWrapper> inputWrap(inputBorder); switch (device) { case eDeviceType::GPU: { @@ -74,8 +75,8 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso } template -void dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, - double2 shift, eInterpolationType interpolation, eDeviceType device) { +void 
dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, double2 shift, + eInterpolationType interpolation, eDeviceType device) { // clang-format off static const std::unordered_map transform(transMatrix); ImageWrapper outputWrapper(output); - InterpolationWrapper inputWrapper(input, borderValue); + BorderWrapper> inputBorder(ImageWrapper(input), borderValue); + InterpolationWrapper> inputWrapper(inputBorder); // Launch CPU/GPU kernel depending on requested device type. switch (device) { diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp index 873f05dc..fd836773 100644 --- a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp @@ -115,8 +115,8 @@ int64_t GetCoordOfBorderPel(int64_t u, int64_t dimSize, eBorderType borderMode) * coordinates fall out of bounds. */ template > -BT GoldenBorderAt(ImageWrapper& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y, - int64_t x, int64_t channel) { +BT GoldenBorderAt(ImageWrapper& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y, int64_t x, + int64_t channel) { int64_t outX = x, outY = y; if (borderMode == eBorderType::BORDER_TYPE_CONSTANT) { @@ -161,7 +161,8 @@ void TestCorrectness(float4 borderValue, int32_t batchSize, Size2D imageSize, in FillVector(inputData); // BorderWrapper to calculate the actual calculated values. 
- BorderWrapper borderWrap(ImageWrapper(inputData, batchSize, imageSize.w, imageSize.h), borderVal); + BorderWrapper> borderWrap( + ImageWrapper(inputData, batchSize, imageSize.w, imageSize.h), borderVal); std::vector actualOutput(numElementsWithBorder); int actualIndex = 0; for (int batch = 0; batch < batchSize; ++batch) { diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp index a4466530..5ec3176e 100644 --- a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp @@ -21,9 +21,9 @@ #include #include -#include "core/detail/vector_utils.hpp" #include +#include "core/detail/vector_utils.hpp" #include "test_helpers.hpp" using namespace roccv; @@ -45,7 +45,7 @@ namespace { * @return T The interpolated pixel. */ template -T GoldenLinear(BorderWrapper input, int64_t sample, float y, float x) { +T GoldenLinear(BorderWrapper> input, int64_t sample, float y, float x) { // Defines the vectorized float type for intermediate calculations. using WorkType = detail::MakeType>; @@ -86,7 +86,7 @@ T GoldenLinear(BorderWrapper input, int64_t sample, float y, floa * @return T The interpolated pixel. */ template -T GoldenNearest(BorderWrapper input, int64_t sample, float y, float x) { +T GoldenNearest(BorderWrapper> input, int64_t sample, float y, float x) { // Nearest neighbor interpolation. Rounds given floating point values to the nearest integer. return input.at(sample, lroundf(y), lroundf(x), 0); } @@ -98,7 +98,7 @@ T GoldenNearest(BorderWrapper input, int64_t sample, float y, flo * @return None. 
*/ void CalBicubicWeights(float dist, float* weight) { - const float A = -0.5f; // Note OpenCV sets alpha to -0.75f + const float A = -0.5f; // Note OpenCV sets alpha to -0.75f weight[0] = ((A * (dist + 1) - 5 * A) * (dist + 1) + 8 * A) * (dist + 1) - 4 * A; weight[1] = ((A + 2) * dist - (A + 3)) * dist * dist + 1; @@ -107,7 +107,8 @@ void CalBicubicWeights(float dist, float* weight) { } /** - * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV libraries. + * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV + * libraries. * * @tparam T Image datatype. * @tparam BorderType Border type for boundary conditions. @@ -118,7 +119,7 @@ void CalBicubicWeights(float dist, float* weight) { * @return T The interpolated pixel. */ template -T GoldenBicubic(BorderWrapper input, int64_t sample, float y, float x) { +T GoldenBicubic(BorderWrapper> input, int64_t sample, float y, float x) { // Defines the vectorized float type for intermediate calculations. using WorkType = detail::MakeType>; @@ -135,7 +136,8 @@ T GoldenBicubic(BorderWrapper input, int64_t sample, float y, flo WorkType sum = SetAll(0.0f); for (int indexY = -1; indexY <= 2; indexY++) { for (int indexX = -1; indexX <= 2; indexX++) { - sum += detail::RangeCast(input.at(sample, intY + indexY, intX + indexX, 0)) * (weightX[indexX + 1] * weightY[indexY + 1]); + sum += detail::RangeCast(input.at(sample, intY + indexY, intX + indexX, 0)) * + (weightX[indexX + 1] * weightY[indexY + 1]); } } @@ -156,7 +158,7 @@ T GoldenBicubic(BorderWrapper input, int64_t sample, float y, flo * @return T The interpolated pixel. 
*/ template -T GoldenInterpolationAt(BorderWrapper input, int64_t sample, float y, float x, +T GoldenInterpolationAt(BorderWrapper> input, int64_t sample, float y, float x, eInterpolationType interp) { switch (interp) { case eInterpolationType::INTERP_TYPE_NEAREST: @@ -202,9 +204,11 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl std::vector> goldenOutput; // Use roccv::InterpolationWrapper to get actual output - InterpolationWrapper actualWrap( - (BorderWrapper(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal))); - BorderWrapper goldenWrap(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal); + InterpolationWrapper> actualWrap( + (BorderWrapper>(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), + borderVal))); + BorderWrapper> goldenWrap(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), + borderVal); for (int b = 0; b < batchSize; b++) { for (float y = 0; y < imageSize.h; y += idxDelta) { @@ -220,7 +224,8 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl } } } - if constexpr (std::is_integral_v> && std::is_signed_v> && sizeof(detail::BaseType) == 4) { + if constexpr (std::is_integral_v> && std::is_signed_v> && + sizeof(detail::BaseType) == 4) { CompareVectorsNear(actualOutput, goldenOutput, NEAR_EQUAL_THRESHOLD * 2); } else { CompareVectorsNear(actualOutput, goldenOutput); @@ -228,7 +233,7 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl } } // namespace -int main(int argc, char **argv) { +int main(int argc, char** argv) { (void)argc; (void)argv; TEST_CASES_BEGIN(); @@ -322,7 +327,7 @@ int main(int argc, char **argv) { TEST_CASE((TestCorrectness(1, {20, 53}, make_float4(0, 0, 0, 1), 0.1f))); TEST_CASE((TestCorrectness(3, {38, 10}, make_float4(0, 0, 0, 1), 0.1f))); TEST_CASE((TestCorrectness(5, {65, 21}, make_float4(1, 0.5, 0.5, 1), 0.1f))); - // clang-format on + // clang-format on TEST_CASES_END(); } \ No 
newline at end of file diff --git a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp index f208962c..8ae5a10a 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp @@ -51,7 +51,8 @@ namespace { template > void GenerateGoldenBilateral(std::vector& input, std::vector& output, int32_t batchSize, Size2D imageSize, int diameter, float sigmaColor, float sigmaSpace, T borderValue) { - BorderWrapper src(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderValue); + BorderWrapper> src(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), + borderValue); ImageWrapper dst(output, batchSize, imageSize.w, imageSize.h); using namespace roccv::detail; using Worktype = MakeType>; @@ -179,9 +180,9 @@ int main(int argc, char** argv) { TEST_CASE((TestCorrectness(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); TEST_CASE((TestCorrectness(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f, - {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); - TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, - {500.0, 500.0, 0.0, 0.0}, eDeviceType::GPU))); + {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0}, + eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); @@ -288,9 +289,9 @@ int main(int argc, char** argv) { TEST_CASE((TestCorrectness(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); TEST_CASE((TestCorrectness(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f, - {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); - TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, - {500.0, 500.0, 0.0, 0.0}, eDeviceType::CPU))); + {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 
500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0}, + eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp index 4320f04e..30050917 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp @@ -57,7 +57,8 @@ std::vector GoldenCopyMakeBorder(std::vector input, int batchSize, Size2 // Wrap the input images in a BorderWrapper to handle out of bounds image behavior. The BorderWrapper has already // been tested in another test so it can be used reliably. - BorderWrapper inputWrap(ImageWrapper(input, batchSize, inputSize.w, inputSize.h), borderVal); + BorderWrapper> inputWrap(ImageWrapper(input, batchSize, inputSize.w, inputSize.h), + borderVal); std::vector output(batchSize * outputSize.h * outputSize.w * channels); ImageWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp index 634344a4..4ad9d914 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp @@ -25,8 +25,9 @@ THE SOFTWARE. #include #include #include -#include "core/detail/internal_structs.hpp" + #include "core/detail/casting.hpp" +#include "core/detail/internal_structs.hpp" #include "core/detail/math/vectorized_type_math.hpp" #include "core/detail/type_traits.hpp" #include "operator_types.h" @@ -39,11 +40,11 @@ using namespace roccv::detail; // Keep all non-entrypoint functions in an anonymous namespace to prevent redefinition errors across translation units. 
namespace { -RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 &mapSize, bool alignCorners, eRemapType mapValueType) -{ +RemapParams GetRemapParams(const int2& srcSize, const int2& dstSize, const int2& mapSize, bool alignCorners, + eRemapType mapValueType) { RemapParams params; - switch(mapValueType) { + switch (mapValueType) { case REMAP_ABSOLUTE: params.srcScale = make_float2(0.f, 0.f); params.mapScale = StaticCast(mapSize) / StaticCast(dstSize); @@ -54,7 +55,7 @@ RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 case REMAP_ABSOLUTE_NORMALIZED: params.srcScale = make_float2(0.f, 0.f); params.mapScale = StaticCast(mapSize) / StaticCast(dstSize); - params.valScale = (StaticCast(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f; + params.valScale = (StaticCast(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f; params.srcOffset = params.valScale - (alignCorners ? 0.f : .5f); params.dstOffset = 0.f; break; @@ -87,15 +88,15 @@ RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 */ template > -std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth, int32_t inHeight, int32_t outWidth, - int32_t outHeight, int32_t mapWidth, int32_t mapHeight, std::vector& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) { - +std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth, + int32_t inHeight, int32_t outWidth, int32_t outHeight, int32_t mapWidth, int32_t mapHeight, + std::vector& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) { int channels = detail::NumElements; int outputSize = batchSize * outWidth * outHeight * channels; std::vector output(outputSize); // Create interpolation wrapper for input vector - InterpolationWrapper src((BorderWrapper( + InterpolationWrapper> src((BorderWrapper>( ImageWrapper(input, batchSize, inWidth, inHeight), 
detail::SaturateCast(borderValue)))); // Wrap the output vector for simplified data access @@ -103,8 +104,10 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m // Create an interpolation wrapper for the map tensor // InterpolationWrapper wrappedMapTensor(map, make_float2(0, 0)); - InterpolationWrapper map((BorderWrapper( - ImageWrapper(mapData.data(), mapBatchSize, mapWidth, mapHeight), detail::SaturateCast(borderValue)))); + InterpolationWrapper> map( + (BorderWrapper>( + ImageWrapper(mapData.data(), mapBatchSize, mapWidth, mapHeight), + detail::SaturateCast(borderValue)))); int2 srcSize = make_int2(src.width(), src.height()); int2 dstSize = make_int2(dst.width(), dst.height()); @@ -119,13 +122,12 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m for (int b = 0; b < dst.batches(); b++) { for (int y = 0; y < dst.height(); y++) { for (int x = 0; x < dst.width(); x++) { - dstCoord.x = static_cast(x); dstCoord.y = static_cast(y); - + mapCoord.x = (dstCoord.x + params.dstOffset) * params.mapScale.x; mapCoord.y = (dstCoord.y + params.dstOffset) * params.mapScale.y; - + float2 mapValue = map.at((mapBatchSize == 1 ? 
0 : b), mapCoord.y, mapCoord.x, 0); srcCoord.x = dstCoord.x * params.srcScale.x + mapValue.x * params.valScale.x + params.srcOffset.x; @@ -162,7 +164,8 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m */ template > -void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight, int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType, +void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight, + int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType, bool alignCorners, eDeviceType device) { // Create input and output tensor based on test parameters Tensor input(batchSize, {inWidth, inHeight}, format, device); @@ -174,7 +177,7 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, // Copy generated input data into input tensor CopyVectorIntoTensor(input, inputData); - + int mapSize = mapBatchSize * mapWidth * mapHeight; std::vector mapData(mapSize); @@ -188,11 +191,10 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, } } } - } - else if (mapType == REMAP_ABSOLUTE_NORMALIZED) { + } else if (mapType == REMAP_ABSOLUTE_NORMALIZED) { for (int b = 0; b < mapBatchSize; b++) { - for (int y = 0; y < mapHeight; y++){ - for (int x = 0; x < mapWidth; x++){ + for (int y = 0; y < mapHeight; y++) { + for (int x = 0; x < mapWidth; x++) { float normX = ((2.0f * static_cast(x)) / static_cast(mapWidth - 1)) - 1.0f; float normY = ((2.0f * static_cast(y)) / static_cast(mapHeight - 1)) - 1.0f; @@ -204,11 +206,10 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, } } } - } - else if (mapType == REMAP_RELATIVE_NORMALIZED) { + } else if (mapType == REMAP_RELATIVE_NORMALIZED) { for (int b = 0; b < mapBatchSize; b++) { - for (int y = 0; y < mapHeight; y++){ - for (int x = 0; x < mapWidth; x++){ + for (int y = 0; 
y < mapHeight; y++) { + for (int x = 0; x < mapWidth; x++) { // Generate normalized coordinates in [-1, 1] range float normX = ((2.0f * static_cast(x)) / static_cast(mapWidth - 1)) - 1.0f; float normY = ((2.0f * static_cast(y)) / static_cast(mapHeight - 1)) - 1.0f; @@ -235,7 +236,8 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, hipStream_t stream; HIP_VALIDATE_NO_ERRORS(hipStreamCreate(&stream)); Remap op; - op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue, device); + op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue, + device); HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); HIP_VALIDATE_NO_ERRORS(hipStreamDestroy(stream)); @@ -243,9 +245,9 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, std::vector result(output.shape().size()); CopyTensorIntoVector(result, output); - std::vector ref = GoldenRemap(inputData, batchSize, mapBatchSize, inWidth, - inHeight, outWidth, outHeight, - mapWidth, mapHeight, mapData, mapType, alignCorners, borderValue); + std::vector ref = GoldenRemap( + inputData, batchSize, mapBatchSize, inWidth, inHeight, outWidth, outHeight, mapWidth, mapHeight, mapData, + mapType, alignCorners, borderValue); // Compare data in actual output versus the generated golden reference image CompareVectors(result, ref); @@ -258,144 +260,186 @@ int main(int argc, char** argv) { TEST_CASES_BEGIN(); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, 
eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 
1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); - + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::GPU))); + TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 
480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, 
eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, 
FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::CPU))); 
TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 
1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, + true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); - - + eInterpolationType::INTERP_TYPE_NEAREST>(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, + make_float4(0.0f, 0.0f, 0.0f, 1.0f), + REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASES_END(); } \ No newline at end of file diff --git a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp index d7c385d0..2482e346 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp @@ -54,8 +54,8 @@ std::vector GoldenResize(std::vector> &input, int batchS // Use the replicate (or clamping) border mode by default to handle out of bounds 
conditions with certain // interpolation modes. - InterpolationWrapper inputWrap( - BorderWrapper( + InterpolationWrapper> inputWrap( + BorderWrapper>( ImageWrapper(input, batchSize, inputSize.w, inputSize.h), T{})); // Determine the scaling factor required to map from the output coordinates to the corresponding input coordinates diff --git a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp index 56deeabb..37b81842 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp @@ -68,9 +68,9 @@ std::vector> GoldenRotate(std::vector>& T borderVal = detail::SaturateCast(make_float4(0.0f, 0.0f, 0.0f, 0.0f)); ImageWrapper outputWrapper(output, batchSize, imageSize.w, imageSize.h); - InterpolationWrapper inputWrapper( - BorderWrapper(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), - borderVal)); + InterpolationWrapper> inputWrapper( + BorderWrapper>( + ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal)); /** * Affine warp for a combined rotation and translate looks like the following when in its inverse representation: diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp index 93c91ae9..72748a1a 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp @@ -55,7 +55,7 @@ std::vector> GoldenWarpAffine(std::vector& mat, bool isInverted, int batchSize, Size2D inputSize, Size2D outputSize, float4 borderValue) { // Create interpolation wrapper for input vector - InterpolationWrapper inputWrap((BorderWrapper( + InterpolationWrapper> inputWrap((BorderWrapper>( ImageWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue)))); // Create ImageWrapper for output vector. We also need to create said output vector. 
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp index 1461365c..be918fd6 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp @@ -52,7 +52,7 @@ std::vector> GoldenWarpPerspective(std::vector& mat, bool isInverted, int batchSize, Size2D inputSize, Size2D outputSize, float4 borderValue) { // Create interpolation wrapper for input vector - InterpolationWrapper inputWrap((BorderWrapper( + InterpolationWrapper> inputWrap((BorderWrapper>( ImageWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue)))); // Create ImageWrapper for output vector. We also need to create said output vector. From 9348f875ab50d1535be89cfb744bf0c48b5795dd Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Wed, 20 May 2026 10:23:11 -0400 Subject: [PATCH 11/13] Revert "Support batch indexing in width and height methods for kernel wrappers" This reverts commit 9055ed0b5b3c3a6c6858029b17bedacb6ab9751e. 
--- include/core/wrappers/border_wrapper.hpp | 55 ++-- include/core/wrappers/image_wrapper.hpp | 8 +- .../core/wrappers/interpolation_wrapper.hpp | 47 ++-- src/op_bilateral_filter.cpp | 12 +- src/op_copy_make_border.cpp | 5 +- src/op_remap.cpp | 6 +- src/op_resize.cpp | 22 +- src/op_rotate.cpp | 11 +- src/op_warp_perspective.cpp | 4 +- .../core/wrappers/test_border_wrapper.cpp | 7 +- .../wrappers/test_interpolation_wrapper.cpp | 33 ++- .../operators/test_op_bilateral_filter.cpp | 15 +- .../operators/test_op_copy_make_border.cpp | 3 +- .../cpp/src/tests/operators/test_op_remap.cpp | 234 +++++++----------- .../src/tests/operators/test_op_resize.cpp | 4 +- .../src/tests/operators/test_op_rotate.cpp | 6 +- .../tests/operators/test_op_warp_affine.cpp | 2 +- .../operators/test_op_warp_perspective.cpp | 2 +- 18 files changed, 210 insertions(+), 266 deletions(-) diff --git a/include/core/wrappers/border_wrapper.hpp b/include/core/wrappers/border_wrapper.hpp index 08e21a3c..f55493a5 100644 --- a/include/core/wrappers/border_wrapper.hpp +++ b/include/core/wrappers/border_wrapper.hpp @@ -23,37 +23,38 @@ #include -#include -#include - #include "core/wrappers/image_wrapper.hpp" #include "operator_types.h" namespace roccv { /** - * @brief Wrapper class which adds border-handling behavior on top of an underlying image wrapper. - * - * Templated on the wrapper type W (e.g. ImageWrapper, VarShapeImageWrapper) so that the same - * border math is shared between uniform-shape and variable-shape image batches. The pixel value - * type T is recovered from W::ValueType. + * @brief Wrapper class for ImageWrapper. This extends the descriptors by defining behaviors for when tensor + * coordinates go out of scope. * + * @tparam T The underlying data type of the tensor. * @tparam BorderType The border type to use when coordinates are out of bounds. - * @tparam W The underlying image wrapper type. Must expose ValueType, at(n,h,w,c), width(n), and height(n). 
*/ -template +template class BorderWrapper { public: - using ValueType = typename W::ValueType; + /** + * @brief Wraps an ImageWrapper and extends its capabilities to handle out of bounds coordinates. + * + * @param tensor The tensor to wrap. + * @param border_value The fallback border color to use when using a constant border mode. + */ + BorderWrapper(const Tensor& tensor, T border_value) : m_desc(tensor), m_border_value(border_value) {} /** - * @brief Constructs a BorderWrapper from an existing image wrapper. Extends its capabilities to handle out - * of bound coordinates. + * @brief Constructs a BorderWrapper from an existing ImageWrapper. Extends its capabilities to handle out of bound + * coordinates. * - * @param image_wrapper The image wrapper to wrap around the BorderWrapper. + * @param image_wrapper The ImageWrapper to wrap around the BorderWrapper. * @param border_value The fallback border color to use when using a constant border mode. */ - BorderWrapper(W image_wrapper, ValueType border_value) : m_desc(image_wrapper), m_border_value(border_value) {} + BorderWrapper(ImageWrapper image_wrapper, T border_value) + : m_desc(image_wrapper), m_border_value(border_value) {} /** * @brief Returns a reference to the underlying data given image coordinates. If the coordinates fall out of bounds, @@ -65,14 +66,11 @@ class BorderWrapper { * @param c The channel index. * @return A reference to the underlying data or a fallback border value of type T. */ - __device__ __host__ const ValueType at(int64_t n, int64_t h, int64_t w, int64_t c) const { - const int64_t imgWidth = width(n); - const int64_t imgHeight = height(n); - + __device__ __host__ const T at(int64_t n, int64_t h, int64_t w, int64_t c) const { // Constant border type implementation. This is a special case which doesn't remap values, but rather returns // the provided constant value. 
if constexpr (BorderType == eBorderType::BORDER_TYPE_CONSTANT) { - if (w < 0 || w >= imgWidth || h < 0 || h >= imgHeight) + if (w < 0 || w >= width() || h < 0 || h >= height()) return m_border_value; else return m_desc.at(n, h, w, c); @@ -82,12 +80,13 @@ class BorderWrapper { // required at image borders. While this may cause branch divergence, a good bulk of the pixels should fall // within image bounds and will take the same branch. This is preferred over having to do expensive calculations // for EVERY pixel in the image (most of which do not require said calculations). - if (w >= 0 && w < imgWidth && h >= 0 && h < imgHeight) { + if (w >= 0 && w < width() && h >= 0 && h < height()) { return m_desc.at(n, h, w, c); } // Otherwise, do some additional calculations to map the provided x and y coordinates to be within bounds. int64_t x = w, y = h; + int64_t imgWidth = width(), imgHeight = height(); // Reflect border type implementation. (Note: This is NOT REFLECT101, pixels at the border will be duplicated as // is the intended behavior for this border mode.) @@ -140,20 +139,18 @@ class BorderWrapper { } /** - * @brief Retrives the height of the image at batch index n. + * @brief Retrives the height of the images. * - * @param n Batch index. Ignored when W is a uniform-shape wrapper. * @return Image height. */ - __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); } + __device__ __host__ inline int64_t height() const { return m_desc.height(); } /** - * @brief Retrieves the width of the image at batch index n. + * @brief Retrieves the width of the image. * - * @param n Batch index. Ignored when W is a uniform-shape wrapper. * @return Image width. */ - __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); } + __device__ __host__ inline int64_t width() const { return m_desc.width(); } /** * @brief Retrieves the number of batches in the image tensor. 
@@ -170,7 +167,7 @@ class BorderWrapper { __device__ __host__ inline int64_t channels() const { return m_desc.channels(); } private: - W m_desc; - ValueType m_border_value; + ImageWrapper m_desc; + T m_border_value; }; } // namespace roccv \ No newline at end of file diff --git a/include/core/wrappers/image_wrapper.hpp b/include/core/wrappers/image_wrapper.hpp index 4f1835f5..e174c64a 100644 --- a/include/core/wrappers/image_wrapper.hpp +++ b/include/core/wrappers/image_wrapper.hpp @@ -139,20 +139,16 @@ class ImageWrapper { /** * @brief Retrives the height of the images. * - * @param n Batch index. Ignored for uniform-shape ImageWrapper; included so the signature - * matches VarShapeImageWrapper for use as a template parameter to BorderWrapper et al. * @return Image height. */ - __device__ __host__ inline int64_t height(int64_t /*n*/ = 0) const { return shape.h; } + __device__ __host__ inline int64_t height() const { return shape.h; } /** * @brief Retrieves the width of the image. * - * @param n Batch index. Ignored for uniform-shape ImageWrapper; included so the signature - * matches VarShapeImageWrapper for use as a template parameter to BorderWrapper et al. * @return Image width. */ - __device__ __host__ inline int64_t width(int64_t /*n*/ = 0) const { return shape.w; } + __device__ __host__ inline int64_t width() const { return shape.w; } /** * @brief Retrieves the number of batches in the image tensor. 
diff --git a/include/core/wrappers/interpolation_wrapper.hpp b/include/core/wrappers/interpolation_wrapper.hpp index 68daaad8..7adb8cb6 100644 --- a/include/core/wrappers/interpolation_wrapper.hpp +++ b/include/core/wrappers/interpolation_wrapper.hpp @@ -23,35 +23,41 @@ #include "core/detail/casting.hpp" #include "core/detail/math/vectorized_type_math.hpp" -#include "core/detail/vector_utils.hpp" #include "core/wrappers/border_wrapper.hpp" +#include "core/detail/vector_utils.hpp" #include "operator_types.h" namespace roccv { /** - * @brief A kernel-friendly wrapper which provides interpolation logic on top of an underlying image wrapper. - * - * Templated on the wrapper type W (e.g. ImageWrapper, VarShapeImageWrapper) so that the same - * interpolation math is shared between uniform-shape and variable-shape image batches. The pixel value - * type T is recovered from W::ValueType. Read-only access; do not use for output tensors. + * @brief A kernel-friendly wrapper which provides interpolation logic based on the given + * coordinates. This tensor wrapper is typically only used for input tensors and does not provide write access to its + * underlying data. * + * @tparam T Underlying data type of the tensor data. + * @tparam C Number of channels in data type. * @tparam B Border type to use for interpolation. * @tparam I Interpolation type to use. - * @tparam W The underlying image wrapper type. Must expose ValueType, at(n,h,w,c), width(n), and height(n). */ -template +template class InterpolationWrapper { public: - using ValueType = typename W::ValueType; + /** + * @brief Wraps a roccv::Tensor in an InterpolationWrapper to provide pixel interpolation when accessing + * non-integer coordinate mappings. + * + * @param tensor The tensor to wrap. + * @param border_value A fallback border value to use in the case of a constant border mode. 
+ */ + InterpolationWrapper(const Tensor& tensor, T border_value) : m_desc(tensor, border_value) {} /** - * @brief Wraps a BorderWrapper in an InterpolationWrapper. Extends capabilities to interpolate pixel values when + * @brief Wraps a BorderWrapper in an Interpolation wrapper. Extends capabilities to interpolate pixel values when * given non-integer coordinates. * * @param borderWrapper The BorderWrapper to wrap. */ - InterpolationWrapper(BorderWrapper borderWrapper) : m_desc(borderWrapper) {} + InterpolationWrapper(BorderWrapper borderWrapper) : m_desc(borderWrapper) {} /** * @brief This function calculates the weighting coefficients for the Catmull-Rom cubic interpolation. @@ -86,7 +92,7 @@ class InterpolationWrapper { * @param w Width coordinates. * @return An interpolated value. */ - inline __device__ __host__ const ValueType at(int64_t n, float h, float w, int64_t c) const { + inline __device__ __host__ const T at(int64_t n, float h, float w, int64_t c) const { if constexpr (I == eInterpolationType::INTERP_TYPE_NEAREST) { // Nearest neighbor interpolation implementation return m_desc.at(n, lroundf(h), lroundf(w), c); @@ -96,7 +102,7 @@ class InterpolationWrapper { // - - // v3 -- v4 - using WorkType = detail::MakeType>; + using WorkType = detail::MakeType>; int64_t x0 = static_cast(floorf(w)); int64_t x1 = x0 + 1; @@ -112,10 +118,10 @@ class InterpolationWrapper { auto q2 = v3 * (x1 - w) + v4 * (w - x0); auto q = q1 * (y1 - h) + q2 * (h - y0); - return detail::RangeCast(q); + return detail::RangeCast(q); } else if constexpr (I == eInterpolationType::INTERP_TYPE_CUBIC) { using namespace roccv::detail; - using WorkType = detail::MakeType>; + using WorkType = detail::MakeType>; // Integer coordinates for pixel (x, y) int64_t int_x = static_cast(floorf(w)); @@ -130,21 +136,20 @@ class InterpolationWrapper { WorkType sum = SetAll(0.0f); for (int index_y = -1; index_y <= 2; index_y++) { for (int index_x = -1; index_x <= 2; index_x++) { - sum += 
detail::RangeCast(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) * - (weight_x[index_x + 1] * weight_y[index_y + 1]); + sum += detail::RangeCast(m_desc.at(n, int_y + index_y, int_x + index_x, 0)) * (weight_x[index_x + 1] * weight_y[index_y + 1]); } } - return detail::RangeCast(sum); + return detail::RangeCast(sum); } } - __device__ __host__ inline int64_t height(int64_t n = 0) const { return m_desc.height(n); } - __device__ __host__ inline int64_t width(int64_t n = 0) const { return m_desc.width(n); } + __device__ __host__ inline int64_t height() const { return m_desc.height(); } + __device__ __host__ inline int64_t width() const { return m_desc.width(); } __device__ __host__ inline int64_t batches() const { return m_desc.batches(); } __device__ __host__ inline int64_t channels() const { return m_desc.channels(); } private: - BorderWrapper m_desc; + BorderWrapper m_desc; }; } // namespace roccv \ No newline at end of file diff --git a/src/op_bilateral_filter.cpp b/src/op_bilateral_filter.cpp index c4adfb25..dffba8ae 100644 --- a/src/op_bilateral_filter.cpp +++ b/src/op_bilateral_filter.cpp @@ -43,7 +43,7 @@ BilateralFilter::~BilateralFilter() {} template void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &input, const Tensor &output, int diameter, float sigmaColor, float sigmaSpace, T borderValue, eDeviceType device) { - BorderWrapper> inputWrapper(ImageWrapper(input), borderValue); + BorderWrapper inputWrapper(input, borderValue); ImageWrapper outputWrapper(output); if (outputWrapper.channels() > 4 || outputWrapper.channels() < 1) { @@ -61,7 +61,8 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp sigmaSpace = 1.0f; } - const int radius = (diameter <= 0) ? static_cast(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1); + const int radius = + (diameter <= 0) ? 
static_cast(std::roundf(sigmaSpace * 1.5f)) : (diameter >> 1); float spaceCoeff = -1 / (2 * sigmaSpace * sigmaSpace); float colorCoeff = -1 / (2 * sigmaColor * sigmaColor); @@ -88,10 +89,9 @@ void dispatch_bilateral_filter_border_mode(hipStream_t stream, const Tensor &inp for (int j = 0; j < divisor; j++) { for (int i = 0; i < dividend; i++) { - threads.push_back( - std::thread(Kernels::Host::bilateral_filter>, ImageWrapper>, - inputWrapper, outputWrapper, radius, rollingHeight, rollingWidth, prevHeight, prevWidth, - spaceCoeff, colorCoeff)); + threads.push_back(std::thread(Kernels::Host::bilateral_filter, ImageWrapper>, + inputWrapper, outputWrapper, radius, rollingHeight, rollingWidth, + prevHeight, prevWidth, spaceCoeff, colorCoeff)); prevWidth = rollingWidth; rollingWidth += factorW; } diff --git a/src/op_copy_make_border.cpp b/src/op_copy_make_border.cpp index 32e4ad7a..feacfbd9 100644 --- a/src/op_copy_make_border.cpp +++ b/src/op_copy_make_border.cpp @@ -38,7 +38,7 @@ namespace roccv { template void dispatch_copy_make_border_border_mode(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top, int32_t left, T border_value, eDeviceType device) { - BorderWrapper> in_desc(ImageWrapper(input), border_value); + BorderWrapper in_desc(input, border_value); ImageWrapper out_desc(output); switch (device) { @@ -83,7 +83,8 @@ void dispatch_copy_make_border(hipStream_t stream, const Tensor& input, const Te } void CopyMakeBorder::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, int32_t top, - int32_t left, eBorderType border_mode, float4 border_value, eDeviceType device) const { + int32_t left, eBorderType border_mode, float4 border_value, + eDeviceType device) const { CHECK_TENSOR_DEVICE(input, device); CHECK_TENSOR_LAYOUT(input, eTensorLayout::TENSOR_LAYOUT_NHWC, eTensorLayout::TENSOR_LAYOUT_HWC); CHECK_TENSOR_DATATYPES(input, eDataType::DATA_TYPE_U8, eDataType::DATA_TYPE_S8, eDataType::DATA_TYPE_U16, diff --git 
a/src/op_remap.cpp b/src/op_remap.cpp index 5b01f4dc..0992cf44 100644 --- a/src/op_remap.cpp +++ b/src/op_remap.cpp @@ -77,10 +77,8 @@ void dispatch_remap_mapInterp(hipStream_t stream, const Tensor &input, const Ten const eRemapType mapValueType, const bool alignCorners, const T borderValue, const eDeviceType device) { ImageWrapper outputWrapper(output); - BorderWrapper> mapBorder(ImageWrapper(map), make_float2(0, 0)); - InterpolationWrapper> wrappedMapTensor(mapBorder); - BorderWrapper> inputBorder(ImageWrapper(input), borderValue); - InterpolationWrapper> inputWrapper(inputBorder); + InterpolationWrapper wrappedMapTensor(map, make_float2(0, 0)); + InterpolationWrapper inputWrapper(input, borderValue); int mapBatchSize = wrappedMapTensor.batches(); diff --git a/src/op_resize.cpp b/src/op_resize.cpp index 71fe6889..d7cd0b61 100644 --- a/src/op_resize.cpp +++ b/src/op_resize.cpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include "common/validation_helpers.hpp" +#include "core/detail/casting.hpp" #include "core/exception.hpp" #include "core/status_type.h" #include "core/wrappers/interpolation_wrapper.hpp" @@ -37,8 +38,7 @@ template void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tensor& output, eDeviceType device) { ImageWrapper outputWrapper(output); // Resize operation should clamp values at the border (REPLICATE border mode) - BorderWrapper> inputBorder(ImageWrapper(input), T{}); - InterpolationWrapper> inputWrapper(inputBorder); + InterpolationWrapper inputWrapper(input, T{}); float scaleX = inputWrapper.width() / static_cast(outputWrapper.width()); float scaleY = inputWrapper.height() / static_cast(outputWrapper.height()); @@ -62,13 +62,13 @@ void dispatch_resize_interp(hipStream_t stream, const Tensor& input, const Tenso template void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor& output, eInterpolationType interpolation, eDeviceType device) { - static const std::unordered_map> - funcs = { - 
{eInterpolationType::INTERP_TYPE_NEAREST, - dispatch_resize_interp}, - {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp}, - {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp}}; + static const std::unordered_map< + eInterpolationType, + std::function> + funcs = {{eInterpolationType::INTERP_TYPE_NEAREST, dispatch_resize_interp}, + {eInterpolationType::INTERP_TYPE_LINEAR, dispatch_resize_interp}, + {eInterpolationType::INTERP_TYPE_CUBIC, dispatch_resize_interp} + }; if (!funcs.contains(interpolation)) { throw Exception("Operation does not support the given interpolation mode.", eStatusType::NOT_IMPLEMENTED); @@ -78,8 +78,8 @@ void dispatch_resize_dtype(hipStream_t stream, const Tensor& input, const Tensor func(stream, input, output, device); } -void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, eInterpolationType interpolation, - eDeviceType device) const { +void Resize::operator()(hipStream_t stream, const Tensor& input, const Tensor& output, + eInterpolationType interpolation, eDeviceType device) const { CHECK_TENSOR_DEVICE(input, device); CHECK_TENSOR_DEVICE(output, device); diff --git a/src/op_rotate.cpp b/src/op_rotate.cpp index c1eea211..28806779 100644 --- a/src/op_rotate.cpp +++ b/src/op_rotate.cpp @@ -55,8 +55,7 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso T borderVal = detail::SaturateCast(make_float4(0.0f, 0.0f, 0.0f, 0.0f)); ImageWrapper outputWrap(output); - BorderWrapper> inputBorder(ImageWrapper(input), borderVal); - InterpolationWrapper> inputWrap(inputBorder); + InterpolationWrapper inputWrap(input, borderVal); switch (device) { case eDeviceType::GPU: { @@ -75,8 +74,8 @@ void dispatch_rotate_interp(hipStream_t stream, const Tensor &input, const Tenso } template -void dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, double2 shift, - eInterpolationType interpolation, eDeviceType device) { +void 
dispatch_rotate_type(hipStream_t stream, const Tensor &input, const Tensor &output, double angleDeg, + double2 shift, eInterpolationType interpolation, eDeviceType device) { // clang-format off static const std::unordered_map transform(transMatrix); ImageWrapper outputWrapper(output); - BorderWrapper> inputBorder(ImageWrapper(input), borderValue); - InterpolationWrapper> inputWrapper(inputBorder); + InterpolationWrapper inputWrapper(input, borderValue); // Launch CPU/GPU kernel depending on requested device type. switch (device) { diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp index fd836773..873f05dc 100644 --- a/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_border_wrapper.cpp @@ -115,8 +115,8 @@ int64_t GetCoordOfBorderPel(int64_t u, int64_t dimSize, eBorderType borderMode) * coordinates fall out of bounds. */ template > -BT GoldenBorderAt(ImageWrapper& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y, int64_t x, - int64_t channel) { +BT GoldenBorderAt(ImageWrapper& input, eBorderType borderMode, T borderValue, int64_t sample, int64_t y, + int64_t x, int64_t channel) { int64_t outX = x, outY = y; if (borderMode == eBorderType::BORDER_TYPE_CONSTANT) { @@ -161,8 +161,7 @@ void TestCorrectness(float4 borderValue, int32_t batchSize, Size2D imageSize, in FillVector(inputData); // BorderWrapper to calculate the actual calculated values. 
- BorderWrapper> borderWrap( - ImageWrapper(inputData, batchSize, imageSize.w, imageSize.h), borderVal); + BorderWrapper borderWrap(ImageWrapper(inputData, batchSize, imageSize.w, imageSize.h), borderVal); std::vector actualOutput(numElementsWithBorder); int actualIndex = 0; for (int batch = 0; batch < batchSize; ++batch) { diff --git a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp index 5ec3176e..a4466530 100644 --- a/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp +++ b/tests/roccv/cpp/src/tests/core/wrappers/test_interpolation_wrapper.cpp @@ -21,9 +21,9 @@ #include #include +#include "core/detail/vector_utils.hpp" #include -#include "core/detail/vector_utils.hpp" #include "test_helpers.hpp" using namespace roccv; @@ -45,7 +45,7 @@ namespace { * @return T The interpolated pixel. */ template -T GoldenLinear(BorderWrapper> input, int64_t sample, float y, float x) { +T GoldenLinear(BorderWrapper input, int64_t sample, float y, float x) { // Defines the vectorized float type for intermediate calculations. using WorkType = detail::MakeType>; @@ -86,7 +86,7 @@ T GoldenLinear(BorderWrapper> input, int64_t sample, * @return T The interpolated pixel. */ template -T GoldenNearest(BorderWrapper> input, int64_t sample, float y, float x) { +T GoldenNearest(BorderWrapper input, int64_t sample, float y, float x) { // Nearest neighbor interpolation. Rounds given floating point values to the nearest integer. return input.at(sample, lroundf(y), lroundf(x), 0); } @@ -98,7 +98,7 @@ T GoldenNearest(BorderWrapper> input, int64_t sample * @return None. 
*/ void CalBicubicWeights(float dist, float* weight) { - const float A = -0.5f; // Note OpenCV sets alpha to -0.75f + const float A = -0.5f; // Note OpenCV sets alpha to -0.75f weight[0] = ((A * (dist + 1) - 5 * A) * (dist + 1) + 8 * A) * (dist + 1) - 4 * A; weight[1] = ((A + 2) * dist - (A + 3)) * dist * dist + 1; @@ -107,8 +107,7 @@ void CalBicubicWeights(float dist, float* weight) { } /** - * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV - * libraries. + * @brief Golden model for Bicubic interpolation. This is the Catmull-Rom cubic interpolation commonly used in CV libraries. * * @tparam T Image datatype. * @tparam BorderType Border type for boundary conditions. @@ -119,7 +118,7 @@ void CalBicubicWeights(float dist, float* weight) { * @return T The interpolated pixel. */ template -T GoldenBicubic(BorderWrapper> input, int64_t sample, float y, float x) { +T GoldenBicubic(BorderWrapper input, int64_t sample, float y, float x) { // Defines the vectorized float type for intermediate calculations. using WorkType = detail::MakeType>; @@ -136,8 +135,7 @@ T GoldenBicubic(BorderWrapper> input, int64_t sample WorkType sum = SetAll(0.0f); for (int indexY = -1; indexY <= 2; indexY++) { for (int indexX = -1; indexX <= 2; indexX++) { - sum += detail::RangeCast(input.at(sample, intY + indexY, intX + indexX, 0)) * - (weightX[indexX + 1] * weightY[indexY + 1]); + sum += detail::RangeCast(input.at(sample, intY + indexY, intX + indexX, 0)) * (weightX[indexX + 1] * weightY[indexY + 1]); } } @@ -158,7 +156,7 @@ T GoldenBicubic(BorderWrapper> input, int64_t sample * @return T The interpolated pixel. 
*/ template -T GoldenInterpolationAt(BorderWrapper> input, int64_t sample, float y, float x, +T GoldenInterpolationAt(BorderWrapper input, int64_t sample, float y, float x, eInterpolationType interp) { switch (interp) { case eInterpolationType::INTERP_TYPE_NEAREST: @@ -204,11 +202,9 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl std::vector> goldenOutput; // Use roccv::InterpolationWrapper to get actual output - InterpolationWrapper> actualWrap( - (BorderWrapper>(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), - borderVal))); - BorderWrapper> goldenWrap(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), - borderVal); + InterpolationWrapper actualWrap( + (BorderWrapper(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal))); + BorderWrapper goldenWrap(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal); for (int b = 0; b < batchSize; b++) { for (float y = 0; y < imageSize.h; y += idxDelta) { @@ -224,8 +220,7 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl } } } - if constexpr (std::is_integral_v> && std::is_signed_v> && - sizeof(detail::BaseType) == 4) { + if constexpr (std::is_integral_v> && std::is_signed_v> && sizeof(detail::BaseType) == 4) { CompareVectorsNear(actualOutput, goldenOutput, NEAR_EQUAL_THRESHOLD * 2); } else { CompareVectorsNear(actualOutput, goldenOutput); @@ -233,7 +228,7 @@ void TestCorrectness(int64_t batchSize, Size2D imageSize, float4 borderValue, fl } } // namespace -int main(int argc, char** argv) { +int main(int argc, char **argv) { (void)argc; (void)argv; TEST_CASES_BEGIN(); @@ -327,7 +322,7 @@ int main(int argc, char** argv) { TEST_CASE((TestCorrectness(1, {20, 53}, make_float4(0, 0, 0, 1), 0.1f))); TEST_CASE((TestCorrectness(3, {38, 10}, make_float4(0, 0, 0, 1), 0.1f))); TEST_CASE((TestCorrectness(5, {65, 21}, make_float4(1, 0.5, 0.5, 1), 0.1f))); - // clang-format on + // clang-format on TEST_CASES_END(); } \ No 
newline at end of file diff --git a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp index 8ae5a10a..f208962c 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_bilateral_filter.cpp @@ -51,8 +51,7 @@ namespace { template > void GenerateGoldenBilateral(std::vector& input, std::vector& output, int32_t batchSize, Size2D imageSize, int diameter, float sigmaColor, float sigmaSpace, T borderValue) { - BorderWrapper> src(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), - borderValue); + BorderWrapper src(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderValue); ImageWrapper dst(output, batchSize, imageSize.w, imageSize.h); using namespace roccv::detail; using Worktype = MakeType>; @@ -180,9 +179,9 @@ int main(int argc, char** argv) { TEST_CASE((TestCorrectness(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); TEST_CASE((TestCorrectness(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f, - {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); - TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0}, - eDeviceType::GPU))); + {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, + {500.0, 500.0, 0.0, 0.0}, eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::GPU))); @@ -289,9 +288,9 @@ int main(int argc, char** argv) { TEST_CASE((TestCorrectness(1, 20, 20, FMT_U8, 0, 50.0f, 1.2f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); TEST_CASE((TestCorrectness(2, 20, 20, FMT_RGB8, -1, 50.0f, 1.2f, - {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); - TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 500.0f, 1.2f, {500.0, 500.0, 0.0, 0.0}, - eDeviceType::CPU))); + {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 24, 24, FMT_F32, 0, 
500.0f, 1.2f, + {500.0, 500.0, 0.0, 0.0}, eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 20, 20, FMT_RGB8, 4, 50.0f, 3.0f, {0.0, 0.0, 0.0, 0.0}, eDeviceType::CPU))); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp index 30050917..4320f04e 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_copy_make_border.cpp @@ -57,8 +57,7 @@ std::vector GoldenCopyMakeBorder(std::vector input, int batchSize, Size2 // Wrap the input images in a BorderWrapper to handle out of bounds image behavior. The BorderWrapper has already // been tested in another test so it can be used reliably. - BorderWrapper> inputWrap(ImageWrapper(input, batchSize, inputSize.w, inputSize.h), - borderVal); + BorderWrapper inputWrap(ImageWrapper(input, batchSize, inputSize.w, inputSize.h), borderVal); std::vector output(batchSize * outputSize.h * outputSize.w * channels); ImageWrapper outputWrap(output, batchSize, outputSize.w, outputSize.h); diff --git a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp index 4ad9d914..634344a4 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_remap.cpp @@ -25,9 +25,8 @@ THE SOFTWARE. #include #include #include - -#include "core/detail/casting.hpp" #include "core/detail/internal_structs.hpp" +#include "core/detail/casting.hpp" #include "core/detail/math/vectorized_type_math.hpp" #include "core/detail/type_traits.hpp" #include "operator_types.h" @@ -40,11 +39,11 @@ using namespace roccv::detail; // Keep all non-entrypoint functions in an anonymous namespace to prevent redefinition errors across translation units. 
namespace { -RemapParams GetRemapParams(const int2& srcSize, const int2& dstSize, const int2& mapSize, bool alignCorners, - eRemapType mapValueType) { +RemapParams GetRemapParams(const int2 &srcSize, const int2 &dstSize, const int2 &mapSize, bool alignCorners, eRemapType mapValueType) +{ RemapParams params; - switch (mapValueType) { + switch(mapValueType) { case REMAP_ABSOLUTE: params.srcScale = make_float2(0.f, 0.f); params.mapScale = StaticCast(mapSize) / StaticCast(dstSize); @@ -55,7 +54,7 @@ RemapParams GetRemapParams(const int2& srcSize, const int2& dstSize, const int2& case REMAP_ABSOLUTE_NORMALIZED: params.srcScale = make_float2(0.f, 0.f); params.mapScale = StaticCast(mapSize) / StaticCast(dstSize); - params.valScale = (StaticCast(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f; + params.valScale = (StaticCast(srcSize) - (alignCorners ? 1.f : 0.f)) / 2.f; params.srcOffset = params.valScale - (alignCorners ? 0.f : .5f); params.dstOffset = 0.f; break; @@ -88,15 +87,15 @@ RemapParams GetRemapParams(const int2& srcSize, const int2& dstSize, const int2& */ template > -std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth, - int32_t inHeight, int32_t outWidth, int32_t outHeight, int32_t mapWidth, int32_t mapHeight, - std::vector& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) { +std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t mapBatchSize, int32_t inWidth, int32_t inHeight, int32_t outWidth, + int32_t outHeight, int32_t mapWidth, int32_t mapHeight, std::vector& mapData, eRemapType mapType, bool alignCorners, float4 borderValue) { + int channels = detail::NumElements; int outputSize = batchSize * outWidth * outHeight * channels; std::vector output(outputSize); // Create interpolation wrapper for input vector - InterpolationWrapper> src((BorderWrapper>( + InterpolationWrapper src((BorderWrapper( ImageWrapper(input, batchSize, inWidth, inHeight), 
detail::SaturateCast(borderValue)))); // Wrap the output vector for simplified data access @@ -104,10 +103,8 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m // Create an interpolation wrapper for the map tensor // InterpolationWrapper wrappedMapTensor(map, make_float2(0, 0)); - InterpolationWrapper> map( - (BorderWrapper>( - ImageWrapper(mapData.data(), mapBatchSize, mapWidth, mapHeight), - detail::SaturateCast(borderValue)))); + InterpolationWrapper map((BorderWrapper( + ImageWrapper(mapData.data(), mapBatchSize, mapWidth, mapHeight), detail::SaturateCast(borderValue)))); int2 srcSize = make_int2(src.width(), src.height()); int2 dstSize = make_int2(dst.width(), dst.height()); @@ -122,12 +119,13 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m for (int b = 0; b < dst.batches(); b++) { for (int y = 0; y < dst.height(); y++) { for (int x = 0; x < dst.width(); x++) { + dstCoord.x = static_cast(x); dstCoord.y = static_cast(y); - + mapCoord.x = (dstCoord.x + params.dstOffset) * params.mapScale.x; mapCoord.y = (dstCoord.y + params.dstOffset) * params.mapScale.y; - + float2 mapValue = map.at((mapBatchSize == 1 ? 
0 : b), mapCoord.y, mapCoord.x, 0); srcCoord.x = dstCoord.x * params.srcScale.x + mapValue.x * params.valScale.x + params.srcOffset.x; @@ -164,8 +162,7 @@ std::vector GoldenRemap(std::vector& input, int32_t batchSize, int32_t m */ template > -void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight, - int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType, +void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, int outWidth, int outHeight, int mapWidth, int mapHeight, ImageFormat format, float4 borderValue, eRemapType mapType, bool alignCorners, eDeviceType device) { // Create input and output tensor based on test parameters Tensor input(batchSize, {inWidth, inHeight}, format, device); @@ -177,7 +174,7 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, // Copy generated input data into input tensor CopyVectorIntoTensor(input, inputData); - + int mapSize = mapBatchSize * mapWidth * mapHeight; std::vector mapData(mapSize); @@ -191,10 +188,11 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, } } } - } else if (mapType == REMAP_ABSOLUTE_NORMALIZED) { + } + else if (mapType == REMAP_ABSOLUTE_NORMALIZED) { for (int b = 0; b < mapBatchSize; b++) { - for (int y = 0; y < mapHeight; y++) { - for (int x = 0; x < mapWidth; x++) { + for (int y = 0; y < mapHeight; y++){ + for (int x = 0; x < mapWidth; x++){ float normX = ((2.0f * static_cast(x)) / static_cast(mapWidth - 1)) - 1.0f; float normY = ((2.0f * static_cast(y)) / static_cast(mapHeight - 1)) - 1.0f; @@ -206,10 +204,11 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, } } } - } else if (mapType == REMAP_RELATIVE_NORMALIZED) { + } + else if (mapType == REMAP_RELATIVE_NORMALIZED) { for (int b = 0; b < mapBatchSize; b++) { - for (int y = 0; y < mapHeight; y++) { - for (int x = 0; x < mapWidth; x++) { + for (int y = 
0; y < mapHeight; y++){ + for (int x = 0; x < mapWidth; x++){ // Generate normalized coordinates in [-1, 1] range float normX = ((2.0f * static_cast(x)) / static_cast(mapWidth - 1)) - 1.0f; float normY = ((2.0f * static_cast(y)) / static_cast(mapHeight - 1)) - 1.0f; @@ -236,8 +235,7 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, hipStream_t stream; HIP_VALIDATE_NO_ERRORS(hipStreamCreate(&stream)); Remap op; - op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue, - device); + op(stream, input, output, mapTensor, InterpType, MapInterpType, mapType, alignCorners, BorderType, borderValue, device); HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); HIP_VALIDATE_NO_ERRORS(hipStreamDestroy(stream)); @@ -245,9 +243,9 @@ void TestCorrectness(int batchSize, int mapBatchSize, int inWidth, int inHeight, std::vector result(output.shape().size()); CopyTensorIntoVector(result, output); - std::vector ref = GoldenRemap( - inputData, batchSize, mapBatchSize, inWidth, inHeight, outWidth, outHeight, mapWidth, mapHeight, mapData, - mapType, alignCorners, borderValue); + std::vector ref = GoldenRemap(inputData, batchSize, mapBatchSize, inWidth, + inHeight, outWidth, outHeight, + mapWidth, mapHeight, mapData, mapType, alignCorners, borderValue); // Compare data in actual output versus the generated golden reference image CompareVectors(result, ref); @@ -260,186 +258,144 @@ int main(int argc, char** argv) { TEST_CASES_BEGIN(); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - false, 
eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - false, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 
1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - false, eDeviceType::GPU))); - + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 
360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - true, eDeviceType::GPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::GPU))); TEST_CASE((TestCorrectness(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, 
eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::GPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::GPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, 
FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - false, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); 
TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, - true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGB8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness(1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, - true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE_NORMALIZED, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness( - 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 
1.0f), REMAP_RELATIVE_NORMALIZED, - true, eDeviceType::CPU))); + 1, 1, 480, 360, 480, 360, 480, 360, FMT_RGBA8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_RELATIVE_NORMALIZED, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, false, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, false, eDeviceType::CPU))); TEST_CASE((TestCorrectness(2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 1, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); TEST_CASE((TestCorrectness(2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, - make_float4(0.0f, 0.0f, 0.0f, 1.0f), - REMAP_ABSOLUTE, true, eDeviceType::CPU))); + eInterpolationType::INTERP_TYPE_NEAREST>( + 2, 2, 480, 360, 480, 360, 480, 360, FMT_U8, make_float4(0.0f, 0.0f, 0.0f, 1.0f), REMAP_ABSOLUTE, true, eDeviceType::CPU))); + + TEST_CASES_END(); } \ No newline at end of file diff --git a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp index 2482e346..d7c385d0 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_resize.cpp @@ -54,8 +54,8 @@ std::vector GoldenResize(std::vector> &input, int batchS // Use the replicate (or clamping) border mode by default to handle out of bounds 
conditions with certain // interpolation modes. - InterpolationWrapper> inputWrap( - BorderWrapper>( + InterpolationWrapper inputWrap( + BorderWrapper( ImageWrapper(input, batchSize, inputSize.w, inputSize.h), T{})); // Determine the scaling factor required to map from the output coordinates to the corresponding input coordinates diff --git a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp index 37b81842..56deeabb 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_rotate.cpp @@ -68,9 +68,9 @@ std::vector> GoldenRotate(std::vector>& T borderVal = detail::SaturateCast(make_float4(0.0f, 0.0f, 0.0f, 0.0f)); ImageWrapper outputWrapper(output, batchSize, imageSize.w, imageSize.h); - InterpolationWrapper> inputWrapper( - BorderWrapper>( - ImageWrapper(input, batchSize, imageSize.w, imageSize.h), borderVal)); + InterpolationWrapper inputWrapper( + BorderWrapper(ImageWrapper(input, batchSize, imageSize.w, imageSize.h), + borderVal)); /** * Affine warp for a combined rotation and translate looks like the following when in its inverse representation: diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp index 72748a1a..93c91ae9 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_affine.cpp @@ -55,7 +55,7 @@ std::vector> GoldenWarpAffine(std::vector& mat, bool isInverted, int batchSize, Size2D inputSize, Size2D outputSize, float4 borderValue) { // Create interpolation wrapper for input vector - InterpolationWrapper> inputWrap((BorderWrapper>( + InterpolationWrapper inputWrap((BorderWrapper( ImageWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue)))); // Create ImageWrapper for output vector. We also need to create said output vector. 
diff --git a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp index be918fd6..1461365c 100644 --- a/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp +++ b/tests/roccv/cpp/src/tests/operators/test_op_warp_perspective.cpp @@ -52,7 +52,7 @@ std::vector> GoldenWarpPerspective(std::vector& mat, bool isInverted, int batchSize, Size2D inputSize, Size2D outputSize, float4 borderValue) { // Create interpolation wrapper for input vector - InterpolationWrapper> inputWrap((BorderWrapper>( + InterpolationWrapper inputWrap((BorderWrapper( ImageWrapper(input, batchSize, inputSize.w, inputSize.h), detail::SaturateCast(borderValue)))); // Create ImageWrapper for output vector. We also need to create said output vector. From 8b3b1b6017c575772452cec994061dc4bb047849 Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Wed, 20 May 2026 10:50:44 -0400 Subject: [PATCH 12/13] Move common helpers to image_test_helpers.hpp --- .../roccv/cpp/include/image_test_helpers.hpp | 128 ++++++++++++++++ .../cpp/src/tests/core/image/test_image.cpp | 57 +------ .../core/image/test_image_batch_data.cpp | 22 +-- .../core/image/test_image_batch_var_shape.cpp | 144 +++++------------- .../src/tests/core/image/test_image_data.cpp | 15 +- 5 files changed, 174 insertions(+), 192 deletions(-) create mode 100644 tests/roccv/cpp/include/image_test_helpers.hpp diff --git a/tests/roccv/cpp/include/image_test_helpers.hpp b/tests/roccv/cpp/include/image_test_helpers.hpp new file mode 100644 index 00000000..f7318f89 --- /dev/null +++ b/tests/roccv/cpp/include/image_test_helpers.hpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace roccv { +namespace tests { + +// Opaque sentinel pointers used by image-layer tests. ImageData / ImageBatchData +// carry pointers but never dereference them — the buffer is a metadata snapshot +// only — so tests use these to verify values flow through without needing real +// allocations. +inline void* const FAKE_PTR_A = reinterpret_cast(0xAAAAAAAAull); +inline void* const FAKE_PTR_B = reinterpret_cast(0xBBBBBBBBull); +inline void* const FAKE_PTR_C = reinterpret_cast(0xCCCCCCCCull); + +/** + * @brief Test allocator that backs every allocation kind with malloc and tallies + * how many times each entry point is invoked. 
Pure host-backed; no actual GPU + * dependency on the returned pointers — callers that exercise the Hip/pinned + * paths must only inspect metadata, never dereference device memory. + * + * `lastAllocBytes` is updated from every alloc path (hip, host, pinned), so + * callers may assert on the most recent allocation regardless of kind. + */ +class CountingAllocator : public IAllocator { + public: + mutable int hipAllocs = 0; + mutable int hipFrees = 0; + mutable int hostAllocs = 0; + mutable int hostFrees = 0; + mutable int pinnedAllocs = 0; + mutable int pinnedFrees = 0; + mutable size_t lastAllocBytes = 0; + + void* allocHipMem(size_t size) const override { + ++hipAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHipMem(void* ptr) const noexcept override { + ++hipFrees; + std::free(ptr); + } + + void* allocHostMem(size_t size, int32_t /*alignment*/ = 0) const override { + ++hostAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHostMem(void* ptr) const noexcept override { + ++hostFrees; + std::free(ptr); + } + + void* allocHostPinnedMem(size_t size) const override { + ++pinnedAllocs; + lastAllocBytes = size; + return std::malloc(size); + } + void freeHostPinnedMem(void* ptr) const noexcept override { + ++pinnedFrees; + std::free(ptr); + } +}; + +// Single-plane packed-row buffer descriptor around `basePtr`. The pointer is +// never dereferenced by the consumers (ImageData / ImageBatchVarShape). +inline ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, int64_t rowStride, void* basePtr) { + ImageBufferStrided buf{}; + buf.numPlanes = 1; + buf.planes[0] = {width, height, rowStride, basePtr}; + return buf; +} + +// Single-plane GPU-resident ImageData snapshot with packed-row stride implied +// by `fmt`. For tests that need an ImageData but won't touch the pixels. 
+inline ImageDataStridedHip MakeFakeHipData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + return ImageDataStridedHip(fmt, MakeSinglePlaneBuffer(width, height, static_cast(width * fmt.channels()), + basePtr)); +} + +// Host counterpart of MakeFakeHipData. +inline ImageDataStridedHost MakeFakeHostData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + return ImageDataStridedHost(fmt, MakeSinglePlaneBuffer(width, height, static_cast(width * fmt.channels()), + basePtr)); +} + +// Single-plane GPU-resident Image wrapping a sentinel pointer via ImageWrapData. +// Use for batch tests where pushBack only reads the descriptor. +inline Image MakeFakeGpuImage(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + return ImageWrapData(MakeFakeHipData(width, height, basePtr, fmt)); +} + +// Host counterpart of MakeFakeGpuImage. +inline Image MakeFakeHostImage(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { + return ImageWrapData(MakeFakeHostData(width, height, basePtr, fmt)); +} + +} // namespace tests +} // namespace roccv diff --git a/tests/roccv/cpp/src/tests/core/image/test_image.cpp b/tests/roccv/cpp/src/tests/core/image/test_image.cpp index 13937dff..b2ae2aeb 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image.cpp @@ -21,17 +21,14 @@ */ #include -#include -#include #include -#include #include #include -#include #include #include +#include "image_test_helpers.hpp" #include "test_helpers.hpp" using namespace roccv; @@ -39,58 +36,6 @@ using namespace roccv::tests; namespace { -void* const FAKE_PTR_A = reinterpret_cast(0xAAAAAAAAull); - -/** - * @brief Test allocator that backs allocations with malloc and tallies how - * many times each entry point is invoked. Pure host-side; no GPU dependency. 
- * - * The Hip path returns malloc'd memory because no test dereferences it — we - * only care that ptr round-trips through Image and that free is called the - * right number of times. - */ -class CountingAllocator : public IAllocator { - public: - mutable int hipAllocs = 0; - mutable int hipFrees = 0; - mutable int hostAllocs = 0; - mutable int hostFrees = 0; - mutable size_t lastAllocBytes = 0; - - void* allocHipMem(size_t size) const override { - ++hipAllocs; - lastAllocBytes = size; - return std::malloc(size); - } - void freeHipMem(void* ptr) const noexcept override { - ++hipFrees; - std::free(ptr); - } - - void* allocHostMem(size_t size, int32_t /*alignment*/ = 0) const override { - ++hostAllocs; - lastAllocBytes = size; - return std::malloc(size); - } - void freeHostMem(void* ptr) const noexcept override { - ++hostFrees; - std::free(ptr); - } - - // Unused by the Image paths under test. Trip loudly if invoked unexpectedly. - void* allocHostPinnedMem(size_t) const override { throw std::runtime_error("unused in tests"); } - void freeHostPinnedMem(void*) const noexcept override { std::abort(); } -}; - -// Build a single-plane ImageData snapshot referencing a sentinel pointer. Used -// for ImageWrapData tests where we never dereference the buffer. 
-ImageDataStridedHip MakeFakeHipData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { - ImageBufferStrided buf{}; - buf.numPlanes = 1; - buf.planes[0] = {width, height, static_cast(width * fmt.channels()), basePtr}; - return ImageDataStridedHip(fmt, buf); -} - // ============================================================================= // CalcRequirements // ============================================================================= diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp index f402d261..a2e238be 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp @@ -27,6 +27,7 @@ #include #include +#include "image_test_helpers.hpp" #include "test_helpers.hpp" using namespace roccv; @@ -34,12 +35,6 @@ using namespace roccv::tests; namespace { -// ImageBatchData carries pointers but never dereferences them; the buffer is a -// metadata snapshot. Use opaque sentinel pointers so we can verify values flow -// through the hierarchy without needing real allocations. -void* const FAKE_IMG_PTR_A = reinterpret_cast(0xA0A0A0A0ull); -void* const FAKE_IMG_PTR_B = reinterpret_cast(0xB0B0B0B0ull); - // Static descriptor/format storage for the batch buffer. These are real host // allocations (so the pointers are valid) but the batch tests only read // metadata back out of them; nothing dereferences the per-image basePtr fields. 
@@ -47,19 +42,12 @@ ImageBufferStrided g_imageList[2]; ImageFormat g_formatList[2] = {FMT_RGB8, FMT_RGB8}; ImageFormat g_hostFormatList[2] = {FMT_RGB8, FMT_RGB8}; -ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, int64_t rowStride, void* basePtr) { - ImageBufferStrided buf{}; - buf.numPlanes = 1; - buf.planes[0] = {width, height, rowStride, basePtr}; - return buf; -} - // Builds a homogeneous two-image varshape descriptor with a known bounding box // and uniqueFormat. The returned struct's pointers reference module-static // arrays so addresses remain stable across calls within a test. ImageBatchVarShapeBufferStrided MakeHomogeneousBuffer() { - g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_IMG_PTR_A); - g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 3, FAKE_IMG_PTR_B); + g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A); + g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 3, FAKE_PTR_B); g_formatList[0] = FMT_RGB8; g_formatList[1] = FMT_RGB8; g_hostFormatList[0] = FMT_RGB8; @@ -137,8 +125,8 @@ void TestImageBatchVarShapeDataEmpty() { * verbatim; uniqueFormat is FMT_NONE since no single format spans the batch. 
*/ void TestImageBatchVarShapeDataHeterogeneousFormats() { - g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_IMG_PTR_A); - g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 4, FAKE_IMG_PTR_B); + g_imageList[0] = MakeSinglePlaneBuffer(640, 480, 640 * 3, FAKE_PTR_A); + g_imageList[1] = MakeSinglePlaneBuffer(320, 240, 320 * 4, FAKE_PTR_B); g_formatList[0] = FMT_RGB8; g_formatList[1] = FMT_RGBA8; g_hostFormatList[0] = FMT_RGB8; diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp index dea3c0c0..ec148240 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_var_shape.cpp @@ -22,18 +22,15 @@ #include #include -#include -#include #include #include #include -#include -#include #include #include #include +#include "image_test_helpers.hpp" #include "test_helpers.hpp" using namespace roccv; @@ -41,69 +38,6 @@ using namespace roccv::tests; namespace { -/** - * @brief Test allocator that distinguishes pinned-host from regular-host - * allocations and tallies each entry-point. Pure host-backed; no actual GPU - * dependency on the descriptor buffers — tests verify metadata round-trip and - * pointer identity, never dereference device memory through these. 
- */ -class CountingAllocator : public IAllocator { - public: - mutable int hipAllocs = 0; - mutable int hipFrees = 0; - mutable int hostAllocs = 0; - mutable int hostFrees = 0; - mutable int pinnedAllocs = 0; - mutable int pinnedFrees = 0; - - void* allocHipMem(size_t size) const override { - ++hipAllocs; - return std::malloc(size); - } - void freeHipMem(void* ptr) const noexcept override { - ++hipFrees; - std::free(ptr); - } - - void* allocHostMem(size_t size, int32_t /*alignment*/ = 0) const override { - ++hostAllocs; - return std::malloc(size); - } - void freeHostMem(void* ptr) const noexcept override { - ++hostFrees; - std::free(ptr); - } - - void* allocHostPinnedMem(size_t size) const override { - ++pinnedAllocs; - return std::malloc(size); - } - void freeHostPinnedMem(void* ptr) const noexcept override { - ++pinnedFrees; - std::free(ptr); - } -}; - -// Build a single-plane GPU-resident image wrapper around a sentinel pointer. -// The pointer is never dereferenced — pushBack only reads the descriptor. 
-Image MakeFakeGpuImage(int32_t w, int32_t h, ImageFormat fmt, void* basePtr) { - ImageBufferStrided buf{}; - buf.numPlanes = 1; - buf.planes[0] = {w, h, static_cast(w * fmt.channels()), basePtr}; - return ImageWrapData(ImageDataStridedHip(fmt, buf)); -} - -Image MakeFakeHostImage(int32_t w, int32_t h, ImageFormat fmt, void* basePtr) { - ImageBufferStrided buf{}; - buf.numPlanes = 1; - buf.planes[0] = {w, h, static_cast(w * fmt.channels()), basePtr}; - return ImageWrapData(ImageDataStridedHost(fmt, buf)); -} - -void* const FAKE_A = reinterpret_cast(0xA0000000ull); -void* const FAKE_B = reinterpret_cast(0xB0000000ull); -void* const FAKE_C = reinterpret_cast(0xC0000000ull); - // ============================================================================= // Construction // ============================================================================= @@ -136,7 +70,7 @@ void TestPushBackSingle() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - Image img = MakeFakeGpuImage(640, 480, FMT_RGB8, FAKE_A); + Image img = MakeFakeGpuImage(640, 480, FAKE_PTR_A); batch.pushBack(img); EXPECT_EQ(batch.numImages(), 1); @@ -149,9 +83,9 @@ void TestPushBackMultipleHeterogeneousSizes() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(640, 480, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(320, 240, FMT_RGB8, FAKE_B)); - batch.pushBack(MakeFakeGpuImage(800, 200, FMT_RGB8, FAKE_C)); + batch.pushBack(MakeFakeGpuImage(640, 480, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(320, 240, FAKE_PTR_B)); + batch.pushBack(MakeFakeGpuImage(800, 200, FAKE_PTR_C)); EXPECT_EQ(batch.numImages(), 3); EXPECT_EQ(batch.maxSize().w, 800); @@ -164,9 +98,9 @@ void TestPushBackIteratorRange() { ImageBatchVarShape batch(8, alloc); std::vector imgs; - imgs.push_back(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); - imgs.push_back(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); - imgs.push_back(MakeFakeGpuImage(300, 300, FMT_RGB8, FAKE_C)); 
+ imgs.push_back(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + imgs.push_back(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + imgs.push_back(MakeFakeGpuImage(300, 300, FAKE_PTR_C)); batch.pushBack(imgs.begin(), imgs.end()); @@ -182,17 +116,17 @@ void TestPushBackCapacityOverflow() { CountingAllocator alloc; ImageBatchVarShape batch(2, alloc); - batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_B)); - EXPECT_EXCEPTION(batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_C)), eStatusType::OUT_OF_BOUNDS); + EXPECT_EXCEPTION(batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_C)), eStatusType::OUT_OF_BOUNDS); } void TestPushBackHostImageRejected() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - Image cpuImg = MakeFakeHostImage(64, 64, FMT_U8, FAKE_A); + Image cpuImg = MakeFakeHostImage(64, 64, FAKE_PTR_A, FMT_U8); EXPECT_EXCEPTION(batch.pushBack(cpuImg), eStatusType::INVALID_VALUE); } @@ -208,13 +142,13 @@ void TestPushBackRangeRollbackOnFailure() { // Pre-populate so we can confirm the rollback restores exactly the // pre-call state, not just back to zero. - batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); EXPECT_EQ(batch.numImages(), 1); // Mid-range CPU image — should rollback the partially-pushed entries. std::vector imgs; - imgs.push_back(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); - imgs.push_back(MakeFakeHostImage(300, 300, FMT_RGB8, FAKE_C)); // Will throw. + imgs.push_back(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + imgs.push_back(MakeFakeHostImage(300, 300, FAKE_PTR_C)); // Will throw. 
EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::INVALID_VALUE); @@ -228,9 +162,9 @@ void TestPushBackRangeOverflowPrechecked() { ImageBatchVarShape batch(2, alloc); std::vector imgs; - imgs.push_back(MakeFakeGpuImage(10, 10, FMT_RGB8, FAKE_A)); - imgs.push_back(MakeFakeGpuImage(20, 20, FMT_RGB8, FAKE_B)); - imgs.push_back(MakeFakeGpuImage(30, 30, FMT_RGB8, FAKE_C)); // 3rd overflows capacity 2. + imgs.push_back(MakeFakeGpuImage(10, 10, FAKE_PTR_A)); + imgs.push_back(MakeFakeGpuImage(20, 20, FAKE_PTR_B)); + imgs.push_back(MakeFakeGpuImage(30, 30, FAKE_PTR_C)); // 3rd overflows capacity 2. EXPECT_EXCEPTION(batch.pushBack(imgs.begin(), imgs.end()), eStatusType::OUT_OF_BOUNDS); // Pre-checked: nothing was pushed. @@ -245,8 +179,8 @@ void TestPopBack() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); batch.popBack(); EXPECT_EQ(batch.numImages(), 1); @@ -258,9 +192,9 @@ void TestPopBackMultiple() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); - batch.pushBack(MakeFakeGpuImage(300, 300, FMT_RGB8, FAKE_C)); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + batch.pushBack(MakeFakeGpuImage(300, 300, FAKE_PTR_C)); batch.popBack(2); EXPECT_EQ(batch.numImages(), 1); @@ -270,7 +204,7 @@ void TestPopBackMultiple() { void TestPopBackUnderflow() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); EXPECT_EXCEPTION(batch.popBack(2), eStatusType::OUT_OF_BOUNDS); // 
State preserved. @@ -281,8 +215,8 @@ void TestClearAndReuse() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); batch.clear(); EXPECT_EQ(batch.numImages(), 0); @@ -290,7 +224,7 @@ void TestClearAndReuse() { EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); // Reuse after clear. - batch.pushBack(MakeFakeGpuImage(50, 50, FMT_U8, FAKE_C)); + batch.pushBack(MakeFakeGpuImage(50, 50, FAKE_PTR_C, FMT_U8)); EXPECT_EQ(batch.numImages(), 1); EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_U8), 1); } @@ -302,16 +236,16 @@ void TestClearAndReuse() { void TestUniqueFormatHomogeneous() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(128, 128, FMT_RGB8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(128, 128, FAKE_PTR_B)); EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_RGB8), 1); } void TestUniqueFormatHeterogeneous() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGBA8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_B, FMT_RGBA8)); EXPECT_EQ(AsInt(batch.uniqueFormat() == FMT_NONE), 1); } @@ -344,8 +278,8 @@ void TestExportDataEmpty() { void TestExportDataMetadata() { ImageBatchVarShape batch(4); - batch.pushBack(MakeFakeGpuImage(640, 480, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(320, 240, FMT_RGB8, FAKE_B)); + batch.pushBack(MakeFakeGpuImage(640, 480, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(320, 240, FAKE_PTR_B)); auto data = batch.exportData(0); 
EXPECT_EQ(data.numImages(), 2); @@ -362,7 +296,7 @@ void TestExportDataMetadata() { void TestExportDataCastRoundTrip() { ImageBatchVarShape batch(4); - batch.pushBack(MakeFakeGpuImage(64, 64, FMT_RGB8, FAKE_A)); + batch.pushBack(MakeFakeGpuImage(64, 64, FAKE_PTR_A)); auto hipData = batch.exportData(0); EXPECT_EQ(hipData.numImages(), 1); @@ -383,8 +317,8 @@ void TestMoveConstruction() { CountingAllocator alloc; { ImageBatchVarShape src(4, alloc); - src.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); - src.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); + src.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + src.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); ImageBatchVarShape dst(std::move(src)); EXPECT_EQ(dst.numImages(), 2); @@ -406,9 +340,9 @@ void TestMoveConstruction() { void TestIteratorRangeFor() { CountingAllocator alloc; ImageBatchVarShape batch(4, alloc); - batch.pushBack(MakeFakeGpuImage(100, 100, FMT_RGB8, FAKE_A)); - batch.pushBack(MakeFakeGpuImage(200, 200, FMT_RGB8, FAKE_B)); - batch.pushBack(MakeFakeGpuImage(300, 300, FMT_RGB8, FAKE_C)); + batch.pushBack(MakeFakeGpuImage(100, 100, FAKE_PTR_A)); + batch.pushBack(MakeFakeGpuImage(200, 200, FAKE_PTR_B)); + batch.pushBack(MakeFakeGpuImage(300, 300, FAKE_PTR_C)); int32_t expectedW = 100; int32_t count = 0; diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp index bea99d59..8a7945fd 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image_data.cpp @@ -26,6 +26,7 @@ #include #include +#include "image_test_helpers.hpp" #include "test_helpers.hpp" using namespace roccv; @@ -33,20 +34,6 @@ using namespace roccv::tests; namespace { -// ImageData carries pointers but never dereferences them; the buffer is a -// metadata snapshot. Use opaque sentinel pointers in tests so we can verify -// values flow through without needing real allocations. 
-void* const FAKE_PTR_A = reinterpret_cast(0xAAAAAAAAull); -void* const FAKE_PTR_B = reinterpret_cast(0xBBBBBBBBull); -void* const FAKE_PTR_C = reinterpret_cast(0xCCCCCCCCull); - -ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, int64_t rowStride, void* basePtr) { - ImageBufferStrided buf{}; - buf.numPlanes = 1; - buf.planes[0] = {width, height, rowStride, basePtr}; - return buf; -} - ImageBufferStrided MakeThreePlaneBuffer() { // Mimics a planar layout (e.g. YUV420-style) with sub-sampled chroma — three // planes of differing dimensions and strides backed by distinct buffers. From 6f2469995cbbecc057204b9495199fe00c892686 Mon Sep 17 00:00:00 2001 From: Zach Vincze Date: Wed, 20 May 2026 12:33:49 -0400 Subject: [PATCH 13/13] Address review comments --- include/core/image_batch_var_shape.hpp | 6 ++-- src/core/image.cpp | 35 ++++++++++++------- src/core/image_batch_var_shape.cpp | 20 +++++++---- .../roccv/cpp/include/image_test_helpers.hpp | 9 ++--- tests/roccv/cpp/include/test_helpers.hpp | 2 +- .../cpp/src/tests/core/image/test_image.cpp | 6 ++-- .../core/image/test_image_batch_data.cpp | 16 ++++----- 7 files changed, 56 insertions(+), 38 deletions(-) diff --git a/include/core/image_batch_var_shape.hpp b/include/core/image_batch_var_shape.hpp index a57f355a..ba4fd147 100644 --- a/include/core/image_batch_var_shape.hpp +++ b/include/core/image_batch_var_shape.hpp @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -123,9 +124,8 @@ class ImageBatchVarShape { /** * @brief The common ImageFormat across all images, or FMT_NONE if formats - * are heterogeneous or the batch is empty. After popping the only image - * with a given heterogenizing format, the cached value may stay FMT_NONE - * until the next emptying operation — conservative, never wrong. + * are heterogeneous or the batch is empty. popBack invalidates the cache + * so the next call rescans and may return an exact format again. 
*/ ImageFormat uniqueFormat() const; diff --git a/src/core/image.cpp b/src/core/image.cpp index 12a20d9a..d6077dcb 100644 --- a/src/core/image.cpp +++ b/src/core/image.cpp @@ -77,11 +77,19 @@ Image::Requirements Image::CalcRequirements(Size2D size, ImageFormat format) { const int64_t bytesPerPixel = static_cast(DataType(format.dtype()).size()) * format.channels(); + // Guard signed-overflow in the rowStride = bytesPerPixel * width product + // (UB on overflow). Realistic image sizes don't approach INT64_MAX, but + // pathological callers shouldn't silently propagate garbage into strides. + int64_t rowStride = 0; + if (__builtin_mul_overflow(bytesPerPixel, static_cast(size.w), &rowStride)) { + throw Exception("Image row stride overflows int64.", eStatusType::INVALID_VALUE); + } + // TODO: derive a sensible default base/row alignment from device attributes. return ImageRequirements{ .size = size, .format = format, - .planeRowStride = {bytesPerPixel * size.w}, + .planeRowStride = {rowStride}, .alignBytes = 0, }; } @@ -103,11 +111,7 @@ Image::Image(const Requirements& reqs, const IAllocator& alloc, eDeviceType devi : Image(reqs, device, makeStorage(reqs, alloc, device)) {} Image::Image(const Requirements& reqs, eDeviceType device, std::shared_ptr storage) - : m_data(std::move(storage)), - m_size(reqs.size), - m_format(reqs.format), - m_device(device), - m_planeRowStride{} { + : m_data(std::move(storage)), m_size(reqs.size), m_format(reqs.format), m_device(device), m_planeRowStride{} { std::copy(std::begin(reqs.planeRowStride), std::end(reqs.planeRowStride), m_planeRowStride.begin()); } @@ -163,13 +167,18 @@ Image ImageWrapData(const ImageData& data, ImageDataCleanupFunc cleanup) { // The deleter captures `data` by value so the original snapshot survives // long enough to be passed to the cleanup callback on last-handle drop. 
- auto storage = std::shared_ptr(new ImageStorage(plane0.basePtr), - [data, cleanup](ImageStorage* s) { - if (cleanup) { - cleanup(data); - } - delete s; - }); + // Swallow exceptions from `cleanup` — shared_ptr deleters run during + // destruction, and a throw would propagate into std::terminate. + auto storage = + std::shared_ptr(new ImageStorage(plane0.basePtr), [data, cleanup](ImageStorage* s) noexcept { + if (cleanup) { + try { + cleanup(data); + } catch (...) { + } + } + delete s; + }); return Image(reqs, data.device(), std::move(storage)); } diff --git a/src/core/image_batch_var_shape.cpp b/src/core/image_batch_var_shape.cpp index d522e336..510cccac 100644 --- a/src/core/image_batch_var_shape.cpp +++ b/src/core/image_batch_var_shape.cpp @@ -46,12 +46,20 @@ ImageBatchVarShape::ImageBatchVarShape(int32_t capacity, const IAllocator& alloc const size_t imagesBytes = sizeof(ImageBufferStrided) * capacity; const size_t formatsBytes = sizeof(ImageFormat) * capacity; - m_devImagesBuffer = static_cast(m_allocator.allocHipMem(imagesBytes)); - m_devFormatsBuffer = static_cast(m_allocator.allocHipMem(formatsBytes)); - m_hostImagesBuffer = static_cast(m_allocator.allocHostPinnedMem(imagesBytes)); - m_hostFormatsBuffer = static_cast(m_allocator.allocHostPinnedMem(formatsBytes)); - - HIP_VALIDATE_NO_ERRORS(hipEventCreateWithFlags(&m_postFence, hipEventDisableTiming)); + try { + m_devImagesBuffer = static_cast(m_allocator.allocHipMem(imagesBytes)); + m_devFormatsBuffer = static_cast(m_allocator.allocHipMem(formatsBytes)); + m_hostImagesBuffer = static_cast(m_allocator.allocHostPinnedMem(imagesBytes)); + m_hostFormatsBuffer = static_cast(m_allocator.allocHostPinnedMem(formatsBytes)); + + HIP_VALIDATE_NO_ERRORS(hipEventCreateWithFlags(&m_postFence, hipEventDisableTiming)); + } catch (...) 
{ + if (m_hostFormatsBuffer != nullptr) m_allocator.freeHostPinnedMem(m_hostFormatsBuffer); + if (m_hostImagesBuffer != nullptr) m_allocator.freeHostPinnedMem(m_hostImagesBuffer); + if (m_devFormatsBuffer != nullptr) m_allocator.freeHipMem(m_devFormatsBuffer); + if (m_devImagesBuffer != nullptr) m_allocator.freeHipMem(m_devImagesBuffer); + throw; + } } ImageBatchVarShape::~ImageBatchVarShape() { diff --git a/tests/roccv/cpp/include/image_test_helpers.hpp b/tests/roccv/cpp/include/image_test_helpers.hpp index f7318f89..c4613367 100644 --- a/tests/roccv/cpp/include/image_test_helpers.hpp +++ b/tests/roccv/cpp/include/image_test_helpers.hpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -103,14 +104,14 @@ inline ImageBufferStrided MakeSinglePlaneBuffer(int32_t width, int32_t height, i // Single-plane GPU-resident ImageData snapshot with packed-row stride implied // by `fmt`. For tests that need an ImageData but won't touch the pixels. inline ImageDataStridedHip MakeFakeHipData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { - return ImageDataStridedHip(fmt, MakeSinglePlaneBuffer(width, height, static_cast(width * fmt.channels()), - basePtr)); + const int64_t rowStride = static_cast(width) * fmt.channels() * DataType(fmt.dtype()).size(); + return ImageDataStridedHip(fmt, MakeSinglePlaneBuffer(width, height, rowStride, basePtr)); } // Host counterpart of MakeFakeHipData. inline ImageDataStridedHost MakeFakeHostData(int32_t width, int32_t height, void* basePtr, ImageFormat fmt = FMT_RGB8) { - return ImageDataStridedHost(fmt, MakeSinglePlaneBuffer(width, height, static_cast(width * fmt.channels()), - basePtr)); + const int64_t rowStride = static_cast(width) * fmt.channels() * DataType(fmt.dtype()).size(); + return ImageDataStridedHost(fmt, MakeSinglePlaneBuffer(width, height, rowStride, basePtr)); } // Single-plane GPU-resident Image wrapping a sentinel pointer via ImageWrapData. 
diff --git a/tests/roccv/cpp/include/test_helpers.hpp b/tests/roccv/cpp/include/test_helpers.hpp index df6840f5..7ed56309 100644 --- a/tests/roccv/cpp/include/test_helpers.hpp +++ b/tests/roccv/cpp/include/test_helpers.hpp @@ -201,7 +201,7 @@ namespace tests { // EXPECT_EQ pipes through std::to_string, so wrap enums/pointers/bools through // these casts before comparing. inline auto AsInt = [](auto v) { return static_cast(v); }; -inline auto AsAddr = [](void* p) { return reinterpret_cast(p); }; +inline auto AsAddr = [](const void* p) { return reinterpret_cast(p); }; inline auto AsSize = [](auto v) { return static_cast(v); }; /** diff --git a/tests/roccv/cpp/src/tests/core/image/test_image.cpp b/tests/roccv/cpp/src/tests/core/image/test_image.cpp index b2ae2aeb..ce6ef69b 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image.cpp @@ -87,7 +87,7 @@ void TestCalcRequirementsRejectsInvalidDims() { * fit in int64. */ void TestCalcRequirementsLargeDims() { - // 8K image, RGBA32 (4 channels * 4 bytes = 16 B/pixel) → 8192 * 16 = 131072 B/row. + // 8K image, RGBA8 (4 channels * 1 byte = 4 B/pixel) → 8192 * 4 = 32768 B/row. auto reqs = Image::CalcRequirements({8192, 4320}, FMT_RGBA8); EXPECT_EQ(reqs.planeRowStride[0], static_cast(8192 * 4)); } @@ -173,7 +173,9 @@ void TestImageCopySharesBuffer() { EXPECT_EQ(AsAddr(second.exportData().cast()->plane(0).basePtr), AsAddr(buf)); // Drop `first`; buffer must NOT be freed yet — `second` still holds it. - { Image sink = std::move(first); } + { + Image sink = std::move(first); + } EXPECT_EQ(alloc.hipFrees, 0); } // All handles dropped — exactly one free. 
diff --git a/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp index a2e238be..31e449e6 100644 --- a/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp +++ b/tests/roccv/cpp/src/tests/core/image/test_image_batch_data.cpp @@ -76,9 +76,9 @@ void TestImageBatchVarShapeDataStridedHipConstruction() { EXPECT_EQ(data.maxSize().w, 640); EXPECT_EQ(data.maxSize().h, 480); EXPECT_EQ(data.uniqueFormat().channels(), 3); - EXPECT_EQ(AsAddr(const_cast(data.formatList())), AsAddr(g_formatList)); - EXPECT_EQ(AsAddr(const_cast(data.hostFormatList())), AsAddr(g_hostFormatList)); - EXPECT_EQ(AsAddr(const_cast(data.imageList())), AsAddr(g_imageList)); + EXPECT_EQ(AsAddr(data.formatList()), AsAddr(g_formatList)); + EXPECT_EQ(AsAddr(data.hostFormatList()), AsAddr(g_hostFormatList)); + EXPECT_EQ(AsAddr(data.imageList()), AsAddr(g_imageList)); EXPECT_EQ(data.imageList()[0].planes[0].width, 640); EXPECT_EQ(data.imageList()[1].planes[0].width, 320); } @@ -95,7 +95,7 @@ void TestImageBatchVarShapeDataStridedHostConstruction() { EXPECT_EQ(data.maxSize().w, 640); EXPECT_EQ(data.maxSize().h, 480); EXPECT_EQ(data.uniqueFormat().channels(), 3); - EXPECT_EQ(AsAddr(const_cast(data.imageList())), AsAddr(g_imageList)); + EXPECT_EQ(AsAddr(data.imageList()), AsAddr(g_imageList)); } /** @@ -161,14 +161,12 @@ void TestImageBatchVarShapeDataSugarCtor() { EXPECT_EQ(wide.numImages(), sugar.numImages()); EXPECT_EQ(wide.maxSize().w, sugar.maxSize().w); EXPECT_EQ(wide.maxSize().h, sugar.maxSize().h); - EXPECT_EQ(AsAddr(const_cast(wide.imageList())), - AsAddr(const_cast(sugar.imageList()))); + EXPECT_EQ(AsAddr(wide.imageList()), AsAddr(sugar.imageList())); ImageBatchVarShapeDataStridedHost wideHost(2, ImageBatchBuffer{.varShapeStrided = buf}); ImageBatchVarShapeDataStridedHost sugarHost(2, buf); EXPECT_EQ(AsInt(wideHost.device()), AsInt(sugarHost.device())); - EXPECT_EQ(AsAddr(const_cast(wideHost.imageList())), - 
AsAddr(const_cast(sugarHost.imageList()))); + EXPECT_EQ(AsAddr(wideHost.imageList()), AsAddr(sugarHost.imageList())); } /** @@ -237,7 +235,7 @@ void TestImageBatchDataCast() { EXPECT_EQ(AsInt(asHip->device()), AsInt(eDeviceType::GPU)); EXPECT_EQ(asHip->numImages(), 2); EXPECT_EQ(asHip->maxSize().w, 640); - EXPECT_EQ(AsAddr(const_cast(asHip->imageList())), AsAddr(g_imageList)); + EXPECT_EQ(AsAddr(asHip->imageList()), AsAddr(g_imageList)); auto asStrided = base.cast(); EXPECT_EQ(AsInt(asStrided.has_value()), 1);