Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,48 @@ jobs:
working-directory: ${{github.workspace}}/build
run: ./Test "-r=.." -m=a -tt=$(nproc) "-ot=log_${{matrix.build_type}}.txt" -ts=10

build_and_test_hexagon_cross:
runs-on: ubuntu-latest

strategy:
matrix:
build_type: [Debug, Release]

env:
HEXAGON_TOOLCHAIN: ${{github.workspace}}/clang+llvm-22.1.0-cross-hexagon-unknown-linux-musl/x86_64-linux-gnu

steps:
- uses: actions/checkout@v3

- name: Host properties
run: lscpu

- name: Install host dependencies
run: sudo apt-get update && sudo apt-get install -y libc++1 libc++abi1 liburing2 libglib2.0-0t64

- name: Install Hexagon toolchain
run: |
wget -q https://artifacts.codelinaro.org/artifactory/codelinaro-toolchain-for-hexagon/22.1.0_/clang+llvm-22.1.0-cross-hexagon-unknown-linux-musl.tar.zst
tar --zstd -xf clang+llvm-22.1.0-cross-hexagon-unknown-linux-musl.tar.zst

- name: Configure CMake
run: >
cmake ./prj/cmake -B ${{github.workspace}}/build
-DCMAKE_BUILD_TYPE=${{matrix.build_type}}
-DSIMD_TOOLCHAIN="${{env.HEXAGON_TOOLCHAIN}}/bin/hexagon-linux-musl-clang"
-DSIMD_TARGET="hexagon"

- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{matrix.build_type}} --parallel$(nproc)

- name: Test
working-directory: ${{github.workspace}}/build
run: >
${{env.HEXAGON_TOOLCHAIN}}/bin/qemu-hexagon -cpu any
-L ${{env.HEXAGON_TOOLCHAIN}}/target/hexagon-unknown-linux-musl
./Test "-r=.." -m=a -tt=1 "-ot=log_${{matrix.build_type}}.txt" -ts=10
-fe=WarpAffine -fe=RecursiveBilateral

build_and_test_mingw:
runs-on: windows-latest

Expand Down
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ object detection and classification, neural network.

The algorithms are optimized with using of different SIMD CPU extensions.
In particular the library supports following CPU extensions:
SSE, AVX, AVX-512 and AMX for x86/x64, NEON for ARM.
SSE, AVX, AVX-512 and AMX for x86/x64, NEON for ARM, HVX for Hexagon.

The Simd Library has C API and also contains useful C++ classes and functions to facilitate access to C API.
The library supports dynamic and static linking, 32-bit and 64-bit Windows and Linux,
Expand Down Expand Up @@ -62,7 +62,7 @@ Files of CMake build systems are placed in the directory:

`simd/prj/cmake/`

The library can be built for x86/x64, ARM(32/64) platforms using the G++ or Clang compilers.
The library can be built for x86/x64, ARM(32/64), and Hexagon platforms using the G++ or Clang compilers.
Using the native compiler (g++) for the current platform is simple:

mkdir build
Expand All @@ -85,6 +85,13 @@ And for ARM (64 bit):
cmake ../prj/cmake -DSIMD_TOOLCHAIN="/your_toolchain/usr/bin/aarch64-linux-gnu-g++" -DSIMD_TARGET="aarch64" -DCMAKE_BUILD_TYPE="Release"
make

And for Hexagon with HVX (cross-compilation using the Hexagon Clang toolchain):

mkdir build
cd build
cmake ../prj/cmake -DSIMD_TOOLCHAIN="/your_toolchain/bin/hexagon-linux-musl-clang" -DSIMD_TARGET="hexagon" -DCMAKE_BUILD_TYPE="Release"
make

As result the library and the test application will be built in the current directory.

There are addition build parameters:
Expand Down Expand Up @@ -199,7 +206,7 @@ Also you can use parameters:
* `-tr=2` a number of test execution repeats.
* `-ts=1` to print statistics of time of tests execution.
* `-cc=1` to check c++ API.
* `-de=2` a flags of SIMD extensions which testing are disabled. Base - 1, 2 - SSE4.1/NEON, 4 - AVX2, 8 - AVX-512BW, 16 - AVX-512VNNI, 32 - AMX-BF16.
* `-de=2` a flags of SIMD extensions which testing are disabled. Base - 1, 2 - SSE4.1/NEON/HVX, 4 - AVX2, 8 - AVX-512BW, 16 - AVX-512VNNI, 32 - AMX-BF16.
* `-wu=100` a time to warm up CPU before testing (in milliseconds).
* `-pt=1` a boolean flag to pin threads to cpu cores.

2 changes: 2 additions & 0 deletions prj/cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ else()
include(x86.cmake)
elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "arm") OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64"))
include(arm.cmake)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "hexagon")
include(hexagon.cmake)
else()
message(FATAL_ERROR "Unknown value of CMAKE_SYSTEM_PROCESSOR!")
endif()
Expand Down
33 changes: 33 additions & 0 deletions prj/cmake/hexagon.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: MIT

set(CXX_HVX_FLAG "-mhvx -mhvx-length=128B")

file(GLOB_RECURSE SIMD_BASE_SRC ${SIMD_ROOT}/src/Simd/SimdBase*.cpp)
set_source_files_properties(${SIMD_BASE_SRC} PROPERTIES COMPILE_FLAGS "${COMMON_CXX_FLAGS}")

file(GLOB_RECURSE SIMD_HVX_SRC ${SIMD_ROOT}/src/Simd/SimdHvx*.cpp)
set_source_files_properties(${SIMD_HVX_SRC} PROPERTIES COMPILE_FLAGS "${COMMON_CXX_FLAGS} ${CXX_HVX_FLAG}")

file(GLOB_RECURSE SIMD_LIB_SRC ${SIMD_ROOT}/src/Simd/SimdLib.cpp)
set_source_files_properties(${SIMD_LIB_SRC} PROPERTIES COMPILE_FLAGS "${COMMON_CXX_FLAGS} ${CXX_HVX_FLAG}")
add_library(Simd ${SIMD_LIB_TYPE} ${SIMD_LIB_SRC} ${SIMD_BASE_SRC} ${SIMD_HVX_SRC})

if(SIMD_TEST)
# Work around QEMU Hexagon emulation bug: test code compiled at -O2 or
# higher triggers misemulation of certain instruction sequences (the
# stack-coloring pass at -O2 produces code that QEMU handles incorrectly).
# The library itself remains at full optimization; only test harness code
# is affected. Cap test files at -O1 until the QEMU fix is available.
string(REGEX REPLACE "-O[23s]" "-O1" TEST_CXX_FLAGS "${COMMON_CXX_FLAGS}")
file(GLOB_RECURSE TEST_SRC_C ${SIMD_ROOT}/src/Test/*.c)
file(GLOB_RECURSE TEST_SRC_CPP ${SIMD_ROOT}/src/Test/*.cpp)
set_source_files_properties(${TEST_SRC_CPP} PROPERTIES COMPILE_FLAGS "${TEST_CXX_FLAGS} ${CXX_HVX_FLAG} -D_GLIBCXX_USE_NANOSLEEP")
add_executable(Test ${TEST_SRC_C} ${TEST_SRC_CPP})
target_link_libraries(Test Simd -lpthread -lstdc++ -lm)
if(SIMD_OPENCV)
target_compile_definitions(Test PUBLIC SIMD_OPENCV_ENABLE)
target_link_libraries(Test ${OpenCV_LIBS})
target_include_directories(Test PUBLIC ${OpenCV_INCLUDE_DIRS})
endif()
endif()
5 changes: 5 additions & 0 deletions src/Simd/SimdAlignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ namespace Simd
if (Neon::Enable)
return sizeof(uint8x16_t);
else
#endif
#ifdef SIMD_HVX_ENABLE
if (Hvx::Enable)
return sizeof(HVX_Vector);
else
#endif
return sizeof(void *);
}
Expand Down
37 changes: 37 additions & 0 deletions src/Simd/SimdCompare.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,5 +451,42 @@ namespace Simd
}
}
#endif// SIMD_NEON_ENABLE

#ifdef SIMD_HVX_ENABLE
namespace Hvx
{
template<SimdCompareType compareType> SIMD_INLINE HVX_VectorPred Compare8u(const HVX_Vector & a, const HVX_Vector & b);

template<> SIMD_INLINE HVX_VectorPred Compare8u<SimdCompareEqual>(const HVX_Vector & a, const HVX_Vector & b)
{
return Q6_Q_vcmp_eq_VbVb(a, b);
}

template<> SIMD_INLINE HVX_VectorPred Compare8u<SimdCompareNotEqual>(const HVX_Vector & a, const HVX_Vector & b)
{
return Q6_Q_not_Q(Q6_Q_vcmp_eq_VbVb(a, b));
}

template<> SIMD_INLINE HVX_VectorPred Compare8u<SimdCompareGreater>(const HVX_Vector & a, const HVX_Vector & b)
{
return Q6_Q_vcmp_gt_VubVub(a, b);
}

template<> SIMD_INLINE HVX_VectorPred Compare8u<SimdCompareGreaterOrEqual>(const HVX_Vector & a, const HVX_Vector & b)
{
return Q6_Q_not_Q(Q6_Q_vcmp_gt_VubVub(b, a));
}

template<> SIMD_INLINE HVX_VectorPred Compare8u<SimdCompareLesser>(const HVX_Vector & a, const HVX_Vector & b)
{
return Q6_Q_vcmp_gt_VubVub(b, a);
}

template<> SIMD_INLINE HVX_VectorPred Compare8u<SimdCompareLesserOrEqual>(const HVX_Vector & a, const HVX_Vector & b)
{
return Q6_Q_not_Q(Q6_Q_vcmp_gt_VubVub(a, b));
}
}
#endif// SIMD_HVX_ENABLE
}
#endif//__SimdCompare_h__
2 changes: 2 additions & 0 deletions src/Simd/SimdConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@

//#define SIMD_NEON_FP16_DISABLE

//#define SIMD_HVX_DISABLE

//#define SIMD_STATIC

#define SIMD_LOG_ENABLE
Expand Down
16 changes: 16 additions & 0 deletions src/Simd/SimdConst.h
Original file line number Diff line number Diff line change
Expand Up @@ -673,5 +673,21 @@ namespace Simd
const int32x4_t K32_YUV_TO_BGR_ROUND_TERM = SIMD_VEC_SET1_EPI32(Base::YUV_TO_BGR_ROUND_TERM);
}
#endif

#ifdef SIMD_HVX_ENABLE
namespace Hvx
{
const size_t A = 128;
const size_t DA = 2 * A;
const size_t QA = 4 * A;
const size_t OA = 8 * A;
const size_t HA = A / 2;

const size_t F = A / sizeof(float);
const size_t DF = 2 * F;
const size_t QF = 4 * F;
const size_t HF = F / 2;
}
#endif
}
#endif
23 changes: 21 additions & 2 deletions src/Simd/SimdDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,10 @@
#define SIMD_ARM64_ENABLE
#endif

#if defined __hexagon__
#define SIMD_HEXAGON_ENABLE
#endif

#if defined(SIMD_X86_ENABLE) || defined(SIMD_X64_ENABLE)

#if !defined(SIMD_SSE41_DISABLE) && defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) && defined(__SSSE3__) && defined(__SSE4_1__) && defined(__SSE4_2__)
Expand Down Expand Up @@ -225,7 +229,15 @@

#endif//defined(SIMD_ARM_ENABLE) || defined(SIMD_ARM64_ENABLE)

#if defined(__clang__)
#if defined(SIMD_HEXAGON_ENABLE)

#if !defined(SIMD_HVX_DISABLE) && defined(__HVX__)
#define SIMD_HVX_ENABLE
#endif

#endif//defined(SIMD_HEXAGON_ENABLE)

#if defined(__clang__) && !defined(__hexagon__)
#define SIMD_CLANG_AVX2_BGR_TO_BGRA_ERROR
#endif

Expand Down Expand Up @@ -256,7 +268,14 @@
#include <arm_neon.h>
#endif

#if defined(SIMD_AVX512BW_ENABLE) || defined(SIMD_AVX512VNNI_ENABLE) || defined(SIMD_AMXBF16_ENABLE)
#if defined(SIMD_HVX_ENABLE)
#include <hexagon_types.h>
#include <hvx_hexagon_protos.h>
#endif

#if defined(SIMD_HVX_ENABLE)
#define SIMD_ALIGN 128
#elif defined(SIMD_AVX512BW_ENABLE) || defined(SIMD_AVX512VNNI_ENABLE) || defined(SIMD_AMXBF16_ENABLE)
#define SIMD_ALIGN 64
#elif defined(SIMD_AVX2_ENABLE)
#define SIMD_ALIGN 32
Expand Down
17 changes: 16 additions & 1 deletion src/Simd/SimdEnable.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ namespace Simd
const bool Enable = GetEnable();
}
#endif

#ifdef SIMD_HVX_ENABLE
namespace Hvx
{
bool GetEnable();

const bool Enable = GetEnable();
}
#endif
}

#define SIMD_BASE_FUNC(func) Simd::Base::func
Expand Down Expand Up @@ -120,11 +129,17 @@ namespace Simd
#endif

#ifdef SIMD_NEON_ENABLE
#define SIMD_NEON_FUNC(func) Simd::Neon::Enable ? Simd::Neon::func :
#define SIMD_NEON_FUNC(func) Simd::Neon::Enable ? Simd::Neon::func :
#else
#define SIMD_NEON_FUNC(func)
#endif

#ifdef SIMD_HVX_ENABLE
#define SIMD_HVX_FUNC(func) Simd::Hvx::Enable ? Simd::Hvx::func :
#else
#define SIMD_HVX_FUNC(func)
#endif

#define SIMD_FUNC0(func) SIMD_BASE_FUNC(func)
#define SIMD_FUNC1(func, EXT1) EXT1(func) SIMD_BASE_FUNC(func)
#define SIMD_FUNC2(func, EXT1, EXT2) EXT1(func) EXT2(func) SIMD_BASE_FUNC(func)
Expand Down
76 changes: 76 additions & 0 deletions src/Simd/SimdHvx.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
* SPDX-License-Identifier: MIT
*/
#ifndef __SimdHvx_h__
#define __SimdHvx_h__

#include "Simd/SimdDefs.h"

namespace Simd
{
#ifdef SIMD_HVX_ENABLE
namespace Hvx
{
void AbsDifference(const uint8_t* a, size_t aStride, const uint8_t* b, size_t bStride, uint8_t* c, size_t cStride,
size_t width, size_t height);

void AbsDifferenceSum(const uint8_t* a, size_t aStride, const uint8_t* b, size_t bStride,
size_t width, size_t height, uint64_t* sum);

void AbsGradientSaturatedSum(const uint8_t* src, size_t srcStride, size_t width, size_t height,
uint8_t* dst, size_t dstStride);

void AddFeatureDifference(const uint8_t* value, size_t valueStride, size_t width, size_t height,
const uint8_t* lo, size_t loStride, const uint8_t* hi, size_t hiStride,
uint16_t weight, uint8_t* difference, size_t differenceStride);

void BgrToGray(const uint8_t* bgr, size_t width, size_t height, size_t bgrStride,
uint8_t* gray, size_t grayStride);

void BgrToRgb(const uint8_t* bgr, size_t width, size_t height, size_t bgrStride,
uint8_t* rgb, size_t rgbStride);

void FillBgra(uint8_t* dst, size_t stride, size_t width, size_t height,
uint8_t blue, uint8_t green, uint8_t red, uint8_t alpha);

void FillPixel(uint8_t* dst, size_t stride, size_t width, size_t height,
const uint8_t* pixel, size_t pixelSize);

void OperationBinary8u(const uint8_t* a, size_t aStride, const uint8_t* b, size_t bStride,
size_t width, size_t height, size_t channelCount, uint8_t* dst, size_t dstStride,
SimdOperationBinary8uType type);

void GetStatistic(const uint8_t* src, size_t stride, size_t width, size_t height,
uint8_t* min, uint8_t* max, uint8_t* average);

void GetRowSums(const uint8_t* src, size_t stride, size_t width, size_t height, uint32_t* sums);

void GetColSums(const uint8_t* src, size_t stride, size_t width, size_t height, uint32_t* sums);

void GetAbsDyRowSums(const uint8_t* src, size_t stride, size_t width, size_t height, uint32_t* sums);

void GetAbsDxColSums(const uint8_t* src, size_t stride, size_t width, size_t height, uint32_t* sums);

void ValueSum(const uint8_t* src, size_t stride, size_t width, size_t height, uint64_t* sum);

void SquareSum(const uint8_t* src, size_t stride, size_t width, size_t height, uint64_t* sum);

void ValueSquareSum(const uint8_t* src, size_t stride, size_t width, size_t height, uint64_t* valueSum, uint64_t* squareSum);

void ValueSquareSums(const uint8_t* src, size_t stride, size_t width, size_t height, size_t channels, uint64_t* valueSums, uint64_t* squareSums);

void CorrelationSum(const uint8_t* a, size_t aStride, const uint8_t* b, size_t bStride, size_t width, size_t height, uint64_t* sum);

void AbsSecondDerivativeHistogram(const uint8_t* src, size_t width, size_t height, size_t stride,
size_t step, size_t indent, uint32_t* histogram);

void HistogramMasked(const uint8_t* src, size_t srcStride, size_t width, size_t height,
const uint8_t* mask, size_t maskStride, uint8_t index, uint32_t* histogram);

void HistogramConditional(const uint8_t* src, size_t srcStride, size_t width, size_t height,
const uint8_t* mask, size_t maskStride, uint8_t value, SimdCompareType compareType, uint32_t* histogram);
}
#endif
}
#endif
Loading
Loading