From ef180f7b59f89805349aa125cd75475eb36db7db Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 9 Dec 2025 01:49:21 -0800 Subject: [PATCH 01/30] fix(simd): unmasked AVX2 load --- include/svs/core/distance/simd_utils.h | 38 ++++++++++++-------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/include/svs/core/distance/simd_utils.h b/include/svs/core/distance/simd_utils.h index f883abcaf..f7467c636 100644 --- a/include/svs/core/distance/simd_utils.h +++ b/include/svs/core/distance/simd_utils.h @@ -19,6 +19,7 @@ #if defined(__i386__) || defined(__x86_64__) #include +#include #include #include @@ -332,11 +333,10 @@ template <> struct ConvertToFloat<8> { // from float static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); } static __m256 load(mask_t m, const float* ptr) { - // AVX2 doesn't have native masked load, so we load and then blend - auto data = _mm256_loadu_ps(ptr); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Full width load with blending may case out-of-bounds read (SEGV) + // Therefore we use _mm256_maskload_ps which safely handles masked loads + auto mask_vec = _mm256_castps_si256(create_blend_mask_avx2(m)); + return _mm256_maskload_ps(ptr, mask_vec); } // from float16 @@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const Float16* ptr) { - auto data = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(ptr))); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + __m128i buffer = _mm_setzero_si128(); + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(Float16)); + return _mm256_cvtph_ps(buffer); } // from uint8 @@ -359,12 +359,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const uint8_t* ptr) { - auto data = 
_mm256_cvtepi32_ps(_mm256_cvtepu8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(uint8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_cvtsi64_si128(buffer))); } // from int8 @@ -375,12 +373,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const int8_t* ptr) { - auto data = _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(int8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(_mm_cvtsi64_si128(buffer))); } // We do not need to treat the left or right-hand differently. 
From c1705f5062865303bfc3920ee46b55503a33e06c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 9 Dec 2025 06:45:17 -0800 Subject: [PATCH 02/30] remove L2Impl specific test --- tests/svs/core/distance.cpp | 58 +++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index 3b65c2b91..91d363a06 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -20,6 +20,9 @@ // catch 2 #include "catch2/catch_test_macros.hpp" +#include +#include + namespace { std::string_view test_table = R"( @@ -94,3 +97,58 @@ CATCH_TEST_CASE("Distance Utils", "[core][distance][distance_type]") { } } } + +CATCH_TEST_CASE("Distance asan L2", "[distance][simd][asan][l2]") { + // Try various sizes to hit the case where vector capacity == size + // and the SIMD load reads past the end into the redzone. + // We test sizes that are not multiples of 8 (AVX2 width) or 16 (AVX512 width). + for (size_t size = 1; size < 128; ++size) { + std::vector a(size); + std::vector b(size); + + std::iota(a.begin(), a.end(), 0.0f); + std::iota(b.begin(), b.end(), 1.0f); + + // Ensure no spare capacity + a.shrink_to_fit(); + b.shrink_to_fit(); + + auto dist = svs::distance::L2::compute(a.data(), b.data(), size); + CATCH_REQUIRE(dist >= 0); + } +} + +CATCH_TEST_CASE("Distance asan Cosine", "[distance][simd][asan][cosine]") { + for (size_t size = 1; size < 128; ++size) { + std::vector a(size); + std::vector b(size); + + std::iota(a.begin(), a.end(), 0.0f); + std::iota(b.begin(), b.end(), 1.0f); + + // Ensure no spare capacity + a.shrink_to_fit(); + b.shrink_to_fit(); + + auto dist = + svs::distance::CosineSimilarity::compute(a.data(), b.data(), 1.0f, size); + CATCH_REQUIRE(dist >= 0); + } +} + +CATCH_TEST_CASE("Distance asan IP", "[distance][simd][asan][ip]") { + for (size_t size = 1; size < 128; ++size) { + std::vector a(size); + std::vector b(size); + + std::iota(a.begin(), a.end(), 0.0f); + 
std::iota(b.begin(), b.end(), 1.0f); + + // Ensure no spare capacity + a.shrink_to_fit(); + b.shrink_to_fit(); + + auto dist = svs::distance::IP::compute(a.data(), b.data(), size); + CATCH_REQUIRE(dist >= 0); + } +} From 05dce8faa151b79eb0d4cf03bc9a2a7128167d94 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 9 Dec 2025 07:04:19 -0800 Subject: [PATCH 03/30] add asan yml --- .github/workflows/asan.yml | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/workflows/asan.yml diff --git a/.github/workflows/asan.yml b/.github/workflows/asan.yml new file mode 100644 index 000000000..504c8bcbe --- /dev/null +++ b/.github/workflows/asan.yml @@ -0,0 +1,54 @@ +name: Linux ASan Test +run-name: ${{ github.event.inputs.run_name || github.event.pull_request.title }} + +on: + workflow_dispatch: + pull_request: + +concurrency: + group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" + cancel-in-progress: true + +jobs: + asan-test: + name: Clang ASan + runs-on: [self-hosted, Linux, ubuntu-22.04] + env: + CXX: clang++-15 + CC: clang-15 + + steps: + - name: "Cleanup build folder" + run: | + ls -la ./ + sudo rm -rf ./* || true + sudo rm -rf ./.??* || true + ls -la ./ + + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Configure build + working-directory: ${{ runner.temp }} + env: + TEMP_WORKSPACE: ${{ runner.temp }} + run: | + cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_CXX_FLAGS="-fsanitize=address -fno-omit-frame-pointer -g" \ + -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address" \ + -DSVS_BUILD_TESTS=YES \ + -DSVS_BUILD_BINARIES=NO \ + -DSVS_BUILD_EXAMPLES=NO + + - name: Build tests + working-directory: ${{ runner.temp }}/build + run: make tests -j10 + + - name: Run tests + env: + CTEST_OUTPUT_ON_FAILURE: 1 + ASAN_OPTIONS: detect_leaks=0 + working-directory: ${{ runner.temp }}/build/tests + run: ./tests 
"[distance][simd][asan]" From ecf5b2861052d604cbf43560731489d40e11572b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 9 Dec 2025 07:07:50 -0800 Subject: [PATCH 04/30] fix naming and clang version --- .github/workflows/asan.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/asan.yml b/.github/workflows/asan.yml index 504c8bcbe..d2b59cfe5 100644 --- a/.github/workflows/asan.yml +++ b/.github/workflows/asan.yml @@ -1,4 +1,4 @@ -name: Linux ASan Test +name: Linux Build and Test run-name: ${{ github.event.inputs.run_name || github.event.pull_request.title }} on: @@ -11,11 +11,11 @@ concurrency: jobs: asan-test: - name: Clang ASan + name: clang-18 fsanitize=address runs-on: [self-hosted, Linux, ubuntu-22.04] env: - CXX: clang++-15 - CC: clang-15 + CXX: clang++-18 + CC: clang-18 steps: - name: "Cleanup build folder" @@ -44,11 +44,11 @@ jobs: - name: Build tests working-directory: ${{ runner.temp }}/build - run: make tests -j10 + run: make tests -j - name: Run tests env: CTEST_OUTPUT_ON_FAILURE: 1 ASAN_OPTIONS: detect_leaks=0 working-directory: ${{ runner.temp }}/build/tests - run: ./tests "[distance][simd][asan]" + run: ./tests From e5d46aabd4fef468903456674f18109a1ea86e70 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 9 Dec 2025 07:08:45 -0800 Subject: [PATCH 05/30] copyright --- .github/workflows/asan.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/asan.yml b/.github/workflows/asan.yml index d2b59cfe5..63200cd66 100644 --- a/.github/workflows/asan.yml +++ b/.github/workflows/asan.yml @@ -1,3 +1,16 @@ +# Copyright (C) 2025 Intel Corporation +# +# This software and the related documents are Intel copyrighted materials, +# and your use of them is governed by the express license under which they +# were provided to you ("License"). 
Unless the License provides otherwise, +# you may not use, modify, copy, publish, distribute, disclose or transmit +# this software or the related documents without Intel's prior written +# permission. +# +# This software and the related documents are provided as is, with no +# express or implied warranties, other than those that are expressly stated +# in the License. + name: Linux Build and Test run-name: ${{ github.event.inputs.run_name || github.event.pull_request.title }} From f271dc97ed4e23b5c059014b90c27da20bbf017e Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 9 Dec 2025 07:10:37 -0800 Subject: [PATCH 06/30] typo --- include/svs/core/distance/simd_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/svs/core/distance/simd_utils.h b/include/svs/core/distance/simd_utils.h index f7467c636..bd9834da5 100644 --- a/include/svs/core/distance/simd_utils.h +++ b/include/svs/core/distance/simd_utils.h @@ -333,7 +333,7 @@ template <> struct ConvertToFloat<8> { // from float static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); } static __m256 load(mask_t m, const float* ptr) { - // Full width load with blending may case out-of-bounds read (SEGV) + // Full width load with blending may cause out-of-bounds read (SEGV) // Therefore we use _mm256_maskload_ps which safely handles masked loads auto mask_vec = _mm256_castps_si256(create_blend_mask_avx2(m)); return _mm256_maskload_ps(ptr, mask_vec); From 2f8dd96f232d0093188c8de61565006271f472be Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 9 Dec 2025 10:10:55 -0800 Subject: [PATCH 07/30] comments on workflow file --- .github/workflows/asan.yml | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/.github/workflows/asan.yml b/.github/workflows/asan.yml index 63200cd66..b4d1c7ffa 100644 --- a/.github/workflows/asan.yml +++ b/.github/workflows/asan.yml @@ -1,17 +1,18 @@ -# Copyright (C) 2025 Intel Corporation +# Copyright 2025 
Intel Corporation # -# This software and the related documents are Intel copyrighted materials, -# and your use of them is governed by the express license under which they -# were provided to you ("License"). Unless the License provides otherwise, -# you may not use, modify, copy, publish, distribute, disclose or transmit -# this software or the related documents without Intel's prior written -# permission. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# This software and the related documents are provided as is, with no -# express or implied warranties, other than those that are expressly stated -# in the License. +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
-name: Linux Build and Test +name: ASan Build and Test run-name: ${{ github.event.inputs.run_name || github.event.pull_request.title }} on: @@ -24,7 +25,7 @@ concurrency: jobs: asan-test: - name: clang-18 fsanitize=address + name: clang++-18 -fsanitize=address runs-on: [self-hosted, Linux, ubuntu-22.04] env: CXX: clang++-18 From 4046e99831b9e21dad751b5ca0c6d2ed82c7a326 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 00:24:59 -0800 Subject: [PATCH 08/30] chore(ci): streamline asan & linux workflows --- .github/workflows/asan.yml | 68 ----------------------- .github/workflows/build-linux.yml | 92 ++++++++++++++++++------------- 2 files changed, 54 insertions(+), 106 deletions(-) delete mode 100644 .github/workflows/asan.yml diff --git a/.github/workflows/asan.yml b/.github/workflows/asan.yml deleted file mode 100644 index b4d1c7ffa..000000000 --- a/.github/workflows/asan.yml +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2025 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: ASan Build and Test -run-name: ${{ github.event.inputs.run_name || github.event.pull_request.title }} - -on: - workflow_dispatch: - pull_request: - -concurrency: - group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" - cancel-in-progress: true - -jobs: - asan-test: - name: clang++-18 -fsanitize=address - runs-on: [self-hosted, Linux, ubuntu-22.04] - env: - CXX: clang++-18 - CC: clang-18 - - steps: - - name: "Cleanup build folder" - run: | - ls -la ./ - sudo rm -rf ./* || true - sudo rm -rf ./.??* || true - ls -la ./ - - - uses: actions/checkout@v4 - with: - submodules: true - - - name: Configure build - working-directory: ${{ runner.temp }} - env: - TEMP_WORKSPACE: ${{ runner.temp }} - run: | - cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ - -DCMAKE_BUILD_TYPE=Debug \ - -DCMAKE_CXX_FLAGS="-fsanitize=address -fno-omit-frame-pointer -g" \ - -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address" \ - -DSVS_BUILD_TESTS=YES \ - -DSVS_BUILD_BINARIES=NO \ - -DSVS_BUILD_EXAMPLES=NO - - - name: Build tests - working-directory: ${{ runner.temp }}/build - run: make tests -j - - - name: Run tests - env: - CTEST_OUTPUT_ON_FAILURE: 1 - ASAN_OPTIONS: detect_leaks=0 - working-directory: ${{ runner.temp }}/build/tests - run: ./tests diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index af4adf4d6..618cd9d18 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -25,18 +25,20 @@ permissions: # This allows a subsequently queued workflow run to interrupt previous runs concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" cancel-in-progress: true jobs: build: - name: ${{ matrix.cxx }}, ${{ matrix.build_type }}, ivf=${{ matrix.ivf }} + name: ${{ matrix.cxx }}, ${{ 
matrix.build_type }}, ivf=${{ matrix.ivf }}, asan=${{ matrix.asan }} runs-on: ubuntu-22.04 strategy: matrix: build_type: [RelWithDebugInfo] ivf: [OFF, ON] cxx: [g++-11, g++-12, clang++-15] + asan: [OFF] + cmake_extra_args: ["-DSVS_BUILD_BINARIES=YES -DSVS_BUILD_EXAMPLES=YES"] include: - cxx: g++-11 cc: gcc-11 @@ -44,50 +46,64 @@ jobs: cc: gcc-12 - cxx: clang++-15 cc: clang-15 + - cxx: clang++-18 + cc: clang-18 + build_type: Debug + ivf: OFF + asan: ON + cmake_extra_args: "-DCMAKE_CXX_FLAGS='-fsanitize=address -fno-omit-frame-pointer -g' -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=address' -DSVS_BUILD_BINARIES=NO -DSVS_BUILD_EXAMPLES=NO" exclude: - cxx: g++-12 ivf: ON steps: - - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - name: Install MKL - timeout-minutes: 5 - run: | - .github/scripts/setup_apt_repo_linux.sh - sudo apt install intel-oneapi-mkl intel-oneapi-mkl-devel - # Setup environment variables for building against MKL. - # Persist the environment variables for use across multiple subsequent actions. - source /opt/intel/oneapi/setvars.sh - printenv >> $GITHUB_ENV + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + - name: Install MKL + timeout-minutes: 5 + run: | + .github/scripts/setup_apt_repo_linux.sh + sudo apt install intel-oneapi-mkl intel-oneapi-mkl-devel + # Setup environment variables for building against MKL. + # Persist the environment variables for use across multiple subsequent actions. 
+ source /opt/intel/oneapi/setvars.sh + printenv >> $GITHUB_ENV - - name: Configure build - working-directory: ${{ runner.temp }} - env: - CXX: ${{ matrix.cxx }} - CC: ${{ matrix.cc }} - TEMP_WORKSPACE: ${{ runner.temp }} - run: | - cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DSVS_BUILD_BINARIES=YES \ - -DSVS_BUILD_TESTS=YES \ - -DSVS_BUILD_EXAMPLES=YES \ - -DSVS_EXPERIMENTAL_LEANVEC=YES \ - -DSVS_NO_AVX512=NO \ - -DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }} + - name: Install Clang 18 + if: matrix.cxx == 'clang++-18' + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 - - name: Build Tests and Utilities - working-directory: ${{ runner.temp }}/build - run: make -j$(nproc) + - name: Configure build + working-directory: ${{ runner.temp }} + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + TEMP_WORKSPACE: ${{ runner.temp }} + run: | + cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DSVS_BUILD_TESTS=YES \ + -DSVS_EXPERIMENTAL_LEANVEC=YES \ + -DSVS_NO_AVX512=NO \ + -DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }} \ + ${{ matrix.cmake_extra_args }} - - name: Run tests - env: + - name: Build Tests and Utilities + working-directory: ${{ runner.temp }}/build + run: make -j$(nproc) + + - name: Run tests + env: CTEST_OUTPUT_ON_FAILURE: 1 - working-directory: ${{ runner.temp }}/build/tests - run: ctest -C ${{ matrix.build_type }} + ASAN_OPTIONS: detect_leaks=0 + working-directory: ${{ runner.temp }}/build/tests + run: ctest -C ${{ matrix.build_type }} - - name: Run Cpp Examples - env: + - name: Run Cpp Examples + if: matrix.asan != 'ON' + env: CTEST_OUTPUT_ON_FAILURE: 1 - working-directory: ${{ runner.temp }}/build/examples/cpp - run: ctest -C RelWithDebugInfo + working-directory: ${{ runner.temp }}/build/examples/cpp + run: ctest -C ${{ matrix.build_type }} From 9bc53132e8174c76f1c30777788291e5fdf02dad Mon Sep 17 
00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 00:28:21 -0800 Subject: [PATCH 09/30] fixup: undo format --- .github/workflows/build-linux.yml | 91 ++++++++++++++++--------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 618cd9d18..ffd450a2f 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -57,53 +57,54 @@ jobs: ivf: ON steps: - - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - name: Install MKL - timeout-minutes: 5 - run: | - .github/scripts/setup_apt_repo_linux.sh - sudo apt install intel-oneapi-mkl intel-oneapi-mkl-devel - # Setup environment variables for building against MKL. - # Persist the environment variables for use across multiple subsequent actions. - source /opt/intel/oneapi/setvars.sh - printenv >> $GITHUB_ENV + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + - name: Install MKL + timeout-minutes: 5 + run: | + .github/scripts/setup_apt_repo_linux.sh + sudo apt install intel-oneapi-mkl intel-oneapi-mkl-devel + # Setup environment variables for building against MKL. + # Persist the environment variables for use across multiple subsequent actions. 
+ source /opt/intel/oneapi/setvars.sh + printenv >> $GITHUB_ENV - - name: Install Clang 18 - if: matrix.cxx == 'clang++-18' - run: | - wget https://apt.llvm.org/llvm.sh - chmod +x llvm.sh - sudo ./llvm.sh 18 + - name: Install Clang 18 + if: matrix.cxx == 'clang++-18' + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 - - name: Configure build - working-directory: ${{ runner.temp }} - env: - CXX: ${{ matrix.cxx }} - CC: ${{ matrix.cc }} - TEMP_WORKSPACE: ${{ runner.temp }} - run: | - cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DSVS_BUILD_TESTS=YES \ - -DSVS_EXPERIMENTAL_LEANVEC=YES \ - -DSVS_NO_AVX512=NO \ - -DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }} \ - ${{ matrix.cmake_extra_args }} - - name: Build Tests and Utilities - working-directory: ${{ runner.temp }}/build - run: make -j$(nproc) + - name: Configure build + working-directory: ${{ runner.temp }} + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + TEMP_WORKSPACE: ${{ runner.temp }} + run: | + cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DSVS_BUILD_TESTS=YES \ + -DSVS_EXPERIMENTAL_LEANVEC=YES \ + -DSVS_NO_AVX512=NO \ + -DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }} \ + ${{ matrix.cmake_extra_args }} - - name: Run tests - env: - CTEST_OUTPUT_ON_FAILURE: 1 - ASAN_OPTIONS: detect_leaks=0 - working-directory: ${{ runner.temp }}/build/tests - run: ctest -C ${{ matrix.build_type }} + - name: Build Tests and Utilities + working-directory: ${{ runner.temp }}/build + run: make -j$(nproc) - - name: Run Cpp Examples - if: matrix.asan != 'ON' - env: - CTEST_OUTPUT_ON_FAILURE: 1 - working-directory: ${{ runner.temp }}/build/examples/cpp - run: ctest -C ${{ matrix.build_type }} + - name: Run tests + env: + CTEST_OUTPUT_ON_FAILURE: 1 + ASAN_OPTIONS: detect_leaks=0 + working-directory: ${{ runner.temp }}/build/tests + run: ctest -C ${{ matrix.build_type }} + + 
- name: Run Cpp Examples + if: matrix.asan != 'ON' + env: + CTEST_OUTPUT_ON_FAILURE: 1 + working-directory: ${{ runner.temp }}/build/examples/cpp + run: ctest -C ${{ matrix.build_type }} From 2a43709022532660427b68c96fc02ff410a6713f Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 01:02:22 -0800 Subject: [PATCH 10/30] refactor: use CATCH_TEMPLATE_TEST_CASE --- tests/svs/core/distance.cpp | 52 ++++++++++--------------------------- 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index 91d363a06..d32ee3f3e 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -18,6 +18,7 @@ #include "svs/core/distance.h" // catch 2 +#include "catch2/catch_template_test_macros.hpp" #include "catch2/catch_test_macros.hpp" #include @@ -98,7 +99,15 @@ CATCH_TEST_CASE("Distance Utils", "[core][distance][distance_type]") { } } -CATCH_TEST_CASE("Distance asan L2", "[distance][simd][asan][l2]") { +CATCH_TEMPLATE_TEST_CASE( + "Distance ASan", + "[distance][simd][asan]", + svs::DistanceL2, + svs::DistanceIP, + svs::DistanceCosineSimilarity +) { + using Distance = TestType; + // Try various sizes to hit the case where vector capacity == size // and the SIMD load reads past the end into the redzone. // We test sizes that are not multiples of 8 (AVX2 width) or 16 (AVX512 width). 
@@ -106,49 +115,14 @@ CATCH_TEST_CASE("Distance asan L2", "[distance][simd][asan][l2]") { std::vector a(size); std::vector b(size); - std::iota(a.begin(), a.end(), 0.0f); - std::iota(b.begin(), b.end(), 1.0f); - - // Ensure no spare capacity - a.shrink_to_fit(); - b.shrink_to_fit(); - - auto dist = svs::distance::L2::compute(a.data(), b.data(), size); - CATCH_REQUIRE(dist >= 0); - } -} - -CATCH_TEST_CASE("Distance asan Cosine", "[distance][simd][asan][cosine]") { - for (size_t size = 1; size < 128; ++size) { - std::vector a(size); - std::vector b(size); - - std::iota(a.begin(), a.end(), 0.0f); - std::iota(b.begin(), b.end(), 1.0f); - - // Ensure no spare capacity - a.shrink_to_fit(); - b.shrink_to_fit(); - - auto dist = - svs::distance::CosineSimilarity::compute(a.data(), b.data(), 1.0f, size); - CATCH_REQUIRE(dist >= 0); - } -} - -CATCH_TEST_CASE("Distance asan IP", "[distance][simd][asan][ip]") { - for (size_t size = 1; size < 128; ++size) { - std::vector a(size); - std::vector b(size); - - std::iota(a.begin(), a.end(), 0.0f); - std::iota(b.begin(), b.end(), 1.0f); + std::iota(a.begin(), a.end(), 1.0f); + std::iota(b.begin(), b.end(), 2.0f); // Ensure no spare capacity a.shrink_to_fit(); b.shrink_to_fit(); - auto dist = svs::distance::IP::compute(a.data(), b.data(), size); + auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b)); CATCH_REQUIRE(dist >= 0); } } From 1e74a554cd2c157013025200e383d9b581ad103b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 01:04:04 -0800 Subject: [PATCH 11/30] fix(ci): remove unused cmake option --- .github/workflows/build-linux.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index ffd450a2f..ca2ffea49 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -86,7 +86,6 @@ jobs: cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ 
-DSVS_BUILD_TESTS=YES \ - -DSVS_EXPERIMENTAL_LEANVEC=YES \ -DSVS_NO_AVX512=NO \ -DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }} \ ${{ matrix.cmake_extra_args }} From dbe4ae8c447ad7155b93e183e9bae777e0f8fd82 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 01:47:14 -0800 Subject: [PATCH 12/30] fix(ci): skip integration tests in debug asan build --- .github/workflows/build-linux.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index ca2ffea49..28a7b6226 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -39,6 +39,7 @@ jobs: cxx: [g++-11, g++-12, clang++-15] asan: [OFF] cmake_extra_args: ["-DSVS_BUILD_BINARIES=YES -DSVS_BUILD_EXAMPLES=YES"] + ctest_args: [""] include: - cxx: g++-11 cc: gcc-11 @@ -51,7 +52,14 @@ jobs: build_type: Debug ivf: OFF asan: ON - cmake_extra_args: "-DCMAKE_CXX_FLAGS='-fsanitize=address -fno-omit-frame-pointer -g' -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=address' -DSVS_BUILD_BINARIES=NO -DSVS_BUILD_EXAMPLES=NO" + # address sanitizer flags + cmake_extra_args: >- + -DCMAKE_CXX_FLAGS='-fsanitize=address -fno-omit-frame-pointer -g' + -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=address' + -DSVS_BUILD_BINARIES=NO + -DSVS_BUILD_EXAMPLES=NO + # skip longer-running integration tests + ctest_args: "-LE integration" exclude: - cxx: g++-12 ivf: ON @@ -75,7 +83,6 @@ jobs: chmod +x llvm.sh sudo ./llvm.sh 18 - - name: Configure build working-directory: ${{ runner.temp }} env: @@ -99,7 +106,7 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 ASAN_OPTIONS: detect_leaks=0 working-directory: ${{ runner.temp }}/build/tests - run: ctest -C ${{ matrix.build_type }} + run: ctest -C ${{ matrix.build_type }} ${{ matrix.ctest_args }} - name: Run Cpp Examples if: matrix.asan != 'ON' From 2b00b78abd8e275509e720183a905dca32c9fed5 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 01:50:21 -0800 Subject: [PATCH 13/30] 
feat(ci): add new tag 'long' that's skipped for asan --- .github/workflows/build-linux.yml | 4 ++-- tests/svs/index/vamana/index.cpp | 2 +- tests/svs/index/vamana/multi.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 28a7b6226..04213c675 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -58,8 +58,8 @@ jobs: -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=address' -DSVS_BUILD_BINARIES=NO -DSVS_BUILD_EXAMPLES=NO - # skip longer-running integration tests - ctest_args: "-LE integration" + # skip longer-running tests + ctest_args: "-LE long" exclude: - cxx: g++-12 ivf: ON diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 464b12349..b94b902b8 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -181,7 +181,7 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { CATCH_REQUIRE(captured_logs[2].find("Batch Size:") != std::string::npos); } -CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { +CATCH_TEST_CASE("Vamana Index Default Parameters", "[long][parameter][vamana]") { using Catch::Approx; std::filesystem::path data_path = test_dataset::data_svs_file(); diff --git a/tests/svs/index/vamana/multi.cpp b/tests/svs/index/vamana/multi.cpp index af52864f6..63d450b3e 100644 --- a/tests/svs/index/vamana/multi.cpp +++ b/tests/svs/index/vamana/multi.cpp @@ -48,7 +48,7 @@ template float pick_alpha(Distance SVS_UNUSED(dist)) { CATCH_TEMPLATE_TEST_CASE( "Multi-vector dynamic vamana index", - "[index][vamana][multi]", + "[long][index][vamana][multi]", svs::DistanceL2, svs::DistanceIP, svs::DistanceCosineSimilarity From 6cf9ecd5a87cabc6d49346717850396485fe9394 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 05:53:25 -0800 Subject: [PATCH 14/30] update catch2 and use ADD_TAGS_AS_LABELS ADD_TAGS_AS_LABELS --- 
tests/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ad82db1c3..63c55a934 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -37,7 +37,7 @@ set(CMAKE_CXX_STANDARD ${SVS_CXX_STANDARD}) FetchContent_Declare( Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.4.0 + GIT_TAG v3.11.0 ) FetchContent_MakeAvailable(Catch2) @@ -230,5 +230,4 @@ target_include_directories(tests PRIVATE ${PROJECT_SOURCE_DIR}) list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) include(CTest) include(Catch) -catch_discover_tests(tests) - +catch_discover_tests(tests ADD_TAGS_AS_LABELS SKIP_IS_FAILURE) From d94e2a79bcd3b46f2abff370c90bcebdc869af04 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 05:57:16 -0800 Subject: [PATCH 15/30] add more [long] labels --- tests/svs/index/inverted/clustering.cpp | 2 +- tests/svs/index/inverted/memory_based.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index 29844f611..6ac896256 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -385,7 +385,7 @@ void test_end_to_end_clustering( } // namespace -CATCH_TEST_CASE("Random Clustering - End to End", "[inverted][random_clustering]") { +CATCH_TEST_CASE("Random Clustering - End to End", "[long][inverted][random_clustering]") { CATCH_SECTION("Uncompressed Data") { auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); test_end_to_end_clustering(data, svs::DistanceL2(), 1.2f); diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index 604791485..ad7d01b46 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -23,7 +23,7 @@ #include "tests/utils/test_dataset.h" #include -CATCH_TEST_CASE("InvertedIndex 
Logging Test", "[logging]") { +CATCH_TEST_CASE("InvertedIndex Logging Test", "[long][logging]") { // Vector to store captured log messages std::vector captured_logs; std::vector global_captured_logs; From 3ba1fd84180171f1d67a76b5d38f2576ae710276 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 06:00:32 -0800 Subject: [PATCH 16/30] revert simd_utils.h to trip new asan check in CI --- include/svs/core/distance/simd_utils.h | 38 ++++++++++++++------------ 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/include/svs/core/distance/simd_utils.h b/include/svs/core/distance/simd_utils.h index bd9834da5..f883abcaf 100644 --- a/include/svs/core/distance/simd_utils.h +++ b/include/svs/core/distance/simd_utils.h @@ -19,7 +19,6 @@ #if defined(__i386__) || defined(__x86_64__) #include -#include #include #include @@ -333,10 +332,11 @@ template <> struct ConvertToFloat<8> { // from float static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); } static __m256 load(mask_t m, const float* ptr) { - // Full width load with blending may cause out-of-bounds read (SEGV) - // Therefore we use _mm256_maskload_ps which safely handles masked loads - auto mask_vec = _mm256_castps_si256(create_blend_mask_avx2(m)); - return _mm256_maskload_ps(ptr, mask_vec); + // AVX2 doesn't have native masked load, so we load and then blend + auto data = _mm256_loadu_ps(ptr); + auto zero = _mm256_setzero_ps(); + auto mask_vec = create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // from float16 @@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const Float16* ptr) { - // Safe masked load using a temporary buffer to avoid SEGV - __m128i buffer = _mm_setzero_si128(); - std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(Float16)); - return _mm256_cvtph_ps(buffer); + auto data = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(ptr))); + auto zero = _mm256_setzero_ps(); + auto mask_vec = 
create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // from uint8 @@ -359,10 +359,12 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const uint8_t* ptr) { - // Safe masked load using a temporary buffer to avoid SEGV - int64_t buffer = 0; - std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(uint8_t)); - return _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_cvtsi64_si128(buffer))); + auto data = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32( + _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) + )); + auto zero = _mm256_setzero_ps(); + auto mask_vec = create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // from int8 @@ -373,10 +375,12 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const int8_t* ptr) { - // Safe masked load using a temporary buffer to avoid SEGV - int64_t buffer = 0; - std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(int8_t)); - return _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(_mm_cvtsi64_si128(buffer))); + auto data = _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32( + _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) + )); + auto zero = _mm256_setzero_ps(); + auto mask_vec = create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // We do not need to treat the left or right-hand differently. 
From 528ff192e614b80779e283e99329984968b41333 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 07:00:15 -0800 Subject: [PATCH 17/30] add AVX2 L2 calculation back to trigger asan --- tests/svs/core/distance.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index d32ee3f3e..877294b47 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -124,5 +124,10 @@ CATCH_TEMPLATE_TEST_CASE( auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b)); CATCH_REQUIRE(dist >= 0); + + dist = svs::distance:: + L2Impl:: + compute(a.data(), b.data(), svs::lib::MaybeStatic(size)); + CATCH_REQUIRE(dist >= 0); } } From 99dbac307e599dd1bd182296313de99501feb185 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 07:01:00 -0800 Subject: [PATCH 18/30] Revert "add AVX2 L2 calculation back to trigger asan" This reverts commit 528ff192e614b80779e283e99329984968b41333. --- tests/svs/core/distance.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index 877294b47..d32ee3f3e 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -124,10 +124,5 @@ CATCH_TEMPLATE_TEST_CASE( auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b)); CATCH_REQUIRE(dist >= 0); - - dist = svs::distance:: - L2Impl:: - compute(a.data(), b.data(), svs::lib::MaybeStatic(size)); - CATCH_REQUIRE(dist >= 0); } } From 055214f37c7d569648af0ebf7ac6cfa7a6790153 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 07:01:12 -0800 Subject: [PATCH 19/30] Revert "revert simd_utils.h to trip new asan check in CI" This reverts commit 3ba1fd84180171f1d67a76b5d38f2576ae710276. 
--- include/svs/core/distance/simd_utils.h | 38 ++++++++++++-------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/include/svs/core/distance/simd_utils.h b/include/svs/core/distance/simd_utils.h index f883abcaf..bd9834da5 100644 --- a/include/svs/core/distance/simd_utils.h +++ b/include/svs/core/distance/simd_utils.h @@ -19,6 +19,7 @@ #if defined(__i386__) || defined(__x86_64__) #include +#include #include #include @@ -332,11 +333,10 @@ template <> struct ConvertToFloat<8> { // from float static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); } static __m256 load(mask_t m, const float* ptr) { - // AVX2 doesn't have native masked load, so we load and then blend - auto data = _mm256_loadu_ps(ptr); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Full width load with blending may cause out-of-bounds read (SEGV) + // Therefore we use _mm256_maskload_ps which safely handles masked loads + auto mask_vec = _mm256_castps_si256(create_blend_mask_avx2(m)); + return _mm256_maskload_ps(ptr, mask_vec); } // from float16 @@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const Float16* ptr) { - auto data = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(ptr))); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + __m128i buffer = _mm_setzero_si128(); + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(Float16)); + return _mm256_cvtph_ps(buffer); } // from uint8 @@ -359,12 +359,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const uint8_t* ptr) { - auto data = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return 
_mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(uint8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_cvtsi64_si128(buffer))); } // from int8 @@ -375,12 +373,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const int8_t* ptr) { - auto data = _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(int8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(_mm_cvtsi64_si128(buffer))); } // We do not need to treat the left or right-hand differently. From 478c0dc6906740c3ec1b0341b1cd7f45f6acbd93 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 07:25:15 -0800 Subject: [PATCH 20/30] run all ISA paths in test --- include/svs/lib/avx_detection.h | 2 +- tests/svs/core/distance.cpp | 65 ++++++++++++++++++++++++++------- tests/svs/lib/avx_detection.cpp | 7 ++++ 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/include/svs/lib/avx_detection.h b/include/svs/lib/avx_detection.h index fc9b246f9..75a7b1190 100644 --- a/include/svs/lib/avx_detection.h +++ b/include/svs/lib/avx_detection.h @@ -54,6 +54,6 @@ struct AVXRuntimeFlags { }; #endif -inline const AVXRuntimeFlags avx_runtime_flags = {}; +inline AVXRuntimeFlags avx_runtime_flags = {}; } // namespace svs::detail diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index d32ee3f3e..479ebb1df 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -24,6 +24,8 @@ #include #include +#include "svs/lib/avx_detection.h" + namespace { std::string_view test_table = R"( @@ -108,21 +110,58 @@ 
CATCH_TEMPLATE_TEST_CASE( ) { using Distance = TestType; - // Try various sizes to hit the case where vector capacity == size - // and the SIMD load reads past the end into the redzone. - // We test sizes that are not multiples of 8 (AVX2 width) or 16 (AVX512 width). - for (size_t size = 1; size < 128; ++size) { - std::vector a(size); - std::vector b(size); + auto run_test = []() { + // Try various sizes to hit the case where vector capacity == size + // and the SIMD load reads past the end into the redzone. + // We test sizes that are not multiples of 8 (AVX2 width) or 16 (AVX512 width). + for (size_t size = 1; size < 128; ++size) { + std::vector a(size); + std::vector b(size); + + std::iota(a.begin(), a.end(), 1.0f); + std::iota(b.begin(), b.end(), 2.0f); + + // Ensure no spare capacity + a.shrink_to_fit(); + b.shrink_to_fit(); - std::iota(a.begin(), a.end(), 1.0f); - std::iota(b.begin(), b.end(), 2.0f); + auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b)); + CATCH_REQUIRE(dist >= 0); + } + }; - // Ensure no spare capacity - a.shrink_to_fit(); - b.shrink_to_fit(); + CATCH_SECTION("Default") { run_test(); } - auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b)); - CATCH_REQUIRE(dist >= 0); + CATCH_SECTION("No AVX512VNNI") { + if (!svs::detail::avx_runtime_flags.is_avx512vnni_supported()) { + CATCH_SKIP("AVX512VNNI not supported on this platform"); + } + auto original = svs::detail::avx_runtime_flags; + svs::detail::avx_runtime_flags.avx512vnni = false; + run_test(); + svs::detail::avx_runtime_flags = original; + } + + CATCH_SECTION("No AVX512F") { + if (!svs::detail::avx_runtime_flags.is_avx512f_supported()) { + CATCH_SKIP("AVX512F not supported on this platform"); + } + auto original = svs::detail::avx_runtime_flags; + svs::detail::avx_runtime_flags.avx512vnni = false; + svs::detail::avx_runtime_flags.avx512f = false; + run_test(); + svs::detail::avx_runtime_flags = original; + } + + CATCH_SECTION("No AVX2") { + if 
(!svs::detail::avx_runtime_flags.is_avx2_supported()) { + CATCH_SKIP("AVX2 not supported on this platform"); + } + auto original = svs::detail::avx_runtime_flags; + svs::detail::avx_runtime_flags.avx512vnni = false; + svs::detail::avx_runtime_flags.avx512f = false; + svs::detail::avx_runtime_flags.avx2 = false; + run_test(); + svs::detail::avx_runtime_flags = original; } } diff --git a/tests/svs/lib/avx_detection.cpp b/tests/svs/lib/avx_detection.cpp index 02d5f9e36..00d373ba4 100644 --- a/tests/svs/lib/avx_detection.cpp +++ b/tests/svs/lib/avx_detection.cpp @@ -29,4 +29,11 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { << svs::detail::avx_runtime_flags.is_avx512f_supported() << "\n"; std::cout << "AVX512VNNI: " << std::boolalpha << svs::detail::avx_runtime_flags.is_avx512vnni_supported() << "\n"; + + CATCH_SECTION("Patching") { + auto original = svs::detail::avx_runtime_flags.avx512f; + svs::detail::avx_runtime_flags.avx512f = false; + CATCH_REQUIRE(svs::detail::avx_runtime_flags.is_avx512f_supported() == false); + svs::detail::avx_runtime_flags.avx512f = original; + } } From c205443e61917fc94fba1cff049fcb6cce0e9311 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 07:33:42 -0800 Subject: [PATCH 21/30] keep avx_runtime_flags const by using const_cast in tests --- include/svs/lib/avx_detection.h | 2 +- tests/svs/core/distance.cpp | 30 ++++++++++++++++++------------ tests/svs/lib/avx_detection.cpp | 8 +++++--- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/svs/lib/avx_detection.h b/include/svs/lib/avx_detection.h index 75a7b1190..fc9b246f9 100644 --- a/include/svs/lib/avx_detection.h +++ b/include/svs/lib/avx_detection.h @@ -54,6 +54,6 @@ struct AVXRuntimeFlags { }; #endif -inline AVXRuntimeFlags avx_runtime_flags = {}; +inline const AVXRuntimeFlags avx_runtime_flags = {}; } // namespace svs::detail diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index 479ebb1df..0c1713913 100644 
--- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -136,32 +136,38 @@ CATCH_TEMPLATE_TEST_CASE( if (!svs::detail::avx_runtime_flags.is_avx512vnni_supported()) { CATCH_SKIP("AVX512VNNI not supported on this platform"); } - auto original = svs::detail::avx_runtime_flags; - svs::detail::avx_runtime_flags.avx512vnni = false; + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags; + mutable_flags.avx512vnni = false; run_test(); - svs::detail::avx_runtime_flags = original; + mutable_flags = original; } CATCH_SECTION("No AVX512F") { if (!svs::detail::avx_runtime_flags.is_avx512f_supported()) { CATCH_SKIP("AVX512F not supported on this platform"); } - auto original = svs::detail::avx_runtime_flags; - svs::detail::avx_runtime_flags.avx512vnni = false; - svs::detail::avx_runtime_flags.avx512f = false; + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags; + mutable_flags.avx512vnni = false; + mutable_flags.avx512f = false; run_test(); - svs::detail::avx_runtime_flags = original; + mutable_flags = original; } CATCH_SECTION("No AVX2") { if (!svs::detail::avx_runtime_flags.is_avx2_supported()) { CATCH_SKIP("AVX2 not supported on this platform"); } - auto original = svs::detail::avx_runtime_flags; - svs::detail::avx_runtime_flags.avx512vnni = false; - svs::detail::avx_runtime_flags.avx512f = false; - svs::detail::avx_runtime_flags.avx2 = false; + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags; + mutable_flags.avx512vnni = false; + mutable_flags.avx512f = false; + mutable_flags.avx2 = false; run_test(); - svs::detail::avx_runtime_flags = original; + mutable_flags = original; } } diff --git a/tests/svs/lib/avx_detection.cpp b/tests/svs/lib/avx_detection.cpp index 00d373ba4..306673180 100644 --- a/tests/svs/lib/avx_detection.cpp +++ b/tests/svs/lib/avx_detection.cpp @@ -31,9 +31,11 @@ CATCH_TEST_CASE("AVX detection", 
"[lib][lib-avx-detection]") { << svs::detail::avx_runtime_flags.is_avx512vnni_supported() << "\n"; CATCH_SECTION("Patching") { - auto original = svs::detail::avx_runtime_flags.avx512f; - svs::detail::avx_runtime_flags.avx512f = false; + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags.avx512f; + mutable_flags.avx512f = false; CATCH_REQUIRE(svs::detail::avx_runtime_flags.is_avx512f_supported() == false); - svs::detail::avx_runtime_flags.avx512f = original; + mutable_flags.avx512f = original; } } From de0bfac6b7b362f1c94aa46d359ff6c249b47df5 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 08:13:32 -0800 Subject: [PATCH 22/30] fix false positive failure for skipped tests --- tests/svs/core/distance.cpp | 57 ++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index 0c1713913..f254d19e4 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -132,42 +132,39 @@ CATCH_TEMPLATE_TEST_CASE( CATCH_SECTION("Default") { run_test(); } - CATCH_SECTION("No AVX512VNNI") { - if (!svs::detail::avx_runtime_flags.is_avx512vnni_supported()) { - CATCH_SKIP("AVX512VNNI not supported on this platform"); + if (svs::detail::avx_runtime_flags.is_avx512vnni_supported()) { + CATCH_SECTION("No AVX512VNNI") { + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags; + mutable_flags.avx512vnni = false; + run_test(); + mutable_flags = original; } - auto& mutable_flags = - const_cast(svs::detail::avx_runtime_flags); - auto original = mutable_flags; - mutable_flags.avx512vnni = false; - run_test(); - mutable_flags = original; } - CATCH_SECTION("No AVX512F") { - if (!svs::detail::avx_runtime_flags.is_avx512f_supported()) { - CATCH_SKIP("AVX512F not supported on this platform"); + if (svs::detail::avx_runtime_flags.is_avx512f_supported()) { + CATCH_SECTION("No 
AVX512F") { + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags; + mutable_flags.avx512vnni = false; + mutable_flags.avx512f = false; + run_test(); + mutable_flags = original; } - auto& mutable_flags = - const_cast(svs::detail::avx_runtime_flags); - auto original = mutable_flags; - mutable_flags.avx512vnni = false; - mutable_flags.avx512f = false; - run_test(); - mutable_flags = original; } - CATCH_SECTION("No AVX2") { - if (!svs::detail::avx_runtime_flags.is_avx2_supported()) { - CATCH_SKIP("AVX2 not supported on this platform"); + if (svs::detail::avx_runtime_flags.is_avx2_supported()) { + CATCH_SECTION("No AVX2") { + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags; + mutable_flags.avx512vnni = false; + mutable_flags.avx512f = false; + mutable_flags.avx2 = false; + run_test(); + mutable_flags = original; } - auto& mutable_flags = - const_cast(svs::detail::avx_runtime_flags); - auto original = mutable_flags; - mutable_flags.avx512vnni = false; - mutable_flags.avx512f = false; - mutable_flags.avx2 = false; - run_test(); - mutable_flags = original; } } From be64f341aa0c5c4796f80d8fd9d5ffd7beea38b9 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 08:18:17 -0800 Subject: [PATCH 23/30] fix: only modify isa dispatching on x86 --- tests/svs/core/distance.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index f254d19e4..06ecb3acb 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -132,6 +132,7 @@ CATCH_TEMPLATE_TEST_CASE( CATCH_SECTION("Default") { run_test(); } +#ifdef __x86_64__ if (svs::detail::avx_runtime_flags.is_avx512vnni_supported()) { CATCH_SECTION("No AVX512VNNI") { auto& mutable_flags = @@ -167,4 +168,5 @@ CATCH_TEMPLATE_TEST_CASE( mutable_flags = original; } } +#endif // __x86_64__ } From f856a965523a461e9e438bc01389e522450ce998 Mon Sep 17 00:00:00 
2001 From: Andreas Huber Date: Wed, 10 Dec 2025 08:22:46 -0800 Subject: [PATCH 24/30] fixup --- include/svs/core/distance/simd_utils.h | 38 ++++++++++++++------------ tests/svs/lib/avx_detection.cpp | 2 ++ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/svs/core/distance/simd_utils.h b/include/svs/core/distance/simd_utils.h index bd9834da5..f883abcaf 100644 --- a/include/svs/core/distance/simd_utils.h +++ b/include/svs/core/distance/simd_utils.h @@ -19,7 +19,6 @@ #if defined(__i386__) || defined(__x86_64__) #include -#include #include #include @@ -333,10 +332,11 @@ template <> struct ConvertToFloat<8> { // from float static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); } static __m256 load(mask_t m, const float* ptr) { - // Full width load with blending may cause out-of-bounds read (SEGV) - // Therefore we use _mm256_maskload_ps which safely handles masked loads - auto mask_vec = _mm256_castps_si256(create_blend_mask_avx2(m)); - return _mm256_maskload_ps(ptr, mask_vec); + // AVX2 doesn't have native masked load, so we load and then blend + auto data = _mm256_loadu_ps(ptr); + auto zero = _mm256_setzero_ps(); + auto mask_vec = create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // from float16 @@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const Float16* ptr) { - // Safe masked load using a temporary buffer to avoid SEGV - __m128i buffer = _mm_setzero_si128(); - std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(Float16)); - return _mm256_cvtph_ps(buffer); + auto data = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(ptr))); + auto zero = _mm256_setzero_ps(); + auto mask_vec = create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // from uint8 @@ -359,10 +359,12 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const uint8_t* ptr) { - // Safe masked load using a temporary buffer to avoid SEGV - int64_t 
buffer = 0; - std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(uint8_t)); - return _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_cvtsi64_si128(buffer))); + auto data = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32( + _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) + )); + auto zero = _mm256_setzero_ps(); + auto mask_vec = create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // from int8 @@ -373,10 +375,12 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const int8_t* ptr) { - // Safe masked load using a temporary buffer to avoid SEGV - int64_t buffer = 0; - std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(int8_t)); - return _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(_mm_cvtsi64_si128(buffer))); + auto data = _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32( + _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) + )); + auto zero = _mm256_setzero_ps(); + auto mask_vec = create_blend_mask_avx2(m); + return _mm256_blendv_ps(zero, data, mask_vec); } // We do not need to treat the left or right-hand differently. 
diff --git a/tests/svs/lib/avx_detection.cpp b/tests/svs/lib/avx_detection.cpp index 306673180..6ac72e4f9 100644 --- a/tests/svs/lib/avx_detection.cpp +++ b/tests/svs/lib/avx_detection.cpp @@ -30,6 +30,7 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { std::cout << "AVX512VNNI: " << std::boolalpha << svs::detail::avx_runtime_flags.is_avx512vnni_supported() << "\n"; +#ifdef __x86_64__ CATCH_SECTION("Patching") { auto& mutable_flags = const_cast(svs::detail::avx_runtime_flags); @@ -38,4 +39,5 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { CATCH_REQUIRE(svs::detail::avx_runtime_flags.is_avx512f_supported() == false); mutable_flags.avx512f = original; } +#endif // __x86_64__ } From 80d1d840814a180577c0dcbe704c6978c355cae9 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 08:30:17 -0800 Subject: [PATCH 25/30] simplify test --- tests/svs/core/distance.cpp | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index 06ecb3acb..4f19b7334 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -111,23 +111,20 @@ CATCH_TEMPLATE_TEST_CASE( using Distance = TestType; auto run_test = []() { - // Try various sizes to hit the case where vector capacity == size - // and the SIMD load reads past the end into the redzone. - // We test sizes that are not multiples of 8 (AVX2 width) or 16 (AVX512 width). 
- for (size_t size = 1; size < 128; ++size) { - std::vector a(size); - std::vector b(size); - - std::iota(a.begin(), a.end(), 1.0f); - std::iota(b.begin(), b.end(), 2.0f); - - // Ensure no spare capacity - a.shrink_to_fit(); - b.shrink_to_fit(); - - auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b)); - CATCH_REQUIRE(dist >= 0); - } + // some full-width AVX2/AVX512 registers plus (crucially) ragged epilogue + constexpr size_t size = 64 + 2; + std::vector a(size); + std::vector b(size); + + std::iota(a.begin(), a.end(), 1.0f); + std::iota(b.begin(), b.end(), 2.0f); + + // Ensure no spare capacity + a.shrink_to_fit(); + b.shrink_to_fit(); + + auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b)); + CATCH_REQUIRE(dist >= 0); }; CATCH_SECTION("Default") { run_test(); } From 966d58ccd5994fc6fa800d511348704909567c6a Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 09:01:06 -0800 Subject: [PATCH 26/30] Revert "fixup" This reverts commit f856a965523a461e9e438bc01389e522450ce998. 
--- include/svs/core/distance/simd_utils.h | 38 ++++++++++++-------------- tests/svs/lib/avx_detection.cpp | 2 -- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/include/svs/core/distance/simd_utils.h b/include/svs/core/distance/simd_utils.h index f883abcaf..bd9834da5 100644 --- a/include/svs/core/distance/simd_utils.h +++ b/include/svs/core/distance/simd_utils.h @@ -19,6 +19,7 @@ #if defined(__i386__) || defined(__x86_64__) #include +#include #include #include @@ -332,11 +333,10 @@ template <> struct ConvertToFloat<8> { // from float static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); } static __m256 load(mask_t m, const float* ptr) { - // AVX2 doesn't have native masked load, so we load and then blend - auto data = _mm256_loadu_ps(ptr); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Full width load with blending may cause out-of-bounds read (SEGV) + // Therefore we use _mm256_maskload_ps which safely handles masked loads + auto mask_vec = _mm256_castps_si256(create_blend_mask_avx2(m)); + return _mm256_maskload_ps(ptr, mask_vec); } // from float16 @@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const Float16* ptr) { - auto data = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(ptr))); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + __m128i buffer = _mm_setzero_si128(); + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(Float16)); + return _mm256_cvtph_ps(buffer); } // from uint8 @@ -359,12 +359,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const uint8_t* ptr) { - auto data = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = 
create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(uint8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_cvtsi64_si128(buffer))); } // from int8 @@ -375,12 +373,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const int8_t* ptr) { - auto data = _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(int8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(_mm_cvtsi64_si128(buffer))); } // We do not need to treat the left or right-hand differently. diff --git a/tests/svs/lib/avx_detection.cpp b/tests/svs/lib/avx_detection.cpp index 6ac72e4f9..306673180 100644 --- a/tests/svs/lib/avx_detection.cpp +++ b/tests/svs/lib/avx_detection.cpp @@ -30,7 +30,6 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { std::cout << "AVX512VNNI: " << std::boolalpha << svs::detail::avx_runtime_flags.is_avx512vnni_supported() << "\n"; -#ifdef __x86_64__ CATCH_SECTION("Patching") { auto& mutable_flags = const_cast(svs::detail::avx_runtime_flags); @@ -39,5 +38,4 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { CATCH_REQUIRE(svs::detail::avx_runtime_flags.is_avx512f_supported() == false); mutable_flags.avx512f = original; } -#endif // __x86_64__ } From 6fcc214814a74cdb4ec77f2a041047f79e594f74 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 09:01:38 -0800 Subject: [PATCH 27/30] fixup --- tests/svs/lib/avx_detection.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/svs/lib/avx_detection.cpp b/tests/svs/lib/avx_detection.cpp 
index 306673180..bdfe24578 100644 --- a/tests/svs/lib/avx_detection.cpp +++ b/tests/svs/lib/avx_detection.cpp @@ -30,6 +30,7 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { std::cout << "AVX512VNNI: " << std::boolalpha << svs::detail::avx_runtime_flags.is_avx512vnni_supported() << "\n"; +#ifdef __x86_64__ CATCH_SECTION("Patching") { auto& mutable_flags = const_cast(svs::detail::avx_runtime_flags); @@ -38,4 +39,5 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { CATCH_REQUIRE(svs::detail::avx_runtime_flags.is_avx512f_supported() == false); mutable_flags.avx512f = original; } +#endifx } From 7d5b6ed21a5dca731cfb5a01ea770234ce5e6dd0 Mon Sep 17 00:00:00 2001 From: Andreas Huber <9201869+ahuber21@users.noreply.github.com> Date: Wed, 10 Dec 2025 18:02:41 +0100 Subject: [PATCH 28/30] Include asan in C flags Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .github/workflows/build-linux.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 04213c675..2d055e54f 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -55,7 +55,9 @@ jobs: # address sanitizer flags cmake_extra_args: >- -DCMAKE_CXX_FLAGS='-fsanitize=address -fno-omit-frame-pointer -g' + -DCMAKE_C_FLAGS='-fsanitize=address -fno-omit-frame-pointer -g' -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=address' + -DCMAKE_SHARED_LINKER_FLAGS='-fsanitize=address' -DSVS_BUILD_BINARIES=NO -DSVS_BUILD_EXAMPLES=NO # skip longer-running tests From 63e58cd076c79d29b82dd2cd184de01044e7a5f9 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 10 Dec 2025 09:04:21 -0800 Subject: [PATCH 29/30] fixup --- tests/svs/lib/avx_detection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/svs/lib/avx_detection.cpp b/tests/svs/lib/avx_detection.cpp index bdfe24578..6c090efce 100644 --- a/tests/svs/lib/avx_detection.cpp +++ 
b/tests/svs/lib/avx_detection.cpp @@ -39,5 +39,5 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { CATCH_REQUIRE(svs::detail::avx_runtime_flags.is_avx512f_supported() == false); mutable_flags.avx512f = original; } -#endifx +#endif } From 8b36bef94e906b4a0578a780c1dd5842a260a8cf Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 11 Dec 2025 02:01:14 -0800 Subject: [PATCH 30/30] remove asan_options; remove auto-formatted double-quote change --- .github/workflows/build-linux.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 2d055e54f..d520b020e 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -25,7 +25,7 @@ permissions: # This allows a subsequently queued workflow run to interrupt previous runs concurrency: - group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' cancel-in-progress: true jobs: @@ -106,7 +106,6 @@ jobs: - name: Run tests env: CTEST_OUTPUT_ON_FAILURE: 1 - ASAN_OPTIONS: detect_leaks=0 working-directory: ${{ runner.temp }}/build/tests run: ctest -C ${{ matrix.build_type }} ${{ matrix.ctest_args }}