From 7de50fe8c59dd81cada554718ed206933b067cc1 Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Fri, 29 May 2026 19:01:02 -0700
Subject: [PATCH] Update XNNPACK elementwise benchmarks to use consistent N elements.

PiperOrigin-RevId: 923706605
---
 bench/utils.h | 44 ++++++++++----------------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/bench/utils.h b/bench/utils.h
index 47151fe33db..3dada334a48 100644
--- a/bench/utils.h
+++ b/bench/utils.h
@@ -114,48 +114,24 @@ static void ReduceDiscontiguousParameters(benchmark::Benchmark* b) {
   b->Args({32768, 5});
 }
 
-// Set number of elements for a unary elementwise microkernel such that:
-// - It is divisible by 2, 3, 4, 5, 6.
-// - It is divisible by AVX512 width.
-// - Total memory footprint does not exceed the characteristic cache size for
-//   the architecture.
+// Set number of elements for a unary elementwise microkernel.
+// Use a consistent number of elements to allow comparisons between different
+// data types and architectures.
 template <class InType, class OutType>
 void UnaryElementwiseParameters(benchmark::Benchmark* benchmark) {
   benchmark->ArgName("N");
-
-  size_t characteristic_l1 = 32 * 1024;
-  size_t characteristic_l2 = 256 * 1024;
-#if XNN_ARCH_ARM
-  characteristic_l1 = 16 * 1024;
-  characteristic_l2 = 128 * 1024;
-#endif  // XNN_ARCH_ARM
-
-  const size_t elementwise_size = sizeof(InType) + sizeof(OutType);
-  benchmark->Arg(characteristic_l1 / elementwise_size / 960 * 960);
-  benchmark->Arg(characteristic_l2 / elementwise_size / 960 * 960);
+  benchmark->Arg(8 * 1024);
+  benchmark->Arg(64 * 1024);
 }
 
-// Set number of elements for a binary elementwise microkernel such that:
-// - It is divisible by 2, 3, 4, 5, 6.
-// - It is divisible by AVX512 width.
-// - Total memory footprint does not exceed the characteristic cache size for
-//   the architecture.
+// Set number of elements for a binary elementwise microkernel.
+// Use a consistent number of elements to allow comparisons between different
+// data types and architectures.
 template <class InType, class OutType>
 void BinaryElementwiseParameters(benchmark::Benchmark* benchmark) {
   benchmark->ArgName("N");
-
-  size_t characteristic_l1 = 32 * 1024;
-  size_t characteristic_l2 = 256 * 1024;
-#if XNN_ARCH_ARM
-  characteristic_l1 = 16 * 1024;
-  characteristic_l2 = 128 * 1024;
-#endif  // XNN_ARCH_ARM
-
-  const size_t elementwise_size = 2 * sizeof(InType) + sizeof(OutType);
-  benchmark->Arg(
-      std::max<size_t>(1, characteristic_l1 / elementwise_size / 960) * 960);
-  benchmark->Arg(
-      std::max<size_t>(1, characteristic_l2 / elementwise_size / 960) * 960);
+  benchmark->Arg(8 * 1024);
+  benchmark->Arg(64 * 1024);
 }
 
 // Check if the architecture flags are supported.