Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 10 additions & 34 deletions bench/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,48 +114,24 @@ static void ReduceDiscontiguousParameters(benchmark::Benchmark* b) {
b->Args({32768, 5});
}

// Registers the element counts (benchmark argument "N") used when benchmarking
// a unary elementwise microkernel.
//
// The same fixed sizes are registered for every instantiation so that results
// can be compared directly between different data types and architectures.
// `InType` and `OutType` do not influence the registered sizes; the template
// parameters are kept so that existing call sites continue to compile.
template <class InType, class OutType>
void UnaryElementwiseParameters(benchmark::Benchmark* benchmark) {
  benchmark->ArgName("N");
  // Fixed element counts, independent of sizeof(InType) / sizeof(OutType) and
  // of the target architecture.
  benchmark->Arg(8 * 1024);
  benchmark->Arg(64 * 1024);
}

// Registers the element counts (benchmark argument "N") used when benchmarking
// a binary elementwise microkernel.
//
// The same fixed sizes are registered for every instantiation so that results
// can be compared directly between different data types and architectures.
// `InType` and `OutType` do not influence the registered sizes; the template
// parameters are kept so that existing call sites continue to compile.
template <class InType, class OutType>
void BinaryElementwiseParameters(benchmark::Benchmark* benchmark) {
  benchmark->ArgName("N");
  // Fixed element counts, independent of sizeof(InType) / sizeof(OutType) and
  // of the target architecture.
  benchmark->Arg(8 * 1024);
  benchmark->Arg(64 * 1024);
}

// Check if the architecture flags are supported.
Expand Down
Loading