Collection of math libraries in different programming languages
- use
INLINEfor andOpenMPin functions accessing the type. - use
-ftree-vectorize -fopenmp -fopenmp-simd -O3 -march=<your target ARCH> -mfpmath=<your SIMD> -std=[gnu|c++]23
enum class alg
{
unk = 0 << 0,
sca = 1 << 0, /* align to 1 * alignof(T) */
vec = 1 << 1, /* align to N_POW2 * alignof(T) */
mat = 1 << 2, /* align to bitceil(COLS * alignof(vec<T,ROWS>)) */
ten = 1 << 3,
qut = 1 << 4,
mot = 1 << 5,
sed = 1 << 6,
oct = 1 << 7,
std = 1 << 8, /* N == N_POW2 ? vec/mat/other : sca */
};
#ifdef _MSC_VER
#define INLINE __force_inline __flatten __declspec(noalias) __declspec(nothrow) inline
#else
#define INLINE __attribute__((always_inline),(nothrow),(const),(flatten)) inline
#endif
#if __STDC_VERSION__ < 202311L
#ifndef _MSC_VER
#include <stdalign.h>
#endif
#if !__alignas_is_defined
#define alignas _Alignas
#define __alignas_is_defined 1
#endif
#if !__alignof_is_defined
#define alignof _Alignof
#define __alignof_is_defined 1
#endif
#endif
#ifdef _MSC_VER
#define QALIGN(x) __declspec(align(x))
#elif defined(__GNUC__) || defined(__clang__)
#define QALIGN(x) __attribute__((aligned(x)))
#else
#define QALIGN(x)
#endif
#define BITOP_RUP01__(x) ( (x) | ( (x) >> 1))
#define BITOP_RUP02__(x) (BITOP_RUP01__(x) | (BITOP_RUP01__(x) >> 2))
#define BITOP_RUP04__(x) (BITOP_RUP02__(x) | (BITOP_RUP02__(x) >> 4))
#define BITOP_RUP08__(x) (BITOP_RUP04__(x) | (BITOP_RUP04__(x) >> 8))
#define BITOP_RUP16__(x) (BITOP_RUP08__(x) | (BITOP_RUP08__(x) >> 16))
#define bitceil(x) (const uint32_t)(BITOP_RUP16__(((uint32_t)(x)) - 1) + 1)
#define countof(x) sizeof(x)/sizeof((x)[0])
#define isarray(a) __builtin_choose_expr(__builtin_types_compatible_p(__typeof__((a)[0]) [], __typeof__((a))), true, false)
/* WARNING: vec_ext_countof(a) return different results in GCC/LLVM for non-power two element count */
/* TODO: find a way to return vector element count for non-power of two vectors in GCC */
#define vec(N,T) QALIGN((N == bitceil(N) ? N : 1) * alignof(N)) __typeof__(__typeof__(T)[N])
#define avec(N,T) QALIGN(bitceil(N)) __typeof__(__typeof__(T)[N])
#ifdef __clang__
/* evec(t,n) for LLVM SIMD Vector Extension */
#pragma pack(push,1)
#define evec(N,T) __attribute__((ext_vector_type(N))) __typeof__(T)
#pragma pack(pop)
#define rc_avec(src,n) (*(avec(__typeof__((src)[0]),n)*)__builtin_addressof(src))
#define rc_evec(src,n) (*(evec(__typeof__((src)[0]),n)*)__builtin_addressof(src))
#define evec_countof(a) __builtin_vectorelements(rc_evec(a,countof(a)))
#define isvector(a) !isarray(a) && \
__builtin_choose_expr( \
bitceil(evec_countof(a)) == countof(a),true,false)
#elif defined(__GNUC__)
/* evec(t,n) for GCC SIMD vector types */
#pragma pack(push,1)
#define evec(n,t) __attribute__((vector_size(bitceil(n) * alignof(t)))) __typeof__(t)
#pragma pack(pop)
#define rc_avec(src,n) (*(avec(__typeof__((src)[0]),n)*)&src)
#define rc_evec(src,n) (*(evec(__typeof__((src)[0]),n)*)&src)
#define evec_countof(a) countof(a)
#define isvector(a) !isarray(a) && \
_Generic(__builtin_convertvector(rc_evec(a,evec_countof(a)), \
evec(float,bitceil(evec_countof(a)))), \
evec(float,bitceil(evec_countof(a))): true, \
default: false)
#else
/* evec(T,N) for other compilers */
#pragma pack(push,1)
#define evec(N,T) avec(N,T)
#pragma pack(pop)
#endifDot/Cross3 product example:
#include "array.h"
#define dot(a,b,t,n,i) \
({ \
t _dst = (t)i; \
static_assert((isarray(a) || isvector(a)),"ERROR: dot - a is not of array or vector type!"); \
static_assert((isarray(b) || isvector(b)),"ERROR: dot - b is not of array or vector type!"); \
size_t len = (size_t)llabs((ssize_t)n); \
len = MIN(MIN(!isarray(a) ? vec_countof(a) : countof(a), \
!isarray(b) ? vec_countof(b) : countof(b)), \
(ssize_t)n>0?len:0); \
_Pragma("omp simd reduction(+:_dst)") \
for(size_t j = 0; j < len; j++) \
_dst += (t)((a)[j]) * (t)((b)[j]); \
_dst; \
})
typedef float real;
#define dot3(a,b) dot(a,b,real,3,0)
#define dot4(a,b) dot(a,b,real,4,0)
#define perm3(a,x,y,z,t,n) (vec(t,n)){ (t)(a)[x], (t)(a)[y], (t)(a)[z] }
#define perm4(a,x,y,z,w,t,n) (vec(t,n)){ (t)(a)[x], (t)(a)[y], (t)(a)[z], (t)(a)[w] }
#define rc_cross3(a,b,t,n) perm3(a,1,2,0,t,n) * perm3(b,2,0,1,t,n) - perm3(a,2,0,1,t,n) * perm3(b,1,2,0,t,n)
#define cross3(a,b) rc_cross3(a,b,__typeof__((rc_vec(a,3) * rc_vec(b,3))[0]),3)
void usage(int argc, char** argv)
{
fprintf(stderr,"Usage: %s NUM NUM NUM NUM NUM NUM\n", argv[0]);
}
int main(int argc, char** argv)
{
if(argc < 7) { usage(argc,argv); exit(EXIT_FAILURE); }
vec(real,3) a = { (real)strtod(argv[1],NULL), (real)strtod(argv[2],NULL), (real)strtod(argv[3],NULL) };
arr(real,3) b = { (real)strtod(argv[4],NULL), (real)strtod(argv[5],NULL), (real)strtod(argv[6],NULL) };
vec(real,3) c = cross3(a,b);
vec(real,3) d = cross3(b,a);
printf("array vector vec_countof countof alignof sizeof\n");
printf("%5b %6b %11zu %7zu %7zu %6zu\n", isarray(a), isvector(a),vec_countof(a),countof(a), alignof(a), sizeof(a));
printf("%5b %6b %11zu %7zu %7zu %6zu\n", isarray(b), isvector(b),vec_countof(b),countof(b), alignof(b), sizeof(b));
puts("");
printf("dot3(a,b) = %+e\n", dot3(a,b));
printf("dot3(b,a) = %+e\n", dot3(b,a));
printf("cross3(a,b) = [%+e %+e %+e]\n", c[0],c[1],c[2]);
printf("cross3(b,a) = [%+e %+e %+e]\n", d[0],d[1],d[2]);
exit(EXIT_SUCCESS);
}template<typename T,size_t N, size_t N_POW2 = std::bit_ceil<size_t>(N)>
struct alignas((N == N_POW2 ? N : 1) * alignof(T)) vec<T,N> : std::array<T,N> {};
static_assert(countof(vec<T,N>) == N);
static_assert(sizeof(vec<T,N>) == (N * sizeof(T)));#define evec(n,t) __attribute__((vector_size(bitceil(n) * alignof(t)))) __typeof__(t)
static_assert(countof(vec(T,N)) == bitceil(N));
static_assert(sizeof(vec(T,N)) == (bitceil(N) * sizeof(T)));```cpp
#define evec(N,T) __attribute__((ext_vector_type(N))) __typeof__(T)
static_assert(__builtin_vectorelements(vec(T,N)) == N && countof(vec(T,N)) == bitceil(N));
static_assert(sizeof(vec(T,N)) == (bitceil(N) * sizeof(T)));- OpenMP
- OpenCL
- OpenGL/KHR
- SYCL Overview
- Microsoft/DirectXMath
- Microsoft Visual Studio OpenMP SIMD
- Vectorization optimization in GCC
- gtsam
- xcmath
- smath
- wykobi
- arrayfire
- libxsmm
- sleef
- petsc
- ctmd
- linalg
- lapack
- NumKong
- version2
- oneAPI/mkl
- glm
- ITK
- vxl/vnl
- Terathon-Math-Library
- CML
- mr-math
- versor
- eve
- nicemath
- highway
- libsimdpp
- kfr
- prideout/par
- eigen
- armadillo
- ensmallen
- bandicoot
- flint
- libigl
- cgal
- qhull
- muparser