|
1 | 1 | /**************************** vectormath_lib.h ***************************** |
2 | 2 | * Author: Agner Fog |
3 | 3 | * Date created: 2012-05-30 |
4 | | -* Last modified: 2022-07-26 |
| 4 | +* Last modified: 2022-08-02 |
5 | 5 | * Version: 2.02.00 |
6 | 6 | * Project: vector class library |
7 | 7 | * Description: |
8 | 8 | * Header file defining mathematical functions on floating point vectors |
9 | | -* using Intel SVML library |
10 | | -* |
11 | | -* Instructions to use SVML library: |
12 | | -* Include this file and link with svml |
| 9 | +* using Intel SVML (Short Vector Math Library) |
13 | 10 | * |
| 11 | +* Include this file if you want to use SVML for math functions on vectors |
| 12 | +* See vcl_manual.pdf for details on how to obtain the SVML library and link to it. |
14 | 13 | * Alternatively, use the inline math functions by including |
15 | | -* vectormath_exp.h for power and exponential functions |
16 | | -* vectormath_trig.h for trigonometric functions |
| 14 | +* vectormath_exp.h for power and exponential functions, |
| 15 | +* vectormath_trig.h for trigonometric functions, |
17 | 16 | * vectormath_hyp.h for hyperbolic functions |
18 | 17 | * |
19 | 18 | * For detailed instructions, see vcl_manual.pdf |
|
36 | 35 | namespace VCL_NAMESPACE { // optional name space |
37 | 36 | #endif |
38 | 37 |
|
| 38 | +#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) |
| 39 | +#define USE_SVML_INTRINSICS // Intel compilers provide intrinsic functions for accessing the SVML library |
| 40 | +#endif |
| 41 | + |
| 42 | +#if !(defined(USE_SVML_INTRINSICS)) |
| 43 | +// sinpi, cospi, and tanpi functions are included in SVML, but undocumented |
| 44 | +// (The "Classic" version of the Intel compiler accepts the intrinsics for these functions even though they are not declared in the header files) |
| 45 | +#define TRIGPI_FUNCTIONS |
| 46 | +#endif |
| 47 | + |
39 | 48 | #if defined(__clang__) || defined (__GNUC__) |
40 | 49 | #define SINCOS_ASM // sincos can be fixed with inline assembly |
41 | 50 | #else |
42 | 51 | // The MS compiler does not support inline assembly, so sincos is not available |
43 | 52 | #endif |
44 | 53 |
|
45 | 54 |
|
46 | | -#if !(defined(__INTEL_COMPILER) && defined(__clang__)) |
47 | | -#define TRIGPI_FUNCTIONS // sinpi etc. not yet defined intel icpx compiler 2022.1 |
48 | | -#endif |
49 | | - |
50 | 55 |
|
51 | | -#ifdef __INTEL_COMPILER |
| 56 | +#ifdef USE_SVML_INTRINSICS |
52 | 57 |
|
53 | 58 | /***************************************************************************** |
54 | 59 | * |
@@ -284,7 +289,7 @@ static inline Vec2d cdfnorminv(Vec2d const x) { // inverse cumulative normal di |
284 | 289 | * |
285 | 290 | *************************************************************************************/ |
286 | 291 |
|
287 | | -#if (defined(_WIN64) && !defined(__INTEL_COMPILER) ) |
| 292 | +#if (defined(_WIN64) && !defined(USE_SVML_INTRINSICS) ) |
288 | 293 | // (A call with one parameter may work without __vectorcall because the parameter happens to be in zmm0, but relying on that would be unsafe) |
289 | 294 | #define V_VECTORCALL __vectorcall // fix calling convention, one parameter. |
290 | 295 | #define V_VECTORCALL2 __vectorcall // fix calling convention, two parameters or two returns |
@@ -627,15 +632,15 @@ static inline Vec2d cdfnorminv (Vec2d const x) { // inverse cumulative normal di |
627 | 632 | return __svml_cdfnorminv2(x); |
628 | 633 | } |
629 | 634 |
|
630 | | -#endif // __INTEL_COMPILER |
| 635 | +#endif // USE_SVML_INTRINSICS |
631 | 636 |
|
632 | 637 |
|
633 | 638 |
|
634 | 639 | #if defined (MAX_VECTOR_SIZE) && MAX_VECTOR_SIZE >= 256 // 256 bit vectors |
635 | 640 |
|
636 | 641 | #if defined (VECTORF256_H) // 256-bit vector registers supported |
637 | 642 |
|
638 | | -#ifdef __INTEL_COMPILER |
| 643 | +#ifdef USE_SVML_INTRINSICS |
639 | 644 | /***************************************************************************** |
640 | 645 | * |
641 | 646 | * 256-bit vector functions using Intel compiler intrinsic functions |
@@ -863,7 +868,7 @@ static inline Vec4d cdfnorminv(Vec4d const x) {// inverse cumulative normal dist |
863 | 868 | return _mm256_cdfnorminv_pd(x); |
864 | 869 | } |
865 | 870 |
|
866 | | -#else // not __INTEL_COMPILER |
| 871 | +#else // not USE_SVML_INTRINSICS |
867 | 872 | /***************************************************************************** |
868 | 873 | * |
869 | 874 | * 256-bit vector functions using a compiler other than Intel |
@@ -1170,7 +1175,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal d |
1170 | 1175 | return __svml_cdfnorminv4(x); |
1171 | 1176 | } |
1172 | 1177 |
|
1173 | | -#endif // __INTEL_COMPILER |
| 1178 | +#endif // USE_SVML_INTRINSICS |
1174 | 1179 |
|
1175 | 1180 | #else // not VECTORF256_H |
1176 | 1181 |
|
@@ -1415,7 +1420,7 @@ static inline Vec4d cdfnorminv (Vec4d const x) { // inverse cumulative normal di |
1415 | 1420 |
|
1416 | 1421 | #if defined (VECTORF512_H) // 512-bit vector registers supported |
1417 | 1422 |
|
1418 | | -#ifdef __INTEL_COMPILER |
| 1423 | +#ifdef USE_SVML_INTRINSICS |
1419 | 1424 | /***************************************************************************** |
1420 | 1425 | * |
1421 | 1426 | * 512-bit vector functions using Intel compiler intrinsic functions |
@@ -1540,12 +1545,15 @@ static inline Vec8d cospi(Vec8d const x) { // cosine |
1540 | 1545 | static inline Vec16f tanpi(Vec16f const x) { // tangent |
1541 | 1546 | return _mm512_tanpi_ps(x); |
1542 | 1547 | } |
1543 | | -/* |
| 1548 | + |
1544 | 1549 | static inline Vec8d tanpi(Vec8d const x) { // tangent |
1545 | | - // bug in compiler intrinsic? expecting argument __m512, should be __m512d |
| 1550 | +#ifdef __INTEL_COMPILER |
| 1551 | + // see https://community.intel.com/t5/Intel-C-Compiler/mm512-tanpi-pd-wrong-declaration/m-p/1404627 |
| 1552 | + return _mm512_castps_pd(_mm512_tanpi_pd(_mm512_castpd_ps(x))); |
| 1553 | +#else |
1546 | 1554 | return _mm512_tanpi_pd(x); |
1547 | | -} */ |
1548 | | - |
| 1555 | +#endif |
| 1556 | +} |
1549 | 1557 | #endif // TRIGPI_FUNCTIONS |
1550 | 1558 |
|
1551 | 1559 | // inverse trigonometric functions |
@@ -1647,7 +1655,7 @@ static inline Vec8d cdfnorminv(Vec8d const x) { // inverse cumulative normal di |
1647 | 1655 | return _mm512_cdfnorminv_pd(x); |
1648 | 1656 | } |
1649 | 1657 |
|
1650 | | -#else // __INTEL_COMPILER |
| 1658 | +#else // USE_SVML_INTRINSICS |
1651 | 1659 | /***************************************************************************** |
1652 | 1660 | * |
1653 | 1661 | * 512-bit vector functions using a compiler other than Intel |
@@ -1954,7 +1962,7 @@ static inline Vec8d cdfnorminv (Vec8d const x) { // inverse cumulative normal |
1954 | 1962 | return __svml_cdfnorminv8(x); |
1955 | 1963 | } |
1956 | 1964 |
|
1957 | | -#endif // __INTEL_COMPILER |
| 1965 | +#endif // USE_SVML_INTRINSICS |
1958 | 1966 |
|
1959 | 1967 | #else // VECTORF512_H |
1960 | 1968 |
|
|
0 commit comments