Skip to content

Commit 4e3f16b

Browse files
authored
initial RISC-V support (#2614)
Unlike the related PR #2344, which simply warns about unsupported FTZ, this PR attempts to handle FTZ correctly on RISC-V. The RISC-V 'f' extension does not provide any way to enable or disable flushing subnormals to zero; implementations are required to always support subnormals. Therefore, this PR reuses the FTZ handling code from PPC, where flushing also has to be performed explicitly.
1 parent 6506421 commit 4e3f16b

7 files changed

Lines changed: 32 additions & 18 deletions

File tree

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
8989
set(CLConform_TARGET_ARCH x86_64)
9090
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
9191
set(CLConform_TARGET_ARCH x86)
92+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*")
93+
set(CLConform_TARGET_ARCH RISCV)
9294
endif()
9395

9496
if(NOT DEFINED CLConform_TARGET_ARCH)

test_common/harness/fpcontrol.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type;
4545
#elif defined(__PPC__)
4646
#include <fpu_control.h>
4747
extern __thread fpu_control_t fpu_control;
48+
#elif defined(__riscv)
49+
#define _FPU_MASK_NI 1
50+
static FPU_mode_type fpu_control;
4851
#elif defined(__mips__)
4952
#include "mips/m32c1.h"
5053
#endif
@@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode)
5659
|| defined(_M_X64) || defined(__MINGW32__)
5760
*oldMode = _mm_getcsr();
5861
_mm_setcsr(*oldMode | 0x8040);
59-
#elif defined(__PPC__)
62+
#elif defined(__PPC__) || defined(__riscv)
6063
*oldMode = fpu_control;
6164
fpu_control |= _FPU_MASK_NI;
6265
#elif defined(__arm__)
@@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode)
8992
|| defined(_M_X64) || defined(__MINGW32__)
9093
*oldMode = _mm_getcsr();
9194
_mm_setcsr(*oldMode & ~0x8040);
92-
#elif defined(__PPC__)
93-
*mode = fpu_control;
95+
#elif defined(__PPC__) || defined(__riscv)
96+
*oldMode = fpu_control;
9497
fpu_control &= ~_FPU_MASK_NI;
9598
#elif defined(__arm__)
9699
unsigned fpscr;
@@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode)
121124
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
122125
|| defined(_M_X64) || defined(__MINGW32__)
123126
_mm_setcsr(*mode);
124-
#elif defined(__PPC__)
127+
#elif defined(__PPC__) || defined(__riscv)
125128
fpu_control = *mode;
126129
#elif defined(__arm__)
127130
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));

test_common/harness/rounding_mode.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ RoundingMode get_round(void)
201201
#elif defined(__mips__)
202202
#include "mips/m32c1.h"
203203
#endif
204+
204205
void *FlushToZero(void)
205206
{
206207
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
@@ -231,6 +232,8 @@ void *FlushToZero(void)
231232
#elif defined(__mips__)
232233
fpa_bissr(FPA_CSR_FS);
233234
return NULL;
235+
#elif defined(__riscv)
236+
return NULL;
234237
#else
235238
#error Unknown arch
236239
#endif
@@ -266,6 +269,8 @@ void UnFlushToZero(void *p)
266269
_FPU_SETCW(flags);
267270
#elif defined(__mips__)
268271
fpa_bicsr(FPA_CSR_FS);
272+
#elif defined(__riscv)
273+
return;
269274
#else
270275
#error Unknown arch
271276
#endif

test_common/harness/testHarness.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,8 @@ void PrintArch(void)
14091409
vlog("ARCH:\tWindows\n");
14101410
#elif defined(__mips__)
14111411
vlog("ARCH:\tmips\n");
1412+
#elif defined(__riscv)
1413+
vlog("ARCH:\tRISC-V\n");
14121414
#else
14131415
#error unknown arch
14141416
#endif

test_conformance/contractions/contractions.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
191191
}
192192
#endif
193193

194-
#ifdef __PPC__
194+
#if defined(__PPC__) || defined(__riscv)
195195
float ppc_mul(float a, float b)
196196
{
197197
float p;
@@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
630630
// turn that off
631631
f3[i] = sse_mul(q, q2);
632632
f4[i] = sse_mul(-q, q2);
633-
#elif defined(__PPC__)
634-
// None of the current generation PPC processors support HW
635-
// FTZ, emulate it in sw.
633+
#elif (defined(__PPC__) || defined(__riscv))
634+
// RISC-V CPUs with default 'f' fp32 extension do not support
635+
// enabling/disabling FTZ mode, subnormals are always handled
636+
// without FTZ. None of the current generation PPC processors
637+
// support HW FTZ, emulate it in sw.
636638
f3[i] = ppc_mul(q, q2);
637639
f4[i] = ppc_mul(-q, q2);
638640
#else
@@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
721723
skipTest[j][i] = (bufSkip[i] ||
722724
(gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
723725

724-
#if defined(__PPC__)
725-
// Since the current Power processors don't emulate flush to zero in HW,
726-
// it must be emulated in SW instead.
726+
#if defined(__PPC__) || defined(__riscv)
727+
// Since the current Power processors don't emulate flush to
728+
// zero in HW, it must be emulated in SW instead. (same for
729+
// RISC-V CPUs with 'f' extension)
727730
if (gForceFTZ)
728731
{
729732
if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
@@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
760763
}
761764
}
762765

763-
764766
double *f = (double*) buf1;
765767
double *f2 = (double*) buf2;
766768
double *f3 = (double*) buf3_double;

test_conformance/conversions/basic_test_conversions.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
120120
uint64_t GetTime(void);
121121

122122
void WriteInputBufferComplete(void *);
123-
void *FlushToZero(void);
124-
void UnFlushToZero(void *);
125123
}
126124

127125
struct CalcRefValsBase

test_conformance/math_brute_force/reference_math.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,9 @@ double reference_add(double x, double y)
859859
__m128 vb = _mm_set_ss((float)b);
860860
va = _mm_add_ss(va, vb);
861861
_mm_store_ss((float *)&a, va);
862-
#elif defined(__PPC__)
862+
#elif defined(__PPC__) || defined(__riscv)
863+
// RISC-V CPUs with default 'f' fp32 extension do not support any way to
864+
// enable/disable FTZ mode, subnormals are always handled without flushing.
863865
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
864866
// denorm's to zero. As such, the reference add with FTZ must be emulated in
865867
// sw.
@@ -876,7 +878,7 @@ double reference_add(double x, double y)
876878
} ub;
877879
ub.d = b;
878880
cl_uint mantA, mantB;
879-
cl_ulong addendA, addendB, sum;
881+
cl_ulong addendA, addendB;
880882
int expA = extractf(a, &mantA);
881883
int expB = extractf(b, &mantB);
882884
cl_uint signA = ua.u & 0x80000000U;
@@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
972974
__m128 vb = _mm_set_ss((float)b);
973975
va = _mm_mul_ss(va, vb);
974976
_mm_store_ss((float *)&a, va);
975-
#elif defined(__PPC__)
977+
#elif defined(__PPC__) || defined(__riscv)
976978
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
977979
// denorm's to zero. As such, the reference multiply with FTZ must be
978980
// emulated in sw.
@@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)
33513353

33523354
long double reference_rintl(long double x)
33533355
{
3354-
#if defined(__PPC__)
3356+
#if defined(__PPC__) || defined(__riscv)
33553357
// On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
33563358
// mantissa can represent more than LDBL_MANT_DIG binary digits.
33573359
x = rintl(x);

0 commit comments

Comments
 (0)