initial RISC-V support (#2614)

franz · web-flow · commit 4e3f16b2b91d · 2026-03-17T09:25:59.000-07:00
Unlike the related PR #2344, which simply warns about unsupported FTZ, this PR attempts to handle FTZ correctly on RISC-V. The RISC-V 'f' extension provides no way to enable or disable flushing subnormals to zero; implementations are required to always support subnormals. Therefore this PR reuses the FTZ handling code from PPC, where flushing also has to be performed explicitly.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -89,6 +89,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
     set(CLConform_TARGET_ARCH x86_64)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
     set(CLConform_TARGET_ARCH x86)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*")
+    set(CLConform_TARGET_ARCH RISCV)
 endif()
 
 if(NOT DEFINED CLConform_TARGET_ARCH)
diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h
@@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type;
 #elif defined(__PPC__)
 #include <fpu_control.h>
 extern __thread fpu_control_t fpu_control;
+#elif defined(__riscv)
+#define _FPU_MASK_NI 1
+static FPU_mode_type fpu_control;
 #elif defined(__mips__)
 #include "mips/m32c1.h"
 #endif
@@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode)
     || defined(_M_X64) || defined(__MINGW32__)
     *oldMode = _mm_getcsr();
     _mm_setcsr(*oldMode | 0x8040);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     *oldMode = fpu_control;
     fpu_control |= _FPU_MASK_NI;
 #elif defined(__arm__)
@@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode)
     || defined(_M_X64) || defined(__MINGW32__)
     *oldMode = _mm_getcsr();
     _mm_setcsr(*oldMode & ~0x8040);
-#elif defined(__PPC__)
-    *mode = fpu_control;
+#elif defined(__PPC__) || defined(__riscv)
+    *oldMode = fpu_control;
     fpu_control &= ~_FPU_MASK_NI;
 #elif defined(__arm__)
     unsigned fpscr;
@@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode)
 #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)               \
     || defined(_M_X64) || defined(__MINGW32__)
     _mm_setcsr(*mode);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     fpu_control = *mode;
 #elif defined(__arm__)
     __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp
@@ -201,6 +201,7 @@ RoundingMode get_round(void)
 #elif defined(__mips__)
 #include "mips/m32c1.h"
 #endif
+
 void *FlushToZero(void)
 {
 #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
@@ -231,6 +232,8 @@ void *FlushToZero(void)
 #elif defined(__mips__)
     fpa_bissr(FPA_CSR_FS);
     return NULL;
+#elif defined(__riscv)
+    return NULL;
 #else
 #error Unknown arch
 #endif
@@ -266,6 +269,8 @@ void UnFlushToZero(void *p)
     _FPU_SETCW(flags);
 #elif defined(__mips__)
     fpa_bicsr(FPA_CSR_FS);
+#elif defined(__riscv)
+    return;
 #else
 #error Unknown arch
 #endif
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
@@ -1409,6 +1409,8 @@ void PrintArch(void)
     vlog("ARCH:\tWindows\n");
 #elif defined(__mips__)
     vlog("ARCH:\tmips\n");
+#elif defined(__riscv)
+    vlog("ARCH:\tRISC-V\n");
 #else
 #error unknown arch
 #endif
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
@@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
 }
 #endif
 
-#ifdef __PPC__
+#if defined(__PPC__) || defined(__riscv)
 float ppc_mul(float a, float b)
 {
     float p;
@@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
             // turn that off
             f3[i] = sse_mul(q, q2);
             f4[i] = sse_mul(-q, q2);
-#elif defined(__PPC__)
-            // None of the current generation PPC processors support HW
-            // FTZ, emulate it in sw.
+#elif (defined(__PPC__) || defined(__riscv))
+            // RISC-V CPUs with the default 'f' fp32 extension cannot enable
+            // or disable FTZ mode; subnormals are always handled without
+            // FTZ. None of the current generation PPC processors support HW
+            // FTZ either, so emulate it in sw.
             f3[i] = ppc_mul(q, q2);
             f4[i] = ppc_mul(-q, q2);
 #else
@@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
                 skipTest[j][i] = (bufSkip[i] ||
                                   (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
 
-#if defined(__PPC__)
-                // Since the current Power processors don't emulate flush to zero in HW,
-                // it must be emulated in SW instead.
+#if defined(__PPC__) || defined(__riscv)
+                // Current Power processors do not support flush to zero in
+                // HW, so it must be emulated in SW instead. The same applies
+                // to RISC-V CPUs with the 'f' extension.
                 if (gForceFTZ)
                 {
                     if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
@@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
                 }
             }
 
-
             double *f  = (double*) buf1;
             double *f2 = (double*) buf2;
             double *f3 = (double*) buf3_double;
diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h
@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
 uint64_t GetTime(void);
 
 void WriteInputBufferComplete(void *);
-void *FlushToZero(void);
-void UnFlushToZero(void *);
 }
 
 struct CalcRefValsBase
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
@@ -859,7 +859,9 @@ double reference_add(double x, double y)
     __m128 vb = _mm_set_ss((float)b);
     va = _mm_add_ss(va, vb);
     _mm_store_ss((float *)&a, va);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
+    // RISC-V CPUs with the default 'f' fp32 extension provide no way to
+    // enable/disable FTZ mode; subnormals are always handled without flushing.
     // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
     // denorm's to zero. As such, the reference add with FTZ must be emulated in
     // sw.
@@ -876,7 +878,7 @@ double reference_add(double x, double y)
         } ub;
         ub.d = b;
         cl_uint mantA, mantB;
-        cl_ulong addendA, addendB, sum;
+        cl_ulong addendA, addendB;
         int expA = extractf(a, &mantA);
         int expB = extractf(b, &mantB);
         cl_uint signA = ua.u & 0x80000000U;
@@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
     __m128 vb = _mm_set_ss((float)b);
     va = _mm_mul_ss(va, vb);
     _mm_store_ss((float *)&a, va);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
     // denorm's to zero. As such, the reference multiply with FTZ must be
     // emulated in sw.
@@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)
 
 long double reference_rintl(long double x)
 {
-#if defined(__PPC__)
+#if defined(__PPC__) || defined(__riscv)
     // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
     // mantissa can represent more than LDBL_MANT_DIG binary digits.
     x = rintl(x);

Original file line number	Diff line number	Diff line change
`@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);`
`120`	`120`	`uint64_t GetTime(void);`
`121`	`121`
`122`	`122`	`void WriteInputBufferComplete(void *);`
`123`		`-void *FlushToZero(void);`
`124`		`-void UnFlushToZero(void *);`
`125`	`123`	`}`
`126`	`124`
`127`	`125`	`struct CalcRefValsBase`