@@ -89,6 +89,24 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
8989}
9090#endif
9191
92+ //thread throttling for dgemv
93+ #if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV1 )
94+ static inline int get_dgemv_optimal_nthreads_neoversev1 (BLASLONG MN , int ncpu ) {
95+
96+ return
97+ MN < 8100L ? 1
98+ : MN < 12100L ? MIN (ncpu , 2 )
99+ : MN < 36100L ? MIN (ncpu , 4 )
100+ : MN < 84100L ? MIN (ncpu , 8 )
101+ : MN < 348100L ? MIN (ncpu , 16 )
102+ : MN < 435600L ? MIN (ncpu , 24 )
103+ : MN < 810000L ? MIN (ncpu , 32 )
104+ : MN < 1050625 ? MIN (ncpu , 40 )
105+ : ncpu ;
106+
107+ }
108+ #endif
109+
92110static inline int get_gemv_optimal_nthreads (BLASLONG MN ) {
93111 int ncpu = num_cpu_avail (3 );
94112#if defined(_WIN64 ) && defined(_M_ARM64 )
@@ -98,6 +116,8 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
98116#endif
99117#if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
100118 return get_gemv_optimal_nthreads_neoversev1 (MN , ncpu );
119+ #elif defined(NEOVERSEV1 ) && !defined(COMPLEX ) && defined(DOUBLE ) && !defined(BFLOAT16 )
120+ return get_dgemv_optimal_nthreads_neoversev1 (MN , ncpu );
101121#elif defined(NEOVERSEV2 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
102122 return get_gemv_optimal_nthreads_neoversev2 (MN , ncpu );
103123#elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
0 commit comments