@@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
7070
7171#if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV1 )
7272static inline int get_gemv_optimal_nthreads_neoversev1 (BLASLONG MN , int ncpu ) {
73- return
74- MN < 25600L ? 1
75- : MN < 63001L ? MIN (ncpu , 4 )
76- : MN < 459684L ? MIN (ncpu , 16 )
77- : ncpu ;
73+ #ifdef DOUBLE
74+ return (MN < 8100L ) ? 1
75+ : (MN < 12100L ) ? MIN (ncpu , 2 )
76+ : (MN < 36100L ) ? MIN (ncpu , 4 )
77+ : (MN < 84100L ) ? MIN (ncpu , 8 )
78+ : (MN < 348100L ) ? MIN (ncpu , 16 )
79+ : (MN < 435600L ) ? MIN (ncpu , 24 )
80+ : (MN < 810000L ) ? MIN (ncpu , 32 )
81+ : (MN < 1050625L ) ? MIN (ncpu , 40 )
82+ : ncpu ;
83+ #else
84+ return (MN < 25600L ) ? 1
85+ : (MN < 63001L ) ? MIN (ncpu , 4 )
86+ : (MN < 459684L ) ? MIN (ncpu , 16 )
87+ : ncpu ;
88+ #endif
7889}
7990#endif
8091
@@ -89,50 +100,24 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
89100}
90101#endif
91102
92- //thread throttling for dgemv
93- #if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV1 )
94- static inline int get_dgemv_optimal_nthreads_neoversev1 (BLASLONG MN , int ncpu ) {
95-
96- return
97- MN < 8100L ? 1
98- : MN < 12100L ? MIN (ncpu , 2 )
99- : MN < 36100L ? MIN (ncpu , 4 )
100- : MN < 84100L ? MIN (ncpu , 8 )
101- : MN < 348100L ? MIN (ncpu , 16 )
102- : MN < 435600L ? MIN (ncpu , 24 )
103- : MN < 810000L ? MIN (ncpu , 32 )
104- : MN < 1050625 ? MIN (ncpu , 40 )
105- : ncpu ;
106-
107- }
108- #endif
109-
110103static inline int get_gemv_optimal_nthreads (BLASLONG MN ) {
111104 int ncpu = num_cpu_avail (3 );
112105#if defined(_WIN64 ) && defined(_M_ARM64 )
113106 if (MN > 100000000L )
114107 return num_cpu_avail (4 );
115108 return 1 ;
116109#endif
117- #if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined( BFLOAT16 )
110+ #if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
118111 return get_gemv_optimal_nthreads_neoversev1 (MN , ncpu );
119- #elif defined(NEOVERSEV1 ) && !defined(COMPLEX ) && defined(DOUBLE ) && !defined(BFLOAT16 )
120- return get_dgemv_optimal_nthreads_neoversev1 (MN , ncpu );
121112#elif defined(NEOVERSEV2 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
122113 return get_gemv_optimal_nthreads_neoversev2 (MN , ncpu );
123- #elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined( BFLOAT16 )
114+ #elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
124115 if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
125116 return get_gemv_optimal_nthreads_neoversev1 (MN , ncpu );
126117 }
127118 if (strcmp (gotoblas_corename (), "neoversev2" ) == 0 ) {
128119 return get_gemv_optimal_nthreads_neoversev2 (MN , ncpu );
129120 }
130- #elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && defined(DOUBLE ) && !defined(BFLOAT16 )
131- if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
132- return get_dgemv_optimal_nthreads_neoversev1 (MN , ncpu );
133- }
134-
135-
136121#endif
137122
138123 if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )
0 commit comments