44using System . Collections . Generic ;
55using System . Diagnostics ;
66using System . Runtime . InteropServices ;
7+ using System . Runtime . Intrinsics . X86 ;
78
89using ILCompiler ;
910
@@ -52,7 +53,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
5253 // Ready to run images are built with certain instruction set baselines
5354 if ( ( targetArchitecture == TargetArchitecture . X86 ) || ( targetArchitecture == TargetArchitecture . X64 ) )
5455 {
55- instructionSetSupportBuilder . AddSupportedInstructionSet ( "sse2 " ) ; // Lower baselines included by implication
56+ instructionSetSupportBuilder . AddSupportedInstructionSet ( "base " ) ;
5657 }
5758 else if ( targetArchitecture == TargetArchitecture . ARM64 )
5859 {
@@ -63,7 +64,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
6364 }
6465 else
6566 {
66- instructionSetSupportBuilder . AddSupportedInstructionSet ( "neon" ) ; // Lower baselines included by implication
67+ instructionSetSupportBuilder . AddSupportedInstructionSet ( "neon" ) ;
6768 }
6869 }
6970
@@ -72,6 +73,8 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
7273 // compile both branches of IsSupported checks.
7374 bool allowOptimistic = ! optimizingForSize ;
7475
76+ bool throttleAvx512 = false ;
77+
7578 if ( instructionSet == "native" )
7679 {
7780 // We're compiling for a specific chip
@@ -92,31 +95,78 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
9295 }
9396 HardwareIntrinsicHelpers . AddRuntimeRequiredIsaFlagsToBuilder ( instructionSetSupportBuilder , cpuFeatures ) ;
9497
98+ if ( targetArchitecture is TargetArchitecture . X64 or TargetArchitecture . X86 )
99+ {
100+ // Some architectures can experience frequency throttling when executing
101+ // 512-bit width instructions. To account for this we set the
102+ // default preferred vector width to 256-bits in some scenarios.
103+ ( int Eax , int Ebx , int Ecx , int Edx ) cpuidInfo = X86Base . CpuId ( 0 , 0 ) ;
104+ bool isGenuineIntel = ( cpuidInfo . Ebx == 0x756E6547 ) && // Genu
105+ ( cpuidInfo . Edx == 0x49656E69 ) && // ineI
106+ ( cpuidInfo . Ecx == 0x6C65746E ) ; // ntel
107+ if ( isGenuineIntel )
108+ {
109+ cpuidInfo = X86Base . CpuId ( 1 , 0 ) ;
110+ Debug . Assert ( ( cpuidInfo . Edx & ( 1 << 15 ) ) != 0 ) ; // CMOV
111+ int model = ( cpuidInfo . Eax >> 4 ) & 0xF ;
112+ int family = ( cpuidInfo . Eax >> 8 ) & 0xF ;
113+ int extendedModel = ( cpuidInfo . Eax >> 16 ) & 0xF ;
114+
115+ if ( family == 0x06 )
116+ {
117+ if ( extendedModel == 0x05 )
118+ {
119+ if ( model == 0x05 )
120+ {
121+ // * Skylake (Server)
122+ // * Cascade Lake
123+ // * Cooper Lake
124+
125+ throttleAvx512 = true ;
126+ }
127+ }
128+ else if ( extendedModel == 0x06 )
129+ {
130+ if ( model == 0x06 )
131+ {
132+ // * Cannon Lake
133+
134+ throttleAvx512 = true ;
135+ }
136+ }
137+ }
138+ }
139+
140+ if ( throttleAvx512 && logger . IsVerbose )
141+ logger . LogMessage ( "Vector512 is throttled" ) ;
142+ }
143+
95144 if ( logger . IsVerbose )
96145 logger . LogMessage ( $ "The 'native' instruction set expanded to { instructionSetSupportBuilder } ") ;
97146 }
98147 else if ( instructionSet != null )
99148 {
100149 List < string > instructionSetParams = new List < string > ( ) ;
150+ string [ ] instructionSetParamsInput = instructionSet . Split ( ',' ) ;
101151
102152 // Normalize instruction set format to include implied +.
103- string [ ] instructionSetParamsInput = instructionSet . Split ( ',' ) ;
104153 for ( int i = 0 ; i < instructionSetParamsInput . Length ; i ++ )
105154 {
106- instructionSet = instructionSetParamsInput [ i ] ;
155+ instructionSet = instructionSetParamsInput [ i ] . Trim ( ) ;
107156
108157 if ( string . IsNullOrEmpty ( instructionSet ) )
109158 throw new CommandLineException ( string . Format ( mustNotBeMessage , "" ) ) ;
110159
111160 char firstChar = instructionSet [ 0 ] ;
161+
112162 if ( ( firstChar != '+' ) && ( firstChar != '-' ) )
113163 {
114- instructionSet = "+" + instructionSet ;
164+ instructionSet = "+" + instructionSet ;
115165 }
166+
116167 instructionSetParams . Add ( instructionSet ) ;
117168 }
118169
119- Dictionary < string , bool > instructionSetSpecification = new Dictionary < string , bool > ( ) ;
120170 foreach ( string instructionSetSpecifier in instructionSetParams )
121171 {
122172 instructionSet = instructionSetSpecifier . Substring ( 1 ) ;
@@ -160,53 +210,56 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
160210 InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder ( instructionSetSupportBuilder ) ;
161211
162212 // Optimistically assume some instruction sets are present.
163- if ( allowOptimistic && ( targetArchitecture == TargetArchitecture . X86 || targetArchitecture == TargetArchitecture . X64 ) )
213+ if ( allowOptimistic && targetArchitecture is TargetArchitecture . X86 or TargetArchitecture . X64 )
164214 {
165215 // We set these hardware features as opportunistically enabled as most hardware in the wild supports them.
166216 // Note that we do not indicate support for AVX, or any other instruction set which uses the VEX encodings as
167217 // the presence of those makes otherwise acceptable code be unusable on hardware which does not support VEX encodings.
168218 //
169- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "sse4.2 " ) ; // Lower SSE versions included by implication
219+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "sse42 " ) ;
170220 optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "aes" ) ;
171- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "pclmul" ) ;
172- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "movbe" ) ;
173- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "popcnt" ) ;
174- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "lzcnt" ) ;
175- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "serialize" ) ;
221+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "gfni" ) ;
222+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "sha" ) ;
223+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "waitpkg" ) ;
224+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "x86serialize" ) ;
176225
177226 // If AVX was enabled, we can opportunistically enable instruction sets which use the VEX encodings
178227 Debug . Assert ( InstructionSet . X64_AVX == InstructionSet . X86_AVX ) ;
228+ Debug . Assert ( InstructionSet . X64_AVX2 == InstructionSet . X86_AVX2 ) ;
229+
179230 if ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX ) )
180231 {
181- // TODO: Enable optimistic usage of AVX2 once we validate it doesn't break Vector<T> usage
182- // optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2");
232+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx2" ) ;
233+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avxifma" ) ;
234+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avxvnni" ) ;
235+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avxvnniint" ) ;
236+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "aes_v256" ) ;
237+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "gfni_v256" ) ;
238+
239+ // If AVX2 is not in the supported set, we need to restrict the optimistic Vector<T> size, because
240+ // 256-bit Vector<T> cannot be fully accelerated based on AVX2 being in the optimistic set only.
183241
184- if ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX2 ) )
242+ if ( ! supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX2 ) )
185243 {
186- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avxvnni" ) ;
244+ maxVectorTBitWidth = 128 ;
187245 }
188-
189- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "fma" ) ;
190- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "bmi" ) ;
191- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "bmi2" ) ;
192246 }
193247
194- Debug . Assert ( InstructionSet . X64_AVX512F == InstructionSet . X86_AVX512F ) ;
195- if ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512F ) )
248+ Debug . Assert ( InstructionSet . X64_AVX512 == InstructionSet . X86_AVX512 ) ;
249+ if ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512 ) )
196250 {
197- Debug . Assert ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512F_VL ) ) ;
198- Debug . Assert ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512BW ) ) ;
199- Debug . Assert ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512BW_VL ) ) ;
200- Debug . Assert ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512CD ) ) ;
201- Debug . Assert ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512CD_VL ) ) ;
202- Debug . Assert ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512DQ ) ) ;
203- Debug . Assert ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X64_AVX512DQ_VL ) ) ;
204-
205- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx512vbmi" ) ;
206- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx512vbmi_vl" ) ;
251+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx512v2" ) ;
252+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx512v3" ) ;
253+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx10v1" ) ;
254+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx10v2" ) ;
255+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avxvnniint_v512" ) ;
256+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "avx512vp2intersect" ) ;
257+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "aes_v512" ) ;
258+ optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "gfni_v512" ) ;
259+
207260 }
208261 }
209- else if ( targetArchitecture == TargetArchitecture . ARM64 )
262+ else if ( allowOptimistic && targetArchitecture is TargetArchitecture . ARM64 )
210263 {
211264 optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "aes" ) ;
212265 optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "crc" ) ;
@@ -215,7 +268,6 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
215268 optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "lse" ) ;
216269 optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "dotprod" ) ;
217270 optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "rdma" ) ;
218- optimisticInstructionSetSupportBuilder . AddSupportedInstructionSet ( "rcpc" ) ;
219271 }
220272
221273 // Vector<T> can always be part of the optimistic set, we only want to optionally exclude it from the supported set
@@ -224,6 +276,37 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
224276 optimisticInstructionSet . Remove ( unsupportedInstructionSet ) ;
225277 optimisticInstructionSet . Add ( supportedInstructionSet ) ;
226278
279+ if ( throttleAvx512 )
280+ {
281+ Debug . Assert ( InstructionSet . X86_AVX512 == InstructionSet . X64_AVX512 ) ;
282+ if ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X86_AVX512 ) )
283+ {
284+ Debug . Assert ( InstructionSet . X86_Vector256 == InstructionSet . X64_Vector256 ) ;
285+ Debug . Assert ( InstructionSet . X86_VectorT256 == InstructionSet . X64_VectorT256 ) ;
286+ Debug . Assert ( InstructionSet . X86_VectorT512 == InstructionSet . X64_VectorT512 ) ;
287+
288+ // AVX-512 is supported, but we are compiling specifically for hardware that has a performance penalty for
289+ // using 512-bit ops. We want to tell JIT not to consider Vector512 to be hardware accelerated, which we do
290+ // by passing a PreferredVectorBitWidth value, in the form of a virtual vector ISA of the appropriate size.
291+ //
292+ // If we are downgrading the max accelerated vector size, we also need to downgrade Vector<T> size.
293+
294+ supportedInstructionSet . AddInstructionSet ( InstructionSet . X86_Vector256 ) ;
295+
296+ if ( supportedInstructionSet . HasInstructionSet ( InstructionSet . X86_VectorT512 ) )
297+ {
298+ supportedInstructionSet . RemoveInstructionSet ( InstructionSet . X86_VectorT512 ) ;
299+ supportedInstructionSet . AddInstructionSet ( InstructionSet . X86_VectorT256 ) ;
300+ }
301+
302+ if ( optimisticInstructionSet . HasInstructionSet ( InstructionSet . X86_VectorT512 ) )
303+ {
304+ optimisticInstructionSet . RemoveInstructionSet ( InstructionSet . X86_VectorT512 ) ;
305+ optimisticInstructionSet . AddInstructionSet ( InstructionSet . X86_VectorT256 ) ;
306+ }
307+ }
308+ }
309+
227310 return new InstructionSetSupport ( supportedInstructionSet ,
228311 unsupportedInstructionSet ,
229312 optimisticInstructionSet ,
0 commit comments