preserve SIMD element type information

folkertdev · folkertdev · commit c1444a03e009 · 2026-04-08T22:04:18.000+02:00
and provide it to LLVM for better optimization
diff --git a/compiler/rustc_abi/src/callconv.rs b/compiler/rustc_abi/src/callconv.rs
@@ -3,7 +3,7 @@ use crate::{BackendRepr, FieldsShape, Primitive, Size, TyAbiInterface, TyAndLayo
 
 mod reg;
 
-pub use reg::{Reg, RegKind};
+pub use reg::{Reg, RegKind, VectorElemKind};
 
 /// Return value from the `homogeneous_aggregate` test function.
 #[derive(Copy, Clone, Debug)]
@@ -75,10 +75,30 @@ impl<'a, Ty> TyAndLayout<'a, Ty> {
                 Ok(HomogeneousAggregate::Homogeneous(Reg { kind, size: self.size }))
             }
 
-            BackendRepr::SimdVector { .. } => {
+            BackendRepr::SimdVector { element, count: _ } => {
                 assert!(!self.is_zst());
+
+                let hint_vector_elem = match element.primitive() {
+                    Primitive::Int(integer, _) => match integer.size().bits() {
+                        8 => VectorElemKind::I8,
+                        16 => VectorElemKind::I16,
+                        32 => VectorElemKind::I32,
+                        64 => VectorElemKind::I64,
+                        128 => VectorElemKind::I128,
+                        bits => panic!("unsupported vector integer element size: {bits}"),
+                    },
+                    Primitive::Float(float) => match float.size().bits() {
+                        16 => VectorElemKind::F16,
+                        32 => VectorElemKind::F32,
+                        64 => VectorElemKind::F64,
+                        128 => VectorElemKind::F128,
+                        bits => panic!("unsupported vector float element size: {bits}"),
+                    },
+                    Primitive::Pointer(_) => VectorElemKind::Ptr,
+                };
+
                 Ok(HomogeneousAggregate::Homogeneous(Reg {
-                    kind: RegKind::Vector,
+                    kind: RegKind::Vector { hint_vector_elem },
                     size: self.size,
                 }))
             }
diff --git a/compiler/rustc_abi/src/callconv/reg.rs b/compiler/rustc_abi/src/callconv/reg.rs
@@ -3,12 +3,38 @@ use rustc_macros::HashStable_Generic;
 
 use crate::{Align, HasDataLayout, Size};
 
+/// The element kind of a vector type. This is used to generate a more accurate vector type in
+/// the backend, and is not relevant for the ABI.
+#[cfg_attr(feature = "nightly", derive(HashStable_Generic))]
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum VectorElemKind {
+    I8,
+    I16,
+    I32,
+    I64,
+    I128,
+
+    F16,
+    F32,
+    F64,
+    F128,
+
+    Ptr,
+}
+
 #[cfg_attr(feature = "nightly", derive(HashStable_Generic))]
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum RegKind {
     Integer,
     Float,
-    Vector,
+    Vector {
+        /// The `hint_vector_elem` is strictly for optimization purposes and can be safely ignored (e.g.
+        /// by always picking i8) by codegen backends.
+        ///
+        /// The element kind is used to provide more accurate type information to the backend, which
+        /// helps with optimization (e.g. because it prevents extra bitcasts that obscure a pattern).
+        hint_vector_elem: VectorElemKind,
+    },
 }
 
 #[cfg_attr(feature = "nightly", derive(HashStable_Generic))]
@@ -36,6 +62,11 @@ impl Reg {
     reg_ctor!(f32, Float, 32);
     reg_ctor!(f64, Float, 64);
     reg_ctor!(f128, Float, 128);
+
+    /// A vector of the given size with an unknown (and irrelevant) element type.
+    pub fn opaque_vector(size: Size) -> Reg {
+        Reg { kind: RegKind::Vector { hint_vector_elem: VectorElemKind::I8 }, size }
+    }
 }
 
 impl Reg {
@@ -58,7 +89,7 @@ impl Reg {
                 128 => dl.f128_align,
                 _ => panic!("unsupported float: {self:?}"),
             },
-            RegKind::Vector => dl.llvmlike_vector_align(self.size),
+            RegKind::Vector { .. } => dl.llvmlike_vector_align(self.size),
         }
     }
 }
diff --git a/compiler/rustc_abi/src/lib.rs b/compiler/rustc_abi/src/lib.rs
@@ -58,7 +58,7 @@ mod layout;
 #[cfg(test)]
 mod tests;
 
-pub use callconv::{Heterogeneous, HomogeneousAggregate, Reg, RegKind};
+pub use callconv::{Heterogeneous, HomogeneousAggregate, Reg, RegKind, VectorElemKind};
 pub use canon_abi::{ArmCall, CanonAbi, InterruptKind, X86Call};
 #[cfg(feature = "nightly")]
 pub use extern_abi::CVariadicStatus;
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -3,7 +3,7 @@ use std::cmp;
 use libc::c_uint;
 use rustc_abi::{
     ArmCall, BackendRepr, CanonAbi, HasDataLayout, InterruptKind, Primitive, Reg, RegKind, Size,
-    X86Call,
+    VectorElemKind, X86Call,
 };
 use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
@@ -137,7 +137,27 @@ impl LlvmType for Reg {
                 128 => cx.type_f128(),
                 _ => bug!("unsupported float: {:?}", self),
             },
-            RegKind::Vector => cx.type_vector(cx.type_i8(), self.size.bytes()),
+            RegKind::Vector { hint_vector_elem } => {
+                // NOTE: it is valid to ignore the element type hint (and always pick i8).
+                // But providing a more accurate type means fewer casts in LLVM IR,
+                // which helps with optimization.
+                let (ty, bytes) = match hint_vector_elem {
+                    VectorElemKind::I8 => (cx.type_ix(8), 1),
+                    VectorElemKind::I16 => (cx.type_ix(16), 2),
+                    VectorElemKind::I32 => (cx.type_ix(32), 4),
+                    VectorElemKind::I64 => (cx.type_ix(64), 8),
+                    VectorElemKind::I128 => (cx.type_ix(128), 16),
+                    VectorElemKind::F16 => (cx.type_f16(), 2),
+                    VectorElemKind::F32 => (cx.type_f32(), 4),
+                    VectorElemKind::F64 => (cx.type_f64(), 8),
+                    VectorElemKind::F128 => (cx.type_f128(), 16),
+                    VectorElemKind::Ptr => {
+                        (cx.type_ptr(), cx.tcx.data_layout.pointer_size().bytes())
+                    }
+                };
+
+                cx.type_vector(ty, self.size.bytes() / bytes)
+            }
         }
     }
 }
diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -416,7 +416,7 @@ fn wasm_type<'tcx>(signature: &mut String, arg_abi: &ArgAbi<'_, Ty<'tcx>>, ptr_t
                     ..=8 => "f64",
                     _ => ptr_type,
                 },
-                RegKind::Vector => "v128",
+                RegKind::Vector { .. } => "v128",
             };
 
             signature.push_str(wrapped_wasm_type);
diff --git a/compiler/rustc_monomorphize/src/mono_checks/abi_check.rs b/compiler/rustc_monomorphize/src/mono_checks/abi_check.rs
@@ -25,8 +25,11 @@ fn passes_vectors_by_value(mode: &PassMode, repr: &BackendRepr) -> UsesVectorReg
     match mode {
         PassMode::Ignore | PassMode::Indirect { .. } => UsesVectorRegisters::No,
         PassMode::Cast { pad_i32: _, cast }
-            if cast.prefix.iter().any(|r| r.is_some_and(|x| x.kind == RegKind::Vector))
-                || cast.rest.unit.kind == RegKind::Vector =>
+            if cast
+                .prefix
+                .iter()
+                .any(|r| r.is_some_and(|x| matches!(x.kind, RegKind::Vector { .. })))
+                || matches!(cast.rest.unit.kind, RegKind::Vector { .. }) =>
         {
             UsesVectorRegisters::FixedVector
         }
diff --git a/compiler/rustc_target/src/callconv/aarch64.rs b/compiler/rustc_target/src/callconv/aarch64.rs
@@ -35,7 +35,7 @@ where
             // The softfloat ABI treats floats like integers, so they
             // do not get homogeneous aggregate treatment.
             RegKind::Float => cx.target_spec().rustc_abi != Some(RustcAbi::Softfloat),
-            RegKind::Vector => size.bits() == 64 || size.bits() == 128,
+            RegKind::Vector { .. } => size.bits() == 64 || size.bits() == 128,
         };
 
         valid_unit.then_some(Uniform::consecutive(unit, size))
diff --git a/compiler/rustc_target/src/callconv/arm.rs b/compiler/rustc_target/src/callconv/arm.rs
@@ -19,7 +19,7 @@ where
         let valid_unit = match unit.kind {
             RegKind::Integer => false,
             RegKind::Float => true,
-            RegKind::Vector => size.bits() == 64 || size.bits() == 128,
+            RegKind::Vector { .. } => size.bits() == 64 || size.bits() == 128,
         };
 
         valid_unit.then_some(Uniform::consecutive(unit, size))
diff --git a/compiler/rustc_target/src/callconv/powerpc64.rs b/compiler/rustc_target/src/callconv/powerpc64.rs
@@ -36,7 +36,7 @@ where
         let valid_unit = match unit.kind {
             RegKind::Integer => false,
             RegKind::Float => true,
-            RegKind::Vector => arg.layout.size.bits() == 128,
+            RegKind::Vector { .. } => arg.layout.size.bits() == 128,
         };
 
         valid_unit.then_some(Uniform::consecutive(unit, arg.layout.size))
diff --git a/compiler/rustc_target/src/callconv/s390x.rs b/compiler/rustc_target/src/callconv/s390x.rs
@@ -3,7 +3,7 @@
 
 use rustc_abi::{BackendRepr, HasDataLayout, TyAbiInterface};
 
-use crate::callconv::{ArgAbi, FnAbi, Reg, RegKind};
+use crate::callconv::{ArgAbi, FnAbi, Reg};
 use crate::spec::{Env, HasTargetSpec, Os};
 
 fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
@@ -51,7 +51,7 @@ where
 
         if arg.layout.is_single_vector_element(cx, size) {
             // pass non-transparent wrappers around a vector as `PassMode::Cast`
-            arg.cast_to(Reg { kind: RegKind::Vector, size });
+            arg.cast_to(Reg::opaque_vector(size));
             return;
         }
     }
diff --git a/compiler/rustc_target/src/callconv/x86.rs b/compiler/rustc_target/src/callconv/x86.rs
@@ -1,9 +1,8 @@
 use rustc_abi::{
-    AddressSpace, Align, BackendRepr, HasDataLayout, Primitive, Reg, RegKind, TyAbiInterface,
-    TyAndLayout,
+    AddressSpace, Align, BackendRepr, HasDataLayout, Primitive, Reg, RegKind, TyAndLayout,
 };
 
-use crate::callconv::{ArgAttribute, FnAbi, PassMode};
+use crate::callconv::{ArgAttribute, FnAbi, PassMode, TyAbiInterface};
 use crate::spec::{HasTargetSpec, RustcAbi};
 
 #[derive(PartialEq)]
@@ -175,7 +174,7 @@ pub(crate) fn fill_inregs<'a, Ty, C>(
         // At this point we know this must be a primitive of sorts.
         let unit = arg.layout.homogeneous_aggregate(cx).unwrap().unit().unwrap();
         assert_eq!(unit.size, arg.layout.size);
-        if matches!(unit.kind, RegKind::Float | RegKind::Vector) {
+        if matches!(unit.kind, RegKind::Float | RegKind::Vector { .. }) {
             continue;
         }
 
@@ -226,7 +225,7 @@ where
                 // This is a single scalar that fits into an SSE register, and the target uses the
                 // SSE ABI. We prefer this over integer registers as float scalars need to be in SSE
                 // registers for float operations, so that's the best place to pass them around.
-                fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
+                fn_abi.ret.cast_to(Reg::opaque_vector(fn_abi.ret.layout.size));
             } else if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::ZERO).size(cx) {
                 // Same size or smaller than pointer, return in an integer register.
                 fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
diff --git a/compiler/rustc_target/src/callconv/x86_64.rs b/compiler/rustc_target/src/callconv/x86_64.rs
@@ -151,7 +151,7 @@ fn reg_component(cls: &[Option<Class>], i: &mut usize, size: Size) -> Option<Reg
                     _ => Reg::f64(),
                 }
             } else {
-                Reg { kind: RegKind::Vector, size: Size::from_bytes(8) * (vec_len as u64) }
+                Reg::opaque_vector(Size::from_bytes(8) * (vec_len as u64))
             })
         }
         Some(c) => unreachable!("reg_component: unhandled class {:?}", c),
diff --git a/compiler/rustc_target/src/callconv/x86_win64.rs b/compiler/rustc_target/src/callconv/x86_win64.rs
@@ -1,4 +1,4 @@
-use rustc_abi::{BackendRepr, Float, Integer, Primitive, RegKind, Size, TyAbiInterface};
+use rustc_abi::{BackendRepr, Float, Integer, Primitive, Size, TyAbiInterface};
 
 use crate::callconv::{ArgAbi, FnAbi, Reg};
 use crate::spec::{HasTargetSpec, RustcAbi};
@@ -33,8 +33,7 @@ where
                     } else {
                         // `i128` is returned in xmm0 by Clang and GCC
                         // FIXME(#134288): This may change for the `-msvc` targets in the future.
-                        let reg = Reg { kind: RegKind::Vector, size: Size::from_bits(128) };
-                        a.cast_to(reg);
+                        a.cast_to(Reg::opaque_vector(Size::from_bits(128)));
                     }
                 } else if a.layout.size.bytes() > 8
                     && !matches!(scalar.primitive(), Primitive::Float(Float::F128))
diff --git a/tests/assembly-llvm/aarch64-vld2-s16.rs b/tests/assembly-llvm/aarch64-vld2-s16.rs
@@ -0,0 +1,48 @@
+//@ assembly-output: emit-asm
+//@ compile-flags: -Copt-level=3
+//@ only-aarch64-unknown-linux-gnu
+#![feature(repr_simd, portable_simd, core_intrinsics, f16, f128)]
+#![crate_type = "lib"]
+#![allow(non_camel_case_types)]
+
+// Test `vld2_s16` can be implemented in a portable way (i.e. without using LLVM neon intrinsics).
+// This relies on rust preserving the SIMD vector element type and using it to construct the
+// LLVM type. Without this information, additional casts are needed that defeat the LLVM pattern
+// matcher, see https://github.com/llvm/llvm-project/issues/181514.
+
+use std::mem::transmute;
+use std::simd::Simd;
+
+#[unsafe(no_mangle)]
+#[target_feature(enable = "neon")]
+unsafe extern "C" fn vld2_s16_old(ptr: *const i16) -> std::arch::aarch64::int16x4x2_t {
+    // CHECK-LABEL: vld2_s16_old
+    // CHECK: .cfi_startproc
+    // CHECK-NEXT: ld2 { v0.4h, v1.4h }, [x0]
+    // CHECK-NEXT: ret
+    std::arch::aarch64::vld2_s16(ptr)
+}
+
+#[unsafe(no_mangle)]
+#[target_feature(enable = "neon")]
+unsafe extern "C" fn vld2_s16_new(a: *const i16) -> std::arch::aarch64::int16x4x2_t {
+    // CHECK-LABEL: vld2_s16_new
+    // CHECK: .cfi_startproc
+    // CHECK-NEXT: ld2 { v0.4h, v1.4h }, [x0]
+    // CHECK-NEXT: ret
+
+    type V = Simd<i16, 4>;
+    type W = Simd<i16, 8>;
+
+    let w: W = std::ptr::read_unaligned(a as *const W);
+
+    #[repr(simd)]
+    pub(crate) struct SimdShuffleIdx<const LEN: usize>([u32; LEN]);
+
+    let v0: V =
+        std::intrinsics::simd::simd_shuffle(w, w, const { SimdShuffleIdx([0u32, 2, 4, 6]) });
+    let v1: V =
+        std::intrinsics::simd::simd_shuffle(w, w, const { SimdShuffleIdx([1u32, 3, 5, 7]) });
+
+    transmute((v0, v1))
+}
diff --git a/tests/codegen-llvm/preserve-vec-element-types.rs b/tests/codegen-llvm/preserve-vec-element-types.rs

Original file line number	Diff line number	Diff line change
`@@ -25,8 +25,11 @@ fn passes_vectors_by_value(mode: &PassMode, repr: &BackendRepr) -> UsesVectorReg`
`25`	`25`	`match mode {`
`26`	`26`	`PassMode::Ignore \| PassMode::Indirect { .. } => UsesVectorRegisters::No,`
`27`	`27`	`PassMode::Cast { pad_i32: _, cast }`
`28`		`- if cast.prefix.iter().any(\|r\| r.is_some_and(\|x\| x.kind == RegKind::Vector))`
`29`		`- \|\| cast.rest.unit.kind == RegKind::Vector =>`
	`28`	`+ if cast`
	`29`	`+ .prefix`
	`30`	`+ .iter()`
	`31`	`+ .any(\|r\| r.is_some_and(\|x\| matches!(x.kind, RegKind::Vector { .. })))`
	`32`	`+ \|\| matches!(cast.rest.unit.kind, RegKind::Vector { .. }) =>`
`30`	`33`	`{`
`31`	`34`	`UsesVectorRegisters::FixedVector`
`32`	`35`	`}`
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`
`4`	`4`	`use rustc_abi::{BackendRepr, HasDataLayout, TyAbiInterface};`
`5`	`5`
`6`		`-use crate::callconv::{ArgAbi, FnAbi, Reg, RegKind};`
	`6`	`+use crate::callconv::{ArgAbi, FnAbi, Reg};`
`7`	`7`	`use crate::spec::{Env, HasTargetSpec, Os};`
`8`	`8`
`9`	`9`	`fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {`
`@@ -51,7 +51,7 @@ where`
`51`	`51`
`52`	`52`	`if arg.layout.is_single_vector_element(cx, size) {`
`53`	`53`	// pass non-transparent wrappers around a vector as `PassMode::Cast`
`54`		`- arg.cast_to(Reg { kind: RegKind::Vector, size });`
	`54`	`+ arg.cast_to(Reg::opaque_vector(size));`
`55`	`55`	`return;`
`56`	`56`	`}`
`57`	`57`	`}`