Skip to content

Commit e06bfca

Browse files
committed
Auto merge of #154094 - folkertdev:aarch64-arm-load-store, r=<try>
add neon load/store assembly test try-job: aarch64-apple
2 parents 20f19f4 + b44f9c1 commit e06bfca

File tree

1 file changed

+78
-0
lines changed

1 file changed

+78
-0
lines changed
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
//@ assembly-output: emit-asm
2+
//
3+
//@ revisions: AARCH64
4+
//@[AARCH64] compile-flags: -Copt-level=3
5+
//@[AARCH64] only-aarch64-unknown-linux-gnu
6+
//
7+
//@ revisions: ARMV7
8+
//@[ARMV7] compile-flags: -Copt-level=3
9+
//@[ARMV7] only-arm
10+
//@[ARMV7] ignore-thumb
11+
//@[ARMV7] ignore-android
12+
#![crate_type = "lib"]
13+
#![cfg_attr(target_arch = "arm", feature(arm_target_feature, stdarch_arm_neon_intrinsics))]
14+
15+
#[cfg(target_arch = "aarch64")]
16+
use std::arch::aarch64::*;
17+
#[cfg(target_arch = "arm")]
18+
use std::arch::arm::*;
19+
20+
// Loads of 3 are error-prone because a `repr(simd)` type's size is always rounded up to the next
21+
// power of 2. Hence, using `read_unaligned` and `write_unaligned` on such types is invalid: it
22+
// would go out of bounds.
23+
// Assembly test for `vld3q_f32`: wraps the intrinsic in an unmangled function so that its symbol
// can be located by name in the emitted assembly. The directives in the body expect one `ld3`
// into three `.4s` registers on AArch64, and two `vld3.32` loads on 32-bit Arm, in both cases
// followed by stores of the loaded registers and a return.
#[unsafe(no_mangle)]
24+
// On 32-bit Arm only, compile this function with the `neon` and `v7` target features enabled.
#[cfg_attr(target_arch = "arm", target_feature(enable = "neon,v7"))]
25+
fn test_vld3q_f32(ptr: *const f32) -> float32x4x3_t {
26+
// AARCH64-LABEL: test_vld3q_f32
27+
// AARCH64: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
28+
// AARCH64: stp q0, q1, [x8]
29+
// AARCH64: str q2, [x8, #32]
30+
// AARCH64: ret
31+
//
32+
// ARMV7-LABEL: test_vld3q_f32
33+
// ARMV7: vld3.32 {d16, d18, d20}, [r1]!
34+
// ARMV7: vld3.32 {d17, d19, d21}, [r1]
35+
// ARMV7: vst1.32 {d16, d17}, [r0]!
36+
// ARMV7: vst1.32 {d18, d19}, [r0]!
37+
// ARMV7: vst1.64 {d20, d21}, [r0]
38+
// ARMV7: bx lr
39+
// NOTE(review): `ptr` is handed to the intrinsic unchecked even though this is a safe `fn`;
// presumably acceptable because the function exists only to have its assembly inspected --
// confirm that it is never actually called.
unsafe { vld3q_f32(ptr) }
40+
}
41+
42+
// Assembly test for `vld3q_s32`: wraps the intrinsic in an unmangled function so that its symbol
// can be located by name in the emitted assembly. The directives in the body expect one `ld3`
// into three `.4s` registers on AArch64, and two `vld3.32` loads on 32-bit Arm, in both cases
// followed by stores of the loaded registers and a return.
#[unsafe(no_mangle)]
43+
// On 32-bit Arm only, compile this function with the `neon` and `v7` target features enabled.
#[cfg_attr(target_arch = "arm", target_feature(enable = "neon,v7"))]
44+
fn test_vld3q_s32(ptr: *const i32) -> int32x4x3_t {
45+
// AARCH64-LABEL: test_vld3q_s32
46+
// AARCH64: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
47+
// AARCH64: stp q0, q1, [x8]
48+
// AARCH64: str q2, [x8, #32]
49+
// AARCH64: ret
50+
//
51+
// ARMV7-LABEL: test_vld3q_s32
52+
// ARMV7: vld3.32 {d16, d18, d20}, [r1]!
53+
// ARMV7: vld3.32 {d17, d19, d21}, [r1]
54+
// ARMV7: vst1.32 {d16, d17}, [r0]!
55+
// ARMV7: vst1.32 {d18, d19}, [r0]!
56+
// ARMV7: vst1.64 {d20, d21}, [r0]
57+
// ARMV7: bx lr
58+
// NOTE(review): `ptr` is handed to the intrinsic unchecked even though this is a safe `fn`;
// presumably acceptable because the function exists only to have its assembly inspected --
// confirm that it is never actually called.
unsafe { vld3q_s32(ptr) }
59+
}
60+
61+
// Assembly test for `vld3q_u32`: wraps the intrinsic in an unmangled function so that its symbol
// can be located by name in the emitted assembly. The directives in the body expect one `ld3`
// into three `.4s` registers on AArch64, and two `vld3.32` loads on 32-bit Arm, in both cases
// followed by stores of the loaded registers and a return.
#[unsafe(no_mangle)]
62+
// On 32-bit Arm only, compile this function with the `neon` and `v7` target features enabled.
#[cfg_attr(target_arch = "arm", target_feature(enable = "neon,v7"))]
63+
fn test_vld3q_u32(ptr: *const u32) -> uint32x4x3_t {
64+
// AARCH64-LABEL: test_vld3q_u32
65+
// AARCH64: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
66+
// AARCH64: stp q0, q1, [x8]
67+
// AARCH64: str q2, [x8, #32]
68+
// AARCH64: ret
69+
//
70+
// ARMV7-LABEL: test_vld3q_u32
71+
// ARMV7: vld3.32 {d16, d18, d20}, [r1]!
72+
// ARMV7: vld3.32 {d17, d19, d21}, [r1]
73+
// ARMV7: vst1.32 {d16, d17}, [r0]!
74+
// ARMV7: vst1.32 {d18, d19}, [r0]!
75+
// ARMV7: vst1.64 {d20, d21}, [r0]
76+
// ARMV7: bx lr
77+
// NOTE(review): `ptr` is handed to the intrinsic unchecked even though this is a safe `fn`;
// presumably acceptable because the function exists only to have its assembly inspected --
// confirm that it is never actually called.
unsafe { vld3q_u32(ptr) }
78+
}

0 commit comments

Comments
 (0)