Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1588,7 +1588,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
FeatureUnalignedDSAccess, FeatureGWS,
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
Expand Down
28 changes: 12 additions & 16 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3947,9 +3947,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 15
; GFX9-NEXT: scratch_store_dword v0, v1, off
; GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
Expand All @@ -3969,9 +3969,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: v_add_u32_e32 v0, s1, v0
; GFX942-NEXT: v_add3_u32 v0, s0, v0, -16
; GFX942-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 15
; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1
; GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_endpgm
;
Expand All @@ -3996,9 +3996,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s2, v0
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX9-NEXT: s_endpgm
;
Expand All @@ -4018,9 +4018,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
; UNALIGNED_GFX942: ; %bb.0: ; %bb
; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s1, v0
; UNALIGNED_GFX942-NEXT: v_add3_u32 v0, s0, v0, -16
; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s0, v0
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 15
; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1
; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX942-NEXT: s_endpgm
;
Expand Down Expand Up @@ -4052,8 +4052,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8
; GFX9-NEXT: scratch_load_dword v2, off, s0
; GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
Expand All @@ -4071,8 +4070,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
;
; GFX942-LABEL: sgpr_base_negative_offset:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8
; GFX942-NEXT: scratch_load_dword v2, off, s0
; GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dword v[0:1], v2, off
; GFX942-NEXT: s_endpgm
Expand All @@ -4095,8 +4093,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
; UNALIGNED_GFX9: ; %bb.0: ; %entry
; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; UNALIGNED_GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8
; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s0
; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX9-NEXT: global_store_dword v[0:1], v2, off
; UNALIGNED_GFX9-NEXT: s_endpgm
Expand All @@ -4114,8 +4111,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
;
; UNALIGNED_GFX942-LABEL: sgpr_base_negative_offset:
; UNALIGNED_GFX942: ; %bb.0: ; %entry
; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8
; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0
; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off
; UNALIGNED_GFX942-NEXT: s_endpgm
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1582,8 +1582,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, -1, v0
; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_endpgm
;
Expand All @@ -1593,8 +1592,8 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_endpgm
;
Expand Down
51 changes: 21 additions & 30 deletions llvm/test/CodeGen/AMDGPU/flat-scratch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4561,11 +4561,10 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg)
; GFX9-LABEL: store_load_i32_negative_unaligned:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, -1, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 1
; GFX9-NEXT: scratch_store_byte v0, v1, off
; GFX9-NEXT: scratch_store_byte v0, v1, off offset:-1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_ubyte v0, v0, off glc
; GFX9-NEXT: scratch_load_ubyte v0, v0, off offset:-1 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -4632,22 +4631,20 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg)
; GFX9-PAL-LABEL: store_load_i32_negative_unaligned:
; GFX9-PAL: ; %bb.0: ; %bb
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-PAL-NEXT: v_add_u32_e32 v0, -1, v0
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 1
; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off
; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off offset:-1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off glc
; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off offset:-1 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: store_load_i32_negative_unaligned:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_add_u32_e32 v0, -1, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 1
; GFX942-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX942-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1
; GFX942-NEXT: scratch_load_ubyte v0, v0, off offset:-1 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -4732,11 +4729,11 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture
; GFX9-LABEL: store_load_i32_large_negative_unaligned:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0
; GFX9-NEXT: v_add_u32_e32 v0, 0xfffff000, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 1
; GFX9-NEXT: scratch_store_byte v0, v1, off
; GFX9-NEXT: scratch_store_byte v0, v1, off offset:-129
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_ubyte v0, v0, off glc
; GFX9-NEXT: scratch_load_ubyte v0, v0, off offset:-129 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -4805,22 +4802,22 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture
; GFX9-PAL-LABEL: store_load_i32_large_negative_unaligned:
; GFX9-PAL: ; %bb.0: ; %bb
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-PAL-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0
; GFX9-PAL-NEXT: v_add_u32_e32 v0, 0xfffff000, v0
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 1
; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off
; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off offset:-129
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off glc
; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off offset:-129 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: store_load_i32_large_negative_unaligned:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0
; GFX942-NEXT: v_add_u32_e32 v0, 0xfffff000, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 1
; GFX942-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX942-NEXT: scratch_store_byte v0, v1, off offset:-129 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1
; GFX942-NEXT: scratch_load_ubyte v0, v0, off offset:-129 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -5485,9 +5482,8 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-NEXT: s_add_i32 s2, s2, s3
; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
; GFX9-NEXT: v_add_u32_e32 v0, -16, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 15
; GFX9-NEXT: scratch_store_dword v0, v1, off
; GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
Expand Down Expand Up @@ -5531,18 +5527,16 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-PAL-NEXT: s_add_i32 s0, s0, s1
; GFX9-PAL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX9-PAL-NEXT: v_add_u32_e32 v0, -16, v0
; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off
; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off offset:-16
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
;
; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_add_i32 s0, s0, s1
; GFX942-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-NEXT: v_add_u32_e32 v0, -16, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 15
; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1
; GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_endpgm
;
Expand Down Expand Up @@ -5591,8 +5585,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-NEXT: s_addk_i32 s2, 0xffe8
; GFX9-NEXT: scratch_load_dword v2, off, s2
; GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
Expand Down Expand Up @@ -5631,16 +5624,14 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff
; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-PAL-NEXT: s_addk_i32 s0, 0xffe8
; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0
; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0 offset:-24
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off
; GFX9-PAL-NEXT: s_endpgm
;
; GFX942-LABEL: sgpr_base_negative_offset:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_addk_i32 s0, 0xffe8
; GFX942-NEXT: scratch_load_dword v2, off, s0
; GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dword v[0:1], v2, off
; GFX942-NEXT: s_endpgm
Expand Down
27 changes: 14 additions & 13 deletions llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
Original file line number Diff line number Diff line change
Expand Up @@ -294,28 +294,29 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: s_mov_b32 s0, 0
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 offset:1024
; FLATSCR-NEXT: s_movk_i32 s1, 0x2000
; FLATSCR-NEXT: s_movk_i32 s0, 0x4010
; FLATSCR-NEXT: scratch_store_dword off, v0, s1
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s1, 0
; FLATSCR-NEXT: .LBB2_1: ; %loadstoreloop
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
; FLATSCR-NEXT: s_add_i32 s1, s0, 0x2000
; FLATSCR-NEXT: s_add_i32 s0, s0, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120
; FLATSCR-NEXT: scratch_store_byte off, v0, s1
; FLATSCR-NEXT: s_add_i32 s2, s1, 0x4000
; FLATSCR-NEXT: s_add_i32 s1, s1, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s1, 0x2120
; FLATSCR-NEXT: scratch_store_byte off, v0, s2
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_cbranch_scc1 .LBB2_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s0, 0x1000
; FLATSCR-NEXT: s_addk_i32 s0, 0x2000
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc
; FLATSCR-NEXT: s_movk_i32 s1, 0x1000
; FLATSCR-NEXT: s_addk_i32 s1, 0x4000
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s1 offset:720 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc
; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s1 offset:704 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:16 glc
; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_load_dwordx4 v[4:7], off, s0 glc
; FLATSCR-NEXT: scratch_load_dwordx4 v[4:7], off, s0 offset:-16 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; FLATSCR-NEXT: v_mov_b32_e32 v12, 0
Expand Down