diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5dea64844e64e..8834aafcbffb9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1588,7 +1588,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, - FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, + FeatureUnalignedDSAccess, FeatureGWS, FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 2356dad5275c9..f191b5e4fecb5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -3947,9 +3947,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 ; GFX9-NEXT: v_add_u32_e32 v0, s3, v0 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16 +; GFX9-NEXT: v_add_u32_e32 v0, s2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 15 -; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm ; @@ -3969,9 +3969,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: v_add_u32_e32 v0, s1, v0 -; GFX942-NEXT: v_add3_u32 v0, s0, v0, -16 +; GFX942-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-NEXT: v_mov_b32_e32 v1, 15 -; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_endpgm ; @@ -3996,9 +3996,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 ; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0 ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s2, v0 ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off +; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16 ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: s_endpgm ; @@ -4018,9 +4018,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: ; UNALIGNED_GFX942: ; %bb.0: ; %bb ; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s1, v0 -; UNALIGNED_GFX942-NEXT: v_add3_u32 v0, s0, v0, -16 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s0, v0 ; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1 ; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX942-NEXT: s_endpgm ; @@ -4052,8 +4052,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8 -; GFX9-NEXT: scratch_load_dword v2, off, s0 +; GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm @@ -4071,8 +4070,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; ; GFX942-LABEL: sgpr_base_negative_offset: ; GFX942: ; %bb.0: ; %entry -; GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8 -; GFX942-NEXT: scratch_load_dword v2, off, s0 +; GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dword v[0:1], v2, off ; GFX942-NEXT: s_endpgm @@ -4095,8 +4093,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; UNALIGNED_GFX9: ; %bb.0: ; %entry ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; UNALIGNED_GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8 -; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s0 +; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24 ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: global_store_dword v[0:1], v2, off ; UNALIGNED_GFX9-NEXT: s_endpgm @@ -4114,8 +4111,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; ; UNALIGNED_GFX942-LABEL: sgpr_base_negative_offset: ; UNALIGNED_GFX942: ; %bb.0: ; %entry -; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8 -; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24 ; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off ; UNALIGNED_GFX942-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll index e01cb79382c05..737c811cd9d93 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -1582,8 +1582,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) { ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX942-SDAG-NEXT: v_add_u32_e32 v0, -1, v0 -; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1 ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: s_endpgm ; @@ -1593,8 +1592,8 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) { ; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1 -; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1 ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 870b679a84d11..0863698dee499 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -4561,11 +4561,10 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg) ; GFX9-LABEL: store_load_i32_negative_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_u32_e32 v0, -1, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 1 -; GFX9-NEXT: scratch_store_byte v0, v1, off +; GFX9-NEXT: scratch_store_byte v0, v1, off offset:-1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: scratch_load_ubyte v0, v0, off glc +; GFX9-NEXT: scratch_load_ubyte v0, v0, off offset:-1 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4632,22 +4631,20 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg) ; GFX9-PAL-LABEL: store_load_i32_negative_unaligned: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-PAL-NEXT: v_add_u32_e32 v0, -1, v0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 1 -; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off +; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off offset:-1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off glc +; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off offset:-1 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: store_load_i32_negative_unaligned: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_add_u32_e32 v0, -1, v0 ; GFX942-NEXT: v_mov_b32_e32 v1, 1 -; GFX942-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1 +; GFX942-NEXT: scratch_load_ubyte v0, v0, off offset:-1 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] ; @@ -4732,11 +4729,11 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture ; GFX9-LABEL: store_load_i32_large_negative_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0 +; GFX9-NEXT: v_add_u32_e32 v0, 0xfffff000, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 1 -; GFX9-NEXT: scratch_store_byte v0, v1, off +; GFX9-NEXT: scratch_store_byte v0, v1, off offset:-129 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: scratch_load_ubyte v0, v0, off glc +; GFX9-NEXT: scratch_load_ubyte v0, v0, off offset:-129 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4805,22 +4802,22 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture ; GFX9-PAL-LABEL: store_load_i32_large_negative_unaligned: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-PAL-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0 +; GFX9-PAL-NEXT: v_add_u32_e32 v0, 0xfffff000, v0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 1 -; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off +; GFX9-PAL-NEXT: scratch_store_byte v0, v1, off offset:-129 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off glc +; GFX9-PAL-NEXT: scratch_load_ubyte v0, v0, off offset:-129 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: store_load_i32_large_negative_unaligned: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0 +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffff000, v0 ; GFX942-NEXT: v_mov_b32_e32 v1, 1 -; GFX942-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-NEXT: scratch_store_byte v0, v1, off offset:-129 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1 +; GFX942-NEXT: scratch_load_ubyte v0, v0, off offset:-129 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] ; @@ -5485,9 +5482,8 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: s_add_i32 s2, s2, s3 ; GFX9-NEXT: v_add_u32_e32 v0, s2, v0 -; GFX9-NEXT: v_add_u32_e32 v0, -16, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 15 -; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm ; @@ -5531,8 +5527,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-PAL-NEXT: s_add_i32 s0, s0, s1 ; GFX9-PAL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX9-PAL-NEXT: v_add_u32_e32 v0, -16, v0 -; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off +; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off offset:-16 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; @@ -5540,9 +5535,8 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_add_i32 s0, s0, s1 ; GFX942-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX942-NEXT: v_add_u32_e32 v0, -16, v0 ; GFX942-NEXT: v_mov_b32_e32 v1, 15 -; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_endpgm ; @@ -5591,8 +5585,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_addk_i32 s2, 0xffe8 -; GFX9-NEXT: scratch_load_dword v2, off, s2 +; GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm @@ -5631,16 +5624,14 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: s_addk_i32 s0, 0xffe8 -; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0 +; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0 offset:-24 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-PAL-NEXT: s_endpgm ; ; GFX942-LABEL: sgpr_base_negative_offset: ; GFX942: ; %bb.0: ; %entry -; GFX942-NEXT: s_addk_i32 s0, 0xffe8 -; GFX942-NEXT: scratch_load_dword v2, off, s0 +; GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dword v[0:1], v2, off ; GFX942-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index db80f5479d36b..47ebeaa7ca820 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -294,28 +294,29 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: s_mov_b32 s0, 0 -; FLATSCR-NEXT: scratch_store_dword off, v0, s0 offset:1024 +; FLATSCR-NEXT: s_movk_i32 s1, 0x2000 +; FLATSCR-NEXT: s_movk_i32 s0, 0x4010 +; FLATSCR-NEXT: scratch_store_dword off, v0, s1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: .LBB2_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 -; FLATSCR-NEXT: s_add_i32 s1, s0, 0x2000 -; FLATSCR-NEXT: s_add_i32 s0, s0, 1 -; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120 -; FLATSCR-NEXT: scratch_store_byte off, v0, s1 +; FLATSCR-NEXT: s_add_i32 s2, s1, 0x4000 +; FLATSCR-NEXT: s_add_i32 s1, s1, 1 +; FLATSCR-NEXT: s_cmpk_lt_u32 s1, 0x2120 +; FLATSCR-NEXT: scratch_store_byte off, v0, s2 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_cbranch_scc1 .LBB2_1 ; FLATSCR-NEXT: ; %bb.2: ; %split -; FLATSCR-NEXT: s_movk_i32 s0, 0x1000 -; FLATSCR-NEXT: s_addk_i32 s0, 0x2000 -; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc +; FLATSCR-NEXT: s_movk_i32 s1, 0x1000 +; FLATSCR-NEXT: s_addk_i32 s1, 0x4000 +; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s1 offset:720 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc +; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s1 offset:704 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0x2000 -; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:16 glc +; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dwordx4 v[4:7], off, s0 glc +; FLATSCR-NEXT: scratch_load_dwordx4 v[4:7], off, s0 offset:-16 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; FLATSCR-NEXT: v_mov_b32_e32 v12, 0