From 7678df0e1257c184b34e049cf0dcabdeb0f466ee Mon Sep 17 00:00:00 2001 From: lalaniket8 Date: Sun, 14 Dec 2025 14:37:37 +0530 Subject: [PATCH 1/2] Checks for NonSSA Exec Mask Manipulation --- .../wavetransform-basic-check-rcfg.mir | 104 +++ .../WaveTransform/wavetransform-basic.mir | 436 +++++++-- ...wavetransform-natural-loops-check-rcfg.mir | 200 +++++ .../wavetransform-natural-loops.mir | 841 ++++++++++++++++-- 4 files changed, 1436 insertions(+), 145 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic-check-rcfg.mir create mode 100644 llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops-check-rcfg.mir diff --git a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic-check-rcfg.mir b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic-check-rcfg.mir new file mode 100644 index 0000000000000..7f740b7d6acc2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic-check-rcfg.mir @@ -0,0 +1,104 @@ +# REQUIRES: asserts +# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %S%{fs-sep}wavetransform-basic.mir 2>&1 | FileCheck -check-prefixes=CFG %s + +--- + +# CFG-LABEL: Wave CFG for if_then_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) [LatestPostDom: bb.2] [divergent] +# CFG-NEXT: bb.1 (#1) -> bb.2(*) +# CFG-NEXT: bb.2 (#2) [secondary] + + +# CFG-LABEL: Wave CFG for if_then_divergent_with_no_terminator: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) [LatestPostDom: bb.2] [divergent] +# CFG-NEXT: bb.1 (#1) -> bb.2(*) +# CFG-NEXT: bb.2 (#2) [secondary] + + +# CFG-LABEL: Wave CFG for if_then_else_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) (bb.2) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.1 (#1) -> (bb.3) +# CFG-NEXT: (#2) -> bb.2(*) bb.3(*) [LatestPostDom: bb.3] [divergent] [secondary] +# CFG-NEXT: bb.2 (#2) -> bb.3(*) +# CFG-NEXT: bb.3 (#3) [secondary] + + +# CFG-LABEL: Wave CFG for if_then_else_uniform: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) +# CFG-NEXT: bb.1 (#1) -> bb.3(*) +# CFG-NEXT: bb.2 (#2) -> bb.3(*) +# CFG-NEXT: bb.3 (#3) + + +# +# 0 +# / \ +# / \ +# 1 2 +# / \ / \ +# 3 4 5 6 +# \ \ / / +# ->7<- +# + +# CFG-LABEL: Wave CFG for if_then_else_nested_divergent_divergent +# CFG-NEXT: bb.0 (#0) -> bb.1(*) (bb.2) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.1 (#1) -> bb.3(*) (bb.4) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.3 (#2) -> (bb.7) +# CFG-NEXT: (#3) -> bb.4(*) (bb.7) [divergent] [secondary] +# CFG-NEXT: bb.4 (#3) -> (bb.7) +# CFG-NEXT: (#4) -> bb.2(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] +# CFG-NEXT: bb.2 (#4) -> bb.5(*) (bb.6) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.5 (#5) -> (bb.7) +# CFG-NEXT: (#6) -> bb.6(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] +# CFG-NEXT: bb.6 (#6) -> bb.7(*) +# CFG-NEXT: bb.7 (#7) [secondary] + + +# +# 0 +# / \ +# / \ +# 1 2 +# / \ / \ +# 3 4 5 6 +# \ \ / / +# ->7<- +# + +# CFG-LABEL: Wave CFG for if_then_else_nested_uniform_divergent +# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) +# CFG-NEXT: bb.1 (#1) -> bb.3(*) (bb.4) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.3 (#2) -> (bb.7) +# CFG-NEXT: (#3) -> bb.4(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] +# CFG-NEXT: bb.4 (#3) -> bb.7(*) +# CFG-NEXT: bb.2 (#4) -> bb.5(*) (bb.6) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.5 (#5) -> (bb.7) +# CFG-NEXT: (#6) -> bb.6(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] +# CFG-NEXT: bb.6 (#6) -> bb.7(*) +# CFG-NEXT: bb.7 (#7) [secondary] + + +# +# 0 +# / \ +# / \ +# 1 2 +# / \ / \ +# 3 4 5 6 +# \ \ / / +# ->7<- +# + + +# CFG-LABEL: Wave CFG for if_then_else_nested_divergent_uniform: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) (bb.2) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.1 (#1) -> bb.3(*) bb.4(*) +# CFG-NEXT: bb.3 (#2) -> (bb.7) +# CFG-NEXT: bb.4 (#3) -> (bb.7) +# CFG-NEXT: (#4) -> bb.2(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] +# CFG-NEXT: bb.2 (#4) -> bb.5(*) bb.6(*) +# CFG-NEXT: bb.5 (#5) -> bb.7(*) +# CFG-NEXT: bb.6 (#6) -> bb.7(*) +# CFG-NEXT: bb.7 (#7) [secondary] + diff --git a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir index ab267d1ddba71..e0395a89133cd 100644 --- a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir +++ b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir @@ -1,16 +1,38 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 # REQUIRES: asserts -# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %s 2>&1 | FileCheck -check-prefixes=CFG %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %s 2>&1 | FileCheck -check-prefixes=POSTWT %s --- -# CFG-LABEL: Wave CFG for if_then_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) [LatestPostDom: bb.2] [divergent] -# CFG-NEXT: bb.1 (#1) -> bb.2(*) -# CFG-NEXT: bb.2 (#2) [secondary] name: if_then_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: if_then_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; POSTWT-NEXT: liveins: $vgpr0 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], killed [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_1]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0 @@ -18,7 +40,7 @@ body: | %8:vgpr_32 = COPY $vgpr0 %15:sreg_32 = S_MOV_B32 0 %16:sreg_32 = V_CMP_EQ_U32_e64 %8, killed %15, implicit $exec - SI_BRCOND %bb.2, killed %16 + SI_BRCOND %bb.2, killed %16 S_BRANCH %bb.1 bb.1: @@ -32,14 +54,35 @@ body: | --- -# CFG-LABEL: Wave CFG for if_then_divergent_with_no_terminator: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) [LatestPostDom: bb.2] [divergent] -# CFG-NEXT: bb.1 (#1) -> bb.2(*) -# CFG-NEXT: bb.2 (#2) [secondary] name: if_then_divergent_with_no_terminator tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: if_then_divergent_with_no_terminator + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; POSTWT-NEXT: liveins: $vgpr0 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], killed [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_1]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0 @@ -58,16 +101,52 @@ body: | ... --- -# CFG-LABEL: Wave CFG for if_then_else_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) (bb.2) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.1 (#1) -> (bb.3) -# CFG-NEXT: (#2) -> bb.2(*) bb.3(*) [LatestPostDom: bb.3] [divergent] [secondary] -# CFG-NEXT: bb.2 (#2) -> bb.3(*) -# CFG-NEXT: bb.3 (#3) [secondary] name: if_then_else_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: if_then_else_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: liveins: $vgpr0 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], killed [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_CMP_EQ_U32_e64_]] + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_1]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.4(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY2]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_EQ_U32_e64_]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[COPY1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.3(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0 @@ -93,15 +172,34 @@ body: | ... --- -# CFG-LABEL: Wave CFG for if_then_else_uniform: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) -# CFG-NEXT: bb.1 (#1) -> bb.3(*) -# CFG-NEXT: bb.2 (#2) -> bb.3(*) -# CFG-NEXT: bb.3 (#3) name: if_then_else_uniform tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: if_then_else_uniform + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; POSTWT-NEXT: liveins: $sgpr0 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], killed [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_EQ_U32_e64_]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.3(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.3(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 liveins: $sgpr0 @@ -138,22 +236,116 @@ body: | # ->7<- # -# CFG-LABEL: Wave CFG for if_then_else_nested_divergent_divergent -# CFG-NEXT: bb.0 (#0) -> bb.1(*) (bb.2) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.1 (#1) -> bb.3(*) (bb.4) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.3 (#2) -> (bb.7) -# CFG-NEXT: (#3) -> bb.4(*) (bb.7) [divergent] [secondary] -# CFG-NEXT: bb.4 (#3) -> (bb.7) -# CFG-NEXT: (#4) -> bb.2(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] -# CFG-NEXT: bb.2 (#4) -> bb.5(*) (bb.6) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.5 (#5) -> (bb.7) -# CFG-NEXT: (#6) -> bb.6(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] -# CFG-NEXT: bb.6 (#6) -> bb.7(*) -# CFG-NEXT: bb.7 (#7) [secondary] name: if_then_else_nested_divergent_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: if_then_else_nested_divergent_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.9(0x40000000) + ; POSTWT-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[V_CMP_EQ_U32_e64_]] + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_1]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.9, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.3(0x40000000), %bb.10(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_1]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_EQ_U32_e64_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.9: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY4]], %bb.0, %33, %bb.10, %33, %bb.4 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_EQ_U32_e64_]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_3]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[COPY3]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY2]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_4:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_2]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_4]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[V_CMP_EQ_U32_e64_2]] + ; POSTWT-NEXT: [[S_XOR_B32_5:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_4]], implicit-def $scc + ; POSTWT-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_5]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_4]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.5 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.10(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.10 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.10: + ; POSTWT-NEXT: successors: %bb.4(0x40000000), %bb.9(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_2]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_6:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_6]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_AND_B32_5]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.9, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.9(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.9 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.8(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.8 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.8: + ; POSTWT-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY6]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_7:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_EQ_U32_e64_2]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_7]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_3]], [[S_AND_B32_6]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[COPY5]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.6 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.6: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.7: + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.6, [[S_AND_B32_3]], %bb.9, [[S_OR_B32_1]], %bb.8 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1, $vgpr2 @@ -212,21 +404,96 @@ body: | # ->7<- # -# CFG-LABEL: Wave CFG for if_then_else_nested_uniform_divergent -# CFG-NEXT: bb.0 (#0) -> bb.1(*) bb.2(*) -# CFG-NEXT: bb.1 (#1) -> bb.3(*) (bb.4) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.3 (#2) -> (bb.7) -# CFG-NEXT: (#3) -> bb.4(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] -# CFG-NEXT: bb.4 (#3) -> bb.7(*) -# CFG-NEXT: bb.2 (#4) -> bb.5(*) (bb.6) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.5 (#5) -> (bb.7) -# CFG-NEXT: (#6) -> bb.6(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] -# CFG-NEXT: bb.6 (#6) -> bb.7(*) -# CFG-NEXT: bb.7 (#7) [secondary] name: if_then_else_nested_uniform_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: if_then_else_nested_uniform_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; POSTWT-NEXT: liveins: $sgpr0, $vgpr1, $vgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_EQ_U32_e64_]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.3(0x40000000), %bb.9(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_1]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_EQ_U32_e64_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.9, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY2]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_2]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_1]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[V_CMP_EQ_U32_e64_2]] + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_2]], implicit-def $scc + ; POSTWT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_2]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_2]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.5 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.9(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.9 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.9: + ; POSTWT-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_3]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.8(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.8 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.8: + ; POSTWT-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_4:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_EQ_U32_e64_2]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_4]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[COPY3]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.6 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.6: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.7: + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_AND_B32_3]], %bb.4, [[S_AND_B32_4]], %bb.6, [[S_AND_B32_3]], %bb.9, [[S_AND_B32_4]], %bb.8 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 liveins: $sgpr0, $vgpr1, $vgpr2 @@ -286,20 +553,80 @@ body: | # -# CFG-LABEL: Wave CFG for if_then_else_nested_divergent_uniform: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) (bb.2) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.1 (#1) -> bb.3(*) bb.4(*) -# CFG-NEXT: bb.3 (#2) -> (bb.7) -# CFG-NEXT: bb.4 (#3) -> (bb.7) -# CFG-NEXT: (#4) -> bb.2(*) bb.7(*) [LatestPostDom: bb.7] [divergent] [secondary] -# CFG-NEXT: bb.2 (#4) -> bb.5(*) bb.6(*) -# CFG-NEXT: bb.5 (#5) -> bb.7(*) -# CFG-NEXT: bb.6 (#6) -> bb.7(*) -# CFG-NEXT: bb.7 (#7) [secondary] name: if_then_else_nested_divergent_uniform tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: if_then_else_nested_divergent_uniform + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000) + ; POSTWT-NEXT: liveins: $vgpr0, $sgpr1, $sgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[V_CMP_EQ_U32_e64_]] + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_1]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_EQ_U32_e64_1]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.8: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_EQ_U32_e64_]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[COPY3]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[COPY2]], [[S_MOV_B32_]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_EQ_U32_e64_2]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.5 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.8(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.8 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.8(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.8 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.6: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.7: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $sgpr1, $sgpr2 @@ -345,3 +672,4 @@ body: | bb.7: S_ENDPGM 0 ... + diff --git a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops-check-rcfg.mir b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops-check-rcfg.mir new file mode 100644 index 0000000000000..da3f4cb7d7525 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops-check-rcfg.mir @@ -0,0 +1,200 @@ +# REQUIRES: asserts +# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %S%{fs-sep}wavetransform-natural-loops.mir 2>&1 | FileCheck -check-prefixes=CFG %s + + +# CFG-LABEL: Wave CFG for simple_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.1(*) bb.2(*) [LatestPostDom: bb.2] [divergent] +# CFG-NEXT: bb.2 (#4) [secondary] + +# CFG-LABEL: Wave CFG for two_backedges_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.1) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.3) [divergent] +# CFG-NEXT: (#4) -> bb.1(*) bb.3(*) [LatestPostDom: bb.3] [divergent] [secondary] +# CFG-NEXT: bb.3 (#5) [secondary] + +# CFG-LABEL: Wave CFG for two_backedges_uniform_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.1(*) +# CFG-NEXT: bb.2 (#3) -> bb.1(*) bb.3(*) [LatestPostDom: bb.3] [divergent] +# CFG-NEXT: bb.3 (#5) [secondary] + +# CFG-LABEL: Wave CFG for two_backedges_divergent_uniform: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.1) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.3) +# CFG-NEXT: (#4) -> bb.1(*) bb.3(*) [LatestPostDom: bb.3] [divergent] [secondary] +# CFG-NEXT: bb.3 (#5) [secondary] + +# +# 0 +# | +# v---<1<--\ +# | | | +# | v-<2<\ | +# | | | | | +# | | 3-^ | +# |/ \ | +# 4 ---^ +# + +# CFG-LABEL: Wave CFG for nested_uniform: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.4(*) bb.2(*) +# CFG-NEXT: bb.2 (#4) -> bb.3(*) bb.4(*) +# CFG-NEXT: bb.3 (#5) -> bb.2(*) bb.1(*) +# CFG-NEXT: bb.4 (#9) + +# +# 0 +# | +# v---<1<--\ +# | | | +# | v-<2<\ | +# | | | | | +# | | 3-^ | +# |/ \ | +# 4 ---^ +# + +# CFG-LABEL: Wave CFG for nested_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.4(*) [LatestPostDom: bb.4] [divergent] +# CFG-NEXT: bb.2 (#4) -> bb.3(*) (bb.4) [divergent] +# CFG-NEXT: bb.3 (#5) -> bb.2(*) (bb.1) [LatestPostDom: ] [divergent] +# CFG-NEXT: (#7) -> bb.1(*) bb.4(*) [LatestPostDom: bb.4] [divergent] [secondary] +# CFG-NEXT: bb.4 (#9) [secondary] + +# +# 0 +# | +# /->1 +# | | +# | [2 +# | | +# ^-<3 +# | +# 4 +# + +# CFG-LABEL: Wave CFG for nested2_uniform_uniform: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) +# CFG-NEXT: bb.2 (#4) -> bb.3(*) bb.2(*) +# CFG-NEXT: bb.3 (#6) -> bb.4(*) bb.1(*) +# CFG-NEXT: bb.4 (#8) + +# +# 0 +# | +# /->1 +# | | +# | [2 +# | | +# ^-<3 +# | +# 4 +# + +# CFG-LABEL: Wave CFG for nested2_divergent_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) +# CFG-NEXT: bb.2 (#4) -> bb.2(*) bb.3(*) [LatestPostDom: bb.3] [divergent] +# CFG-NEXT: bb.3 (#6) -> bb.1(*) bb.4(*) [LatestPostDom: bb.4] [divergent] [secondary] +# CFG-NEXT: bb.4 (#8) [secondary] + +# +# 0 +# | +# /-->1<--\ +# | / \ | +# ^-2 3-^ +# | | +# 4 5 +# \ / +# 6 +# + +# CFG-LABEL: Wave CFG for multi_backedge_exits_divergent_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.3) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.4) [divergent] +# CFG-NEXT: (#4) -> bb.3(*) (bb.4,bb.1) [LatestPostDom: ] [divergent] [secondary] +# CFG-NEXT: bb.3 (#4) -> (bb.1,bb.5) [divergent] +# CFG-NEXT: (#5) -> bb.1(*) (bb.4,bb.5) [LatestPostDom: ] [divergent] [secondary] +# CFG-NEXT: (#6) -> bb.5(*) (bb.4) [LatestPostDom: ] [divergent] [secondary] +# CFG-NEXT: bb.5 (#6) -> (bb.6) +# CFG-NEXT: (#7) -> bb.4(*) bb.6(*) [LatestPostDom: bb.6] [divergent] [secondary] +# CFG-NEXT: bb.4 (#7) -> bb.6(*) +# CFG-NEXT: bb.6 (#8) [secondary] + +# +# 0 +# | +# /-->1<--\ +# | / \ | +# ^-2 3-^ +# | | +# 4 5 +# \ / +# 6 +# + +# CFG-LABEL: Wave CFG for multi_backedge_exits_uniform_divergent: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.3(*) +# CFG-NEXT: bb.2 (#3) -> bb.1(*) (bb.4) [divergent] +# CFG-NEXT: bb.3 (#4) -> bb.1(*) (bb.5) [LatestPostDom: ] [divergent] +# CFG-NEXT: (#6) -> bb.5(*) (bb.4) [LatestPostDom: ] [divergent] [secondary] +# CFG-NEXT: bb.5 (#6) -> (bb.6) +# CFG-NEXT: (#7) -> bb.4(*) bb.6(*) [LatestPostDom: bb.6] [divergent] [secondary] +# CFG-NEXT: bb.4 (#7) -> bb.6(*) +# CFG-NEXT: bb.6 (#8) [secondary] + +# +# 0 +# | +# /-->1<--\ +# | / \ | +# ^-2 3-^ +# | | +# 4 5 +# \ / +# 6 +# + +# CFG-LABEL: Wave CFG for multi_backedge_exits_divergent_uniform: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.3) [LatestPostDom: ] [divergent] +# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.4) +# CFG-NEXT: (#4) -> bb.3(*) (bb.4,bb.1) [LatestPostDom: ] [divergent] [secondary] +# CFG-NEXT: bb.3 (#4) -> (bb.1,bb.5) +# CFG-NEXT: (#5) -> bb.1(*) (bb.4,bb.5) [LatestPostDom: ] [divergent] [secondary] +# CFG-NEXT: (#6) -> bb.5(*) (bb.4) [LatestPostDom: ] [divergent] [secondary] +# CFG-NEXT: bb.5 (#6) -> (bb.6) +# CFG-NEXT: (#7) -> bb.4(*) bb.6(*) [LatestPostDom: bb.6] [divergent] [secondary] +# CFG-NEXT: bb.4 (#7) -> bb.6(*) +# CFG-NEXT: bb.6 (#8) [secondary] + +# +# 0 +# | +# /-->1<--\ +# | / \ | +# ^-2 3-^ +# | | +# 4 5 +# \ / +# 6 +# + +# CFG-LABEL: Wave CFG for multi_backedge_exits_uniform_uniform: +# CFG-NEXT: bb.0 (#0) -> bb.1(*) +# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.3(*) +# CFG-NEXT: bb.2 (#3) -> bb.4(*) bb.1(*) +# CFG-NEXT: bb.3 (#4) -> bb.5(*) bb.1(*) +# CFG-NEXT: bb.5 (#6) -> bb.6(*) +# CFG-NEXT: bb.4 (#7) -> bb.6(*) +# CFG-NEXT: bb.6 (#8) + diff --git a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir index e28713c5607a8..6d6dff27b7017 100644 --- a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir +++ b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir @@ -1,19 +1,43 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 # REQUIRES: asserts -# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %s 2>&1 | FileCheck -check-prefixes=CFG %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %s 2>&1 | FileCheck -check-prefixes=POSTWT %s -# TODO: Test some of the generated MIR. This is currently not done because -# of high fluctuations in the MIR. --- -# CFG-LABEL: Wave CFG for simple_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.1(*) bb.2(*) [LatestPostDom: bb.2] [divergent] -# CFG-NEXT: bb.2 (#4) [secondary] name: simple_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: simple_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $vgpr0 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %12, %bb.1 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %3, %bb.1 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI1]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_XOR_B32_]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $vgpr0 @@ -36,16 +60,63 @@ body: | ... --- -# CFG-LABEL: Wave CFG for two_backedges_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.1) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.3) [divergent] -# CFG-NEXT: (#4) -> bb.1(*) bb.3(*) [LatestPostDom: bb.3] [divergent] [secondary] -# CFG-NEXT: bb.3 (#5) [secondary] name: two_backedges_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: two_backedges_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $vgpr0, $vgpr1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %23, %bb.4 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.4 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI1]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_1]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.4(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[COPY3]], %bb.1, [[S_OR_B32_]], %bb.2 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI2]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI2]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 @@ -76,15 +147,48 @@ body: | ... --- -# CFG-LABEL: Wave CFG for two_backedges_uniform_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.1(*) -# CFG-NEXT: bb.2 (#3) -> bb.1(*) bb.3(*) [LatestPostDom: bb.3] [divergent] -# CFG-NEXT: bb.3 (#5) [secondary] name: two_backedges_uniform_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: two_backedges_uniform_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $sgpr0, $vgpr1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, [[PHI]], %bb.1, %14, %bb.2 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.1, %4, %bb.2 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI1]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_NE_U32_e64_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_XOR_B32_]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $sgpr0, $vgpr1 @@ -115,16 +219,63 @@ body: | ... --- -# CFG-LABEL: Wave CFG for two_backedges_divergent_uniform: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.1) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.3) -# CFG-NEXT: (#4) -> bb.1(*) bb.3(*) [LatestPostDom: bb.3] [divergent] [secondary] -# CFG-NEXT: bb.3 (#5) [secondary] name: two_backedges_divergent_uniform tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: two_backedges_divergent_uniform + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $vgpr0, $sgpr1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %23, %bb.4 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.4 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI1]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_XOR_B32_1]] + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.4(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[COPY3]], %bb.1, [[S_OR_B32_]], %bb.2 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI2]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI2]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $vgpr0, $sgpr1 @@ -167,16 +318,49 @@ body: | # 4 ---^ # -# CFG-LABEL: Wave CFG for nested_uniform: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.4(*) bb.2(*) -# CFG-NEXT: bb.2 (#4) -> bb.3(*) bb.4(*) -# CFG-NEXT: bb.3 (#5) -> bb.2(*) bb.1(*) -# CFG-NEXT: bb.4 (#9) name: nested_uniform tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: nested_uniform + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %5, %bb.3 + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[PHI]], [[COPY]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY2]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_2]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $sgpr0, $sgpr1, $sgpr2 @@ -225,17 +409,87 @@ body: | # 4 ---^ # -# CFG-LABEL: Wave CFG for nested_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.4(*) [LatestPostDom: bb.4] [divergent] -# CFG-NEXT: bb.2 (#4) -> bb.3(*) (bb.4) [divergent] -# CFG-NEXT: bb.3 (#5) -> bb.2(*) (bb.1) [LatestPostDom: ] [divergent] -# CFG-NEXT: (#7) -> bb.1(*) bb.4(*) [LatestPostDom: bb.4] [divergent] [secondary] -# CFG-NEXT: bb.4 (#9) [secondary] name: nested_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: nested_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; POSTWT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.0, %37, %bb.5 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.0, [[S_MOV_B32_1]], %bb.5 + ; POSTWT-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_]], %bb.0, %5, %bb.5 + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[PHI2]], [[COPY]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_XOR_B32_]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.1, %28, %bb.3 + ; POSTWT-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[PHI1]], %bb.1, [[S_MOV_B32_1]], %bb.3 + ; POSTWT-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.1, %13, %bb.3 + ; POSTWT-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI2]], 1, 0, implicit $exec + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_ADD_U32_e64_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI4]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_OR_B32_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_1]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_OR_B32_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], implicit $exec + ; POSTWT-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI5]], [[V_CMP_NE_U32_e64_2]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_2]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_2]], [[S_XOR_B32_3]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_2]], %bb.2, [[S_OR_B32_4]], %bb.3 + ; POSTWT-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[PHI5]], %bb.2, [[S_OR_B32_3]], %bb.3 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI6]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_4:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI7]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_4]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[S_AND_B32_2]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI7]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: [[PHI8:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.1, [[S_OR_B32_5]], %bb.5 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI8]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1, $vgpr2 @@ -284,16 +538,48 @@ body: | # 4 # -# CFG-LABEL: Wave CFG for nested2_uniform_uniform: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) -# CFG-NEXT: bb.2 (#4) -> bb.3(*) bb.2(*) -# CFG-NEXT: bb.3 (#6) -> bb.4(*) bb.1(*) -# CFG-NEXT: bb.4 (#8) name: nested2_uniform_uniform tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: nested2_uniform_uniform + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $sgpr0, $sgpr1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.3 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.1, %7, %bb.2 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI1]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_1]], [[COPY]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $sgpr0, $sgpr1 @@ -342,16 +628,62 @@ body: | # 4 # -# CFG-LABEL: Wave CFG for nested2_divergent_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) -# CFG-NEXT: bb.2 (#4) -> bb.2(*) bb.3(*) [LatestPostDom: bb.3] [divergent] -# CFG-NEXT: bb.3 (#6) -> bb.1(*) bb.4(*) [LatestPostDom: bb.4] [divergent] [secondary] -# CFG-NEXT: bb.4 (#8) [secondary] name: nested2_divergent_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: nested2_divergent_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $vgpr0, $vgpr1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.0, %24, %bb.3 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_]], %bb.0, %4, %bb.3 + ; POSTWT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.1, %20, %bb.2 + ; POSTWT-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_1]], %bb.1, %7, %bb.2 + ; POSTWT-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI3]], 1, 0, implicit $exec + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_ADD_U32_e64_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], [[S_XOR_B32_]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI1]], 1, 0, implicit $exec + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_ADD_U32_e64_1]], [[COPY]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_NE_U32_e64_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_XOR_B32_1]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_1]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 @@ -400,22 +732,123 @@ body: | # 6 # -# CFG-LABEL: Wave CFG for multi_backedge_exits_divergent_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.3) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.4) [divergent] -# CFG-NEXT: (#4) -> bb.3(*) (bb.4,bb.1) [LatestPostDom: ] [divergent] [secondary] -# CFG-NEXT: bb.3 (#4) -> (bb.1,bb.5) [divergent] -# CFG-NEXT: (#5) -> bb.1(*) (bb.4,bb.5) [LatestPostDom: ] [divergent] [secondary] -# CFG-NEXT: (#6) -> bb.5(*) (bb.4) [LatestPostDom: ] [divergent] [secondary] -# CFG-NEXT: bb.5 (#6) -> (bb.6) -# CFG-NEXT: (#7) -> bb.4(*) bb.6(*) [LatestPostDom: bb.6] [divergent] [secondary] -# CFG-NEXT: bb.4 (#7) -> bb.6(*) -# CFG-NEXT: bb.6 (#8) [secondary] name: multi_backedge_exits_divergent_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: multi_backedge_exits_divergent_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %44, %bb.9 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %32, %bb.9 + ; POSTWT-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %26, %bb.9 + ; POSTWT-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %5, %bb.9 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI3]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.10(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_1]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_1]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI1]], [[S_AND_B32_2]], implicit-def $scc + ; POSTWT-NEXT: S_BRANCH %bb.10 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.10: + ; POSTWT-NEXT: successors: %bb.3(0x40000000), %bb.9(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[PHI1]], %bb.1, [[S_OR_B32_]], %bb.2 + ; POSTWT-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.1, [[COPY4]], %bb.2 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.9, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.9(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY2]], implicit $exec + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI5]], [[V_CMP_NE_U32_e64_2]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_2]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_3]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], [[S_AND_B32_4]], implicit-def $scc + ; POSTWT-NEXT: S_BRANCH %bb.9 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.7: + ; POSTWT-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, %49, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_4:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI4]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_4]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI4]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.6(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.6 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.9: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[PHI2]], %bb.10, [[S_OR_B32_2]], %bb.3 + ; POSTWT-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[PHI5]], %bb.10, [[S_OR_B32_1]], %bb.3 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_3]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_5:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI7]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_5]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_AND_B32_6]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI7]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.8: + ; POSTWT-NEXT: successors: %bb.5(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_3]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_6:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI6]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_7:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_6]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI6]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.5 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.6: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_5]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1, $vgpr2 @@ -476,20 +909,103 @@ body: | # 6 # -# CFG-LABEL: Wave CFG for multi_backedge_exits_uniform_divergent: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.3(*) -# CFG-NEXT: bb.2 (#3) -> bb.1(*) (bb.4) [divergent] -# CFG-NEXT: bb.3 (#4) -> bb.1(*) (bb.5) [LatestPostDom: ] [divergent] -# CFG-NEXT: (#6) -> bb.5(*) (bb.4) [LatestPostDom: ] [divergent] [secondary] -# CFG-NEXT: bb.5 (#6) -> (bb.6) -# CFG-NEXT: (#7) -> bb.4(*) bb.6(*) [LatestPostDom: bb.6] [divergent] [secondary] -# CFG-NEXT: bb.4 (#7) -> bb.6(*) -# CFG-NEXT: bb.6 (#8) [secondary] name: multi_backedge_exits_uniform_divergent tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: multi_backedge_exits_uniform_divergent + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $sgpr0, $vgpr1, $vgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %29, %bb.2, %28, %bb.3 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %23, %bb.2, [[PHI1]], %bb.3 + ; POSTWT-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, [[PHI2]], %bb.2, %17, %bb.3 + ; POSTWT-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %5, %bb.2, %5, %bb.3 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI3]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_1]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI1]], [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY2]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_2]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_1]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], [[S_AND_B32_2]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_1]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_AND_B32_3]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_2]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.7: + ; POSTWT-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, %36, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, %26, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term %26 + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.6(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.6 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.8: + ; POSTWT-NEXT: successors: %bb.5(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_3]], %bb.3 + ; POSTWT-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[PHI1]], %bb.3 + ; POSTWT-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[PHI2]], %bb.2, [[S_OR_B32_2]], %bb.3 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI4]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI6]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_3]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI6]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.5 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.6: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_4]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $sgpr0, $vgpr1, $vgpr2 @@ -550,22 +1066,123 @@ body: | # 6 # -# CFG-LABEL: Wave CFG for multi_backedge_exits_divergent_uniform: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) (bb.3) [LatestPostDom: ] [divergent] -# CFG-NEXT: bb.2 (#3) -> (bb.1,bb.4) -# CFG-NEXT: (#4) -> bb.3(*) (bb.4,bb.1) [LatestPostDom: ] [divergent] [secondary] -# CFG-NEXT: bb.3 (#4) -> (bb.1,bb.5) -# CFG-NEXT: (#5) -> bb.1(*) (bb.4,bb.5) [LatestPostDom: ] [divergent] [secondary] -# CFG-NEXT: (#6) -> bb.5(*) (bb.4) [LatestPostDom: ] [divergent] [secondary] -# CFG-NEXT: bb.5 (#6) -> (bb.6) -# CFG-NEXT: (#7) -> bb.4(*) bb.6(*) [LatestPostDom: bb.6] [divergent] [secondary] -# CFG-NEXT: bb.4 (#7) -> bb.6(*) -# CFG-NEXT: bb.6 (#8) [secondary] name: multi_backedge_exits_divergent_uniform tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: multi_backedge_exits_divergent_uniform + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $vgpr0, $sgpr1, $sgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; POSTWT-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %44, %bb.9 + ; POSTWT-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %32, %bb.9 + ; POSTWT-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.0, %26, %bb.9 + ; POSTWT-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %5, %bb.9 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI3]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.10(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_1]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_1]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI1]], [[S_AND_B32_2]], implicit-def $scc + ; POSTWT-NEXT: S_BRANCH %bb.10 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.10: + ; POSTWT-NEXT: successors: %bb.3(0x40000000), %bb.9(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[PHI1]], %bb.1, [[S_OR_B32_]], %bb.2 + ; POSTWT-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_2]], %bb.1, [[COPY4]], %bb.2 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.9, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.9(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY2]], implicit $exec + ; POSTWT-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI5]], [[V_CMP_NE_U32_e64_2]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_2]], -1, implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_3]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], [[S_AND_B32_4]], implicit-def $scc + ; POSTWT-NEXT: S_BRANCH %bb.9 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.7: + ; POSTWT-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, %49, implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_4:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI4]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_4]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI4]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.6(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.6 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.9: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[PHI2]], %bb.10, [[S_OR_B32_2]], %bb.3 + ; POSTWT-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[PHI5]], %bb.10, [[S_OR_B32_1]], %bb.3 + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_3]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_5:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI7]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_5]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI]], [[S_AND_B32_6]], implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI7]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.8: + ; POSTWT-NEXT: successors: %bb.5(0x40000000), %bb.7(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_3]], implicit-def $scc + ; POSTWT-NEXT: [[S_XOR_B32_6:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[PHI6]], implicit-def $scc + ; POSTWT-NEXT: [[S_AND_B32_7:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_6]], $exec_lo, implicit-def $scc + ; POSTWT-NEXT: $exec_lo = S_MOV_B32_term [[PHI6]] + ; POSTWT-NEXT: SI_WAVE_CF_EDGE implicit-def $scc + ; POSTWT-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; POSTWT-NEXT: S_BRANCH %bb.5 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.7(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.7 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.6: + ; POSTWT-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_AND_B32_5]], implicit-def $scc + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $vgpr0, $sgpr1, $sgpr2 @@ -626,18 +1243,59 @@ body: | # 6 # -# CFG-LABEL: Wave CFG for multi_backedge_exits_uniform_uniform: -# CFG-NEXT: bb.0 (#0) -> bb.1(*) -# CFG-NEXT: bb.1 (#2) -> bb.2(*) bb.3(*) -# CFG-NEXT: bb.2 (#3) -> bb.4(*) bb.1(*) -# CFG-NEXT: bb.3 (#4) -> bb.5(*) bb.1(*) -# CFG-NEXT: bb.5 (#6) -> bb.6(*) -# CFG-NEXT: bb.4 (#7) -> bb.6(*) -# CFG-NEXT: bb.6 (#8) name: multi_backedge_exits_uniform_uniform tracksRegLiveness: true body: | + ; POSTWT-LABEL: name: multi_backedge_exits_uniform_uniform + ; POSTWT: bb.0: + ; POSTWT-NEXT: successors: %bb.1(0x80000000) + ; POSTWT-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; POSTWT-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; POSTWT-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; POSTWT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; POSTWT-NEXT: S_BRANCH %bb.1 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.1: + ; POSTWT-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %5, %bb.2, %5, %bb.3 + ; POSTWT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PHI]], 1, implicit-def $scc + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.3 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.2: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY1]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_1]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.4 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.3: + ; POSTWT-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: [[V_CMP_NE_U32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[S_ADD_U32_]], [[COPY2]], implicit $exec + ; POSTWT-NEXT: $vcc_lo = COPY [[V_CMP_NE_U32_e64_2]] + ; POSTWT-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; POSTWT-NEXT: S_BRANCH %bb.5 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.4: + ; POSTWT-NEXT: successors: %bb.6(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.6 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.5: + ; POSTWT-NEXT: successors: %bb.6(0x80000000) + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: S_BRANCH %bb.6 + ; POSTWT-NEXT: {{ $}} + ; POSTWT-NEXT: bb.6: + ; POSTWT-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 liveins: $sgpr0, $sgpr1, $sgpr2 @@ -684,3 +1342,4 @@ body: | bb.6: S_ENDPGM 0 ... + From 9055fe7e7a6ff5e6db391d45349b4811061e22b8 Mon Sep 17 00:00:00 2001 From: anikelal Date: Wed, 17 Dec 2025 20:08:16 +0530 Subject: [PATCH 2/2] removed -amdgpu-wave-transform-print-final flag from littests --- llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir | 2 +- .../AMDGPU/WaveTransform/wavetransform-natural-loops.mir | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir index e0395a89133cd..5a5448903981a 100644 --- a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir +++ b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-basic.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 # REQUIRES: asserts -# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %s 2>&1 | FileCheck -check-prefixes=POSTWT %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -o - %s 2>&1 | FileCheck -check-prefixes=POSTWT %s --- diff --git a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir index 6d6dff27b7017..5844ddd25228a 100644 --- a/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir +++ b/llvm/test/CodeGen/AMDGPU/WaveTransform/wavetransform-natural-loops.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 # REQUIRES: asserts -# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -amdgpu-wave-transform-print-final=1 -o - %s 2>&1 | FileCheck -check-prefixes=POSTWT %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-late-wave-transform=1 -run-pass=amdgpu-wave-transform -o - %s 2>&1 | FileCheck -check-prefixes=POSTWT %s ---