Skip to content

Commit af4098b

Browse files
authored
[RISCV][llvm] Support PSLL codegen for P extension (#170074)
There's no instruction for a vector shift amount, so we have to scalarize it and rebuild the vector.
1 parent 71de783 commit af4098b

File tree

4 files changed

+171
-0
lines changed

4 files changed

+171
-0
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
529529
setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VTs, Legal);
530530
setOperationAction({ISD::ABDS, ISD::ABDU}, VTs, Legal);
531531
setOperationAction(ISD::SPLAT_VECTOR, VTs, Legal);
532+
setOperationAction(ISD::SHL, VTs, Custom);
532533
setOperationAction(ISD::BITCAST, VTs, Custom);
533534
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
534535
}
@@ -8592,6 +8593,18 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
85928593
case ISD::VSELECT:
85938594
return lowerToScalableOp(Op, DAG);
85948595
case ISD::SHL:
8596+
if (Subtarget.enablePExtCodeGen() &&
8597+
Op.getSimpleValueType().isFixedLengthVector()) {
8598+
// We have patterns for scalar/immediate shift amount, so no lowering
8599+
// needed.
8600+
if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR)
8601+
return Op;
8602+
8603+
// There's no vector-vector version of shift instruction in P extension so
8604+
// we need to unroll to scalar computation and pack them back.
8605+
return DAG.UnrollVectorOp(Op.getNode());
8606+
}
8607+
[[fallthrough]];
85958608
case ISD::SRA:
85968609
case ISD::SRL:
85978610
if (Op.getSimpleValueType().isFixedLengthVector())

llvm/lib/Target/RISCV/RISCVInstrInfoP.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,6 +1532,15 @@ let Predicates = [HasStdExtP] in {
15321532
def: Pat<(XLenVecI16VT (sshlsat GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))),
15331533
(PSSLAI_H GPR:$rs1, uimm4:$shamt)>;
15341534

1535+
// 8-bit logical shift left
1536+
def: Pat<(XLenVecI8VT (shl GPR:$rs1,
1537+
(XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))),
1538+
(PSLL_BS GPR:$rs1, GPR:$rs2)>;
1539+
// 16-bit logical shift left
1540+
def: Pat<(XLenVecI16VT (shl GPR:$rs1,
1541+
(XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))),
1542+
(PSLL_HS GPR:$rs1, GPR:$rs2)>;
1543+
15351544
// 8-bit PLI SD node pattern
15361545
def: Pat<(XLenVecI8VT (splat_vector simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
15371546
// 16-bit PLI SD node pattern
@@ -1578,6 +1587,10 @@ let Predicates = [HasStdExtP, IsRV64] in {
15781587
def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>;
15791588
def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>;
15801589

1590+
// 32-bit logical shift left
1591+
def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))),
1592+
(PSLL_WS GPR:$rs1, GPR:$rs2)>;
1593+
15811594
// splat pattern
15821595
def: Pat<(v2i32 (splat_vector (XLenVT GPR:$rs2))), (PADD_WS (XLenVT X0), GPR:$rs2)>;
15831596

llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,3 +637,112 @@ define void @test_psslai_h(ptr %ret_ptr, ptr %a_ptr) {
637637
store <2 x i16> %res, ptr %ret_ptr
638638
ret void
639639
}
640+
641+
; Test logical shift left(scalar shamt)
642+
define void @test_psll_hs(ptr %ret_ptr, ptr %a_ptr, i16 %shamt) {
643+
; CHECK-LABEL: test_psll_hs:
644+
; CHECK: # %bb.0:
645+
; CHECK-NEXT: lw a1, 0(a1)
646+
; CHECK-NEXT: psll.hs a1, a1, a2
647+
; CHECK-NEXT: sw a1, 0(a0)
648+
; CHECK-NEXT: ret
649+
%a = load <2 x i16>, ptr %a_ptr
650+
%insert = insertelement <2 x i16> poison, i16 %shamt, i32 0
651+
%b = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer
652+
%res = shl <2 x i16> %a, %b
653+
store <2 x i16> %res, ptr %ret_ptr
654+
ret void
655+
}
656+
657+
define void @test_psll_bs(ptr %ret_ptr, ptr %a_ptr, i8 %shamt) {
658+
; CHECK-LABEL: test_psll_bs:
659+
; CHECK: # %bb.0:
660+
; CHECK-NEXT: lw a1, 0(a1)
661+
; CHECK-NEXT: psll.bs a1, a1, a2
662+
; CHECK-NEXT: sw a1, 0(a0)
663+
; CHECK-NEXT: ret
664+
%a = load <4 x i8>, ptr %a_ptr
665+
%insert = insertelement <4 x i8> poison, i8 %shamt, i32 0
666+
%b = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer
667+
%res = shl <4 x i8> %a, %b
668+
store <4 x i8> %res, ptr %ret_ptr
669+
ret void
670+
}
671+
672+
; Test logical shift left(vector shamt)
673+
define void @test_psll_hs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) {
674+
; CHECK-RV32-LABEL: test_psll_hs_vec_shamt:
675+
; CHECK-RV32: # %bb.0:
676+
; CHECK-RV32-NEXT: lw a1, 0(a1)
677+
; CHECK-RV32-NEXT: lw a2, 0(a2)
678+
; CHECK-RV32-NEXT: sll a3, a1, a2
679+
; CHECK-RV32-NEXT: srli a2, a2, 16
680+
; CHECK-RV32-NEXT: srli a1, a1, 16
681+
; CHECK-RV32-NEXT: sll a1, a1, a2
682+
; CHECK-RV32-NEXT: pack a1, a3, a1
683+
; CHECK-RV32-NEXT: sw a1, 0(a0)
684+
; CHECK-RV32-NEXT: ret
685+
;
686+
; CHECK-RV64-LABEL: test_psll_hs_vec_shamt:
687+
; CHECK-RV64: # %bb.0:
688+
; CHECK-RV64-NEXT: lw a1, 0(a1)
689+
; CHECK-RV64-NEXT: lw a2, 0(a2)
690+
; CHECK-RV64-NEXT: sll a3, a1, a2
691+
; CHECK-RV64-NEXT: srli a2, a2, 16
692+
; CHECK-RV64-NEXT: srli a1, a1, 16
693+
; CHECK-RV64-NEXT: sll a1, a1, a2
694+
; CHECK-RV64-NEXT: ppack.w a1, a3, a1
695+
; CHECK-RV64-NEXT: sw a1, 0(a0)
696+
; CHECK-RV64-NEXT: ret
697+
%a = load <2 x i16>, ptr %a_ptr
698+
%b = load <2 x i16>, ptr %shamt_ptr
699+
%res = shl <2 x i16> %a, %b
700+
store <2 x i16> %res, ptr %ret_ptr
701+
ret void
702+
}
703+
704+
define void @test_psll_bs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) {
705+
; CHECK-RV32-LABEL: test_psll_bs_vec_shamt:
706+
; CHECK-RV32: # %bb.0:
707+
; CHECK-RV32-NEXT: lw a2, 0(a2)
708+
; CHECK-RV32-NEXT: lw a1, 0(a1)
709+
; CHECK-RV32-NEXT: srli a3, a2, 24
710+
; CHECK-RV32-NEXT: srli a4, a1, 24
711+
; CHECK-RV32-NEXT: srli a5, a2, 8
712+
; CHECK-RV32-NEXT: srli a6, a1, 8
713+
; CHECK-RV32-NEXT: sll a7, a4, a3
714+
; CHECK-RV32-NEXT: sll a6, a6, a5
715+
; CHECK-RV32-NEXT: sll a4, a1, a2
716+
; CHECK-RV32-NEXT: srli a2, a2, 16
717+
; CHECK-RV32-NEXT: srli a1, a1, 16
718+
; CHECK-RV32-NEXT: sll a5, a1, a2
719+
; CHECK-RV32-NEXT: ppack.dh a2, a4, a6
720+
; CHECK-RV32-NEXT: pack a1, a2, a3
721+
; CHECK-RV32-NEXT: sw a1, 0(a0)
722+
; CHECK-RV32-NEXT: ret
723+
;
724+
; CHECK-RV64-LABEL: test_psll_bs_vec_shamt:
725+
; CHECK-RV64: # %bb.0:
726+
; CHECK-RV64-NEXT: lw a2, 0(a2)
727+
; CHECK-RV64-NEXT: lw a1, 0(a1)
728+
; CHECK-RV64-NEXT: srli a3, a2, 24
729+
; CHECK-RV64-NEXT: srli a4, a1, 24
730+
; CHECK-RV64-NEXT: srli a5, a2, 16
731+
; CHECK-RV64-NEXT: sll a3, a4, a3
732+
; CHECK-RV64-NEXT: srli a4, a1, 16
733+
; CHECK-RV64-NEXT: sll a4, a4, a5
734+
; CHECK-RV64-NEXT: sll a5, a1, a2
735+
; CHECK-RV64-NEXT: srli a2, a2, 8
736+
; CHECK-RV64-NEXT: srli a1, a1, 8
737+
; CHECK-RV64-NEXT: sll a1, a1, a2
738+
; CHECK-RV64-NEXT: ppack.h a2, a4, a3
739+
; CHECK-RV64-NEXT: ppack.h a1, a5, a1
740+
; CHECK-RV64-NEXT: ppack.w a1, a1, a2
741+
; CHECK-RV64-NEXT: sw a1, 0(a0)
742+
; CHECK-RV64-NEXT: ret
743+
%a = load <4 x i8>, ptr %a_ptr
744+
%b = load <4 x i8>, ptr %shamt_ptr
745+
%res = shl <4 x i8> %a, %b
746+
store <4 x i8> %res, ptr %ret_ptr
747+
ret void
748+
}

llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -805,3 +805,39 @@ define void @test_psslai_w(ptr %ret_ptr, ptr %a_ptr) {
805805
store <2 x i32> %res, ptr %ret_ptr
806806
ret void
807807
}
808+
809+
; Test logical shift left(scalar shamt)
810+
define void @test_psll_ws(ptr %ret_ptr, ptr %a_ptr, i32 %shamt) {
811+
; CHECK-LABEL: test_psll_ws:
812+
; CHECK: # %bb.0:
813+
; CHECK-NEXT: ld a1, 0(a1)
814+
; CHECK-NEXT: psll.ws a1, a1, a2
815+
; CHECK-NEXT: sd a1, 0(a0)
816+
; CHECK-NEXT: ret
817+
%a = load <2 x i32>, ptr %a_ptr
818+
%insert = insertelement <2 x i32> poison, i32 %shamt, i32 0
819+
%b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
820+
%res = shl <2 x i32> %a, %b
821+
store <2 x i32> %res, ptr %ret_ptr
822+
ret void
823+
}
824+
825+
; Test logical shift left(vector shamt)
826+
define void @test_psll_ws_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) {
827+
; CHECK-LABEL: test_psll_ws_vec_shamt:
828+
; CHECK: # %bb.0:
829+
; CHECK-NEXT: ld a1, 0(a1)
830+
; CHECK-NEXT: ld a2, 0(a2)
831+
; CHECK-NEXT: sllw a3, a1, a2
832+
; CHECK-NEXT: srli a2, a2, 32
833+
; CHECK-NEXT: srli a1, a1, 32
834+
; CHECK-NEXT: sllw a1, a1, a2
835+
; CHECK-NEXT: pack a1, a3, a1
836+
; CHECK-NEXT: sd a1, 0(a0)
837+
; CHECK-NEXT: ret
838+
%a = load <2 x i32>, ptr %a_ptr
839+
%b = load <2 x i32>, ptr %shamt_ptr
840+
%res = shl <2 x i32> %a, %b
841+
store <2 x i32> %res, ptr %ret_ptr
842+
ret void
843+
}

0 commit comments

Comments
 (0)