Skip to content

Commit 607a8fc

Browse files
committed
Optimizing AVX2 backend and some re-factoring
1 parent edf51e7 commit 607a8fc

2 files changed

Lines changed: 19 additions & 7 deletions

File tree

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7134,8 +7134,7 @@ void C2_MacroAssembler::vector_slice_32B_op(XMMRegister dst, XMMRegister src1, X
71347134
// Result lanes
71357135
// res[127:0] = {src1[255:128] , src1[127:0]} >> SHIFT
71367136
// res[255:128] = {src2[127:0] , src1[255:128]} >> SHIFT
7137-
vextracti128_high(xtmp, src1);
7138-
vinserti128_high(xtmp, src2);
7137+
vperm2i128(xtmp, src1, src2, 0x21);
71397138
vpalignr(dst, xtmp, src1, origin, Assembler::AVX_256bit);
71407139
} else {
71417140
assert(origin > 16 && origin <= 32, "");
@@ -7149,8 +7148,7 @@ void C2_MacroAssembler::vector_slice_32B_op(XMMRegister dst, XMMRegister src1, X
71497148
// Result lanes
71507149
// res[127:0] = {src2[127:0] , src1[255:128]} >> SHIFT
71517150
// res[255:128] = {src2[255:128] , src2[127:0]} >> SHIFT
7152-
vextracti128_high(xtmp, src1);
7153-
vinserti128_high(xtmp, src2);
7151+
vperm2i128(xtmp, src1, src2, 0x21);
71547152
vpalignr(dst, src2, xtmp, origin, Assembler::AVX_256bit);
71557153
}
71567154
}

src/hotspot/share/opto/callGenerator.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -450,9 +450,17 @@ class LateInlineVectorCallGenerator : public LateInlineCallGenerator {
450450
LateInlineCallGenerator(method, intrinsic_cg) , _inline_cg(inline_cg) {}
451451

452452
CallGenerator* inline_cg2() const { return _inline_cg; }
453-
virtual bool is_vector_late_inline() const { return true; }
453+
bool inline_fallback();
454+
virtual bool is_vector_late_inline() const { return true; }
454455
};
455456

457+
bool LateInlineVectorCallGenerator::inline_fallback() {
458+
switch (method()->intrinsic_id()) {
459+
case vmIntrinsics::_VectorSlice: return true;
460+
default : return false;
461+
}
462+
}
463+
456464
CallGenerator* CallGenerator::for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* inline_cg) {
457465
return new LateInlineVectorCallGenerator(m, intrinsic_cg, inline_cg);
458466
}
@@ -690,8 +698,14 @@ void CallGenerator::do_late_inline_helper() {
690698

691699
// Now perform the inlining using the synthesized JVMState
692700
JVMState* new_jvms = inline_cg()->generate(jvms);
693-
new_jvms = new_jvms == nullptr && is_vector_late_inline() ?
694-
static_cast<const LateInlineVectorCallGenerator*>(this)->inline_cg2()->generate(jvms) : new_jvms;
701+
// Attempt inlining fallback implementation in case of
702+
// intrinsification failure.
703+
if (new_jvms == nullptr && is_vector_late_inline()) {
704+
LateInlineVectorCallGenerator* late_inline_vec_cg = static_cast<LateInlineVectorCallGenerator*>(this);
705+
if (late_inline_vec_cg->inline_fallback()) {
706+
new_jvms = late_inline_vec_cg->inline_cg2()->generate(jvms);
707+
}
708+
}
695709
if (new_jvms == nullptr) return; // no change
696710
if (C->failing()) return;
697711

0 commit comments

Comments
 (0)