diff options
author | Craig Topper <craig.topper@intel.com> | 2018-03-09 23:36:58 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-03-09 23:36:58 +0000 |
commit | 2fb0fe6cdbf6876df591324dd86d3d0234b796d3 (patch) | |
tree | 782dc1c0100690204c13450dc14560c73a41a3df | |
parent | ac0df5fe842d1bce4c07476ca5955475562affab (diff) |
[TwoAddressInstructionPass] Improve tryInstructionCommute of X86 FMA and vpternlog instructions
These instructions have 3 operands that can be commuted, so the first commute we find may not be the best one. If we performed an aggressive commute, we should therefore keep searching: there may still be another operand that is killed, or one for which commuting better satisfies a physical register constraint.
Differential Revision: https://reviews.llvm.org/D44324
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327188 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/TwoAddressInstructionPass.cpp | 19 | ||||
-rw-r--r-- | test/CodeGen/X86/avx512-vpternlog-commute.ll | 6 |
2 files changed, 16 insertions, 9 deletions
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 774b76f84b7..539f8486192 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1205,6 +1205,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, if (!MI->isCommutable()) return false; + bool MadeChange = false; unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); @@ -1223,8 +1224,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp // operands. This makes the live ranges of DstOp and OtherOp joinable. - bool DoCommute = - !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + bool DoCommute = !BaseOpKilled && OtherOpKilled; if (!DoCommute && isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { @@ -1235,13 +1236,21 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, // If it's profitable to commute, try to do so. if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx, Dist)) { + MadeChange = true; ++NumCommuted; - if (AggressiveCommute) + if (AggressiveCommute) { ++NumAggrCommuted; - return true; + // There might be more than two commutable operands, update BaseOp and + // continue scanning. + BaseOpReg = OtherOpReg; + BaseOpKilled = OtherOpKilled; + continue; + } + // If this was a commute based on kill, we won't do better continuing. 
+ return MadeChange; } } - return false; + return MadeChange; } /// For the case where an instruction has a single pair of tied register diff --git a/test/CodeGen/X86/avx512-vpternlog-commute.ll b/test/CodeGen/X86/avx512-vpternlog-commute.ll index d812fac60a4..7f3cde0d99a 100644 --- a/test/CodeGen/X86/avx512-vpternlog-commute.ll +++ b/test/CodeGen/X86/avx512-vpternlog-commute.ll @@ -27,8 +27,7 @@ define <16 x i32> @vpternlog_v16i32_102(<16 x i32> %x0, <16 x i32> %x1, <16 x i3 define <16 x i32> @vpternlog_v16i32_210(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { ; CHECK-LABEL: vpternlog_v16i32_210: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpternlogd $78, %zmm0, %zmm2, %zmm1 -; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1) ret <16 x i32> %res @@ -434,8 +433,7 @@ define <16 x i32> @vpternlog_v16i32_210_maskz(<16 x i32> %x0, <16 x i32> %x1, <1 ; CHECK-LABEL: vpternlog_v16i32_210_maskz: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpternlogd $78, %zmm0, %zmm2, %zmm1 {%k1} {z} -; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask) ret <16 x i32> %res |