aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-03-09 23:36:58 +0000
committerCraig Topper <craig.topper@intel.com>2018-03-09 23:36:58 +0000
commit2fb0fe6cdbf6876df591324dd86d3d0234b796d3 (patch)
tree782dc1c0100690204c13450dc14560c73a41a3df
parentac0df5fe842d1bce4c07476ca5955475562affab (diff)
[TwoAddressInstructionPass] Improve tryInstructionCommute of X86 FMA and vpternlog instructions
These instructions have 3 operands that can be commuted. The first commute we find may not be the best. So we should keep searching if we performed an aggressive commute. There may still be an operand that is killed or a physical register constraint that might be better. Differential Revision: https://reviews.llvm.org/D44324 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327188 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp19
-rw-r--r--test/CodeGen/X86/avx512-vpternlog-commute.ll6
2 files changed, 16 insertions, 9 deletions
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 774b76f84b7..539f8486192 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1205,6 +1205,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
if (!MI->isCommutable())
return false;
+ bool MadeChange = false;
unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
unsigned OpsNum = MI->getDesc().getNumOperands();
@@ -1223,8 +1224,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
// operands. This makes the live ranges of DstOp and OtherOp joinable.
- bool DoCommute =
- !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+ bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+ bool DoCommute = !BaseOpKilled && OtherOpKilled;
if (!DoCommute &&
isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) {
@@ -1235,13 +1236,21 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// If it's profitable to commute, try to do so.
if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx,
Dist)) {
+ MadeChange = true;
++NumCommuted;
- if (AggressiveCommute)
+ if (AggressiveCommute) {
++NumAggrCommuted;
- return true;
+ // There might be more than two commutable operands, update BaseOp and
+ // continue scanning.
+ BaseOpReg = OtherOpReg;
+ BaseOpKilled = OtherOpKilled;
+ continue;
+ }
+ // If this was a commute based on kill, we won't do better continuing.
+ return MadeChange;
}
}
- return false;
+ return MadeChange;
}
/// For the case where an instruction has a single pair of tied register
diff --git a/test/CodeGen/X86/avx512-vpternlog-commute.ll b/test/CodeGen/X86/avx512-vpternlog-commute.ll
index d812fac60a4..7f3cde0d99a 100644
--- a/test/CodeGen/X86/avx512-vpternlog-commute.ll
+++ b/test/CodeGen/X86/avx512-vpternlog-commute.ll
@@ -27,8 +27,7 @@ define <16 x i32> @vpternlog_v16i32_102(<16 x i32> %x0, <16 x i32> %x1, <16 x i3
define <16 x i32> @vpternlog_v16i32_210(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd $78, %zmm0, %zmm2, %zmm1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
ret <16 x i32> %res
@@ -434,8 +433,7 @@ define <16 x i32> @vpternlog_v16i32_210_maskz(<16 x i32> %x0, <16 x i32> %x1, <1
; CHECK-LABEL: vpternlog_v16i32_210_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpternlogd $78, %zmm0, %zmm2, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
ret <16 x i32> %res