author     Tom Stellard <tstellar@redhat.com>   2018-07-11 20:59:01 +0000
committer  Tom Stellard <tstellar@redhat.com>   2018-07-11 20:59:01 +0000
commit     1d6fd076a3d3c3904704ed33f40a880c80dacbeb
tree       23196abbd7fbd75f8d2d6f01147ce8da8a0a72a1
parent     3c69e867ac49cb1f56b784a306938def944ce51b
AMDGPU: Refactor Subtarget classes
Summary:
This is a follow-up to r335942.
- Merge SISubtarget into AMDGPUSubtarget and rename to GCNSubtarget
- Rename AMDGPUCommonSubtarget to AMDGPUSubtarget
- Merge R600Subtarget::Generation and GCNSubtarget::Generation into
AMDGPUSubtarget::Generation.
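
In rough outline, the class layout after this change (a condensed, hedged sketch of the AMDGPUSubtarget.h hunks in the diff below — the struct stubs stand in for the real TableGen-generated bases, and members/accessors are omitted):

// Stand-ins for the generated bases; the real ones come from *.inc files.
struct AMDGPUGenSubtargetInfo {};
struct R600GenSubtargetInfo {};

class AMDGPUSubtarget {  // the former AMDGPUCommonSubtarget
public:
  // One merged Generation scale covering both hardware families, so
  // cross-family comparisons such as Gen >= SOUTHERN_ISLANDS stay ordered.
  enum Generation {
    R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3,
    SOUTHERN_ISLANDS = 4, SEA_ISLANDS = 5, VOLCANIC_ISLANDS = 6, GFX9 = 7
  };
  virtual ~AMDGPUSubtarget() {}
};

// The former SISubtarget, folded into the renamed GCN-side class:
class GCNSubtarget : public AMDGPUGenSubtargetInfo, public AMDGPUSubtarget {};

// R600 keeps its own subclass of the common base:
class R600Subtarget : public R600GenSubtargetInfo, public AMDGPUSubtarget {};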
Reviewers: arsenm, jvesely
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D49037
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336851 91177308-0d34-0410-b5e6-96231b3b80d8
74 files changed, 340 insertions, 381 deletions
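
The mechanical effect at call sites, visible throughout the diff: generation checks no longer scope the enum through a target-specific class. A before/after sketch condensed from the GCNHazardRecognizer and R600 hunks below:

// Before: each target named its own subtarget class.
//   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
//     return 0;
//   assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);

// After: both targets use the merged enum on the common base.
//   if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
//     return 0;
//   assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);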
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 84ae355beb5..16c2a366db2 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -411,23 +411,23 @@ def FeatureGCN : SubtargetFeature<"gcn",
   "GCN or newer GPU"
 >;
 
-class AMDGPUSubtargetFeatureGeneration <string Value,
+class GCNSubtargetFeatureGeneration <string Value,
                                        list<SubtargetFeature> Implies> :
-  SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;
+  SubtargetFeatureGeneration <Value, "GCNSubtarget", Implies>;
 
-def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
   [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureGCN,
   FeatureLDSBankCount32, FeatureMovrel]
 >;
 
-def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",
+def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
   FeatureCIInsts, FeatureMovrel]
 >;
 
-def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
@@ -438,7 +438,7 @@ def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS"
   ]
 >;
 
-def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",
+def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
   [FeatureFP64, FeatureLocalMemorySize65536,
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index ce17202f341..1a70833a447 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -219,7 +219,7 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
 }
 
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
   bool HasFlat = ST.hasFlatAddressSpace();
   bool HasApertureRegs = ST.hasApertureRegs();
   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index b2bc4021b46..f0e6d1b83f1 100644
--- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -18,7 +18,7 @@ namespace llvm {
 
 class Function;
 class raw_ostream;
-class SISubtarget;
+class GCNSubtarget;
 class TargetMachine;
 class TargetRegisterClass;
 class TargetRegisterInfo;
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d6a707fde10..5b18aefbd78 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -67,7 +67,7 @@ using namespace llvm::AMDGPU;
 // instructions to run at the double precision rate for the device so it's
 // probably best to just report no single precision denormals.
 static uint32_t getFPMode(const MachineFunction &F) {
-  const SISubtarget& ST = F.getSubtarget<SISubtarget>();
+  const GCNSubtarget& ST = F.getSubtarget<GCNSubtarget>();
   // TODO: Is there any real use for the flush in only / flush out only modes?
 
   uint32_t FP32Denormals =
@@ -197,7 +197,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
       TM.getTargetTriple().getOS() == Triple::AMDHSA)
     return;
 
-  const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
   amd_kernel_code_t KernelCode;
   if (STM.isAmdCodeObjectV2(MF->getFunction())) {
     getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
@@ -255,14 +255,14 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
   }
 
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
   if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
     SmallString<128> SymbolName;
     getNameWithPrefix(SymbolName, &MF->getFunction()),
     getTargetStreamer()->EmitAMDGPUSymbolType(
         SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
   }
-  const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
   if (STI.dumpCode()) {
     // Disassemble function name label to text.
     DisasmLines.push_back(MF->getName().str() + ":");
@@ -274,7 +274,7 @@
 }
 
 void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
-  const AMDGPUSubtarget &STI = MBB.getParent()->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
   if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
     // Write a line for the basic block label if it is not only fallthrough.
     DisasmLines.push_back(
@@ -399,7 +399,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   SetupMachineFunction(MF);
 
-  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   MCContext &Context = getObjFileLowering().getContext();
 
   // FIXME: This should be an explicit check for Mesa.
   if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
@@ -440,7 +440,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
     emitCommonFunctionComments(
       Info.NumVGPR,
-      Info.getTotalNumSGPRs(MF.getSubtarget<SISubtarget>()),
+      Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
       Info.PrivateSegmentSize,
       getFunctionCodeSize(MF), MFI);
     return false;
@@ -475,7 +475,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     OutStreamer->emitRawComment(
       " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
 
-    if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
+    if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
       OutStreamer->emitRawComment(
         " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
         Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
@@ -526,7 +526,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 }
 
 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = STM.getInstrInfo();
 
   uint64_t CodeSize = 0;
@@ -558,7 +558,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
 }
 
 int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
-  const SISubtarget &ST) const {
+  const GCNSubtarget &ST) const {
   return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
                                                      UsesVCC, UsesFlatScratch);
 }
@@ -568,7 +568,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
   SIFunctionResourceInfo Info;
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -812,7 +812,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
     MF.getFunction().getContext().diagnose(DiagStackSize);
   }
 
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const SIInstrInfo *TII = STM.getInstrInfo();
   const SIRegisterInfo *RI = &TII->getRegisterInfo();
@@ -927,7 +927,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.DX10Clamp = STM.enableDX10Clamp();
 
   unsigned LDSAlignShift;
-  if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
+  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
     // LDS is allocated in 64 dword blocks.
     LDSAlignShift = 8;
   } else {
@@ -1000,7 +1000,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
 
 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
                                          const SIProgramInfo &CurrentProgramInfo) {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
 
@@ -1129,7 +1129,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
                                         const SIProgramInfo &CurrentProgramInfo,
                                         const MachineFunction &MF) const {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
 
   AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index f8f8b5d39fc..22982d912c7 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -34,7 +34,7 @@ namespace llvm {
 class AMDGPUMachineFunction;
 class AMDGPUTargetStreamer;
 class MCOperand;
-class SISubtarget;
+class GCNSubtarget;
 
 class AMDGPUAsmPrinter final : public AsmPrinter {
 private:
@@ -50,7 +50,7 @@ private:
     bool HasDynamicallySizedStack = false;
     bool HasRecursion = false;
 
-    int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
+    int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
   };
 
   SIProgramInfo CurrentProgramInfo;
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 2b14a22e0dc..18c7df0d94f 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -95,10 +95,10 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     return false;
 
   MachineFunction &MF = MIRBuilder.getMF();
-  const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
   SmallVector<CCValAssign, 16> ArgLocs;
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 8a8143a5d78..68bc7fdd996 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -132,11 +132,11 @@ def RetCC_AMDGPU_Func : CallingConv<[
 ]>;
 
 def CC_AMDGPU : CallingConv<[
-  CCIf<"static_cast<const AMDGPUSubtarget&>"
+  CCIf<"static_cast<const GCNSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
        "AMDGPUSubtarget::SOUTHERN_ISLANDS",
        CCDelegateTo<CC_SI>>,
-  CCIf<"static_cast<const AMDGPUSubtarget&>"
+  CCIf<"static_cast<const GCNSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
        "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
        CCDelegateTo<CC_AMDGPU_Func>>
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 0260d1bd39f..44be33eca4a 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -57,7 +57,7 @@ static cl::opt<bool> WidenLoads(
 
 class AMDGPUCodeGenPrepare : public FunctionPass,
                              public InstVisitor<AMDGPUCodeGenPrepare, bool> {
-  const SISubtarget *ST = nullptr;
+  const GCNSubtarget *ST = nullptr;
   DivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
   bool HasUnsafeFPMath = false;
@@ -890,7 +890,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
     return false;
 
   const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
-  ST = &TM.getSubtarget<SISubtarget>(F);
+  ST = &TM.getSubtarget<GCNSubtarget>(F);
   DA = &getAnalysis<DivergenceAnalysis>();
   HasUnsafeFPMath = hasUnsafeFPMath(F);
   AMDGPUASI = TM.getAMDGPUAS();
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 7d222055acd..b33079ae4ba 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -203,7 +203,7 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
 
 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
     const MachineFunction &MF,
     const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
   const Function &F = MF.getFunction();
@@ -233,7 +233,7 @@ Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
     const MachineFunction &MF,
     const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
 
   if (!STM.debuggerSupported())
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 49aacd414c7..f25f4d4693e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -71,7 +71,7 @@ namespace {
 
 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
   // make the right decision when generating code for different targets.
-  const AMDGPUSubtarget *Subtarget;
+  const GCNSubtarget *Subtarget;
   AMDGPUAS AMDGPUASI;
   bool EnableLateStructurizeCFG;
@@ -274,7 +274,7 @@ FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
 }
 
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
+  Subtarget = &MF.getSubtarget<GCNSubtarget>();
   return SelectionDAGISel::runOnMachineFunction(MF);
 }
 
@@ -316,7 +316,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
   }
 
   const SIRegisterInfo *TRI
-      = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
+      = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
   return TRI->getPhysRegClass(Reg);
 }
 
@@ -1397,7 +1397,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
     return false;
 
   SDLoc SL(ByteOffsetNode);
-  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
+  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
   int64_t ByteOffset = C->getSExtValue();
   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
@@ -1664,7 +1664,7 @@ bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
     return true;
 
   if (VT == MVT::i64) {
-    auto ST = static_cast<const SISubtarget *>(Subtarget);
+    auto ST = static_cast<const GCNSubtarget *>(Subtarget);
 
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 87819f3294b..7db4703ffde 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -155,7 +155,7 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
 }
 
 AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
-                                           const AMDGPUCommonSubtarget &STI)
+                                           const AMDGPUSubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
   AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
   // Lower floating point store/load to integer store/load to reduce the number
@@ -3939,8 +3939,8 @@ SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
 uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
     const MachineFunction &MF, const ImplicitParameter Param) const {
   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
-  const AMDGPUCommonSubtarget &ST =
-      AMDGPUCommonSubtarget::get(getTargetMachine(), MF.getFunction());
+  const AMDGPUSubtarget &ST =
+      AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction());
   unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
   unsigned Alignment = ST.getAlignmentForImplicitArgPtr();
   uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
@@ -4242,8 +4242,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
     switch (IID) {
     case Intrinsic::amdgcn_mbcnt_lo:
     case Intrinsic::amdgcn_mbcnt_hi: {
-      const SISubtarget &ST =
-          DAG.getMachineFunction().getSubtarget<SISubtarget>();
+      const GCNSubtarget &ST =
+          DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
       // These return at most the wavefront size - 1.
       unsigned Size = Op.getValueType().getSizeInBits();
       Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c23bf60396c..1e027dd6712 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -23,12 +23,12 @@ namespace llvm {
 
 class AMDGPUMachineFunction;
-class AMDGPUCommonSubtarget;
+class AMDGPUSubtarget;
 struct ArgDescriptor;
 
 class AMDGPUTargetLowering : public TargetLowering {
 private:
-  const AMDGPUCommonSubtarget *Subtarget;
+  const AMDGPUSubtarget *Subtarget;
 
   /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
   /// legalized from a smaller type VT. Need to match pre-legalized type because
@@ -125,7 +125,7 @@ protected:
   void analyzeFormalArgumentsCompute(CCState &State,
                                      const SmallVectorImpl<ISD::InputArg> &Ins) const;
 public:
-  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);
+  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
 
   bool mayIgnoreSignedZero(SDValue Op) const {
     if (getTargetMachine().Options.NoSignedZerosFPMath)
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 949a3ce6ada..07aa7c2cc8a 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 // Pin the vtable to this file.
 //void AMDGPUInstrInfo::anchor() {}
 
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { }
+AMDGPUInstrInfo::AMDGPUInstrInfo(const GCNSubtarget &ST) { }
 
 // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index c12124ea07c..2f8166da0d3 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -22,14 +22,14 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
+class GCNSubtarget;
 class MachineFunction;
 class MachineInstr;
 class MachineInstrBuilder;
 
 class AMDGPUInstrInfo {
 public:
-  explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
+  explicit AMDGPUInstrInfo(const GCNSubtarget &st);
 
   static bool isUniformMMO(const MachineMemOperand *MMO);
 };
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 180101559b2..73b7241e447 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -37,11 +37,13 @@ using namespace llvm;
 
 #define GET_GLOBALISEL_IMPL
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_IMPL
+#undef AMDGPUSubtarget
 
 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
-    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
+    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
     const AMDGPUTargetMachine &TM)
     : InstructionSelector(), TII(*STI.getInstrInfo()),
       TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
@@ -447,7 +449,7 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
 
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
-  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   unsigned DstReg = I.getOperand(0).getReg();
   const DebugLoc &DL = I.getDebugLoc();
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 07117272c33..0ffbc2e4edf 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -22,26 +22,27 @@ namespace {
 #define GET_GLOBALISEL_PREDICATE_BITSET
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATE_BITSET
+#undef AMDGPUSubtarget
 }
 
 namespace llvm {
 
 class AMDGPUInstrInfo;
 class AMDGPURegisterBankInfo;
-class AMDGPUSubtarget;
+class GCNSubtarget;
 class MachineInstr;
 class MachineOperand;
 class MachineRegisterInfo;
 class SIInstrInfo;
 class SIMachineFunctionInfo;
 class SIRegisterInfo;
-class SISubtarget;
 
 class AMDGPUInstructionSelector : public InstructionSelector {
 public:
-  AMDGPUInstructionSelector(const SISubtarget &STI,
+  AMDGPUInstructionSelector(const GCNSubtarget &STI,
                             const AMDGPURegisterBankInfo &RBI,
                             const AMDGPUTargetMachine &TM);
 
@@ -91,11 +92,13 @@ private:
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
   const AMDGPUTargetMachine &TM;
-  const SISubtarget &STI;
+  const GCNSubtarget &STI;
   bool EnableLateStructurizeCFG;
 #define GET_GLOBALISEL_PREDICATES_DECL
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATES_DECL
+#undef AMDGPUSubtarget
 
 #define GET_GLOBALISEL_TEMPORARIES_DECL
 #include "AMDGPUGenGlobalISel.inc"
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ff16ceed6ab..2f357af3fc7 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 using namespace LegalizeActions;
 
-AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST,
+AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                          const GCNTargetMachine &TM) {
   using namespace TargetOpcode;
 
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index f972c748ffd..1cbd37c42c4 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -21,12 +21,12 @@ namespace llvm {
 
 class GCNTargetMachine;
 class LLVMContext;
-class SISubtarget;
+class GCNSubtarget;
 
 /// This class provides the information for the target register banks.
 class AMDGPULegalizerInfo : public LegalizerInfo {
 public:
-  AMDGPULegalizerInfo(const SISubtarget &ST,
+  AMDGPULegalizerInfo(const GCNSubtarget &ST,
                       const GCNTargetMachine &TM);
 };
 } // End llvm namespace.
diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 3cb3129706f..2cec8fe5328 100644
--- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -124,7 +124,7 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
     if (!CI)
       continue;
 
-    Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
+    Changed |= AMDGPUSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
   }
   return Changed;
 }
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index b06f3014ac6..3c5760804b3 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -68,7 +68,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
   auto &TPC = getAnalysis<TargetPassConfig>();
 
   const TargetMachine &TM = TPC.getTM<TargetMachine>();
-  const SISubtarget &ST = TM.getSubtarget<SISubtarget>(F);
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
   LLVMContext &Ctx = F.getParent()->getContext();
   const DataLayout &DL = F.getParent()->getDataLayout();
   BasicBlock &EntryBlock = *F.begin();
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 6aaa46e0603..1876dc3f712 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -204,7 +204,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
 
 bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
-  const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
   AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
   return MCInstLowering.lowerOperand(MO, MCOp);
 }
@@ -243,7 +243,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   if (emitPseudoExpansionLowering(*OutStreamer, MI))
     return;
 
-  const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
   AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
 
   StringRef Err;
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 612777981f9..6f44e2dbb2d 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -2871,7 +2871,7 @@ static void checkRegOnlyPHIInputs(MachineFunction &MF) {
 }
 
 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   TRI = ST.getRegisterInfo();
   MRI = &(MF.getRegInfo());
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index d9c0ed91c52..2c4bf328008 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -15,7 +15,7 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
+class GCNSubtarget;
 
 class AMDGPUMachineFunction : public MachineFunctionInfo {
   /// A map to keep track of local memory objects and their offsets within the
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 688e1ba8349..d341fec6296 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -152,7 +152,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
   IsAMDGCN = TT.getArch() == Triple::amdgcn;
   IsAMDHSA = TT.getOS() == Triple::AMDHSA;
 
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
   if (!ST.isPromoteAllocaEnabled())
     return false;
 
@@ -175,7 +175,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
 std::pair<Value *, Value *>
 AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
   const Function &F = *Builder.GetInsertBlock()->getParent();
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
 
   if (!IsAMDHSA) {
     Function *LocalSizeYFn
@@ -261,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
 }
 
 Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
-  const AMDGPUCommonSubtarget &ST =
-      AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
+  const AMDGPUSubtarget &ST =
+      AMDGPUSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
   Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
 
   switch (N) {
@@ -603,7 +603,7 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
 
 bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
   FunctionType *FTy = F.getFunctionType();
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
 
   // If the function has any arguments in the local address space, then it's
   // possible these arguments require the entire local memory space, so
@@ -730,7 +730,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
   if (!SufficientLDS)
     return false;
 
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
   unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
 
   const DataLayout &DL = Mod->getDataLayout();
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index db2f72f34f1..07de5fc549e 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -21,7 +21,7 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
+class GCNSubtarget;
 class TargetInstrInfo;
 
 struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 9ad16e0b8a6..087c9cc9d67 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -34,12 +34,14 @@ using namespace llvm;
 
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenSubtargetInfo.inc"
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
+#undef AMDGPUSubtarget
 #include "R600GenSubtargetInfo.inc"
 
-AMDGPUSubtarget::~AMDGPUSubtarget() = default;
+GCNSubtarget::~GCNSubtarget() = default;
 
 R600Subtarget &
 R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
@@ -51,7 +53,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
-  if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
+  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     FP32Denormals = false;
   }
@@ -61,8 +63,8 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
   return *this;
 }
 
-AMDGPUSubtarget &
-AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
+GCNSubtarget &
+GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                  StringRef GPU, StringRef FS) {
   // Determine default and user-specified characteristics
   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
@@ -122,7 +124,7 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
   return *this;
 }
 
-AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
                                  const FeatureBitset &FeatureBits) :
   TargetTriple(TT),
   SubtargetFeatureBits(FeatureBits),
@@ -140,11 +142,10 @@ AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
   WavefrontSize(0)
   { }
 
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                                 const TargetMachine &TM) :
+GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+                           const GCNTargetMachine &TM) :
     AMDGPUGenSubtargetInfo(TT, GPU, FS),
-    AMDGPUCommonSubtarget(TT, getFeatureBits()),
-    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+    AMDGPUSubtarget(TT, getFeatureBits()),
     TargetTriple(TT),
    Gen(SOUTHERN_ISLANDS),
     IsaVersion(ISAVersion0_0_0),
@@ -206,12 +207,20 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     ScalarizeGlobal(false),
-    FeatureDisable(false) {
+    FeatureDisable(false),
+    TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
+    InstrInfo(*this),
+    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
   AS = AMDGPU::getAMDGPUAS(TT);
+  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
+  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
+  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+  InstSelector.reset(new AMDGPUInstructionSelector(
+      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
   initializeSubtargetDependencies(TT, GPU, FS);
 }
 
-unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
+unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
   const Function &F) const {
   if (NWaves == 1)
     return getLocalMemorySize();
@@ -221,7 +230,7 @@ unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
   return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
 }
 
-unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
+unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
   const Function &F) const {
   unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
   unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
@@ -234,13 +243,13 @@ unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
 }
 
 unsigned
-AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
   const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
   return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
 }
 
 std::pair<unsigned, unsigned>
-AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
+AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
   switch (CC) {
   case CallingConv::AMDGPU_CS:
   case CallingConv::AMDGPU_KERNEL:
@@ -258,7 +267,7 @@ AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
   }
 }
 
-std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
   const Function &F) const {
   // FIXME: 1024 if function.
   // Default minimum/maximum flat work group sizes.
@@ -288,7 +297,7 @@ std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
   return Requested;
 }
 
-std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
   const Function &F) const {
   // Default minimum/maximum number of waves per execution unit.
   std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
@@ -336,7 +345,7 @@ std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
   return Requested;
 }
 
-bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
+bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
   Function *Kernel = I->getParent()->getParent();
   unsigned MinSize = 0;
   unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
@@ -401,7 +410,7 @@ bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                              const TargetMachine &TM) :
   R600GenSubtargetInfo(TT, GPU, FS),
-  AMDGPUCommonSubtarget(TT, getFeatureBits()),
+  AMDGPUSubtarget(TT, getFeatureBits()),
   InstrInfo(*this),
   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
   FMA(false),
@@ -417,20 +426,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
   InstrItins(getInstrItineraryForCPU(GPU)),
   AS (AMDGPU::getAMDGPUAS(TT)) { }
 
-SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                         const GCNTargetMachine &TM)
-    : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
-      FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
-      TLInfo(TM, *this) {
-  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
-  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
-
-  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
-  InstSelector.reset(new AMDGPUInstructionSelector(
-      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
-}
-
-void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
   // Track register pressure so the scheduler can try to decrease
   // pressure once register usage is above the threshold defined by
@@ -447,11 +443,11 @@ void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
   Policy.ShouldTrackLaneMasks = true;
 }
 
-bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
+bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }
 
-uint64_t SISubtarget::getExplicitKernArgSize(const Function &F) const {
+uint64_t GCNSubtarget::getExplicitKernArgSize(const Function &F) const {
   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL);
 
   const DataLayout &DL = F.getParent()->getDataLayout();
@@ -467,7 +463,7 @@ uint64_t SISubtarget::getExplicitKernArgSize(const Function &F) const {
   return ExplicitArgBytes;
 }
 
-unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
+unsigned GCNSubtarget::getKernArgSegmentSize(const Function &F,
                                             int64_t ExplicitArgBytes) const {
   if (ExplicitArgBytes == -1)
     ExplicitArgBytes = getExplicitKernArgSize(F);
@@ -485,8 +481,8 @@ unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
   return alignTo(TotalSize, 4);
 }
 
-unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
-  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
+  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
     if (SGPRs <= 80)
       return 10;
     if (SGPRs <= 88)
@@ -508,7 +504,7 @@ unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
   return 5;
 }
 
-unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
+unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
   if (VGPRs <= 24)
     return 10;
   if (VGPRs <= 28)
@@ -530,7 +526,7 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
   return 1;
 }
 
-unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   if (MFI.hasFlatScratchInit()) {
     if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
@@ -544,7 +540,7 @@ unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
   return 2; // VCC.
 }
 
-unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
 
@@ -594,7 +590,7 @@ unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
                   MaxAddressableNumSGPRs);
 }
 
-unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
 
@@ -675,21 +671,21 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
 };
 } // namespace
 
-void SISubtarget::getPostRAMutations(
+void GCNSubtarget::getPostRAMutations(
     std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
   Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
 }
 
-const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
+const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
   if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
-    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>());
+    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
   else
-    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>());
+    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
 }
 
-const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) {
+const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Function &F) {
   if (TM.getTargetTriple().getArch() == Triple::amdgcn)
-    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F));
+    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
   else
-    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
+    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
 }
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 251ad8d2fc8..ae41f9e9184 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -46,7 +46,19 @@ namespace llvm {
 
 class StringRef;
 
-class AMDGPUCommonSubtarget {
+class AMDGPUSubtarget {
+public:
+  enum Generation {
+    R600 = 0,
+    R700 = 1,
+    EVERGREEN = 2,
+    NORTHERN_ISLANDS = 3,
+    SOUTHERN_ISLANDS = 4,
+    SEA_ISLANDS = 5,
+    VOLCANIC_ISLANDS = 6,
+    GFX9 = 7
+  };
+
 private:
   Triple TargetTriple;
 
@@ -66,10 +78,10 @@ protected:
   unsigned WavefrontSize;
 
 public:
-  AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+  AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
 
-  static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
-  static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
+  static const AMDGPUSubtarget &get(const MachineFunction &MF);
+  static const AMDGPUSubtarget &get(const TargetMachine &TM,
                                     const Function &F);
 
   /// \returns Default range flat work group size for a calling convention.
@@ -219,21 +231,12 @@ public:
   /// Creates value range metadata on an workitemid.* inrinsic call or load.
   bool makeLIDRangeMetadata(Instruction *I) const;
 
-  virtual ~AMDGPUCommonSubtarget() {}
+  virtual ~AMDGPUSubtarget() {}
 };
 
-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
-                        public AMDGPUCommonSubtarget {
+class GCNSubtarget : public AMDGPUGenSubtargetInfo,
+                     public AMDGPUSubtarget {
 public:
-  enum Generation {
-    // Gap for R600 generations, so we can do comparisons between
-    // AMDGPUSubtarget and r600Subtarget.
-    SOUTHERN_ISLANDS = 4,
-    SEA_ISLANDS = 5,
-    VOLCANIC_ISLANDS = 6,
-    GFX9 = 7,
-  };
-
   enum {
     ISAVersion0_0_0,
     ISAVersion6_0_0,
@@ -274,8 +277,6 @@ public:
   };
 
 private:
-  SIFrameLowering FrameLowering;
-
   /// GlobalISel related APIs.
   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
@@ -360,24 +361,34 @@ protected:
   SelectionDAGTargetInfo TSInfo;
   AMDGPUAS AS;
+private:
+  SITargetLowering TLInfo;
+  SIInstrInfo InstrInfo;
+  SIFrameLowering FrameLowering;
 
 public:
-  AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                  const TargetMachine &TM);
-  ~AMDGPUSubtarget() override;
+  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+               const GCNTargetMachine &TM);
+  ~GCNSubtarget() override;
 
-  AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
+  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
                                                    StringRef GPU, StringRef FS);
 
-  virtual const SIInstrInfo *getInstrInfo() const override = 0;
+  const SIInstrInfo *getInstrInfo() const override {
+    return &InstrInfo;
+  }
 
   const SIFrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
 
-  virtual const SITargetLowering *getTargetLowering() const override = 0;
+  const SITargetLowering *getTargetLowering() const override {
+    return &TLInfo;
+  }
 
-  virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
+  const SIRegisterInfo *getRegisterInfo() const override {
+    return &InstrInfo.getRegisterInfo();
+  }
 
   const CallLowering *getCallLowering() const override {
     return CallLoweringInfo.get();
@@ -720,55 +731,7 @@ public:
     return AMDGPU::IsaInfo::getWavesPerWorkGroup(
         MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
   }
-};
-
-class SISubtarget final : public AMDGPUSubtarget {
-private:
-  SIInstrInfo InstrInfo;
-  SIFrameLowering FrameLowering;
-  SITargetLowering TLInfo;
-
-  /// GlobalISel related APIs.
-  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
-  std::unique_ptr<InstructionSelector> InstSelector;
-  std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
-
-public:
-  SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
-              const GCNTargetMachine &TM);
-
-  const SIInstrInfo *getInstrInfo() const override {
-    return &InstrInfo;
-  }
-
-  const SIFrameLowering *getFrameLowering() const override {
-    return &FrameLowering;
-  }
-
-  const SITargetLowering *getTargetLowering() const override {
-    return &TLInfo;
-  }
-
-  const CallLowering *getCallLowering() const override {
-    return CallLoweringInfo.get();
-  }
-
-  const InstructionSelector *getInstructionSelector() const override {
-    return InstSelector.get();
-  }
-
-  const LegalizerInfo *getLegalizerInfo() const override {
-    return Legalizer.get();
-  }
-
-  const RegisterBankInfo *getRegBankInfo() const override {
-    return RegBankInfo.get();
-  }
-
-  const SIRegisterInfo *getRegisterInfo() const override {
-    return &InstrInfo.getRegisterInfo();
-  }
 
   // static wrappers
   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
@@ -988,12 +951,8 @@ public:
       const override;
 };
 
-
 class R600Subtarget final : public R600GenSubtargetInfo,
-                            public AMDGPUCommonSubtarget {
-public:
-  enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
-
+                            public AMDGPUSubtarget {
 private:
   R600InstrInfo InstrInfo;
   R600FrameLowering FrameLowering;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b4066102553..7dfe33b52c3 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -466,7 +466,7 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                    CodeGenOpt::Level OL, bool JIT)
     : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
 
-const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
+const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
   StringRef GPU = getGPUName(F);
   StringRef FS = getFeatureString(F);
 
@@ -479,7 +479,7 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
+    I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
   }
 
   I->setScalarizeGlobalBehavior(ScalarizeGlobal);
@@ -750,7 +750,7 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
 
 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
   MachineSchedContext *C) const {
-  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
   if (ST.enableSIScheduler())
     return createSIMachineScheduler(C);
   return createGCNMaxOccupancyMachineScheduler(C);
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 50b219d639c..0fe14493fab 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -100,7 +100,7 @@ public:
 class GCNTargetMachine final : public AMDGPUTargetMachine {
 private:
   AMDGPUIntrinsicInfo IntrinsicInfo;
-  mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
+  mutable StringMap<std::unique_ptr<GCNSubtarget>> SubtargetMap;
 
 public:
   GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -110,7 +110,7 @@ public:
 
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
 
-  const SISubtarget *getSubtargetImpl(const Function &) const override;
+  const GCNSubtarget *getSubtargetImpl(const Function &) const override;
 
   TargetTransformInfo getTargetTransformInfo(const Function &F) override;
 
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index b15348d404a..8e63d789e17 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -62,7 +62,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
 
   friend BaseT;
 
-  const AMDGPUSubtarget *ST;
+  const GCNSubtarget *ST;
   const AMDGPUTargetLowering *TLI;
   AMDGPUTTIImpl CommonTTI;
   bool IsGraphicsShader;
@@ -91,7 +91,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
     AMDGPU::HalfRate64Ops
   };
 
-  const AMDGPUSubtarget *getST() const { return ST; }
+  const GCNSubtarget *getST() const { return ST; }
   const AMDGPUTargetLowering *getTLI() const { return TLI; }
 
   static inline int getFullRateInstrCost() {
@@ -118,7 +118,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
 public:
   explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
     : BaseT(TM, F.getParent()->getDataLayout()),
-      ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
+      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
       TLI(ST->getTargetLowering()),
       CommonTTI(TM, F),
       IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index b5a657d55a9..944f4ffe598 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -14,13 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 def isEG : Predicate<
-  "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "
+  "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
   "!Subtarget->hasCaymanISA()"
 >;
 
 def isEGorCayman : Predicate<
-  "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||"
-  "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"
+  "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
+  "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
 >;
 
 class EGPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 7d642961b34..f236f10ba75 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -40,7 +40,7 @@ using namespace llvm;
 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
   CurrCycleInstr(nullptr),
   MF(MF),
-  ST(MF.getSubtarget<SISubtarget>()),
+  ST(MF.getSubtarget<GCNSubtarget>()),
   TII(*ST.getInstrInfo()),
   TRI(TII.getRegisterInfo()),
   ClauseUses(TRI.getNumRegUnits()),
@@ -356,13 +356,13 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
 }
 
 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   int WaitStatesNeeded = 0;
 
   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
 
   // This SMRD hazard only affects SI.
-  if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
+  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
     return WaitStatesNeeded;
 
   // A read of an SGPR by SMRD instruction requires 4 wait states when the
@@ -399,7 +399,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
 }
 
 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
+  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
     return 0;
 
   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h
index f9a6e395a45..ca17e7cb601 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -28,7 +28,7 @@ class MachineRegisterInfo;
 class ScheduleDAG;
 class SIInstrInfo;
 class SIRegisterInfo;
-class SISubtarget;
+class GCNSubtarget;
 
 class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   // This variable stores the instruction that has been emitted this cycle. It
@@ -37,7 +37,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   MachineInstr *CurrCycleInstr;
   std::list<MachineInstr*> EmittedInstrs;
   const MachineFunction &MF;
-  const SISubtarget &ST;
+  const GCNSubtarget &ST;
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 960c63ce41d..15366d66bd8 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -108,7 +108,7 @@ static void printLivenessInfo(raw_ostream &OS,
 LLVM_DUMP_METHOD
 void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   for (const auto R : Regions) {
     OS << "Region to schedule ";
     printRegion(OS, R->Begin, R->End, LIS, 1);
@@ -132,7 +132,7 @@ LLVM_DUMP_METHOD
 void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
                                          const GCNRegPressure &Before,
                                          const GCNRegPressure &After) const {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   OS << "RP before: ";
   Before.print(OS, &ST);
   OS << "RP after:  ";
@@ -316,7 +316,7 @@ void GCNIterativeScheduler::schedule() { // overriden
     if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
       dbgs() << "Max RP: ";
       Regions.back()->MaxPressure.print(
-          dbgs(), &MF.getSubtarget<SISubtarget>());
+          dbgs(), &MF.getSubtarget<GCNSubtarget>());
     }
     dbgs() << '\n';);
 }
@@ -418,7 +418,7 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
 #ifndef NDEBUG
   const auto RegionMaxRP = getRegionPressure(R);
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
 #endif
   assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
     || (dbgs() << "Max RP mismatch!!!\n"
@@ -433,7 +433,7 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
 
 // Sort recorded regions by pressure - highest at the front
 void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   llvm::sort(Regions.begin(), Regions.end(),
     [&ST, TargetOcc](const Region *R1, const Region *R2) {
       return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
@@ -451,7 +451,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
 // BestSchedules aren't deleted on fail.
 unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
   // TODO: assert Regions are sorted descending by pressure
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
   LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
                     << ", current = " << Occ << '\n');
@@ -488,7 +488,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   bool TryMaximizeOccupancy) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
 
@@ -542,7 +542,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
 // Minimal Register Strategy
 
 void GCNIterativeScheduler::scheduleMinReg(bool force) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const auto TgtOcc = MFI->getOccupancy();
   sortRegionsByPressure(TgtOcc);
@@ -576,7 +576,7 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
 
 void GCNIterativeScheduler::scheduleILP(
   bool TryMaximizeOccupancy) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
 
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 53b552adb2d..3d8cacc4f02 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -132,7 +132,7 @@ void GCNRegPressure::inc(unsigned Reg,
   }
 }
 
-bool GCNRegPressure::less(const SISubtarget &ST,
+bool GCNRegPressure::less(const GCNSubtarget &ST,
                           const GCNRegPressure& O,
                           unsigned MaxOccupancy) const {
   const auto SGPROcc = std::min(MaxOccupancy,
@@ -178,7 +178,7 @@ bool GCNRegPressure::less(const SISubtarget &ST,
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD
-void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const {
+void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const {
   OS << "VGPRs: " << getVGPRNum();
   if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGPRNum()) << ')';
   OS << ", SGPRs: " << getSGPRNum();
diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h
index b15384eb156..357d3b7b233 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/lib/Target/AMDGPU/GCNRegPressure.h
@@ -49,7 +49,7 @@ struct GCNRegPressure {
   unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
   unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
 
-  unsigned getOccupancy(const SISubtarget &ST) const {
+  unsigned getOccupancy(const GCNSubtarget &ST) const {
     return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
                     ST.getOccupancyWithNumVGPRs(getVGPRNum()));
   }
@@ -59,11 +59,11 @@ struct GCNRegPressure {
               LaneBitmask NewMask,
               const MachineRegisterInfo &MRI);
 
-  bool higherOccupancy(const SISubtarget &ST, const GCNRegPressure& O) const {
+  bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O) const {
     return getOccupancy(ST) > O.getOccupancy(ST);
   }
 
-  bool less(const SISubtarget &ST, const GCNRegPressure& O,
+  bool less(const GCNSubtarget &ST, const GCNRegPressure& O,
             unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
 
   bool operator==(const GCNRegPressure &O) const {
@@ -74,7 +74,7 @@ struct GCNRegPressure {
     return !(*this == O);
   }
 
-  void print(raw_ostream &OS, const SISubtarget *ST = nullptr) const;
+  void print(raw_ostream &OS, const GCNSubtarget *ST = nullptr) const;
   void dump() const { print(dbgs()); }
 
 private:
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 1b054b92b70..f09b7f6cff2 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -35,7 +35,7 @@ void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
 
   MF = &DAG->MF;
 
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
 
   // FIXME: This is also necessary, because some passes that run after
   // scheduling and before regalloc increase register pressure.
@@ -294,7 +294,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
 GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                         std::unique_ptr<MachineSchedStrategy> S) :
   ScheduleDAGMILive(C, std::move(S)),
-  ST(MF.getSubtarget<SISubtarget>()),
+  ST(MF.getSubtarget<GCNSubtarget>()),
  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
   StartingOccupancy(MFI.getOccupancy()),
   MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h
index a772a53fecb..3ac6af89cb9 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -21,7 +21,7 @@ namespace llvm {
 
 class SIMachineFunctionInfo;
 class SIRegisterInfo;
-class SISubtarget;
+class GCNSubtarget;
 
 /// This is a minimal scheduler strategy.  The main difference between this
 /// and the GenericScheduler is that GCNSchedStrategy uses different
@@ -62,7 +62,7 @@ public:
 
 class GCNScheduleDAGMILive : public ScheduleDAGMILive {
 
-  const SISubtarget &ST;
+  const GCNSubtarget &ST;
 
   SIMachineFunctionInfo &MFI;
 
diff --git a/lib/Target/AMDGPU/R600AsmPrinter.cpp b/lib/Target/AMDGPU/R600AsmPrinter.cpp
index 25fdc430025..68f8c30775b 100644
--- a/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -69,7 +69,7 @@ void R600AsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
   }
 
   unsigned RsrcReg;
-  if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
+  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
     // Evergreen / Northern Islands
     switch (MF.getFunction().getCallingConv()) {
     default: LLVM_FALLTHROUGH;
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index f447fc6576c..a19020276f3 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -137,7 +137,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
     return 0;
   case CFStack::FIRST_NON_WQM_PUSH:
     assert(!ST->hasCaymanISA());
-    if (ST->getGeneration() <= R600Subtarget::R700) {
+    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
       // +1 For the push operation.
       // +2 Extra space required.
       return 3;
@@ -150,7 +150,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
       return 2;
     }
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
-    assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
+    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
     // +1 For the push operation.
    // +1 Extra space required.
return 2; @@ -177,7 +177,7 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) { // See comment in // CFStack::getSubEntrySize() else if (CurrentEntries > 0 && - ST->getGeneration() > R600Subtarget::EVERGREEN && + ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && !ST->hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; @@ -250,7 +250,7 @@ private: const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { unsigned Opcode = 0; - bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN); + bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); switch (CFI) { case CF_TC: Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index bddab439f3b..4110e6a28d6 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -791,7 +791,7 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { SDValue TrigVal = DAG.getNode(TrigNode, DL, VT, DAG.getNode(ISD::FADD, DL, VT, FractPart, DAG.getConstantFP(-0.5, DL, MVT::f32))); - if (Gen >= R600Subtarget::R700) + if (Gen >= AMDGPUSubtarget::R700) return TrigVal; // On R600 hw, COS/SIN input must be between -Pi and Pi. return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, diff --git a/lib/Target/AMDGPU/R600InstrFormats.td b/lib/Target/AMDGPU/R600InstrFormats.td index 85d0b2d535b..687a9affa13 100644 --- a/lib/Target/AMDGPU/R600InstrFormats.td +++ b/lib/Target/AMDGPU/R600InstrFormats.td @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">; +def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">; def isR600toCayman : Predicate< - "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">; + "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">; class R600Pat<dag pattern, dag result> : AMDGPUPat<pattern, result> { let SubtargetPredicate = isR600toCayman; diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 0afea658fa2..5397e779474 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -1320,7 +1320,7 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( const { assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented"); unsigned Opcode; - if (ST.getGeneration() <= R600Subtarget::R700) + if (ST.getGeneration() <= AMDGPUSubtarget::R700) Opcode = R600::DOT4_r600; else Opcode = R600::DOT4_eg; diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index afded915982..a1429a2ac50 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -346,7 +346,7 @@ void R600SchedStrategy::PrepareNextSlot() { LLVM_DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; -// if (HwGen == R600Subtarget::NORTHERN_ISLANDS) +// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS) // OccupedSlotsMask |= 16; InstructionsGroupCandidate.clear(); LoadAlu(); diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h index 5bc4800bafd..c4c77172b29 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.h +++ b/lib/Target/AMDGPU/R600RegisterInfo.h @@ -20,8 +20,6 @@ namespace llvm { -class AMDGPUSubtarget; - struct R600RegisterInfo 
final : public R600GenRegisterInfo { RegClassWeight RCW; diff --git a/lib/Target/AMDGPU/R700Instructions.td b/lib/Target/AMDGPU/R700Instructions.td index 988b70d16cd..613a0d729bb 100644 --- a/lib/Target/AMDGPU/R700Instructions.td +++ b/lib/Target/AMDGPU/R700Instructions.td @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">; +def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">; let Predicates = [isR700] in { def SIN_r700 : SIN_Common<0x6E>; diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp index 770a1d346f4..7e884ad93a2 100644 --- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -63,7 +63,7 @@ FunctionPass *llvm::createSIDebuggerInsertNopsPass() { bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) { // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not // specified. - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); if (!ST.debuggerInsertNops()) return false; diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 2e649b9f724..566e0d3febc 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -568,7 +568,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, } bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); MachineRegisterInfo &MRI = MF.getRegInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); diff --git a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp index 81b35d1dceb..15ba78edf91 100644 --- a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp @@ -47,7 +47,7 @@ char SIFixVGPRCopies::ID = 0; char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID; bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); bool Changed = false; diff --git a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/lib/Target/AMDGPU/SIFixWWMLiveness.cpp index 6b1e5d9b5f6..5d613d8874f 100644 --- a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp +++ b/lib/Target/AMDGPU/SIFixWWMLiveness.cpp @@ -185,7 +185,7 @@ bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { // This doesn't actually need LiveIntervals, but we can preserve them. 
LIS = getAnalysisIfAvailable<LiveIntervals>(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index c9bbf5ca7c0..338cabcb906 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -76,7 +76,7 @@ public: MachineRegisterInfo *MRI; const SIInstrInfo *TII; const SIRegisterInfo *TRI; - const AMDGPUSubtarget *ST; + const GCNSubtarget *ST; void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, @@ -972,7 +972,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { return false; MRI = &MF.getRegInfo(); - ST = &MF.getSubtarget<AMDGPUSubtarget>(); + ST = &MF.getSubtarget<GCNSubtarget>(); TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); diff --git a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index 22bf4ae8a15..cd14239de82 100644 --- a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -70,7 +70,7 @@ private: bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses, GCNDownwardRPTracker &RPT); - const SISubtarget *ST; + const GCNSubtarget *ST; const SIRegisterInfo *TRI; const MachineRegisterInfo *MRI; SIMachineFunctionInfo *MFI; @@ -296,7 +296,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - ST = &MF.getSubtarget<SISubtarget>(); + ST = &MF.getSubtarget<GCNSubtarget>(); if (!ST->isXNACKEnabled()) return false; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index d06730f0012..ac0ef90f25a 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -23,19 +23,19 @@ using namespace llvm; -static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST, +static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST, const MachineFunction &MF) { return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4); } -static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST, +static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST, const MachineFunction &MF) { return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF)); } -void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST, +void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST, MachineFunction &MF, MachineBasicBlock &MBB) const { const SIInstrInfo *TII = ST.getInstrInfo(); @@ -98,7 +98,7 @@ void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST, } unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( - const SISubtarget &ST, + const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, @@ -149,7 +149,7 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( // SGPRs. std::pair<unsigned, unsigned> SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( - const SISubtarget &ST, + const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, @@ -220,7 +220,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. 
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); if (ST.debuggerEmitPrologue()) emitDebuggerPrologue(MF, MBB); @@ -364,7 +364,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, } // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. -void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST, +void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg, unsigned ScratchRsrcReg) const { @@ -508,7 +508,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST, static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) { MachineFunction *MF = MBB.getParent(); - const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>(); const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo(); LivePhysRegs LiveRegs(TRI); LiveRegs.addLiveIns(MBB); @@ -537,7 +537,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } const MachineFrameInfo &MFI = MF.getFrameInfo(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); @@ -607,7 +607,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, if (FuncInfo->isEntryFunction()) return; - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); @@ -654,7 +654,7 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { - const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo(); + const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); FrameReg = RI->getFrameRegister(MF); return MF.getFrameInfo().getObjectOffset(FI); @@ -668,7 +668,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( if (!MFI.hasStackObjects()) return; - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); @@ -750,7 +750,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( if (Amount == 0) return MBB.erase(I); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const DebugLoc &DL = I->getDebugLoc(); unsigned Opc = I->getOpcode(); @@ -779,7 +779,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); @@ -829,7 +829,7 @@ bool SIFrameLowering::hasFP(const 
MachineFunction &MF) const { } bool SIFrameLowering::hasSP(const MachineFunction &MF) const { - const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo(); + const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); // All stack operations are relative to the frame offset SGPR. const MachineFrameInfo &MFI = MF.getFrameInfo(); return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF); diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index 6be7f262208..2f35b3631cd 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -17,7 +17,7 @@ namespace llvm { class SIInstrInfo; class SIMachineFunctionInfo; class SIRegisterInfo; -class SISubtarget; +class GCNSubtarget; class SIFrameLowering final : public AMDGPUFrameLowering { public: @@ -48,19 +48,19 @@ public: MachineBasicBlock::iterator MI) const override; private: - void emitFlatScratchInit(const SISubtarget &ST, + void emitFlatScratchInit(const GCNSubtarget &ST, MachineFunction &MF, MachineBasicBlock &MBB) const; unsigned getReservedPrivateSegmentBufferReg( - const SISubtarget &ST, + const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const; std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg( - const SISubtarget &ST, + const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, @@ -70,7 +70,7 @@ private: void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. - void emitEntryFunctionScratchSetup(const SISubtarget &ST, MachineFunction &MF, + void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg, unsigned ScratchRsrcReg) const; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 1d4724a61eb..5721669bf7c 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -112,7 +112,7 @@ static unsigned findFirstFreeSGPR(CCState &CCInfo) { } SITargetLowering::SITargetLowering(const TargetMachine &TM, - const SISubtarget &STI) + const GCNSubtarget &STI) : AMDGPUTargetLowering(TM, STI), Subtarget(&STI) { addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); @@ -378,7 +378,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); - if (Subtarget->getGeneration() >= SISubtarget::SEA_ISLANDS) { + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); @@ -667,7 +667,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setHasFloatingPointExceptions(Subtarget->hasFPExceptions()); } -const SISubtarget *SITargetLowering::getSubtarget() const { +const GCNSubtarget *SITargetLowering::getSubtarget() const { return Subtarget; } @@ -708,12 +708,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, if (RsrcIntr->IsImage) { Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), + *MF.getSubtarget<GCNSubtarget>().getInstrInfo(), CI.getArgOperand(RsrcIntr->RsrcArg)); 
Info.align = 0; } else { Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), + *MF.getSubtarget<GCNSubtarget>().getInstrInfo(), CI.getArgOperand(RsrcIntr->RsrcArg)); } @@ -877,16 +877,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, if (Ty->isSized() && DL.getTypeStoreSize(Ty) < 4) return isLegalGlobalAddressingMode(AM); - if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) { + if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { // SMRD instructions have an 8-bit, dword offset on SI. if (!isUInt<8>(AM.BaseOffs / 4)) return false; - } else if (Subtarget->getGeneration() == SISubtarget::SEA_ISLANDS) { + } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) { // On CI+, this can also be a 32-bit literal constant offset. If it fits // in 8-bits, it can use a smaller encoding. if (!isUInt<32>(AM.BaseOffs / 4)) return false; - } else if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + } else if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { // On VI, these use the SMEM format and the offset is 20-bit in bytes. if (!isUInt<20>(AM.BaseOffs)) return false; @@ -1560,7 +1560,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, // the scratch registers to pass in. bool RequiresStackAccess = HasStackObjects || MFI.hasCalls(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); if (ST.isAmdCodeObjectV2(MF.getFunction())) { if (RequiresStackAccess) { // If we have stack objects, we unquestionably need the private buffer @@ -1676,7 +1676,7 @@ SDValue SITargetLowering::LowerFormalArguments( const Function &Fn = MF.getFunction(); FunctionType *FType = MF.getFunction().getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) { DiagnosticInfoUnsupported NoGraphicsHSA( @@ -1808,7 +1808,7 @@ SDValue SITargetLowering::LowerFormalArguments( auto *ParamTy = dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex())); - if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS && + if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { // On SI local pointers are just offsets into LDS, so they are always // less than 16-bits. On CI and newer they could potentially be @@ -2668,7 +2668,7 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, } - if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS && + if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) { report_fatal_error(Twine("invalid register \"" + StringRef(RegName) + "\" for subtarget.")); @@ -2959,7 +2959,7 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII, // Control flow needs to be inserted if indexing with a VGPR. 
static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, MachineBasicBlock &MBB, - const SISubtarget &ST) { + const GCNSubtarget &ST) { const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); MachineFunction *MF = MBB.getParent(); @@ -3050,7 +3050,7 @@ static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI, static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, MachineBasicBlock &MBB, - const SISubtarget &ST) { + const GCNSubtarget &ST) { const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); MachineFunction *MF = MBB.getParent(); @@ -3964,7 +3964,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Chain = Op.getOperand(0); - if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa || + if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || !Subtarget->isTrapHandlerEnabled()) return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); @@ -3979,7 +3979,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { QueuePtr, SDValue()); SDValue Ops[] = { ToReg, - DAG.getTargetConstant(SISubtarget::TrapIDLLVMTrap, SL, MVT::i16), + DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMTrap, SL, MVT::i16), SGPR01, ToReg.getValue(1) }; @@ -3991,7 +3991,7 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); MachineFunction &MF = DAG.getMachineFunction(); - if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa || + if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || !Subtarget->isTrapHandlerEnabled()) { DiagnosticInfoUnsupported NoTrap(MF.getFunction(), "debugtrap handler not supported", @@ -4004,7 +4004,7 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Chain, - DAG.getTargetConstant(SISubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16) + DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16) }; return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); } @@ -4513,7 +4513,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, MVT StoreVT = VData.getSimpleValueType(); if (StoreVT.getScalarType() == MVT::f16) { - if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS || + if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS || !BaseOpcode->HasD16) return Op; // D16 is unsupported for this instruction @@ -4526,7 +4526,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, } else { MVT LoadVT = Op.getSimpleValueType(); if (LoadVT.getScalarType() == MVT::f16) { - if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS || + if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS || !BaseOpcode->HasD16) return Op; // D16 is unsupported for this instruction @@ -4620,7 +4620,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32; int Opcode = -1; - if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8, NumVDataDwords, NumVAddrDwords); if (Opcode == -1) @@ -4699,16 +4699,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_rsq: return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_rsq_legacy: - if (Subtarget->getGeneration() >= 
SISubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) return emitRemovedIntrinsicError(DAG, DL, VT); return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_rcp_legacy: - if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) return emitRemovedIntrinsicError(DAG, DL, VT); return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_rsq_clamp: { - if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); Type *Type = VT.getTypeForEVT(*DAG.getContext()); @@ -4845,7 +4845,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_log_clamp: { - if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS) + if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) return SDValue(); DiagnosticInfoUnsupported BadIntrin( @@ -5278,7 +5278,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, } case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second; if (WGSize <= ST.getWavefrontSize()) return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other, @@ -5889,7 +5889,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const { SDValue Scale; - if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) { + if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { // Workaround a hardware bug on SI where the condition output from div_scale // is not usable. @@ -6709,7 +6709,7 @@ static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { } static bool isCanonicalized(SelectionDAG &DAG, SDValue Op, - const SISubtarget *ST, unsigned MaxDepth=5) { + const GCNSubtarget *ST, unsigned MaxDepth=5) { // If source is a result of another standard FP operation it is already in // canonical form. 
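A note on the generation checks in the isLegalAddressingMode hunk above: they survive this rename unchanged (apart from the class name) because AMDGPUSubtarget::Generation remains a single ordered enum in which the R600 family sorts below every GCN generation. Below is a minimal standalone sketch of that invariant together with the SMRD offset rules quoted in the diff; the enum's numeric values and the isLegalSMRDImmOffset helper are illustrative assumptions, not the exact LLVM definitions.

#include <cassert>

// Hypothetical mirror of the merged AMDGPUSubtarget::Generation enum. The
// R600 family must sort below every GCN generation so that range checks
// such as "Gen >= VOLCANIC_ISLANDS" or "Gen <= NORTHERN_ISLANDS" stay correct.
enum Generation {
  R600, R700, EVERGREEN, NORTHERN_ISLANDS,
  SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS, GFX9
};

// Illustrative stand-in for the SMRD offset legality rules shown in the
// isLegalAddressingMode hunk: an 8-bit dword offset on SI, a 32-bit literal
// dword offset on CI, and a 20-bit byte offset on VI and newer.
static bool isLegalSMRDImmOffset(Generation Gen, long long ByteOffs) {
  if (Gen == SOUTHERN_ISLANDS)
    return ByteOffs / 4 >= 0 && ByteOffs / 4 < (1 << 8);
  if (Gen == SEA_ISLANDS)
    return ByteOffs / 4 >= 0 && ByteOffs / 4 <= 0xFFFFFFFFLL;
  if (Gen >= VOLCANIC_ISLANDS)
    return ByteOffs >= 0 && ByteOffs < (1 << 20);
  return false; // R600 family: no SMRD instructions at all.
}

int main() {
  assert(isLegalSMRDImmOffset(SOUTHERN_ISLANDS, 255 * 4));  // max SI offset
  assert(!isLegalSMRDImmOffset(SOUTHERN_ISLANDS, 256 * 4)); // one dword too far
  assert(isLegalSMRDImmOffset(VOLCANIC_ISLANDS, (1 << 20) - 1));
  assert(!isLegalSMRDImmOffset(EVERGREEN, 4)); // pre-GCN generation
  return 0;
}

Because only the ordering matters, call sites comparing against SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS, or GFX9 needed nothing beyond the mechanical SISubtarget-to-AMDGPUSubtarget rename seen throughout this patch.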
@@ -8296,7 +8296,7 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, if (R) { const MachineFunction * MF = FLI->MF; - const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); const MachineRegisterInfo &MRI = MF->getRegInfo(); const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); unsigned Reg = R->getReg(); diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index f64694846e4..3e4ff84ab47 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -23,7 +23,7 @@ namespace llvm { class SITargetLowering final : public AMDGPUTargetLowering { private: - const SISubtarget *Subtarget; + const GCNSubtarget *Subtarget; SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, uint64_t Offset) const; @@ -162,9 +162,9 @@ private: bool shouldEmitPCReloc(const GlobalValue *GV) const; public: - SITargetLowering(const TargetMachine &tm, const SISubtarget &STI); + SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI); - const SISubtarget *getSubtarget() const; + const GCNSubtarget *getSubtarget() const; bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override; diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp index 02f790f80b3..61c8f359e16 100644 --- a/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -339,7 +339,7 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI, } bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); SkipThreshold = SkipThresholdFlag; diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 913e22ea381..d456e3d9b94 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -136,7 +136,7 @@ enum RegisterMapping { // "s_waitcnt 0" before use. 
class BlockWaitcntBrackets { public: - BlockWaitcntBrackets(const SISubtarget *SubTarget) : ST(SubTarget) { + BlockWaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { memset(VgprScores[T], 0, sizeof(VgprScores[T])); @@ -314,7 +314,7 @@ public: void dump() { print(dbgs()); } private: - const SISubtarget *ST = nullptr; + const GCNSubtarget *ST = nullptr; bool WaitAtBeginning = false; bool RevisitLoop = false; bool MixedExpTypes = false; @@ -364,7 +364,7 @@ private: class SIInsertWaitcnts : public MachineFunctionPass { private: - const SISubtarget *ST = nullptr; + const GCNSubtarget *ST = nullptr; const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; @@ -1837,7 +1837,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, } bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { - ST = &MF.getSubtarget<SISubtarget>(); + ST = &MF.getSubtarget<GCNSubtarget>(); TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index 9dbf3af9e86..b73d30940fc 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// def isGCN : Predicate<"Subtarget->getGeneration() " - ">= SISubtarget::SOUTHERN_ISLANDS">, + ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, AssemblerPredicate<"FeatureGCN">; def isSI : Predicate<"Subtarget->getGeneration() " - "== SISubtarget::SOUTHERN_ISLANDS">, + "== AMDGPUSubtarget::SOUTHERN_ISLANDS">, AssemblerPredicate<"FeatureSouthernIslands">; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 997b41e4b1a..41c029c9c78 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -84,7 +84,7 @@ static cl::opt<unsigned> BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)")); -SIInstrInfo::SIInstrInfo(const SISubtarget &ST) +SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), RI(ST), ST(ST) {} @@ -1035,7 +1035,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress( unsigned FrameOffset, unsigned Size) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); - const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize(); unsigned WavefrontSize = ST.getWavefrontSize(); @@ -2915,7 +2915,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } - if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) { + if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) { const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); if (Offset->getImm() != 0) { ErrInfo = "subtarget does not support offsets in flat instructions"; @@ -3666,8 +3666,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { } else { // This instructions is the _OFFSET variant, so we need to convert it to // ADDR64. 
- assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration() - < SISubtarget::VOLCANIC_ISLANDS && + assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration() + < AMDGPUSubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"); MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); @@ -3803,37 +3803,37 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { continue; case AMDGPU::S_LSHL_B32: - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I32: - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B32: - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B32_e64; swapOperands(Inst); } break; case AMDGPU::S_LSHL_B64: - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHLREV_B64; swapOperands(Inst); } break; case AMDGPU::S_ASHR_I64: - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_ASHRREV_I64; swapOperands(Inst); } break; case AMDGPU::S_LSHR_B64: - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { NewOpcode = AMDGPU::V_LSHRREV_B64; swapOperands(Inst); } @@ -4633,12 +4633,12 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; if (ST.isAmdHsaOS()) { // Set ATC = 1. GFX9 doesn't have this bit. - if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) + if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) RsrcDataFormat |= (1ULL << 56); // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this. // BTW, it disables TC L2 and therefore decreases performance. - if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS) + if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) RsrcDataFormat |= (2ULL << 59); } @@ -4651,7 +4651,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { 0xffffffff; // Size; // GFX9 doesn't have ELEMENT_SIZE. - if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) { + if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) { uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT; } @@ -4661,7 +4661,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. // Clear them unless we want a huge stride. 
- if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; return Rsrc23; @@ -4996,13 +4996,15 @@ enum SIEncodingFamily { GFX9 = 5 }; -static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) { +static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) { switch (ST.getGeneration()) { - case SISubtarget::SOUTHERN_ISLANDS: - case SISubtarget::SEA_ISLANDS: + default: + break; + case AMDGPUSubtarget::SOUTHERN_ISLANDS: + case AMDGPUSubtarget::SEA_ISLANDS: return SIEncodingFamily::SI; - case SISubtarget::VOLCANIC_ISLANDS: - case SISubtarget::GFX9: + case AMDGPUSubtarget::VOLCANIC_ISLANDS: + case AMDGPUSubtarget::GFX9: return SIEncodingFamily::VI; } llvm_unreachable("Unknown subtarget generation!"); @@ -5012,11 +5014,11 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { SIEncodingFamily Gen = subtargetEncodingFamily(ST); if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && - ST.getGeneration() >= SISubtarget::GFX9) + ST.getGeneration() >= AMDGPUSubtarget::GFX9) Gen = SIEncodingFamily::GFX9; if (get(Opcode).TSFlags & SIInstrFlags::SDWA) - Gen = ST.getGeneration() == SISubtarget::GFX9 ? SIEncodingFamily::SDWA9 + Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 : SIEncodingFamily::SDWA; // Adjust the encoding family to GFX80 for D16 buffer instructions when the // subtarget has UnpackedD16VMem feature. diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index a582f4cb2fc..0a735257d34 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -39,13 +39,13 @@ namespace llvm { class APInt; class MachineRegisterInfo; class RegScavenger; -class SISubtarget; +class GCNSubtarget; class TargetRegisterClass; class SIInstrInfo final : public AMDGPUGenInstrInfo { private: const SIRegisterInfo RI; - const SISubtarget &ST; + const GCNSubtarget &ST; // The inverse predicate should have the negative value. 
enum BranchPredicate { @@ -147,7 +147,7 @@ public: MO_REL32_HI = 5 }; - explicit SIInstrInfo(const SISubtarget &ST); + explicit SIInstrInfo(const GCNSubtarget &ST); const SIRegisterInfo &getRegisterInfo() const { return RI; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index e8d89aaed4d..8fa37aa83da 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// def isCI : Predicate<"Subtarget->getGeneration() " - ">= SISubtarget::SEA_ISLANDS">; + ">= AMDGPUSubtarget::SEA_ISLANDS">; def isCIOnly : Predicate<"Subtarget->getGeneration() ==" - "SISubtarget::SEA_ISLANDS">, + "AMDGPUSubtarget::SEA_ISLANDS">, AssemblerPredicate <"FeatureSeaIslands">; def isVIOnly : Predicate<"Subtarget->getGeneration() ==" - "SISubtarget::VOLCANIC_ISLANDS">, + "AMDGPUSubtarget::VOLCANIC_ISLANDS">, AssemblerPredicate <"FeatureVolcanicIslands">; def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; @@ -487,7 +487,7 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{ }]>; class VGPRImm <dag frag> : PatLeaf<frag, [{ - if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) { + if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) { return false; } const SIRegisterInfo *SIRI = diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index b35010c794c..4b537540046 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -103,7 +103,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass { }; private: - const SISubtarget *STM = nullptr; + const GCNSubtarget *STM = nullptr; const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; MachineRegisterInfo *MRI = nullptr; @@ -939,7 +939,7 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - STM = &MF.getSubtarget<SISubtarget>(); + STM = &MF.getSubtarget<GCNSubtarget>(); if (!STM->loadStoreOptEnabled()) return false; diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 3c0c5f93ce9..ad30317c344 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -486,7 +486,7 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) { } bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 8a9fef41f68..ecc6cff407e 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -66,7 +66,7 @@ FunctionPass *llvm::createSILowerI1CopiesPass() { bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 1b74c140d0a..7c5bc7431e4 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ 
b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -47,7 +47,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitArgPtr(false), GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const Function &F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); @@ -178,7 +178,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { limitOccupancy(getMaxWavesPerEU()); - const SISubtarget& ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>(); limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(), MF.getFunction())); } @@ -253,7 +253,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, if (!SpillLanes.empty()) return true; - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index b3941be3ce7..4e1200ad4f1 100644 --- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -257,12 +257,12 @@ protected: IsaInfo::IsaVersion IV; - SICacheControl(const SISubtarget &ST); + SICacheControl(const GCNSubtarget &ST); public: /// Create a cache control for the subtarget \p ST. - static std::unique_ptr<SICacheControl> create(const SISubtarget &ST); + static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST); /// Update \p MI memory load instruction to bypass any caches up to /// the \p Scope memory scope for address spaces \p @@ -322,7 +322,7 @@ protected: public: - SIGfx6CacheControl(const SISubtarget &ST) : SICacheControl(ST) {}; + SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}; bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -346,7 +346,7 @@ public: class SIGfx7CacheControl : public SIGfx6CacheControl { public: - SIGfx7CacheControl(const SISubtarget &ST) : SIGfx6CacheControl(ST) {}; + SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}; bool insertCacheInvalidate(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -606,14 +606,14 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo( return constructFromMIWithMMO(MI); } -SICacheControl::SICacheControl(const SISubtarget &ST) { +SICacheControl::SICacheControl(const GCNSubtarget &ST) { TII = ST.getInstrInfo(); IV = IsaInfo::getIsaVersion(ST.getFeatureBits()); } /* static */ -std::unique_ptr<SICacheControl> SICacheControl::create(const SISubtarget &ST) { - AMDGPUSubtarget::Generation Generation = ST.getGeneration(); +std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) { + GCNSubtarget::Generation Generation = ST.getGeneration(); if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS) return make_unique<SIGfx6CacheControl>(ST); return make_unique<SIGfx7CacheControl>(ST); @@ -1012,7 +1012,7 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; SIMemOpAccess MOA(MF); - CC = SICacheControl::create(MF.getSubtarget<SISubtarget>()); + CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>()); for (auto &MBB : 
MF) { for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 7070ef80e48..ebcad30a186 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -209,7 +209,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 0971956d306..7b678d12ba8 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -107,7 +107,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 7c8297e4431..0e000b72962 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -90,9 +90,9 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; void matchSDWAOperands(MachineBasicBlock &MBB); std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI); - bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const; + bool isConvertibleToSDWA(const MachineInstr &MI, const GCNSubtarget &ST) const; bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); - void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const; + void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const; StringRef getPassName() const override { return "SI Peephole SDWA"; } @@ -855,7 +855,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) { } bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, - const SISubtarget &ST) const { + const GCNSubtarget &ST) const { // Check if this is already an SDWA instruction unsigned Opc = MI.getOpcode(); if (TII->isSDWA(Opc)) @@ -1082,7 +1082,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // If an instruction was converted to SDWA it should not have immediates or SGPR // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs. 
void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, - const SISubtarget &ST) const { + const GCNSubtarget &ST) const { const MCInstrDesc &Desc = TII->get(MI.getOpcode()); unsigned ConstantBusCount = 0; for (MachineOperand &Op : MI.explicit_uses()) { @@ -1113,7 +1113,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, } bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); if (!ST.hasSDWA() || skipFunction(MF.getFunction())) return false; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 5ad3af79b72..5bfe071c00e 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -56,7 +56,7 @@ static cl::opt<bool> EnableSpillSGPRToVGPR( cl::ReallyHidden, cl::init(true)); -SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) : +SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPURegisterInfo(), SGPRPressureSets(getNumRegPressureSets()), VGPRPressureSets(getNumRegPressureSets()), @@ -106,7 +106,7 @@ SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); @@ -131,7 +131,7 @@ static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF)); return AMDGPU::SGPR_32RegClass.getRegister(Reg); } @@ -173,7 +173,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13); reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); @@ -253,7 +253,7 @@ bool SIRegisterInfo::requiresFrameIndexReplacementScavenging( // create a virtual register for it during frame index elimination, so the // scavenger is directly needed. 
return MF.getFrameInfo().hasStackObjects() && - MF.getSubtarget<SISubtarget>().hasScalarStores() && + MF.getSubtarget<GCNSubtarget>().hasScalarStores() && MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs(); } @@ -308,7 +308,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, DL = Ins->getDebugLoc(); MachineFunction *MF = MBB->getParent(); - const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = Subtarget.getInstrInfo(); if (Offset == 0) { @@ -337,7 +337,7 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock *MBB = MI.getParent(); MachineFunction *MF = MBB->getParent(); - const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = Subtarget.getInstrInfo(); #ifndef NDEBUG @@ -524,7 +524,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, RegScavenger *RS) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); - const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -647,7 +647,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, return false; MachineRegisterInfo &MRI = MF->getRegInfo(); - const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); unsigned SuperReg = MI->getOperand(0).getReg(); @@ -825,7 +825,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, return false; MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const DebugLoc &DL = MI->getDebugLoc(); @@ -985,7 +985,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -1527,7 +1527,7 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index caf0f5d9308..5a51b67ca71 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -21,10 +21,9 @@ namespace llvm { -class AMDGPUSubtarget; +class GCNSubtarget; class LiveIntervals; class MachineRegisterInfo; -class SISubtarget; class SIMachineFunctionInfo; class SIRegisterInfo final : public AMDGPURegisterInfo { @@ -39,7 +38,7 @@ private: void classifyPressureSet(unsigned PSetID, unsigned Reg, BitVector &PressureSets) const; public: - 
SIRegisterInfo(const SISubtarget &ST); + SIRegisterInfo(const GCNSubtarget &ST); bool spillSGPRToVGPR() const { return SpillSGPRToVGPR; diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index b78bcc05d78..74f239d26e8 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -292,7 +292,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { return false; MachineRegisterInfo &MRI = MF.getRegInfo(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 4935a914a99..879726b1528 100644 --- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -849,7 +849,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { LowerToCopyInstrs.clear(); CallingConv = MF.getFunction().getCallingConv(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo();
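For context on the GCNRegPressure and GCNIterativeScheduler hunks earlier in this patch: occupancy is the minimum of the SGPR-limited and VGPR-limited wave counts, and sortRegionsByPressure orders regions so the highest-pressure (lowest-occupancy) region comes first. A minimal sketch follows, using invented occupancy tables; the real limits come from GCNSubtarget::getOccupancyWithNumSGPRs and getOccupancyWithNumVGPRs and vary by generation.

#include <algorithm>
#include <cassert>
#include <vector>

// Invented occupancy tables: stand-ins for the per-subtarget limits
// returned by GCNSubtarget::getOccupancyWithNumSGPRs / ...WithNumVGPRs.
static unsigned occWithSGPRs(unsigned NumSGPRs) {
  return NumSGPRs <= 80 ? 10 : NumSGPRs <= 96 ? 8 : 7;
}
static unsigned occWithVGPRs(unsigned NumVGPRs) {
  return NumVGPRs == 0 ? 10 : std::min(256u / NumVGPRs, 10u);
}

struct RegPressure {
  unsigned SGPRs, VGPRs;
  // Mirrors GCNRegPressure::getOccupancy: the achievable wave count is
  // capped by whichever register file runs out first.
  unsigned getOccupancy() const {
    return std::min(occWithSGPRs(SGPRs), occWithVGPRs(VGPRs));
  }
};

int main() {
  std::vector<RegPressure> Regions = {{40, 24}, {90, 64}, {32, 128}};
  // Highest pressure (lowest occupancy) first, as sortRegionsByPressure does.
  std::sort(Regions.begin(), Regions.end(),
            [](const RegPressure &A, const RegPressure &B) {
              return A.getOccupancy() < B.getOccupancy();
            });
  assert(Regions.front().getOccupancy() == 2);  // the 128-VGPR region
  assert(Regions.back().getOccupancy() == 10);  // the lightest region
  return 0;
}

The real GCNRegPressure::less is richer than this ascending-occupancy sort: it caps the comparison at the TargetOcc passed to sortRegionsByPressure and falls back to tuple-pressure weights as tie-breakers. The sketch keeps only the min-of-two-limits idea that the renamed call sites rely on.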