blob: 22c53d9e26cf3c33f6438a9304ad5f503250fad1 [file] [log] [blame]
//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <new>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "arm-instrinfo"
#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
uint16_t MulOpc; // Expanded multiplication opcode
uint16_t AddSubOpc; // Expanded add / sub opcode
bool NegAcc; // True if the acc is negated before the add / sub.
bool HasLane; // True if instruction has an extra "lane" operand.
};
static const ARM_MLxEntry ARM_MLxTable[] = {
// MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
// fp scalar ops
{ ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
{ ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
{ ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
{ ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
{ ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
{ ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
{ ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
{ ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
// fp SIMD ops
{ ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
{ ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
{ ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
{ ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
{ ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
{ ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
{ ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
{ ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
};
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
: ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
Subtarget(STI) {
for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
llvm_unreachable("Duplicated entries?");
MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
}
}
// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const {
if (usePreRAHazardRecognizer()) {
const InstrItineraryData *II =
static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
// FIXME: Thumb2 support.
if (!EnableARM3Addr)
return nullptr;
MachineFunction &MF = *MI.getParent()->getParent();
uint64_t TSFlags = MI.getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return nullptr;
case ARMII::IndexModePre:
isPre = true;
break;
case ARMII::IndexModePost:
break;
}
// Try splitting an indexed load/store to an un-indexed one plus an add/sub
// operation.
unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
if (MemOpc == 0)
return nullptr;
MachineInstr *UpdateMI = nullptr;
MachineInstr *MemMI = nullptr;
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI.getDesc();
unsigned NumOps = MCID.getNumOperands();
bool isLoad = !MI.mayStore();
const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
const MachineOperand &Base = MI.getOperand(2);
const MachineOperand &Offset = MI.getOperand(NumOps - 3);
unsigned WBReg = WB.getReg();
unsigned BaseReg = Base.getReg();
unsigned OffReg = Offset.getReg();
unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
switch (AddrMode) {
default: llvm_unreachable("Unknown indexed op!");
case ARMII::AddrMode2: {
bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
if (OffReg == 0) {
if (ARM_AM::getSOImmVal(Amt) == -1)
// Can't encode it in a so_imm operand. This transformation will
// add more than 1 instruction. Abandon!
return nullptr;
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
.add(predOps(Pred))
.add(condCodeOp());
} else if (Amt != 0) {
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
.addReg(0)
.addImm(SOOpc)
.add(predOps(Pred))
.add(condCodeOp());
} else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
.add(predOps(Pred))
.add(condCodeOp());
break;
}
case ARMII::AddrMode3 : {
bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM3Offset(OffImm);
if (OffReg == 0)
// Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
.add(predOps(Pred))
.add(condCodeOp());
else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
.add(predOps(Pred))
.add(condCodeOp());
break;
}
}
std::vector<MachineInstr*> NewMIs;
if (isPre) {
if (isLoad)
MemMI =
BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
.addReg(WBReg)
.addImm(0)
.addImm(Pred);
else
MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
.addReg(MI.getOperand(1).getReg())
.addReg(WBReg)
.addReg(0)
.addImm(0)
.addImm(Pred);
NewMIs.push_back(MemMI);
NewMIs.push_back(UpdateMI);
} else {
if (isLoad)
MemMI =
BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
.addReg(BaseReg)
.addImm(0)
.addImm(Pred);
else
MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
.addReg(MI.getOperand(1).getReg())
.addReg(BaseReg)
.addReg(0)
.addImm(0)
.addImm(Pred);
if (WB.isDead())
UpdateMI->getOperand(0).setIsDead();
NewMIs.push_back(UpdateMI);
NewMIs.push_back(MemMI);
}
// Transfer LiveVariables states, kill / dead info.
if (LV) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
if (MO.isDef()) {
MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
if (MO.isDead())
LV->addVirtualRegisterDead(Reg, *NewMI);
}
if (MO.isUse() && MO.isKill()) {
for (unsigned j = 0; j < 2; ++j) {
// Look at the two new MI's in reverse order.
MachineInstr *NewMI = NewMIs[j];
if (!NewMI->readsRegister(Reg))
continue;
LV->addVirtualRegisterKilled(Reg, *NewMI);
if (VI.removeKill(MI))
VI.Kills.push_back(NewMI);
break;
}
}
}
}
}
MachineBasicBlock::iterator MBBI = MI.getIterator();
MFI->insert(MBBI, NewMIs[1]);
MFI->insert(MBBI, NewMIs[0]);
return NewMIs[0];
}
// Branch analysis.
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
TBB = nullptr;
FBB = nullptr;
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
return false; // Empty blocks are easy.
--I;
// Walk backwards from the end of the basic block until the branch is
// analyzed or we give up.
while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
// Flag to be raised on unanalyzeable instructions. This is useful in cases
// where we want to clean up on the end of the basic block before we bail
// out.
bool CantAnalyze = false;
// Skip over DEBUG values and predicated nonterminators.
while (I->isDebugInstr() || !I->isTerminator()) {
if (I == MBB.begin())
return false;
--I;
}
if (isIndirectBranchOpcode(I->getOpcode()) ||
isJumpTableBranchOpcode(I->getOpcode())) {
// Indirect branches and jump tables can't be analyzed, but we still want
// to clean up any instructions at the tail of the basic block.
CantAnalyze = true;
} else if (isUncondBranchOpcode(I->getOpcode())) {
TBB = I->getOperand(0).getMBB();
} else if (isCondBranchOpcode(I->getOpcode())) {
// Bail out if we encounter multiple conditional branches.
if (!Cond.empty())
return true;
assert(!FBB && "FBB should have been null.");
FBB = TBB;
TBB = I->getOperand(0).getMBB();
Cond.push_back(I->getOperand(1));
Cond.push_back(I->getOperand(2));
} else if (I->isReturn()) {
// Returns can't be analyzed, but we should run cleanup.
CantAnalyze = !isPredicated(*I);
} else {
// We encountered other unrecognized terminator. Bail out immediately.
return true;
}
// Cleanup code - to be run for unpredicated unconditional branches and
// returns.
if (!isPredicated(*I) &&
(isUncondBranchOpcode(I->getOpcode()) ||
isIndirectBranchOpcode(I->getOpcode()) ||
isJumpTableBranchOpcode(I->getOpcode()) ||
I->isReturn())) {
// Forget any previous condition branch information - it no longer applies.
Cond.clear();
FBB = nullptr;
// If we can modify the function, delete everything below this
// unconditional branch.
if (AllowModify) {
MachineBasicBlock::iterator DI = std::next(I);
while (DI != MBB.end()) {
MachineInstr &InstToDelete = *DI;
++DI;
InstToDelete.eraseFromParent();
}
}
}
if (CantAnalyze)
return true;
if (I == MBB.begin())
return false;
--I;
}
// We made it past the terminators without bailing out - we must have
// analyzed this branch successfully.
return false;
}
unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
assert(!BytesRemoved && "code size not handled");
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
// Remove the branch.
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
if (!isCondBranchOpcode(I->getOpcode()))
return 1;
// Remove the branch.
I->eraseFromParent();
return 2;
}
unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
const DebugLoc &DL,
int *BytesAdded) const {
assert(!BytesAdded && "code size not handled");
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
int BccOpc = !AFI->isThumbFunction()
? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
// For conditional branches, we use addOperand to preserve CPSR flags.
if (!FBB) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else
BuildMI(&MBB, DL, get(BccOpc))
.addMBB(TBB)
.addImm(Cond[0].getImm())
.add(Cond[1]);
return 1;
}
// Two-way conditional branch.
BuildMI(&MBB, DL, get(BccOpc))
.addMBB(TBB)
.addImm(Cond[0].getImm())
.add(Cond[1]);
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
bool ARMBaseInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
Cond[0].setImm(ARMCC::getOppositeCondition(CC));
return false;
}
bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
if (MI.isBundle()) {
MachineBasicBlock::const_instr_iterator I = MI.getIterator();
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
int PIdx = I->findFirstPredOperandIdx();
if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
return true;
}
return false;
}
int PIdx = MI.findFirstPredOperandIdx();
return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
}
bool ARMBaseInstrInfo::PredicateInstruction(
MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
unsigned Opc = MI.getOpcode();
if (isUncondBranchOpcode(Opc)) {
MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
return true;
}
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1) {
MachineOperand &PMO = MI.getOperand(PIdx);
PMO.setImm(Pred[0].getImm());
MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
return true;
}
return false;
}
bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
ArrayRef<MachineOperand> Pred2) const {
if (Pred1.size() > 2 || Pred2.size() > 2)
return false;
ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
if (CC1 == CC2)
return true;
switch (CC1) {
default:
return false;
case ARMCC::AL:
return true;
case ARMCC::HS:
return CC2 == ARMCC::HI;
case ARMCC::LS:
return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
case ARMCC::GE:
return CC2 == ARMCC::GT;
case ARMCC::LE:
return CC2 == ARMCC::LT;
}
}
bool ARMBaseInstrInfo::DefinesPredicate(
MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
bool Found = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
(MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
Pred.push_back(MO);
Found = true;
}
}
return Found;
}
bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
for (const auto &MO : MI.operands())
if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}
bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
unsigned Op) const {
const MachineOperand &Offset = MI.getOperand(Op + 1);
return Offset.getReg() != 0;
}
// Load with negative register offset requires additional 1cyc and +I unit
// for Cortex A57
bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
unsigned Op) const {
const MachineOperand &Offset = MI.getOperand(Op + 1);
const MachineOperand &Opc = MI.getOperand(Op + 2);
assert(Opc.isImm());
assert(Offset.isReg());
int64_t OpcImm = Opc.getImm();
bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
return (isSub && Offset.getReg() != 0);
}
bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
unsigned Op) const {
const MachineOperand &Opc = MI.getOperand(Op + 2);
unsigned OffImm = Opc.getImm();
return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
}
// Load, scaled register offset, not plus LSL2
bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
unsigned Op) const {
const MachineOperand &Opc = MI.getOperand(Op + 2);
unsigned OffImm = Opc.getImm();
bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
return !SimpleScaled;
}
// Minus reg for ldstso addr mode
bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
unsigned Op) const {
unsigned OffImm = MI.getOperand(Op + 2).getImm();
return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
}
// Load, scaled register offset
bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
unsigned Op) const {
unsigned OffImm = MI.getOperand(Op + 2).getImm();
return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
}
static bool isEligibleForITBlock(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return true;
case ARM::tADC: // ADC (register) T1
case ARM::tADDi3: // ADD (immediate) T1
case ARM::tADDi8: // ADD (immediate) T2
case ARM::tADDrr: // ADD (register) T1
case ARM::tAND: // AND (register) T1
case ARM::tASRri: // ASR (immediate) T1
case ARM::tASRrr: // ASR (register) T1
case ARM::tBIC: // BIC (register) T1
case ARM::tEOR: // EOR (register) T1
case ARM::tLSLri: // LSL (immediate) T1
case ARM::tLSLrr: // LSL (register) T1
case ARM::tLSRri: // LSR (immediate) T1
case ARM::tLSRrr: // LSR (register) T1
case ARM::tMUL: // MUL T1
case ARM::tMVN: // MVN (register) T1
case ARM::tORR: // ORR (register) T1
case ARM::tROR: // ROR (register) T1
case ARM::tRSB: // RSB (immediate) T1
case ARM::tSBC: // SBC (register) T1
case ARM::tSUBi3: // SUB (immediate) T1
case ARM::tSUBi8: // SUB (immediate) T2
case ARM::tSUBrr: // SUB (register) T1
return !ARMBaseInstrInfo::isCPSRDefined(*MI);
}
}
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
if (!MI.isPredicable())
return false;
if (MI.isBundle())
return false;
if (!isEligibleForITBlock(&MI))
return false;
const ARMFunctionInfo *AFI =
MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
// Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
// In their ARM encoding, they can't be encoded in a conditional form.
if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
return false;
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
return isV8EligibleForIT(&MI);
}
return true;
}
namespace llvm {
template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
continue;
if (MO.getReg() != ARM::CPSR)
continue;
if (!MO.isDead())
return false;
}
// all definitions of CPSR are dead
return true;
}
} // end namespace llvm
/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
const MCInstrDesc &MCID = MI.getDesc();
if (MCID.getSize())
return MCID.getSize();
switch (MI.getOpcode()) {
default:
// pseudo-instruction sizes are zero.
return 0;
case TargetOpcode::BUNDLE:
return getInstBundleLength(MI);
case ARM::MOVi16_ga_pcrel:
case ARM::MOVTi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel:
case ARM::t2MOVTi16_ga_pcrel:
return 4;
case ARM::MOVi32imm:
case ARM::t2MOVi32imm:
return 8;
case ARM::CONSTPOOL_ENTRY:
case ARM::JUMPTABLE_INSTS:
case ARM::JUMPTABLE_ADDRS:
case ARM::JUMPTABLE_TBB:
case ARM::JUMPTABLE_TBH:
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI.getOperand(2).getImm();
case ARM::Int_eh_sjlj_longjmp:
return 16;
case ARM::tInt_eh_sjlj_longjmp:
return 10;
case ARM::tInt_WIN_eh_sjlj_longjmp:
return 12;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
return 20;
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
return 12;
case ARM::SPACE:
return MI.getOperand(1).getImm();
case ARM::INLINEASM:
case ARM::INLINEASM_BR: {
// If this machine instr is an inline asm, measure it.
unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
Size = alignTo(Size, 4);
return Size;
}
}
}
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
unsigned Size = 0;
MachineBasicBlock::const_instr_iterator I = MI.getIterator();
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
Size += getInstSizeInBytes(*I);
}
return Size;
}
void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, bool KillSrc,
const ARMSubtarget &Subtarget) const {
unsigned Opc = Subtarget.isThumb()
? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
: ARM::MRS;
MachineInstrBuilder MIB =
BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
// There is only 1 A/R class MRS instruction, and it always refers to
// APSR. However, there are lots of other possibilities on M-class cores.
if (Subtarget.isMClass())
MIB.addImm(0x800);
MIB.add(predOps(ARMCC::AL))
.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned SrcReg, bool KillSrc,
const ARMSubtarget &Subtarget) const {
unsigned Opc = Subtarget.isThumb()
? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
: ARM::MSR;
MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
if (Subtarget.isMClass())
MIB.addImm(0x800);
else
MIB.addImm(8);
MIB.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL))
.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL))
.add(condCodeOp());
return;
}
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
unsigned Opc = 0;
if (SPRDest && SPRSrc)
Opc = ARM::VMOVS;
else if (GPRDest && SPRSrc)
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
if (Opc) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::VORRq)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
MIB.add(predOps(ARMCC::AL));
return;
}
// Handle register classes that require multiple instructions.
unsigned BeginIdx = 0;
unsigned SubRegs = 0;
int Spacing = 1;
// Use VORRq when possible.
if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VORRq;
BeginIdx = ARM::qsub_0;
SubRegs = 2;
} else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VORRq;
BeginIdx = ARM::qsub_0;
SubRegs = 4;
// Fall back to VMOVD.
} else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 2;
} else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 3;
} else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 4;
} else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
BeginIdx = ARM::gsub_0;
SubRegs = 2;
} else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 2;
Spacing = 2;
} else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 3;
Spacing = 2;
} else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
Opc = ARM::VMOVD;
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
} else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
Opc = ARM::VMOVS;
BeginIdx = ARM::ssub_0;
SubRegs = 2;
} else if (SrcReg == ARM::CPSR) {
copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
return;
} else if (DestReg == ARM::CPSR) {
copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
return;
}
assert(Opc && "Impossible reg-to-reg copy");
const TargetRegisterInfo *TRI = &getRegisterInfo();
MachineInstrBuilder Mov;
// Copy register tuples backward when the first Dest reg overlaps with SrcReg.
if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
Spacing = -Spacing;
}
#ifndef NDEBUG
SmallSet<unsigned, 4> DstRegs;
#endif
for (unsigned i = 0; i != SubRegs; ++i) {
unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
assert(!DstRegs.count(Src) && "destructive vector copy");
DstRegs.insert(Dst);
#endif
Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
Mov = Mov.add(predOps(ARMCC::AL));
// MOVr can set CC.
if (Opc == ARM::MOVr)
Mov = Mov.add(condCodeOp());
}
// Add implicit super-register defs and kills to the last instruction.
Mov->addRegisterDefined(DestReg, TRI);
if (KillSrc)
Mov->addRegisterKilled(SrcReg, TRI);
}
bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
const MachineOperand *&Src,
const MachineOperand *&Dest) const {
// VMOVRRD is also a copy instruction but it requires
// special way of handling. It is more complex copy version
// and since that we are not considering it. For recognition
// of such instruction isExtractSubregLike MI interface fuction
// could be used.
// VORRq is considered as a move only if two inputs are
// the same register.
if (!MI.isMoveReg() ||
(MI.getOpcode() == ARM::VORRq &&
MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
return false;
Dest = &MI.getOperand(0);
Src = &MI.getOperand(1);
return true;
}
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
return MIB.addReg(Reg, State, SubIdx);
}
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), Align);
switch (TRI->getSpillSize(*RC)) {
case 2:
if (ARM::HPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
} else
llvm_unreachable("Unknown reg class!");
break;
default:
llvm_unreachable("Unknown reg class!");
}
}
unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default: break;
case ARM::STRrs:
case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
MI.getOperand(3).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
case ARM::STRi12:
case ARM::t2STRi12:
case ARM::tSTRspi:
case ARM::VSTRD:
case ARM::VSTRS:
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
case ARM::VST1q64:
case ARM::VST1d64TPseudo:
case ARM::VST1d64QPseudo:
if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
FrameIndex = MI.getOperand(0).getIndex();
return MI.getOperand(2).getReg();
}
break;
case ARM::VSTMQIA:
if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
}
return 0;
}
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
SmallVector<const MachineMemOperand *, 1> Accesses;
if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
Accesses.size() == 1) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
return true;
}
return false;
}
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
switch (TRI->getSpillSize(*RC)) {
case 2:
if (ARM::HPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB;
if (Subtarget.hasV5TEOps()) {
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
// Fallback to LDM instruction, which has existed since the dawn of
// time.
MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
}
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI)
.addImm(16)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
.addFrameIndex(FI)
.addImm(16)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI)
.addImm(16)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
default:
llvm_unreachable("Unknown regclass!");
}
}
unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
MI.getOperand(3).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
case ARM::LDRi12:
case ARM::t2LDRi12:
case ARM::tLDRspi:
case ARM::VLDRD:
case ARM::VLDRS:
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
case ARM::VLD1q64:
case ARM::VLD1d8TPseudo:
case ARM::VLD1d16TPseudo:
case ARM::VLD1d32TPseudo:
case ARM::VLD1d64TPseudo:
case ARM::VLD1d8QPseudo:
case ARM::VLD1d16QPseudo:
case ARM::VLD1d32QPseudo:
case ARM::VLD1d64QPseudo:
if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
case ARM::VLDMQIA:
if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
}
return 0;
}
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
SmallVector<const MachineMemOperand *, 1> Accesses;
if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
Accesses.size() == 1) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
return true;
}
return false;
}
/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
/// depending on whether the result is used.
void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
bool isThumb1 = Subtarget.isThumb1Only();
bool isThumb2 = Subtarget.isThumb2();
const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineBasicBlock *BB = MI->getParent();
MachineInstrBuilder LDM, STM;
if (isThumb1 || !MI->getOperand(1).isDead()) {
MachineOperand LDWb(MI->getOperand(1));
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
: isThumb1 ? ARM::tLDMIA_UPD
: ARM::LDMIA_UPD))
.add(LDWb);
} else {
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
}
if (isThumb1 || !MI->getOperand(0).isDead()) {
MachineOperand STWb(MI->getOperand(0));
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
: isThumb1 ? ARM::tSTMIA_UPD
: ARM::STMIA_UPD))
.add(STWb);
} else {
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
}
MachineOperand LDBase(MI->getOperand(3));
LDM.add(LDBase).add(predOps(ARMCC::AL));
MachineOperand STBase(MI->getOperand(2));
STM.add(STBase).add(predOps(ARMCC::AL));
// Sort the scratch registers into ascending order.
const TargetRegisterInfo &TRI = getRegisterInfo();
SmallVector<unsigned, 6> ScratchRegs;
for(unsigned I = 5; I < MI->getNumOperands(); ++I)
ScratchRegs.push_back(MI->getOperand(I).getReg());
llvm::sort(ScratchRegs,
[&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
return TRI.getEncodingValue(Reg1) <
TRI.getEncodingValue(Reg2);
});
for (const auto &Reg : ScratchRegs) {
LDM.addReg(Reg, RegState::Define);
STM.addReg(Reg, RegState::Kill);
}
BB->erase(MI);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
"LOAD_STACK_GUARD currently supported only for MachO.");
expandLoadStackGuard(MI);
MI.getParent()->erase(MI);
return true;
}
if (MI.getOpcode() == ARM::MEMCPY) {
expandMEMCPY(MI);
return true;
}
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
return false;
// Look for a copy between even S-registers. That is where we keep floats
// when using NEON v2f32 instructions for f32 arithmetic.
unsigned DstRegS = MI.getOperand(0).getReg();
unsigned SrcRegS = MI.getOperand(1).getReg();
if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
return false;
const TargetRegisterInfo *TRI = &getRegisterInfo();
unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
&ARM::DPRRegClass);
unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
&ARM::DPRRegClass);
if (!DstRegD || !SrcRegD)
return false;
// We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
// legal if the COPY already defines the full DstRegD, and it isn't a
// sub-register insertion.
if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
return false;
// A dead copy shouldn't show up here, but reject it just in case.
if (MI.getOperand(0).isDead())
return false;
// All clear, widen the COPY.
LLVM_DEBUG(dbgs() << "widening: " << MI);
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
// Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
// or some other super-register.
int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
if (ImpDefIdx != -1)
MI.RemoveOperand(ImpDefIdx);
// Change the opcode and operands.
MI.setDesc(get(ARM::VMOVD));
MI.getOperand(0).setReg(DstRegD);
MI.getOperand(1).setReg(SrcRegD);
MIB.add(predOps(ARMCC::AL));
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
// are reading an undefined value from SrcRegD, but a proper value from
// SrcRegS.
MI.getOperand(1).setIsUndef();
MIB.addReg(SrcRegS, RegState::Implicit);
// SrcRegD may actually contain an unrelated value in the ssub_1
// sub-register. Don't kill it. Only kill the ssub_0 sub-register.
if (MI.getOperand(1).isKill()) {
MI.getOperand(1).setIsKill(false);
MI.addRegisterKilled(SrcRegS, TRI, true);
}
LLVM_DEBUG(dbgs() << "replaced by: " << MI);
return true;
}
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
MachineConstantPool *MCP = MF.getConstantPool();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
assert(MCPE.isMachineConstantPoolEntry() &&
"Expecting a machine constantpool entry!");
ARMConstantPoolValue *ACPV =
static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
unsigned PCLabelId = AFI->createPICLabelUId();
ARMConstantPoolValue *NewCPV = nullptr;
// FIXME: The below assumes PIC relocation model and that the function
// is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
// zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
NewCPV = ARMConstantPoolConstant::Create(
cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
else if (ACPV->isExtSymbol())
NewCPV = ARMConstantPoolSymbol::
Create(MF.getFunction().getContext(),
cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
else if (ACPV->isBlockAddress())
NewCPV = ARMConstantPoolConstant::
Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
ARMCP::CPBlockAddress, 4);
else if (ACPV->isLSDA())
NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
ARMCP::CPLSDA, 4);
else if (ACPV->isMachineBasicBlock())
NewCPV = ARMConstantPoolMBB::
Create(MF.getFunction().getContext(),
cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
return PCLabelId;
}
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig.getOpcode();
switch (Opcode) {
default: {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
break;
}
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
MachineFunction &MF = *MBB.getParent();
unsigned CPI = Orig.getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
.addConstantPoolIndex(CPI)
.addImm(PCLabelId)
.cloneMemRefs(Orig);
break;
}
}
}
MachineInstr &
ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MachineInstr &Orig) const {
MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
MachineBasicBlock::instr_iterator I = Cloned.getIterator();
for (;;) {
switch (I->getOpcode()) {
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
MachineFunction &MF = *MBB.getParent();
unsigned CPI = I->getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
I->getOperand(1).setIndex(CPI);
I->getOperand(2).setImm(PCLabelId);
break;
}
}
if (!I->isBundledWithSucc())
break;
++I;
}
return Cloned;
}
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
const MachineInstr &MI1,
const MachineRegisterInfo *MRI) const {
unsigned Opcode = MI0.getOpcode();
if (Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
Opcode == ARM::tLDRpci_pic ||
Opcode == ARM::LDRLIT_ga_pcrel ||
Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
Opcode == ARM::tLDRLIT_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel) {
if (MI1.getOpcode() != Opcode)
return false;
if (MI0.getNumOperands() != MI1.getNumOperands())
return false;
const MachineOperand &MO0 = MI0.getOperand(1);
const MachineOperand &MO1 = MI1.getOperand(1);
if (MO0.getOffset() != MO1.getOffset())
return false;
if (Opcode == ARM::LDRLIT_ga_pcrel ||
Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
Opcode == ARM::tLDRLIT_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel)
// Ignore the PC labels.
return MO0.getGlobal() == MO1.getGlobal();
const MachineFunction *MF = MI0.getParent()->getParent();
const MachineConstantPool *MCP = MF->getConstantPool();
int CPI0 = MO0.getIndex();
int CPI1 = MO1.getIndex();
const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
if (isARMCP0 && isARMCP1) {
ARMConstantPoolValue *ACPV0 =
static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
ARMConstantPoolValue *ACPV1 =
static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
return ACPV0->hasSameValue(ACPV1);
} else if (!isARMCP0 && !isARMCP1) {
return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
}
return false;
} else if (Opcode == ARM::PICLDR) {
if (MI1.getOpcode() != Opcode)
return false;
if (MI0.getNumOperands() != MI1.getNumOperands())
return false;
unsigned Addr0 = MI0.getOperand(1).getReg();
unsigned Addr1 = MI1.getOperand(1).getReg();
if (Addr0 != Addr1) {
if (!MRI ||
!TargetRegisterInfo::isVirtualRegister(Addr0) ||
!TargetRegisterInfo::isVirtualRegister(Addr1))
return false;
// This assumes SSA form.
MachineInstr *Def0 = MRI->getVRegDef(Addr0);
MachineInstr *Def1 = MRI->getVRegDef(Addr1);
// Check if the loaded value, e.g. a constantpool of a global address, are
// the same.
if (!produceSameValue(*Def0, *Def1, MRI))
return false;
}
for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
// %12 = PICLDR %11, 0, 14, %noreg
const MachineOperand &MO0 = MI0.getOperand(i);
const MachineOperand &MO1 = MI1.getOperand(i);
if (!MO0.isIdenticalTo(MO1))
return false;
}
return true;
}
return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const {
// Don't worry about Thumb: just ARM and Thumb2.
if (Subtarget.isThumb1Only()) return false;
if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
return false;
switch (Load1->getMachineOpcode()) {
default:
return false;
case ARM::LDRi12:
case ARM::LDRBi12:
case ARM::LDRD:
case ARM::LDRH:
case ARM::LDRSB:
case ARM::LDRSH:
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
case ARM::t2LDRBi8:
case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
switch (Load2->getMachineOpcode()) {
default:
return false;
case ARM::LDRi12:
case ARM::LDRBi12:
case ARM::LDRD:
case ARM::LDRH:
case ARM::LDRSB:
case ARM::LDRSH:
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
case ARM::t2LDRBi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
// Check if base addresses and chain operands match.
if (Load1->getOperand(0) != Load2->getOperand(0) ||
Load1->getOperand(4) != Load2->getOperand(4))
return false;
// Index should be Reg0.
if (Load1->getOperand(3) != Load2->getOperand(3))
return false;
// Determine the offsets.
if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
isa<ConstantSDNode>(Load2->getOperand(1))) {
Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
return true;
}
return false;
}
/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled togther. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
// Don't worry about Thumb: just ARM and Thumb2.
if (Subtarget.isThumb1Only()) return false;
assert(Offset2 > Offset1);
if ((Offset2 - Offset1) / 8 > 64)
return false;
// Check if the machine opcodes are different. If they are different
// then we consider them to not be of the same base address,
// EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
// In this case, they are considered to be the same because they are different
// encoding forms of the same basic instruction.
if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
!((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
(Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
Load2->getMachineOpcode() == ARM::t2LDRBi8)))
return false; // FIXME: overly conservative?
// Four loads in a row should be sufficient.
if (NumLoads >= 3)
return false;
return true;
}
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
// Debug info is never a scheduling boundary. It's necessary to be explicit
// due to the special treatment of IT instructions below, otherwise a
// dbg_value followed by an IT will result in the IT instruction being
// considered a scheduling hazard, which is wrong. It should be the actual
// instruction preceding the dbg_value instruction(s), just like it is
// when debug info is not present.
if (MI.isDebugInstr())
return false;
// Terminators and labels can't be scheduled around.
if (MI.isTerminator() || MI.isPosition())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
// t2IT along with all instructions following it.
// FIXME: This is a big hammer. But the alternative is to add all potential
// true and anti dependencies to IT block instructions as implicit operands
// to the t2IT instruction. The added compile time and complexity does not
// seem worth it.
MachineBasicBlock::const_iterator I = MI;
// Make sure to skip any debug instructions
while (++I != MBB->end() && I->isDebugInstr())
;
if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
return true;
// Don't attempt to schedule around any instruction that defines
// a stack-oriented pointer, as it's unlikely to be profitable. This
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
// Calls don't actually change the stack pointer, even if they have imp-defs.
// No ARM calling conventions change the stack pointer. (X86 calling
// conventions sometimes do).
if (!MI.isCall() && MI.definesRegister(ARM::SP))
return true;
return false;
}
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
BranchProbability Probability) const {
if (!NumCycles)
return false;
// If we are optimizing for size, see if the branch in the predecessor can be
// lowered to cbn?z by the constant island lowering pass, and return false if
// so. This results in a shorter instruction sequence.
if (MBB.getParent()->getFunction().hasOptSize()) {
MachineBasicBlock *Pred = *MBB.pred_begin();
if (!Pred->empty()) {
MachineInstr *LastMI = &*Pred->rbegin();
if (LastMI->getOpcode() == ARM::t2Bcc) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
if (CmpMI)
return false;
}
}
}
return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
MBB, 0, 0, Probability);
}
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TBB,
unsigned TCycles, unsigned TExtra,
MachineBasicBlock &FBB,
unsigned FCycles, unsigned FExtra,
BranchProbability Probability) const {
if (!TCycles)
return false;
// In thumb code we often end up trading one branch for a IT block, and
// if we are cloning the instruction can increase code size. Prevent
// blocks with multiple predecesors from being ifcvted to prevent this
// cloning.
if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
return false;
}
// Attempt to estimate the relative costs of predication versus branching.
// Here we scale up each component of UnpredCost to avoid precision issue when
// scaling TCycles/FCycles by Probability.
const unsigned ScalingUpFactor = 1024;
unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
unsigned UnpredCost;
if (!Subtarget.hasBranchPredictor()) {
// When we don't have a branch predictor it's always cheaper to not take a
// branch than take it, so we have to take that into account.
unsigned NotTakenBranchCost = 1;
unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
unsigned TUnpredCycles, FUnpredCycles;
if (!FCycles) {
// Triangle: TBB is the fallthrough
TUnpredCycles = TCycles + NotTakenBranchCost;
FUnpredCycles = TakenBranchCost;
} else {
// Diamond: TBB is the block that is branched to, FBB is the fallthrough
TUnpredCycles = TCycles + TakenBranchCost;
FUnpredCycles = FCycles + NotTakenBranchCost;
// The branch at the end of FBB will disappear when it's predicated, so
// discount it from PredCost.
PredCost -= 1 * ScalingUpFactor;
}
// The total cost is the cost of each path scaled by their probabilites
unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
UnpredCost = TUnpredCost + FUnpredCost;
// When predicating assume that the first IT can be folded away but later
// ones cost one cycle each
if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
}
} else {
unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
unsigned FUnpredCost =
Probability.getCompl().scale(FCycles * ScalingUpFactor);
UnpredCost = TUnpredCost + FUnpredCost;
UnpredCost += 1 * ScalingUpFactor; // The branch itself
UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
}
return PredCost <= UnpredCost;
}
bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
// Reduce false anti-dependencies to let the target's out-of-order execution
// engine do its thing.
return Subtarget.isProfitableToUnpredicate();
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
unsigned &PredReg) {
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx == -1) {
PredReg = 0;
return ARMCC::AL;
}
PredReg = MI.getOperand(PIdx+1).getReg();
return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
}
unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
if (Opc == ARM::tB)
return ARM::tBcc;
if (Opc == ARM::t2B)
return ARM::t2Bcc;
llvm_unreachable("Unknown unconditional branch opcode!");
}
MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
switch (MI.getOpcode()) {
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
// MOVCC can be commuted by inverting the condition.
unsigned PredReg = 0;
ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return nullptr;
MachineInstr *CommutedMI =
TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
if (!CommutedMI)
return nullptr;
// After swapping the MOVCC operands, also invert the condition.
CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
.setImm(ARMCC::getOppositeCondition(CC));
return CommutedMI;
}
}
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
// MI is folded into the MOVCC by predicating it.
if (!MI->isPredicable())
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
// Reject frame index operands, PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return nullptr;
if (!MO.isReg())
continue;
// MI can't have any tied operands, that would conflict with predication.
if (MO.isTied())
return nullptr;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
return nullptr;
if (MO.isDef() && !MO.isDead())
return nullptr;
}
bool DontMoveAcrossStores = true;
if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
return nullptr;
return MI;
}
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
// MOVCC operands:
// 0: Def.
// 1: True use.
// 2: False use.
// 3: Condition code.
// 4: CPSR use.
TrueOp = 1;
FalseOp = 2;
Cond.push_back(MI.getOperand(3));
Cond.push_back(MI.getOperand(4));
// We can always fold a def.
Optimizable = true;
return false;
}
MachineInstr *
ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
bool PreferFalse) const {
assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
if (!DefMI)
return nullptr;
// Find new register class to use.
MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
unsigned DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return nullptr;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
NewMI.add(DefMI->getOperand(i));
unsigned CondCode = MI.getOperand(3).getImm();
if (Invert)
NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
else
NewMI.addImm(CondCode);
NewMI.add(MI.getOperand(4));
// DefMI is not the -S version that sets CPSR, so add an optional %noreg.
if (NewMI->hasOptionalDef())
NewMI.add(condCodeOp());
// The output register value when the predicate is false is an implicit
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
FalseReg.setImplicit();
NewMI.add(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// Update SeenMIs set: register newly created MI and erase removed DefMI.
SeenMIs.insert(NewMI);
SeenMIs.erase(DefMI);
// If MI is inside a loop, and DefMI is outside the loop, then kill flags on
// DefMI would be invalid when tranferred inside the loop. Checking for a
// loop is expensive, but at least remove kill flags if they are in different
// BBs.
if (DefMI->getParent() != MI.getParent())
NewMI->clearKillInfo();
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
}
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
uint16_t PseudoOpc;
uint16_t MachineOpc;
};
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::ADDSri, ARM::ADDri},
{ARM::ADDSrr, ARM::ADDrr},
{ARM::ADDSrsi, ARM::ADDrsi},
{ARM::ADDSrsr, ARM::ADDrsr},
{ARM::SUBSri, ARM::SUBri},
{ARM::SUBSrr, ARM::SUBrr},
{ARM::SUBSrsi, ARM::SUBrsi},
{ARM::SUBSrsr, ARM::SUBrsr},
{ARM::RSBSri, ARM::RSBri},
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
{ARM::tADDSi3, ARM::tADDi3},
{ARM::tADDSi8, ARM::tADDi8},
{ARM::tADDSrr, ARM::tADDrr},
{ARM::tADCS, ARM::tADC},
{ARM::tSUBSi3, ARM::tSUBi3},
{ARM::tSUBSi8, ARM::tSUBi8},
{ARM::tSUBSrr, ARM::tSUBrr},
{ARM::tSBCS, ARM::tSBC},
{ARM::tRSBS, ARM::tRSB},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
{ARM::t2ADDSrs, ARM::t2ADDrs},
{ARM::t2SUBSri, ARM::t2SUBri},
{ARM::t2SUBSrr, ARM::t2SUBrr},
{ARM::t2SUBSrs, ARM::t2SUBrs},
{ARM::t2RSBSri, ARM::t2RSBri},
{ARM::t2RSBSrs, ARM::t2RSBrs},
};
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
return AddSubFlagsOpcodeMap[i].MachineOpc;
return 0;
}
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, unsigned DestReg,
unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII,
unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
.addReg(BaseReg, RegState::Kill)
.add(predOps(Pred, PredReg))
.add(condCodeOp())
.setMIFlags(MIFlags);
return;
}
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
while (NumBytes) {
unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
assert(ThisVal && "Didn't extract field correctly");
// We will handle these bits from offset, clear them.
NumBytes &= ~ThisVal;
assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
// Build the new ADD / SUB.
unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg, RegState::Kill)
.addImm(ThisVal)
.add(predOps(Pred, PredReg))
.add(condCodeOp())
.setMIFlags(MIFlags);
BaseReg = DestReg;
}
}
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
if (!Subtarget.hasMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
// instead. We can't modify those so make sure we're dealing with an
// instruction we understand.
bool IsPop = isPopOpcode(MI->getOpcode());
bool IsPush = isPushOpcode(MI->getOpcode());
if (!IsPush && !IsPop)
return false;
bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
MI->getOpcode() == ARM::VLDMDIA_UPD;
bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
MI->getOpcode() == ARM::tPOP ||
MI->getOpcode() == ARM::tPOP_RET;
assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
MI->getOperand(1).getReg() == ARM::SP)) &&
"trying to fold sp update into non-sp-updating push/pop");
// The VFP push & pop act on D-registers, so we can only fold an adjustment
// by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
// if this is violated.
if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
return false;
// ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
// pred) so the list starts at 4. Thumb1 starts after the predicate.
int RegListIdx = IsT1PushPop ? 2 : 4;
// Calculate the space we'll need in terms of registers.
unsigned RegsNeeded;
const TargetRegisterClass *RegClass;
if (IsVFPPushPop) {
RegsNeeded = NumBytes / 8;
RegClass = &ARM::DPRRegClass;