Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better usage of load/store multiple #4

Open
wants to merge 5 commits into
base: nanomips-llvm16
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/Mips/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ add_llvm_target(MipsCodeGen
MipsTargetTransformInfo.cpp
MicroMipsSizeReduction.cpp
MipsMulMulBugPass.cpp
NanoMipsLoadStoreMultiple.cpp
NanoMipsLoadStoreOptimizer.cpp
NanoMipsMoveOptimizer.cpp
NanoMipsOptimizeJumpTables.cpp
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/Mips/Mips.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ FunctionPass *createNanoMipsMoveOptimizerPass();
FunctionPass *createNanoMipsRegisterReAllocationPass();
FunctionPass *createRedundantCopyEliminationPass();
FunctionPass *createNanoMipsCodeGenPreparePass();
FunctionPass *createNanoMipsLoadStoreMultiplePass();

InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
MipsSubtarget &,
Expand All @@ -58,12 +59,13 @@ void initializeMipsDelaySlotFillerPass(PassRegistry &);
void initializeMipsMulMulBugFixPass(PassRegistry &);
void initializeMipsPostLegalizerCombinerPass(PassRegistry &);
void initializeMipsPreLegalizerCombinerPass(PassRegistry &);
void initializeNMOptimizeJumpTablesPass (PassRegistry&);
void initializeNMOptimizeJumpTablesPass(PassRegistry &);
void initializeNanoMipsRegisterReAllocPass(PassRegistry &);
void initializeRedundantCopyEliminationPass(PassRegistry&);
void initializeRedundantCopyEliminationPass(PassRegistry &);
void initializeNanoMipsCodeGenPreparePass(PassRegistry &);
void initializeNMLoadStoreOptPass(PassRegistry&);
void initializeNMMoveOptPass(PassRegistry&);
void initializeNMLoadStoreOptPass(PassRegistry &);
void initializeNMMoveOptPass(PassRegistry &);
void initializeNMLoadStoreMultipleOptPass(PassRegistry &);
} // namespace llvm

#endif
5 changes: 4 additions & 1 deletion llvm/lib/Target/Mips/MipsTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
initializeMipsPostLegalizerCombinerPass(*PR);
initializeMipsMulMulBugFixPass(*PR);
initializeMipsDAGToDAGISelPass(*PR);
initializeNMLoadStoreMultipleOptPass(*PR);
initializeRedundantCopyEliminationPass(*PR);
initializeNMLoadStoreOptPass(*PR);
initializeNMMoveOptPass(*PR);
Expand Down Expand Up @@ -319,8 +320,10 @@ std::unique_ptr<CSEConfigBase> MipsPassConfig::getCSEConfig() const {
}

void MipsPassConfig::addPreSched2() {
if (getMipsSubtarget().hasNanoMips() && getOptLevel() != CodeGenOpt::None)
if (getMipsSubtarget().hasNanoMips() && getOptLevel() != CodeGenOpt::None) {
addPass(createNanoMipsLoadStoreOptimizerPass());
addPass(createNanoMipsLoadStoreMultiplePass());
}
}

void MipsPassConfig::addIRPasses() {
Expand Down
340 changes: 340 additions & 0 deletions llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
//===- NanoMipsLoadStoreMultiple.cpp - nanoMIPS load/store multiple opt --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "Mips.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/InitializePasses.h"

#include <cmath>

using namespace llvm;

#define DEBUG_TYPE "nanomips-lwm-swm"
#define NM_LOAD_STORE_OPT_NAME "nanoMIPS load/store multiple optimization pass"

// Escape hatch for debugging/bisection: when set, runOnMachineFunction
// returns immediately and the pass becomes a no-op.
static cl::opt<bool> DisableNMLoadStoreMultiple(
    "disable-nm-lwm-swm", cl::Hidden, cl::init(false),
    cl::desc("Disable NanoMips load/store multiple optimizations"));

namespace {
/// Post-RA machine-function pass that merges runs of adjacent word
/// loads/stores (lw/sw with consecutive registers and 4-byte-spaced offsets
/// off a common base) into a single LWM_NM / SWM_NM instruction.
struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
  /// Flattened view of one candidate load/store: transfer register (Rt),
  /// base register (Rs), immediate offset, and the owning block.
  struct LSIns {
    unsigned Rt;
    unsigned Rs;
    int64_t Offset;
    MachineBasicBlock *MBB;

    // Assumes MI has the lw/sw operand layout: (0) Rt, (1) Rs, (2) imm.
    LSIns(MachineInstr *MI) {
      MBB = MI->getParent();
      Rt = MI->getOperand(0).getReg().id();
      Rs = MI->getOperand(1).getReg().id();
      Offset = MI->getOperand(2).getImm();
    }
  };
  using InstrList = SmallVector<MachineInstr *, 4>;
  using MBBIter = MachineBasicBlock::iterator;
  static char ID;
  const MipsSubtarget *STI;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  MCRegisterClass RC = MipsMCRegisterClasses[Mips::GPRNM32RegClassID];
  // Maps a physical register to its index within GPRNM32; sequence formation
  // reasons about "the next register" via these indices (see getRegNo).
  DenseMap<unsigned, unsigned> RegToIndexMap;

  NMLoadStoreMultipleOpt() : MachineFunctionPass(ID) {
    // Initialize RegToIndexMap.
    for (unsigned I = 0; I < RC.getNumRegs(); I++) {
      unsigned R = RC.begin()[I];
      RegToIndexMap[R] = I;
    }
  }
  StringRef getPassName() const override { return NM_LOAD_STORE_OPT_NAME; }
  bool runOnMachineFunction(MachineFunction &Fn) override;
  // Index of Reg in GPRNM32, or RC.getNumRegs() as an invalid sentinel.
  unsigned getRegNo(unsigned Reg);
  bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList);
  bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize,
                            size_t &CurrSeqSize);
  bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad);
  void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad);
};
} // namespace

char NMLoadStoreMultipleOpt::ID = 0;

/// Pass entry point: caches subtarget/instruction/register info, then tries
/// to form store-multiple and load-multiple sequences in every block.
bool NMLoadStoreMultipleOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (DisableNMLoadStoreMultiple)
    return false;

  STI = &static_cast<const MipsSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();

  bool Changed = false;
  for (MachineBasicBlock &MBB : Fn) {
    // Stores first, then loads, exactly once per block.
    Changed |= generateLoadStoreMultiple(MBB, /*IsLoad=*/false);
    Changed |= generateLoadStoreMultiple(MBB, /*IsLoad=*/true);
  }

  return Changed;
}

/// Returns the index of \p Reg inside the GPRNM32 register class, or
/// RC.getNumRegs() (an out-of-range sentinel) when \p Reg does not belong to
/// the class. Index 0 is treated by callers as the zero register.
/// (Fix: stray review-tool text that was embedded in this function's body has
/// been removed; it was not valid C++.)
unsigned NMLoadStoreMultipleOpt::getRegNo(unsigned Reg) {
  auto I = RegToIndexMap.find(Reg);

  // Invalid register index.
  if (I == RegToIndexMap.end())
    return RC.getNumRegs();

  return I->second;
}

// Sort the collected run so that sequences broken by register/offset order
// become easy to detect. The primary key is the GPRNM32 index of the
// transfer register, ascending; whether the offsets line up is verified
// later. Zero-register stores are the exception: they all use $zero, so they
// are ordered by offset instead.
// A run whose register numbers wrap around is currently not supported, e.g.:
//   lw a30, 4 (a9)
//   lw a31, 8 (a9)
//   lw a16, 12(a9)
void NMLoadStoreMultipleOpt::sortLoadStoreList(InstrList &LoadStoreList,
                                               bool IsLoad) {
  auto Less = [this, IsLoad](MachineInstr *LHS, MachineInstr *RHS) {
    unsigned LHSRegNo = getRegNo(LHS->getOperand(0).getReg());
    unsigned RHSRegNo = getRegNo(RHS->getOperand(0).getReg());

    // Stores of the zero register are compared by offset.
    if (!IsLoad && LHSRegNo == 0 && RHSRegNo == 0)
      return LHS->getOperand(2).getImm() < RHS->getOperand(2).getImm();

    return LHSRegNo < RHSRegNo;
  };
  std::sort(LoadStoreList.begin(), LoadStoreList.end(), Less);
}

// Decides whether MI may join the run in Sequence. All instructions in the
// sequence must share the same base register (Rs) and use distinct transfer
// registers (Rt); stores of $zero are exempt from the distinct-Rt rule.
bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad,
                                              InstrList Sequence) {
  // Make sure the instruction doesn't have any atomic, volatile or
  // otherwise strictly ordered accesses.
  for (auto &MMO : MI.memoperands())
    if (MMO->isAtomic() || !MMO->isUnordered())
      return false;

  // Only plain word loads/stores qualify.
  // TODO: Handle unaligned loads and stores.
  unsigned Opcode = MI.getOpcode();
  bool IsWordOp = IsLoad ? (Opcode == Mips::LW_NM || Opcode == Mips::LWs9_NM)
                         : (Opcode == Mips::SW_NM || Opcode == Mips::SWs9_NM);
  if (!IsWordOp)
    return false;

  Register Rt = MI.getOperand(0).getReg();
  Register Rs = MI.getOperand(1).getReg();

  // TODO: Rt and Rs can be equal, but only if that is the last load of
  // the sequence.
  if (IsLoad && Rt == Rs)
    return false;

  // The base register must match the one used by the rest of the run.
  if (!Sequence.empty() &&
      Rs != Sequence.back()->getOperand(1).getReg())
    return false;

  // Zero register stores are a special case that does not require consequent
  // $rt registers, but instead requires all $rt registers to be $zero.
  bool RtAlreadyUsed =
      std::any_of(Sequence.begin(), Sequence.end(), [&Rt](const MachineInstr *I) {
        return I->getOperand(0).getReg() == Rt;
      });
  return !RtAlreadyUsed || getRegNo(Rt) == 0;
}

/// Decides whether \p Next can extend a sequence currently ending in \p Prev.
/// Two shapes are accepted: the directly adjacent case (offset +4 and the
/// next consecutive register), and a bridgeable gap where both the offsets
/// and the register numbers skip ahead by the same amount and the skipped
/// registers are not live. On bridging a gap, \p GapSize and \p CurrSeqSize
/// grow by the number of skipped slots.
/// Fixes vs. original: stray review-tool text removed from the body; the
/// assert now checks the actual register rather than its class index (class
/// index 0 is not a physical-register number); std::abs is used so the
/// int64_t distance is not truncated through the C `abs(int)` overload; and
/// computed class indices are bounds-checked before RC.getRegister, which
/// otherwise asserts on out-of-range indices.
bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next,
                                                  size_t &GapSize,
                                                  size_t &CurrSeqSize) {
  unsigned PrevRtNo = getRegNo(Prev.Rt);
  // Zero-register stores keep using $zero; otherwise the next member must
  // use the next register of the class.
  unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0;
  // Guard: the previous Rt may already be the last register of the class.
  if (DesiredRtNo >= RC.getNumRegs())
    return false;
  Register DesiredRtReg = RC.getRegister(DesiredRtNo);

  if (Next.Offset == Prev.Offset + 4) {
    // Adjacent offset: accept only the expected consecutive register, i.e.
    // reject a register gap with contiguous offsets such as
    //   lw a0, 8(a4)
    //   lw a1, 12(a4)
    //   lw a3, 16(a4)
    // TODO: support this case as well.
    return Next.Rt == DesiredRtReg;
  }

  // "full" GAP: both the offsets and the register numbers jump, e.g.
  //   lw a0, 8(a4)
  //   lw a1, 12(a4)
  //   lw a3, 20(a4)
  int64_t Distance = Next.Offset - Prev.Offset;
  if (Distance % 4 != 0)
    return false;
  size_t Gap = static_cast<size_t>(std::abs(Distance / 4 - 1));
  unsigned NextRtNo = PrevRtNo + Gap + 1;
  // The widened sequence must still fit in 8 slots, the register index must
  // stay inside the class, and Next.Rt must land exactly past the gap.
  if (CurrSeqSize + Gap + 1 > 8 || NextRtNo >= RC.getNumRegs() ||
      Next.Rt != RC.getRegister(NextRtNo))
    return false;

  // The registers that fill the gap will be covered by the widened LWM/SWM,
  // so they must not carry live values.
  // NOTE(review): liveness is derived from the block's live-ins only; defs
  // and uses between the block entry and the sequence are not considered —
  // confirm this is conservative enough for the intended placement.
  LivePhysRegs LiveRegs(*TRI);
  computeLiveIns(LiveRegs, *Prev.MBB);
  for (size_t I = 0; I < Gap; I++) {
    assert(Register::isPhysicalRegister(DesiredRtReg.id()) &&
           "Desired register is not physical!");
    if (!LiveRegs.available(*MRI, DesiredRtReg))
      return false;
    DesiredRtReg = RC.getRegister(DesiredRtNo + I + 1);
  }
  GapSize += Gap;
  CurrSeqSize += Gap;
  return true;
}

/// Scans \p MBB for runs of mergeable word loads (\p IsLoad) or stores off a
/// common base register, and rewrites every qualifying run of 2-8
/// instructions into a single LWM_NM / SWM_NM.
/// Fixes vs. original: (1) a run of size 1 was never reset when a non-
/// load/store instruction intervened, so accesses separated by arbitrary
/// code (which may write the same memory or redefine the base register)
/// could be glued into one sequence — the run is now reset on every break;
/// (2) a run extending to the end of the block was silently dropped — it is
/// now flushed; (3) dead `SeqSize++` statements and an unreachable trailing
/// candidate-push were removed; (4) stray review-tool text removed.
bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
                                                       bool IsLoad) {
  bool Modified = false;
  struct Candidate {
    InstrList Sequence;
    size_t GapSize;
  };

  // Phase 1: collect maximal runs of candidate instructions that are not
  // separated by any other (non-CFI, non-debug) instruction.
  SmallVector<InstrList, 3> SequenceList;
  InstrList SequenceToSort;
  for (auto &MI : MBB) {
    // CFI and debug instructions don't break the sequence.
    if (MI.isCFIInstruction() || MI.isDebugInstr())
      continue;
    if (isValidLoadStore(MI, IsLoad, SequenceToSort)) {
      SequenceToSort.push_back(&MI);
      continue;
    }
    // Any other instruction terminates the current run; record it when it
    // has at least two members, and always reset (merging across arbitrary
    // intervening code would be unsound).
    if (SequenceToSort.size() > 1)
      SequenceList.push_back(SequenceToSort);
    SequenceToSort.clear();
  }
  // A run reaching the end of the block is a candidate as well.
  if (SequenceToSort.size() > 1)
    SequenceList.push_back(SequenceToSort);

  // Phase 2: sort each run by register number and carve it into candidate
  // sequences of 2-8 slots with consecutive registers/offsets (small gaps
  // allowed, tracked in GapSize).
  SmallVector<Candidate, 3> Candidates;
  InstrList Sequence;
  size_t GapSize = 0;
  size_t SeqSize = 0;
  // Record the pending sequence if it is long enough, then reset state.
  auto FlushSequence = [&]() {
    // At least 2 instructions are necessary for a valid sequence.
    if (SeqSize > 1)
      Candidates.push_back({Sequence, GapSize});
    Sequence.clear();
    GapSize = 0;
    SeqSize = 0;
  };
  for (size_t I = 0, E = SequenceList.size(); I != E; ++I) {
    sortLoadStoreList(SequenceList[I], IsLoad);
    for (auto *MI : SequenceList[I]) {
      // Sequences cannot be longer than 8 instructions.
      if (SeqSize == 8)
        FlushSequence();
      // When starting a new sequence, there's no need to do any checks.
      if (Sequence.empty()) {
        Sequence.push_back(MI);
        SeqSize = 1;
        continue;
      }
      // A mismatch ends the current sequence; MI then starts a new one.
      if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize))
        FlushSequence();
      Sequence.push_back(MI);
      SeqSize++;
    }
    // The run is exhausted; emit whatever sequence is still pending.
    FlushSequence();
  }

  // Phase 3: rewrite each candidate into a single LWM/SWM.
  for (auto &C : Candidates) {
    auto Seq = C.Sequence;
    assert(Seq.size() > 1 && Seq.size() < 9);
    auto *Base = Seq.front();
    int64_t Offset = Base->getOperand(2).getImm();
    // Sequence cannot be merged if the offset is out of the 9-bit signed
    // range accepted by LWM/SWM.
    if (!isInt<9>(Offset))
      continue;

    auto InsertBefore = std::next(MBBIter(Base));
    unsigned Opcode = IsLoad ? Mips::LWM_NM : Mips::SWM_NM;
    // Explicit operands: first transfer register, base, offset, and the
    // register count (gap slots included).
    auto BMI =
        BuildMI(MBB, InsertBefore, Base->getDebugLoc(), TII->get(Opcode))
            .addReg(Base->getOperand(0).getReg(), IsLoad ? RegState::Define : 0)
            .addReg(Base->getOperand(1).getReg())
            .addImm(Offset)
            .addImm(Seq.size() + C.GapSize);
    BMI.cloneMergedMemRefs(Seq);
    // The remaining transfer registers ride along as implicit operands; the
    // original instructions are erased.
    for (auto *MI : Seq) {
      if (MI != Base)
        BMI.addReg(MI->getOperand(0).getReg(),
                   IsLoad ? RegState::ImplicitDefine : RegState::Implicit);
      MBB.erase(MI);
    }

    Modified = true;
  }
  return Modified;
}

// Registers the pass with the PassRegistry under DEBUG_TYPE
// ("nanomips-lwm-swm"); not a CFG-only and not an analysis pass.
INITIALIZE_PASS(NMLoadStoreMultipleOpt, DEBUG_TYPE, NM_LOAD_STORE_OPT_NAME,
                false, false)

namespace llvm {
// Factory used by the Mips target machine to schedule this pass
// (added in MipsPassConfig::addPreSched2).
FunctionPass *createNanoMipsLoadStoreMultiplePass() {
  return new NMLoadStoreMultipleOpt();
}
} // namespace llvm
Loading