Skip to content

Commit

Permalink
NanoMIPS: NMLoadStoreMultiple add reg gap support
Browse files Browse the repository at this point in the history
We're handling the situation where the instruction sequence is regular,
except for one instruction having a "wrong" Rt register number. A
sequence like that is optimizable if the register with the expected
register number is available. In that case, we're emitting one
additional move instruction after lwm/swm.
  • Loading branch information
milica-lazarevic committed Sep 13, 2024
1 parent 71b1bf2 commit 5ce4b7e
Show file tree
Hide file tree
Showing 2 changed files with 272 additions and 78 deletions.
181 changes: 103 additions & 78 deletions llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
unsigned Rs;
int64_t Offset;
MachineBasicBlock *MBB;
MachineInstr *MI;

LSIns(MachineInstr *MI) {
this->MI = MI;
MBB = MI->getParent();
Rt = MI->getOperand(0).getReg().id();
Rs = MI->getOperand(1).getReg().id();
Expand All @@ -45,6 +47,12 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
};
using InstrList = SmallVector<MachineInstr *, 4>;
using MBBIter = MachineBasicBlock::iterator;
// A run of load/store instructions that has been selected as a candidate for
// replacement by a single load/store-multiple instruction.
struct Candidate {
// The instructions that make up the run, in the order they were collected.
InstrList Sequence;
// Accumulated size of the register/offset gaps found inside the run.
// NOTE(review): appears to be added to Sequence.size() to form the count
// operand of the merged instruction - confirm against the emitting code.
size_t GapSize;
// True when the run was closed by an instruction with an unexpected Rt
// register; the emitting code then adds a MOVE_NM after the merged
// instruction. Defaults to false so two-element initializers keep working.
bool Move = false;
};
using CandidateList = SmallVector<Candidate, 3>;
static char ID;
const MipsSubtarget *STI;
const TargetInstrInfo *TII;
Expand All @@ -65,9 +73,11 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
unsigned getRegNo(unsigned Reg);
bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList);
bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize,
size_t &CurrSeqSize);
size_t &CurrSeqSize, bool &RegGap);
bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad);
void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad);
void findCandidatesForOptimization(InstrList &LoadStoreList,
CandidateList &Candidates);
};
} // namespace

Expand Down Expand Up @@ -125,6 +135,53 @@ void NMLoadStoreMultipleOpt::sortLoadStoreList(InstrList &LoadStoreList,
std::sort(LoadStoreList.begin(), LoadStoreList.end(), CompareInstructions);
}

// Splits LoadStoreList into runs of instructions that can be merged and
// appends every run found to Candidates.
//
// A run is closed and recorded when:
//   * it already spans 8 slots (the maximum sequence length),
//   * the next instruction cannot extend it (recorded only if the run holds
//     at least two instructions), or
//   * isValidNextLoadStore reports a register gap; the offending instruction
//     is kept as the last element and the candidate is flagged as needing a
//     move.
// Whatever run is still open after the last instruction is recorded as well,
// provided it has at least two instructions.
void NMLoadStoreMultipleOpt::findCandidatesForOptimization(
    InstrList &LoadStoreList, CandidateList &Candidates) {
  InstrList Current;
  size_t Gaps = 0;
  size_t Length = 0;
  bool NeedsMove = false;

  // Forgets the run under construction together with all its bookkeeping.
  auto resetState = [&]() {
    Current.clear();
    Gaps = 0;
    Length = 0;
    NeedsMove = false;
  };

  // Records the run under construction as a candidate, then starts afresh.
  auto flush = [&](bool WithMove) {
    Candidates.push_back({Current, Gaps, WithMove});
    resetState();
  };

  for (MachineInstr *MI : LoadStoreList) {
    // A run may not cover more than 8 slots; close it before going on.
    if (Length == 8)
      flush(false);

    if (!Current.empty()) {
      const bool Extends =
          isValidNextLoadStore(Current.back(), MI, Gaps, Length, NeedsMove);
      if (!Extends) {
        // The run ends here. Keep it only if it is long enough to be useful;
        // MI then becomes the first element of the next run.
        if (Length > 1)
          flush(false);
        else
          resetState();
      }

      Current.push_back(MI);
      ++Length;

      // A register gap terminates the run right after the offending
      // instruction, and the resulting candidate needs an extra move.
      if (NeedsMove)
        flush(true);
      continue;
    }

    // First instruction of a new run: nothing to validate against.
    Current.push_back(MI);
    Length = 1;
  }

  // Record the trailing run. At least 2 instructions are necessary for a
  // valid sequence.
  if (Length > 1)
    Candidates.push_back({Current, Gaps});
}

// All instructions in the sequence should have the same Rs register, and
// different Rt registers.
bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad,
Expand Down Expand Up @@ -175,54 +232,64 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad,

bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next,
size_t &GapSize,
size_t &CurrSeqSize) {
size_t &CurrSeqSize,
bool &RegGap) {
unsigned PrevRtNo = getRegNo(Prev.Rt);
unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0;
Register DesiredRtReg = RC.getRegister(DesiredRtNo);
if (Next.Offset == Prev.Offset + 4) {
if (Next.Rt == DesiredRtReg)
return true;
// Next.Rt != DesiredRtReg
// GAP, but offset ok
// lw a0, 8(a4)
// lw a1, 12(a4)
// lw a3, 16(a4)
if (Next.Rt != DesiredRtReg) {
// TODO
// For now, the instruction like lw a3, 16(a4) insterupts the sequence.
if (CurrSeqSize < 2)
return false;
} else {
return true;
}
} else {

assert(Register::isPhysicalRegister(DesiredRtNo) &&
"Desired register is not physical!");
if (MachineBasicBlock::LQR_Dead !=
Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI))
return false;

RegGap = true;
return true;
}
// Next.Offset != Prev.Offset + 4
bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0;
unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1);
if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) &&
Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) {
// "full" GAP
// lw a0, 8(a4)
// lw a1, 12(a4)
// lw a3, 20(a4)
bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0;
unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1);
if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) &&
Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) {
LivePhysRegs LiveRegs(*TRI);
computeLiveIns(LiveRegs, *Prev.MBB);
for (size_t i = 0; i < Gap; i++) {
assert(Register::isPhysicalRegister(DesiredRtNo + i) &&
"Desired register is not physical!");
if (!LiveRegs.available(*MRI, (DesiredRtReg)))
return false;
DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1);
}
GapSize += Gap;
CurrSeqSize += Gap;
return true;
for (size_t i = 0; i < Gap; i++) {
assert(Register::isPhysicalRegister(DesiredRtNo + i) &&
"Desired register is not physical!");
if (MachineBasicBlock::LQR_Dead !=
Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI))
return false;
DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1);
}
GapSize += Gap;
CurrSeqSize += Gap;
return true;
}
return false;
}

bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
bool IsLoad) {
bool Modified = false;
struct Candidate {
InstrList Sequence;
size_t GapSize;
};

// TODO: Consider allowing interspersed arithmetic/logical operations in
// load/store sequences to reduce sensitivity to instruction ordering. Note
// that proper scheduling models will alter instruction order, increasing
// mixed memory and compute operations. Dependency checks will be required.
InstrList SequenceToSort;
SmallVector<InstrList, 3> SequenceList;
for (auto &MI : MBB) {
Expand All @@ -239,59 +306,11 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
}
}

SmallVector<Candidate, 3> Candidates;
CandidateList Candidates;
InstrList Sequence;
size_t GapSize = 0;
size_t SeqSize = 0;
for (size_t i = 0; i < SequenceList.size(); i++) {
sortLoadStoreList(SequenceList[i], IsLoad);
for (auto &MI : SequenceList[i]) {
// Sequences cannot be longer than 8 instructions.
if (SeqSize == 8) {
Candidates.push_back({Sequence, GapSize});
Sequence.clear();
GapSize = 0;
SeqSize = 0;
}
// When starting a new sequence, there's no need to do any checks.
if (Sequence.empty()) {
Sequence.push_back(MI);
SeqSize = 1;
continue;
}

if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize)) {
if (SeqSize > 1)
Candidates.push_back({Sequence, GapSize});
Sequence.clear();
GapSize = 0;
SeqSize = 0;
}

Sequence.push_back(MI);
SeqSize++;
continue;
}

// At least 2 instructions are neccessary for a valid sequence.
if (SeqSize > 1) {
Candidates.push_back({Sequence, GapSize});
SeqSize++;
}

// Sequence has either ended or has never been started.
if (!Sequence.empty()) {
Sequence.clear();
SeqSize = 0;
GapSize = 0;
}
}

// Make sure that the last sequence has been added to the Candidates list.
// TODO: Check if needed.
if (SeqSize > 1) {
Candidates.push_back({Sequence, GapSize});
SeqSize++;
findCandidatesForOptimization(SequenceList[i], Candidates);
}

for (auto &C : Candidates) {
Expand All @@ -312,6 +331,12 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
.addImm(Offset)
.addImm(Seq.size() + C.GapSize);
BMI.cloneMergedMemRefs(Seq);
if (C.Move) {
BuildMI(MBB, std::next(MBBIter(BMI.getInstr())), Base->getDebugLoc(),
TII->get(Mips::MOVE_NM))
.addReg(Seq.back()->getOperand(0).getReg(), RegState::Define)
.addReg(Seq[Seq.size() - 2]->getOperand(0).getReg() + 1);
}
for (auto *MI : Seq) {
if (MI != Base)
BMI.addReg(MI->getOperand(0).getReg(),
Expand Down
Loading

0 comments on commit 5ce4b7e

Please sign in to comment.