NanoMIPS: NMLoadStoreMultiple add reg gap support
Handle the case where an instruction sequence is regular except for one
instruction whose Rt register number does not match the expected
consecutive numbering. Such a sequence can still be optimized if the
register with the expected number is available (dead) at that point; in
that case, one additional move instruction is emitted after the lwm/swm.
milica-lazarevic committed Sep 16, 2024
1 parent 97927ee commit 7c2e46b
Showing 2 changed files with 188 additions and 78 deletions.
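As an illustration of the transformation described in the commit message, here is a minimal sketch with made-up registers and offsets (not taken from the patch): a load run whose last Rt is off by one can still be merged when the register it skipped is dead.

    # Regular sequence except the last Rt (a5 instead of the expected a4).
    lw   a1, 4(sp)
    lw   a2, 8(sp)
    lw   a3, 12(sp)
    lw   a5, 16(sp)

    # If a4 is dead here, the run still collapses into a single lwm, with one
    # extra move to place the last loaded value into a5.
    lwm  a1, 4(sp), 4
    move a5, a4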
181 changes: 103 additions & 78 deletions llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp
@@ -39,8 +39,10 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
unsigned Rs;
int64_t Offset;
MachineBasicBlock *MBB;
MachineInstr *MI;

LSIns(MachineInstr *MI) {
this->MI = MI;
MBB = MI->getParent();
Rt = MI->getOperand(0).getReg().id();
Rs = MI->getOperand(1).getReg().id();
@@ -49,6 +51,12 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
};
using InstrList = SmallVector<MachineInstr *, 4>;
using MBBIter = MachineBasicBlock::iterator;
struct Candidate {
InstrList Sequence;
size_t GapSize;
bool Move = false;
};
using CandidateList = SmallVector<Candidate, 3>;
static char ID;
const MipsSubtarget *STI;
const TargetInstrInfo *TII;
@@ -69,9 +77,11 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
unsigned getRegNo(unsigned Reg);
bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList);
bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize,
size_t &CurrSeqSize);
size_t &CurrSeqSize, bool &RegGap);
bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad);
void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad);
void findCandidatesForOptimization(InstrList &LoadStoreList,
CandidateList &Candidates);
};
} // namespace

@@ -129,6 +139,53 @@ void NMLoadStoreMultipleOpt::sortLoadStoreList(InstrList &LoadStoreList,
std::sort(LoadStoreList.begin(), LoadStoreList.end(), CompareInstructions);
}

void NMLoadStoreMultipleOpt::findCandidatesForOptimization(
InstrList &LoadStoreList, CandidateList &Candidates) {
InstrList Sequence;
size_t GapSize = 0, SeqSize = 0;
bool RegGap = false;

auto clearSequence = [&Sequence, &GapSize, &SeqSize, &RegGap]() {
Sequence.clear();
GapSize = 0;
SeqSize = 0;
RegGap = false;
};

for (auto &MI : LoadStoreList) {
// Sequences cannot be longer than 8 instructions.
if (SeqSize == 8) {
Candidates.push_back({Sequence, GapSize});
clearSequence();
}
// When starting a new sequence, there's no need to do any checks.
if (Sequence.empty()) {
Sequence.push_back(MI);
SeqSize = 1;
continue;
}

if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize, RegGap)) {
if (SeqSize > 1)
Candidates.push_back({Sequence, GapSize});
clearSequence();
}

Sequence.push_back(MI);
SeqSize++;

if (RegGap) {
Candidates.push_back({Sequence, GapSize, true});
clearSequence();
}
}

// Save the last valid sequence for this list. At least 2 instructions are
// necessary for a valid sequence.
if (SeqSize > 1)
Candidates.push_back({Sequence, GapSize});
}

// All instructions in the sequence should have the same Rs register and
// different Rt registers.
bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad,
@@ -179,54 +236,64 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad,

bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next,
size_t &GapSize,
size_t &CurrSeqSize) {
size_t &CurrSeqSize,
bool &RegGap) {
unsigned PrevRtNo = getRegNo(Prev.Rt);
unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0;
Register DesiredRtReg = RC.getRegister(DesiredRtNo);
if (Next.Offset == Prev.Offset + 4) {
if (Next.Rt == DesiredRtReg)
return true;
// Next.Rt != DesiredRtReg
// GAP, but offset ok
// lw a0, 8(a4)
// lw a1, 12(a4)
// lw a3, 16(a4)
if (Next.Rt != DesiredRtReg) {
// TODO
// For now, the instruction like lw a3, 16(a4) interrupts the sequence.
if (CurrSeqSize < 2)
return false;
} else {
return true;
}
} else {

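// Bridging the register gap is only possible if the register with the
// expected number is dead at this point; otherwise give up.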
assert(Register::isPhysicalRegister(DesiredRtNo) &&
"Desired register is not physical!");
if (MachineBasicBlock::LQR_Dead !=
Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI))
return false;

RegGap = true;
return true;
}
// Next.Offset != Prev.Offset + 4
bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0;
unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1);
if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) &&
Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) {
// "full" GAP
// lw a0, 8(a4)
// lw a1, 12(a4)
// lw a3, 20(a4)
bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0;
unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1);
if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) &&
Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) {
LivePhysRegs LiveRegs(*TRI);
computeLiveIns(LiveRegs, *Prev.MBB);
for (size_t i = 0; i < Gap; i++) {
assert(Register::isPhysicalRegister(DesiredRtNo + i) &&
"Desired register is not physical!");
if (!LiveRegs.available(*MRI, (DesiredRtReg)))
return false;
DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1);
}
GapSize += Gap;
CurrSeqSize += Gap;
return true;
for (size_t i = 0; i < Gap; i++) {
assert(Register::isPhysicalRegister(DesiredRtNo + i) &&
"Desired register is not physical!");
if (MachineBasicBlock::LQR_Dead !=
Prev.MBB->computeRegisterLiveness(TRI, DesiredRtReg, Prev.MI))
return false;
DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1);
}
GapSize += Gap;
CurrSeqSize += Gap;
return true;
}
return false;
}

bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
bool IsLoad) {
bool Modified = false;
struct Candidate {
InstrList Sequence;
size_t GapSize;
};

// TODO: Consider allowing interspersed arithmetic/logical operations in
// load/store sequences to reduce sensitivity to instruction ordering. Note
// that proper scheduling models will alter instruction order, increasing
// mixed memory and compute operations. Dependency checks will be required.
InstrList SequenceToSort;
SmallVector<InstrList, 3> SequenceList;
for (auto &MI : MBB) {
@@ -243,59 +310,11 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
}
}

SmallVector<Candidate, 3> Candidates;
CandidateList Candidates;
InstrList Sequence;
size_t GapSize = 0;
size_t SeqSize = 0;
for (size_t i = 0; i < SequenceList.size(); i++) {
sortLoadStoreList(SequenceList[i], IsLoad);
for (auto &MI : SequenceList[i]) {
// Sequences cannot be longer than 8 instructions.
if (SeqSize == 8) {
Candidates.push_back({Sequence, GapSize});
Sequence.clear();
GapSize = 0;
SeqSize = 0;
}
// When starting a new sequence, there's no need to do any checks.
if (Sequence.empty()) {
Sequence.push_back(MI);
SeqSize = 1;
continue;
}

if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize)) {
if (SeqSize > 1)
Candidates.push_back({Sequence, GapSize});
Sequence.clear();
GapSize = 0;
SeqSize = 0;
}

Sequence.push_back(MI);
SeqSize++;
continue;
}

// At least 2 instructions are necessary for a valid sequence.
if (SeqSize > 1) {
Candidates.push_back({Sequence, GapSize});
SeqSize++;
}

// Sequence has either ended or has never been started.
if (!Sequence.empty()) {
Sequence.clear();
SeqSize = 0;
GapSize = 0;
}
}

// Make sure that the last sequence has been added to the Candidates list.
// TODO: Check if needed.
if (SeqSize > 1) {
Candidates.push_back({Sequence, GapSize});
SeqSize++;
findCandidatesForOptimization(SequenceList[i], Candidates);
}

for (auto &C : Candidates) {
@@ -316,6 +335,12 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
.addImm(Offset)
.addImm(Seq.size() + C.GapSize);
BMI.cloneMergedMemRefs(Seq);
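// A register gap was bridged: the lwm/swm uses the expected consecutive
// register, so copy its value into the original, out-of-sequence Rt of the
// last instruction with one extra move.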
if (C.Move) {
BuildMI(MBB, std::next(MBBIter(BMI.getInstr())), Base->getDebugLoc(),
TII->get(Mips::MOVE_NM))
.addReg(Seq.back()->getOperand(0).getReg(), RegState::Define)
.addReg(Seq[Seq.size() - 2]->getOperand(0).getReg() + 1);
}
for (auto *MI : Seq) {
if (MI != Base)
BMI.addReg(MI->getOperand(0).getReg(),
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_reg_gap.mir
@@ -0,0 +1,85 @@

# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \
# RUN: %s -o - | FileCheck %s

# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 4
# CHECK-NEXT: $a5_nm = MOVE_NM $a4_nm
--- |
%struct.bar = type { i32, i32, i32 }

define void @test4(i32 %n, ...) {
call void asm sideeffect "", ""()
ret void
}

define void @square(%struct.bar* %ints) {
%a1 = bitcast %struct.bar* %ints to i32*
%1 = load i32, i32* %a1, align 4
%b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1
%2 = load i32, i32* %b, align 4
%add = add nsw i32 %2, %1
%c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2
store i32 %add, i32* %c, align 4
ret void
}

...
---
name: test4
fixedStack:
- { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
body: |
bb.0 (%ir-block.0):
liveins: $a1_nm, $a2_nm, $a3_nm, $a4_nm, $a5_nm, $a6_nm, $a7_nm
SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm
CFI_INSTRUCTION def_cfa_offset 32
SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32))
SWs9_NM killed renamable $a3_nm, $sp_nm, 12 :: (store (s32))
SWs9_NM killed renamable $a2_nm, $sp_nm, 8 :: (store (s32) into %fixed-stack.5, align 8)
SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8)
SWs9_NM killed renamable $a5_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16)
SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32))
INLINEASM &"", 1 /* sideeffect attdialect */
RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm
...
---
name: square
body: |
bb.0 (%ir-block.0):
liveins: $a0_nm
renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1)
renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b)
renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm
SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: (store (s32) into %ir.c)
PseudoReturnNM undef $ra_nm
...
