Skip to content

Commit

Permalink
NanoMIPS: NMLoadStoreMultiple add gap support
Browse files Browse the repository at this point in the history
Besides regular instruction sequences, we now also look for instruction
sequences that are "missing" one (or more) instructions needed to be
complete and interchangeable with an lwm/swm instruction. Such a sequence
is optimizable if the Rt register of each missing instruction is available.
  • Loading branch information
milica-lazarevic committed Sep 13, 2024
1 parent cfe2d4a commit 71b1bf2
Show file tree
Hide file tree
Showing 3 changed files with 414 additions and 21 deletions.
101 changes: 80 additions & 21 deletions llvm/lib/Target/Mips/NanoMipsLoadStoreMultiple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@

#include "Mips.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/InitializePasses.h"

#include <cmath>
Expand All @@ -32,8 +34,10 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
unsigned Rt;
unsigned Rs;
int64_t Offset;
MachineBasicBlock *MBB;

LSIns(MachineInstr *MI) {
MBB = MI->getParent();
Rt = MI->getOperand(0).getReg().id();
Rs = MI->getOperand(1).getReg().id();
Offset = MI->getOperand(2).getImm();
Expand All @@ -44,6 +48,8 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
static char ID;
const MipsSubtarget *STI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
MCRegisterClass RC = MipsMCRegisterClasses[Mips::GPRNM32RegClassID];
DenseMap<unsigned, unsigned> RegToIndexMap;

Expand All @@ -58,7 +64,8 @@ struct NMLoadStoreMultipleOpt : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &Fn) override;
unsigned getRegNo(unsigned Reg);
bool isValidLoadStore(MachineInstr &MI, bool IsLoad, InstrList);
bool isValidNextLoadStore(LSIns Prev, LSIns Next);
bool isValidNextLoadStore(LSIns Prev, LSIns Next, size_t &GapSize,
size_t &CurrSeqSize);
bool generateLoadStoreMultiple(MachineBasicBlock &MBB, bool IsLoad);
void sortLoadStoreList(InstrList &LoadStoreList, bool IsLoad);
};
Expand All @@ -69,6 +76,8 @@ char NMLoadStoreMultipleOpt::ID = 0;
bool NMLoadStoreMultipleOpt::runOnMachineFunction(MachineFunction &Fn) {
STI = &static_cast<const MipsSubtarget &>(Fn.getSubtarget());
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
Expand Down Expand Up @@ -164,21 +173,56 @@ bool NMLoadStoreMultipleOpt::isValidLoadStore(MachineInstr &MI, bool IsLoad,
return false;
}

// Decides whether the load/store described by Next may extend the sequence
// whose current last element is Prev.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so
// removed and added lines appear side by side (e.g. the two signatures right
// below, and the duplicated DesiredRtNo computation inside the first branch).
//
// Parameters (new signature):
//   Prev        - decoded Rt/Rs/Offset (and parent block) of the last
//                 instruction already in the sequence.
//   Next        - decoded Rt/Rs/Offset of the candidate instruction.
//   GapSize     - in/out; incremented by the number of missing slots when a
//                 gap is accepted.
//   CurrSeqSize - in/out; incremented by the same amount when a gap is
//                 accepted, and used for the "<= 8" length check.
//
// Returns true when Next is accepted as the successor of Prev; returns false
// otherwise. GapSize and CurrSeqSize are modified only on the accepting gap
// path.
bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next) {
bool NMLoadStoreMultipleOpt::isValidNextLoadStore(LSIns Prev, LSIns Next,
size_t &GapSize,
size_t &CurrSeqSize) {
unsigned PrevRtNo = getRegNo(Prev.Rt);
// Index 0 is special-cased: the expected successor of index 0 is index 0
// again; for any other index it is the following index.
unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0;
Register DesiredRtReg = RC.getRegister(DesiredRtNo);
// Case 1: Next sits exactly one word (4 bytes) after Prev.
if (Next.Offset == Prev.Offset + 4) {
unsigned DesiredRtNo = PrevRtNo != 0 ? (PrevRtNo + 1) : 0;
if (Next.Rt != RC.getRegister(DesiredRtNo))
// GAP, but offset ok
// lw a0, 8(a4)
// lw a1, 12(a4)
// lw a3, 16(a4)
if (Next.Rt != DesiredRtReg) {
// TODO
// NOTE(review): both a `return false;` and a `return true;` are shown here
// because of the diff rendering; which one survives the commit cannot be
// determined from this view - confirm against the repository.
return false;
return true;
} else {
return true;
}
} else {
// "full" GAP
// lw a0, 8(a4)
// lw a1, 12(a4)
// lw a3, 20(a4)
// Case 2: offsets are not adjacent. Accept only if the distance is a whole
// number of words and the register numbering skips by the same amount.
bool OffsetOk = ((Next.Offset - Prev.Offset) % 4) == 0;
// Gap = number of word slots between Prev and Next that no instruction
// covers.
// NOTE(review): when Next.Offset == Prev.Offset this evaluates to
// abs(0 - 1) == 1, and a negative distance is folded to a positive gap;
// presumably the caller's sorting guarantees strictly increasing offsets -
// confirm. Also verify that the unqualified `abs` resolves to a 64-bit
// overload for this int64_t expression.
unsigned Gap = abs((Next.Offset - Prev.Offset) / 4 - 1);
// The merged sequence (current size + gap slots + Next) must not exceed 8
// entries, and Next.Rt must be the register Gap + 1 positions after Prev.Rt
// in RC.
// NOTE(review): PrevRtNo + Gap + 1 does not apply the index-0 special case
// used for DesiredRtNo above and is not range-checked before being passed to
// RC.getRegister - confirm this cannot index past the register class.
if (OffsetOk && (CurrSeqSize + Gap + 1 <= 8) &&
Next.Rt == RC.getRegister(PrevRtNo + Gap + 1)) {
// Every register that would fill a gap slot must be reported as available.
// NOTE(review): liveness is taken from computeLiveIns on Prev's parent
// block, not from the program point of the instructions being merged -
// confirm that this is the intended approximation.
LivePhysRegs LiveRegs(*TRI);
computeLiveIns(LiveRegs, *Prev.MBB);
for (size_t i = 0; i < Gap; i++) {
// NOTE(review): the assert is given an index sum (DesiredRtNo + i), not
// the register DesiredRtReg that is checked below - confirm intent.
assert(Register::isPhysicalRegister(DesiredRtNo + i) &&
"Desired register is not physical!");
if (!LiveRegs.available(*MRI, (DesiredRtReg)))
return false;
// Advance to the register for the next gap slot.
DesiredRtReg = RC.getRegister(DesiredRtNo + i + 1);
}
// Gap accepted: account for the missing slots in both counters.
GapSize += Gap;
CurrSeqSize += Gap;
return true;
}
}
return false;
}

bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
bool IsLoad) {
bool Modified = false;

struct Candidate {
InstrList Sequence;
size_t GapSize;
};
InstrList SequenceToSort;
SmallVector<InstrList, 3> SequenceList;
for (auto &MI : MBB) {
Expand All @@ -195,49 +239,64 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
}
}

SmallVector<InstrList, 3> Candidates;
SmallVector<Candidate, 3> Candidates;
InstrList Sequence;

size_t GapSize = 0;
size_t SeqSize = 0;
for (size_t i = 0; i < SequenceList.size(); i++) {
sortLoadStoreList(SequenceList[i], IsLoad);
for (auto &MI : SequenceList[i]) {
// Sequences cannot be longer than 8 instructions.
if (Sequence.size() == 8) {
Candidates.push_back(Sequence);
if (SeqSize == 8) {
Candidates.push_back({Sequence, GapSize});
Sequence.clear();
GapSize = 0;
SeqSize = 0;
}
// When starting a new sequence, there's no need to do any checks.
if (Sequence.empty()) {
Sequence.push_back(MI);
SeqSize = 1;
continue;
}
if (!isValidNextLoadStore(Sequence.back(), MI)) {
if (Sequence.size() > 1)
Candidates.push_back(Sequence);

if (!isValidNextLoadStore(Sequence.back(), MI, GapSize, SeqSize)) {
if (SeqSize > 1)
Candidates.push_back({Sequence, GapSize});
Sequence.clear();
GapSize = 0;
SeqSize = 0;
}

Sequence.push_back(MI);
SeqSize++;
continue;
}

// At least 2 instructions are necessary for a valid sequence.
if (Sequence.size() > 1)
Candidates.push_back(Sequence);
if (SeqSize > 1) {
Candidates.push_back({Sequence, GapSize});
SeqSize++;
}

// Sequence has either ended or has never been started.
if (!Sequence.empty())
if (!Sequence.empty()) {
Sequence.clear();
SeqSize = 0;
GapSize = 0;
}
}

// Make sure that the last sequence has been added to the Candidates list.
// TODO: Check if needed.
if (Sequence.size() > 1)
Candidates.push_back(Sequence);
if (SeqSize > 1) {
Candidates.push_back({Sequence, GapSize});
SeqSize++;
}

for (auto &Seq : Candidates) {
for (auto &C : Candidates) {
auto Seq = C.Sequence;
assert(Seq.size() > 1 && Seq.size() < 9);

auto *Base = Seq.front();
int64_t Offset = Base->getOperand(2).getImm();
// Sequence cannot be merged, if the offset is out of range.
Expand All @@ -251,7 +310,7 @@ bool NMLoadStoreMultipleOpt::generateLoadStoreMultiple(MachineBasicBlock &MBB,
.addReg(Base->getOperand(0).getReg(), IsLoad ? RegState::Define : 0)
.addReg(Base->getOperand(1).getReg())
.addImm(Offset)
.addImm(Seq.size());
.addImm(Seq.size() + C.GapSize);
BMI.cloneMergedMemRefs(Seq);
for (auto *MI : Seq) {
if (MI != Base)
Expand Down
168 changes: 168 additions & 0 deletions llvm/test/CodeGen/Mips/nanomips/loadstoremultiple_full_gap.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@

# RUN: llc -mtriple=nanomips -verify-machineinstrs -run-pass nanomips-lwm-swm \
# RUN: %s -o - | FileCheck %s

# CHECK: SWM_NM $a1_nm, $sp_nm, 4, 7
--- |
; ModuleID = '../llvm-project/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple.ll'
source_filename = "../llvm-project/llvm/test/CodeGen/Mips/nanomips/loadstoremultiple.ll"
target datalayout = "e-m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128"
target triple = "nanomips"

%struct.bar = type { i32, i32, i32 }

define void @test4(i32 %n, ...) {
call void asm sideeffect "", ""()
ret void
}

define void @square(%struct.bar* %ints) {
%a1 = bitcast %struct.bar* %ints to i32*
%1 = load i32, i32* %a1, align 4
%b = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 1
%2 = load i32, i32* %b, align 4
%add = add nsw i32 %2, %1
%c = getelementptr inbounds %struct.bar, %struct.bar* %ints, i32 0, i32 2
store i32 %add, i32* %c, align 4
ret void
}

...
---
name: test4
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$a1_nm', virtual-reg: '' }
- { reg: '$a2_nm', virtual-reg: '' }
- { reg: '$a3_nm', virtual-reg: '' }
- { reg: '$a4_nm', virtual-reg: '' }
- { reg: '$a5_nm', virtual-reg: '' }
- { reg: '$a6_nm', virtual-reg: '' }
- { reg: '$a7_nm', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 32
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack:
- { id: 0, type: default, offset: -4, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, type: default, offset: -8, size: 4, alignment: 8, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, type: default, offset: -12, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, type: default, offset: -16, size: 4, alignment: 16, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 4, type: default, offset: -20, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 5, type: default, offset: -24, size: 4, alignment: 8, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 6, type: default, offset: -28, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 7, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 8, type: default, offset: -28, size: 4, alignment: 4, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $a1_nm, $a2_nm, $a3_nm, $a4_nm, $a6_nm, $a7_nm
SAVE_NM 32, implicit-def $sp_nm, implicit $sp_nm
CFI_INSTRUCTION def_cfa_offset 32
SWs9_NM killed renamable $a7_nm, $sp_nm, 28 :: (store (s32))
SWs9_NM killed renamable $a3_nm, $sp_nm, 12 :: (store (s32))
SWs9_NM killed renamable $a2_nm, $sp_nm, 8 :: (store (s32) into %fixed-stack.5, align 8)
SWs9_NM killed renamable $a6_nm, $sp_nm, 24 :: (store (s32) into %fixed-stack.1, align 8)
SWs9_NM killed renamable $a4_nm, $sp_nm, 16 :: (store (s32) into %fixed-stack.3, align 16)
SWs9_NM killed renamable $a1_nm, $sp_nm, 4 :: (store (s32))
INLINEASM &"", 1 /* sideeffect attdialect */
RESTOREJRC_NM 32, implicit-def $sp_nm, implicit $sp_nm
...
---
name: square
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$a0_nm', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $a0_nm
renamable $a1_nm = LW_NM renamable $a0_nm, 0 :: (load (s32) from %ir.a1)
renamable $a2_nm = LWs9_NM renamable $a0_nm, 4 :: (load (s32) from %ir.b)
renamable $a1_nm = nsw ADDu_NM killed renamable $a2_nm, killed renamable $a1_nm
SW_NM killed renamable $a1_nm, killed renamable $a0_nm, 8 :: (store (s32) into %ir.c)
PseudoReturnNM undef $ra_nm
...
Loading

0 comments on commit 71b1bf2

Please sign in to comment.