Skip to content

Commit

Permalink
JIT: Optimize struct parameter register accesses in the backend
Browse files Browse the repository at this point in the history
This PR adds an optimization in lowering that utilizes the new
parameter-register-to-local mappings added in #110795. The optimization
detects IR that is going to result in stack spills/loads and replaces it
with scalar locals that will be able to stay in registers.

Physical promotion benefits especially from this as it creates the kind
of IR that the optimization ends up kicking in for. The heuristics of
physical promotion are updated to account for the fact that the backend
is now able to do this optimization, making physical promotion more
likely to promote struct parameters.
  • Loading branch information
jakobbotsch committed Dec 18, 2024
1 parent 6d1b57e commit 058e8dc
Show file tree
Hide file tree
Showing 8 changed files with 274 additions and 7 deletions.
4 changes: 4 additions & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5745,6 +5745,10 @@ void CodeGen::genFnProlog()
#else
genEnregisterOSRArgsAndLocals();
#endif
// OSR functions take no parameters in registers. Ensure no mappings
// are present.
//assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty());

compiler->lvaUpdateArgsWithInitialReg();
}
else
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4246,7 +4246,7 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename)
bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum)
{
LclVarDsc* varDsc = lvaGetDesc(varNum);
bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) ||
bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) ||
(varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);

#ifdef TARGET_ARM64
Expand Down
10 changes: 8 additions & 2 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4938,8 +4938,8 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
// that was set by past phases.
if (!isRecompute)
{
varDsc->lvSingleDef = varDsc->lvIsParam;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam;
varDsc->lvSingleDef = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;

varDsc->lvAllDefsAreNoGc = (varDsc->lvImplicitlyReferenced == false);
}
Expand Down Expand Up @@ -5033,6 +5033,12 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
}
}

if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0))
{
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
}

// If we have JMP, all arguments must have a location
// even if we don't use them inside the method
if (compJmpOpUsed && varDsc->lvIsParam && (varDsc->lvRefCnt() == 0))
Expand Down
190 changes: 190 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7959,6 +7959,11 @@ void Lowering::MapParameterRegisterLocals()
}
}

if (!comp->opts.IsOSR())
{
FindInducedParameterRegisterLocals();
}

#ifdef DEBUG
if (comp->verbose)
{
Expand All @@ -7974,6 +7979,191 @@ void Lowering::MapParameterRegisterLocals()
#endif
}

//------------------------------------------------------------------------
// Lowering::FindInducedParameterRegisterLocals:
//   Find locals that would be profitable to map from parameter registers,
//   based on IR in the initialization block.
//
// Notes:
//   Walks the LIR of comp->fgFirstBB looking for GT_LCL_FLD reads of
//   parameter locals whose offset, size and register kind exactly match a
//   register segment of the parameter's ABI information. For each match:
//     - a scalar local is associated with the segment's register (recorded
//       in comp->m_paramRegLocalMappings and flagged lvIsParamRegTarget);
//       the local is either the destination of the store consuming the
//       LCL_FLD (see TryReuseLocalForParameterAccess) or a new temp;
//     - a store of that local back into the parameter's field is inserted
//       at the beginning of the block;
//     - for small types an explicit normalizing cast is inserted as well;
//     - the LCL_FLD use is redirected to the scalar local, or the consuming
//       store is removed when its destination was reused.
//
void Lowering::FindInducedParameterRegisterLocals()
{
// Locals that a node visited earlier in this walk has defined or taken the
// address of. Reads of such locals are not treated as reads of the incoming
// parameter value, and such locals are not reused as mapping targets.
LocalSet storedToLocals(comp->getAllocator(CMK_ABI));
// Now look for optimization opportunities in the first block: places where
// we read fields out of struct parameters that can be mapped cleanly. This
// is frequently created by physical promotion.
for (GenTree* node : LIR::AsRange(comp->fgFirstBB))
{
GenTreeLclVarCommon* storeLcl;
if (node->DefinesLocal(comp, &storeLcl))
{
storedToLocals.Emplace(storeLcl->GetLclNum(), true);
continue;
}

// Address-taken locals are handled the same way as stored-to locals.
if (node->OperIs(GT_LCL_ADDR))
{
storedToLocals.Emplace(node->AsLclVarCommon()->GetLclNum(), true);
continue;
}

if (!node->OperIs(GT_LCL_FLD))
{
continue;
}

// Only locals numbered below info.compArgsCount are considered; these are
// the locals whose parameter ABI information is queried below.
GenTreeLclFld* fld = node->AsLclFld();
if (fld->GetLclNum() >= comp->info.compArgsCount)
{
continue;
}

if (storedToLocals.Lookup(fld->GetLclNum()))
{
// LCL_FLD does not necessarily take the value of the parameter
// anymore.
continue;
}

// Search the parameter's ABI segments for a register segment that this
// field read corresponds to exactly.
const ABIPassingInformation& dataAbiInfo = comp->lvaGetParameterABIInfo(fld->GetLclNum());
const ABIPassingSegment* regSegment = nullptr;
for (const ABIPassingSegment& segment : dataAbiInfo.Segments())
{
if (!segment.IsPassedInRegister())
{
continue;
}

// Require the same offset and size as the field, and agreement between
// whether the field's type uses an integer register and whether the
// segment's register is an integer register.
if ((segment.Offset != fld->GetLclOffs()) || (segment.Size != genTypeSize(fld)) || (varTypeUsesIntReg(fld) != genIsValidIntReg(segment.GetRegister())))
{
continue;
}

// This is a match, but check if it is already remapped.
// TODO-CQ: If it is already remapped, we can reuse the value from
// the remapping.
if (comp->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()) == nullptr)
{
regSegment = &segment;
}

// The search stops at the first matching segment, whether or not it was
// usable; regSegment stays null when the register is already mapped.
break;
}

if (regSegment == nullptr)
{
continue;
}

JITDUMP("LCL_FLD use [%06u] of unenregisterable parameter corresponds to ", Compiler::dspTreeID(fld));
DBEXEC(VERBOSE, regSegment->Dump());
JITDUMP("\n");

// Now see if we want to introduce a new local for this value, or if we
// can reuse one because this is the source of a store (frequently
// created by physical promotion).
LIR::Use use;
if (!LIR::AsRange(comp->fgFirstBB).TryGetUse(fld, &use))
{
JITDUMP(" ..but no use was found\n");
continue;
}

unsigned remappedLclNum = TryReuseLocalForParameterAccess(use, storedToLocals);

if (remappedLclNum == BAD_VAR_NUM)
{
// No reusable destination local: grab a new temp and give it the
// field's type.
remappedLclNum = comp->lvaGrabTemp(false DEBUGARG(comp->printfAlloc("struct parameter register %s", getRegName(regSegment->GetRegister()))));
comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet();
JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum);
}
else
{
JITDUMP("Reusing local V%02u for store from struct parameter register %s. Store:\n", remappedLclNum, getRegName(regSegment->GetRegister()));
DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User());

// The store will be a no-op, so get rid of it
LIR::AsRange(comp->fgFirstBB).Remove(use.User(), true);
// Reset the use so that the LCL_FLD replacement step at the bottom of
// the loop body (guarded by use.IsInitialized()) is skipped.
use = LIR::Use();
}

// Record the segment -> local mapping and flag the local as the target of
// a parameter register.
// NOTE(review): the meaning of the trailing 0 argument is not visible
// here; presumably an offset within the local -- confirm against the
// mapping type's constructor.
comp->m_paramRegLocalMappings->Emplace(regSegment, remappedLclNum, 0);
comp->lvaGetDesc(remappedLclNum)->lvIsParamRegTarget = true;

JITDUMP("New mapping: ");
DBEXEC(VERBOSE, regSegment->Dump());
JITDUMP(" -> V%02u\n", remappedLclNum);

GenTree* paramRegValue = comp->gtNewLclvNode(remappedLclNum, genActualType(fld));
GenTree* storeField = comp->gtNewStoreLclFldNode(fld->GetLclNum(), fld->TypeGet(), regSegment->Offset, paramRegValue);

// Store actual parameter local from new reg local
// (this keeps the parameter's own field up to date for any other accesses
// of the parameter local).
LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeField));
LowerNode(paramRegValue);
LowerNode(storeField);

JITDUMP("Parameter spill:\n");
DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeField);

// Insert explicit normalization for small types (the LCL_FLD we
// are replacing comes with this normalization).
// This is also inserted at the beginning of the block, after the spill
// store above was inserted there, so it is expected to end up ahead of
// that spill store.
if (varTypeIsSmall(fld))
{
GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld));
GenTree* normalizeLcl = comp->gtNewCastNode(TYP_INT, lcl, false, fld->TypeGet());
GenTree* storeNormalizedLcl = comp->gtNewStoreLclVarNode(remappedLclNum, normalizeLcl);
LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeNormalizedLcl));
LowerNode(lcl);
LowerNode(normalizeLcl);
LowerNode(storeNormalizedLcl);

JITDUMP("Parameter normalization:\n");
DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeNormalizedLcl);
}

// If we still have a valid use, then replace the LCL_FLD with a
// LCL_VAR of the remapped parameter register local.
if (use.IsInitialized())
{
GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld));
LIR::AsRange(comp->fgFirstBB).InsertAfter(fld, lcl);
use.ReplaceWith(lcl);
LowerNode(lcl);
JITDUMP("New user tree range:\n");
DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User());
// The user now consumes the new LCL_VAR; turn the old LCL_FLD into a NOP.
fld->gtBashToNOP();
}
}
}

//------------------------------------------------------------------------
// Lowering::TryReuseLocalForParameterAccess:
//   Check whether the local written by the user of a parameter field access
//   can itself be used as the local mapped to the parameter register.
//
// Parameters:
//   use            - The use of the parameter access
//   storedToLocals - Locals recorded by the caller as already defined or
//                    address-taken earlier in the block
//
// Returns:
//   The local number of the store's destination when it can be reused;
//   otherwise BAD_VAR_NUM.
//
unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals)
{
    GenTree* const user = use.User();

    // Only a direct store of the value into a local is a reuse candidate.
    if (user->OperIs(GT_STORE_LCL_VAR))
    {
        const unsigned   candidateLclNum = user->AsLclVarCommon()->GetLclNum();
        LclVarDsc* const candidateDsc    = comp->lvaGetDesc(candidateLclNum);

        // The destination must not already be the target of another
        // parameter register, and must not be a struct local.
        const bool alreadyRegTarget = candidateDsc->lvIsParamRegTarget;
        const bool isStructLocal    = (candidateDsc->TypeGet() == TYP_STRUCT);

        // A destination that was recorded in storedToLocals may change
        // value before this access, so it cannot be reused either.
        if (!alreadyRegTarget && !isStructLocal && !storedToLocals.Lookup(candidateLclNum))
        {
            return candidateLclNum;
        }
    }

    return BAD_VAR_NUM;
}

#ifdef DEBUG

//------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ class Lowering final : public Phase
static bool CheckBlock(Compiler* compiler, BasicBlock* block);
#endif // DEBUG

typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, bool> LocalSet;

void MapParameterRegisterLocals();
void FindInducedParameterRegisterLocals();
unsigned TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals);

void LowerBlock(BasicBlock* block);
GenTree* LowerNode(GenTree* node);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1453,7 +1453,7 @@ void LinearScan::identifyCandidatesExceptionDataflow()

assert(varDsc->lvLiveInOutOfHndlr);

if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam)
if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam && !varDsc->lvIsParamRegTarget)
{
assert(varDsc->lvMustInit);
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2066,7 +2066,7 @@ void LinearScan::insertZeroInitRefPositions()
while (iter.NextElem(&varIndex))
{
LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
if (!varDsc->lvIsParam && isCandidateVar(varDsc))
if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && isCandidateVar(varDsc))
{
JITDUMP("V%02u is a finally var:", compiler->lvaTrackedIndexToLclNum(varIndex));
Interval* interval = getIntervalForLocalVar(varIndex);
Expand Down
67 changes: 65 additions & 2 deletions src/coreclr/jit/promotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,13 +767,36 @@ class LocalUses

unsigned countReadBacks = 0;
weight_t countReadBacksWtd = 0;
// For parameters or OSR locals we always need one read back.
if (lcl->lvIsParam || lcl->lvIsOSRLocal)

// For OSR locals we always need one read back.
if (lcl->lvIsOSRLocal)
{
countReadBacks++;
countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp);
}

// For parameters, the backend may be able to map it directly from a register.
if (lcl->lvIsParam)
{
if (MapsToRegister(comp, access, lclNum))
{
// No promotion will result in a store to stack in the prolog.
costWithout += COST_STRUCT_ACCESS_CYCLES * comp->fgFirstBB->getBBWeight(comp);
sizeWithout += COST_STRUCT_ACCESS_SIZE;

// With promotion, we cost this like the normal reg accesses above
costWith += COST_REG_ACCESS_CYCLES * comp->fgFirstBB->getBBWeight(comp);
sizeWith += COST_REG_ACCESS_SIZE;
}
else
{
// Otherwise we expect no prolog work to be required if we
// don't promote, and we need a read back from the stack.
countReadBacks++;
countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp);
}
}

// If the struct is stored from a call (either due to a multireg
// return or by being passed as the retbuffer) then we need a readback
// after.
Expand Down Expand Up @@ -1000,6 +1023,46 @@ class LocalUses

return nullptr;
}

//------------------------------------------------------------------------
// MapsToRegister:
//   Determine whether an access into a parameter local lines up exactly
//   with one of the segments the parameter is passed in.
//
// Parameters:
//   comp   - Compiler instance
//   access - The access within the local
//   lclNum - Local number of the parameter
//
// Returns:
//   True if the parameter is not an implicit byref, has no stack segment,
//   and has a segment whose offset, size and register kind (integer vs.
//   non-integer) all match the access; false otherwise.
//
bool MapsToRegister(Compiler* comp, const Access& access, unsigned lclNum)
{
    assert(lclNum < comp->info.compArgsCount);

    // Implicit byref parameters never qualify.
    if (comp->lvaGetDesc(lclNum)->lvIsImplicitByRef)
    {
        return false;
    }

    // Parameters with any stack segment never qualify either.
    const ABIPassingInformation& paramAbiInfo = comp->lvaGetParameterABIInfo(lclNum);
    if (paramAbiInfo.HasAnyStackSegment())
    {
        return false;
    }

    const unsigned accessSize = genTypeSize(access.AccessType);

    for (const ABIPassingSegment& segment : paramAbiInfo.Segments())
    {
        if (segment.Offset != access.Offset)
        {
            continue;
        }

        if (segment.Size != accessSize)
        {
            continue;
        }

        // The access type must use the same kind of register as the segment.
        if (genIsValidIntReg(segment.GetRegister()) != varTypeUsesIntReg(access.AccessType))
        {
            continue;
        }

        return true;
    }

    return false;
}
};

// Struct used to save all struct stores involving physical promotion candidates.
Expand Down

0 comments on commit 058e8dc

Please sign in to comment.