Skip to content

Commit

Permalink
[JIT] Fold some bitwise operations to vpternlog (dotnet#91227)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ruihan-Yin authored Oct 21, 2023
1 parent 821280f commit c7fd55c
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 2 deletions.
117 changes: 116 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25741,6 +25741,18 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const
}
}

//------------------------------------------------------------------------
// OperIsBitwiseHWIntrinsic: Is this HWIntrinsic a bitwise logic intrinsic node.
//
// Return Value:
// Whether "this" is a bitwise logic intrinsic node.
//
bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() const
{
genTreeOps Oper = HWOperGet();
return Oper == GT_AND || Oper == GT_OR || Oper == GT_XOR || Oper == GT_AND_NOT;
}

//------------------------------------------------------------------------------
// OperRequiresAsgFlag : Check whether the operation requires GTF_ASG flag regardless
// of the children's flags.
Expand Down Expand Up @@ -25963,7 +25975,7 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)
//------------------------------------------------------------------------------
// HWOperGet : Returns Oper based on the HWIntrinsicId
//
genTreeOps GenTreeHWIntrinsic::HWOperGet()
genTreeOps GenTreeHWIntrinsic::HWOperGet() const
{
switch (GetHWIntrinsicId())
{
Expand All @@ -25972,6 +25984,8 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet()
case NI_SSE2_And:
case NI_AVX_And:
case NI_AVX2_And:
case NI_AVX512F_And:
case NI_AVX512DQ_And:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_And:
#endif
Expand All @@ -25991,13 +26005,40 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet()
case NI_SSE2_Xor:
case NI_AVX_Xor:
case NI_AVX2_Xor:
case NI_AVX512F_Xor:
case NI_AVX512DQ_Xor:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Xor:
#endif
{
return GT_XOR;
}

#if defined(TARGET_XARCH)
case NI_SSE_Or:
case NI_SSE2_Or:
case NI_AVX_Or:
case NI_AVX2_Or:
case NI_AVX512F_Or:
case NI_AVX512DQ_Or:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Or:
#endif
{
return GT_OR;
}

#if defined(TARGET_XARCH)
case NI_SSE_AndNot:
case NI_SSE2_AndNot:
case NI_AVX_AndNot:
case NI_AVX2_AndNot:
case NI_AVX512F_AndNot:
case NI_AVX512DQ_AndNot:
{
return GT_AND_NOT;
}
#endif
// TODO: Handle other cases

default:
Expand Down Expand Up @@ -26784,6 +26825,80 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree

return 0;
}

//------------------------------------------------------------------------
// GetTernaryControlByte: Compute the control byte for a ternary logic node
//    that is equivalent to applying the bitwise operation of "this" and then
//    the bitwise operation of `second` to its result.
//
// Arguments:
//    second - the bitwise intrinsic that has "this" as its first or second
//             operand
//
// Return Value:
//    The control byte encoding "(A op1 B) op2 C", where op1 is the oper of
//    "this" and op2 is the oper of `second`.
//
// Notes:
//    Only GT_AND, GT_OR and GT_XOR are handled for either node; any other
//    oper hits unreached().
//
uint8_t GenTreeHWIntrinsic::GetTernaryControlByte(GenTreeHWIntrinsic* second) const
{
    // The expected shape is:
    //
    //         /- A
    //         +- B
    //    t1 = binary logical op1    (this)
    //
    //         /- C
    //         +- t1
    //    t2 = binary logical op2    (second)
    //
    // Each of the three inputs is represented by a fixed key:
    //    * A: 0xF0
    //    * B: 0xCC
    //    * C: 0xAA
    //
    // Applying the desired logic directly to the keys yields the control
    // byte. For example, (A & B) ^ C is (0xF0 & 0xCC) ^ 0xAA, which is 0x6A.
    assert((second->Op(1) == this) || (second->Op(2) == this));

    const uint8_t keyA = 0xF0;
    const uint8_t keyB = 0xCC;
    const uint8_t keyC = 0xAA;

    // Step 1: combine A and B using the oper of "this".
    uint8_t innerBits = 0;

    switch (HWOperGet())
    {
        case GT_AND:
        {
            innerBits = static_cast<uint8_t>(keyA & keyB);
            break;
        }

        case GT_OR:
        {
            innerBits = static_cast<uint8_t>(keyA | keyB);
            break;
        }

        case GT_XOR:
        {
            innerBits = static_cast<uint8_t>(keyA ^ keyB);
            break;
        }

        default:
        {
            unreached();
        }
    }

    // Step 2: combine the intermediate value with C using the oper of "second".
    uint8_t controlBits = 0;

    switch (second->HWOperGet())
    {
        case GT_AND:
        {
            controlBits = static_cast<uint8_t>(innerBits & keyC);
            break;
        }

        case GT_OR:
        {
            controlBits = static_cast<uint8_t>(innerBits | keyC);
            break;
        }

        case GT_XOR:
        {
            controlBits = static_cast<uint8_t>(innerBits ^ keyC);
            break;
        }

        default:
        {
            unreached();
        }
    }

    return controlBits;
}
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS

unsigned GenTreeLclFld::GetSize() const
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6313,12 +6313,14 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
bool OperIsEmbBroadcastCompatible() const;
bool OperIsBroadcastScalar() const;
bool OperIsCreateScalarUnsafe() const;
bool OperIsBitwiseHWIntrinsic() const;

bool OperRequiresAsgFlag() const;
bool OperRequiresCallFlag() const;
bool OperRequiresGlobRefFlag() const;

unsigned GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3);
uint8_t GetTernaryControlByte(GenTreeHWIntrinsic* second) const;

ClassLayout* GetLayout(Compiler* compiler) const;

Expand Down Expand Up @@ -6408,7 +6410,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic

static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2);

genTreeOps HWOperGet();
genTreeOps HWOperGet() const;

private:
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);
Expand Down
76 changes: 76 additions & 0 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1662,6 +1662,82 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
LowerFusedMultiplyAdd(node);
break;

case NI_SSE_And:
case NI_SSE2_And:
case NI_AVX_And:
case NI_AVX2_And:
case NI_AVX512F_And:
case NI_AVX512DQ_And:
case NI_SSE_Or:
case NI_SSE2_Or:
case NI_AVX_Or:
case NI_AVX2_Or:
case NI_AVX512F_Or:
case NI_AVX512DQ_Or:
case NI_SSE_Xor:
case NI_SSE2_Xor:
case NI_AVX_Xor:
case NI_AVX2_Xor:
case NI_AVX512F_Xor:
case NI_AVX512DQ_Xor:
{
if (!comp->IsBaselineVector512IsaSupportedOpportunistically())
{
break;
}
GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);

LIR::Use use;
if (BlockRange().TryGetUse(node, &use))
{
// search for structure like:
/*
/- A
+- B
t1 = binary logical op1
/- C
+- t1
t2 = binary logical op2
*/
GenTree* second = use.User();
if (!second->OperIs(GT_HWINTRINSIC) || !second->AsHWIntrinsic()->OperIsBitwiseHWIntrinsic())
{
break;
}

if (second->AsHWIntrinsic()->HWOperGet() == GT_AND_NOT)
{
// currently ANDNOT logic cannot be optimized by the ternary node.
break;
}
GenTree* op3 = second->AsHWIntrinsic()->Op(1) == node ? second->AsHWIntrinsic()->Op(2)
: second->AsHWIntrinsic()->Op(1);
GenTree* control = comp->gtNewIconNode(node->GetTernaryControlByte(second->AsHWIntrinsic()));
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
unsigned simdSize = node->GetSimdSize();
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
GenTree* ternaryNode =
comp->gtNewSimdTernaryLogicNode(simdType, op1, op2, op3, control, simdBaseJitType, simdSize);
BlockRange().InsertBefore(second, control, ternaryNode);
LIR::Use finalRes;
if (BlockRange().TryGetUse(second, &finalRes))
{
finalRes.ReplaceWith(ternaryNode);
}
else
{
ternaryNode->SetUnusedValue();
}
GenTree* next = node->gtNext;
BlockRange().Remove(node);
BlockRange().Remove(second);
return next;
}
break;
}

default:
break;
}
Expand Down

0 comments on commit c7fd55c

Please sign in to comment.