From b05ccaf451bca11fda5437003d54d7975cd8e575 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 17 Jul 2024 05:51:29 -0700 Subject: [PATCH] Revert "[SLP]Improve minbitwidth analysis for trun'ed gather nodes." This reverts commit 6425f2d66740b84fc3027b649cd4baf660c384e8 to fix the buildbost issues reported in https://lab.llvm.org/buildbot/#/builders/95/builds/1404. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +------ .../X86/int-bitcast-minbitwidth.ll | 6 +-- .../X86/minbitwidth-transformed-operand.ll | 22 +++++---- .../Transforms/SLPVectorizer/X86/resched.ll | 45 +++++++++---------- .../SLPVectorizer/X86/shuffle-multivector.ll | 13 +++--- .../orig-btiwidth-les-projected.ll | 8 ++-- 6 files changed, 47 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ccb6734d5618c0..7bdbbecb7f0d85 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15522,21 +15522,8 @@ void BoUpSLP::computeMinimumValueSizes() { auto ComputeMaxBitWidth = [&](const TreeEntry &E, bool IsTopRoot, bool IsProfitableToDemoteRoot, unsigned Opcode, unsigned Limit, bool IsTruncRoot, - bool IsSignedCmp) -> unsigned { + bool IsSignedCmp) { ToDemote.clear(); - // Check if the root is trunc and the next node is gather/buildvector, then - // keep trunc in scalars, which is free in most cases. - if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 && - E.Idx > (IsStoreOrInsertElt ? 2 : 1)) { - ToDemote.push_back(E.Idx); - const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE; - auto It = MinBWs.find(UserTE); - if (It != MinBWs.end()) - return It->second.first; - return DL->getTypeSizeInBits( - E.UserTreeIndices.back().UserTE->Scalars.front()->getType()); - } - unsigned VF = E.getVectorFactor(); auto *TreeRootIT = dyn_cast(E.Scalars.front()->getType()); if (!TreeRootIT || !Opcode) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll index 97e505f4319c6b..789d73947d1c73 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll @@ -5,9 +5,9 @@ define void @t(i64 %v) { ; CHECK-LABEL: define void @t( ; CHECK-SAME: i64 [[V:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[V]] to i16 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 57b5d2af48ee60..032625a1199f9f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -5,16 +5,20 @@ define void @test(i64 %d.promoted.i) { ; CHECK-LABEL: define void @test( ; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> , i64 0, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[D_PROMOTED_I]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i1> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i1> , <16 x i1> [[TMP4]], <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i1> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP6]]) +; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i1> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I_1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i1> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 -; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 0 +; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[OP_RDX]], 0 ; CHECK-NEXT: store i32 [[TMP10]], ptr null, align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 4ed52247c2ef37..b7237cbb02bb32 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -11,31 +11,26 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] -; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1 -; CHECK-NEXT: [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2 -; CHECK-NEXT: [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[SUB_I]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_I_I]] to i8 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SHR_1_I_I]] to i8 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[SHR_2_I_I]] to i8 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[TMP16]] to <8 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP17]], <8 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP18]], <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i8> [[TMP19]], -; CHECK-NEXT: store <16 x i8> [[TMP20]], ptr undef, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5 +; CHECK-NEXT: [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6 +; CHECK-NEXT: [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_4_I_I]], i32 5 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_5_I_I]], i32 6 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SHR_6_I_I]], i32 7 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = trunc <16 x i32> [[TMP13]] to <16 x i8> +; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i8> [[TMP14]], +; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr undef, align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll index c2555889f59816..143052a3d9cd07 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-163 | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-160 | FileCheck %s define void @test1(i128 %p0, i128 %p1, i128 %p2, i128 %p3, <4 x i128> %vec) { ; CHECK-LABEL: @test1( @@ -14,14 +14,13 @@ define void @test1(i128 %p0, i128 %p1, i128 %p2, i128 %p3, <4 x i128> %vec) { ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[T5:%.*]] = trunc i128 [[P1]] to i32 ; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP3]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i128> [[VEC:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i128> [[VEC]] to <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = sdiv <4 x i32> [[TMP8]], [[TMP12]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i128> [[TMP1]], <2 x i128> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i128> [[VEC:%.*]], <4 x i128> [[TMP9]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i128> [[TMP10]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = sdiv <4 x i32> [[TMP8]], [[TMP11]] ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP13]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP12]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/orig-btiwidth-les-projected.ll b/llvm/test/Transforms/SLPVectorizer/orig-btiwidth-les-projected.ll index 88503aeb6071ff..531e964053482e 100644 --- a/llvm/test/Transforms/SLPVectorizer/orig-btiwidth-les-projected.ll +++ b/llvm/test/Transforms/SLPVectorizer/orig-btiwidth-les-projected.ll @@ -5,10 +5,10 @@ define i32 @test(i4 %0) { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: i4 [[TMP0:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i4> , i4 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i4> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[ADD_R:%.*]] = extractelement <2 x i4> [[TMP2]], i32 0 -; CHECK-NEXT: [[ADD_R14:%.*]] = extractelement <2 x i4> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i8 0 to i4 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i8 0 to i4 +; CHECK-NEXT: [[ADD_R:%.*]] = or i4 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ADD_R14:%.*]] = or i4 0, [[TMP2]] ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i4 [[ADD_R]], [[ADD_R14]] ; CHECK-NEXT: ret i32 0 ;