Skip to content

Commit

Permalink
Reduce ProbabilisticWithAsciiCharSearchValues overhead on non-ASCII texts (dotnet#89224)
Browse files Browse the repository at this point in the history
  • Loading branch information
MihaZupan authored Jul 20, 2023
1 parent 31bcc75 commit 2ee61bb
Showing 1 changed file with 12 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ internal override int IndexOfAny(ReadOnlySpan<char> span)
{
int offset = 0;

if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
// We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher
// in order to minimize the overhead this fast-path has on non-ASCII texts.
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[0]))
{
// We are using IndexOfAnyAsciiSearcher to search for the first ASCII character in the set, or any non-ASCII character.
// We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
Expand Down Expand Up @@ -100,7 +102,9 @@ internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)
{
int offset = 0;

if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
// We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher
// in order to minimize the overhead this fast-path has on non-ASCII texts.
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[0]))
{
// Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
Expand Down Expand Up @@ -134,7 +138,9 @@ ref MemoryMarshal.GetReference(span),

internal override int LastIndexOfAny(ReadOnlySpan<char> span)
{
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
// We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher
// in order to minimize the overhead this fast-path has on non-ASCII texts.
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[^1]))
{
// We are using IndexOfAnyAsciiSearcher to search for the last ASCII character in the set, or any non-ASCII character.
// We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
Expand Down Expand Up @@ -186,7 +192,9 @@ ref MemoryMarshal.GetReference(span),

internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span)
{
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
// We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher
// in order to minimize the overhead this fast-path has on non-ASCII texts.
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[^1]))
{
// Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
int offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
Expand Down

0 comments on commit 2ee61bb

Please sign in to comment.