From 2ee61bbe7bb04e97159f017c4f9eaf486dc46c1f Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Thu, 20 Jul 2023 05:01:07 +0200 Subject: [PATCH] Reduce ProbabilisticWithAsciiCharSearchValues overhead on non-ASCII texts (#89224) --- .../ProbabilisticWithAsciiCharSearchValues.cs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs index 065f2cd5e8937..8a8f09a0d327f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs @@ -40,7 +40,9 @@ internal override int IndexOfAny(ReadOnlySpan span) { int offset = 0; - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[0])) { // We are using IndexOfAnyAsciiSearcher to search for the first ASCII character in the set, or any non-ASCII character. // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate). @@ -100,7 +102,9 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) { int offset = 0; - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[0])) { // Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized( @@ -134,7 +138,9 @@ ref MemoryMarshal.GetReference(span), internal override int LastIndexOfAny(ReadOnlySpan span) { - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[^1])) { // We are using IndexOfAnyAsciiSearcher to search for the last ASCII character in the set, or any non-ASCII character. // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate). @@ -186,7 +192,9 @@ ref MemoryMarshal.GetReference(span), internal override int LastIndexOfAnyExcept(ReadOnlySpan span) { - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[^1])) { // Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. int offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized(