Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize String.toFloatOrNull() #5364

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 204 additions & 24 deletions libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt
Original file line number Diff line number Diff line change
Expand Up @@ -239,35 +239,215 @@ public fun String.toBigDecimalOrNull(): java.math.BigDecimal? =
public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =
screenFloatValue(this) { it.toBigDecimal(mathContext) }

/**
 * Holder for the floating point validation pattern recommended by the javadoc of
 * `java.lang.Double.valueOf(String)`.
 */
private object ScreenFloatValueRegEx {
    @JvmField
    val value: Regex = Regex(buildPattern())

    /** Assembles the pattern text piece by piece, following the grammar productions. */
    private fun buildPattern(): String {
        val decimalDigits = """(\p{Digit}+)"""
        val hexDigits = """(\p{XDigit}+)"""
        val decimalExponent = "[eE][+-]?$decimalDigits"

        val hexSignificand = listOf(
            // 0[xX] HexDigits ._opt
            """(0[xX]$hexDigits(\.)?)""",
            // 0[xX] HexDigits_opt . HexDigits
            """(0[xX]$hexDigits?(\.)$hexDigits)""",
        ).joinToString(separator = "|")

        val number = listOf(
            // Digits ._opt Digits_opt ExponentPart_opt
            """($decimalDigits(\.)?($decimalDigits?)($decimalExponent)?)""",
            // . Digits ExponentPart_opt
            """(\.($decimalDigits)($decimalExponent)?)""",
            // HexSignificand BinaryExponent
            """(($hexSignificand)[pP][+-]?$decimalDigits)""",
        ).joinToString(separator = "|")

        // Optional surrounding control/space characters, optional sign, then either a
        // non-finite constant or a number with an optional float type suffix.
        return """[\x00-\x20]*[+-]?(NaN|Infinity|(($number)[fFdD]?))[\x00-\x20]*"""
    }
}

/**
 * Converts [str] with [parse], but only after [str] has passed the [isValidFloat] screening.
 *
 * @param str the text to convert.
 * @param parse the conversion applied to [str] once the screening has accepted it.
 * @return the value produced by [parse], or `null` when the screening rejects [str] or
 *   [parse] throws a [NumberFormatException].
 */
private inline fun <T> screenFloatValue(str: String, parse: (String) -> T): T? {
    return try {
        if (isValidFloat(str))
            parse(str)
        else
            null
    } catch (_: NumberFormatException) { // overflow
        null
    }
}

// Despite the name, this is the offset from the first to the last character of "NaN", not its
// length. isValidFloat adds it to the index where the constant would begin and compares the sum
// with the index of the last significant character.
private const val LengthOfNaN = 2 // "NaN".length - 1
// Same first-to-last character offset, for "Infinity".
private const val LengthOfInfinity = 7 // "Infinity".length - 1

/**
 * Checks whether [s] is spelled like a floating point number, without allocating and without
 * using a regular expression.
 *
 * Characters with a code of 0x20 or lower are ignored at both ends of the string.
 *
 * @param s the text to check.
 * @return `true` if [s] is a decimal or hexadecimal floating point literal, or one of the
 *   "NaN" / "Infinity" constants, each optionally preceded by a sign; `false` otherwise.
 */
private fun isValidFloat(s: String): Boolean {
    // A float can have one of two representations:
    //
    // 1. Standard:
    //     - With an integer part only: 1234
    //     - With integer and fractional parts: 1234.4678
    //     - With a fractional part only: .4678
    //
    //     Optional sign prefix: + or -
    //     Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)
    //     Optional suffix: f, F, d, or D (for instance 12.34f or .34D)
    //
    // 2. Hexadecimal:
    //     - With an integer part only: 0x12ab
    //     - With integer and fractional parts: 0x12ab.CD78
    //     - With a fractional part only: 0x.CD78
    //
    //     Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)
    //
    //     Optional sign prefix: + or -
    //     Optional suffix: f, F, d, or D (for instance 0x12abp3f)
    //
    // Two special cases:
    //     "NaN" and "Infinity" strings, can have an optional sign prefix (+ or -)
    //
    // Implementation notes:
    //     - The pattern "myChar.code or 0x20 == 'x'.code" is used to perform a case-insensitive
    //       comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into
    //       a lower case one.

    var start = 0
    var end = s.length - 1

    // Skip leading spaces
    while (start <= end && s[start].code <= 0x20) start++

    // Empty/whitespace string
    if (start > end) return false

    // Skip trailing spaces
    while (end > start && s[end].code <= 0x20) end--

    // Number starts with a positive or negative sign
    if (s[start] == '+' || s[start] == '-') start++
    // If we have nothing after the sign, the string is invalid
    if (start > end) return false

    var hasIntegerPart: Boolean
    var hasFractionalPart = false
    var isHex = false

    // Might be a hex string
    if (s[start] == '0') {
        start++
        // A "0" on its own is valid
        if (start > end) return true

        // Test for [xX] to see if we truly have a hex string
        if (s[start].code or 0x20 == 'x'.code) {
            start++

            // Look for hex digits after the 0x prefix
            var checkpoint = start
            while (start <= end) {
                val d = s[start]
                if (d.isAsciiDigit() || d.isHexLetter()) {
                    start++
                } else {
                    break
                }
            }
            // Check if we found 0x*****, otherwise, the hex number might be of the
            // form 0x.*******
            hasIntegerPart = checkpoint != start

            // A hex string must have an exponent, the string is invalid if we only found an
            // integer part
            if (start > end) return false

            if (s[start] == '.') {
                start++

                // Look for hex digits for the fractional part
                checkpoint = start
                while (start <= end) {
                    val d = s[start]
                    if (d.isAsciiDigit() || d.isHexLetter()) {
                        start++
                    } else {
                        break
                    }
                }

                // Did we find a fractional part?
                hasFractionalPart = checkpoint != start
            }

            // A string must have an integer part, or a fractional part, or both
            if (!hasIntegerPart && !hasFractionalPart) return false

            // A hex string must have an exponent, the string is invalid if we only found an
            // integer and/or fractional part
            if (start > end) return false

            isHex = true
        } else {
            // Rewind the 0 we just parsed to make things easier below and try to parse a non-
            // hexadecimal string representation of a float
            start--
        }
    }

    // Parse a non-hexadecimal representation
    if (!isHex) {
        // Remember where the number begins so we can tell later whether a '.' was consumed
        val numberStart = start

        // Look for digits before the decimal separator, if any
        while (start <= end && s[start].isAsciiDigit()) start++

        // If there's no integer part, the float might be of the form .1234
        hasIntegerPart = numberStart != start

        // A non-hexadecimal representation only needs an integer part, we can stop here
        if (start > end) return hasIntegerPart

        if (s[start] == '.') {
            start++

            // Look for the fractional part
            val fractionStart = start
            while (start <= end && s[start].isAsciiDigit()) start++

            // Did we find a fractional part?
            hasFractionalPart = fractionStart != start
        }

        // A string must have an integer part, or a fractional part, or both
        if (!hasIntegerPart && !hasFractionalPart) {
            // If the cursor moved, a lone '.' was consumed: neither a number nor a constant may
            // follow it (this rejects ".NaN" and ".Infinity")
            if (start != numberStart) return false

            // Special case non-finite constants: pick the candidate from the number of
            // characters left, and bail if it fits neither of the 2 known constants
            val constant = when (end) {
                start + LengthOfNaN -> "NaN"
                start + LengthOfInfinity -> "Infinity"
                else -> return false
            }
            return s.startsWith(constant, start)
        }

        // If we have either, we can stop here if we've run out of characters
        if (start > end) return true
    }

    // Look for an exponent:
    // - Mandatory for hexadecimal strings (marked by a p or P)
    // - Optional for "regular" strings (marked by an e or E)
    val marker = s[start++].code or 0x20
    val expectedMarker = if (isHex) 'p'.code else 'e'.code
    if (marker != expectedMarker) {
        // We're here if the exponent character is not valid, but if the string is a "regular"
        // string, it could be a valid f/F/d/D suffix, so check for that (it must be the last
        // character too)
        return !isHex && (marker == 'f'.code || marker == 'd'.code) && start > end
    }

    // An exponent must be followed by digits
    if (start > end) return false

    // There may be a sign prefix before the exponent digits
    if (s[start] == '+' || s[start] == '-') {
        start++
        if (start > end) return false
    }

    // Look for digits after the exponent and its optional sign
    val exponentDigitsStart = start
    while (start <= end && s[start].isAsciiDigit()) start++

    // At least one exponent digit is required (this rejects "1ef", "1e+d" and "0x1pf")
    if (start == exponentDigitsStart) return false

    // The last suffix is optional, the string is valid here
    if (start > end) return true

    // We may have an optional fFdD suffix
    if (start == end) {
        val suffix = s[start].code or 0x20
        return suffix == 'f'.code || suffix == 'd'.code
    }

    // Anything left is invalid
    return false
}

/**
 * Returns `true` if this character is one of the ASCII digits '0' through '9'.
 */
@kotlin.internal.InlineOnly
private inline fun Char.isAsciiDigit(): Boolean {
    // Characters below '0' give a negative offset, which becomes a large value once reduced to
    // 16 bits, so one upper-bound comparison checks both ends of the range.
    val offset = (code - '0'.code) and 0xFFFF
    return offset < 10
}

/**
 * Returns `true` if this character is one of the ASCII letters 'a' through 'f', in either case.
 */
@kotlin.internal.InlineOnly
private inline fun Char.isHexLetter(): Boolean {
    // Setting the 0x20 bit maps an upper case ASCII letter onto its lower case counterpart.
    val lowered = code or 0x20
    // Offsets below 'a' become large values once reduced to 16 bits, so one upper-bound
    // comparison checks both ends of the range.
    val offset = (lowered - 'a'.code) and 0xFFFF
    return offset < 6
}