From 6a77dd9ab641ef4b7d92b440ee00d38a094b7a94 Mon Sep 17 00:00:00 2001 From: Ravenwater Date: Sat, 24 Aug 2024 11:18:47 -0400 Subject: [PATCH] header updates --- include/universal/math/math.md | 2 +- include/universal/math/math_constants.hpp | 1 + include/universal/math/math_functions.hpp | 3 +- .../universal/math/{math => mathlib_shim.hpp} | 13 +- include/universal/math/stub/abs.hpp | 3 +- include/universal/math/stub/classify.hpp | 27 ---- include/universal/math/stub/complex.hpp | 3 +- .../universal/math/stub/error_and_gamma.hpp | 3 +- include/universal/math/stub/exponent.hpp | 3 +- include/universal/math/stub/fractional.hpp | 3 +- include/universal/math/stub/hyperbolic.hpp | 3 +- include/universal/math/stub/hypot.hpp | 3 +- include/universal/math/stub/logarithm.hpp | 3 +- include/universal/math/stub/minmax.hpp | 3 +- include/universal/math/stub/next.hpp | 3 +- include/universal/math/stub/pow.hpp | 3 +- include/universal/math/stub/sqrt.hpp | 3 +- include/universal/math/stub/trigonometry.hpp | 3 +- include/universal/math/stub/truncate.hpp | 3 +- include/universal/native/ieee754.hpp | 7 - include/universal/native/ieee754_numeric.hpp | 8 ++ include/universal/number/dd/dd_impl.hpp | 133 +++++++++++++----- .../verification/cfloat_test_suite.hpp | 1 - static/dd/api/api.cpp | 116 +++++++++++++-- static/native/float/mathlib.cpp | 6 +- 25 files changed, 251 insertions(+), 108 deletions(-) rename include/universal/math/{math => mathlib_shim.hpp} (68%) diff --git a/include/universal/math/math.md b/include/universal/math/math.md index 567afcb1f..b3dc52f66 100644 --- a/include/universal/math/math.md +++ b/include/universal/math/math.md @@ -5,8 +5,8 @@ |---------------|-------------| | abs ( ) | absolute value of an integer. The absolute value of a number is always positive. Only integer values are supported in C. | | floor ( ) | nearest integer which is less than or equal to the argument passed to this function. | -| round ( ) | nearest integer value of the float/double/long double argument passed to this function. If decimal value is from “.1 to .5”, it returns integer value less than the argument. If decimal value is from “.6 to .9”, it returns the integer value greater than the argument. | | ceil ( ) | return nearest integer value which is greater than or equal to the argument passed to this function. | +| round ( ) | nearest integer value of the float/double/long double argument passed to this function. If decimal value is from “.1 to .5”, it returns integer value less than the argument. If decimal value is from “.6 to .9”, it returns the integer value greater than the argument. | | sin ( ) | return sine value. | | cos ( ) | return cosine. | | cosh ( ) | return hyperbolic cosine. | diff --git a/include/universal/math/math_constants.hpp b/include/universal/math/math_constants.hpp index 997a5f178..25f22cd1f 100644 --- a/include/universal/math/math_constants.hpp +++ b/include/universal/math/math_constants.hpp @@ -2,6 +2,7 @@ // math_constants.hpp: definition of math constants // // Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/math_functions.hpp b/include/universal/math/math_functions.hpp index 42b0816a1..0eba95145 100644 --- a/include/universal/math/math_functions.hpp +++ b/include/universal/math/math_functions.hpp @@ -1,7 +1,8 @@ #pragma once // math_functions.hpp: definition of universal mathematical functions // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/math b/include/universal/math/mathlib_shim.hpp similarity index 68% rename from include/universal/math/math rename to include/universal/math/mathlib_shim.hpp index e3794072e..256926a3d 100644 --- a/include/universal/math/math +++ b/include/universal/math/mathlib_shim.hpp @@ -1,13 +1,16 @@ -// : standard header of the universal math library shim layer -// which injects the standard math library functions for native IEEE-754 types into the sw::universal namespace. +// : standard header of the universal math library shim layer +// which injects the standard math library functions for native IEEE-754 types into +// the sw::universal namespace. +// // For example, std::abs(float) is shimmed to // template sw::universal::abs(NativeFloat) // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#ifndef _UNIVERSAL_MATH_STANDARD_HEADER_ -#define _UNIVERSAL_MATH_STANDARD_HEADER_ +#ifndef _UNIVERSAL_MATHLIB_STANDARD_HEADER_ +#define _UNIVERSAL_MATHLIB_STANDARD_HEADER_ //////////////////////////////////////////////////////////////////////////////////////// /// BEHAVIORAL COMPILATION SWITCHES diff --git a/include/universal/math/stub/abs.hpp b/include/universal/math/stub/abs.hpp index b37da52ea..73ebf1084 100644 --- a/include/universal/math/stub/abs.hpp +++ b/include/universal/math/stub/abs.hpp @@ -1,7 +1,8 @@ #pragma once // abs.hpp: templated abs function stub for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/classify.hpp b/include/universal/math/stub/classify.hpp index 5fe5cfc6d..1348880c4 100644 --- a/include/universal/math/stub/classify.hpp +++ b/include/universal/math/stub/classify.hpp @@ -69,19 +69,6 @@ namespace sw { namespace universal { #elif defined(_MSC_VER) /* Microsoft Visual Studio. --------------------------------- */ - // STD LIB function for IEEE floats: Categorizes floating point value arg into the following categories: zero, subnormal, normal, infinite, NAN, or implementation-defined category. - template::value, Scalar>::type> - int fpclassify(const Scalar& v) { - return std::fpclassify(v); - } - - // STD LIB function for IEEE floats: - template::value, Scalar>::type> - inline bool isfinite(const Scalar& v) { - return !std::isnan(v) && !std::isinf(v); - } #elif defined(__PGI) /* Portland Group PGCC/PGCPP. ------------------------------- */ @@ -92,18 +79,4 @@ namespace sw { namespace universal { #endif - // Universal function supported by all number systems - - template::value, Scalar>::type> - inline bool isdenorm(const Scalar& v) { - return !std::isnormal(v) && !std::isnan(v) && !std::isinf(v); - } - - // define an alias issubnorm for isdenorm - template::value, Scalar>::type> - inline bool issubnorm(const Scalar& v) { return isdenorm(v); } - - }} // namespace sw::universal diff --git a/include/universal/math/stub/complex.hpp b/include/universal/math/stub/complex.hpp index f61404d88..735541443 100644 --- a/include/universal/math/stub/complex.hpp +++ b/include/universal/math/stub/complex.hpp @@ -1,7 +1,8 @@ #pragma once // complex.hpp: templated complex functions stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include diff --git a/include/universal/math/stub/error_and_gamma.hpp b/include/universal/math/stub/error_and_gamma.hpp index 1a51522b9..296e1b5b1 100644 --- a/include/universal/math/stub/error_and_gamma.hpp +++ b/include/universal/math/stub/error_and_gamma.hpp @@ -1,7 +1,8 @@ #pragma once // error_gamma.hpp: templated error and gamma function stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/exponent.hpp b/include/universal/math/stub/exponent.hpp index ec91a5318..a3da97676 100644 --- a/include/universal/math/stub/exponent.hpp +++ b/include/universal/math/stub/exponent.hpp @@ -1,7 +1,8 @@ #pragma once // exponent.hpp: templated exponent function stubs for native floating-point // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/fractional.hpp b/include/universal/math/stub/fractional.hpp index 0d1006108..0f44dc5e0 100644 --- a/include/universal/math/stub/fractional.hpp +++ b/include/universal/math/stub/fractional.hpp @@ -1,7 +1,8 @@ #pragma once // fractional.hpp: templated fractional function stubs for native floating-point // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/hyperbolic.hpp b/include/universal/math/stub/hyperbolic.hpp index 8d2783fca..2a9869448 100644 --- a/include/universal/math/stub/hyperbolic.hpp +++ b/include/universal/math/stub/hyperbolic.hpp @@ -1,7 +1,8 @@ #pragma once // hyperbolic.hpp: templated hyperbolic function stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/hypot.hpp b/include/universal/math/stub/hypot.hpp index b541d4b67..fa51e879a 100644 --- a/include/universal/math/stub/hypot.hpp +++ b/include/universal/math/stub/hypot.hpp @@ -1,7 +1,8 @@ #pragma once // hypot.hpp: templated hypotenuse function stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/logarithm.hpp b/include/universal/math/stub/logarithm.hpp index 448ec30b1..fcd367ec0 100644 --- a/include/universal/math/stub/logarithm.hpp +++ b/include/universal/math/stub/logarithm.hpp @@ -1,7 +1,8 @@ #pragma once // logarithm.hpp: templated logarithm function stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/minmax.hpp b/include/universal/math/stub/minmax.hpp index 053283154..c7301e637 100644 --- a/include/universal/math/stub/minmax.hpp +++ b/include/universal/math/stub/minmax.hpp @@ -1,7 +1,8 @@ #pragma once // math_minmax.hpp: templated min/max function stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/next.hpp b/include/universal/math/stub/next.hpp index be30a36e8..b2aa016cc 100644 --- a/include/universal/math/stub/next.hpp +++ b/include/universal/math/stub/next.hpp @@ -1,7 +1,8 @@ #pragma once // next.hpp: templated nextafter/nexttoward function stubs for native floating-point // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/pow.hpp b/include/universal/math/stub/pow.hpp index c2d610821..031a42413 100644 --- a/include/universal/math/stub/pow.hpp +++ b/include/universal/math/stub/pow.hpp @@ -1,7 +1,8 @@ #pragma once // pow.hpp: templated pow function stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/sqrt.hpp b/include/universal/math/stub/sqrt.hpp index 35ade9f05..e209692d0 100644 --- a/include/universal/math/stub/sqrt.hpp +++ b/include/universal/math/stub/sqrt.hpp @@ -1,7 +1,8 @@ #pragma once // sqrt.hpp: templated sqrt function stub for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/trigonometry.hpp b/include/universal/math/stub/trigonometry.hpp index 550c80be0..5a9349a9b 100644 --- a/include/universal/math/stub/trigonometry.hpp +++ b/include/universal/math/stub/trigonometry.hpp @@ -1,7 +1,8 @@ #pragma once // trigonometric.hpp: templated trigonometric function stubs for native floating-point // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/math/stub/truncate.hpp b/include/universal/math/stub/truncate.hpp index 74ab4d546..7cadb307b 100644 --- a/include/universal/math/stub/truncate.hpp +++ b/include/universal/math/stub/truncate.hpp @@ -1,7 +1,8 @@ #pragma once // truncate.hpp: templated truncation function stubs for native floating-point (trunc, round, floor, and ceil) for posits // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. diff --git a/include/universal/native/ieee754.hpp b/include/universal/native/ieee754.hpp index 20ef24d62..848416530 100644 --- a/include/universal/native/ieee754.hpp +++ b/include/universal/native/ieee754.hpp @@ -22,13 +22,6 @@ #include #include -//////////////////////////////////////////////////////////////////////////////////////// -// enable throwing specific exceptions long double special handling of 128bit fields -#if !defined(BITBLOCK_THROW_ARITHMETIC_EXCEPTION) -// default is to use std::cerr for signalling an error -#define BITBLOCK_THROW_ARITHMETIC_EXCEPTION 0 -#endif - // if the compiler environment allows, set up // constexpr compatible bit casts, otherwise // fallback to nonconstexpr bit casts. diff --git a/include/universal/native/ieee754_numeric.hpp b/include/universal/native/ieee754_numeric.hpp index 9c8c473b6..eca84c949 100644 --- a/include/universal/native/ieee754_numeric.hpp +++ b/include/universal/native/ieee754_numeric.hpp @@ -28,6 +28,14 @@ namespace sw { namespace universal { return (std::fpclassify(a) == FP_ZERO); } + // check if the floating-point number is a denorm + template::value, Real >::type + > + inline bool isdenorm(const Real& a) { + return (std::fpclassify(a) == FP_SUBNORMAL); + } + // compile time power of 2 template::value, Real >::type diff --git a/include/universal/number/dd/dd_impl.hpp b/include/universal/number/dd/dd_impl.hpp index 23c45e61f..eede5afc1 100644 --- a/include/universal/number/dd/dd_impl.hpp +++ b/include/universal/number/dd/dd_impl.hpp @@ -107,7 +107,7 @@ class dd { } } - // raw limb constructor: no argument checking + // raw limb constructor: no argument checking, arguments need to be properly aligned constexpr dd(double h, double l) noexcept : hi{ h }, lo{ l } {} // initializers for native types @@ -274,8 +274,33 @@ class dd { return operator/=(dd(rhs)); } - // unary operators + // overloaded unary operators dd& operator++() { + if ((hi == 0.0 && lo == 0.0) || sw::universal::isdenorm(hi)) { + // move into or through the subnormal range of the high limb + hi = std::nextafter(hi, +INFINITY); + } + else if (isfinite(hi)) { + int highScale = sw::universal::scale(hi); + if (lo == 0.0) { + // the second limb cannot be a denorm, so we need to jump to the first normal value + // in the binade that is 2^-53 below that of the high limb + lo = std::ldexp(1.0, highScale - 53); + } + else { + int lowScale = sw::universal::scale(lo); + lo = std::nextafter(lo, +INFINITY); + int newLowScale = sw::universal::scale(lo); + // check for overflow: could be transitioning into the next binade, or INF in last binade + if (lowScale < newLowScale || isinf(lo)) { + lo = 0.0; + hi = std::nextafter(hi, +INFINITY); + } + } + } + else { + // the double-double is INF/NAN and will stay INF/NAN + } return *this; } dd operator++(int) { @@ -284,6 +309,33 @@ class dd { return tmp; } dd& operator--() { + if ((hi == 0.0 && lo == 0.0) || sw::universal::isdenorm(hi)) { + // move into or through the subnormal range of the high limb + hi = std::nextafter(hi, -INFINITY); + } + else if (isfinite(hi)) { + int highScale = sw::universal::scale(hi); + if (lo == 0.0) { + // we need to drop into a lower binade, thus we need to update the high limb first + hi = std::nextafter(hi, -INFINITY); + int highScale = sw::universal::scale(hi); + // next, the low limb needs to become the largest value 2^-53 below the new high limb + lo = std::ldexp(0.9999999999999999, highScale - 52); // 52 because we are all 1's and need to be one below the full shift + } + else { + int lowScale = sw::universal::scale(lo); + lo = std::nextafter(lo, -INFINITY); + int newLowScale = sw::universal::scale(lo); + // check for overflow + if (lowScale < newLowScale || isinf(lo)) { + lo = 0.0; + hi = std::nextafter(hi, -INFINITY); + } + } + } + else { + // the double-double is INF/NAN and will stay INF/NAN + } return *this; } dd operator--(int) { @@ -882,43 +934,52 @@ inline std::string to_binary(const dd& number, bool nibbleMarker = false) { // print lo fraction bits decoder.d = number.low(); - // high limb low limb - // 52 51 ..... 3210 52 51 ...... 3210 - // h. ffff ffff ...... ffff ffff h. ffff ffff ...... ffff ffff - // 105 104 53 52 51 ...... 3210 dd_bit - // | <--- exponent is exp(hi) - 53 - // h. ffff ffff ...... ffff ffff 0. 0000 000h. ffff ffff ...... ffff ffff - // | <----- exponent would be exp(hi) - 61 - // h. ffff ffff ...... ffff ffff 0. 0000 0000 ...... 000h. ffff ffff ...... ffff ffff - // | <----- exponent would be exp(hi) - 102 - // h. ffff ffff ...... ffff ffff 0. 0000 0000 ...... 0000 000h. ffff ffff ...... ffff ffff - // | <----- exponent would be exp(hi) - 106 - // the low segment is always in normal form - int lowExponent = static_cast(decoder.parts.exponent) - ieee754_parameter::bias; - - assert(highExponent >= lowExponent + 53 && "exponent of lower limb is not-aligned"); - - // enumerate in the bit offset space of the double-double - // that means, the first bit of the second limb is bit (105 - 53) == 52 and it cycles down to 0 - // representing 2^-53 through 2^-106 relative to the MSB of the high limb - int offset = highExponent - 53 - lowExponent - 1; - mask = (1ull << 51); - s << '|'; // visual delineation between the two limbs - for (int ddbit = 52; ddbit >= 0; --ddbit) { - if (offset == 0) { - s << (decoder.d == 0.0 ? '0' : '1'); // show hidden bit when not-zero - } - else if (offset > 0) { - // we have to introduce a leading zero as the hidden bit is positioned at a lower ddbit offset + if (decoder.d == 0.0) { // special case that has unaligned scales between lo and hi + s << '|'; // visual delineation between the two limbs + for (int ddbit = 52; ddbit >= 0; --ddbit) { s << '0'; + if (nibbleMarker && ddbit != 0 && (ddbit % 4) == 0) s << '\''; } - else { - // we have reached the fraction bits - s << ((decoder.parts.fraction & mask) ? '1' : '0'); - mask >>= 1; + } + else { + // high limb low limb + // 52 51 ..... 3210 52 51 ...... 3210 + // h. ffff ffff ...... ffff ffff h. ffff ffff ...... ffff ffff + // 105 104 53 52 51 ...... 3210 dd_bit + // | <--- exponent is exp(hi) - 53 + // h. ffff ffff ...... ffff ffff 0. 0000 000h. ffff ffff ...... ffff ffff + // | <----- exponent would be exp(hi) - 61 + // h. ffff ffff ...... ffff ffff 0. 0000 0000 ...... 000h. ffff ffff ...... ffff ffff + // | <----- exponent would be exp(hi) - 102 + // h. ffff ffff ...... ffff ffff 0. 0000 0000 ...... 0000 000h. ffff ffff ...... ffff ffff + // | <----- exponent would be exp(hi) - 106 + // the low segment is always in normal form + int lowExponent = static_cast(decoder.parts.exponent) - ieee754_parameter::bias; + + assert(highExponent >= lowExponent + 53 && "exponent of lower limb is not-aligned"); + + // enumerate in the bit offset space of the double-double + // that means, the first bit of the second limb is bit (105 - 53) == 52 and it cycles down to 0 + // representing 2^-53 through 2^-106 relative to the MSB of the high limb + int offset = highExponent - 53 - lowExponent; + mask = (1ull << 51); + s << '|'; // visual delineation between the two limbs + for (int ddbit = 52; ddbit >= 0; --ddbit) { + if (offset == 0) { + s << (decoder.d == 0.0 ? '0' : '1'); // show hidden bit when not-zero + } + else if (offset > 0) { + // we have to introduce a leading zero as the hidden bit is positioned at a lower ddbit offset + s << '0'; + } + else { + // we have reached the fraction bits + s << ((decoder.parts.fraction & mask) ? '1' : '0'); + mask >>= 1; + } + if (nibbleMarker && ddbit != 0 && (ddbit % 4) == 0) s << '\''; + --offset; } - if (nibbleMarker && ddbit != 0 && (ddbit % 4) == 0) s << '\''; - --offset; } return s.str(); diff --git a/include/universal/verification/cfloat_test_suite.hpp b/include/universal/verification/cfloat_test_suite.hpp index 70b027076..33d69a18e 100644 --- a/include/universal/verification/cfloat_test_suite.hpp +++ b/include/universal/verification/cfloat_test_suite.hpp @@ -11,7 +11,6 @@ #include #include -#include #include // error/success reporting namespace sw { namespace universal { diff --git a/static/dd/api/api.cpp b/static/dd/api/api.cpp index 24e0efe5a..3264479af 100644 --- a/static/dd/api/api.cpp +++ b/static/dd/api/api.cpp @@ -20,6 +20,13 @@ namespace sw { namespace universal { + void ReportDoubleDoubleOperation(const dd& a, const std::string& op, const dd& b, const dd& c, int precision = 32) { + auto defaultPrecision = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << a << op << b << " = " << c << '\n'; + std::cout << std::setprecision(defaultPrecision); + } + template void Progression(Real v) { using namespace sw::universal; @@ -131,22 +138,77 @@ try { } // default behavior - std::cout << "+--------- Default dd has subnormals, but no supernormals ---------+\n"; + std::cout << "+--------- Default double-double behavior ---------+\n"; { uint64_t big = (1ull << 53); - std::cout << to_binary(big) << " : " << big << '\n'; - dd a(big), b(1.0), c{}; - c = a + b; - ReportValue(a, "a"); - ReportValue(b, "b"); - ReportValue(c, "c"); + ReportValue(big, "2^53", 20); + // if we use double, we would not be able to capture the information of the variable b == 1.0 in the sum of a + b + { + double a(big), b(1.0), c{}; + c = a + b; + ReportValue(a, "a as double", 20, 16); + ReportValue(b, "b as double", 20, 16); + ReportValue(c, "c as double", 20, 16); + } + // the extra precision of the double-double makes it possible to use that information + { + dd a(big), b(1.0), c{}; + c = a + b; + ReportValue(a, "a as double-double", 20, 16); + ReportValue(b, "b as double-double", 20, 16); + ReportValue(c, "c as double-double", 20, 16); + } } // arithmetic behavior std::cout << "+--------- Default dd has subnormals, but no supernormals ---------+\n"; { - dd a(2.0), b(4.0); - ArithmeticOperators(a, b); + dd a(2.0), b(4.0), c{}; + // these are integers, so we don't need much precision + int precision = 2; + c = a + b; + ReportDoubleDoubleOperation(a, "+", b, c, precision); + c = a - b; + ReportDoubleDoubleOperation(a, "-", b, c, precision); + c = a * b; + ReportDoubleDoubleOperation(a, "*", b, c, precision); + c = a / b; + ReportDoubleDoubleOperation(a, "/", b, c, precision); + + // increment + a = 0.0; + ReportValue(a, " 0.0"); + ++a; + ReportValue(a, "nextafter 0.0"); + a = 1.0; + ReportValue(a, " 1.0"); + ++a; + ReportValue(a, "nextafter 1.0", 20, 32); + + // decrement + a = 0.0; + ReportValue(a, " 0.0"); + --a; + ReportValue(a, "nextbelow 0.0"); + a = 1.0; + ReportValue(a, " 1.0"); + --a; + ReportValue(a, "nextbelow 1.0", 20, 32); + + { + double d(0.0); + if (iszero(d)) std::cout << d << " is zero\n"; + d = std::nextafter(d, +INFINITY); + if (isdenorm(d)) std::cout << d << " is a subnormal number\n"; + } + { + dd d(0.0); + if (iszero(d)) std::cout << d << " is zero\n"; + ++d; + if (isdenorm(d)) std::cout << d << " is a subnormal number\n"; + } + + } // helper api @@ -213,7 +275,7 @@ try { for (int i = 1; i < 53; ++i) { x0 = 1.0 + (std::pow(2.0, -double(i))); a.set(x0, x1); - std::cout << a << " : " << to_binary(x0) << " : " << std::setprecision(7) << x0 << std::setprecision(precisionForRange) << '\n'; + std::cout << a << " : " << to_binary(x0) << " : " << std::setprecision(17) << x0 << std::setprecision(precisionForRange) << '\n'; } // x0 is 1.0 + eps() at this point std::cout << to_binary(dd(x0, x1)) << '\n'; @@ -226,12 +288,30 @@ try { for (int i = 0; i < 54; ++i) { x1 = (std::pow(2.0, -53.0 - double(i))); a.set(x0, x1); - std::cout << a << " : " << to_binary(x1) << " : " << std::setprecision(7) << x1 << std::setprecision(precisionForRange) << '\n'; + std::cout << a << " : " << to_binary(x1) << " : " << std::setprecision(17) << x1 << std::setprecision(precisionForRange) << '\n'; + } + // print the full double-double binary pattern + std::cout << "\nvalue and binary pattern of the double-double\n"; + precisionForRange = 32; + std::cout << std::setprecision(precisionForRange); + std::cout << centered("double-double", precisionForRange + 6) << " : "; + std::cout << centered("binary form of double-double", 110) << '\n'; + for (int i = 0; i < 54; ++i) { + x1 = (std::pow(2.0, -53.0 - double(i))); + a.set(x0, x1); + std::cout << a << " : " << to_binary(a) << '\n'; } + // NOTE: the value of the last lower limb is half an ulp below the dd ulp at 1.0. + // We cannot represent that bit in the binary form, but it rounds up in the decimal form as information + // see below for the pattern + // .... + // 1.00000000000000000000000000000010e+00 : 0b0.01111111111.0000000000000000000000000000000000000000000000000000|00000000000000000000000000000000000000000000000000100 + // 1.00000000000000000000000000000005e+00 : 0b0.01111111111.0000000000000000000000000000000000000000000000000000|00000000000000000000000000000000000000000000000000010 + // 1.00000000000000000000000000000002e+00 : 0b0.01111111111.0000000000000000000000000000000000000000000000000000|00000000000000000000000000000000000000000000000000001 + // 1.00000000000000000000000000000001e+00 : 0b0.01111111111.0000000000000000000000000000000000000000000000000000|00000000000000000000000000000000000000000000000000000 std::cout << std::setprecision(defaultPrecision); } - std::cout << "+--------- set specific values of interest --------+\n"; { dd a; // uninitialized @@ -244,7 +324,7 @@ try { a.minpos(); std::cout << "minpos double-double : " << to_binary(a) << " : " << a << " : " << scale(a) << '\n'; a.zero(); - std::cout << "zero : " << to_binary(a) << " : " << a << " : " << scale(a) << '\n'; + std::cout << "zero : " << to_binary(a) << " : " << a << " : " << scale(a) << '\n'; a.minneg(); std::cout << "minneg double-double : " << to_binary(a) << " : " << a << " : " << scale(a) << '\n'; a.maxneg(); @@ -291,9 +371,19 @@ try { Real a; // uninitialized std::cout << type_tag(a) << '\n'; + // the high and low limb of the double-double have a strict exponent relationship + // the setbit(s) API doesn't know anything about that relationship, and it is + // the repsonsbility of the driver to assure the relationship is adhered to + // otherwise it is not a valid double-double a.setbits(0x0000); std::cout << to_binary(a) << " : " << a << '\n'; + // if we are going to set lower limb bits, we are creating a non-zero lower limb + // which will need a specific relative exponent to the high limb + // set that first + double high = (1ull << 53); + double low = 1.0; + a.set(high, low); a.setbit(8); std::cout << to_binary(a) << " : " << a << " : set bit 8 assuming 0-based" << '\n'; a.setbits(0xffff); diff --git a/static/native/float/mathlib.cpp b/static/native/float/mathlib.cpp index 2d46ef8aa..d5b5ebc39 100644 --- a/static/native/float/mathlib.cpp +++ b/static/native/float/mathlib.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include @@ -187,8 +187,8 @@ try { #if REGRESSION_LEVEL_1 nrOfFailedTestCases += ReportTestResult(VerifyMathlibShim< float >(reportTestCases), "float", test_tag); -// nrOfFailedTestCases += ReportTestResult(VerifyMathlibShim< posit<8,2> >(reportTestCases), "posit<8,2>", test_tag); -// nrOfFailedTestCases += ReportTestResult(VerifyMathlibShim< cfloat<8,2> >(reportTestCases), "cfloat<8,2>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyMathlibShim< posit<8,2> >(reportTestCases), "posit<8,2>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyMathlibShim< cfloat<8,2> >(reportTestCases), "cfloat<8,2>", test_tag); #endif