diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a725098fa..103507acd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,3 +1,3 @@ { - "image": "stillwater/builders:clang16builder" + "image": "stillwater/builders:clang18builder" } diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index b21926d94..06c478964 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -2,7 +2,7 @@ name: CMake on: push: - branches: [ v3.78, dev, main ] + branches: [ v3.79, dev, main ] pull_request: branches: [ main ] diff --git a/CMakeLists.txt b/CMakeLists.txt index 41c8c9594..6c8b6d533 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ if(NOT DEFINED UNIVERSAL_VERSION_MAJOR) set(UNIVERSAL_VERSION_MAJOR 3) endif() if(NOT DEFINED UNIVERSAL_VERSION_MINOR) - set(UNIVERSAL_VERSION_MINOR 78) + set(UNIVERSAL_VERSION_MINOR 79) endif() if(NOT DEFINED UNIVERSAL_VERSION_PATCH) set(UNIVERSAL_VERSION_PATCH 1) @@ -130,8 +130,8 @@ option(BUILD_NUMBER_FIXPNTS "Set to ON to build static fixed-point option(BUILD_NUMBER_BFLOATS "Set to ON to build static bfloat tests" OFF) option(BUILD_NUMBER_CFLOATS "Set to ON to build static cfloat tests" OFF) option(BUILD_NUMBER_DFLOATS "Set to ON to build static dfloat tests" OFF) -option(BUILD_NUMBER_DDS "Set to ON to build static double-double tests" OFF) -option(BUILD_NUMBER_QDS "Set to ON to build static quad-double tests" OFF) +option(BUILD_NUMBER_DOUBLE_DOUBLE "Set to ON to build static double-double tests" OFF) +option(BUILD_NUMBER_QUAD_DOUBLE "Set to ON to build static quad-double tests" OFF) option(BUILD_NUMBER_AREALS "Set to ON to build static areal tests" OFF) option(BUILD_NUMBER_UNUM1S "Set to ON to build static unum type 1 tests" OFF) option(BUILD_NUMBER_UNUM2S "Set to ON to build static unum type 2 tests" OFF) @@ -662,8 +662,8 @@ if(BUILD_NUMBER_STATICS) set(BUILD_NUMBER_BFLOATS ON) set(BUILD_NUMBER_CFLOATS ON) 
set(BUILD_NUMBER_DFLOATS ON) - set(BUILD_NUMBER_DDS ON) - set(BUILD_NUMBER_QDS ON) + set(BUILD_NUMBER_DOUBLE_DOUBLE ON) + set(BUILD_NUMBER_QUAD_DOUBLE ON) set(BUILD_NUMBER_AREALS ON) set(BUILD_NUMBER_UNUM1S ON) set(BUILD_NUMBER_UNUM2S ON) @@ -827,14 +827,14 @@ add_subdirectory("static/dfloat") endif(BUILD_NUMBER_DFLOATS) # double-double floats -if(BUILD_NUMBER_DDS) +if(BUILD_NUMBER_DOUBLE_DOUBLE) add_subdirectory("static/dd") -endif(BUILD_NUMBER_DDS) +endif(BUILD_NUMBER_DOUBLE_DOUBLE) # quad-double floats -if(BUILD_NUMBER_QDS) +if(BUILD_NUMBER_QUAD_DOUBLE) add_subdirectory("static/qd") -endif(BUILD_NUMBER_QDS) +endif(BUILD_NUMBER_QUAD_DOUBLE) # conversion tests suites if(BUILD_NUMBER_CONVERSIONS) diff --git a/docker/Dockerfile.clang11builder b/docker/Dockerfile.clang11builder index f5369b9b2..ac064c7ec 100644 --- a/docker/Dockerfile.clang11builder +++ b/docker/Dockerfile.clang11builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/Dockerfile.clang12builder b/docker/Dockerfile.clang12builder index 668bcb291..bddfd29de 100644 --- a/docker/Dockerfile.clang12builder +++ b/docker/Dockerfile.clang12builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/Dockerfile.clang13builder b/docker/Dockerfile.clang13builder index a69316529..f9daf41f3 100644 --- a/docker/Dockerfile.clang13builder +++ b/docker/Dockerfile.clang13builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/Dockerfile.clang14builder b/docker/Dockerfile.clang14builder index e2e6bb044..cd2100e9f 100644 --- 
a/docker/Dockerfile.clang14builder +++ b/docker/Dockerfile.clang14builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/Dockerfile.clang15builder b/docker/Dockerfile.clang15builder index 58a8b45d3..f5123c37c 100644 --- a/docker/Dockerfile.clang15builder +++ b/docker/Dockerfile.clang15builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/Dockerfile.clang16builder b/docker/Dockerfile.clang16builder index 9e8d7943d..0a5df989c 100644 --- a/docker/Dockerfile.clang16builder +++ b/docker/Dockerfile.clang16builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/Dockerfile.clang17builder b/docker/Dockerfile.clang17builder index f089c3327..ba5e45a2d 100644 --- a/docker/Dockerfile.clang17builder +++ b/docker/Dockerfile.clang17builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/Dockerfile.clang18builder b/docker/Dockerfile.clang18builder index 2ba204efa..dcf882d9c 100644 --- a/docker/Dockerfile.clang18builder +++ b/docker/Dockerfile.clang18builder @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends -V \ curl \ vim \ gdb \ + gdbserver \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/build_build_containers.sh b/docker/build_build_containers.sh index 825e31c50..efa833fa2 100755 --- a/docker/build_build_containers.sh +++ b/docker/build_build_containers.sh @@ -28,3 +28,7 
@@ docker build --target clang15builder -t stillwater/builders:clang15builder -f Do docker push stillwater/builders:clang15builder docker build --target clang16builder -t stillwater/builders:clang16builder -f Dockerfile.clang16builder . docker push stillwater/builders:clang16builder +docker build --target clang17builder -t stillwater/builders:clang17builder -f Dockerfile.clang17builder . +docker push stillwater/builders:clang17builder +docker build --target clang18builder -t stillwater/builders:clang18builder -f Dockerfile.clang18builder . +docker push stillwater/builders:clang18builder diff --git a/docker/build_release_container.sh b/docker/build_release_container.sh index b0a586c4e..de971018a 100755 --- a/docker/build_release_container.sh +++ b/docker/build_release_container.sh @@ -5,7 +5,7 @@ # example would be to strace an executable to find its dependencies MAJOR=v3 -MINOR=78 +MINOR=79 VERSION="$MAJOR.$MINOR" if [[ $# == 0 ]]; then diff --git a/docker/build_test_container.sh b/docker/build_test_container.sh index c83f2bb3a..86e141c7c 100755 --- a/docker/build_test_container.sh +++ b/docker/build_test_container.sh @@ -11,7 +11,7 @@ # example would be to strace an executable to find its dependencies MAJOR=v3 -MINOR=78 +MINOR=79 VERSION="$MAJOR.$MINOR" if [[ $# == 0 ]]; then diff --git a/include/universal/blas/blas.hpp b/include/universal/blas/blas.hpp index 043baf458..988c6b33b 100644 --- a/include/universal/blas/blas.hpp +++ b/include/universal/blas/blas.hpp @@ -7,10 +7,11 @@ // // Super-simple BLAS implementation to aid application, // numerical, and reproducibility examples. 
- #ifndef _UNIVERSAL_BLAS_LIBRARY #define _UNIVERSAL_BLAS_LIBRARY +#include + // aggregation types for serialization constexpr uint32_t UNIVERSAL_AGGREGATE_SCALAR = 0x1001; constexpr uint32_t UNIVERSAL_AGGREGATE_VECTOR = 0x2002; diff --git a/include/universal/native/error_free_ops.hpp b/include/universal/native/error_free_ops.hpp index 2202f5d26..bb50e8d91 100644 --- a/include/universal/native/error_free_ops.hpp +++ b/include/universal/native/error_free_ops.hpp @@ -110,22 +110,85 @@ namespace sw { namespace universal { return s; } - // ThreeSum + // ThreeSum enumerations /// - /// three_sum computes the relationship a + b + c = s + r + /// three_sum computes the relationship x + y + z = r0 + r1 + r2 /// - /// input - /// input - /// input value, output residual - inline void three_sum(volatile double& a, volatile double& b, volatile double& c) { - volatile double t1, t2, t3; + /// input, yields output r0 (==sum) + /// input, yields output r1 + /// input, yields output r2 + inline void three_sum(volatile double& x, volatile double& y, volatile double& z) { + volatile double u, v, w; + + u = two_sum(x, y, v); + x = two_sum(z, u, w); // x = r0 (==sum) + y = two_sum(v, w, z); // y = r1, and z = r2 + } + + /// + /// three_sum2 computes the relationship x + y + z = r0 + r1 + /// + /// input, yields output r0 (==sum) + /// input, yields output r1 + /// input + inline void three_sum2(volatile double& x, volatile double& y, double z) { + volatile double u, v, w; + + u = two_sum(x, y, v); + x = two_sum(z, u, w); // x = r0 (==sum) + y = v + w; // y = r1 + } - t1 = two_sum(a, b, t2); - a = two_sum(c, t1, t3); - b = two_sum(t2, t3, c); + /// + /// three_sum3 computes the relationship x + y + z = r0 + /// just the sum of (x, y, z) without any residuals + /// + /// input + /// input + /// input + /// the (rounded) sum of (x + y + z) + inline double three_sum3(double x, double y, double z) { + double u = x + y; + return u + z; // traditional information loss if z << (x + y) and/or 
y << x } + /* */ + + /// + /// quick_three_accumulation calculates the relationship a + b + c = s + r + /// s = quick_three_accumulation(a, b, c) adds c to the dd-pair (a, b). + /// If the result does not fit in two doubles, then the sum is + /// output into s and (a, b) contains the remainder. Otherwise + /// s is zero and (a, b) contains the sum. + /// + /// + /// + /// + /// + inline double quick_three_accumulation(volatile double& a, volatile double& b, double c) { + volatile double s; + bool za, zb; + + s = two_sum(b, c, b); + s = two_sum(a, s, a); + + za = (a != 0.0); + zb = (b != 0.0); + + if (za && zb) + return s; + + if (!zb) { + b = a; + a = s; + } + else { + a = s; + } + + return 0.0; + } // Split @@ -165,8 +228,7 @@ namespace sw { namespace universal { /// input /// reference to the residual /// the product of a * b - inline double two_prod(double a, double b, volatile double& r) - { + inline double two_prod(double a, double b, volatile double& r) { volatile double p = a * b; if (std::isfinite(p)) { #if defined( QD_FMS ) @@ -192,8 +254,7 @@ namespace sw { namespace universal { /// the square product of a inline double two_sqr(double a, volatile double& r) { volatile double p = a * a; - if (std::isfinite(p)) - { + if (std::isfinite(p)) { #if defined( QD_FMS ) err = QD_FMS(a, a, p); #else @@ -208,6 +269,30 @@ namespace sw { namespace universal { } + // Computes the nearest integer to d + inline double nint(double d) { + if (d == std::floor(d)) return d; + return std::floor(d + 0.5); + } + + // Computes the truncated integer + inline double aint(double d) { + return (d >= 0.0) ? std::floor(d) : std::ceil(d); + } + + /* These are provided to give consistent + interface for double with double-double and quad-double. 
*/ + inline void sincosh(double t, double& sinh_t, double& cosh_t) { + sinh_t = std::sinh(t); + cosh_t = std::cosh(t); + } + + // square of argument t + inline double sqr(double t) { + return t * t; + } + + /// /// renorm adjusts the quad-double to a canonical form /// A quad-double number is an unevaluated sum of four IEEE double numbers. @@ -225,7 +310,7 @@ namespace sw { namespace universal { /// /// inline void renorm(volatile double& a0, volatile double& a1, volatile double& a2, volatile double& a3) { - volatile double s0, s1, s2 = 0.0, s3 = 0.0; + volatile double s0, s1, s2{ 0.0 }, s3{ 0.0 }; if (std::isinf(a0)) return; @@ -274,7 +359,7 @@ namespace sw { namespace universal { /// reference to a3 /// reference to a4 inline void renorm(volatile double& a0, volatile double& a1, volatile double& a2, volatile double& a3, volatile double& a4) { - volatile double s0, s1, s2 = 0.0, s3 = 0.0; + volatile double s0, s1, s2{ 0.0 }, s3{ 0.0 }; if (std::isinf(a0)) return; diff --git a/include/universal/native/extract_fields.hpp b/include/universal/native/extract_fields.hpp index 66348ae8f..e5ae735bd 100644 --- a/include/universal/native/extract_fields.hpp +++ b/include/universal/native/extract_fields.hpp @@ -257,21 +257,5 @@ template } return bIsInf; } - - inline void setFields(float& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { - float_decoder decoder; - decoder.parts.sign = s; - decoder.parts.exponent = rawExponentBits & 0xFF; - decoder.parts.fraction = rawFractionBits & 0x7FFFFF; - value = decoder.f; - } - - inline void setFields(double& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { - double_decoder decoder; - decoder.parts.sign = s; - decoder.parts.exponent = rawExponentBits & 0x7FF; - decoder.parts.fraction = rawFractionBits & 0xF'FFFF'FFFF'FFFF; - value = decoder.d; - } }} // namespace sw::universal diff --git a/include/universal/native/ieee754.hpp b/include/universal/native/ieee754.hpp index 
d6b8de1be..20ef24d62 100644 --- a/include/universal/native/ieee754.hpp +++ b/include/universal/native/ieee754.hpp @@ -33,6 +33,7 @@ // constexpr compatible bit casts, otherwise // fallback to nonconstexpr bit casts. #include +#include // functions that do not need to be constexpr #include diff --git a/include/universal/native/ieee754_parameter.hpp b/include/universal/native/ieee754_parameter.hpp index f4a22942a..ba8475618 100644 --- a/include/universal/native/ieee754_parameter.hpp +++ b/include/universal/native/ieee754_parameter.hpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include namespace sw { namespace universal { diff --git a/include/universal/native/manipulators.hpp b/include/universal/native/manipulators.hpp index 22be964bc..d2b021bd5 100644 --- a/include/universal/native/manipulators.hpp +++ b/include/universal/native/manipulators.hpp @@ -297,7 +297,7 @@ namespace sw { namespace universal { template::value, Real >::type > - inline std::string color_print(Real number) { + inline std::string color_print(Real number, bool nibbleMarker = false) { std::stringstream s; bool sign{ false }; @@ -322,7 +322,7 @@ namespace sw { namespace universal { uint64_t mask = (1 << (ieee754_parameter::ebits - 1)); for (int i = (ieee754_parameter::ebits - 1); i >= 0; --i) { s << cyan << ((rawExponent & mask) ? '1' : '0'); - // if (i > 0 && i % 4 == 0) s << cyan << '\''; + if (nibbleMarker && i > 0 && i % 4 == 0) s << cyan << '\''; mask >>= 1; } } @@ -333,7 +333,7 @@ namespace sw { namespace universal { uint64_t mask = (uint64_t(1) << (ieee754_parameter::fbits - 1)); for (int i = (ieee754_parameter::fbits - 1); i >= 0; --i) { s << magenta << ((rawFraction & mask) ? 
'1' : '0'); - // if (i > 0 && i % 4 == 0) s << magenta << '\''; + if (nibbleMarker && i > 0 && i % 4 == 0) s << magenta << '\''; mask >>= 1; } diff --git a/include/universal/native/set_fields.hpp b/include/universal/native/set_fields.hpp new file mode 100644 index 000000000..7a2ae2c78 --- /dev/null +++ b/include/universal/native/set_fields.hpp @@ -0,0 +1,173 @@ +#pragma once +// set_fields.hpp: configure constexpr/nonconst manipulation functions for IEEE-754 native types using C++20 +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include + +namespace sw { namespace universal { + +#if BIT_CAST_IS_CONSTEXPR +#include // C++20 bit_cast + + inline BIT_CAST_CONSTEXPR void setbit(float& v, unsigned index, bool b = true) { + uint32_t raw = std::bit_cast(v); + uint32_t mask = (1ull << index); // do we want to bound check? 
+ if (b) raw |= mask; else raw &= ~mask; + v = std::bit_cast(raw); + } + + inline BIT_CAST_CONSTEXPR void setbit(double& v, unsigned index, bool b = true) { + uint64_t raw = std::bit_cast(v); + uint64_t mask = (1ull << index); + if (b) raw |= mask; else raw &= ~mask; + v = std::bit_cast(raw); + } + +#if LONG_DOUBLE_SUPPORT + +// Clang bit_cast<> can't deal with long double + +#if defined(LONG_DOUBLE_DOWNCAST) + // specialization to set fields on a long double + inline void setFields(long double& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { + double dv = double(value); + setFields(dv, s, rawExponentBits, rawFractionBits); + value = (long double)(dv); + } + +#else // !DOWNCAST + +/* + struct blob { + std::uint64_t hi; + std::uint64_t fraction; + } raw; + raw = std::bit_cast(value); + s = (ieee754_parameter::smask & raw.hi); + rawExponentBits = (ieee754_parameter::emask & raw.hi); + rawFractionBits = (ieee754_parameter::fmask & raw.fraction); +*/ + /* + ETLO 8/1/2024: not able to make std::bit_cast<> work for long double + // specialization to set fields on a long double + inline BIT_CAST_CONSTEXPR setbit(long double& v, unsigned index, bool b) { + struct blob { + std::uint64_t hi; + std::uint64_t fraction; + } bits; + bits = std::bit_cast(value); + uint64_t raw{}; + if (index < 64) raw = bits.fraction; else raw = bits.hi; + uint64_t mask = (1ull << index); + if (b) raw |= mask; else raw &= ~mask; + if (index < 64) bits.fraction = raw; else bits.hi = raw; + v = std::bit_cast(bits); + } + */ + // falling back to non-constexpr + // specialization to set fields on a long double + inline void setFields(long double& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { + long_double_decoder decoder; + decoder.parts.sign = s; + decoder.parts.exponent = rawExponentBits & 0x7FFF; + decoder.parts.fraction = rawFractionBits & 0xFFFF'FFFF'FFFF'FFFF; + value = decoder.ld; + } + +#endif // LONG_DOUBLE_DOWNCAST +#endif // 
LONG_DOUBLE_SUPPORT + +#else // !BIT_CAST_IS_CONSTEXPR + + inline void setbit(float& v, unsigned index, bool b = true) { + float_decoder decoder; + decoder.f = v; + if (index == 31) { + decoder.parts.sign = b; + } + else if (index < 23) { // 22...0 are fraction bits + uint32_t raw = decoder.parts.fraction; + uint32_t mask = (1ull << index); + if (b) raw |= mask; else raw &= ~mask; + decoder.parts.fraction = raw; + } + else if (index < 32) { + uint32_t raw = decoder.parts.exponent; + uint32_t mask = (1ull << (index - 23)); + if (b) raw |= mask; else raw &= ~mask; + decoder.parts.exponent = raw; + } + v = decoder.f; + } + + inline void setbit(double& v, unsigned index, bool b = true) { + double_decoder decoder; + decoder.d = v; + if (index == 63) { + decoder.parts.sign = b; + } + else if (index < 52) { // 51...0 are fraction bits + uint64_t raw = decoder.parts.fraction; + uint64_t mask = (1ull << index); + if (b) raw |= mask; else raw &= ~mask; + decoder.parts.fraction = raw; + } + else if (index < 64) { + uint64_t raw = decoder.parts.exponent; + uint64_t mask = (1ull << (index - 52)); + if (b) raw |= mask; else raw &= ~mask; + decoder.parts.exponent = raw; + } + v = decoder.d; + } + +//////////////////////////////////////////////////////////////////////// +// nonconst setFields on single precision floating-point + + inline void setFields(float& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { + float_decoder decoder; + decoder.parts.sign = s; + decoder.parts.exponent = rawExponentBits & 0xFF; + decoder.parts.fraction = rawFractionBits & 0x7FFFFF; + value = decoder.f; + } + +//////////////////////////////////////////////////////////////////////// +// nonconst setFields on double precision floating-point + + inline void setFields(double& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { + double_decoder decoder; + decoder.parts.sign = s; + decoder.parts.exponent = rawExponentBits & 0x7FF; + 
decoder.parts.fraction = rawFractionBits & 0xF'FFFF'FFFF'FFFF; + value = decoder.d; + } + +#if LONG_DOUBLE_SUPPORT +// Clang bit_cast<> doesn't appear to deal with long double +#define LONG_DOUBLE_DOWNCAST +#ifdef LONG_DOUBLE_DOWNCAST + inline void setFields(long double& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { + double dv = double(value); + setFields(dv, s, rawExponentBits, rawFractionBits); + value = (long double)(dv); + } +#else + // specialization to set fields on a long double + inline void setFields(long double& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept { + long_double_decoder decoder; + decoder.parts.sign = s; + decoder.parts.exponent = rawExponentBits & 0x7FFF; + decoder.parts.fraction = rawFractionBits & 0xFFFF'FFFF'FFFF'FFFF; + value = decoder.ld; + } +#endif // LONG_DOUBLE_DOWNCAST +#endif // LONG_DOUBLE_SUPPORT +#endif // BIT_CAST_IS_CONSTEXPR + +}} // namespace sw::universal diff --git a/include/universal/number/bfloat/bfloat16_impl.hpp b/include/universal/number/bfloat/bfloat16_impl.hpp index f97cce9de..696da86e8 100644 --- a/include/universal/number/bfloat/bfloat16_impl.hpp +++ b/include/universal/number/bfloat/bfloat16_impl.hpp @@ -54,6 +54,7 @@ class bfloat16 { template::value, Real >::type> constexpr bfloat16& convert_ieee754(Real rhs) noexcept { + float f = float(rhs); uint16_t pun[2]; std::memcpy(pun, &f, 4); @@ -130,7 +131,6 @@ class bfloat16 { constexpr bfloat16(unsigned long long iv) noexcept : _bits{} { *this = iv; } constexpr bfloat16(float iv) noexcept : _bits{} { *this = iv; } constexpr bfloat16(double iv) noexcept : _bits{} { *this = iv; } - constexpr bfloat16(long double iv) noexcept : _bits{} { *this = iv; } // assignment operators for native types constexpr bfloat16& operator=(signed char rhs) noexcept { return convert_signed(rhs); } @@ -145,12 +145,17 @@ class bfloat16 { constexpr bfloat16& operator=(unsigned long long rhs) noexcept { return 
convert_unsigned(rhs); } constexpr bfloat16& operator=(float rhs) noexcept { return convert_ieee754(rhs); } constexpr bfloat16& operator=(double rhs) noexcept { return convert_ieee754(rhs); } - constexpr bfloat16& operator=(long double rhs) noexcept { return convert_ieee754(rhs); } // conversion operators - explicit operator float() const noexcept { return convert_to_ieee754(); } - explicit operator double() const noexcept { return convert_to_ieee754(); } - explicit operator long double() const noexcept { return convert_to_ieee754(); } + explicit operator float() const noexcept { return convert_to_ieee754(); } + explicit operator double() const noexcept { return convert_to_ieee754(); } + + +#if LONG_DOUBLE_SUPPORT + constexpr bfloat16(long double iv) noexcept : _bits{} { *this = iv; } + constexpr bfloat16& operator=(long double rhs) noexcept { return convert_ieee754(rhs); } + explicit operator long double() const noexcept { return convert_to_ieee754(); } +#endif // prefix operators bfloat16 operator-() const noexcept { @@ -445,14 +450,19 @@ std::string to_binary(bfloat16 bf, bool bNibbleMarker = false) { unsigned short mask = 0x8000u; s << (bits & mask ? "0b1." : "0x0."); mask >>= 1; - for (unsigned i = 1; i < 16; ++i) { - if (9 == i) { - s << '.'; - } - else if (bNibbleMarker && (4 == i || 8 == i || 12 == i)) { + // exponent bits + for (unsigned i = 0; i < 8; ++i) { + if (bNibbleMarker && (4 == i)) { s << '\''; } - + s << (bits & mask ? '1' : '0'); + mask >>= 1; + } + s << '.'; + for (unsigned i = 0; i < 7; ++i) { + if (bNibbleMarker && (3 == i)) { + s << '\''; + } s << (bits & mask ? 
'1' : '0'); mask >>= 1; } diff --git a/include/universal/number/dd/attributes.hpp b/include/universal/number/dd/attributes.hpp index 6760d00d6..4973332d6 100644 --- a/include/universal/number/dd/attributes.hpp +++ b/include/universal/number/dd/attributes.hpp @@ -1,5 +1,5 @@ #pragma once -// attributes.hpp: information functions for decimal floating-point type and value attributes +// attributes.hpp: information functions for double-double (dd) floating-point type and value attributes // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -8,7 +8,7 @@ namespace sw { namespace universal { -// functions to provide details about the properties of a doubledouble (dd) configuration +// functions to provide details about the properties of a double-double (dd) configuration inline bool sign(const dd& a) { return a.sign(); } @@ -17,7 +17,7 @@ namespace sw { namespace universal { return a.scale(); } - // generate the maxneg through maxpos value range of a doubledouble configuration + // generate the maxneg through maxpos value range of a double-double configuration std::string dd_range() { dd v; std::stringstream s; @@ -31,7 +31,7 @@ namespace sw { namespace universal { } /* - // report dynamic range of a type, specialized for a doubledouble + // report dynamic range of a type, specialized for a double-double std::string dynamic_range(const dd& a) { std::stringstream s; dd b(SpecificValue::maxneg), c(SpecificValue::minneg), d(SpecificValue::minpos), e(SpecificValue::maxpos); diff --git a/include/universal/number/dd/dd.hpp b/include/universal/number/dd/dd.hpp index 70336e491..70a5633ab 100644 --- a/include/universal/number/dd/dd.hpp +++ b/include/universal/number/dd/dd.hpp @@ -1,4 +1,4 @@ -// arbitrary configuration decimal floating-point arithmetic standard header +// double-double floating-point arithmetic standard header // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -48,7 +48,7 @@ // enable native sqrt implementation // #if !defined(DOUBLEDOUBLE_NATIVE_SQRT) -#define DOUBLEDOUBLE_NATIVE_SQRT 0 +#define DOUBLEDOUBLE_NATIVE_SQRT 1 #endif /////////////////////////////////////////////////////////////////////////////////////// diff --git a/include/universal/number/dd/dd_fwd.hpp b/include/universal/number/dd/dd_fwd.hpp index 30aaec8eb..aece068ef 100644 --- a/include/universal/number/dd/dd_fwd.hpp +++ b/include/universal/number/dd/dd_fwd.hpp @@ -1,5 +1,5 @@ #pragma once -// dfloat_fwd.hpp : forward declarations of the decimal floating-point dfloat environment +// dd_fwd.hpp : forward declarations of the double-double (dd) floating-point environment // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT diff --git a/include/universal/number/dd/dd_impl.hpp b/include/universal/number/dd/dd_impl.hpp index dc16437ff..abe081d40 100644 --- a/include/universal/number/dd/dd_impl.hpp +++ b/include/universal/number/dd/dd_impl.hpp @@ -29,11 +29,6 @@ namespace sw { namespace universal { - struct uint128_t { - uint128_t() : limb{ 0 } {} - uint64_t limb[2]; - }; - // fwd references to free functions used in to_digits() dd operator*(const dd& lhs, const dd&); dd pown(dd const&, int); @@ -291,16 +286,10 @@ class dd { constexpr void setbit(unsigned index, bool b = true) noexcept { if (index < 64) { // set bit in lower limb - uint64_t raw = std::bit_cast(lo); - uint64_t mask = (1ull << index); - if (b) raw |= mask; else raw &= ~mask; - lo = std::bit_cast(raw); + sw::universal::setbit(lo, index, b); } else if (index < 128) { // set bit in upper limb - uint64_t raw = std::bit_cast(hi); - uint64_t mask = (1ull << (index - 64)); - if (b) raw |= mask; else raw &= ~mask; - hi = std::bit_cast(raw); + sw::universal::setbit(hi, index-64, b); } else { // NOP if index out of bounds @@ -311,6 +300,10 @@ class dd { lo = 0.0; } + // argument is not protected for speed + double operator[](int index) 
const { return (index == 0 ? hi : lo); } + double& operator[](int index) { return (index == 0 ? hi : lo); } + // create specific number system values of interest constexpr dd& maxpos() noexcept { hi = 1.7976931348623157e+308; @@ -349,7 +342,7 @@ class dd { constexpr bool isone() const noexcept { return hi == 1.0 && lo == 0.0; } constexpr bool ispos() const noexcept { return hi > 0.0; } constexpr bool isneg() const noexcept { return hi < 0.0; } - constexpr bool isnan(int NaNType = NAN_TYPE_EITHER) const noexcept { + BIT_CAST_CONSTEXPR bool isnan(int NaNType = NAN_TYPE_EITHER) const noexcept { bool negative = isneg(); int nan_type; bool isNaN = checkNaN(hi, nan_type); @@ -359,7 +352,7 @@ class dd { (NaNType == NAN_TYPE_SIGNALLING ? isNegNaN : (NaNType == NAN_TYPE_QUIET ? isPosNaN : false))); } - constexpr bool isinf(int InfType = INF_TYPE_EITHER) const noexcept { + BIT_CAST_CONSTEXPR bool isinf(int InfType = INF_TYPE_EITHER) const noexcept { bool negative = isneg(); int inf_type; bool isInf = checkInf(hi, inf_type); @@ -373,61 +366,9 @@ class dd { constexpr bool sign() const noexcept { return (hi < 0.0); } constexpr int scale() const noexcept { return _extractExponent(hi); } constexpr int exponent() const noexcept { return _extractExponent(hi); } - uint128_t fraction() const noexcept { - uint128_t frac{ }; - int e; - double l = std::frexp(lo, &e); - frac.limb[0] = static_cast(l); - - return frac; - } constexpr double high() const noexcept { return hi; } constexpr double low() const noexcept { return lo; } - // precondition: string s must be all digits - void round_string(char* s, int precision, int* decimalPoint) const { - int nrDigits = precision; - // round decimal string and propagate carry - int lastDigit = nrDigits - 1; - if (s[lastDigit] >= '5') { - int i = nrDigits - 2; - s[i]++; - while (i > 0 && s[i] > '9') { - s[i] -= 10; - s[--i]++; - } - } - - // if first digit is 10, shift everything. 
- if (s[0] > '9') { - for (int i = precision; i >= 2; i--) s[i] = s[i - 1]; - s[0] = '1'; - s[1] = '0'; - - (*decimalPoint)++; // increment decimal point - ++precision; - } - - s[precision] = 0; // add termination null - } - - void append_exponent(std::string& str, int e) const { - str += (e < 0 ? '-' : '+'); - e = std::abs(e); - int k; - if (e >= 100) { - k = (e / 100); - str += static_cast('0' + k); - e -= 100 * k; - } - - k = (e / 10); - str += static_cast('0' + k); - e -= 10 * k; - - str += static_cast('0' + e); - } - // convert to string containing digits number of digits std::string to_string(std::streamsize precision = 7, std::streamsize width = 15, bool fixed = false, bool scientific = true, bool internal = false, bool left = false, bool showpos = false, bool uppercase = false, char fill = ' ') const { std::string s; @@ -643,6 +584,50 @@ class dd { return Real(hi + lo); } + // precondition: string s must be all digits + void round_string(char* s, int precision, int* decimalPoint) const { + int nrDigits = precision; + // round decimal string and propagate carry + int lastDigit = nrDigits - 1; + if (s[lastDigit] >= '5') { + int i = nrDigits - 2; + s[i]++; + while (i > 0 && s[i] > '9') { + s[i] -= 10; + s[--i]++; + } + } + + // if first digit is 10, shift everything. + if (s[0] > '9') { + for (int i = precision; i >= 2; i--) s[i] = s[i - 1]; + s[0] = '1'; + s[1] = '0'; + + (*decimalPoint)++; // increment decimal point + ++precision; + } + + s[precision] = 0; // add termination null + } + + void append_exponent(std::string& str, int e) const { + str += (e < 0 ? 
'-' : '+'); + e = std::abs(e); + int k; + if (e >= 100) { + k = (e / 100); + str += static_cast('0' + k); + e -= 100 * k; + } + + k = (e / 10); + str += static_cast('0' + k); + e -= 10 * k; + + str += static_cast('0' + e); + } + /// /// to_digits generates the decimal digits representing /// @@ -796,9 +781,9 @@ constexpr dd dd_e (2.718281828459045091e+00, 1.445646891729250158e-16); constexpr dd dd_log2 (6.931471805599452862e-01, 2.319046813846299558e-17); constexpr dd dd_log10 (2.302585092994045901e+00, -2.170756223382249351e-16); -constexpr dd dd_eps = 4.93038065763132e-32; // 2^-104 -constexpr dd dd_min_normalized = 2.0041683600089728e-292; // = 2^(-1022 + 53) -constexpr dd dd_max(1.79769313486231570815e+308, 9.97920154767359795037e+291); +constexpr double dd_eps = 4.93038065763132e-32; // 2^-104 +constexpr double dd_min_normalized = 2.0041683600089728e-292; // = 2^(-1022 + 53) +constexpr dd dd_max (1.79769313486231570815e+308, 9.97920154767359795037e+291); constexpr dd dd_safe_max(1.7976931080746007281e+308, 9.97920154767359795037e+291); @@ -828,47 +813,39 @@ inline std::string to_pair(const dd& v, int precision = 17) { inline std::string to_binary(const dd& number, bool bNibbleMarker = false) { std::stringstream s; - double_decoder decoder; - decoder.d = number.high(); + constexpr int nrLimbs = 2; + for (int i = 0; i < nrLimbs; ++i) { + double_decoder decoder; + decoder.d = number[i]; + + std::string label = "x[" + std::to_string(i) + "]"; + s << label << " : "; + s << "0b"; + // print sign bit + s << (decoder.parts.sign ? '1' : '0') << '.'; + + // print exponent bits + { + uint64_t mask = 0x400; + for (int bit = 10; bit >= 0; --bit) { + s << ((decoder.parts.exponent & mask) ? '1' : '0'); + if (bNibbleMarker && bit != 0 && (bit % 4) == 0) s << '\''; + mask >>= 1; + } + } - s << "0b"; - // print sign bit - s << (decoder.parts.sign ? 
'1' : '0') << '.'; + s << '.'; - // print exponent bits - { - uint64_t mask = 0x400; - for (int i = 10; i >= 0; --i) { - s << ((decoder.parts.exponent & mask) ? '1' : '0'); - if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\''; + // print hi fraction bits + uint64_t mask = (uint64_t(1) << 51); + for (int bit = 51; bit >= 0; --bit) { + s << ((decoder.parts.fraction & mask) ? '1' : '0'); + if (bNibbleMarker && bit != 0 && (bit % 4) == 0) s << '\''; mask >>= 1; } - } - s << '.'; - - // print hi fraction bits - uint64_t mask = (uint64_t(1) << 51); - for (int i = 51; i >= 0; --i) { - s << ((decoder.parts.fraction & mask) ? '1' : '0'); - if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\''; - mask >>= 1; - } - - // print lo fraction bits - decoder.d = number.low(); - if (bNibbleMarker) { - s << (decoder.parts.exponent == 0 ? "\'0\'" : "\'1\'"); // articulate the hidden bit, e == 0 covers both denorm and zero hidden bit status - } - else { - // still delineate the two segments so you can quickly pick up the hidden bit value and start of the second segment - s << (decoder.parts.exponent == 0 ? "\'0" : "\'1"); // articulate the hidden bit, e == 0 covers both denorm and zero hidden bit status - } - mask = (uint64_t(1) << 51); - for (int i = 51; i >= 0; --i) { - s << ((decoder.parts.fraction & mask) ? '1' : '0'); - if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\''; - mask >>= 1; + // s << " : " << number[i]; + if (i < 1) s << ", "; } return s.str(); @@ -917,16 +894,81 @@ inline dd floor(dd const& a) { return dd(hi, lo); } +// Round to Nearest integer +inline dd nint(const dd& a) { + double hi = nint(a.high()); + double lo; + + if (hi == a.high()) { + /* High word is an integer already. Round the low word.*/ + lo = nint(a.low()); + + /* Renormalize. This is needed if x[0] = some integer, x[1] = 1/2.*/ + hi = quick_two_sum(hi, lo, lo); + } + else { + /* High word is not an integer. 
*/ + lo = 0.0; + if (std::abs(hi - a.high()) == 0.5 && a.low() < 0.0) { + /* There is a tie in the high word, consult the low word + to break the tie. */ + hi -= 1.0; /* NOTE: This does not cause INEXACT. */ + } + } + + return dd(hi, lo); +} + +// double plus double yielding a double-double +inline dd add(double a, double b) { + if (std::isnan(a) || std::isnan(b)) return dd(SpecificValue::snan); + double s, e; + s = two_sum(a, b, e); + return dd(s, e); +} + +// double minus double yielding a double-double +inline dd sub(double a, double b) { + if (std::isnan(a) || std::isnan(b)) return dd(SpecificValue::snan); + double s, e; + s = two_sum(a, -b, e); + return dd(s, e); +} + // double times double yielding a double-double inline dd mul(double a, double b) { - double p, e; - p = two_prod(a, b, e); - return dd(p, e); + if (std::isnan(a) || std::isnan(b)) return dd(SpecificValue::snan); + double p, e; + p = two_prod(a, b, e); + return dd(p, e); } -// double-double * double, where double is a power of 2. */ +// double divide by double yielding a double-double +inline dd div(double a, double b) { + if (std::isnan(a) || std::isnan(b)) return dd(SpecificValue::snan); + + if (b == 0.0) return (sign(a) ? 
dd(SpecificValue::infneg) : dd(SpecificValue::infpos)); + + double q1 = a / b; // initial approximation + + // Compute residual: a - q1 * b + volatile double p2; + double p1 = two_prod(q1, b, p2); + volatile double e; + double s = two_diff(a, p1, e); + e -= p2; + + // get next approximation + double q2 = (s + e) / b; + + // normalize + s = quick_two_sum(q1, q2, e); + return dd(s, e); +} + +// double-double * double, where double is a power of 2 inline dd mul_pwr2(const dd& a, double b) { - return dd(a.high() * b, a.low() * b); + return dd(a.high() * b, a.low() * b); } // quad-double operators @@ -1032,7 +1074,7 @@ inline dd pown(dd const& a, int n) { switch (N) { case 0: if (a.iszero()) { -// error("(dd_real::pown): Invalid argument."); + std::cerr << "pown: invalid argument\n"; errno = EDOM; return dd(SpecificValue::qnan); } @@ -1067,7 +1109,6 @@ inline dd pown(dd const& a, int n) { //////////////////////// stream operators ///////////////////////////////// - // stream out a decimal floating-point representation of the double-double inline std::ostream& operator<<(std::ostream& ostr, const dd& v) { std::ios_base::fmtflags fmt = ostr.flags(); diff --git a/include/universal/number/dd/exceptions.hpp b/include/universal/number/dd/exceptions.hpp index 31f6d3ce1..a4f9c8070 100644 --- a/include/universal/number/dd/exceptions.hpp +++ b/include/universal/number/dd/exceptions.hpp @@ -1,5 +1,5 @@ #pragma once -// exceptions.hpp: definition of arbitrary configuration doubledouble exceptions +// exceptions.hpp: definition of arbitrary configuration double-double exceptions // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -9,9 +9,9 @@ namespace sw { namespace universal { -// base class for doubledouble arithmetic exceptions +// base class for double-double arithmetic exceptions struct dd_arithmetic_exception : public universal_arithmetic_exception { - dd_arithmetic_exception(const std::string& err) : universal_arithmetic_exception(std::string("doubledouble arithmetic exception: ") + err) {}; + dd_arithmetic_exception(const std::string& err) : universal_arithmetic_exception(std::string("double-double arithmetic exception: ") + err) {}; }; ////////////////////////////////////////////////////////////////////////////////////////////////// @@ -57,19 +57,8 @@ struct dd_negative_nroot_arg : public dd_arithmetic_exception { /// REAL INTERNAL OPERATION EXCEPTIONS struct dd_internal_exception : public universal_internal_exception { - dd_internal_exception(const std::string& err) : universal_internal_exception(std::string("doubledouble internal exception: ") + err) {}; + dd_internal_exception(const std::string& err) : universal_internal_exception(std::string("double-double internal exception: ") + err) {}; }; -struct dd_shift_too_large : public dd_internal_exception { - dd_shift_too_large() : dd_internal_exception("shift value too large for given doubledouble") {} -}; - -struct dd_hpos_too_large : public dd_internal_exception { - dd_hpos_too_large() : dd_internal_exception("position of hidden bit too large for given doubledouble") {} -}; - -//struct dd_rbits_too_large : dd_internal_exception { -// dd_rbits_too_large(const std::string& error = "number of remaining bits too large for this fraction") :dd_internal_exception(error) {} -//}; }} // namespace sw::universal diff --git a/include/universal/number/dd/manipulators.hpp b/include/universal/number/dd/manipulators.hpp index f196e10ab..31c24e169 100644 --- a/include/universal/number/dd/manipulators.hpp +++ b/include/universal/number/dd/manipulators.hpp @@ -1,4 +1,4 @@ -// manipulators.hpp: definitions of 
helper functions for doubledouble type manipulation +// manipulators.hpp: definitions of helper functions for double-double (dd) type manipulation // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -8,6 +8,7 @@ #include #include #include +#include // pull in the color printing for shells utility #include @@ -15,46 +16,15 @@ namespace sw { namespace universal { // Generate a type tag for a doubledouble std::string type_tag(const dd& = {}) { - return std::string("doubledouble"); + return std::string("double-double"); } // generate a binary, color-coded representation of the doubledouble std::string color_print(const dd& r, bool nibbleMarker = false) { - constexpr unsigned es = 11; - constexpr unsigned fbits = 106; std::stringstream s; - bool sign{ false }; - blockbinary e{ 0 }; - blockbinary f{ 0 }; - sign = r.sign(); - e = r.exponent(); - uint128_t raw = r.fraction(); - f.setbits(raw.limb[0]); - - Color red(ColorCode::FG_RED); - Color yellow(ColorCode::FG_YELLOW); - Color blue(ColorCode::FG_BLUE); - Color magenta(ColorCode::FG_MAGENTA); - Color cyan(ColorCode::FG_CYAN); - Color white(ColorCode::FG_WHITE); - Color def(ColorCode::FG_DEFAULT); - - // sign bit - s << red << (sign ? '1' : '0'); - - // exponent bits - for (int i = int(es) - 1; i >= 0; --i) { - s << cyan << (e.test(static_cast(i)) ? '1' : '0'); - if ((i - es) > 0 && ((i - es) % 4) == 0 && nibbleMarker) s << yellow << '\''; - } - - // fraction bits - for (int i = int(fbits) - 1; i >= 0; --i) { - s << magenta << (f.test(static_cast(i)) ? 
'1' : '0'); - if (i > 0 && (i % 4) == 0 && nibbleMarker) s << yellow << '\''; - } - - s << def; + double high = r.high(); + double low = r.low(); + s << color_print(high, nibbleMarker) << ", " << color_print(low, nibbleMarker); return s.str(); } diff --git a/include/universal/number/dd/math/classify.hpp b/include/universal/number/dd/math/classify.hpp index d9ff3bc7e..e1a6dd268 100644 --- a/include/universal/number/dd/math/classify.hpp +++ b/include/universal/number/dd/math/classify.hpp @@ -1,5 +1,5 @@ #pragma once -// classify.hpp: classification functions for doubledouble (dd) floating-point +// classify.hpp: classification functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -14,31 +14,31 @@ int fpclassify(const dd& a) { } // STD LIB function for IEEE floats: Determines if the given floating point number arg is a cfloative or negative infinity. -// specialized for doubledouble (dd) +// specialized for double-double (dd) inline bool isinf(const dd& a) { return (std::fpclassify(a.high()) == FP_INFINITE); } // STD LIB function for IEEE floats: Determines if the given floating point number arg is a not-a-number (NaN) value. -// specialized for doubledouble (dd) +// specialized for double-double (dd) inline bool isnan(const dd& a) { return (std::fpclassify(a.high()) == FP_NAN); } // STD LIB function for IEEE floats: Determines if the given floating point number arg has finite value i.e. it is normal, subnormal or zero, but not infinite or NaN. -// specialized for doubledouble (dd) +// specialized for double-double (dd) inline bool isfinite(const dd& a) { return (std::fpclassify(a.high()) != FP_INFINITE) && (std::fpclassify(a.high()) != FP_NAN); } // STD LIB function for IEEE floats: Determines if the given floating point number arg is normal, i.e. is neither zero, subnormal, infinite, nor NaN. 
-// specialized for doubledouble (dd) +// specialized for double-double (dd) inline bool isnormal(const dd& a) { return (std::fpclassify(a.high()) == FP_NORMAL); } // STD LIB function for IEEE floats: Determines if the given floating point number arg is denormal, i.e. is neither zero, normal, infinite, nor NaN. -// specialized for doubledouble (dd) +// specialized for double-double (dd) inline bool isdenorm(const dd& a) { return (std::fpclassify(a.high()) == FP_SUBNORMAL); } diff --git a/include/universal/number/dd/math/error_and_gamma.hpp b/include/universal/number/dd/math/error_and_gamma.hpp index 663df96b0..6260e53c4 100644 --- a/include/universal/number/dd/math/error_and_gamma.hpp +++ b/include/universal/number/dd/math/error_and_gamma.hpp @@ -9,7 +9,7 @@ namespace sw { namespace universal { // Compute the error function erf(x) = 2 over sqrt(PI) times Integral from 0 to x of e ^ (-t)^2 dt - dd erf(dd x) { + dd erf(dd x) { return dd(std::erf(double(x.high()))); } diff --git a/include/universal/number/dd/math/exponent.hpp b/include/universal/number/dd/math/exponent.hpp index 81f10a900..db614b6af 100644 --- a/include/universal/number/dd/math/exponent.hpp +++ b/include/universal/number/dd/math/exponent.hpp @@ -1,7 +1,7 @@ #pragma once -// exponent.hpp: exponent functions for double-double floating-point +// exponent.hpp: exponent functions for double-double (dd) floating-point // -// algorithms courtesy Scibuilders, Jack Poulson +// base algorithm strategy courtesy Scibuilder, Jack Poulson // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -15,21 +15,21 @@ namespace sw { namespace universal { static const int n_inv_fact = 15; static const double inv_fact[n_inv_fact][2] = { - { 1.66666666666666657e-01, 9.25185853854297066e-18}, - { 4.16666666666666644e-02, 2.31296463463574266e-18}, - { 8.33333333333333322e-03, 1.15648231731787138e-19}, - { 1.38888888888888894e-03, -5.30054395437357706e-20}, - { 1.98412698412698413e-04, 1.72095582934207053e-22}, - { 2.48015873015873016e-05, 2.15119478667758816e-23}, - { 2.75573192239858925e-06, -1.85839327404647208e-22}, - { 2.75573192239858883e-07, 2.37677146222502973e-23}, - { 2.50521083854417202e-08, -1.44881407093591197e-24}, - { 2.08767569878681002e-09, -1.20734505911325997e-25}, - { 1.60590438368216133e-10, 1.25852945887520981e-26}, - { 1.14707455977297245e-11, 2.06555127528307454e-28}, - { 7.64716373181981641e-13, 7.03872877733453001e-30}, - { 4.77947733238738525e-14, 4.39920548583408126e-31}, - { 2.81145725434552060e-15, 1.65088427308614326e-31} + { 1.66666666666666657e-01, 9.25185853854297066e-18}, // 1/3! + { 4.16666666666666644e-02, 2.31296463463574266e-18}, // 1/4! + { 8.33333333333333322e-03, 1.15648231731787138e-19}, // 1/5! + { 1.38888888888888894e-03, -5.30054395437357706e-20}, // 1/6! + { 1.98412698412698413e-04, 1.72095582934207053e-22}, // 1/7! + { 2.48015873015873016e-05, 2.15119478667758816e-23}, // 1/8! + { 2.75573192239858925e-06, -1.85839327404647208e-22}, // 1/9! + { 2.75573192239858883e-07, 2.37677146222502973e-23}, // 1/10! + { 2.50521083854417202e-08, -1.44881407093591197e-24}, // 1/11! + { 2.08767569878681002e-09, -1.20734505911325997e-25}, // 1/12! + { 1.60590438368216133e-10, 1.25852945887520981e-26}, // 1/13! + { 1.14707455977297245e-11, 2.06555127528307454e-28}, // 1/14! + { 7.64716373181981641e-13, 7.03872877733453001e-30}, // 1/15! + { 4.77947733238738525e-14, 4.39920548583408126e-31}, // 1/16! + { 2.81145725434552060e-15, 1.65088427308614326e-31} // 1/17! 
}; // Base-e exponential function @@ -86,10 +86,15 @@ dd exp(const dd& a) { return ldexp(s, static_cast(m)); } - // Base-2 exponential function +dd exp2(dd x) { + return dd(std::exp2(double(x))); +} // Base-10 exponential function +dd exp10(dd x) { + return dd(std::pow(10.0, double(x))); +} // Base-e exponential function exp(x)-1 dd expm1(dd x) { diff --git a/include/universal/number/dd/math/logarithm.hpp b/include/universal/number/dd/math/logarithm.hpp index 37d0fee86..8905ce872 100644 --- a/include/universal/number/dd/math/logarithm.hpp +++ b/include/universal/number/dd/math/logarithm.hpp @@ -1,6 +1,8 @@ #pragma once -// logarithm.hpp: logarithm functions for doubledouble (dd) floating-point +// logarithm.hpp: logarithm functions for double-double (dd) floating-point // +// base algorithm strategy courtesy Scibuilder, Jack Poulson +// // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT // @@ -9,201 +11,113 @@ namespace sw { namespace universal { +/// +/// Natural logarithm (base = e) +/// +/// input +/// natural logarithm of a +dd log(const dd& a) { + if (a.isnan() || a.isinf()) return a; + if (a.iszero()) return dd(SpecificValue::infneg); -// assumes 0.0 < a < inf -dd _log(dd const& a) { -#ifdef LATER - int k; - dd f = std::frexp(a, &k); // 0.5 <= |f| < 1.0 - if (f < _inv_sqrt2) { - f = std::ldexp(f, 1); - --k; - } - - // sqrt( 0.5 ) <= f < sqrt( 2.0 ) - // -0.1715... <= s < 0.1715... - // - double res[3]; - res[0] = two_sum(f.high(), 1.0, res[1]); - res[1] = two_sum(f.high(), res[1], res[2]); - dd f_plus = res[0] == 0.0 ? dd(res[1], res[2]) : dd(res[0], res[1]); - res[0] = two_sum(f.high(), -1.0, res[1]); - res[1] = two_sum(f.low(), res[1], res[2]); - dd f_minus = res[0] == 0.0 ? dd_real(res[1], res[2]) : dd(res[0], res[1]); - - dd s = f_minus / f_plus; - - // calculate log( f ) = log( 1 + s ) - log( 1 - s ) - // - // log( 1+s ) = s - s^2/2 + s^3/3 - s^4/4 ... - // log( 1-s ) = -s + s^2/2 - s^3/3 - s^4/4 ... 
- // log( f ) = 2*s + 2s^3/3 + 2s^5/5 + ... - // - dd s2 = s * s; - - // TODO - economize the power series using Chebyshev polynomials - // - dd x = inv_int[41]; - for (int i = 41; i > 1; i -= 2) { - x = std::Fma(x, s2, inv_int[i - 2]); - } - x *= std::ldexp(s, 1); // x *= 2*s - - return std::Fma(k, _ln2, x); -#endif - return dd(std::log(a.high()), 0.0); -} + if (a.isone()) return 0.0; -// assumes -1.0 < a < 2.0 -// -dd _log1p(dd const& a) { -#ifdef LATER - static const dd a_max = _sqrt2 - 1.0; - static const dd a_min = _inv_sqrt2 - 1.0; - - int ilog = std::ilogb(a) + 1; // 0.5 <= frac < 1.0 - - if (ilog < -std::numeric_limits
::digits / 2) // |a| <= 2^-54 - error O( 2^-108) - return a; - if (ilog < -std::numeric_limits
::digits / 3) // |a| <= 2^-36 - error O( 2^-108) - return a * std::Fma(a, -0.5, 1.0); - if (ilog < -std::numeric_limits
::digits / 4) // |a| <= 2^-27 - error O( 2^-108) - return a * std::Fma(a, -std::Fma(a, -_third, 0.5), 1.0); - - dd f_minus = a; - int k = 0; - - if ((a > a_max) || (a < a_min)) - { - double res[3]; - res[0] = two_sum(a.high(), 1.0, res[1]); - res[1] = two_sum(a.low(), res[1], res[2]); - dd f_p1 = res[0] == 0.0 ? dd(res[1], res[2]) : dd_real(res[0], res[1]); - - f_p1 = std::frexp(f_p1, &k); // 0.5 <= |f_p1| < 1.0; k <= 2 - if (f_p1 < _inv_sqrt2) { - --k; - std::ldexp(f_p1, 1); - } - - // at this point, we have 2^k * ( 1.0 + f ) = 1.0 + a - // sqrt( 0.5 ) <= 1.0 + f <= sqrt( 2.0 ) - // - // f = 2^-k * a - ( 1.0 - 2^-k ) - double df[2]; - df[0] = two_sum(1.0, -std::ldexp(1.0, -k), df[1]); - f_minus = std::ldexp(a, -k) - dd_real(df[0], df[1]); + if (a.sign()) { + std::cerr << "log: non-positive argument\n"; + errno = EDOM; + return dd(SpecificValue::qnan); } - dd f_plus = f_minus + 2.0; - dd s = f_minus / f_plus; - - // calculate log( f ) = log( 1 + s ) - log( 1 - s ) - // - // log( 1+s ) = s - s^2/2 + s^3/3 - s^4/4 ... - // log( 1-s ) = -s + s^2/2 - s^3/3 - s^4/4 ... - // log( f ) = 2*s + 2s^3/3 + 2s^5/5 + ... - // - dd s2 = s * s; - - // TODO - economize the power series using Chebyshev polynomials - // - dd x = inv_int[41]; - for (int i = 41; i > 1; i -= 2) { - x = std::Fma(x, s2, inv_int[i - 2]); - } - x *= std::ldexp(s, 1); // x *= 2*s + /* Strategy. The Taylor series for log converges much more + slowly than that of exp, due to the lack of the factorial + term in the denominator. Hence this routine instead tries + to determine the root of the function - return std::Fma(k, _ln2, x); -#endif - return dd(std::log1p(a.high()), 0.0); -} + f(x) = exp(x) - a -// Natural logarithm of x -dd log(dd const& a) { - if (a.isnan()) return a; + using Newton iteration. The iteration is given by - if (a.iszero()) return -std::numeric_limits< dd >::infinity(); + x' = x - f(x)/f'(x) + = x - (1 - a * exp(-x)) + = x + a * exp(-x) - 1. 
- if (a.isone()) return 0.0; + Only one iteration is needed, since Newton's iteration + approximately doubles the number of digits per iteration. + */ - if (a.sign()) { -// error("(dd::log): Non-positive argument."); - errno = EDOM; - return std::numeric_limits< dd >::quiet_NaN(); - } - - if (a.isinf()) return a; - - return _log(a); + dd x = std::log(a.high()); // Initial approximation + x = x + a * exp(-x) - 1.0; + return x; } -// Binary logarithm of x -dd log2(dd const& a) -{ - if (a.isnan()) return a; +/// +/// binary logarithm (base = 2) +/// +/// input +/// binary logarithm of a +dd log2(const dd& a) { + if (a.isnan() || a.isinf()) return a; - if (a.iszero()) return -std::numeric_limits< dd >::infinity(); + if (a.iszero()) return dd(SpecificValue::infneg); if (a.isone()) return 0.0; if (a.sign()) { -// error("(dd_real::log2): Non-positive argument."); + std::cerr << "log2: non-positive argument\n"; errno = EDOM; - return std::numeric_limits< dd >::quiet_NaN(); + return dd(SpecificValue::qnan); } - if (a.isinf()) return a; - - dd _lge{}; - return _lge * _log(a); + return log(a) * dd_lge; } -// Decimal logarithm of x -dd log10(dd const& a) { - if (a.isnan()) return a; +/// +/// decimal logarithm (base = 10) +/// +/// input +/// binary logarithm of a +dd log10(const dd& a) { + if (a.isnan() || a.isinf()) return a; - if (a.iszero()) return -std::numeric_limits< dd >::infinity(); + if (a.iszero()) return dd(SpecificValue::infneg); if (a.isone()) return 0.0; if (a.sign()) { -// error("(dd_real::log10): Non-positive argument."); + std::cerr << "log10: non-positive argument\n"; errno = EDOM; - return std::numeric_limits< dd >::quiet_NaN(); + return dd(SpecificValue::qnan); } - if (a.isinf()) return a; - - dd _loge{}; - return _loge * _log(a); + return log(a) / dd_log10; } - -// Natural logarithm of 1+x -dd log1p(dd const& a) + +/// +/// Natural logarithm of 1+x +/// +/// input +/// binary logarithm of a +dd log1p(const dd& a) { - if (a.isnan()) return a; + if 
(a.isnan() || a.isinf()) return a; - if (a.iszero()) return 0.0; + if (a.iszero()) return dd(0.0); - if (a == -1.0) return -std::numeric_limits< dd >::infinity(); + if (a == -1.0) return dd(SpecificValue::infneg); if (a < -1.0) { -// error("(dd_real::log): Non-positive argument."); + std::cerr << "log1p: non-positive argument\n"; errno = EDOM; - return std::numeric_limits< dd >::quiet_NaN(); + return dd(SpecificValue::qnan); } - if (a.isinf()) return a; - - if ((a >= 2.0) || (a <= -0.5)) // a >= 2.0 - no loss of significant bits - use log() - return _log(1.0 + a); + return log(1.0 + a); - // at this point, -1.0 < a < 2.0 - // - return _log1p(a); + // at this point, -1.0 < a < 2.0 + // return _log1p(a); + return log(1.0 + a); // TODO: evaluate loss of precision } }} // namespace sw::universal diff --git a/include/universal/number/dd/math/next.hpp b/include/universal/number/dd/math/next.hpp index 9c2e54e7f..ca896806e 100644 --- a/include/universal/number/dd/math/next.hpp +++ b/include/universal/number/dd/math/next.hpp @@ -1,5 +1,5 @@ #pragma once -// next.hpp: nextafter/nexttoward functions for doubledouble floating-point +// next.hpp: nextafter/nexttoward functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT diff --git a/include/universal/number/dd/math/old_logarithm.hpp b/include/universal/number/dd/math/old_logarithm.hpp new file mode 100644 index 000000000..7150a623f --- /dev/null +++ b/include/universal/number/dd/math/old_logarithm.hpp @@ -0,0 +1,258 @@ +#pragma once +// logarithm.hpp: logarithm functions for double-double (dd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include + +namespace sw { namespace universal { + + + const dd dd_inv_int[] = { + dd(std::numeric_limits< dd >::infinity()), // 1/0 + dd("1.0"), // 1/1 + dd("0.5"), // 1/2 + dd("0.3333333333333333333333333333333333333"), // 1/3 + dd("0.25"), // 1/4 + dd("0.2"), // 1/5 + dd("0.1666666666666666666666666666666666667"), // 1/6 + dd("0.1428571428571428571428571428571428571"), // 1/7 + dd("0.125"), // 1/8 + dd("0.1111111111111111111111111111111111111"), // 1/9 + dd("0.1"), // 1/10 + dd("0.0909090909090909090909090909090909091"), // 1/11 + dd("0.0833333333333333333333333333333333333"), // 1/12 + dd("0.0769230769230769230769230769230769231"), // 1/13 + dd("0.0714285714285714285714285714285714286"), // 1/14 + dd("0.0666666666666666666666666666666666667"), // 1/15 + dd("0.0625"), // 1/16 + dd("0.0588235294117647058823529411764705882"), // 1/17 + dd("0.0555555555555555555555555555555555556"), // 1/18 + dd("0.0526315789473684210526315789473684211"), // 1/19 + dd("0.05"), // 1/20 + dd("0.0476190476190476190476190476190476190"), // 1/21 + dd("0.0454545454545454545454545454545454545"), // 1/22 + dd("0.0434782608695652173913043478260869565"), // 1/23 + dd("0.0416666666666666666666666666666666667"), // 1/24 + dd("0.04"), // 1/25 + dd("0.0384615384615384615384615384615384615"), // 1/26 + dd("0.0370370370370370370370370370370370370"), // 1/27 + dd("0.0357142857142857142857142857142857143"), // 1/28 + dd("0.0344827586206896551724137931034482759"), // 1/29 + dd("0.0333333333333333333333333333333333333"), // 1/30 + dd("0.0322580645161290322580645161290322581"), // 1/31 + dd("0.03125"), // 1/32 + dd("0.0303030303030303030303030303030303030"), // 1/33 + dd("0.0294117647058823529411764705882352941"), // 1/34 + dd("0.0285714285714285714285714285714285714"), // 1/35 + dd("0.0277777777777777777777777777777777778"), // 1/36 + dd("0.0270270270270270270270270270270270270"), // 1/37 + dd("0.0263157894736842105263157894736842105"), // 1/38 + dd("0.0256410256410256410256410256410256410"), // 1/39 
+ dd("0.025"), // 1/40 + dd("0.0243902439024390243902439024390243902") // 1/41 + }; + + const dd dd_third("0.333333333333333333333333333"); + + dd Fma(const dd& a, const dd& b, const dd& c) { + double p[4]; + qd_mul(a, b, p); + qd_add(p, c, p); + p[0] = two_sum(p[0], p[1] + p[2] + p[3], p[1]); + return dd(p[0], p[1]); + } + +// assumes 0.0 < a < inf +dd _log(dd const& a) { + int k; + dd fraction = frexp(a, &k); // 0.5 <= |fraction| < 1.0 + if (fraction < dd_inv_sqrt2) { + fraction = ldexp(fraction, 1); + --k; + } + + // sqrt( 0.5 ) <= f < sqrt( 2.0 ) + // -0.1715... <= s < 0.1715... + // + double res[3]; + res[0] = two_sum(fraction.high(), 1.0, res[1]); + res[1] = two_sum(fraction.high(), res[1], res[2]); + dd f_plus = res[0] == 0.0 ? dd(res[1], res[2]) : dd(res[0], res[1]); + res[0] = two_sum(fraction.high(), -1.0, res[1]); + res[1] = two_sum(fraction.low(), res[1], res[2]); + dd f_minus = res[0] == 0.0 ? dd(res[1], res[2]) : dd(res[0], res[1]); + + dd s = f_minus / f_plus; + + // calculate log( f ) = log( 1 + s ) - log( 1 - s ) + // + // log( 1+s ) = s - s^2/2 + s^3/3 - s^4/4 ... + // log( 1-s ) = -s + s^2/2 - s^3/3 - s^4/4 ... + // log( f ) = 2*s + 2s^3/3 + 2s^5/5 + ... + // + dd s2 = s * s; + + // TODO - economize the power series using Chebyshev polynomials + // + dd x = dd_inv_int[41]; + for (int i = 41; i > 1; i -= 2) { + x = fma(x, s2, dd_inv_int[i - 2]); + } + x *= ldexp(s, 1); // x *= 2*s + + return fma(k, dd_ln2, x); +} + +// assumes -1.0 < a < 2.0 +// +dd _log1p(dd const& a) { + static const dd a_max = dd_sqrt2 - 1.0; + static const dd a_min = dd_inv_sqrt2 - 1.0; + + int ilog = std::ilogb(a) + 1; // 0.5 <= frac < 1.0 + + if (ilog < -std::numeric_limits
::digits / 2) // |a| <= 2^-54 - error O( 2^-108) + return a; + if (ilog < -std::numeric_limits
::digits / 3) // |a| <= 2^-36 - error O( 2^-108) + return a * Fma(a, -0.5, 1.0); + if (ilog < -std::numeric_limits
::digits / 4) // |a| <= 2^-27 - error O( 2^-108) + return a * Fma(a, -Fma(a, -dd_third, 0.5), 1.0); + + dd f_minus = a; + int k = 0; + + if ((a > a_max) || (a < a_min)) + { + double res[3]; + res[0] = two_sum(a.high(), 1.0, res[1]); + res[1] = two_sum(a.low(), res[1], res[2]); + dd f_p1 = res[0] == 0.0 ? dd(res[1], res[2]) : dd(res[0], res[1]); + + f_p1 = frexp(f_p1, &k); // 0.5 <= |f_p1| < 1.0; k <= 2 + if (f_p1 < dd_inv_sqrt2) { + --k; + ldexp(f_p1, 1); + } + + // at this point, we have 2^k * ( 1.0 + f ) = 1.0 + a + // sqrt( 0.5 ) <= 1.0 + f <= sqrt( 2.0 ) + // + // f = 2^-k * a - ( 1.0 - 2^-k ) + double df[2]; + df[0] = two_sum(1.0, -std::ldexp(1.0, -k), df[1]); + f_minus = ldexp(a, -k) - dd(df[0], df[1]); + } + + dd f_plus = f_minus + 2.0; + dd s = f_minus / f_plus; + + // calculate log( f ) = log( 1 + s ) - log( 1 - s ) + // + // log( 1+s ) = s - s^2/2 + s^3/3 - s^4/4 ... + // log( 1-s ) = -s + s^2/2 - s^3/3 - s^4/4 ... + // log( f ) = 2*s + 2s^3/3 + 2s^5/5 + ... + // + dd s2 = s * s; + + // TODO - economize the power series using Chebyshev polynomials + // + dd x = dd_inv_int[41]; + for (int i = 41; i > 1; i -= 2) { + x = Fma(x, s2, dd_inv_int[i - 2]); + } + x *= ldexp(s, 1); // x *= 2*s + + return Fma(k, dd_ln2, x); + +} + +// Natural logarithm of x +dd log(dd const& a) { + if (a.isnan()) return a; + + if (a.iszero()) return -std::numeric_limits< dd >::infinity(); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log: non-positive argument\n"; + errno = EDOM; + return std::numeric_limits< dd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + return _log(a); +} + +// Binary logarithm of x +dd log2(dd const& a) +{ + if (a.isnan()) return a; + + if (a.iszero()) return -std::numeric_limits< dd >::infinity(); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log2: non-positive argument\n"; + errno = EDOM; + return std::numeric_limits< dd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + dd _lge{}; + return _lge * _log(a); +} 
+ +// Decimal logarithm of x +dd log10(dd const& a) { + if (a.isnan()) return a; + + if (a.iszero()) return -std::numeric_limits< dd >::infinity(); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log10: non-positive argument\n"; + errno = EDOM; + return std::numeric_limits< dd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + dd _loge{}; + return _loge * _log(a); +} + +// Natural logarithm of 1+x +dd log1p(dd const& a) +{ + if (a.isnan()) return a; + + if (a.iszero()) return 0.0; + + if (a == -1.0) return -std::numeric_limits< dd >::infinity(); + + if (a < -1.0) { + std::cerr << "log1p: non-positive argument\n"; + errno = EDOM; + return std::numeric_limits< dd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + + if ((a >= 2.0) || (a <= -0.5)) // a >= 2.0 - no loss of significant bits - use log() + return _log(1.0 + a); + + // at this point, -1.0 < a < 2.0 + // + return _log1p(a); +} + +}} // namespace sw::universal diff --git a/include/universal/number/dd/math/sqrt.hpp b/include/universal/number/dd/math/sqrt.hpp index f75315a3a..2c819cbb7 100644 --- a/include/universal/number/dd/math/sqrt.hpp +++ b/include/universal/number/dd/math/sqrt.hpp @@ -1,5 +1,5 @@ #pragma once -// sqrt.hpp: sqrt functions for doubledouble (dd) floats +// sqrt.hpp: sqrt functions for double-double (dd) floats // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -15,9 +15,9 @@ namespace sw { namespace universal { #if DOUBLEDOUBLE_NATIVE_SQRT - /* Computes the square root of the double-double number dd. - NOTE: dd must be a non-negative number. */ - inline dd sqrt(const dd& a) { + // Computes the square root of the double-double number dd. 
+ // NOTE: dd must be a non-negative number + inline dd sqrt(dd a) { /* Strategy: Use Karp's trick: if x is an approximation to sqrt(a), then @@ -33,7 +33,7 @@ namespace sw { namespace universal { #if DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION if (a.isneg()) throw dd_negative_sqrt_arg(); #else - if (a.isneg()) std::cerr << "doubledouble argument to sqrt is negative: " << a << std::endl; + if (a.isneg()) std::cerr << "double-double argument to sqrt is negative: " << a << std::endl; #endif double x = 1.0 / std::sqrt(a.high()); @@ -43,15 +43,15 @@ namespace sw { namespace universal { #else - // sqrt shim for doubledouble + // sqrt shim for double-double inline dd sqrt(dd a) { #if DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION if (a.isneg()) throw dd_negative_sqrt_arg(); #else // ! DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION - if (a.isneg()) std::cerr << "doubledouble argument to sqrt is negative: " << a << std::endl; + if (a.isneg()) std::cerr << "double-double argument to sqrt is negative: " << a << std::endl; #endif // ! DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION if (a.iszero()) return a; - return dd(std::sqrt(a.high())); + return dd(std::sqrt(double(a))); } #endif // ! DOUBLEDOUBLE_NATIVE_SQRT @@ -91,11 +91,11 @@ namespace sw { namespace universal { #else // ! 
DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION if (n <= 0) { - std::cerr << "doubledouble nroot argument is negative: " << n << std::endl; + std::cerr << "double-double nroot argument is negative: " << n << std::endl; } if (n % 2 == 0 && a.isneg()) { - std::cerr << "doubledouble nroot argument is negative: " << n << std::endl; + std::cerr << "double-double nroot argument is negative: " << n << std::endl; return dd(SpecificValue::snan); } diff --git a/include/universal/number/dd/math/trigonometry.hpp b/include/universal/number/dd/math/trigonometry.hpp index 38da2c8db..66505eae8 100644 --- a/include/universal/number/dd/math/trigonometry.hpp +++ b/include/universal/number/dd/math/trigonometry.hpp @@ -1,5 +1,7 @@ #pragma once -// trigonometry.hpp: trigonometry support for double-double floating-point +// trigonometry.hpp: trigonometry function support for double-double floating-point +// +// algorithms and constants courtesy of Scibuilders, Jack Poulson // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -8,5 +10,429 @@ namespace sw { namespace universal { + // pi/16 + static constexpr dd _pi16(1.963495408493620697e-01, 7.654042494670957545e-18); + + // Table of sin(k * pi/16) and cos(k * pi/16). + static const double sin_table[4][2] = { + {1.950903220161282758e-01, -7.991079068461731263e-18}, + {3.826834323650897818e-01, -1.005077269646158761e-17}, + {5.555702330196021776e-01, 4.709410940561676821e-17}, + {7.071067811865475727e-01, -4.833646656726456726e-17} + }; + + static const double cos_table[4][2] = { + {9.807852804032304306e-01, 1.854693999782500573e-17}, + {9.238795325112867385e-01, 1.764504708433667706e-17}, + {8.314696123025452357e-01, 1.407385698472802389e-18}, + {7.071067811865475727e-01, -4.833646656726456726e-17} + }; + + /* Computes sin(a) using Taylor series. + Assumes |a| <= pi/32. 
*/ + inline dd sin_taylor(const dd& a) { + const double thresh = 0.5 * std::abs(double(a)) * dd_eps; + + if (a.iszero()) return 0.0; + + dd x = -sqr(a); + dd s{ a }; + dd r{ a }; + dd t{}; + int i = 0; + do { + r *= x; + t = r * dd(inv_fact[i][0], inv_fact[i][1]); + s += t; + i += 2; + } while (i < n_inv_fact && std::abs(double(t)) > thresh); + + return s; + } + + inline dd cos_taylor(const dd& a) { + const double thresh = 0.5 * dd_eps; + + if (a.iszero()) return 1.0; + + dd x = -sqr(a); + dd r{ x }; + dd s = 1.0 + mul_pwr2(r, 0.5); + dd t{}; + int i = 1; + do { + r *= x; + t = r * dd(inv_fact[i][0], inv_fact[i][1]); + s += t; + i += 2; + } while (i < n_inv_fact && std::abs(double(t)) > thresh); + + return s; + } + + inline void sincos_taylor(const dd& a, dd& sin_a, dd& cos_a) { + if (a.iszero()) { + sin_a = 0.0; + cos_a = 1.0; + return; + } + + sin_a = sin_taylor(a); + cos_a = sqrt(1.0 - sqr(sin_a)); + } + + + inline dd sin(const dd& a) { + + /* Strategy. To compute sin(x), we choose integers a, b so that + + x = s + a * (pi/2) + b * (pi/16) + + and |s| <= pi/32. Using the fact that + + sin(pi/16) = 0.5 * sqrt(2 - sqrt(2 + sqrt(2))) + + we can compute sin(x) from sin(s), cos(s). This greatly + increases the convergence of the sine Taylor series. */ + + if (a.iszero()) return 0.0; + + // approximately reduce modulo 2*pi + dd z = nint(a / dd_2pi); + dd r = a - dd_2pi * z; + + // approximately reduce modulo pi/2 and then modulo pi/16. 
+ dd t; + double q = std::floor(r.high() / dd_pi2.high() + 0.5); + t = r - dd_pi2 * q; + int j = static_cast(q); + q = std::floor(t.high() / _pi16.high() + 0.5); + t -= _pi16 * q; + int k = static_cast(q); + int abs_k = std::abs(k); + + if (j < -2 || j > 2) { + std::cerr << "sin: Cannot reduce modulo pi/2\n"; + return dd(SpecificValue::snan); + } + + if (abs_k > 4) { + std::cerr << "(dd::sin): Cannot reduce modulo pi/16\n"; + return dd(SpecificValue::snan); + } + + if (k == 0) { + switch (j) { + case 0: + return sin_taylor(t); + case 1: + return cos_taylor(t); + case -1: + return -cos_taylor(t); + default: + return -sin_taylor(t); + } + } + + dd u(cos_table[abs_k - 1][0], cos_table[abs_k - 1][1]); + dd v(sin_table[abs_k - 1][0], sin_table[abs_k - 1][1]); + dd sin_t, cos_t; + sincos_taylor(t, sin_t, cos_t); + if (j == 0) { + if (k > 0) { + r = u * sin_t + v * cos_t; + } + else { + r = u * sin_t - v * cos_t; + } + } + else if (j == 1) { + if (k > 0) { + r = u * cos_t - v * sin_t; + } + else { + r = u * cos_t + v * sin_t; + } + } + else if (j == -1) { + if (k > 0) { + r = v * sin_t - u * cos_t; + } + else if (k < 0) { + r = -u * cos_t - v * sin_t; + } + } + else { + if (k > 0) { + r = -u * sin_t - v * cos_t; + } + else { + r = v * cos_t - u * sin_t; + } + } + + return r; + } + + inline dd cos(const dd& a) { + + if (a.iszero()) return 1.0; + + // approximately reduce modulo 2*pi + dd z = nint(a / dd_2pi); + dd r = a - z * dd_2pi; + + // approximately reduce modulo pi/2 and then modulo pi/16 + dd t; + double q = std::floor(r.high() / dd_pi2.high() + 0.5); + t = r - dd_pi2 * q; + int j = static_cast(q); + q = std::floor(t.high() / _pi16.high() + 0.5); + t -= _pi16 * q; + int k = static_cast(q); + int abs_k = std::abs(k); + + if (j < -2 || j > 2) { + std::cerr << "cos: Cannot reduce modulo pi/2\n"; + return dd(SpecificValue::snan); + } + + if (abs_k > 4) { + std::cerr << "cos: Cannot reduce modulo pi / 16\n"; + return dd(SpecificValue::snan); + } + + if (k == 0) { + 
switch (j) { + case 0: + return cos_taylor(t); + case 1: + return -sin_taylor(t); + case -1: + return sin_taylor(t); + default: + return -cos_taylor(t); + } + } + + dd sin_t, cos_t; + sincos_taylor(t, sin_t, cos_t); + dd u(cos_table[abs_k - 1][0], cos_table[abs_k - 1][1]); + dd v(sin_table[abs_k - 1][0], sin_table[abs_k - 1][1]); + + if (j == 0) { + if (k > 0) { + r = u * cos_t - v * sin_t; + } + else { + r = u * cos_t + v * sin_t; + } + } + else if (j == 1) { + if (k > 0) { + r = -u * sin_t - v * cos_t; + } + else { + r = v * cos_t - u * sin_t; + } + } + else if (j == -1) { + if (k > 0) { + r = u * sin_t + v * cos_t; + } + else { + r = u * sin_t - v * cos_t; + } + } + else { + if (k > 0) { + r = v * sin_t - u * cos_t; + } + else { + r = -u * cos_t - v * sin_t; + } + } + + return r; + } + + inline void sincos(const dd& a, dd& sin_a, dd& cos_a) { + + if (a.iszero()) { + sin_a = 0.0; + cos_a = 1.0; + return; + } + + // approximately reduce modulo 2*pi + dd z = nint(a / dd_2pi); + dd r = a - dd_2pi * z; + + // approximately reduce module pi/2 and pi/16 + dd t; + double q = std::floor(r.high() / dd_pi2.high() + 0.5); + t = r - dd_pi2 * q; + int j = static_cast(q); + int abs_j = std::abs(j); + q = std::floor(t.high() / _pi16.high() + 0.5); + t -= _pi16 * q; + int k = static_cast(q); + int abs_k = std::abs(k); + + if (abs_j > 2) { + std::cerr << "sincos: Cannot reduce modulo pi/2\n"; + cos_a = sin_a = dd(SpecificValue::snan); + return; + } + + if (abs_k > 4) { + std::cerr << "sincos: Cannot reduce modulo pi/16\n"; + cos_a = sin_a = dd(SpecificValue::snan); + return; + } + + dd sin_t, cos_t; + dd s, c; + + sincos_taylor(t, sin_t, cos_t); + + if (abs_k == 0) { + s = sin_t; + c = cos_t; + } + else { + dd u(cos_table[abs_k - 1][0], cos_table[abs_k - 1][1]); + dd v(sin_table[abs_k - 1][0], sin_table[abs_k - 1][1]); + + if (k > 0) { + s = u * sin_t + v * cos_t; + c = u * cos_t - v * sin_t; + } + else { + s = u * sin_t - v * cos_t; + c = u * cos_t + v * sin_t; + } + } + + if 
(abs_j == 0) { + sin_a = s; + cos_a = c; + } + else if (j == 1) { + sin_a = c; + cos_a = -s; + } + else if (j == -1) { + sin_a = -c; + cos_a = s; + } + else { + sin_a = -s; + cos_a = -c; + } + + } + + inline dd atan2(const dd& y, const dd& x) { + /* Strategy: Instead of using Taylor series to compute + arctan, we instead use Newton's iteration to solve + the equation + + sin(z) = y/r or cos(z) = x/r + + where r = sqrt(x^2 + y^2). + The iteration is given by + + z' = z + (y - sin(z)) / cos(z) (for equation 1) + z' = z - (x - cos(z)) / sin(z) (for equation 2) + + Here, x and y are normalized so that x^2 + y^2 = 1. + If |x| > |y|, then first iteration is used since the + denominator is larger. Otherwise, the second is used. + */ + + if (x.iszero()) { + + if (y.iszero()) { + /* Both x and y is zero. */ + std::cerr << "atan2: Both arguments zero\n"; + return dd(SpecificValue::snan); + } + + return (y.ispos()) ? dd_pi2 : -dd_pi2; + } + else if (y.iszero()) { + return (x.ispos()) ? dd(0.0) : dd_pi; + } + + if (x == y) { + return (y.ispos()) ? dd_pi4 : -dd_3pi4; + } + + if (x == -y) { + return (y.ispos()) ? dd_3pi4 : -dd_pi4; + } + + dd r = sqrt(sqr(x) + sqr(y)); + dd xx = x / r; + dd yy = y / r; + + // Compute double precision approximation to atan. + dd z = std::atan2(double(y), double(x)); + dd sin_z, cos_z; + + if (std::abs(xx.high()) > std::abs(yy.high())) { + // Use Newton iteration 1. z' = z + (y - sin(z)) / cos(z) + sincos(z, sin_z, cos_z); + z += (yy - sin_z) / cos_z; + } + else { + // Use Newton iteration 2. 
z' = z - (x - cos(z)) / sin(z) + sincos(z, sin_z, cos_z); + z -= (xx - cos_z) / sin_z; + } + + return z; + } + + inline dd atan(const dd& a) { + return atan2(a, dd(1.0)); + } + + inline dd tan(const dd& a) { + dd s, c; + sincos(a, s, c); + return s / c; + } + + inline dd asin(const dd& a) { + dd abs_a = abs(a); + + if (abs_a > 1.0) { + std::cerr << "asin: Argument out of domain\n"; + return dd(SpecificValue::snan); + } + + if (abs_a.isone()) { + return (a.ispos()) ? dd_pi2 : -dd_pi2; + } + + return atan2(a, sqrt(1.0 - sqr(a))); + } + + inline dd acos(const dd& a) { + dd abs_a = abs(a); + + if (abs_a > 1.0) { + std::cerr << "acos: Argument out of domain\n"; + return dd(SpecificValue::snan); + } + + if (abs_a.isone()) { + return (a.ispos()) ? dd(0.0) : dd_pi; + } + + return atan2(sqrt(1.0 - sqr(a)), a); + } }} // namespace sw::universal diff --git a/include/universal/number/dfloat/dfloat_impl.hpp b/include/universal/number/dfloat/dfloat_impl.hpp index 1ff2bced2..22532e5e0 100644 --- a/include/universal/number/dfloat/dfloat_impl.hpp +++ b/include/universal/number/dfloat/dfloat_impl.hpp @@ -22,7 +22,7 @@ namespace sw { namespace universal { -// dfloat is an adaptive precision decimal floating-point type +// dfloat is an fixed size, arbitrary configuration decimal floating-point type template class dfloat { public: @@ -89,34 +89,43 @@ class dfloat { } // initializers for native types - explicit dfloat(signed char iv) { *this = iv; } - explicit dfloat(short iv) { *this = iv; } - explicit dfloat(int iv) { *this = iv; } - explicit dfloat(long iv) { *this = iv; } - explicit dfloat(long long iv) { *this = iv; } - explicit dfloat(char iv) { *this = iv; } - explicit dfloat(unsigned short iv) { *this = iv; } - explicit dfloat(unsigned int iv) { *this = iv; } - explicit dfloat(unsigned long iv) { *this = iv; } - explicit dfloat(unsigned long long iv) { *this = iv; } - explicit dfloat(float iv) { *this = iv; } - explicit dfloat(double iv) { *this = iv; } - explicit dfloat(long 
double iv) { *this = iv; } + explicit dfloat(signed char iv) noexcept { *this = iv; } + explicit dfloat(short iv) noexcept { *this = iv; } + explicit dfloat(int iv) noexcept { *this = iv; } + explicit dfloat(long iv) noexcept { *this = iv; } + explicit dfloat(long long iv) noexcept { *this = iv; } + explicit dfloat(char iv) noexcept { *this = iv; } + explicit dfloat(unsigned short iv) noexcept { *this = iv; } + explicit dfloat(unsigned int iv) noexcept { *this = iv; } + explicit dfloat(unsigned long iv) noexcept { *this = iv; } + explicit dfloat(unsigned long long iv) noexcept { *this = iv; } + explicit dfloat(float iv) noexcept { *this = iv; } + explicit dfloat(double iv) noexcept { *this = iv; } // assignment operators for native types - dfloat& operator=(signed char rhs) { return convert_signed(rhs); } - dfloat& operator=(short rhs) { return convert_signed(rhs); } - dfloat& operator=(int rhs) { return convert_signed(rhs); } - dfloat& operator=(long rhs) { return convert_signed(rhs); } - dfloat& operator=(long long rhs) { return convert_signed(rhs); } - dfloat& operator=(char rhs) { return convert_unsigned(rhs); } - dfloat& operator=(unsigned short rhs) { return convert_unsigned(rhs); } - dfloat& operator=(unsigned int rhs) { return convert_unsigned(rhs); } - dfloat& operator=(unsigned long rhs) { return convert_unsigned(rhs); } - dfloat& operator=(unsigned long long rhs) { return convert_unsigned(rhs); } - dfloat& operator=(float rhs) { return convert_ieee754(rhs); } - dfloat& operator=(double rhs) { return convert_ieee754(rhs); } - dfloat& operator=(long double rhs) { return convert_ieee754(rhs); } + dfloat& operator=(signed char rhs) noexcept { return convert_signed(rhs); } + dfloat& operator=(short rhs) noexcept { return convert_signed(rhs); } + dfloat& operator=(int rhs) noexcept { return convert_signed(rhs); } + dfloat& operator=(long rhs) noexcept { return convert_signed(rhs); } + dfloat& operator=(long long rhs) noexcept { return convert_signed(rhs); } + 
dfloat& operator=(char rhs) noexcept { return convert_unsigned(rhs); } + dfloat& operator=(unsigned short rhs) noexcept { return convert_unsigned(rhs); } + dfloat& operator=(unsigned int rhs) noexcept { return convert_unsigned(rhs); } + dfloat& operator=(unsigned long rhs) noexcept { return convert_unsigned(rhs); } + dfloat& operator=(unsigned long long rhs) noexcept { return convert_unsigned(rhs); } + dfloat& operator=(float rhs) noexcept { return convert_ieee754(rhs); } + dfloat& operator=(double rhs) noexcept { return convert_ieee754(rhs); } + + // conversion operators + explicit operator float() const noexcept { return float(convert_to_ieee754()); } + explicit operator double() const noexcept { return convert_to_ieee754(); } + + +#if LONG_DOUBLE_SUPPORT + explicit dfloat(long double iv) noexcept { *this = iv; } + dfloat& operator=(long double rhs) noexcept { return convert_ieee754(rhs); } + explicit operator long double() const noexcept { return convert_to_ieee754(); } +#endif // prefix operators dfloat operator-() const { @@ -124,11 +133,6 @@ class dfloat { return negated; } - // conversion operators - explicit operator float() const { return float(toNativeFloatingPoint()); } - explicit operator double() const { return float(toNativeFloatingPoint()); } - explicit operator long double() const { return toNativeFloatingPoint(); } - // arithmetic operators dfloat& operator+=(const dfloat& rhs) { return *this; @@ -202,13 +206,11 @@ class dfloat { } // selectors - bool iszero() const noexcept { return false; } - bool isone() const noexcept { return true; } - bool isodd() const noexcept { return false; } - bool iseven() const noexcept { return !isodd(); } - bool ispos() const noexcept { return false; } - bool isneg() const noexcept { return false; } - int scale() const noexcept { return 0; } + constexpr bool iszero() const noexcept { return false; } + constexpr bool isone() const noexcept { return true; } + constexpr bool ispos() const noexcept { return false; } + 
constexpr bool isneg() const noexcept { return false; } + constexpr int scale() const noexcept { return 0; } // convert to string containing digits number of digits std::string str(size_t nrDigits = 0) const { @@ -252,13 +254,14 @@ class dfloat { } protected: - bt _block[nrBlocks]; + bt _block[nrBlocks]; // do we want to pack the digits: two per byte? // HELPER methods - // convert to native floating-point, use conversion rules to cast down to float and double - long double toNativeFloatingPoint() const { - long double ld = 0; + // convert to native floating-point + template + Real convert_to_ieee754() const { + Real ld = 0; return ld; } diff --git a/include/universal/number/fixpnt/fixpnt_impl.hpp b/include/universal/number/fixpnt/fixpnt_impl.hpp index a954ff531..06e27462b 100644 --- a/include/universal/number/fixpnt/fixpnt_impl.hpp +++ b/include/universal/number/fixpnt/fixpnt_impl.hpp @@ -116,9 +116,9 @@ class fixpnt { static constexpr unsigned MSU = nrBlocks - 1; static constexpr bt MSU_MASK = bt(bt(~0) >> (nrBlocks * bitsInBlock - nbits)); - constexpr fixpnt() noexcept : _block(0) {} - - constexpr fixpnt(const fixpnt&) noexcept = default; + // constructors + fixpnt() noexcept = default; + fixpnt(const fixpnt&) noexcept = default; fixpnt(fixpnt&&) noexcept = default; constexpr fixpnt& operator=(const fixpnt&) noexcept = default; @@ -211,18 +211,18 @@ class fixpnt { } // initializers for native types - constexpr fixpnt(signed char initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(short initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(int initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(long initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(long long initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(char initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(unsigned short initial_value) noexcept 
: fixpnt{convert(initial_value)} {} - constexpr fixpnt(unsigned int initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(unsigned long initial_value) noexcept : fixpnt{convert(initial_value)} {} - constexpr fixpnt(unsigned long long initial_value) noexcept : fixpnt{convert(initial_value)} {} - BIT_CAST_CONSTEXPR fixpnt(float initial_value) noexcept : fixpnt{convert(initial_value)} {} - BIT_CAST_CONSTEXPR fixpnt(double initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(signed char initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(short initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(int initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(long initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(long long initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(char initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(unsigned short initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(unsigned int initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(unsigned long initial_value) noexcept : fixpnt{convert(initial_value)} {} + constexpr fixpnt(unsigned long long initial_value) noexcept : fixpnt{convert(initial_value)} {} + BIT_CAST_CONSTEXPR fixpnt(float initial_value) noexcept : fixpnt{convert(initial_value)} {} + BIT_CAST_CONSTEXPR fixpnt(double initial_value) noexcept : fixpnt{convert(initial_value)} {} // access operator for bits // this needs a proxy to be able to create l-values @@ -241,14 +241,14 @@ class fixpnt { constexpr fixpnt& operator=(unsigned int rhs) noexcept { return *this = convert(rhs); } constexpr fixpnt& operator=(unsigned long rhs) noexcept { return *this = convert(rhs); } constexpr fixpnt& operator=(unsigned long long rhs) noexcept { return *this = convert(rhs); } - BIT_CAST_CONSTEXPR 
fixpnt& operator=(float rhs) noexcept { return *this = convert(rhs); } - BIT_CAST_CONSTEXPR fixpnt& operator=(double rhs) noexcept { return *this = convert(rhs); } + BIT_CAST_CONSTEXPR fixpnt& operator=(float rhs) noexcept { return *this = convert(rhs); } + BIT_CAST_CONSTEXPR fixpnt& operator=(double rhs) noexcept { return *this = convert(rhs); } // guard long double support to enable ARM and RISC-V embedded environments #if LONG_DOUBLE_SUPPORT - fixpnt(long double initial_value) noexcept : fixpnt{ convert(initial_value) } {} - fixpnt& operator=(long double rhs) noexcept { return convert(rhs); } - explicit operator long double() const noexcept { return to_native(); } + fixpnt(long double initial_value) noexcept : fixpnt{ convert(initial_value) } {} + fixpnt& operator=(long double rhs) noexcept { return *this = convert(rhs); } + explicit operator long double() const noexcept { return to_native(); } #endif // assign the value of the textual representation to the fixpnt: can be binary/octal/decimal/hexadecimal @@ -1974,7 +1974,7 @@ inline std::ostream& operator<<(std::ostream& ostr, const fixpnt; - constexpr Fixed eps = std::numeric_limits::epsilon(); + Fixed eps{ std::numeric_limits::epsilon() }; Fixed y(a); Fixed x(a); x >>= 1; // divide by 2 diff --git a/include/universal/number/qd/attributes.hpp b/include/universal/number/qd/attributes.hpp new file mode 100644 index 000000000..7dbd3ca18 --- /dev/null +++ b/include/universal/number/qd/attributes.hpp @@ -0,0 +1,65 @@ +#pragma once +// attributes.hpp: information functions for quad-double floating-point type and value attributes +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+ +namespace sw { namespace universal { + +// functions to provide details about the properties of a quad-double (qd) configuration + inline bool sign(const qd& a) { + return a.sign(); + } + + inline int scale(const qd& a) { + return a.scale(); + } + + // generate the maxneg through maxpos value range of a quad-double configuration + std::string qd_range() { + qd v; + std::stringstream s; + s << std::setw(80) << type_tag(v) << " : [ " + << v.maxneg() << " ... " + << v.minneg() << " " + << "0 " + << v.minpos() << " ... " + << v.maxpos() << " ]"; + return s.str(); + } + + /* + // report dynamic range of a type, specialized for a quad-double + std::string dynamic_range(const qd& a) { + std::stringstream s; + qd b(SpecificValue::maxneg), c(SpecificValue::minneg), d(SpecificValue::minpos), e(SpecificValue::maxpos); + s << type_tag(a) << ": "; + s << "minpos scale " << std::setw(10) << d.scale() << " "; + s << "maxpos scale " << std::setw(10) << e.scale() << '\n'; + s << "[" << b << " ... " << c << ", -0, +0, " << d << " ... " << e << "]\n"; + s << "[" << to_binary(b) << " ... " << to_binary(c) << ", -0, +0, " << to_binary(d) << " ... 
" << to_binary(e) << "]\n"; + qd ninf(SpecificValue::infneg), pinf(SpecificValue::infpos); + s << "inclusive range = (" << to_binary(ninf) << ", " << to_binary(pinf) << ")\n"; + s << "inclusive range = (" << ninf << ", " << pinf << ")\n"; + return s.str(); + } + */ + + int minpos_scale(const qd& b) { + qd c(b); + return c.minpos().scale(); + } + + int maxpos_scale(const qd& b) { + qd c(b); + return c.maxpos().scale(); + } + + int max_negative_scale(const qd& b) { + qd c(b); + return c.maxneg().scale(); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/exceptions.hpp b/include/universal/number/qd/exceptions.hpp new file mode 100644 index 000000000..34f9df02c --- /dev/null +++ b/include/universal/number/qd/exceptions.hpp @@ -0,0 +1,64 @@ +#pragma once +// exceptions.hpp: definition of arbitrary configuration quad-double exceptions +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include + +namespace sw { namespace universal { + +// base class for quad-double arithmetic exceptions +struct qd_arithmetic_exception : public universal_arithmetic_exception { + qd_arithmetic_exception(const std::string& err) : universal_arithmetic_exception(std::string("quad-double arithmetic exception: ") + err) {}; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////// +/// specialized exceptions to aid application level exception handling + +// invalid_argument is thrown when a mathematical function argument is invalid +struct qd_invalid_argument : public qd_arithmetic_exception { + qd_invalid_argument() : qd_arithmetic_exception("invalid argument") {} +}; + +// not_a_number is thrown when a lvar is NaN +struct qd_not_a_number : public qd_arithmetic_exception { + qd_not_a_number() : qd_arithmetic_exception("not a number") {} +}; + +// divide by zero arithmetic exception for reals +struct qd_divide_by_zero : public qd_arithmetic_exception { + qd_divide_by_zero() : qd_arithmetic_exception("divide by zero") {} +}; + +// divide_by_nan is thrown when the denominator in a division operator is NaN +struct qd_divide_by_nan : public qd_arithmetic_exception { + qd_divide_by_nan() : qd_arithmetic_exception("divide by nan") {} +}; + +// operand_is_nan is thrown when an rvar in a binary operator is NaN +struct qd_operand_is_nan : public qd_arithmetic_exception { + qd_operand_is_nan() : qd_arithmetic_exception("operand is nan") {} +}; + +// negative argument to sqrt +struct qd_negative_sqrt_arg : public qd_arithmetic_exception { + qd_negative_sqrt_arg() : qd_arithmetic_exception("negative sqrt argument") {} +}; + +// negative argument to nroot +struct qd_negative_nroot_arg : public qd_arithmetic_exception { + qd_negative_nroot_arg() : qd_arithmetic_exception("negative nroot argument") {} +}; + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/// REAL INTERNAL 
OPERATION EXCEPTIONS + +struct qd_internal_exception : public universal_internal_exception { + qd_internal_exception(const std::string& err) : universal_internal_exception(std::string("quad-double internal exception: ") + err) {}; +}; + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/manipulators.hpp b/include/universal/number/qd/manipulators.hpp new file mode 100644 index 000000000..ed5af5265 --- /dev/null +++ b/include/universal/number/qd/manipulators.hpp @@ -0,0 +1,43 @@ +// manipulators.hpp: definitions of helper functions for quad-double type manipulation +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include +// pull in the color printing for shells utility +#include + +namespace sw { namespace universal { + + // Generate a type tag for a quad-double + inline std::string type_tag(const qd& = {}) { + return std::string("quad-double"); + } + + // generate a binary, color-coded representation of the quad-double + inline std::string color_print(qd const& r, bool nibbleMarker = false) { + //constexpr unsigned es = 11; + //constexpr unsigned fbits = 106; + std::stringstream s; + + /* + Color red(ColorCode::FG_RED); + Color yellow(ColorCode::FG_YELLOW); + Color blue(ColorCode::FG_BLUE); + Color magenta(ColorCode::FG_MAGENTA); + Color cyan(ColorCode::FG_CYAN); + Color white(ColorCode::FG_WHITE); + Color def(ColorCode::FG_DEFAULT); + */ + for (int i = 0; i < 4; ++i) { + s << color_print(r[i], nibbleMarker); + if (i < 3) s << ", "; + } + return s.str(); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/classify.hpp b/include/universal/number/qd/math/classify.hpp new file mode 100644 index 000000000..9f02eaef4 --- /dev/null +++ b/include/universal/number/qd/math/classify.hpp @@ -0,0 +1,55 @@ +#pragma once +// classify.hpp: 
classification functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + +// STD LIB function for IEEE floats: Categorizes floating point value arg into the following categories: zero, subnormal, normal, infinite, NAN, or implementation-defined category. +int fpclassify(const qd& a) { + return (std::fpclassify(a[0])); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is a positive or negative infinity. +// specialized for quad-double (qd) +inline bool isinf(const qd& a) { + return (std::fpclassify(a[0]) == FP_INFINITE); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is a not-a-number (NaN) value. +// specialized for quad-double (qd) +inline bool isnan(const qd& a) { + return (std::fpclassify(a[0]) == FP_NAN); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg has finite value i.e. it is normal, subnormal or zero, but not infinite or NaN. +// specialized for quad-double (qd) +inline bool isfinite(const qd& a) { + return (std::fpclassify(a[0]) != FP_INFINITE) && (std::fpclassify(a[0]) != FP_NAN); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is normal, i.e. is neither zero, subnormal, infinite, nor NaN. +// specialized for quad-double (qd) +inline bool isnormal(const qd& a) { + return (std::fpclassify(a[0]) == FP_NORMAL); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is denormal, i.e. is neither zero, normal, infinite, nor NaN. 
+// specialized for quad-double (qd) +inline bool isdenorm(const qd& a) { + return (std::fpclassify(a[0]) == FP_SUBNORMAL); +} + +inline bool iszero(const qd& a) { + return (std::fpclassify(a[0]) == FP_ZERO); +} + +bool signbit(qd const& a) { + auto signA = std::copysign(1.0, a[0]); + return signA < 0.0; +} + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/complex.hpp b/include/universal/number/qd/math/complex.hpp new file mode 100644 index 000000000..79adf6c46 --- /dev/null +++ b/include/universal/number/qd/math/complex.hpp @@ -0,0 +1,12 @@ +#pragma once +// complex.hpp: complex support for quad-double floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/error_and_gamma.hpp b/include/universal/number/qd/math/error_and_gamma.hpp new file mode 100644 index 000000000..c3f78063a --- /dev/null +++ b/include/universal/number/qd/math/error_and_gamma.hpp @@ -0,0 +1,21 @@ +#pragma once +// error_and_gamma.hpp: error/gamma function support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+ +namespace sw { namespace universal { + + // Compute the error function erf(x) = 2 over sqrt(PI) times Integral from 0 to x of e^(-t^2) dt (shim: evaluated in double precision on the leading limb x[0]) + inline qd erf(qd x) { + return qd(std::erf(x[0])); + } + + // Compute the complementary error function: 1 - erf(x) (shim: evaluated in double precision on the leading limb x[0]) + inline qd erfc(qd x) { + return qd(std::erfc(x[0])); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/exponent.hpp b/include/universal/number/qd/math/exponent.hpp new file mode 100644 index 000000000..5c42e8c98 --- /dev/null +++ b/include/universal/number/qd/math/exponent.hpp @@ -0,0 +1,107 @@ +#pragma once +// exponent.hpp: exponent functions for quad-double (qd) floating-point +// +// algorithms courtesy Scibuilders, Jack Poulson +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // fwd reference + qd ldexp(const qd&, int); + + constexpr unsigned n_inv_fact = 15; + static const qd inv_fact[n_inv_fact] = { + qd(1.66666666666666657e-01, 9.25185853854297066e-18, 5.13581318503262866e-34, 2.85094902409834186e-50), + qd(4.16666666666666644e-02, 2.31296463463574266e-18, 1.28395329625815716e-34, 7.12737256024585466e-51), + qd(8.33333333333333322e-03, 1.15648231731787138e-19, 1.60494162032269652e-36, 2.22730392507682967e-53), + qd(1.38888888888888894e-03, -5.30054395437357706e-20, -1.73868675534958776e-36, -1.63335621172300840e-52), + qd(1.98412698412698413e-04, 1.72095582934207053e-22, 1.49269123913941271e-40, 1.29470326746002471e-58), + qd(2.48015873015873016e-05, 2.15119478667758816e-23, 1.86586404892426588e-41, 1.61837908432503088e-59), + qd(2.75573192239858925e-06, -1.85839327404647208e-22, 8.49175460488199287e-39, -5.72661640789429621e-55), + qd(2.75573192239858883e-07, 2.37677146222502973e-23, -3.26318890334088294e-40, 1.61435111860404415e-56), + qd(2.50521083854417202e-08, 
-1.44881407093591197e-24, 2.04267351467144546e-41, -8.49632672007163175e-58), + qd(2.08767569878681002e-09, -1.20734505911325997e-25, 1.70222792889287100e-42, 1.41609532150396700e-58), + qd(1.60590438368216133e-10, 1.25852945887520981e-26, -5.31334602762985031e-43, 3.54021472597605528e-59), + qd(1.14707455977297245e-11, 2.06555127528307454e-28, 6.88907923246664603e-45, 5.72920002655109095e-61), + qd(7.64716373181981641e-13, 7.03872877733453001e-30, -7.82753927716258345e-48, 1.92138649443790242e-64), + qd(4.77947733238738525e-14, 4.39920548583408126e-31, -4.89221204822661465e-49, 1.20086655902368901e-65), + qd(2.81145725434552060e-15, 1.65088427308614326e-31, -2.87777179307447918e-50, 4.27110689256293549e-67) + }; + + inline qd exp(const qd& x) { + /* Strategy: We first reduce the size of x by noting that + + exp(kr + m * log(2)) = 2^m * exp(r)^k + + where m and k are integers. By choosing m appropriately + we can make |kr| <= log(2) / 2 = 0.347. Then exp(r) is + evaluated using the familiar Taylor series. Reducing the + argument substantially speeds up the convergence. 
+ */ + + constexpr double k = double(1ull << 16); + constexpr double inv_k = 1.0 / k; + + if (x[0] <= -709.0) return qd(0.0); + + if (x[0] >= 709.0) return qd(SpecificValue::infpos); + + if (x.iszero()) return qd(1.0); + + if (x.isone()) return qd_e; + + double m = std::floor(x[0] / qd_log2[0] + 0.5); + qd r = mul_pwr2(x - qd_log2 * m, inv_k); + qd s, p, t; + double thresh = inv_k * qd_eps; + + p = sqr(r); + s = r + mul_pwr2(p, 0.5); + int i = 0; + do { + p *= r; + t = p * inv_fact[i++]; + s += t; + } while (std::abs(double(t)) > thresh && i < 9); + + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s += 1.0; + return ldexp(s, static_cast<int>(m)); + } + +// Base-2 exponential function (shim: evaluated in double precision) +inline qd exp2(const qd& x) { + return qd(std::exp2(double(x))); +} + +// Base-10 exponential function (shim: evaluated in double precision) +inline qd exp10(const qd& x) { + return qd(std::pow(10.0, double(x))); +} + +// Base-e exponential function exp(x)-1 (shim: evaluated in double precision) +inline qd expm1(const qd& x) { + return qd(std::expm1(double(x))); +} + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/fractional.hpp b/include/universal/number/qd/math/fractional.hpp new file mode 100644 index 000000000..637f03d73 --- /dev/null +++ b/include/universal/number/qd/math/fractional.hpp @@ -0,0 +1,21 @@ +#pragma once +// fractional.hpp: fractional support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. 
+// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // fmod returns x - n*y where n = x/y with the fractional part truncated (shim: evaluated in double precision) + inline qd fmod(const qd& x, const qd& y) { + return qd(std::fmod(double(x), double(y))); + } + + // shim to stdlib: IEEE remainder of x/y, evaluated in double precision + inline qd remainder(const qd& x, const qd& y) { + return qd(std::remainder(double(x), double(y))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/hyperbolic.hpp b/include/universal/number/qd/math/hyperbolic.hpp new file mode 100644 index 000000000..33f7b0a49 --- /dev/null +++ b/include/universal/number/qd/math/hyperbolic.hpp @@ -0,0 +1,41 @@ +#pragma once +// hyperbolic.hpp: hyperbolic function support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+ +namespace sw { namespace universal { + + // hyperbolic sine of x (shim: evaluated in double precision) + inline qd sinh(qd x) { + return qd(std::sinh(double(x))); + } + + // hyperbolic cosine of x (shim: evaluated in double precision) + inline qd cosh(qd x) { + return qd(std::cosh(double(x))); + } + + // hyperbolic tangent of x (shim: evaluated in double precision) + inline qd tanh(qd x) { + return qd(std::tanh(double(x))); + } + + // inverse hyperbolic tangent of x (shim: evaluated in double precision) + inline qd atanh(qd x) { + return qd(std::atanh(double(x))); + } + + // inverse hyperbolic cosine of x (shim: evaluated in double precision) + inline qd acosh(qd x) { + return qd(std::acosh(double(x))); + } + + // inverse hyperbolic sine of x (shim: evaluated in double precision) + inline qd asinh(qd x) { + return qd(std::asinh(double(x))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/hypot.hpp b/include/universal/number/qd/math/hypot.hpp new file mode 100644 index 000000000..41969f3d1 --- /dev/null +++ b/include/universal/number/qd/math/hypot.hpp @@ -0,0 +1,15 @@ +#pragma once +// hypot.hpp: hypot support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + inline qd hypot(const qd& x, const qd& y) { + return qd(std::hypot(double(x), double(y))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/logarithm.hpp b/include/universal/number/qd/math/logarithm.hpp new file mode 100644 index 000000000..4150281d0 --- /dev/null +++ b/include/universal/number/qd/math/logarithm.hpp @@ -0,0 +1,122 @@ +#pragma once +// logarithm.hpp: logarithm functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include + +namespace sw { namespace universal { + + inline qd log(const qd& a) { + if (a.isnan() || a.isinf()) return a; + + if (a.iszero()) return qd(SpecificValue::infneg); + + if (a.isone()) return 0.0; + + if (a[0] <= 0.0) { + std::cerr << "log: non-positive argument\n"; + errno = EDOM; + return qd(SpecificValue::qnan); + } + + /* Strategy. The Taylor series for log converges much more + slowly than that of exp, due to the lack of the factorial + term in the denominator. Hence this routine instead tries + to determine the root of the function + + f(x) = exp(x) - a + + using Newton iteration. The iteration is given by + + x' = x - f(x)/f'(x) + = x - (1 - a * exp(-x)) + = x + a * exp(-x) - 1. + + Three iterations are applied to the double-precision seed, since Newton's iteration + approximately doubles the number of digits per iteration. + */ + + qd x = std::log(a[0]); // Initial approximation + + x = x + a * exp(-x) - 1.0; + x = x + a * exp(-x) - 1.0; + x = x + a * exp(-x) - 1.0; + + return x; + } + + /// +/// binary logarithm (base = 2) +/// +/// input +/// binary logarithm of a + inline qd log2(const qd& a) { + if (a.isnan() || a.isinf()) return a; + + if (a.iszero()) return qd(SpecificValue::infneg); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log2: non-positive argument\n"; + errno = EDOM; + return qd(SpecificValue::qnan); + } + + return log(a) * qd_lge; + } + + /// + /// decimal logarithm (base = 10) + /// + /// input + /// decimal logarithm of a + inline qd log10(const qd& a) { + if (a.isnan() || a.isinf()) return a; + + if (a.iszero()) return qd(SpecificValue::infneg); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log10: non-positive argument\n"; + errno = EDOM; + return qd(SpecificValue::qnan); + } + + + return log(a) / qd_log10; + } + + /// + /// Natural logarithm of 1+a + /// + /// input + /// natural logarithm of (1 + a) + inline qd log1p(const qd& a) { + if (a.isnan() || a.isinf()) return a; + + if (a.iszero()) return qd(0.0); + + if (a == -1.0) return 
qd(SpecificValue::infneg); + + if (a < -1.0) { + std::cerr << "log1p: non-positive argument\n"; + errno = EDOM; + return qd(SpecificValue::qnan); + } + + if (a.isinf()) return a; // NOTE: unreachable, infinities are already returned by the first check + + if ((a >= 2.0) || (a <= -0.5)) // a >= 2.0 - no loss of significant bits - use log() + return log(1.0 + a); + + // at this point, -0.5 < a < 2.0 + // return _log1p(a); + return log(1.0 + a); // TODO: evaluate loss of precision + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/minmax.hpp b/include/universal/number/qd/math/minmax.hpp new file mode 100644 index 000000000..997ce74ba --- /dev/null +++ b/include/universal/number/qd/math/minmax.hpp @@ -0,0 +1,19 @@ +#pragma once +// minmax.hpp: minmax support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + inline qd min(const qd& x, const qd& y) { + return (x > y) ? y : x; // compare and return the qd operands themselves so all four limbs are preserved + } + + inline qd max(const qd& x, const qd& y) { + return (y > x) ? y : x; // compare and return the qd operands themselves so all four limbs are preserved + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/next.hpp b/include/universal/number/qd/math/next.hpp new file mode 100644 index 000000000..eb4cc0d6c --- /dev/null +++ b/include/universal/number/qd/math/next.hpp @@ -0,0 +1,70 @@ +#pragma once +// next.hpp: nextafter/nexttoward functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + +/* +Parameters + x Base value. + t Value toward which the return value is approximated. +If both parameters compare equal, the function returns t. 
+ +Return Value + The next representable value after x in the direction of t. + + If x is the largest finite value representable in the type, + and the result is infinite or not representable, an overflow range error occurs. + + If an overflow range error occurs: + - And math_errhandling has MATH_ERRNO set: the global variable errno is set to ERANGE. + - And math_errhandling has MATH_ERREXCEPT set: FE_OVERFLOW is raised. + */ +inline qd nextafter(qd x, qd target) { + if (x == target) return target; + if (target.isnan()) { + if (x.isneg()) { + --x; + } + else { + ++x; + } + } + else { + if (x > target) { + --x; + } + else { + ++x; + } + } + return x; +} + +inline qd nexttoward(qd x, qd target) { + if (x == target) return x; + if (target.isnan()) { + if (x.isneg()) { + --x; + } + else { + ++x; + } + } + else { + if (x > target) { + --x; + } + else { + ++x; + } + } + return x; +} + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/numerics.hpp b/include/universal/number/qd/math/numerics.hpp new file mode 100644 index 000000000..54317c9a7 --- /dev/null +++ b/include/universal/number/qd/math/numerics.hpp @@ -0,0 +1,40 @@ +#pragma once +// numerics.hpp: numerics functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + + // clang implementation is calling these functions so we need implementations for quad-double (qd) + + + // copysign returns a value with the magnitude of a, and the sign of b + inline qd copysign(const qd& a, const qd& b) { + auto signA = std::copysign(1.0, a[0]); + auto signB = std::copysign(1.0, b[0]); + + return signA != signB ? 
-a : a; + } + + // decompose quad-double into a fraction and an exponent: the exponent is taken from the leading limb and all trailing limbs are rescaled by it + inline qd frexp(const qd& a, int* pexp) { + double a0 = std::frexp(a[0], pexp); + double a1 = std::ldexp(a[1], -*pexp); + double a2 = std::ldexp(a[2], -*pexp); + double a3 = std::ldexp(a[3], -*pexp); + return qd(a0, a1, a2, a3); + } + + // recompose quad-double from a fraction and an exponent: every limb is scaled by 2^exponent + inline qd ldexp(const qd& a, int exponent) { + static_assert(std::numeric_limits< qd >::radix == 2, "CONFIGURATION: qd radix must be 2!"); + static_assert(std::numeric_limits< double >::radix == 2, "CONFIGURATION: double radix must be 2!"); + + return qd(std::ldexp(a[0], exponent), std::ldexp(a[1], exponent), std::ldexp(a[2], exponent), std::ldexp(a[3], exponent)); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/pow.hpp b/include/universal/number/qd/math/pow.hpp new file mode 100644 index 000000000..48cf69124 --- /dev/null +++ b/include/universal/number/qd/math/pow.hpp @@ -0,0 +1,69 @@ +#pragma once +// pow.hpp: pow functions for quad-double (qd) floating-point +// +// algorithms courtesy Scibuilders, Jack Poulson +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + + // fwd reference + qd exp(const qd&); + + // power function: a^b computed as exp(b * log(a)) + inline qd pow(const qd& a, const qd& b) { + return exp(b * log(a)); + } + + // power function of a qd to double + inline qd pow(const qd& x, double y) { + return pow(x, qd(y)); + } + + // Computes the n-th power of a quad-double number. + // NOTE: 0^0 causes an error. + inline qd npwr(const qd& a, int n) { + if (n == 0) { +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.iszero()) throw qd_invalid_argument(); +#else // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.iszero()) { + std::cerr << "(npwr): Invalid argument\n"; + return qd(SpecificValue::snan); + } +#endif // ! 
QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + return 1.0; + } + + qd r = a; + qd s = 1.0; + int N = std::abs(n); + + if (N > 1) { + // Use binary exponentiation + while (N > 0) { + if (N % 2 == 1) { + s *= r; + } + N /= 2; + if (N > 0) r = sqr(r); + } + } else { + s = r; + } + + // if n is negative then compute the reciprocal + if (n < 0) return (1.0 / s); + return s; + } + + inline qd pow(const qd& a, int n) { + return npwr(a, n); + } + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/sqrt.hpp b/include/universal/number/qd/math/sqrt.hpp new file mode 100644 index 000000000..2d6ab09cf --- /dev/null +++ b/include/universal/number/qd/math/sqrt.hpp @@ -0,0 +1,115 @@ +#pragma once +// sqrt.hpp: sqrt functions for quad-double (qd) floats +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +#ifndef QUADDOUBLE_NATIVE_SQRT +#define QUADDOUBLE_NATIVE_SQRT 0 +#endif + +namespace sw { namespace universal { + +#if QUADDOUBLE_NATIVE_SQRT + + // Computes the square root of the quad-double number a. + // NOTE: a must be a non-negative number + inline qd sqrt(qd const &a) { + /* Strategy: Use Karp's trick: if x is an approximation + to sqrt(a), then + + sqrt(a) = a*x + [a - (a*x)^2] * x / 2 (approx) + + The approximation is accurate to twice the accuracy of x. + Also, the multiplication (a*x) and [-]*x can be done with + only half the precision. 
+ */ + + if (a.iszero()) return qd{}; + +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.isneg()) throw qd_negative_sqrt_arg(); +#else + if (a.isneg()) std::cerr << "quad-double argument to sqrt is negative: " << a << std::endl; +#endif + + double x = 1.0 / std::sqrt(a[0]); + double ax = a[0] * x; + return qd(ax) + qd((a - sqr(qd(ax)))[0] * (x * 0.5)); // sum of the double estimate and its first-order correction + } + +#else + + // sqrt shim for quad-double: evaluated in double precision + inline qd sqrt(qd const& a) { +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.isneg()) throw qd_negative_sqrt_arg(); +#else // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.isneg()) std::cerr << "quad-double argument to sqrt is negative: " << a << std::endl; +#endif // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.iszero()) return a; + return qd(std::sqrt(double(a))); + } + +#endif // ! QUADDOUBLE_NATIVE_SQRT + + // reciprocal sqrt + inline qd rsqrt(qd const& a) { + qd v = sqrt(a); + return reciprocal(v); + } + + + /* Computes the n-th root of the quad-double number a. + NOTE: n must be a positive integer. + NOTE: If n is even, then a must not be negative. */ + inline qd nroot(const qd& a, int n) { + /* Strategy: Use Newton iteration for the function + + f(x) = x^(-n) - a + + to find its root a^{-1/n}. The iteration is thus + + x' = x + x * (1 - a * x^n) / n + + which converges quadratically. We can then find + a^{1/n} by taking the reciprocal. + */ + +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (n <= 0) throw qd_negative_nroot_arg(); + + if (n % 2 == 0 && a.isneg()) throw qd_negative_nroot_arg(); + +#else // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (n <= 0) { + std::cerr << "quad-double nroot argument is negative: " << n << std::endl; return qd(SpecificValue::snan); // do not fall through: n == 0 would divide by zero below + } + + if (n % 2 == 0 && a.isneg()) { + std::cerr << "quad-double nroot argument is negative: " << n << std::endl; + return qd(SpecificValue::snan); + } + +#endif // ! 
QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + + if (n == 1) return a; + if (n == 2) return sqrt(a); + + if (a.iszero()) return qd(0.0); + + // Note a^{-1/n} = exp(-log(a)/n) + qd r = abs(a); + qd x = std::exp(-std::log(r[0]) / n); + + // Perform Newton's iteration: the seed is only double-accurate and each step roughly doubles the precision, so three steps are applied. + for (int iter = 0; iter < 3; ++iter) x += x * (1.0 - r * pown(x, n)) / static_cast<double>(n); + if (a[0] < 0.0) x = -x; + + return 1.0/x; + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/trigonometry.hpp b/include/universal/number/qd/math/trigonometry.hpp new file mode 100644 index 000000000..caccfa31c --- /dev/null +++ b/include/universal/number/qd/math/trigonometry.hpp @@ -0,0 +1,77 @@ +#pragma once +// trigonometry.hpp: trigonometry function support for quad-double (qd) floating-point +// +// algorithms and constants courtesy of Scibuilders, Jack Poulson +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +namespace sw { namespace universal { + + // value representing an angle expressed in radians + // One radian is equivalent to 180/PI degrees + + // sine of an angle of x radians (shim: evaluated in double precision) + + inline qd sin(const qd& x) { + return qd(std::sin(double(x))); + } + + // cosine of an angle of x radians (shim: evaluated in double precision) + + inline qd cos(const qd& x) { + return qd(std::cos(double(x))); + } + + // tangent of an angle of x radians (shim: evaluated in double precision) + + inline qd tan(const qd& x) { + return qd(std::tan(double(x))); + } + + // arc tangent of x, result in radians (shim: evaluated in double precision) + + inline qd atan(const qd& x) { + return qd(std::atan(double(x))); + } + + // Arc tangent with two parameters + + inline qd atan2(qd y, const qd& x) { + return qd(std::atan2(double(y), double(x))); + } + + // arc cosine of x, result in radians (shim: evaluated in double precision) + + inline qd acos(const qd& x) { + return qd(std::acos(double(x))); + } + + // arc sine of x, result in radians (shim: evaluated in double precision) + + inline qd asin(const qd& x) { + return qd(std::asin(double(x))); + } + + // cotangent of an angle of x radians: cot(x) = 1 / tan(x) + + inline qd cot(const qd& x) { + return qd(1.0 / std::tan(double(x))); + } + + // secant of an angle of x radians + + inline qd sec(const qd& x) { + return qd(1.0 / std::cos(double(x))); + } + + // cosecant of an angle of x radians + + inline qd csc(const qd& x) { + return qd(1.0 / std::sin(double(x))); + } + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/truncate.hpp b/include/universal/number/qd/math/truncate.hpp new file mode 100644 index 000000000..bcbcb3236 --- /dev/null +++ b/include/universal/number/qd/math/truncate.hpp @@ -0,0 +1,23 @@ +#pragma once +// truncate.hpp: truncate support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+ +namespace sw { namespace universal { + + // Truncate value by rounding toward zero, returning the nearest integral value that is not larger in magnitude than x (shim: evaluated in double precision) + inline qd trunc(qd x) { + return qd(std::trunc(double(x))); + } + + // Round to nearest: returns the integral value that is nearest to x, with halfway cases rounded away from zero (shim: evaluated in double precision) + inline qd round(qd x) { + return qd(std::round(double(x))); + } + + // floor and ceil are being used in the class definition and are defined in that file + +}} // namespace sw::universal diff --git a/include/universal/number/qd/mathlib.hpp b/include/universal/number/qd/mathlib.hpp new file mode 100644 index 000000000..cb923ea57 --- /dev/null +++ b/include/universal/number/qd/mathlib.hpp @@ -0,0 +1,24 @@ +#pragma once +// mathlib.hpp: definition of mathematical functions for the quad-double floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +#include + +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/include/universal/number/qd/numeric_limits.hpp b/include/universal/number/qd/numeric_limits.hpp new file mode 100644 index 000000000..53bba8c2d --- /dev/null +++ b/include/universal/number/qd/numeric_limits.hpp @@ -0,0 +1,82 @@ +#pragma once +// numeric_limits.hpp: definition of numeric_limits for quad-double types +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +#include +namespace std { + +template<> +class numeric_limits< sw::universal::qd > { +public: + using QuadDouble = sw::universal::qd; + static constexpr bool is_specialized = true; + static constexpr QuadDouble min() { // return minimum value; NOTE(review): this scaling (2^-969) mirrors the double-double formula - confirm it is the intended quad-double minimum + // return QuadDouble(sw::universal::SpecificValue::minpos); + return QuadDouble(radix * (numeric_limits< double >::min() / numeric_limits< double >::epsilon())); + } + static constexpr QuadDouble max() { // return maximum value + //return QuadDouble(sw::universal::SpecificValue::maxpos); + return QuadDouble(numeric_limits< double >::max()); + } + static constexpr QuadDouble lowest() { // return most negative value + //return QuadDouble(sw::universal::SpecificValue::maxneg); + return (-(max)()); + } + static constexpr QuadDouble epsilon() { // return smallest effective increment from 1.0: four limbs of double precision give (2^-52)^4 / 2 = 2^-209 + return numeric_limits< double >::epsilon() * numeric_limits< double >::epsilon() * numeric_limits< double >::epsilon() * numeric_limits< double >::epsilon() / radix; + } + static constexpr QuadDouble round_error() { // return largest rounding error + return QuadDouble(1.0 / radix); + } + static constexpr QuadDouble denorm_min() { // return minimum denormalized value + // return QuadDouble(sw::universal::SpecificValue::minpos); + return 0.0; + } + static constexpr QuadDouble infinity() { // return positive infinity + return QuadDouble(sw::universal::SpecificValue::infpos); + //return numeric_limits< double >::infinity(); + } + static constexpr QuadDouble quiet_NaN() { // return non-signaling NaN + return QuadDouble(sw::universal::SpecificValue::qnan); + //return numeric_limits< double >::quiet_NaN(); + } + static constexpr QuadDouble signaling_NaN() { // return signaling NaN + return QuadDouble(sw::universal::SpecificValue::snan); + //return numeric_limits< double >::signaling_NaN(); + } + + static constexpr int digits = 4 * std::numeric_limits<double>::digits; + static constexpr int digits10 = static_cast<int>(digits * 0.30103); + static constexpr int max_digits10 = digits10; + static constexpr bool is_signed = true; 
+ static constexpr bool is_integer = false; + static constexpr bool is_exact = false; + static constexpr int radix = 2; + + // C++ specification: min_exponent is one more than the smallest negative power + // of the radix that is a valid normalized number + static constexpr int min_exponent = QuadDouble::MIN_EXP_NORMAL + 1; + static constexpr int min_exponent10 = static_cast<int>(min_exponent * 0.30103); + // C++ specification: max_exponent is one more than the largest integer power + // of the radix that is a valid finite floating-point number + static constexpr int max_exponent = QuadDouble::MAX_EXP; + static constexpr int max_exponent10 = static_cast<int>(max_exponent * 0.30103); + static constexpr bool has_infinity = true; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = true; + static constexpr float_denorm_style has_denorm = denorm_absent; + static constexpr bool has_denorm_loss = false; + + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = false; + static constexpr bool is_modulo = false; + static constexpr bool traps = false; + static constexpr bool tinyness_before = false; + static constexpr float_round_style round_style = round_toward_zero; +}; + +} diff --git a/include/universal/number/qd/qd.hpp b/include/universal/number/qd/qd.hpp new file mode 100644 index 000000000..4a8c7a88b --- /dev/null +++ b/include/universal/number/qd/qd.hpp @@ -0,0 +1,76 @@ +// quad-double floating-point arithmetic standard header +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#ifndef _QUADDOUBLE_STANDARD_HEADER_ +#define _QUADDOUBLE_STANDARD_HEADER_ + +//////////////////////////////////////////////////////////////////////////////////////// +/// COMPILATION DIRECTIVES TO DIFFERENT COMPILERS +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// required std libraries +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// BEHAVIORAL COMPILATION SWITCHES + +//////////////////////////////////////////////////////////////////////////////////////// +// enable/disable the ability to use literals in binary logic and arithmetic operators +#if !defined(QUADDOUBLE_ENABLE_LITERALS) +// default is to enable them +#define QUADDOUBLE_ENABLE_LITERALS 1 +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +// enable throwing specific exceptions for arithmetic errors +// left to application to enable +#if !defined(QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION) +// default is to use std::cerr for signalling an error +#define QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION 0 +#define QUADDOUBLE_EXCEPT noexcept +#else +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION +#define QUADDOUBLE_EXCEPT +#else +#define QUADDOUBLE_EXCEPT noexcept +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +// enable native sqrt implementation +// +#if !defined(QUADDOUBLE_NATIVE_SQRT) +#define QUADDOUBLE_NATIVE_SQRT 0 +#endif + +/////////////////////////////////////////////////////////////////////////////////////// +// bring in the trait functions +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// INCLUDE FILES that make up the library +#include +#include +#include +#include +#include + +// useful functions to work with doubledoubles +#include +#include + 
+/////////////////////////////////////////////////////////////////////////////////////// +/// elementary math functions library +#include + +#endif diff --git a/include/universal/number/qd/qd_fwd.hpp b/include/universal/number/qd/qd_fwd.hpp new file mode 100644 index 000000000..648e9d573 --- /dev/null +++ b/include/universal/number/qd/qd_fwd.hpp @@ -0,0 +1,24 @@ +#pragma once +// qd_fwd.hpp : forward declarations for the quad-double floating-point environment +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + + // forward references + class qd; + + bool parse(const std::string& number, qd& v); + + inline qd abs(qd const&); + inline qd sqrt(qd const&); + qd fabs(qd); + + qd fma(qd const&, qd const&, qd const&); + +}} // namespace sw::universal + diff --git a/include/universal/number/qd/qd_impl.hpp b/include/universal/number/qd/qd_impl.hpp new file mode 100644 index 000000000..4bc2ebcac --- /dev/null +++ b/include/universal/number/qd/qd_impl.hpp @@ -0,0 +1,1582 @@ +#pragma once +// qd_impl.hpp: implementation of the double-double floating-point number system described in +// +// Sherry Li, David Bailey, LBNL, "Library for Double-Double and Quad-Double Arithmetic", 2008 +// https://www.researchgate.net/publication/228570156_Library_for_Double-Double_and_Quad-Double_Arithmetic +// +// Adapted core subroutines from QD library by Yozo Hida +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +#include +#include +#include +#include +#include +#include + +// supporting types and functions +#include +#include +#include +#include +// qd exception structure +#include +#include + +namespace sw { namespace universal { + +// fwd references to free functions +qd operator+(const qd&, const qd&); +qd operator-(const qd&, const qd&); +qd operator*(const qd&, const qd&); +qd operator/(const qd&, const qd&); +qd pown(const qd&, int); + +// qd is an unevaluated quadruple of IEEE-754 doubles that provides a (1,11,212) floating-point triple +class qd { +public: + static constexpr unsigned nbits = 256; + static constexpr unsigned es = 11; + static constexpr unsigned fbits = 212; // number of fraction digits + // exponent characteristics are the same as native double precision floating-point + static constexpr int EXP_BIAS = ((1 << (es - 1u)) - 1l); + static constexpr int MAX_EXP = (es == 1) ? 1 : ((1 << es) - EXP_BIAS - 1); + static constexpr int MIN_EXP_NORMAL = 1 - EXP_BIAS; + static constexpr int MIN_EXP_SUBNORMAL = 1 - EXP_BIAS - int(fbits); // the scale of smallest ULP + + /// trivial constructor + qd() = default; + + qd(const qd&) = default; + qd(qd&&) = default; + + qd& operator=(const qd&) = default; + qd& operator=(qd&&) = default; + + // converting constructors + qd(const std::string& stringRep) : x{0} { assign(stringRep); } + + // specific value constructor + constexpr qd(const SpecificValue code) noexcept : x{0.0} { + switch (code) { + case SpecificValue::maxpos: + maxpos(); + break; + case SpecificValue::minpos: + minpos(); + break; + case SpecificValue::zero: + default: + zero(); + break; + case SpecificValue::minneg: + minneg(); + break; + case SpecificValue::maxneg: + maxneg(); + break; + case SpecificValue::infpos: + setinf(false); + break; + case SpecificValue::infneg: + setinf(true); + break; + case SpecificValue::nar: // approximation as qds don't have a NaR + case SpecificValue::qnan: + setnan(NAN_TYPE_QUIET); + break; + case 
SpecificValue::snan: + setnan(NAN_TYPE_SIGNALLING); + break; + } + } + + // raw limb constructor: no argument checking + constexpr qd(double x0) noexcept : x{ 0 } { x[0] = x0; } + constexpr qd(double x0, double x1) noexcept : x{ 0 } { x[0] = x0; x[1] = x1; } + constexpr qd(double x0, double x1, double x2, double x3) noexcept : x{ 0 } { x[0] = x0; x[1] = x1; x[2] = x2; x[3] = x3; } + + // initializers for native types + constexpr qd(signed char iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(short iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(int iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(long iv) noexcept { *this = iv; } + constexpr qd(long long iv) noexcept { *this = iv; } + constexpr qd(char iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(unsigned short iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(unsigned int iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(unsigned long iv) noexcept { *this = iv; } + constexpr qd(unsigned long long iv) noexcept { *this = iv; } + constexpr qd(float iv) noexcept : x{0} { x[0] = iv; } + + // assignment operators for native types + constexpr qd& operator=(signed char rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(short rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(int rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(long rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(long long rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(unsigned char rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned short rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned int rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned long rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned long long rhs) noexcept { return 
convert_unsigned(rhs); } + constexpr qd& operator=(float rhs) noexcept { return convert_ieee754(rhs); } + constexpr qd& operator=(double rhs) noexcept { return convert_ieee754(rhs); } + + // conversion operators + explicit operator int() const noexcept { return convert_to_signed(); } + explicit operator long() const noexcept { return convert_to_signed(); } + explicit operator long long() const noexcept { return convert_to_signed(); } + explicit operator unsigned int() const noexcept { return convert_to_unsigned(); } + explicit operator unsigned long() const noexcept { return convert_to_unsigned(); } + explicit operator unsigned long long() const noexcept { return convert_to_unsigned(); } + explicit operator float() const noexcept { return convert_to_ieee754(); } + explicit operator double() const noexcept { return convert_to_ieee754(); } + + +#if LONG_DOUBLE_SUPPORT + // can't be constexpr as remainder calculation requires volatile designation + qd(long double iv) noexcept { *this = iv; } + qd& operator=(long double rhs) noexcept { return convert_ieee754(rhs); } + explicit operator long double() const noexcept { return convert_to_ieee754(); } +#endif + + // prefix operators + constexpr qd operator-() const noexcept { + return qd(-x[0], -x[1], -x[2], -x[3]); + } + + // arithmetic operators +#define IEEE_ERROR_BOUND 1 + qd& operator+=(const qd& rhs) { +#if defined(IEEE_ERROR_BOUND) + return *this = accurate_addition(*this, rhs); +#else // !IEEE_ERROR_BOUND -> CRAY_ERROR_BOUND + return *this = approximate_addition(*this, rhs); +#endif + } + qd& operator+=(double rhs) { + return operator+=(qd(rhs)); + } + qd& operator-=(const qd& rhs) { + return *this += -rhs; + } + qd& operator-=(double rhs) { + return *this += qd(-rhs); + } +#define ACCURATE_MULTIPLICATION 1 + qd& operator*=(const qd& rhs) { +#if defined(ACCURATE_MULTIPLICATION) + return *this = accurate_multiplication(*this, rhs); +#else + return *this = approximate_multiplication(*this, rhs); +#endif + } + qd& 
operator*=(double rhs) { + double q0, q1, q2; + + double p0 = two_prod(x[0], rhs, q0); + double p1 = two_prod(x[1], rhs, q1); + double p2 = two_prod(x[2], rhs, q2); + double p3 = x[3] * rhs; + + double s0 = p0; + double s2; + double s1 = two_sum(q0, p1, s2); + + three_sum(s2, q1, p2); + + three_sum2(q1, q2, p3); + double s3 = q1; + + double s4 = q2 + p2; + + sw::universal::renorm(s0, s1, s2, s3, s4); + x[0] = s0; + x[1] = s1; + x[2] = s2; + x[3] = s3; + return *this; + } +#define ACCURATE_DIVISION 1 + qd& operator/=(const qd& rhs) { +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (isnan()) throw qd_not_a_number(); + if (rhs.isnan()) throw qd_divide_by_nan(); + if (rhs.iszero()) { + if (iszero()) throw qd_not_a_number(); + throw qd_divide_by_zero(); + } +#else + if (isnan()) return *this; + + if (rhs.isnan()) { + *this = rhs; + return *this; + } + + if (rhs.iszero()) { + if (iszero()) { + *this = qd(SpecificValue::qnan); + } + else { + *this = (sign() ? qd(SpecificValue::infneg) : qd(SpecificValue::infpos)); + } + return *this; + } +#endif + +#if ACCURATE_DIVISION + return *this = accurate_division(*this, rhs); +#else + return *this = approximate_division(*this, rhs); +#endif + } + qd& operator/=(double rhs) { + return operator/=(qd(rhs)); + } + + // unary operators + qd& operator++() { + return *this; + } + qd operator++(int) { + qd tmp(*this); + operator++(); + return tmp; + } + qd& operator--() { + return *this; + } + qd operator--(int) { + qd tmp(*this); + operator--(); + return tmp; + } + + // modifiers + constexpr void clear() noexcept { x[0] = 0.0; x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setzero() noexcept { x[0] = 0.0; x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setinf(bool sign = true) noexcept { x[0] = (sign ? -INFINITY : INFINITY); x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setnan(int NaNType = NAN_TYPE_SIGNALLING) noexcept { x[0] = (NaNType == NAN_TYPE_SIGNALLING ? 
std::numeric_limits::signaling_NaN() : std::numeric_limits::quiet_NaN()); x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setsign(bool sign = true) noexcept { if (sign && x[0] > 0.0) x[0] = -x[0]; } + + constexpr void setbit(unsigned index, bool b = true) noexcept { + if (index < 64) { // set bit in lowest limb + sw::universal::setbit(x[3], index, b); + } + else if (index < 128) { // set bit in second to lowest limb + sw::universal::setbit(x[2], index - 64, b); + } + else if (index < 192) { // set bit in second to upper limb + sw::universal::setbit(x[1], index - 128, b); + } + else if (index < 128) { // set bit in upper limb + sw::universal::setbit(x[0], index - 192, b); + } + else { + // NOP if index out of bounds + } + } + constexpr void setbits(uint64_t value) noexcept { + x[0] = static_cast(value); + x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; + } + + void renorm() noexcept { + sw::universal::renorm(x[0], x[1], x[2], x[3]); + } + void renorm(double r) noexcept { + sw::universal::renorm(x[0], x[1], x[2], x[3], r); + } + + // argument is not protected for speed + double operator[](int index) const { return x[index]; } + double& operator[](int index) { return x[index]; } + + // create specific number system values of interest + constexpr qd& maxpos() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& minpos() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& zero() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& minneg() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& maxneg() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + + qd& assign(const std::string& txt) { + qd v; + if (parse(txt, v)) *this = v; + return *this; // Is this what we want? when the string is not valid, keep the current value? 
+ } + + // selectors + constexpr bool iszero() const noexcept { return x[0] == 0.0; } + constexpr bool isone() const noexcept { return x[0] == 1.0 && x[1] == 0.0; } + constexpr bool ispos() const noexcept { return x[0] > 0.0; } + constexpr bool isneg() const noexcept { return x[0] < 0.0; } + BIT_CAST_CONSTEXPR bool isnan(int NaNType = NAN_TYPE_EITHER) const noexcept { + bool negative = isneg(); + int nan_type; + bool isNaN = checkNaN(x[0], nan_type); + bool isNegNaN = isNaN && negative; + bool isPosNaN = isNaN && !negative; + return (NaNType == NAN_TYPE_EITHER ? (isNegNaN || isPosNaN) : + (NaNType == NAN_TYPE_SIGNALLING ? isNegNaN : + (NaNType == NAN_TYPE_QUIET ? isPosNaN : false))); + } + BIT_CAST_CONSTEXPR bool isinf(int InfType = INF_TYPE_EITHER) const noexcept { + bool negative = isneg(); + int inf_type; + bool isInf = checkInf(x[0], inf_type); + bool isNegInf = isInf && negative; + bool isPosInf = isInf && !negative; + return (InfType == INF_TYPE_EITHER ? (isNegInf || isPosInf) : + (InfType == INF_TYPE_NEGATIVE ? isNegInf : + (InfType == INF_TYPE_POSITIVE ? 
isPosInf : false))); + } + + constexpr bool sign() const noexcept { return (x[0] < 0.0); } + constexpr int scale() const noexcept { return _extractExponent(x[0]); } + constexpr int exponent() const noexcept { return _extractExponent(x[0]); } + + + //////////////////////////////////////////////////////////////////////////////////////////////////////// + /// arithmetic operator helpers + + qd accurate_addition(const qd& a, const qd& b) { + double u, v; + int i{ 0 }, j{ 0 }, k{ 0 }; + if (std::abs(a[i]) > std::abs(b[j])) { + u = a[i++]; + } + else { + u = b[j++]; + } + if (std::abs(a[i]) > std::abs(b[j])) { + v = a[i++]; + } + else { + v = b[j++]; + } + + u = quick_two_sum(u, v, v); + + double c[4] = { 0.0, 0.0, 0.0, 0.0 }; + while (k < 4) { + if (i >= 4 && j >= 4) { + c[k] = u; + if (k < 3) { + c[++k] = v; + } + break; + } + double t; + if (i >= 4) { + t = b[j++]; + } + else if (j >= 4) { + t = a[i++]; + } + else if (std::abs(a[i]) > std::abs(b[j])) { + t = a[i++]; + } + else { + t = b[j++]; + } + + double s = quick_three_accumulation(u, v, t); + + if (s != 0.0) { + c[k++] = s; + } + } + + // add the rest + for (k = i; k < 4; k++) c[3] += a[k]; + for (k = j; k < 4; k++) c[3] += b[k]; + + sw::universal::renorm(c[0], c[1], c[2], c[3]); + return qd(c[0], c[1], c[2], c[3]); + } + + qd approximate_addition(const qd& a, const qd& b) { + volatile double s0, s1, s2, s3; + volatile double t0, t1, t2, t3; + + s0 = two_sum(a[0], b[0], t0); + s1 = two_sum(a[1], b[1], t1); + s2 = two_sum(a[2], b[2], t2); + s3 = two_sum(a[3], b[3], t3); + + s1 = two_sum(s1, t0, t0); + three_sum(s2, t0, t1); + three_sum2(s3, t0, t2); + t0 = t0 + t1 + t3; + + sw::universal::renorm(s0, s1, s2, s3, t0); + return qd(s0, s1, s2, s3); + } + + qd approximate_addition_explicit(const qd& a, const qd& b) { + // Same as approximate_addition, but addition re-organized to guide bad compilers + + double s0 = a[0] + b[0]; + double s1 = a[1] + b[1]; + double s2 = a[2] + b[2]; + double s3 = a[3] + b[3]; + + double 
v0 = s0 - a[0]; + double v1 = s1 - a[1]; + double v2 = s2 - a[2]; + double v3 = s3 - a[3]; + + double u0 = s0 - v0; + double u1 = s1 - v1; + double u2 = s2 - v2; + double u3 = s3 - v3; + + double w0 = a[0] - u0; + double w1 = a[1] - u1; + double w2 = a[2] - u2; + double w3 = a[3] - u3; + + u0 = b[0] - v0; + u1 = b[1] - v1; + u2 = b[2] - v2; + u3 = b[3] - v3; + + double t0 = w0 + u0; + double t1 = w1 + u1; + double t2 = w2 + u2; + double t3 = w3 + u3; + + s1 = two_sum(s1, t0, t0); + three_sum(s2, t0, t1); + three_sum2(s3, t0, t2); + t0 = t0 + t1 + t3; + + sw::universal::renorm(s0, s1, s2, s3, t0); + return qd(s0, s1, s2, s3); + } + + /* quad-double * quad-double + a0 * b0 0 + a0 * b1 1 + a1 * b0 2 + a0 * b2 3 + a1 * b1 4 + a2 * b0 5 + a0 * b3 6 + a1 * b2 7 + a2 * b1 8 + a3 * b0 9 + */ + qd approximate_multiplication(const qd& a, const qd& b) { + double p0, p1, p2, p3, p4, p5; + double q0, q1, q2, q3, q4, q5; + double t0, t1; + double s0, s1, s2; + + p0 = two_prod(a[0], b[0], q0); + + p1 = two_prod(a[0], b[1], q1); + p2 = two_prod(a[1], b[0], q2); + + p3 = two_prod(a[0], b[2], q3); + p4 = two_prod(a[1], b[1], q4); + p5 = two_prod(a[2], b[0], q5); + + // Start accumulation of partials + three_sum(p1, p2, q0); + + // Six-Three Sum of p2, q1, q2, p3, p4, p5 + three_sum(p2, q1, q2); + three_sum(p3, p4, p5); + // compute (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5) + s0 = two_sum(p2, p3, t0); + s1 = two_sum(q1, p4, t1); + s2 = q2 + p5; + s1 = two_sum(s1, t0, t0); + s2 += (t0 + t1); + + // O(eps^3) order terms + s1 += a[0] * b[3] + a[1] * b[2] + a[2] * b[1] + a[3] * b[0] + q0 + q3 + q4 + q5; + sw::universal::renorm(p0, p1, s0, s1, s2); + return qd(p0, p1, s0, s1); + } + + qd accurate_multiplication(const qd& a, const qd& b) { + volatile double q0, q1, q2, q3, q4, q5; + double p0 = two_prod(a[0], b[0], q0); + + double p1 = two_prod(a[0], b[1], q1); + double p2 = two_prod(a[1], b[0], q2); + + double p3 = two_prod(a[0], b[2], q3); + double p4 = two_prod(a[1], b[1], q4); + 
double p5 = two_prod(a[2], b[0], q5); + + // Start Accumulation + three_sum(p1, p2, q0); + + // Six-Three Sum of p2, q1, q2, p3, p4, p5 + three_sum(p2, q1, q2); + three_sum(p3, p4, p5); + // compute (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5) + double t0, t1; + double s0 = two_sum(p2, p3, t0); + double s1 = two_sum(q1, p4, t1); + double s2 = q2 + p5; + s1 = two_sum(s1, t0, t0); + s2 += (t0 + t1); + + // O(eps^3) order terms + double q6, q7, q8, q9; + double p6 = two_prod(a[0], b[3], q6); + double p7 = two_prod(a[1], b[2], q7); + double p8 = two_prod(a[2], b[1], q8); + double p9 = two_prod(a[3], b[0], q9); + + // Nine-Two-Sum of q0, s1, q3, q4, q5, p6, p7, p8, p9 + q0 = two_sum(q0, q3, q3); + q4 = two_sum(q4, q5, q5); + p6 = two_sum(p6, p7, p7); + p8 = two_sum(p8, p9, p9); + // Compute (t0, t1) = (q0, q3) + (q4, q5) + t0 = two_sum(q0, q4, t1); + t1 += (q3 + q5); + // Compute (r0, r1) = (p6, p7) + (p8, p9) + double r1; + double r0 = two_sum(p6, p8, r1); + r1 += (p7 + p9); + // Compute (q3, q4) = (t0, t1) + (r0, r1) + q3 = two_sum(t0, r0, q4); + q4 += (t1 + r1); + // Compute (t0, t1) = (q3, q4) + s1 + t0 = two_sum(q3, s1, t1); + t1 += q4; + + // O(eps^4) terms -- Nine-One-Sum + t1 += a[1] * b[3] + a[2] * b[2] + a[3] * b[1] + q6 + q7 + q8 + q9 + s2; + + sw::universal::renorm(p0, p1, s0, t0, t1); + return qd(p0, p1, s0, t0); + } + + qd approximate_division(const qd& a, const qd& b) { + qd r{}; + + double q0 = a[0] / b[0]; + r = a - (b * q0); + + double q1 = r[0] / b[0]; + r -= (b * q1); + + double q2 = r[0] / b[0]; + r -= (b * q2); + + double q3 = r[0] / b[0]; + + sw::universal::renorm(q0, q1, q2, q3); + return qd(q0, q1, q2, q3); + } + + qd accurate_division(const qd& a, const qd& b) { + qd r{}; + + double q0 = a[0] / b[0]; + r = a - (b * q0); + + double q1 = r[0] / b[0]; + r -= (b * q1); + + double q2 = r[0] / b[0]; + r -= (b * q2); + + double q3 = r[0] / b[0]; + r -= (b * q3); + + double q4 = r[0] / b[0]; + + sw::universal::renorm(q0, q1, q2, q3, q4); + return qd(q0, 
q1, q2, q3); + } + + + //////////////////////////////////////////////////////////////////////////////////////////////////////// + /// decimal string converter helpers + + // convert to string containing digits number of digits + std::string to_string(std::streamsize precision = 7, std::streamsize width = 15, bool fixed = false, bool scientific = true, bool internal = false, bool left = false, bool showpos = false, bool uppercase = false, char fill = ' ') const { + std::string s; + bool negative = sign() ? true : false; + int e{ 0 }; + if (fixed && scientific) fixed = false; // scientific format takes precedence + if (isnan()) { + s = uppercase ? "NAN" : "nan"; + negative = false; + } + else { + if (negative) { s += '-'; } else { if (showpos) s += '+'; } + + if (isinf()) { + s += uppercase ? "INF" : "inf"; + } + else if (iszero()) { + s += '0'; + if (precision > 0) { + s += '.'; + s.append(static_cast(precision), '0'); + } + } + else { + int powerOfTenScale = static_cast(std::log10(std::fabs(x[0]))); + int integerDigits = (fixed ? (powerOfTenScale + 1) : 1); + int nrDigits = integerDigits + static_cast(precision); + + int nrDigitsForFixedFormat = nrDigits; + if (fixed) + nrDigitsForFixedFormat = std::max(60, nrDigits); // can be much longer than the max accuracy for double-double + + // a number in the range of [0.5, 1.0) to be printed with zero precision + // must be rounded up to 1 to print correctly + if (fixed && (precision == 0) && (std::abs(x[0]) < 1.0)) { + s += (std::abs(x[0]) >= 0.5) ? 
'1' : '0'; + return s; + } + + if (fixed && nrDigits <= 0) { + // process values with negative exponents (powerOfTenScale < 0) + s += '0'; + if (precision > 0) { + s += '.'; + s.append(static_cast(precision), '0'); + } + } + else { + char* t; + + if (fixed) { + t = new char[static_cast(nrDigitsForFixedFormat + 1)]; + to_digits(t, e, nrDigitsForFixedFormat); + } + else { + t = new char[static_cast(nrDigits + 1)]; + to_digits(t, e, nrDigits); + } + + if (fixed) { + // round the decimal string + round_string(t, nrDigits, &integerDigits); + + if (integerDigits > 0) { + int i; + for (i = 0; i < integerDigits; ++i) s += t[i]; + if (precision > 0) { + s += '.'; + for (int j = 0; j < precision; ++j, ++i) s += t[i]; + } + } + else { + s += "0."; + if (integerDigits < 0) s.append(static_cast(-integerDigits), '0'); + for (int i = 0; i < nrDigits; ++i) s += t[i]; + } + } + else { + s += t[0]; + if (precision > 0) s += '.'; + + for (int i = 1; i <= precision; ++i) + s += t[i]; + + } + delete[] t; + } + } + + // trap for improper offset with large values + // without this trap, output of values of the for 10^j - 1 fail for j > 28 + // and are output with the point in the wrong place, leading to a dramatically off value + if (fixed && (precision > 0)) { + // make sure that the value isn't dramatically larger + double from_string = atof(s.c_str()); + + // if this ratio is large, then we've got problems + if (std::fabs(from_string / x[0]) > 3.0) { + + // loop on the string, find the point, move it up one + // don't act on the first character + for (std::string::size_type i = 1; i < s.length(); ++i) { + if (s[i] == '.') { + s[i] = s[i - 1]; + s[i - 1] = '.'; + break; + } + } + + from_string = atof(s.c_str()); + // if this ratio is large, then the string has not been fixed + if (std::fabs(from_string / x[0]) > 3.0) { + //error("Re-rounding unsuccessful in large number fixed point trap."); + } + } + } + + if (!fixed && !isinf()) { + // construct the exponent + s += uppercase ? 
'E' : 'e'; + append_exponent(s, e); + } + } + + // process any fill + size_t strLength = s.length(); + if (strLength < static_cast(width)) { + size_t nrCharsToFill = (width - strLength); + if (internal) { + if (negative) + s.insert(static_cast(1), nrCharsToFill, fill); + else + s.insert(static_cast(0), nrCharsToFill, fill); + } + else if (left) { + s.append(nrCharsToFill, fill); + } + else { + s.insert(static_cast(0), nrCharsToFill, fill); + } + } + + return s; + } + +protected: + double x[4]; // fixed four (4) limbs, x[0] is highest order limb, x[3] is lowest order limb + + //////////////////////////////////////////////////////////////////////////////////////////////////////// + // private helper methods + + constexpr qd& convert_signed(int64_t v) noexcept { + if (0 == v) { + setzero(); + } + else { + x[0] = static_cast(v); + x[1] = static_cast(v - static_cast(x[0])); + } + return *this; + } + + constexpr qd& convert_unsigned(uint64_t v) noexcept { + if (0 == v) { + setzero(); + } + else { + x[0] = static_cast(v); + x[1] = static_cast(v - static_cast(x[0])); // difference is always positive + } + return *this; + } + + // no need to SFINAE this as it is an internal method that we ONLY call when we know the argument type is a native float + constexpr qd& convert_ieee754(float rhs) noexcept { + x[0] = double(rhs); + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& convert_ieee754(double rhs) noexcept { + x[0] = double(rhs); + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } +#if LONG_DOUBLE_SUPPORT + qd& convert_ieee754(long double rhs) { + volatile long double truncated = static_cast(double(rhs)); + volatile double remainder = static_cast(rhs - truncated); + x[0] = static_cast(truncated); + x[1] = remainder; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } +#endif + + // convert to native unsigned integer, use C++ conversion rules to cast down to float and double + template + Unsigned convert_to_unsigned() const noexcept { + 
int64_t h = static_cast(x[0]); + int64_t l = static_cast(x[1]); + return Unsigned(h + l); + } + + // convert to native unsigned integer, use C++ conversion rules to cast down to float and double + template + Signed convert_to_signed() const noexcept { + int64_t h = static_cast(x[0]); + int64_t l = static_cast(x[1]); + return Signed(h + l); + } + + // convert to native floating-point, use C++ conversion rules to cast down to float and double + template + Real convert_to_ieee754() const noexcept { + return Real(x[0] + x[1] + x[2] + x[3]); + } + + + //////////////////////////////////////////////////////////////////////////////////////////////////////// + /// functional helpers + + // precondition: string s must be all digits + void round_string(char* s, int precision, int* decimalPoint) const { + int nrDigits = precision; + // round decimal string and propagate carry + int lastDigit = nrDigits - 1; + if (s[lastDigit] >= '5') { + int i = nrDigits - 2; + s[i]++; + while (i > 0 && s[i] > '9') { + s[i] -= 10; + s[--i]++; + } + } + + // if first digit is 10, shift everything. + if (s[0] > '9') { + for (int i = precision; i >= 2; i--) s[i] = s[i - 1]; + s[0] = '1'; + s[1] = '0'; + + (*decimalPoint)++; // increment decimal point + ++precision; + } + + s[precision] = 0; // aqd termination null + } + + void append_exponent(std::string& str, int e) const { + str += (e < 0 ? 
'-' : '+'); + e = std::abs(e); + int k; + if (e >= 100) { + k = (e / 100); + str += static_cast('0' + k); + e -= 100 * k; + } + + k = (e / 10); + str += static_cast('0' + k); + e -= 10 * k; + + str += static_cast('0' + e); + } + + + //////////////////////////////////////////////////////////////////////////////////////////////////////// + /// decimal string converter helper + + + /// + /// to_digits generates the decimal digits representing + /// + /// + /// + /// + void to_digits(char* s, int& exponent, int precision) const { + constexpr qd _one(1.0), _ten(10.0); + constexpr double _log2(0.301029995663981); + double hi = x[0]; + //double lo = x[1]; + + if (iszero()) { + std::cout << "I am zero\n"; + exponent = 0; + for (int i = 0; i < precision; ++i) s[i] = '0'; + s[precision] = 0; // termination null + return; + } + + // First determine the (approximate) exponent. + // std::frexp(*this, &e); // e is appropriate for 0.5 <= x < 1 + int e; + std::frexp(hi, &e); + --e; // adjust e as frexp gives a binary e that is 1 too big + e = static_cast(_log2 * e); // estimate the power of ten exponent + qd r = abs(*this); + if (e < 0) { + if (e < -300) { + //r = qd(std::ldexp(r[0], 53), std::ldexp(r[1], 53)); + r *= pown(_ten, -e); + //r = qd(std::ldexp(r[0], -53), std::ldexp(r[1], -53)); + } + else { + r *= pown(_ten, -e); + } + } + else { + if (e > 0) { + if (e > 300) { + //r = qd(std::ldexp(r[0], -53), std::ldexp(r[1], -53)); + r /= pown(_ten, e); + //r = qd(std::ldexp(r[0], 53), std::ldexp(r[1], 53)); + } + else { + r /= pown(_ten, e); + } + } + } + + // Fix exponent if we have gone too far + if (r >= _ten) { + r /= _ten; + ++e; + } + else { + if (r < 1.0) { + r *= _ten; + --e; + } + } + + if ((r >= _ten) || (r < _one)) { + std::cerr << "to_digits() failed to compute exponent\n"; + return; + } + + // at this point the value is normalized to a decimal value between (0, 10) + // generate the digits + int nrDigits = precision + 1; + for (int i = 0; i < nrDigits; ++i) { + int 
mostSignificantDigit = static_cast(r[0]); + r -= mostSignificantDigit; + r *= 10.0; + + s[i] = static_cast(mostSignificantDigit + '0'); + } + + // Fix out of range digits + for (int i = nrDigits - 1; i > 0; --i) { + if (s[i] < '0') { + s[i - 1]--; + s[i] += 10; + } + else { + if (s[i] > '9') { + s[i - 1]++; + s[i] -= 10; + } + } + } + + if (s[0] <= '0') { + std::cerr << "to_digits() non-positive leading digit\n"; + return; + } + + // Round and propagate carry + int lastDigit = nrDigits - 1; + if (s[lastDigit] >= '5') { + int i = nrDigits - 2; + s[i]++; + while (i > 0 && s[i] > '9') { + s[i] -= 10; + s[--i]++; + } + } + + // If first digit is 10, shift left and increment exponent + if (s[0] > '9') { + ++e; + for (int i = precision; i >= 2; --i) { + s[i] = s[i - 1]; + } + s[0] = '1'; + s[1] = '0'; + } + + s[precision] = 0; // termination null + exponent = e; + } + +private: + + // qd - qd logic comparisons + friend bool operator==(const qd& lhs, const qd& rhs); + friend bool operator!=(const qd& lhs, const qd& rhs); + friend bool operator<=(const qd& lhs, const qd& rhs); + friend bool operator>=(const qd& lhs, const qd& rhs); + friend bool operator<(const qd& lhs, const qd& rhs); + friend bool operator>(const qd& lhs, const qd& rhs); + + // qd - literal logic comparisons + friend bool operator==(const qd& lhs, const double rhs); + + // literal - qd logic comparisons + friend bool operator==(const double lhs, const qd& rhs); + + friend bool operator<(const qd& lhs, const qd& rhs); + +}; + +//////////////////////// precomputed constants of note ///////////////////////////////// + +// precomputed quad-double constants courtesy of constants example program + +// Golden ratio PHI +constexpr qd qd_phi (1.6180339887498949, -5.4321152036825061e-17, 2.6543252083815655e-33, -3.3049919975020988e-50); +constexpr qd qd_inv_phi(0.6180339887498949, -5.4321152036825061e-17, 2.6543252083815655e-33, -3.3049919975021111e-50); +// Euler's number e +constexpr qd qd_e (2.7182818284590451, 
1.4456468917292502e-16, -2.1277171080381768e-33, 1.515630159841219e-49); +constexpr qd qd_inv_e (0.36787944117144233, -1.2428753672788363e-17, -5.830044851072742e-34, -2.8267977849017436e-50); + +// pi multiples and fractions +constexpr qd qd_2pi (6.2831853071795862, 2.4492935982947064e-16, -5.9895396194366793e-33, 2.2249084417267313e-49); +constexpr qd qd_pi (3.1415926535897931, 1.2246467991473532e-16, -2.9947698097183397e-33, 1.1124542208633657e-49); +constexpr qd qd_pi2 (1.5707963267948966, 6.123233995736766e-17, -1.4973849048591698e-33, 5.5622711043168283e-50); +constexpr qd qd_pi4 (0.78539816339744828, 3.061616997868383e-17, -7.4869245242958492e-34, 2.7811355521584142e-50); +constexpr qd qd_3pi4 (2.3561944901923448, 9.1848509936051484e-17, 3.9168984647504003e-33, -2.586798163270486e-49); +constexpr qd qd_inv_pi (0.31830988618379069, -1.9678676675182486e-17, -1.0721436282893004e-33, 8.053563926594112e-50); +constexpr qd qd_inv_pi2(0.63661977236758138, -3.9357353350364972e-17, -2.1442872565786008e-33, 1.6107127853188224e-49); + +// natural logarithm (base = e) +constexpr qd qd_ln2 (0.69314718055994529, 2.3190468138462996e-17, 5.7077084384162121e-34, -3.5824322106018105e-50); +constexpr qd qd_lne (1.0, 0.0, 0.0, 0.0); +constexpr qd qd_ln10 (2.3025850929940459, -2.1707562233822494e-16, -9.9842624544657766e-33, -4.0233574544502071e-49); +// binary logarithm (base = 2) +constexpr qd qd_lg2 (1.0, 0.0, 0.0, 0.0); +constexpr qd qd_lge (1.4426950408889634, 2.0355273740931033e-17, -1.0614659956117258e-33, -1.3836716780181395e-50); +constexpr qd qd_lg10 (3.3219280948873622, 1.661617516973592e-16, 1.2215512178458181e-32, 5.9551189702782481e-49); +// common logarithm (base = 10) +constexpr qd qd_log2 (0.3010299956639812, -2.8037281277851704e-18, 5.4719484023146385e-35, 5.1051389831070996e-51); +constexpr qd qd_loge (0.43429448190325182, 1.0983196502167651e-17, 3.717181233110959e-34, 7.7344843465042927e-51); +constexpr qd qd_log10 (1.0, 0.0, 0.0, 0.0); + +constexpr qd 
qd_sqrt2 (1.4142135623730951, -9.6672933134529135e-17, 4.1386753086994136e-33, 4.9355469914683538e-50); +constexpr qd qd_inv_sqrt2(0.70710678118654757, -4.8336466567264567e-17, 2.0693376543497068e-33, 2.4677734957341745e-50); + + +constexpr qd qd_max(1.79769313486231570815e+308, 9.97920154767359795037e+291); + +constexpr double qd_eps = 4.93038065763132e-32; // 2^-104 +constexpr double qd_min_normalized = 2.0041683600089728e-292; // = 2^(-1022 + 53) + +//////////////////////// helper functions ///////////////////////////////// + +inline std::string to_quad(const qd& v, int precision = 17) { + std::stringstream s; + s << std::setprecision(precision) << "( " << v[0] << ", " << v[1] << ", " << v[2] << ", " << v[3] << ')'; + return s.str(); +} + +inline std::string to_binary(const qd& number, bool bNibbleMarker = false) { + std::stringstream s; + constexpr int nrLimbs = 4; + for (int i = 0; i < nrLimbs; ++i) { + double_decoder decoder; + decoder.d = number[i]; + + std::string label = "x[" + std::to_string(i) + "]"; + s << std::setw(20) << label << " : "; + s << "0b"; + // print sign bit + s << (decoder.parts.sign ? '1' : '0') << '.'; + + // print exponent bits + { + uint64_t mask = 0x400; + for (int bit = 10; bit >= 0; --bit) { + s << ((decoder.parts.exponent & mask) ? '1' : '0'); + if (bNibbleMarker && bit != 0 && (bit % 4) == 0) s << '\''; + mask >>= 1; + } + } + + s << '.'; + + // print hi fraction bits + uint64_t mask = (uint64_t(1) << 51); + for (int bit = 51; bit >= 0; --bit) { + s << ((decoder.parts.fraction & mask) ? '1' : '0'); + if (bNibbleMarker && bit != 0 && (bit % 4) == 0) s << '\''; + mask >>= 1; + } + + s << " : " << number[i]; + if (i < 3) s << '\n'; + } + + return s.str(); +} + +//////////////////////// math functions ///////////////////////////////// + +inline qd reciprocal(const qd& a) { + return qd(1.0) / a; +} + +inline qd abs(const qd& a) { + return (a[0] < 0.0) ? 
-a : a; +} + +inline qd ceil(const qd& a) { + double x0{ 0.0 }, x1{ 0.0 }, x2{ 0.0 }, x3{ 0.0 }; + x0 = std::ceil(a[0]); + + if (x0 == a[0]) { + x1 = std::ceil(a[1]); + + if (x1 == a[1]) { + x2 = std::ceil(a[2]); + + if (x2 == a[2]) { + x3 = std::ceil(a[3]); + } + } + + renorm(x0, x1, x2, x3); + return qd(x0, x1, x2, x3); + } + + return qd(x0, x1, x2, x3); +} + +inline qd floor(const qd& a) { + double x0{ 0.0 }, x1{ 0.0 }, x2{ 0.0 }, x3{ 0.0 }; + x0 = std::floor(a[0]); + + if (x0 == a[0]) { + x1 = std::floor(a[1]); + + if (x1 == a[1]) { + x2 = std::floor(a[2]); + + if (x2 == a[2]) { + x3 = std::floor(a[3]); + } + } + + renorm(x0, x1, x2, x3); + return qd(x0, x1, x2, x3); + } + + return qd(x0, x1, x2, x3); +} + +// Round to Nearest integer +qd nint(const qd& a) { + double x0{ 0.0 }, x1{ 0.0 }, x2{ 0.0 }, x3{ 0.0 }; + x0 = nint(a[0]); + + if (x0 == a[0]) { + // First double is already an integer + x1 = nint(a[1]); + + if (x1 == a[1]) { + // Second double is already an integer + x2 = nint(a[2]); + + if (x2 == a[2]) { + // Third double is already an integer + x3 = nint(a[3]); + } + else { + if (std::abs(x2 - a[2]) == 0.5 && a[3] < 0.0) { + x2 -= 1.0; + } + } + + } + else { + if (std::abs(x1 - a[1]) == 0.5 && a[2] < 0.0) { + x1 -= 1.0; + } + } + + } + else { + /* First double is not an integer. */ + if (std::abs(x0 - a[0]) == 0.5 && a[1] < 0.0) { + x0 -= 1.0; + } + } + + renorm(x0, x1, x2, x3); + return qd(x0, x1, x2, x3); +} + +// Round to Nearest integer quick version. May be off by one when qd is very close to the middle of two integers. 
+inline qd quick_nint(const qd& a) { // rounds every limb independently, then renormalizes
+	qd r = qd(nint(a[0]), nint(a[1]), nint(a[2]), nint(a[3]));
+	r.renorm();
+	return r;
+}
+
+// quad-double * double, where double is a power of 2
+inline qd mul_pwr2(const qd& a, double b) {
+	return qd(a[0] * b, a[1] * b, a[2] * b, a[3] * b); // plain per-limb scaling, no renormalization step
+}
+
+/* quad-double ^ 2 = (x0 + x1 + x2 + x3) ^ 2
+                   = x0 ^ 2 + 2 x0 * x1 + (2 x0 * x2 + x1 ^ 2)
+                            + (2 x0 * x3 + 2 x1 * x2) */
+inline qd sqr(const qd& a) {
+	volatile double q0, q1, q2, q3; // NOTE(review): volatile outputs require the two_* helpers to accept volatile double& - confirm against their declarations
+	double p0 = two_sqr(a[0], q0);
+	double p1 = two_prod(2.0 * a[0], a[1], q1);
+	double p2 = two_prod(2.0 * a[0], a[2], q2);
+	double p3 = two_sqr(a[1], q3);
+
+	p1 = two_sum(q0, p1, q0);
+
+	q0 = two_sum(q0, q1, q1);
+	p2 = two_sum(p2, p3, p3);
+
+	double t0, t1;
+	double s0 = two_sum(q0, p2, t0);
+	double s1 = two_sum(q1, p3, t1);
+
+	s1 = two_sum(s1, t0, t0);
+	t0 += t1;
+
+	s1 = quick_two_sum(s1, t0, t0);
+	p2 = quick_two_sum(s0, s1, t1);
+	p3 = quick_two_sum(t1, t0, q0);
+
+	double p4 = 2.0 * a[0] * a[3]; // last-order cross terms use plain double products (no error term captured)
+	double p5 = 2.0 * a[1] * a[2];
+
+	p4 = two_sum(p4, p5, p5);
+	q2 = two_sum(q2, q3, q3);
+
+	t0 = two_sum(p4, q2, t1);
+	t1 = t1 + p5 + q3;
+
+	p3 = two_sum(p3, t0, p4);
+	p4 = p4 + q0 + t1;
+
+	renorm(p0, p1, p2, p3, p4);
+	return qd(p0, p1, p2, p3);
+}
+
+// Computes pow(qd, n), where n is an integer, by binary exponentiation; a negative n returns the reciprocal
+inline qd pown(const qd& a, int n) {
+	if (n == 0)
+		return 1.0;
+
+	qd r{ a }; // running square: a^(2^k) after k squarings
+	qd s{ 1.0 }; // product of the squares selected by the set bits of |n|
+	long long N = std::abs(static_cast<long long>(n)); // widen before abs(): std::abs(int) is undefined for INT_MIN
+
+	if (N > 1) {
+		while (N > 0) {
+			if (N % 2 == 1) {
+				s *= r;
+			}
+			N /= 2;
+			if (N > 0) r = sqr(r); // skip the final, unused squaring
+		}
+	}
+	else {
+		s = r; // |n| == 1
+	}
+
+	if (n < 0)
+		return (qd(1.0) / s);
+
+	return s;
+}
+
+//////////////////////// stream operators /////////////////////////////////
+
+// stream out a decimal floating-point representation of the quad-double
+inline std::ostream& operator<<(std::ostream& ostr, const qd& v) {
+	std::ios_base::fmtflags fmt = ostr.flags(); // snapshot the stream's formatting state and hand it to to_string()
+	std::streamsize precision = ostr.precision();
+	std::streamsize width = ostr.width();
+	char fillChar = ostr.fill();
+	bool showpos = fmt & std::ios_base::showpos;
+	bool uppercase = fmt & std::ios_base::uppercase;
+	bool fixed = fmt & std::ios_base::fixed;
+	bool scientific = fmt & std::ios_base::scientific;
+	bool internal = fmt & std::ios_base::internal;
+	bool left = fmt & std::ios_base::left;
+	return ostr << v.to_string(precision, width, fixed, scientific, internal, left, showpos, uppercase, fillChar);
+}
+
+// stream in an ASCII decimal floating-point format and assign it to a quad-double
+inline std::istream& operator>>(std::istream& istr, qd& v) {
+	std::string txt;
+	istr >> txt; // one whitespace-delimited token
+	if (!parse(txt, v)) { // v is left untouched on failure; the stream state is not modified here
+		std::cerr << "unable to parse -" << txt << "- into a quad-double value\n";
+	}
+	return istr;
+}
+
+////////////////// string operators
+
+// parse a decimal ASCII floating-point format and make a quad-double (qd) out of it; returns false (value untouched) on malformed input
+inline bool parse(const std::string& number, qd& value) {
+	char const* p = number.c_str();
+
+	// Skip any leading spaces
+	while (std::isspace(static_cast<unsigned char>(*p))) ++p; // <cctype> classifiers are undefined for negative char values
+
+	qd r{ 0.0 };
+	int nrDigits{ 0 };
+	int decimalPoint{ -1 }; // number of mantissa digits seen before the '.', or -1 if no '.' yet
+	int sign{ 0 }, eSign{ 1 };
+	int e{ 0 };
+	bool done{ false }, parsingMantissa{ true };
+	char ch;
+	while (!done && (ch = *p) != '\0') {
+		if (std::isdigit(static_cast<unsigned char>(ch))) {
+			if (parsingMantissa) {
+				int digit = ch - '0';
+				r *= 10.0;
+				r += static_cast<double>(digit);
+				++nrDigits;
+			}
+			else { // parsing exponent section
+				int digit = ch - '0';
+				e *= 10; // NOTE(review): no overflow guard on very long exponent fields
+				e += digit;
+			}
+		}
+		else {
+			switch (ch) {
+			case '.':
+				if (decimalPoint >= 0) return false; // second decimal point
+				decimalPoint = nrDigits;
+				break;
+
+			case '-':
+			case '+':
+				if (parsingMantissa) {
+					if (sign != 0 || nrDigits > 0) return false; // mantissa sign must come first and only once
+					sign = (ch == '-' ? -1 : 1);
+				}
+				else {
+					eSign = (ch == '-' ? -1 : 1);
+				}
+				break;
+
+			case 'E':
+			case 'e':
+				parsingMantissa = false;
+				break;
+
+			default:
+				return false; // any other character, including trailing whitespace
+			}
+		}
+
+		++p;
+	}
+	e *= eSign;
+
+	if (decimalPoint >= 0) e -= (nrDigits - decimalPoint); // account for the digits that followed the '.'
+	qd _ten(10.0, 0.0);
+	if (e > 0) {
+		r *= pown(_ten, e);
+	}
+	else {
+		if (e < 0) r /= pown(_ten, -e);
+	}
+	value = (sign == -1) ? -r : r;
+	return true;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// qd - qd binary logic operators
+
+// equal: precondition is that the storage is properly nulled in all arithmetic paths
+inline bool operator==(const qd& lhs, const qd& rhs) {
+	return (lhs[0] == rhs[0]) && (lhs[1] == rhs[1] && lhs[2] == rhs[2]) && (lhs[3] == rhs[3]);
+}
+
+inline bool operator!=(const qd& lhs, const qd& rhs) {
+	return !operator==(lhs, rhs);
+}
+
+inline bool operator< (const qd& lhs, const qd& rhs) { // lexicographic comparison over the four limbs
+	return (lhs[0] < rhs[0] ||
+		(lhs[0] == rhs[0] && (lhs[1] < rhs[1] ||
+			(lhs[1] == rhs[1] && (lhs[2] < rhs[2] ||
+				(lhs[2] == rhs[2] && lhs[3] < rhs[3]))))));
+}
+
+inline bool operator> (const qd& lhs, const qd& rhs) {
+	return operator< (rhs, lhs);
+}
+
+inline bool operator<=(const qd& lhs, const qd& rhs) {
+	return operator< (lhs, rhs) || operator==(lhs, rhs);
+}
+
+inline bool operator>=(const qd& lhs, const qd& rhs) {
+	return operator> (lhs, rhs) || operator==(lhs, rhs); // was !(lhs < rhs), which answered true when either operand is NaN
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// qd - literal binary logic operators
+//
+// equal: precondition is that the storage is properly nulled in all arithmetic paths
+inline bool operator==(const qd& lhs, double rhs) {
+	return operator==(lhs, qd(rhs));
+}
+
+inline bool operator!=(const qd& lhs, double rhs) {
+	return !operator==(lhs, rhs);
+}
+
+inline bool operator< (const qd& lhs, double rhs) {
+	return operator<(lhs, qd(rhs));
+}
+
+inline bool operator> (const qd& lhs, double rhs) {
+	return operator< (qd(rhs), lhs);
+}
+
+inline bool operator<=(const qd& lhs, double rhs) {
+	return operator< (lhs, rhs) || operator==(lhs, rhs);
+}
+
+inline bool operator>=(const qd& lhs, double rhs) {
+	return operator> (lhs, rhs) || operator==(lhs, rhs); // was !(lhs < rhs), which answered true when either operand is NaN
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// literal - qd binary logic operators
+//
+// equal: precondition is that the storage is properly nulled in all arithmetic paths
+inline bool operator==(double lhs, const qd& rhs) {
+	return operator==(qd(lhs), rhs);
+}
+
+inline bool operator!=(double lhs, const qd& rhs) {
+	return !operator==(lhs, rhs);
+}
+
+inline bool operator< (double lhs, const qd& rhs) {
+	return operator<(qd(lhs), rhs);
+}
+
+inline bool operator> (double lhs, const qd& rhs) {
+	return operator< (rhs, lhs);
+}
+
+inline bool operator<=(double lhs, const qd& rhs) {
+	return operator< (lhs, rhs) || operator==(lhs, rhs);
+}
+
+inline bool operator>=(double lhs, const qd& rhs) {
+	return operator> (lhs, rhs) || operator==(lhs, rhs); // was !(lhs < rhs), which answered true when either operand is NaN
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// qd - qd binary arithmetic operators
+// BINARY ADDITION
+inline qd operator+(const qd& lhs, const qd& rhs) {
+	qd sum{ lhs }; // copy the left operand, then reuse the compound assignment
+	sum += rhs;
+	return sum;
+}
+// BINARY SUBTRACTION
+inline qd operator-(const qd& lhs, const qd& rhs) {
+	qd diff{ lhs };
+	diff -= rhs;
+	return diff;
+}
+// BINARY MULTIPLICATION
+inline qd operator*(const qd& lhs, const qd& rhs) {
+	qd mul{ lhs };
+	mul *= rhs;
+	return mul;
+}
+// BINARY DIVISION
+inline qd operator/(const qd& lhs, const qd& rhs) {
+	qd ratio{ lhs };
+	ratio /= rhs;
+	return ratio;
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// qd - literal binary arithmetic operators
+// BINARY ADDITION
+inline qd operator+(const qd& lhs, double rhs) {
+	return operator+(lhs, qd(rhs)); // promote the double to qd and defer to the qd-qd overload
+}
+// BINARY SUBTRACTION
+inline qd operator-(const qd& lhs, double rhs) {
+	return operator-(lhs, qd(rhs));
+}
+// BINARY MULTIPLICATION
+inline qd operator*(const qd& lhs, double rhs) {
+	return operator*(lhs, qd(rhs));
+}
+// BINARY DIVISION
+inline qd operator/(const qd& lhs, double rhs) {
+	return operator/(lhs, qd(rhs));
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// literal - qd binary arithmetic operators
+// BINARY ADDITION
+inline qd operator+(double lhs, const qd& rhs) {
+	return operator+(qd(lhs), rhs); // promote the double to qd and defer to the qd-qd overload
+}
+// BINARY SUBTRACTION
+inline qd operator-(double lhs, const qd& rhs) {
+	return operator-(qd(lhs), rhs);
+}
+// BINARY MULTIPLICATION
+inline qd operator*(double lhs, const qd& rhs) {
+	return operator*(qd(lhs), rhs);
+}
+// BINARY DIVISION
+inline qd operator/(double lhs, const qd& rhs) {
+	return operator/(qd(lhs), rhs);
+}
+
+}} // namespace sw::universal
diff --git a/include/universal/traits/dd_traits.hpp b/include/universal/traits/dd_traits.hpp index 8ead7520c..07b56d6ba 100644 --- a/include/universal/traits/dd_traits.hpp +++ b/include/universal/traits/dd_traits.hpp @@ -1,5 +1,5 @@ #pragma once -// lns_traits.hpp : traits for doubledouble (dd) arithmetic type +// dd_traits.hpp : traits for double-double (dd) arithmetic type // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -9,7 +9,7 @@ namespace sw { namespace universal { -// define a trait for doubledouble (dd) types +// define a trait for double-double (dd) type template struct is_dd_trait : false_type diff --git a/include/universal/traits/metaprogramming.hpp b/include/universal/traits/metaprogramming.hpp index 18c629baf..7dba07487 100644 --- a/include/universal/traits/metaprogramming.hpp +++ b/include/universal/traits/metaprogramming.hpp @@ -1,7 +1,8 @@ #pragma once // metaprogramming.hpp : meta-programming patterns // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. 
+// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include diff --git a/include/universal/traits/qd_traits.hpp b/include/universal/traits/qd_traits.hpp new file mode 100644 index 000000000..f1c9f58df --- /dev/null +++ b/include/universal/traits/qd_traits.hpp @@ -0,0 +1,31 @@ +#pragma once +// qd_traits.hpp : traits for quad-double (qd) arithmetic type +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + +// define a trait for quad-double (qd) type +template +struct is_qd_trait + : false_type +{ +}; + +template<> +struct is_qd_trait< qd > + : true_type +{ +}; + +template +constexpr bool is_qd = is_qd_trait<_Ty>::value; + +template +using enable_if_qd = std::enable_if_t, _Ty>; + +}} // namespace sw::universal diff --git a/playground/CMakeLists.txt b/playground/CMakeLists.txt index 3d16f61a0..65ab9ae12 100644 --- a/playground/CMakeLists.txt +++ b/playground/CMakeLists.txt @@ -5,7 +5,6 @@ set(COMPLEX_SRCS complex.cpp) set(REAL_SRCS efunc_posits.cpp efunc_valids.cpp gismo_test.cpp - meta_programming.cpp serialization.cpp skeleton.cpp type_test.cpp diff --git a/playground/meta_programming.cpp b/playground/meta_programming.cpp deleted file mode 100644 index 497021ba3..000000000 --- a/playground/meta_programming.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// meta_programming.cpp: playground to experiment with meta programming techniques to generalize functions and algorithms -// -// Copyright (C) 2017 Stillwater Supercomputing, Inc. -// SPDX-License-Identifier: MIT -// -// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
- -// enable posit arithmetic exceptions -#define POSIT_THROW_ARITHMETIC_EXCEPTION 1 -#include - -template -struct hasSerialize { - typedef char yes[1]; - typedef yes no[2]; - - // helper to determine if serialize is a function - template - struct reallyHas; - - template static yes& test(reallyHas* /*unused*/) { yes y{0}; return y; } - template static yes& test(reallyHas* /*unused*/) { yes y{0}; return y; } - - template static no& test(...) { no n{0}; return n; } - - // constant used as return value for the test - static const bool value = sizeof(test(0)) == sizeof(yes); -}; - -struct A {}; - -std::string to_string(const A& a) { - return "I am an A"; -} - -struct B { - std::string serialize() const { - return "I am a B"; - } -}; - -struct C { - std::string serialize; -}; - -std::string to_string(const C& c) { - return "I am a C"; -} - -namespace sw { - // typeless struct, will always fail substitution - template - struct enable_if {}; - - // specialization for type T - template - struct enable_if { - using type = T; - }; - - template - typename enable_if::value, std::string>::type serialize(const T& obj) { - return obj.serialize(); - } - template - typename enable_if::value, std::string>::type serialize(const T& obj) { - return to_string(obj); - } -} - -int main(int argc, char** argv) -try { - bool bSuccess = true; - - A a; - B b; - C c; - - /* goal - cout << serialize(a) << endl; - cout << serialize(b) << endl; - cout << serialize(c) << endl; - */ - - std::cout << hasSerialize::value << '\n'; - std::cout << hasSerialize::value << '\n'; - std::cout << hasSerialize::value << '\n'; - - // pedantic - sw::enable_if::type t1; // type t1 is an int - sw::enable_if::value, int>::type t2; // t2 is an int - // to get rid of warnings in g++ - t1 = t2 = 1; - std::cout << t1 << t2 << '\n'; - // enable_if::type t3; doesn't compile as enable_if doesn't have a type type - // enable_if::value, int>::type t4; doesn't compile as enable_if doesn't have a type type - - // with 
enable_if we have the indirection to dispatch the right function: serialize or to_string - std::cout << sw::serialize(a) << '\n'; - std::cout << sw::serialize(b) << '\n'; - std::cout << sw::serialize(c) << '\n'; - - return (bSuccess ? EXIT_SUCCESS : EXIT_FAILURE); -} -catch (char const* msg) { - std::cerr << "Caught exception: " << msg << std::endl; - return EXIT_FAILURE; -} -catch (const sw::universal::posit_arithmetic_exception& err) { - std::cerr << "Uncaught posit arithmetic exception: " << err.what() << std::endl; - return EXIT_FAILURE; -} -catch (const sw::universal::quire_exception& err) { - std::cerr << "Uncaught quire exception: " << err.what() << std::endl; - return EXIT_FAILURE; -} -catch (const sw::universal::posit_internal_exception& err) { - std::cerr << "Uncaught posit internal exception: " << err.what() << std::endl; - return EXIT_FAILURE; -} -catch (const std::runtime_error& err) { - std::cerr << "Uncaught runtime exception: " << err.what() << std::endl; - return EXIT_FAILURE; -} -catch (...) 
{ - std::cerr << "Caught unknown exception" << std::endl; - return EXIT_FAILURE; -} diff --git a/static/bfloat/api/api.cpp b/static/bfloat/api/api.cpp index 68aadbc54..e4b16c13b 100644 --- a/static/bfloat/api/api.cpp +++ b/static/bfloat/api/api.cpp @@ -205,7 +205,7 @@ try { std::cout << std::scientific; } - std::cout << "+--------- special value properties bfloat16 vs IEEE754 --------+\n"; + std::cout << "+--------- special value properties bfloat16 vs IEEE-754 --------+\n"; { float fa; fa = NAN; @@ -229,23 +229,38 @@ try { } { - std::cout << "bfloat(INFINITY): " << bfloat16(INFINITY) << "\n"; - std::cout << "bfloat(-INFINITY): " << bfloat16(-INFINITY) << "\n"; + std::cout << "bfloat16(INFINITY): " << bfloat16(INFINITY) << "\n"; + std::cout << "bfloat16(-INFINITY): " << bfloat16(-INFINITY) << "\n"; - std::cout << "bfloat(std::numeric_limits::infinity()) : " << bfloat16(std::numeric_limits::infinity()) << "\n"; - std::cout << "bfloat(-std::numeric_limits::infinity()) : " << bfloat16(-std::numeric_limits::infinity()) << "\n"; + std::cout << "bfloat16(std::numeric_limits::infinity()) : " << bfloat16(std::numeric_limits::infinity()) << "\n"; + std::cout << "bfloat16(-std::numeric_limits::infinity()) : " << bfloat16(-std::numeric_limits::infinity()) << "\n"; std::cout << " 2 * std::numeric_limits::infinity() : " << 2 * std::numeric_limits::infinity() << "\n"; - std::cout << " 2 * std::numeric_limits::infinity() : " << 2 * std::numeric_limits::infinity() << "\n"; - std::cout << "-2 * std::numeric_limits::infinity() : " << -2 * std::numeric_limits::infinity() << "\n"; + std::cout << " 2 * std::numeric_limits::infinity() : " << 2 * std::numeric_limits::infinity() << "\n"; + std::cout << "-2 * std::numeric_limits::infinity() : " << -2 * std::numeric_limits::infinity() << "\n"; std::cout << "sw::universal::nextafter(bfloat16(0), std::numeric_limits::infinity()) : " << sw::universal::nextafter(bfloat16(-0), std::numeric_limits::infinity()) << "\n"; - std::cout << 
"std::nextafter(float(0), std::numeric_limits::infinity()) : " << std::nextafter(float(-0), std::numeric_limits::infinity()) << "\n"; + std::cout << "std::nextafter(float(0), std::numeric_limits::infinity()) : " << std::nextafter(float(-0), std::numeric_limits::infinity()) << "\n"; std::cout << "sw::universal::nextafter(bfloat16(0), -std::numeric_limits::infinity()) : " << sw::universal::nextafter(bfloat16(0), -std::numeric_limits::infinity()) << "\n"; - std::cout << "std::nextafter(float(0), -std::numeric_limits::infinity()) : " << std::nextafter(float(0), -std::numeric_limits::infinity()) << "\n"; + std::cout << "std::nextafter(float(0), -std::numeric_limits::infinity()) : " << std::nextafter(float(0), -std::numeric_limits::infinity()) << "\n"; + + std::cout << "bfloat16(std::numeric_limits::quiet_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) : " << bfloat16(std::numeric_limits::quiet_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) << "\n"; + std::cout << "bfloat16(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) : " << bfloat16(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) << "\n"; + std::cout << "bfloat16(std::numeric_limits::quiet_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) : " << bfloat16(std::numeric_limits::quiet_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) << "\n"; + std::cout << "bfloat16(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) : " << bfloat16(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) << "\n"; + + float float_sNaN{ std::numeric_limits::signaling_NaN() }; + ReportValue(float_sNaN, "float_sNaN"); + bfloat16 bfloat_sNaN{ float_sNaN }; + ReportValue(bfloat_sNaN, "bfloat_sNaN"); + to_binary(bfloat_sNaN); + + float float_qNaN{ std::numeric_limits::quiet_NaN() }; + ReportValue(float_qNaN, "float_qNaN"); + bfloat16 bfloat_qNaN{ float_qNaN }; + ReportValue(bfloat_qNaN, "bfloat_qNaN"); + to_binary(bfloat_qNaN); - std::cout << 
"cfloat(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) : " << bfloat16(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) << "\n"; - std::cout << "cfloat(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) : " << bfloat16(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) << "\n"; } ReportTestSuiteResults(test_suite, nrOfFailedTestCases); diff --git a/static/bfloat/arithmetic/arithmetic.cpp b/static/bfloat/arithmetic/arithmetic.cpp index 926323e06..6e02414fa 100644 --- a/static/bfloat/arithmetic/arithmetic.cpp +++ b/static/bfloat/arithmetic/arithmetic.cpp @@ -62,7 +62,7 @@ try { #if REGRESSION_LEVEL_1 - constexpr unsigned nrOfRandoms = 1000; + constexpr unsigned nrOfRandoms = 1000000; std::stringstream adds; adds << test_tag << " " << nrOfRandoms << " random adds"; std::string description = adds.str(); diff --git a/static/cfloat/api/api.cpp b/static/cfloat/api/api.cpp index 756998461..6cee0fe5f 100644 --- a/static/cfloat/api/api.cpp +++ b/static/cfloat/api/api.cpp @@ -21,7 +21,7 @@ int main() try { using namespace sw::universal; - std::string test_suite = "cfloat<> Application Programming Interface tests"; + std::string test_suite = "cfloat<> Application Programming Interface demonstration"; int nrOfFailedTestCases = 0; // important behavioral traits diff --git a/static/dd/api/api.cpp b/static/dd/api/api.cpp index b53c210c2..542d2dfa2 100644 --- a/static/dd/api/api.cpp +++ b/static/dd/api/api.cpp @@ -16,8 +16,6 @@ #include #include - - namespace sw { namespace universal { @@ -25,7 +23,7 @@ namespace sw { void Progression(Real v) { using namespace sw::universal; - auto oldPrec = std::cout.precision(); + auto defaultPrecision = std::cout.precision(); float f{ float(v) }; std::cout << std::setprecision(7); std::cout << to_binary(f, true) << " : " << f << '\n'; @@ -37,17 +35,17 @@ namespace sw { dd a{ v }; std::cout << std::setprecision(35); std::cout << 
to_binary(a, true) << " : " << a << '\n'; - std::cout << std::setprecision(oldPrec); + std::cout << std::setprecision(defaultPrecision); } dd parse(const std::string& str) { using namespace sw::universal; dd v(str); - auto oldPrec = std::cout.precision(); + auto defaultPrecision = std::cout.precision(); std::cout << std::setprecision(std::numeric_limits::digits10); std::cout << "string: " << str << " = ( " << v.high() << ", " << v.low() << ") "; - std::cout << std::setprecision(oldPrec); + std::cout << std::setprecision(defaultPrecision); return v; } @@ -73,10 +71,10 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble (dd) API tests"; + std::string test_suite = "double-double (dd) API tests"; int nrOfFailedTestCases = 0; - auto oldPrec = std::cout.precision(); + auto defaultPrecision = std::cout.precision(); // important behavioral traits { @@ -85,7 +83,7 @@ try { } // default behavior - std::cout << "+--------- Default dd has subnormals, but no supernormals\n"; + std::cout << "+--------- Default dd has subnormals, but no supernormals ---------+\n"; { uint64_t big = (1ull << 53); std::cout << to_binary(big) << " : " << big << '\n'; @@ -97,13 +95,56 @@ try { } // arithmetic behavior - std::cout << "+--------- Default dd has subnormals, but no supernormals\n"; + std::cout << "+--------- Default dd has subnormals, but no supernormals ---------+\n"; { dd a(2.0), b(4.0); ArithmeticOperators(a, b); } - std::cout << "+--------- fraction bit progressions \n"; + // helper api + std::cout << "+--------- helpers to go from double to double-double ---------+\n"; + { + double a, b, c; + a = 1.0; + b = ulp(1.0) / 2.0; + c = a + b; + dd dd_c = add(a, b); + std::cout << "demonstrating cancellation of information when adding\n"; + ReportValue(a, "a = 1.0"); + ReportValue(c, "c = a + ulp(1.0)/2"); + std::cout << "double c = " << std::setprecision(16) << c << std::setprecision(defaultPrecision) << '\n'; + std::cout << "dd c = " << 
std::setprecision(32) << dd_c << std::setprecision(defaultPrecision) << '\n'; + + std::cout << "demonstrating cancellation of information when subtracting\n"; + c = a - b; + dd_c = sub(a, b); + ReportValue(a, "a = 1.0"); + ReportValue(c, "c = a - ulp(1.0)/2"); + std::cout << "double c = " << std::setprecision(16) << c << std::setprecision(defaultPrecision) << '\n'; + std::cout << "dd c = " << std::setprecision(32) << dd_c << std::setprecision(defaultPrecision) << '\n'; + + std::cout << "demonstrating cancellation of information when multiplying\n"; + double x = ulp(1.0); + double y = 1.5 + x; + double z = x * y; + dd dd_z = mul(x, y); + ReportValue(z, "z = y * x"); + std::cout << "double z = " << std::setprecision(16) << z << std::setprecision(defaultPrecision) << '\n'; + std::cout << "dd z = " << std::setprecision(32) << dd_z << std::setprecision(defaultPrecision) << '\n'; + + std::cout << "demonstrating cancellation of information when dividing\n"; + x = ulp(1.0); + y = 1.5 + x; + z = y / x; + dd_z = div(y, x); + ReportValue(z, "z = y / x"); + std::cout << "double z = " << std::setprecision(16) << z << std::setprecision(defaultPrecision) << '\n'; + std::cout << "dd z = " << std::setprecision(32) << dd_z << std::setprecision(defaultPrecision) << '\n'; + + } + + // fraction bit behavior + std::cout << "+--------- fraction bit progressions ---------+\n"; { float fulp = ulp(1.0f); Progression(1.0f + fulp); @@ -114,7 +155,7 @@ try { } // report on the dynamic range of some standard configurations - std::cout << "+--------- Dynamic range doubledouble configurations --------+\n"; + std::cout << "+--------- Dynamic range doubledouble configurations ---------+\n"; { dd a; // uninitialized @@ -135,7 +176,7 @@ try { } // constexpr and specific values - std::cout << "+--------- constexpr and specific values --------+\n"; + std::cout << "+--------- constexpr and specific values ---------+\n"; { using Real = dd; @@ -153,7 +194,7 @@ try { } // set bit patterns - std::cout << 
"+--------- set bit patterns API --------+\n"; + std::cout << "+--------- set bit patterns API ---------+\n"; { using Real = dd; @@ -180,7 +221,7 @@ try { } // parse decimal strings - std::cout << "+--------- parse API --------+\n"; + std::cout << "+--------- parse API ---------+\n"; { std::string ddstr; dd v; @@ -219,7 +260,7 @@ try { std::cout << std::setprecision(37); print(std::cout, parse("2.718281828459045235360287471352662498")); //37 digits - std::cout << std::setprecision(oldPrec); + std::cout << std::setprecision(defaultPrecision); } std::cout << "+--------- set specific values of interest --------+\n"; @@ -287,8 +328,8 @@ try { std::cout << "sw::universal::nextafter(dd(0), -std::numeric_limits
::infinity()) : " << sw::universal::nextafter(dd(0), -std::numeric_limits
::infinity()) << "\n"; std::cout << "std::nextafter(float(0), -std::numeric_limits::infinity()) : " << std::nextafter(float(0), -std::numeric_limits::infinity()) << "\n"; - std::cout << "cfloat(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) : " << dd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) << "\n"; - std::cout << "cfloat(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) : " << dd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) << "\n"; + std::cout << "dd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) : " << dd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) << "\n"; + std::cout << "dd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) : " << dd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) << "\n"; } ReportTestSuiteResults(test_suite, nrOfFailedTestCases); diff --git a/static/dd/api/attributes.cpp b/static/dd/api/attributes.cpp index c0a3693d4..58fb7eb3b 100644 --- a/static/dd/api/attributes.cpp +++ b/static/dd/api/attributes.cpp @@ -1,4 +1,4 @@ -// attributes.cpp: attribute tests for Google Brain floating-point +// attributes.cpp: attribute tests for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -65,7 +65,7 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble attribute functions"; + std::string test_suite = "double-double attribute functions"; std::string test_tag = "attributes"; bool reportTestCases = true; int nrOfFailedTestCases = 0; diff --git a/static/dd/api/constants.cpp b/static/dd/api/constants.cpp index 5537386c3..04aafdf54 100644 --- a/static/dd/api/constants.cpp +++ b/static/dd/api/constants.cpp @@ -1,4 +1,4 @@ -// constants.cpp: test suite runner for creating and verifying doubledouble constants +// constants.cpp: test suite runner for creating and verifying double-double constants // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -31,6 +31,88 @@ namespace sw { std::cout << std::setprecision(oldPrec); } + const dd dd_inv_int[] = { + dd(std::numeric_limits< dd >::infinity()), // 1/0 + dd("1.0"), // 1/1 + dd("0.5"), // 1/2 + dd("0.3333333333333333333333333333333333333"), // 1/3 + dd("0.25"), // 1/4 + dd("0.2"), // 1/5 + dd("0.1666666666666666666666666666666666667"), // 1/6 + dd("0.1428571428571428571428571428571428571"), // 1/7 + dd("0.125"), // 1/8 + dd("0.1111111111111111111111111111111111111"), // 1/9 + dd("0.1"), // 1/10 + dd("0.0909090909090909090909090909090909091"), // 1/11 + dd("0.0833333333333333333333333333333333333"), // 1/12 + dd("0.0769230769230769230769230769230769231"), // 1/13 + dd("0.0714285714285714285714285714285714286"), // 1/14 + dd("0.0666666666666666666666666666666666667"), // 1/15 + dd("0.0625"), // 1/16 + dd("0.0588235294117647058823529411764705882"), // 1/17 + dd("0.0555555555555555555555555555555555556"), // 1/18 + dd("0.0526315789473684210526315789473684211"), // 1/19 + dd("0.05"), // 1/20 + dd("0.0476190476190476190476190476190476190"), // 1/21 + dd("0.0454545454545454545454545454545454545"), // 1/22 + dd("0.0434782608695652173913043478260869565"), // 1/23 + dd("0.0416666666666666666666666666666666667"), 
// 1/24 + dd("0.04"), // 1/25 + dd("0.0384615384615384615384615384615384615"), // 1/26 + dd("0.0370370370370370370370370370370370370"), // 1/27 + dd("0.0357142857142857142857142857142857143"), // 1/28 + dd("0.0344827586206896551724137931034482759"), // 1/29 + dd("0.0333333333333333333333333333333333333"), // 1/30 + dd("0.0322580645161290322580645161290322581"), // 1/31 + dd("0.03125"), // 1/32 + dd("0.0303030303030303030303030303030303030"), // 1/33 + dd("0.0294117647058823529411764705882352941"), // 1/34 + dd("0.0285714285714285714285714285714285714"), // 1/35 + dd("0.0277777777777777777777777777777777778"), // 1/36 + dd("0.0270270270270270270270270270270270270"), // 1/37 + dd("0.0263157894736842105263157894736842105"), // 1/38 + dd("0.0256410256410256410256410256410256410"), // 1/39 + dd("0.025"), // 1/40 + dd("0.0243902439024390243902439024390243902") // 1/41 + }; + + const dd dd_inv_fact[] = { + dd("1.0"), // 1/0! + dd("1.0"), // 1/1! + dd("0.5"), // 1/2! + dd("1.66666666666666666666666666666666667E-1"), // 1/3! + dd("4.16666666666666666666666666666666667E-2"), // 1/4! + dd("8.33333333333333333333333333333333333E-3"), // 1/5! + dd("1.38888888888888888888888888888888889E-3"), // 1/6! + dd("1.98412698412698412698412698412698413E-4"), // 1/7! + dd("2.48015873015873015873015873015873016E-5"), // 1/8! + dd("2.75573192239858906525573192239858907E-6"), // 1/9! + dd("2.75573192239858906525573192239858907E-7"), // 1/10! + dd("2.50521083854417187750521083854417188E-8"), // 1/11! + dd("2.08767569878680989792100903212014323E-9"), // 1/12! + dd("1.60590438368216145993923771701549479E-10"), // 1/13! + dd("1.14707455977297247138516979786821057E-11"), // 1/14! + dd("7.64716373181981647590113198578807044E-13"), // 1/15! + dd("4.77947733238738529743820749111754403E-14"), // 1/16! + dd("2.81145725434552076319894558301032002E-15"), // 1/17! + dd("1.56192069685862264622163643500573334E-16"), // 1/18! + dd("8.22063524662432971695598123687228075E-18"), // 1/19! 
+ dd("4.11031762331216485847799061843614037E-19"), // 1/20! + dd("1.95729410633912612308475743735054304E-20"), // 1/21! + dd("8.89679139245057328674889744250246834E-22"), // 1/22! + dd("3.86817017063068403771691193152281232E-23"), // 1/23! + dd("1.61173757109611834904871330480117180E-24"), // 1/24! + dd("6.44695028438447339619485321920468721E-26"), // 1/25! + dd("2.47959626322479746007494354584795662E-27"), // 1/26! + dd("9.18368986379554614842571683647391340E-29"), // 1/27! + dd("3.27988923706983791015204172731211193E-30"), // 1/28! + dd("1.13099628864477169315587645769383170E-31"), // 1/29! + dd("3.76998762881590564385292152564610566E-33"), // 1/30! + dd("1.21612504155351794962997468569229215E-34"), // 1/31! + dd("3.80039075485474359259367089278841297E-36"), // 1/32! + dd("1.15163356207719502805868814932982211E-37"), // 1/33! + }; + void EnumerateConstants() { dd _zero("0.0"); report(_zero); dd _one("1.0"); report(_one); @@ -100,41 +182,43 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble constants"; - std::string test_tag = "dd constants"; - bool reportTestCases = false; + std::string test_suite = "double-double constants"; + std::string test_tag = "dd constants"; + bool reportTestCases = false; int nrOfFailedTestCases = 0; ReportTestSuiteHeader(test_suite, reportTestCases); #if MANUAL_TESTING - dd a, b, c; - double _third = 0.3333333333333333333333333333333; - double _third2 = _third * pow(2.0, -53.0); - double _short = 0.3333333333333333; - ReportValue(_short, "0.3333333333333333", 35, 32); - ReportValue(_third, "0.3333333333333333333333333333333", 35, 32); - - a = _third; - b = _third2; - ReportValue(a, "0.3333....", 35, 32); - ReportValue(b, "0.3333....", 35, 32); - c = a + b; - ReportValue(c, "0.3333....", 35, 32); - std::cout << to_pair(c) << '\n'; - - dd d(_third, _third2); - ReportValue(d, "0.3333....", 35, 32); - std::cout << to_pair(d) << '\n'; - - dd e("0.3333333333333333333333333333333333333333333333333"); - 
ReportValue(e, "0.3333....", 35, 32); - std::cout << to_pair(e) << '\n'; - - dd f(0.3333333333333333, 1.8503717077085935e-17); - ReportValue(f, "0.3333....", 35, 32); - std::cout << to_pair(f) << '\n'; + { + dd a, b, c; + double _third = 0.3333333333333333333333333333333; + double _third2 = _third * pow(2.0, -53.0); + double _short = 0.3333333333333333; + ReportValue(_short, "0.3333333333333333", 35, 32); + ReportValue(_third, "0.3333333333333333333333333333333", 35, 32); + + a = _third; + b = _third2; + ReportValue(a, "0.3333....", 35, 32); + ReportValue(b, "0.3333....", 35, 32); + c = a + b; + ReportValue(c, "0.3333....", 35, 32); + std::cout << to_pair(c) << '\n'; + + dd d(_third, _third2); + ReportValue(d, "0.3333....", 35, 32); + std::cout << to_pair(d) << '\n'; + + dd e("0.3333333333333333333333333333333333333333333333333"); + ReportValue(e, "0.3333....", 35, 32); + std::cout << to_pair(e) << '\n'; + + dd f(0.3333333333333333, 1.8503717077085935e-17); + ReportValue(f, "0.3333....", 35, 32); + std::cout << to_pair(f) << '\n'; + } // parsing scientific formats @@ -214,10 +298,10 @@ dd_inv_sqrt2 : 7.07106781186547524400844362104854e-01 vs 7.071067811865475244 */ auto oldPrec = std::cout.precision(); std::cout << std::setprecision(32); - for (auto e : constant_symbol_table) { - dd c(e.digits); - dd error = (c - e.value); - std::cout << std::left << std::setw(15) << e.name << " : " << c << " vs " << e.value << " : " << to_pair(c) << " : " << error << '\n'; + for (auto record : constant_symbol_table) { + dd a(record.digits); + dd error = (a - record.value); + std::cout << std::left << std::setw(15) << record.name << " : " << a << " vs " << record.value << " : " << to_pair(a) << " : " << error << '\n'; } std::cout << std::setprecision(oldPrec); diff --git a/static/dd/api/experiments.cpp b/static/dd/api/experiments.cpp index 86177b2cd..b807db733 100644 --- a/static/dd/api/experiments.cpp +++ b/static/dd/api/experiments.cpp @@ -1,4 +1,4 @@ -// experiments.cpp: 
experiments with the doubledouble floating-point number system +// experiments.cpp: experiments with the double-double floating-point number system // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -81,7 +81,7 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble (dd) experiments"; + std::string test_suite = "double-double (dd) experiments"; int nrOfFailedTestCases = 0; auto oldPrec = std::cout.precision(); @@ -97,6 +97,37 @@ try { } + std::cout << "Setting float bits\n"; + { + float v{0.0f}; + setbit(v,31); + ReportValue(v); + setbit(v,23); // set min normal + ReportValue(v); + setbit(v,23,false); setbit(v,0); // set smallest denorm + ReportValue(v); + } + std::cout << "Setting double bits\n"; + { + double v{0.0}; + setbit(v,63); + ReportValue(v); + setbit(v,52); // set min normal + ReportValue(v); + setbit(v,52,false); setbit(v,0); // set smallest denorm + ReportValue(v); + } + std::cout << "Setting double-double bits\n"; + { + dd v{0.0}; + v.setbit(127); + ReportValue(v); + v.setbit(116); // set min normal + ReportValue(v); + v.setbit(116,false); v.setbit(64); // set smallest denorm + ReportValue(v); + } + std::cout << "subnormal exponent adjustment\n"; { constexpr double smallestNormal = std::numeric_limits::min(); @@ -132,21 +163,24 @@ try { std::cout << "--------- decimal string rounding -------------\n"; { dd a{}; - int precision = 7; - int nrDigits = precision + 7; - char* s = new char[nrDigits + 1ull]; - int decimalPoint; - - s[0] = '1'; - for (int i = 1; i < nrDigits-1; ++i) { - s[i] = '5'; - } - s[nrDigits - 1] = '\0'; - std::cout << "input digits : " << s << '\n'; - decimalPoint = 7; // 15555.5 - a.round_string(s, precision, &decimalPoint); - std::cout << "rounded digits : " << s << " : decimal point at " << decimalPoint << '\n'; - delete[] s; + a.assign("1.5555555"); + std::cout << "default to_string() format : " << a.to_string() << '\n'; + a.assign("1.5555554"); + 
std::cout << "default to_string() format : " << a.to_string() << '\n'; + a.assign("1.5555556"); + std::cout << "default to_string() format : " << a.to_string() << '\n'; + a.assign("1.55555555"); + std::cout << "default to_string() format : " << a.to_string() << '\n'; + a.assign("1.55555554"); + std::cout << "default to_string() format : " << a.to_string() << '\n'; + a.assign("1.55555556"); + std::cout << "default to_string() format : " << a.to_string() << '\n'; + a.assign("1.55555555"); + std::cout << "to_string(precision=4) format : " << a.to_string(4) << '\n'; + a.assign("1.55555554"); + std::cout << "to_string(precision=4) format : " << a.to_string(4) << '\n'; + a.assign("1.55555556"); + std::cout << "to_string(precision=4) format : " << a.to_string(4) << '\n'; } std::cout << std::setprecision(oldPrec); diff --git a/static/dd/api/traits.cpp b/static/dd/api/traits.cpp index 8cd2feee3..17ac17dbf 100644 --- a/static/dd/api/traits.cpp +++ b/static/dd/api/traits.cpp @@ -1,4 +1,4 @@ -// traits.cpp: tests for type and number traits for doubledouble (dd) floating-point type +// traits.cpp: tests for type and number traits for double-double (dd) floating-point type // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -13,8 +13,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "bfloat traits"; - std::string test_tag = "traits"; + std::string test_suite = "double-double (dd) traits"; + std::string test_tag = "double-double traits"; bool reportTestCases = true; int nrOfFailedTestCases = 0; diff --git a/static/dd/arithmetic/addition.cpp b/static/dd/arithmetic/addition.cpp index 3dfaa43a7..63034b2e6 100644 --- a/static/dd/arithmetic/addition.cpp +++ b/static/dd/arithmetic/addition.cpp @@ -1,4 +1,4 @@ -// addition.cpp: test suite runner for addition of doubledouble floating-point values +// addition.cpp: test suite runner for addition of double-double (dd) floating-point values // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -32,8 +32,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble addition validation"; - std::string test_tag = "doubledouble addition"; + std::string test_suite = "double-double addition validation"; + std::string test_tag = "double-double addition"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/arithmetic/arithmetic.cpp b/static/dd/arithmetic/arithmetic.cpp index aaa3d23cf..de1f8b680 100644 --- a/static/dd/arithmetic/arithmetic.cpp +++ b/static/dd/arithmetic/arithmetic.cpp @@ -1,4 +1,4 @@ -// arithmetic.cpp: test suite runner of arithmetic operations on doubledouble (dd) floating-point +// arithmetic.cpp: test suite runner of arithmetic operations on double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -69,8 +69,8 @@ namespace sw { } } -constexpr unsigned labelWidth = 15; -constexpr unsigned precision = 25; +constexpr unsigned gLabelWidth = 15; +constexpr unsigned gPrecision = 25; double TwoSumTrace(double a, double b, double& r) { double s = a + b; @@ -90,10 +90,10 @@ void TraceTwoSum(double addend) { b = addend; s = two_sum(a, b, r); - ReportValue(a, "a", labelWidth, precision); - ReportValue(b, "b", labelWidth, precision); - ReportValue(s, "s", labelWidth, precision); - ReportValue(r, "r", labelWidth, precision); + ReportValue(a, "a", gLabelWidth, gPrecision); + ReportValue(b, "b", gLabelWidth, gPrecision); + ReportValue(s, "s", gLabelWidth, gPrecision); + ReportValue(r, "r", gLabelWidth, gPrecision); } void TraceTwoDiff(double differend) { @@ -103,10 +103,10 @@ void TraceTwoDiff(double differend) { b = differend; s = two_diff(a, b, r); - ReportValue(a, "a", labelWidth, precision); - ReportValue(b, "b", labelWidth, precision); - ReportValue(s, "s", labelWidth, precision); - ReportValue(r, "r", labelWidth, precision); + ReportValue(a, "a", gLabelWidth, gPrecision); + ReportValue(b, "b", gLabelWidth, gPrecision); + ReportValue(s, "s", gLabelWidth, gPrecision); + ReportValue(r, "r", gLabelWidth, gPrecision); } void TraceTwoProd(double base, double multiplicant) { @@ -116,10 +116,10 @@ void TraceTwoProd(double base, double multiplicant) { b = multiplicant; p = two_prod(a, b, r); - ReportValue(a, "a", labelWidth, precision); - ReportValue(b, "b", labelWidth, precision); - ReportValue(p, "p", labelWidth, precision); - ReportValue(r, "r", labelWidth, precision); + ReportValue(a, "a", gLabelWidth, gPrecision); + ReportValue(b, "b", gLabelWidth, gPrecision); + ReportValue(p, "p", gLabelWidth, gPrecision); + ReportValue(r, "r", gLabelWidth, gPrecision); } void TestArithmeticOp(const sw::universal::dd& a, sw::universal::RandomsOp op, const sw::universal::dd& b) { @@ -143,13 +143,41 @@ void TestArithmeticOp(const sw::universal::dd& a, 
sw::universal::RandomsOp op, c c = sqrt(a); binaryOp = false; break; + case RandomsOp::OPCODE_NOP: + case RandomsOp::OPCODE_ASSIGN: + case RandomsOp::OPCODE_IPA: // In Place Add + case RandomsOp::OPCODE_IPS: // In Place Sub + case RandomsOp::OPCODE_IPM: // In Place Mul + case RandomsOp::OPCODE_IPD: // In Place Div + case RandomsOp::OPCODE_EXP: + case RandomsOp::OPCODE_EXP2: + case RandomsOp::OPCODE_LOG: + case RandomsOp::OPCODE_LOG2: + case RandomsOp::OPCODE_LOG10: + case RandomsOp::OPCODE_SIN: + case RandomsOp::OPCODE_COS: + case RandomsOp::OPCODE_TAN: + case RandomsOp::OPCODE_ASIN: + case RandomsOp::OPCODE_ACOS: + case RandomsOp::OPCODE_ATAN: + case RandomsOp::OPCODE_SINH: + case RandomsOp::OPCODE_COSH: + case RandomsOp::OPCODE_TANH: + case RandomsOp::OPCODE_ASINH: + case RandomsOp::OPCODE_ACOSH: + case RandomsOp::OPCODE_ATANH: + case RandomsOp::OPCODE_POW: + case RandomsOp::OPCODE_HYPOT: + case RandomsOp::OPCODE_RAN: + std::cerr << "invalid operator: test ignored\n"; + break; default: std::cerr << "unknown operator: test ignored\n"; break; } - ReportValue(a, "a", labelWidth, precision); - if (binaryOp) ReportValue(b, "b", labelWidth, precision); - ReportValue(c, "c", labelWidth, precision); + ReportValue(a, "a", gLabelWidth, gPrecision); + if (binaryOp) ReportValue(b, "b", gLabelWidth, gPrecision); + ReportValue(c, "c", gLabelWidth, gPrecision); } @@ -161,9 +189,9 @@ namespace sw { dd one(1.0); dd error = one - a * oneOverA; - ReportValue(a, "a", labelWidth, precision); - ReportValue(oneOverA, "1/a", labelWidth, precision); - ReportValue(error, "error", labelWidth, precision); + ReportValue(a, "a", gLabelWidth, gPrecision); + ReportValue(oneOverA, "1/a", gLabelWidth, gPrecision); + ReportValue(error, "error", gLabelWidth, gPrecision); } void TestDivisionalIdentity(sw::universal::dd const& a) { @@ -172,9 +200,9 @@ namespace sw { dd one(1.0); dd error = one - a * oneOverA; - ReportValue(a, "a", labelWidth, precision); - ReportValue(oneOverA, "1/a", labelWidth, 
precision); - ReportValue(error, "error", labelWidth, precision); + ReportValue(a, "a", gLabelWidth, gPrecision); + ReportValue(oneOverA, "1/a", gLabelWidth, gPrecision); + ReportValue(error, "error", gLabelWidth, gPrecision); } void TestRandomReciprocalIdentities(int nrRandoms = 10) { @@ -220,8 +248,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble arithmetic validation"; - std::string test_tag = "doubledouble arithmetic"; + std::string test_suite = "double-double arithmetic validation"; + std::string test_tag = "double-double arithmetic"; bool reportTestCases = false; int nrOfFailedTestCases = 0; @@ -254,9 +282,9 @@ try { duble min_normal, max_normal; min_normal.setbits(0x001F'FFFF'FFFF'FFFFull); - ReportValue(min_normal, "min-normal", labelWidth, precision); + ReportValue(min_normal, "min-normal", gLabelWidth, gPrecision); max_normal.setbits(0x7FEF'FFFF'FFFF'FFFFull); - ReportValue(max_normal, "max-normal", labelWidth, precision); + ReportValue(max_normal, "max-normal", gLabelWidth, gPrecision); dd a, b, c; @@ -268,7 +296,7 @@ try { TestArithmeticOp(a, RandomsOp::OPCODE_MUL, b); TestArithmeticOp(a, RandomsOp::OPCODE_DIV, b); - ReportValue(1.0 / b.high(), "one over", labelWidth, precision); + ReportValue(1.0 / b.high(), "one over", gLabelWidth, gPrecision); std::cout << "\n\n\n"; TestReciprocalIdentity(dd(1.0)); diff --git a/static/dd/arithmetic/division.cpp b/static/dd/arithmetic/division.cpp index b8868e28b..bca3339ae 100644 --- a/static/dd/arithmetic/division.cpp +++ b/static/dd/arithmetic/division.cpp @@ -1,4 +1,4 @@ -// division.cpp: test suite runner for division of doubledouble floating-point values +// division.cpp: test suite runner for division of double-double (dd) floating-point values // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -31,8 +31,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble division validation"; - std::string test_tag = "doubledouble division"; + std::string test_suite = "double-double division validation"; + std::string test_tag = "double-double division"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/arithmetic/multiplication.cpp b/static/dd/arithmetic/multiplication.cpp index 763879833..a5314843c 100644 --- a/static/dd/arithmetic/multiplication.cpp +++ b/static/dd/arithmetic/multiplication.cpp @@ -1,4 +1,4 @@ -// multiplication.cpp: test suite runner for multiplication of doubledouble floating-point values +// multiplication.cpp: test suite runner for multiplication of double-double (dd) floating-point values // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -32,8 +32,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble multiplication validation"; - std::string test_tag = "doubledouble multiplication"; + std::string test_suite = "double-double multiplication validation"; + std::string test_tag = "double-double multiplication"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/arithmetic/subtraction.cpp b/static/dd/arithmetic/subtraction.cpp index 3e7312071..3f20107ae 100644 --- a/static/dd/arithmetic/subtraction.cpp +++ b/static/dd/arithmetic/subtraction.cpp @@ -1,4 +1,4 @@ -// subtraction.cpp: test suite runner for subtraction of doubledouble floating-point values +// subtraction.cpp: test suite runner for subtraction of double-double (dd) floating-point values // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -32,8 +32,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble subtraction validation"; - std::string test_tag = "doubledouble subtraction"; + std::string test_suite = "double-double subtraction validation"; + std::string test_tag = "double-double subtraction"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/conversion/conversion.cpp b/static/dd/conversion/conversion.cpp index 9a577b9ac..d3dc08af9 100644 --- a/static/dd/conversion/conversion.cpp +++ b/static/dd/conversion/conversion.cpp @@ -28,8 +28,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble conversion validation"; - std::string test_tag = "doubledouble conversion"; + std::string test_suite = "double-double conversion validation"; + std::string test_tag = "double-double conversion"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/conversion/to_string.cpp b/static/dd/conversion/to_string.cpp index f516a82d8..13237a325 100644 --- a/static/dd/conversion/to_string.cpp +++ b/static/dd/conversion/to_string.cpp @@ -136,8 +136,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble string conversion validation"; - std::string test_tag = "doubledouble string conversion"; + std::string test_suite = "double-double string conversion validation"; + std::string test_tag = "double-double string conversion"; bool reportTestCases = true; int nrOfFailedTestCases = 0; diff --git a/static/dd/logic/logic.cpp b/static/dd/logic/logic.cpp index f346aeced..bd0c51077 100644 --- a/static/dd/logic/logic.cpp +++ b/static/dd/logic/logic.cpp @@ -1,4 +1,4 @@ -// addition.cpp: test suite runner for addition on bfloat16s +// addition.cpp: test suite runner for logic operators on double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -32,8 +32,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble logic validation"; - std::string test_tag = "doubledouble logic"; + std::string test_suite = "double-double logic validation"; + std::string test_tag = "double-double logic"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/classify.cpp b/static/dd/math/classify.cpp index 246e2232c..141d2b486 100644 --- a/static/dd/math/classify.cpp +++ b/static/dd/math/classify.cpp @@ -1,4 +1,4 @@ -// classify.cpp: test suite runner for doubledouble (dd) classification functions +// classify.cpp: test suite runner for double-double (dd) classification functions // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -28,8 +28,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib classification function validation"; - std::string test_tag = "pow"; + std::string test_suite = "double-double mathlib classification function validation"; + std::string test_tag = "isfinite/isinf/isnan/isnormal/isdenorm/iszero/signbit"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/error_and_gamma.cpp b/static/dd/math/error_and_gamma.cpp index 604b5a8f9..e36cb30bb 100644 --- a/static/dd/math/error_and_gamma.cpp +++ b/static/dd/math/error_and_gamma.cpp @@ -1,4 +1,4 @@ -// error_and_gamma.cpp: test suite runner for error and gamma functions for double-double floating-point +// error_and_gamma.cpp: test suite runner for error and gamma functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -49,7 +49,7 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib error/gamma function validation"; + std::string test_suite = "double-double mathlib error/gamma function validation"; std::string test_tag = "error/gamma"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/exponent.cpp b/static/dd/math/exponent.cpp index 45f282f1b..f2a528268 100644 --- a/static/dd/math/exponent.cpp +++ b/static/dd/math/exponent.cpp @@ -48,8 +48,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib exponentiation function validation"; - std::string test_tag = "exp"; + std::string test_suite = "double-double mathlib exponentiation function validation"; + std::string test_tag = "exp/exp2/exp10/expm1"; bool reportTestCases = false; int nrOfFailedTestCases = 0; @@ -62,11 +62,10 @@ try { auto oldPrec = std::cout.precision(); for (int i = 0; i < 30; ++i) { std::string tag = "exp(" + std::to_string(i) + ")"; - double e = std::exp(double(i)); - dd dd_e = exp(dd(i)); - dd dd_diff = dd_e - dd(e); - double error = double(dd_diff); - std::cout << std::setw(20) << tag << " : " << std::setprecision(32) << dd_e << " : " << std::setprecision(15) << std::setw(20) << e << " : " << std::setw(25) << error << '\n'; + double exponentRef = std::exp(double(i)); + dd exponent = exp(dd(i)); + dd error = exponentRef - exponent; + std::cout << std::setw(20) << tag << " : " << std::setprecision(32) << exponentRef << " : " << exponent << " : " << std::setw(25) << error << '\n'; } std::cout << std::setprecision(oldPrec); diff --git a/static/dd/math/fractional.cpp b/static/dd/math/fractional.cpp index b09a46d92..05b5b83d2 100644 --- a/static/dd/math/fractional.cpp +++ b/static/dd/math/fractional.cpp @@ -1,4 +1,4 @@ -// fractional.cpp: test suite runner for fractional functions for double-double floating-point +// fractional.cpp: test suite runner for 
fractional functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -51,8 +51,8 @@ try { using namespace sw::universal; using std::fmod; - std::string test_suite = "doubledouble mathlib fractional function validation"; - std::string test_tag = "fractional"; + std::string test_suite = "double-double mathlib fractional function validation"; + std::string test_tag = "fmod/remainder"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/hyperbolic.cpp b/static/dd/math/hyperbolic.cpp index 0e3b10167..d4ac31b25 100644 --- a/static/dd/math/hyperbolic.cpp +++ b/static/dd/math/hyperbolic.cpp @@ -1,4 +1,4 @@ -// hyperbolic.cpp: test suite runner for hyperbolic functions for double-double floating-point +// hyperbolic.cpp: test suite runner for hyperbolic functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -30,7 +30,7 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib hyperbolic function validation"; + std::string test_suite = "double-double mathlib hyperbolic function validation"; std::string test_tag = "hyperbolic"; bool reportTestCases = false; int nrOfFailedTestCases = 0; @@ -39,14 +39,31 @@ try { #if MANUAL_TESTING - dd x = dd_pi4; - std::cout << "sinh( " << x << " ) = " << sinh(x) << '\n'; - std::cout << "cosh( " << x << " ) = " << cosh(x) << '\n'; - std::cout << "tanh( " << x << " ) = " << tanh(x) << '\n'; + std::cout << "ALL HYPERBOLIC FUNCTIONS ARE SHIMS TO DOUBLE\n"; - std::cout << "asinh( " << x << " ) = " << asinh(x) << '\n'; - std::cout << "acosh( " << x << " ) = " << acosh(x) << '\n'; - std::cout << "atanh( " << x << " ) = " << atanh(x) << '\n'; + { + std::cout << "double reference\n"; + double x = std::numbers::pi * 0.25; + std::cout << "sinh( " << x << " ) = " << sinh(x) << '\n'; + std::cout << "cosh( " << x << 
" ) = " << cosh(x) << '\n'; + std::cout << "tanh( " << x << " ) = " << tanh(x) << '\n'; + + std::cout << "asinh( " << x << " ) = " << asinh(x) << '\n'; + std::cout << "acosh( " << x << " ) = " << acosh(x) << '\n'; + std::cout << "atanh( " << x << " ) = " << atanh(x) << '\n'; + } + + { + std::cout << "double-double reference\n"; + dd x = dd_pi4; + std::cout << "sinh( " << x << " ) = " << sinh(x) << '\n'; + std::cout << "cosh( " << x << " ) = " << cosh(x) << '\n'; + std::cout << "tanh( " << x << " ) = " << tanh(x) << '\n'; + + std::cout << "asinh( " << x << " ) = " << asinh(x) << '\n'; + std::cout << "acosh( " << x << " ) = " << acosh(x) << '\n'; + std::cout << "atanh( " << x << " ) = " << atanh(x) << '\n'; + } ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return EXIT_SUCCESS; // ignore errors diff --git a/static/dd/math/hypot.cpp b/static/dd/math/hypot.cpp index d5795747c..9553b7d1f 100644 --- a/static/dd/math/hypot.cpp +++ b/static/dd/math/hypot.cpp @@ -1,4 +1,4 @@ -// hypot.cpp: test suite runner for hypot functions for double-double floating-point +// hypot.cpp: test suite runner for hypot functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -29,7 +29,7 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib hypot function validation"; + std::string test_suite = "double-double mathlib hypothenuse function validation"; std::string test_tag = "hypot"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/logarithm.cpp b/static/dd/math/logarithm.cpp index b2c54b160..42425fe77 100644 --- a/static/dd/math/logarithm.cpp +++ b/static/dd/math/logarithm.cpp @@ -1,36 +1,241 @@ -// logarithm.cpp: test suite runner for log/log1p/log2/log10 functions for doubledouble floating-point +// logarithm.cpp: test suite runner for log/log1p/log2/log10 functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include +#include #include #include #include -// generate specific test case -template -void GenerateLogTestCase(Ty fa) { - unsigned precision = 25; - unsigned width = 30; - Ty fref; - sw::universal::dd a, ref, v; - a = fa; - fref = std::log(fa); - ref = fref; - v = sw::universal::log(a); - auto oldPrec = std::cout.precision(); - std::cout << std::setprecision(precision); - std::cout << " -> log(" << fa << ") = " << std::setw(width) << fref << std::endl; - std::cout << " -> log( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; - std::cout << to_binary(ref) << "\n -> reference\n"; - std::cout << (ref == v ? 
"PASS" : "FAIL") << std::endl << std::endl; - std::cout << std::setprecision(oldPrec); +namespace sw { + namespace universal { + + dd trace_log(const dd& a) { + if (a.isnan()) return a; + + if (a.iszero()) return -std::numeric_limits< dd >::infinity(); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log: non-positive argument\n"; + errno = EDOM; + return std::numeric_limits< dd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + /* Strategy. The Taylor series for log converges much more + slowly than that of exp, due to the lack of the factorial + term in the denominator. Hence this routine instead tries + to determine the root of the function + + f(x) = exp(x) - a + + using Newton iteration. The iteration is given by + + x' = x - f(x)/f'(x) + = x - (1 - a * exp(-x)) + = x + a * exp(-x) - 1. + + Only one iteration is needed, since Newton's iteration + approximately doubles the number of digits per iteration. + */ + + dd x = std::log(a.high()); // Initial approximation + std::cout << "initial approximation : " << to_binary(x) << '\n'; + x = x + a * exp(-x) - 1.0; + std::cout << "1st Newton iteration : " << to_binary(x) << '\n'; + x = x + a * exp(-x) - 1.0; + std::cout << "2nd Newton iteration : " << to_binary(x) << '\n'; + return x; + } + + // generate specific test case + template + void GenerateLogTestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + dd a, ref, v; + a = fa; + fref = std::log(fa); + ref = fref; + v = sw::universal::log(a); + dd error = (v - ref); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> log(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? 
"PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void GenerateLog2TestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + dd a, ref, v; + a = fa; + fref = std::log2(fa); + ref = fref; + v = sw::universal::log2(a); + dd error = (v - ref); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> log2(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log2( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? "PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void GenerateLog10TestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + dd a, ref, v; + a = fa; + fref = std::log10(fa); + ref = fref; + v = sw::universal::log10(a); + auto oldPrec = std::cout.precision(); + dd error = (v - ref); + std::cout << std::setprecision(precision); + std::cout << " -> log10(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log10( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? 
"PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void GenerateLog1pTestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + dd a, ref, v; + a = fa; + fref = std::log1p(fa); + ref = fref; + v = sw::universal::log1p(a); + auto oldPrec = std::cout.precision(); + dd error = (v - ref); + std::cout << std::setprecision(precision); + std::cout << " -> log1p(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log1p( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? "PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void ReportDoubleDoubleFunctionError(const std::string& op, const TestType& a, const TestType& ref, const TestType& error) { + std::cerr << op << " : " << a << " != " << ref << " : error : " << error << '\n'; + } + + template + int VerifyLogFunction(bool reportTestCases, double maxError = 1.0e-15) { + using std::log; + int nrOfFailedTestCases{ 0 }; + constexpr double eulersNr = std::numbers::e; + for (int i = -64; i < 65; ++i) { + double da = std::pow(eulersNr, double(i)); + TestType a = da; + double dref = log(da); + TestType ref = dref; + TestType v = log(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) ReportDoubleDoubleFunctionError("log", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + template + int VerifyLog2Function(bool reportTestCases, double maxError = 1.0e-15) { + using std::log2; + int nrOfFailedTestCases{ 0 }; + for (int i = -64; i < 65; ++i) { + double da = std::pow(2.0, double(i)); + TestType a = da; + double dref = log2(da); + TestType ref = dref; + TestType v = log2(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) 
ReportDoubleDoubleFunctionError("log2", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + template + int VerifyLog10Function(bool reportTestCases, double maxError = 1.0e-15) { + using std::log10; + int nrOfFailedTestCases{ 0 }; + for (int i = -64; i < 65; ++i) { + double da = std::pow(2.0, double(i)); + TestType a = da; + double dref = log10(da); + TestType ref = dref; + TestType v = log10(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) ReportDoubleDoubleFunctionError("log10", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + template + int VerifyLog1pFunction(bool reportTestCases, double maxError = 1.0e-15) { + using std::log1p; + int nrOfFailedTestCases{ 0 }; + for (int i = -64; i < 65; ++i) { + double da = std::pow(2.0, double(i)); + TestType a = da; + double dref = log1p(da); + TestType ref = dref; + TestType v = log1p(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) ReportDoubleDoubleFunctionError("log1p", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + + } } + // Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override -#define MANUAL_TESTING 1 +#define MANUAL_TESTING 0 // REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity // It is the responsibility of the regression test to organize the tests in a quartile progression. 
//#undef REGRESSION_LEVEL_OVERRIDE @@ -49,27 +254,78 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib logarithm function validation"; + std::string test_suite = "double-double mathlib logarithm function validation"; std::string test_tag = "log/log1p/log2/log10"; - bool reportTestCases = false; + bool reportTestCases = true; int nrOfFailedTestCases = 0; ReportTestSuiteHeader(test_suite, reportTestCases); + std::cerr << test_tag << '\n'; #if MANUAL_TESTING // generate individual testcases to hand trace/debug GenerateLogTestCase(1.0); GenerateLogTestCase(std::numbers::e); - for (int i = 2; i < 65; i *= 2) { - GenerateLogTestCase(pow(std::numbers::e, double(i))); + GenerateLogTestCase(pow(std::numbers::e, 2.0)); + + trace_log(dd(pow(std::numbers::e, 4.0))); + + GenerateLog2TestCase(1.0); + GenerateLog2TestCase(2.0); + GenerateLog2TestCase(4.0); + + { + std::stringstream s; + double maxError = 1.0e-14; + s << maxError; + std::string test_id = "log(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLogFunction
(reportTestCases, maxError), "double-double", test_id); } - //nrOfFailedTestCases += ReportTestResult(VerifyLogFunction
("Manual Testing", reportTestCases), "dd", test_tag); + { + std::stringstream s; + double maxError = 1.0e-29; + s << maxError; + std::string test_id = "log2(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLog2Function
(reportTestCases, maxError), "double-double", test_id); + } + + { + std::stringstream s; + double maxError = 1.0e-15; + s << maxError; + std::string test_id = "log10(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLog10Function
(reportTestCases, maxError), "double-double", test_id); + } + + { + std::stringstream s; + double maxError = 1.0e-14; + s << maxError; + std::string test_id = "log1p(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLog1pFunction
(reportTestCases, maxError), "double-double", test_id); + } ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return EXIT_SUCCESS; // ignore errors #else +#if REGRESSION_LEVEL_1 + std::cout << "NOTE: double-double log functions are LESS accurate than stdlib double: \ncurrently log() is accurate to just 14 digits, double-double should have 32 digits of accuracy\n"; + nrOfFailedTestCases += ReportTestResult(VerifyLogFunction
(reportTestCases, 1.0e-14), "double-double", "log()"); + nrOfFailedTestCases += ReportTestResult(VerifyLog2Function
(reportTestCases, 1.0e-14), "double-double", "log2()"); + nrOfFailedTestCases += ReportTestResult(VerifyLog10Function
(reportTestCases, 1.0e-14), "double-double", "log10()"); + nrOfFailedTestCases += ReportTestResult(VerifyLog1pFunction
(reportTestCases, 1.0e-14), "double-double", "log1p()"); +#endif + +#if REGRESSION_LEVEL_2 +#endif + +#if REGRESSION_LEVEL_3 +#endif + +#if REGRESSION_LEVEL_4 +#endif ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); diff --git a/static/dd/math/minmax.cpp b/static/dd/math/minmax.cpp index 8a5a75625..c2a5961f5 100644 --- a/static/dd/math/minmax.cpp +++ b/static/dd/math/minmax.cpp @@ -1,4 +1,4 @@ -// minmax.cpp: test suite runner for minmax functions for double-double floating-point +// minmax.cpp: test suite runner for minmax functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -29,8 +29,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib minmax function validation"; - std::string test_tag = "minmax"; + std::string test_suite = "double-double mathlib minmax function validation"; + std::string test_tag = "min/max"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/next.cpp b/static/dd/math/next.cpp index d5022b2d9..3ab829597 100644 --- a/static/dd/math/next.cpp +++ b/static/dd/math/next.cpp @@ -1,4 +1,4 @@ -// next.cpp: test suite runner for nextafter/nextbefore functions for doubledouble (dd) +// next.cpp: test suite runner for nextafter/nextbefore functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -49,8 +49,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib nextafter/nextbefore function validation"; - std::string test_tag = "pow"; + std::string test_suite = "double-double mathlib nextafter/nextbefore function validation"; + std::string test_tag = "nextafter/nextbefore"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/pow.cpp b/static/dd/math/pow.cpp index a16dedaa3..7ef03af23 100644 --- a/static/dd/math/pow.cpp +++ b/static/dd/math/pow.cpp @@ -1,4 +1,4 @@ -// pow.cpp: test suite runner for pow function for double-double (dd) floats +// pow.cpp: test suite runner for pow function for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -49,7 +49,7 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib power function validation"; + std::string test_suite = "double-double mathlib power function validation"; std::string test_tag = "pow"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/math/sqrt.cpp b/static/dd/math/sqrt.cpp index 6a7ce97f6..2025ade8a 100644 --- a/static/dd/math/sqrt.cpp +++ b/static/dd/math/sqrt.cpp @@ -1,4 +1,4 @@ -// sqrt.cpp: test suite runner for sqrt function for doubledouble floating-point +// sqrt.cpp: test suite runner for sqrt function for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -70,9 +70,9 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib sqrt function validation"; + std::string test_suite = "double-double mathlib sqrt function validation"; std::string test_tag = "sqrt"; - bool reportTestCases = false; + bool reportTestCases = true; int nrOfFailedTestCases = 0; ReportTestSuiteHeader(test_suite, reportTestCases); diff --git a/static/dd/math/trigonometry.cpp b/static/dd/math/trigonometry.cpp index 787cd0501..488be16ce 100644 --- a/static/dd/math/trigonometry.cpp +++ b/static/dd/math/trigonometry.cpp @@ -1,4 +1,4 @@ -// trigonometry.cpp: test suite runner for trigonometry functions for double-double floating-point +// trigonometry.cpp: test suite runner for trigonometry functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -9,28 +9,217 @@ #include #include -// generate specific test case -template -void GenerateLogTestCase(Ty fa) { - unsigned precision = 25; - unsigned width = 30; - Ty fref; - sw::universal::dd a, ref, v; - a = fa; - fref = std::log(fa); - ref = fref; - v = sw::universal::log(a); - auto oldPrec = std::cout.precision(); - std::cout << std::setprecision(precision); - std::cout << " -> log(" << fa << ") = " << std::setw(width) << fref << std::endl; - std::cout << " -> log( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; - std::cout << to_binary(ref) << "\n -> reference\n"; - std::cout << (ref == v ? 
"PASS" : "FAIL") << std::endl << std::endl; - std::cout << std::setprecision(oldPrec); +template +int VerifySinFunction(bool reportTestCases) { + using std::sin, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + const double d2pi = 6.283185307179586476925286766559; + //const double piOver4 = 0.78539816339744830961566084581988; + //const double piOver8 = 0.39269908169872415480783042290994; + //const double piOver16 = 0.19634954084936207740391521145497; + const double piOver32 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of pi/32 + double dinc{ piOver32 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver32 }; + for (unsigned i = 0; i < samples; ++i) { + Real angle = Real(i) * increment; + double dangle = double(i) * dinc; + double ref = sin(dangle); + Real result = sin(angle); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cerr << "sin( " << angle << ") : " << sin(angle) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cerr << "sin( " << angle << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyCosFunction(bool reportTestCases) { + using std::cos, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + const double d2pi = 6.283185307179586476925286766559; + //const double piOver4 = 0.78539816339744830961566084581988; + //const double piOver8 = 0.39269908169872415480783042290994; + //const double piOver16 = 0.19634954084936207740391521145497; + const double piOver32 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of pi/32 + double dinc{ piOver32 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver32 }; + for (unsigned i = 0; i < samples; ++i) { + Real angle = Real(i) * increment; + double dangle = double(i) * dinc; + double ref = cos(dangle); + Real result = 
cos(angle); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cerr << "cos( " << angle << ") : " << cos(angle) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cerr << "cos( " << angle << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyTanFunction(bool reportTestCases) { + using std::tan, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + const double d2pi = 6.283185307179586476925286766559; + //const double piOver2 = 1.5707963267948966192313216916398; + //const double piOver4 = 0.78539816339744830961566084581988; + //const double piOver8 = 0.39269908169872415480783042290994; + //const double piOver16 = 0.19634954084936207740391521145497; + const double piOver32 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of pi/32 + double dinc{ piOver32 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver32 }; + // tan(x) is inf at pi/2 and 3pi/4 + // they are at 1/4 and 3/4s of the sample sequence + for (unsigned i = 0; i < samples; ++i) { + Real angle = Real(i) * increment; + double dangle = double(i) * dinc; + double ref = tan(dangle); + Real result = tan(angle); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (i == samples / 4 || i == 3 * samples / 4) { + // tan(x) approximation is expected to have a much smaller error + // std::cout << samples << " : " << i << '\n'; + if (error > 1e-01) continue; + std::cerr << "error : " << error << '\n'; + } + if (reportTestCases) std::cerr << "tan( " << angle << ") : " << tan(angle) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cerr << "tan( " << angle << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyArcsinFunction(bool reportTestCases) { + using std::asin, std::sin, std::abs; + 
constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + // walk the domain of arcsin = [-1, 1] to the range of [ -pi/2, pi/2 ] + int samples{ 64 }; + double dinc{ 2.0 / double(samples) }; + Real increment{ dinc }; + for (int i = -samples / 2; i < samples / 2; ++i) { + Real rx = Real(i) * increment; + double dx = double(i) * dinc; + // std::cout << "dx " << dx << '\n'; + double ref = asin(dx); + Real result = asin(rx); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cout << "arcsin( " << rx << ") : " << asin(rx) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cout << "arcsin( " << rx << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyArccosFunction(bool reportTestCases) { + using std::acos, std::cos, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + // walk the domain of arccos = [-1, 1] to the range of [0, pi] + int samples{ 64 }; + double dinc{ 2.0 / double(samples) }; + Real increment{ dinc }; + for (int i = -samples / 2; i < samples / 2; ++i) { + Real rx = Real(i) * increment; + double dx = double(i) * dinc; + // std::cout << "dx " << dx << '\n'; + double ref = acos(dx); + Real result = acos(rx); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cout << "arccos( " << rx << ") : " << acos(rx) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cout << "arccos( " << rx << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyArctanFunction(bool reportTestCases) { + using std::atan, std::tan, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + // walk the domain of arctan = [ -inf, inf ] to the range of [ -pi/2, pi/2 ] + // we are going to use tan(x) to generate the values to inverse + const double 
d2pi = 6.283185307179586476925286766559; + const double piOver32 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of pi/32 + double dinc{ piOver32 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver32 }; + // tan(x) is inf at pi/2 and 3pi/4 + // they are at 1/4 and 3/4s of the sample sequence + for (unsigned i = 0; i < samples; ++i) { + + double dangle = double(i) * dinc; + double dx = tan(dangle); + + Real angle = Real(i) * increment; + Real rx = tan(angle); + + double ref = atan(dx); + Real result = atan(rx); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cout << "arctan( " << rx << ") : " << atan(rx) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cout << "arctan( " << rx << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; } // Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override -#define MANUAL_TESTING 1 +#define MANUAL_TESTING 0 // REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity // It is the responsibility of the regression test to organize the tests in a quartile progression. 
//#undef REGRESSION_LEVEL_OVERRIDE @@ -49,27 +238,74 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib trigonometry function validation"; - std::string test_tag = "trigonometry"; + std::string test_suite = "double-double mathlib trigonometry function validation"; + std::string test_tag = "sin/cos/tan asin/acos/atan"; bool reportTestCases = false; int nrOfFailedTestCases = 0; ReportTestSuiteHeader(test_suite, reportTestCases); #if MANUAL_TESTING - // generate individual testcases to hand trace/debug - GenerateLogTestCase(1.0); - GenerateLogTestCase(std::numbers::e); - for (int i = 2; i < 65; i *= 2) { - GenerateLogTestCase(pow(std::numbers::e, double(i))); + + std::cout << std::setw(10) << "sin(pi/4)" << " : " << sin(dd_pi4) << '\n'; + std::cout << std::setw(10) << "cos(pi/4)" << " : " << cos(dd_pi4) << '\n'; + std::cout << std::setw(10) << "tan(pi/4)" << " : " << tan(dd_pi4) << '\n'; + + { + dd a = sin(dd_pi4); + dd b = asin(a); + std::cout << "pi/4 : " << dd_pi4 << '\n'; + std::cout << "sin(pi/4) : " << a << '\n'; + std::cout << "asin(sin(pi/4) : " << b << '\n'; } +// std::cout << std::setw(10) << "asin(sin(pi/4))" << " : " << asin(sin(dd_pi4)) << '\n'; + std::cout << std::setw(10) << "acos(cos(pi/4))" << " : " << acos(cos(dd_pi4)) << '\n'; + std::cout << std::setw(10) << "atan(tan(pi/4))" << " : " << atan(tan(dd_pi4)) << '\n'; - //nrOfFailedTestCases += ReportTestResult(VerifyLogFunction
("Manual Testing", reportTestCases), "dd", test_tag); + VerifySinFunction(reportTestCases); + + dd piOver4("0.78539816339744830961566084581988"); + dd piOver8("0.39269908169872415480783042290994"); + dd piOver16("0.19634954084936207740391521145497"); + dd piOver32("0.01227184630308512983774470071594"); + + dd a = sin(piOver4); + + std::cout << "pi/4 : " << std::setprecision(32) << piOver4 << '\n'; + std::cout << "pi/8 : " << std::setprecision(32) << piOver8 << '\n'; + std::cout << "pi/16 : " << std::setprecision(32) << piOver16 << '\n'; + std::cout << "pi/32 : " << std::setprecision(32) << piOver32 << '\n'; + + dd b{}; + b = asin(dd(0)); + std::cout << b << '\n'; + b = asin(dd(-1.0)); + std::cout << b << '\n'; + b = asin(dd(1.0)); + std::cout << b << '\n'; ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return EXIT_SUCCESS; // ignore errors #else +#if REGRESSION_LEVEL_1 + nrOfFailedTestCases = ReportTestResult(VerifySinFunction
(reportTestCases) , " sin function", " sin(dd)"); + nrOfFailedTestCases += ReportTestResult(VerifyCosFunction
(reportTestCases) , " cos function", " cos(dd)"); + nrOfFailedTestCases += ReportTestResult(VerifyTanFunction
(reportTestCases) , " tan function", " tan(dd)"); + + nrOfFailedTestCases += ReportTestResult(VerifyArcsinFunction
(reportTestCases), "arcsin function", "asin(dd)"); + nrOfFailedTestCases += ReportTestResult(VerifyArccosFunction
(reportTestCases), "arccos function", "acos(dd)"); + nrOfFailedTestCases += ReportTestResult(VerifyArctanFunction
(reportTestCases), "arctan function", "atan(dd)"); +#endif + +#if REGRESSION_LEVEL_2 +#endif + +#if REGRESSION_LEVEL_3 +#endif + +#if REGRESSION_LEVEL_4 +#endif ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); diff --git a/static/dd/math/truncate.cpp b/static/dd/math/truncate.cpp index e7397d909..fe2f12d26 100644 --- a/static/dd/math/truncate.cpp +++ b/static/dd/math/truncate.cpp @@ -1,4 +1,4 @@ -// truncate.cpp: test suite runner for truncate functions for double-double floating-point +// truncate.cpp: test suite runner for truncate functions for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT @@ -29,8 +29,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "doubledouble mathlib truncate function validation"; - std::string test_tag = "truncate"; + std::string test_suite = "double-double mathlib truncate function validation"; + std::string test_tag = "trunc/round/floor/ceil"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/dd/performance/perf.cpp b/static/dd/performance/perf.cpp index 523583d0f..45287785e 100644 --- a/static/dd/performance/perf.cpp +++ b/static/dd/performance/perf.cpp @@ -130,7 +130,7 @@ try { using namespace sw::universal; using namespace sw::universal::internal; - std::string test_suite = "dd operator performance benchmarking"; + std::string test_suite = "double-double operator performance benchmarking"; std::string test_tag = "performance"; bool reportTestCases = false; int nrOfFailedTestCases = 0; diff --git a/static/fixpnt/binary/api/api.cpp b/static/fixpnt/binary/api/api.cpp index 5d75e411b..376f168e3 100644 --- a/static/fixpnt/binary/api/api.cpp +++ b/static/fixpnt/binary/api/api.cpp @@ -34,8 +34,8 @@ int main() try { using namespace sw::universal; - std::string test_suite = "fixpnt arithmetic type API"; - std::string test_tag = "api"; + 
std::string test_suite = "fixpnt<> Application Programming Interface demonstration"; +// std::string test_tag = "api"; bool reportTestCases = false; int nrOfFailedTestCases = 0; @@ -48,12 +48,17 @@ try { { int start = nrOfFailedTestCases; // default construction using default arithmetic (Modulo) and default BlockType (uint8_t) - fixpnt<8, 4> a, b(-8.125f), c(7.875), d(-7.875); // replace with long double init d(-7.875l); - // b initialized to -8.125 in modular arithmetic becomes 7.875: -8.125 = b1000.0010 > maxneg -> becomes b0111.1110 + fixpnt<8, 4> a{}, b(-8.125f), c(7.875), d(-7.875); + // b initialized to -8.125 in modular arithmetic becomes -7.875: -8.125 = b1000.0010 > maxneg -> becomes b0111.1110 if (a != (c + d)) ++nrOfFailedTestCases; + std::cout << "a == (c + d) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; if (a != (b - c)) ++nrOfFailedTestCases; + std::cout << "a == (b - c) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; + std::cout << "FAIL\n"; + } + else { + std::cout << "PASS\n"; } } @@ -61,17 +66,23 @@ try { { int start = nrOfFailedTestCases; // construction with explicit arithmetic type and default BlockType (uint8_t) - fixpnt<8, 4, Modulo> a, b(-8.125), c(7.875), d(-7.875); + fixpnt<8, 4, Modulo> a{}, b(-8.125), c(7.875), d(-7.875); // b initialized to -8.125 in modular arithmetic becomes 7.875: -8.125 = b1000.0010 > maxneg -> becomes b0111.1110 if (a != (c + d)) ++nrOfFailedTestCases; + std::cout << "a == (c + d) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; if (a != (b - c)) ++nrOfFailedTestCases; + std::cout << "a == (b - c) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL: " << to_binary(a) << ' ' << to_binary(b) << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; + std::cout << "FAIL\n"; + } + else { + std::cout << "PASS\n"; } } std::cout << 
"fixpnt type attributes\n"; { + // check important behavioral traits using TestType = fixpnt<8, 4, Modulo, uint8_t>; if constexpr (static_cast(std::is_trivial())) { ReportTrivialityOfType(); @@ -112,11 +123,15 @@ try { // if (0 != (c + d)) ++nrOfFailedTestCases; //cout << to_binary(c + d) << endl; if (a != b) ++nrOfFailedTestCases; - if (a != (d - 1)) ++nrOfFailedTestCases; // Saturate to maxneg + if (a != (d - 1)) ++nrOfFailedTestCases; // Saturate to maxneg + std::cout << "a == (d - 1) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; if (a != (d - 0.5)) ++nrOfFailedTestCases; // Saturate to maxneg + std::cout << "a == (d - 0.5) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; if (nrOfFailedTestCases - start > 0) { - std::cout << to_binary(a) << ' ' << to_binary(b) << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; - std::cout << to_binary(d - 1) << ' ' << to_binary(d - 0.5) << '\n'; + std::cout << "FAIL\n"; + } + else { + std::cout << "PASS\n"; } } @@ -127,13 +142,17 @@ try { { int start = nrOfFailedTestCases; // construction with explicit arithmetic type and BlockType - fixpnt<16, 4, Modulo, uint16_t> a, b(-2048.125f), c(2047.875), d(-2047.875); + fixpnt<16, 4, Modulo, uint16_t> a{}, b(-2048.125f), c(2047.875), d(-2047.875); if (a != (c + d)) ++nrOfFailedTestCases; + std::cout << "a == (c + d) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; if (a != (b - c)) ++nrOfFailedTestCases; - // cout << to_binary(a, true) << ' ' << to_binary(b, true) << ' ' << to_binary(c, true) << ' ' << to_binary(d, true) << endl; + std::cout << "a == (b - c) : " << a << ' ' << b << ' ' << c << ' ' << d << '\n'; + std::cout << to_binary(a, true) << ' ' << to_binary(b, true) << ' ' << to_binary(c, true) << ' ' << to_binary(d, true) << '\n'; if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : construction " << to_binary(a) << ' ' << to_binary(b) << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; - std::cout << a << ' ' << b << ' ' << c << ' ' << d << 
'\n'; + std::cout << "FAIL\n"; + } + else { + std::cout << "PASS\n"; } } @@ -173,7 +192,7 @@ try { // state/bit management constexpr unsigned nbits = 8; constexpr unsigned rbits = 4; - fixpnt a, b, c, d; + fixpnt a{}, b, c, d; for (unsigned i = 0; i < rbits; ++i) { a.setbit(i, true); } diff --git a/static/fixpnt/binary/api/complex_api.cpp b/static/fixpnt/binary/api/complex_api.cpp index b7e65680f..f86324aa5 100644 --- a/static/fixpnt/binary/api/complex_api.cpp +++ b/static/fixpnt/binary/api/complex_api.cpp @@ -7,6 +7,8 @@ #include #include #include +// According to the C++ ISO spec, paragraph 26.2/2: +// The effect of instantiating the template complex for any type other than float, double or long double is unspecified. #include // Configure the fixpnt template environment // first: enable general or specialized fixed-point configurations diff --git a/static/fixpnt/binary/arithmetic/sat_division.cpp b/static/fixpnt/binary/arithmetic/sat_division.cpp index c03c425b4..11c996481 100644 --- a/static/fixpnt/binary/arithmetic/sat_division.cpp +++ b/static/fixpnt/binary/arithmetic/sat_division.cpp @@ -143,7 +143,7 @@ void GenerateValueTable() { for (unsigned i = 0; i < NR_VALUES; ++i) { a.setbits(i); - std::cout << to_binary(i,nbits) << " : " << to_binary(a) << " = " << std::setw(10) << a << '\n'; + std::cout << to_binary(i,false, nbits) << " : " << to_binary(a) << " = " << std::setw(10) << a << '\n'; } } diff --git a/static/fixpnt/binary/complex/mod_complex_add.cpp b/static/fixpnt/binary/complex/mod_complex_add.cpp index 1df0b9844..9af44914b 100644 --- a/static/fixpnt/binary/complex/mod_complex_add.cpp +++ b/static/fixpnt/binary/complex/mod_complex_add.cpp @@ -7,6 +7,8 @@ #include #include #include +// According to the C++ ISO spec, paragraph 26.2/2: +// The effect of instantiating the template complex for any type other than float, double or long double is unspecified. 
#include // Configure the fixpnt template environment @@ -25,14 +27,14 @@ int VerifyComplexAddition(bool reportTestCases) { constexpr size_t NR_VALUES = (size_t(1) << nbits); FixedPoint maxpos(SpecificValue::maxpos), maxneg(SpecificValue::maxneg); int nrOfFailedTests = 0; - FixedPoint ar, ai, br, bi; + FixedPoint ar{ 0 }, ai{ 0 }, br{ 0 }, bi{ 0 }; std::complex a, b, result, ref; std::complex da, db, dc; for (size_t i = 0; i < NR_VALUES; i++) { ar.setbits(i); for (size_t j = 0; j < NR_VALUES; j++) { - ar.setbits(j); + ai.setbits(j); a = std::complex(ar, ai); da = std::complex(double(ar), double(ai)); diff --git a/static/fixpnt/binary/complex/mod_complex_mul.cpp b/static/fixpnt/binary/complex/mod_complex_mul.cpp index 72bd1e6a0..7a8fede73 100644 --- a/static/fixpnt/binary/complex/mod_complex_mul.cpp +++ b/static/fixpnt/binary/complex/mod_complex_mul.cpp @@ -7,6 +7,8 @@ #include #include #include +// According to the C++ ISO spec, paragraph 26.2/2: +// The effect of instantiating the template complex for any type other than float, double or long double is unspecified. 
#include // Configure the fixpnt template environment @@ -16,6 +18,7 @@ #define FIXPNT_THROW_ARITHMETIC_EXCEPTION 1 #include #include +#include // generate specific test case that you can trace with the trace conditions in fixed_point.hpp // for most bugs they are traceable with _trace_conversion and _trace_add @@ -50,11 +53,15 @@ int VerifyComplexMultiplication(bool reportTestCases) { FixedPoint ar, ai, br, bi; std::complex a, b, result, ref; + constexpr bool statusFeedback{ true }; + bool statusStringPresent{ false }; + unsigned nrTests{ 0 }; std::complex da, db, dc; + for (size_t i = 0; i < NR_VALUES; i++) { ar.setbits(i); for (size_t j = 0; j < NR_VALUES; j++) { - ar.setbits(j); + ai.setbits(j); a = std::complex(ar, ai); da = std::complex(double(ar), double(ai)); @@ -91,23 +98,61 @@ int VerifyComplexMultiplication(bool reportTestCases) { if (result.real() != ref.real() || result.imag() != ref.imag()) { nrOfFailedTests++; - if (reportTestCases) ReportBinaryArithmeticError("FAIL", "+", a, b, result, ref); + if (reportTestCases) ReportBinaryArithmeticError("FAIL", "*", a, b, result, ref); } else { - //if (reportTestCases) ReportBinaryArithmeticSuccess("PASS", "+", a, b, result, ref); + //if (reportTestCases) ReportBinaryArithmeticSuccess("PASS", "*", a, b, result, ref); + } + if (nrOfFailedTests > 24) return nrOfFailedTests; + if constexpr (statusFeedback) if (nrTests > 0 && (nrTests % (64 * 1024) == 0)) { + ++nrTests; + statusStringPresent = true; + std::cout << '.'; } - if (nrOfFailedTests > 100) return nrOfFailedTests; } } - } - if (i % 1024 == 0) std::cout << '.'; + } } - std::cout << std::endl; + if constexpr (statusFeedback) if (statusStringPresent) std::cout << std::endl; return nrOfFailedTests; } +template +void complex_mul(Real far, Real fai, Real fbr, Real fbi) { + + std::complex fa, fb, fc; + fa = std::complex(far, fai); + fb = std::complex(fbr, fbi); + fc = fa * fb; + std::cout << "complex : " << fc << '\n'; + FixedPoint cr, ci; + cr = fc.real(); + ci 
= fc.imag(); + std::cout << "fixpnt converted : (" << cr << ", " << ci << ")\n"; + + // manual complex multiply + FixedPoint ar{ far }, ai{ fai }, br{ fbr }, bi{ fbi }; + std::cout << "a = (" << ar << ", " << ai << ")\n"; + std::cout << "b = (" << br << ", " << bi << ")\n"; + FixedPoint r1 = ar * br; + FixedPoint r2 = ai * bi; + std::cout << "cr : " << r1 << " + " << r2 << '\n'; + FixedPoint i1 = ar * bi; + FixedPoint i2 = ai * br; + std::cout << "ci : " << i1 << " + " << i2 << '\n'; + cr = r1 + r2; + ci = i1 + i2; + std::complex a, b, c; + c = std::complex(cr, ci); + std::cout << "manual complex : " << c << '\n'; + a = std::complex(ar, ai); + b = std::complex(br, bi); + c = a * b; + std::cout << "complex : " << c << '\n'; +} + // Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override -#define MANUAL_TESTING 0 +#define MANUAL_TESTING 1 // REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity // It is the responsibility of the regression test to organize the tests in a quartile progression. 
//#undef REGRESSION_LEVEL_OVERRIDE @@ -136,42 +181,48 @@ try { #if MANUAL_TESTING +#pragma message("NOTE: fixpnt complex multiplication is failing: regression suite is disabled") { blockbinary<8> a, b; a.setbits(0x02); b.setbits(0x80); blockbinary<16> c; c = urmul2(a, b); - cout << a << " * " << b << " = " << c << " : " << (long long)c << endl; + std::cout << a << " * " << b << " = " << c << " : " << (long long)c << '\n'; c = urmul2(b, a); - cout << b << " * " << a << " = " << c << " : " << (long long)c << endl; + std::cout << b << " * " << a << " = " << c << " : " << (long long)c << '\n'; } - float fa = -8.0f; - float fb = 0.125f; - GenerateTestCase<8, 4>(fa, fb); - GenerateTestCase<8, 4>(fb, fa); - - // generate individual testcases to hand trace/debug - GenerateTestCase<4, 1>(-0.5f, -3.5f); - GenerateTestCase<4, 1>(-3.5f, -0.5f); - - // GenerateTestCase<8, 1>(0.5f, 0.5f); - GenerateTestCase<8, 1>(0.5f, -32.0f); - GenerateTestCase<8, 1>(-64.0f, 0.5f); - GenerateTestCase<8, 1>(0.0f, -64.0f); - GenerateTestCase<8, 1>(1.5f, -16.0f); - GenerateTestCase<8, 1>(1.5f, -64.0f); - GenerateTestCase<8, 1>(-64.0f, -63.5f); - GenerateTestCase<8, 1>(-63.5f, -64.0f); - GenerateTestCase<8, 1>(-64.0f, -63.0f); - GenerateTestCase<8, 1>(-64.0f, -62.5f); + { + using FixedPoint = fixpnt<4, 2, Modulo, uint8_t>; + FixedPoint ar, ai, br, bi; + std::complex a, b, c; - nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<8, 1, Modulo, uint8_t>(reportTestCases), "fixpnt<8,1,Modulo,uint8_t>", test_tag); - nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<8, 4, Modulo, uint8_t>(reportTestCases), "fixpnt<8,4,Modulo,uint8_t>", test_tag); + ar = 0.25, ai = 0.25, br = 0.25, bi = 0.5; + // (0.25 + 0.25i) * (0.25 + 0.5i) = + a = std::complex(ar, ai); + b = std::complex(br, bi); + c = a * b; + std::cout << c << '\n'; + ReportValue(c, "product"); -#ifdef STRESS_TESTING - // manual exhaustive test + float far, fai, fbr, fbi; + far = 0.25f; + fai = 0.25f; + fbr = 0.25f; 
+ fbi = 0.5f; + complex_mul(far, fai, fbr, fbi); + + // this fails compared to a complex reference computation because each individual + // term in the cr and ci calculation gets rounded down, but the sum would have rounded up. + + // this would indicate that the regression suite algorithm isn't quite correct for + // small fixpnts, which are the only ones we test due to the cost of enumerating + // the full state space. + } + +#undef FULL_SET +#ifdef FULL_SET nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<4, 0, Modulo, uint8_t>(true), "fixpnt<4,0,Modulo,uint8_t>", test_tag); nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<4, 1, Modulo, uint8_t>(true), "fixpnt<4,1,Modulo,uint8_t>", test_tag); nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<4, 2, Modulo, uint8_t>(true), "fixpnt<4,2,Modulo,uint8_t>", test_tag); @@ -179,16 +230,24 @@ try { nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<4, 4, Modulo, uint8_t>(true), "fixpnt<4,4,Modulo,uint8_t>", test_tag); #endif +#ifdef STRESS_TESTING + // for an 8-bit fixpnt, the full state space of complex binary operators is 256^4 = 2^32 = 4billion + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<8, 1, Modulo, uint8_t>(reportTestCases), "fixpnt<8,1,Modulo,uint8_t>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication<8, 4, Modulo, uint8_t>(reportTestCases), "fixpnt<8,4,Modulo,uint8_t>", test_tag); +#endif + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return EXIT_SUCCESS; // ignore failures #else #if REGRESSION_LEVEL_1 - nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 0, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 0,Modulo,uint8_t>", test_tag); - nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 1, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 1,Modulo,uint8_t>", test_tag); - nrOfFailedTestCases += 
ReportTestResult(VerifyComplexMultiplication< 4, 2, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 2,Modulo,uint8_t>", test_tag); - nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 3, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 3,Modulo,uint8_t>", test_tag); - nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 4, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 4,Modulo,uint8_t>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 0, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 0, Modulo, uint8_t>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 1, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 1, Modulo, uint8_t>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 2, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 2, Modulo, uint8_t>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 3, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 3, Modulo, uint8_t>", test_tag); + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 4, 4, Modulo, uint8_t>(reportTestCases), "fixpnt< 4, 4, Modulo, uint8_t>", test_tag); + + nrOfFailedTestCases += ReportTestResult(VerifyComplexMultiplication< 5, 2, Modulo, uint8_t>(reportTestCases), "fixpnt< 5, 2, Modulo, uint8_t>", test_tag); #endif #if REGRESSION_LEVEL_2 diff --git a/static/fixpnt/binary/complex/mod_complex_sub.cpp b/static/fixpnt/binary/complex/mod_complex_sub.cpp index a12726051..04ec49618 100644 --- a/static/fixpnt/binary/complex/mod_complex_sub.cpp +++ b/static/fixpnt/binary/complex/mod_complex_sub.cpp @@ -7,6 +7,8 @@ #include #include #include +// According to the C++ ISO spec, paragraph 26.2/2: +// The effect of instantiating the template complex for any type other than float, double or long double is unspecified. 
#include // Configure the fixpnt template environment @@ -43,14 +45,14 @@ int VerifyComplexSubtraction(bool reportTestCases) { using FixedPoint = fixpnt; constexpr size_t NR_VALUES = (size_t(1) << nbits); FixedPoint maxpos(SpecificValue::maxpos), maxneg(SpecificValue::maxneg); - FixedPoint ar, ai, br, bi; + FixedPoint ar{ 0 }, ai{ 0 }, br{ 0 }, bi{ 0 }; std::complex a, b, result, ref; int nrOfFailedTests = 0; std::complex da, db, dc; for (size_t i = 0; i < NR_VALUES; i++) { ar.setbits(i); for (size_t j = 0; j < NR_VALUES; j++) { - ar.setbits(j); + ai.setbits(j); a = std::complex(ar, ai); da = std::complex(double(ar), double(ai)); diff --git a/static/fixpnt/binary/logic/logic.cpp b/static/fixpnt/binary/logic/logic.cpp index 04937954b..7c1b47e1d 100644 --- a/static/fixpnt/binary/logic/logic.cpp +++ b/static/fixpnt/binary/logic/logic.cpp @@ -58,7 +58,7 @@ try { ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return EXIT_SUCCESS; // ignore failures #else - fixpnt<16, 1> a; + fixpnt<16, 1> a{0}; #if REGRESSION_LEVEL_1 std::cout << "Logic: operator==()\n"; @@ -90,10 +90,10 @@ try { ReportTestResult(0, "fixpnt<16,1> == 0", "== int literal"); } if (!(a == 0.0f)) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> == 0.0", "== float literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> == 0.0f", "== float literal"); } else { - ReportTestResult(0, "fixpnt<16,1> == 0.0", "== float literal"); + ReportTestResult(0, "fixpnt<16,1> == 0.0f", "== float literal"); } if (!(a == 0.0)) { nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> == 0.0", "== double literal"); @@ -103,10 +103,10 @@ try { } #if LONG_DOUBLE_SUPPORT if (!(a == 0.0l)) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> == 0.0", "== long double literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> == 0.0l", "== long double literal"); } else { - ReportTestResult(0, "fixpnt<16,1> == 0.0", "== long double literal"); + ReportTestResult(0, "fixpnt<16,1> == 
0.0l", "== long double literal"); } #endif @@ -136,10 +136,10 @@ try { ReportTestResult(0, "fixpnt<16,1> != 0", "!= int literal"); } if (a != 0.0f) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> != 0.0", "!= float literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> != 0.0f", "!= float literal"); } else { - ReportTestResult(0, "fixpnt<16,1> != 0.0", "!= float literal"); + ReportTestResult(0, "fixpnt<16,1> != 0.0f", "!= float literal"); } if (a != 0.0) { nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> != 0.0", "!= double literal"); @@ -149,10 +149,10 @@ try { } #if LONG_DOUBLE_SUPPORT if (a != 0.0l) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> != 0.0", "!= long double literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> != 0.0l", "!= long double literal"); } else { - ReportTestResult(0, "fixpnt<16,1> != 0.0", "!= long double literal"); + ReportTestResult(0, "fixpnt<16,1> != 0.0l", "!= long double literal"); } #endif @@ -182,10 +182,10 @@ try { ReportTestResult(0, "fixpnt<16,1> < 0", "< int literal"); } if (a < 0.0f) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> < 0.0", "< float literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> < 0.0f", "< float literal"); } else { - ReportTestResult(0, "fixpnt<16,1> < 0.0", "< float literal"); + ReportTestResult(0, "fixpnt<16,1> < 0.0f", "< float literal"); } if (a < 0.0) { nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> < 0.0", "< double literal"); @@ -195,10 +195,10 @@ try { } #if LONG_DOUBLE_SUPPORT if (a < 0.0l) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> < 0.0", "< long double literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> < 0.0l", "< long double literal"); } else { - ReportTestResult(0, "fixpnt<16,1> < 0.0", "< long double literal"); + ReportTestResult(0, "fixpnt<16,1> < 0.0l", "< long double literal"); } #endif @@ -228,10 +228,10 @@ try { ReportTestResult(0, "fixpnt<16,1> 
<= 0", "<= int literal"); } if (!(a <= 0.0f)) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> <= 0.0", "<= float literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> <= 0.0f", "<= float literal"); } else { - ReportTestResult(0, "fixpnt<16,1> <= 0.0", "<= float literal"); + ReportTestResult(0, "fixpnt<16,1> <= 0.0f", "<= float literal"); } if (!(a <= 0.0)) { nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> <= 0.0", "<= double literal"); @@ -241,10 +241,10 @@ try { } #if LONG_DOUBLE_SUPPORT if (!(a <= 0.0l)) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> <= 0.0", "<= long double literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> <= 0.0l", "<= long double literal"); } else { - ReportTestResult(0, "fixpnt<16,1> <= 0.0", "<= long double literal"); + ReportTestResult(0, "fixpnt<16,1> <= 0.0l", "<= long double literal"); } #endif @@ -274,10 +274,10 @@ try { ReportTestResult(0, "fixpnt<16,1> > 0", "> int literal"); } if (a > 0.0f) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> > 0.0", "> float literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> > 0.0f", "> float literal"); } else { - ReportTestResult(0, "fixpnt<16,1> > 0.0", "> float literal"); + ReportTestResult(0, "fixpnt<16,1> > 0.0f", "> float literal"); } if (a > 0.0) { nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> > 0.0", "> double literal"); @@ -287,10 +287,10 @@ try { } #if LONG_DOUBLE_SUPPORT if (a > 0.0l) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> > 0.0", "> long double literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> > 0.0l", "> long double literal"); } else { - ReportTestResult(0, "fixpnt<16,1> > 0.0", "> long double literal"); + ReportTestResult(0, "fixpnt<16,1> > 0.0l", "> long double literal"); } #endif @@ -320,10 +320,10 @@ try { ReportTestResult(0, "fixpnt<16,1> >= 0", ">= int literal"); } if (!(a >= 0.0f)) { - nrOfFailedTestCases += ReportTestResult(1, 
"fixpnt<16,1> >= 0.0", ">= float literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> >= 0.0f", ">= float literal"); } else { - ReportTestResult(0, "fixpnt<16,1> >= 0.0", ">= float literal"); + ReportTestResult(0, "fixpnt<16,1> >= 0.0f", ">= float literal"); } if (!(a >= 0.0)) { nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> >= 0.0", ">= double literal"); @@ -333,10 +333,10 @@ try { } #if LONG_DOUBLE_SUPPORT if (!(a >= 0.0l)) { - nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> >= 0.0", ">= long double literal"); + nrOfFailedTestCases += ReportTestResult(1, "fixpnt<16,1> >= 0.0l", ">= long double literal"); } else { - ReportTestResult(0, "fixpnt<16,1> >= 0.0", ">= long double literal"); + ReportTestResult(0, "fixpnt<16,1> >= 0.0l", ">= long double literal"); } #endif diff --git a/static/fixpnt/decimal/api/api.cpp b/static/fixpnt/decimal/api/api.cpp index 93f46a58a..25dd18204 100644 --- a/static/fixpnt/decimal/api/api.cpp +++ b/static/fixpnt/decimal/api/api.cpp @@ -12,7 +12,7 @@ // second: enable/disable fixpnt arithmetic exceptions #define DECI_THROW_ARITHMETIC_EXCEPTION 1 //#include -#include + #include // Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override @@ -42,382 +42,7 @@ try { ReportTestSuiteHeader(test_suite, reportTestCases); - ///////////////////////////////////////////////////////////////////////////////////// - //// MODULAR fixed-point (the default) - - // construction - { - int start = nrOfFailedTestCases; - // default construction using default arithmetic (Modulo) and default BlockType (uint8_t) - fixpnt<8, 4> a, b(-8.125f), c(7.875), d(-7.875); // replace with long double init d(-7.875l); - // b initialized to -8.125 in modular arithmetic becomes 7.875: -8.125 = b1000.0010 > maxneg -> becomes b0111.1110 - if (a != (c + d)) ++nrOfFailedTestCases; - if (a != (b - c)) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : " << a << ' ' 
<< b << ' ' << c << ' ' << d << '\n'; - } - } - - { - using TestType = fixpnt<8, 4, Modulo, uint8_t>; - if constexpr (static_cast(std::is_trivial())) { - ReportTrivialityOfType(); - } - else { - std::cout << "FAIL: " << type_tag(TestType()) << " is not yet trivial\n"; - } - } - - { - int start = nrOfFailedTestCases; - // construction with explicit arithmetic type and default BlockType (uint8_t) - fixpnt<8, 4, Modulo> a, b(-8.125), c(7.875), d(-7.875); - // b initialized to -8.125 in modular arithmetic becomes 7.875: -8.125 = b1000.0010 > maxneg -> becomes b0111.1110 - if (a != (c + d)) ++nrOfFailedTestCases; - if (a != (b - c)) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL: " << to_binary(a) << ' ' << to_binary(b) << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; - } - } - - // extreme cases - { - fixpnt<8, 0, Modulo> a; // only integers - for (int i = 0; i < 5; ++i) { - std::cout << to_binary(a) << " : " << a << '\n'; - ++a; - } - fixpnt<8, 8, Modulo> b(SpecificValue::minpos); // only fractions - for (int i = 0; i < 8; ++i) { - std::cout << to_binary(b) << " : " << b << '\n'; - b <<= 1; // move the fraction bit left == multiply by 2 - } - fixpnt<8, 8, Modulo> c(SpecificValue::maxpos); // only fractions - for (int i = 0; i < 8; ++i) { - std::cout << to_binary(c) << " : " << c << '\n'; - c >>= 1; // move the fraction bit right == divide by 2 - } - } - - ///////////////////////////////////////////////////////////////////////////////////// - //// Saturate fixed-point - - { - int start = nrOfFailedTestCases; - // construction with explicit arithmetic type and default BlockType (uint8_t) - fixpnt<8, 4, Saturate> a(-8.0), b(-8.125), c(7.875), d(-7.875); - // b initialized to -8.125 in Saturate arithmetic becomes -8 -// if (0 != (c + d)) ++nrOfFailedTestCases; //cout << to_binary(c + d) << endl; - if (a != b) ++nrOfFailedTestCases; - - if (a != (d - 1)) ++nrOfFailedTestCases; // Saturate to maxneg - if (a != (d - 0.5)) 
++nrOfFailedTestCases; // Saturate to maxneg - if (nrOfFailedTestCases - start > 0) { - std::cout << to_binary(a) << ' ' << to_binary(b) << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; - std::cout << to_binary(d - 1) << ' ' << to_binary(d - 0.5) << '\n'; - } - } - - ///////////////////////////////////////////////////////////////////////////////////// - //// improving efficiency for bigger fixed-points through explicit BlockType specification - - { - int start = nrOfFailedTestCases; - // construction with explicit arithmetic type and BlockType - fixpnt<16, 4, Modulo, uint16_t> a, b(-2048.125f), c(2047.875), d(-2047.875); - if (a != (c + d)) ++nrOfFailedTestCases; - if (a != (b - c)) ++nrOfFailedTestCases; - // cout << to_binary(a, true) << ' ' << to_binary(b, true) << ' ' << to_binary(c, true) << ' ' << to_binary(d, true) << endl; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : construction " << to_binary(a) << ' ' << to_binary(b) << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; - std::cout << a << ' ' << b << ' ' << c << ' ' << d << '\n'; - } - } - - ///////////////////////////////////////////////////////////////////////////////////// - // selectors - - // type tag to identify the type without having to depend on demangle - { - using Fixed = fixpnt<16, 2>; - Fixed a{ 0 }; - std::cout << "type identifier : " << type_tag(a) << '\n'; - std::cout << "type identifier : " << type_tag(fixpnt<8, 4>()) << '\n'; - std::cout << "type identifier : " << type_tag(fixpnt<8, 4, Saturate, uint16_t>()) << '\n'; - } - - { - int start = nrOfFailedTestCases; - constexpr unsigned nbits = 8; - constexpr unsigned rbits = 4; - fixpnt a, b; - a = 1; - if (!a.test(4)) ++nrOfFailedTestCases; - b.setbits(1); // set the ULP - if (!b.at(0)) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : selectors\n"; - } - } - - ///////////////////////////////////////////////////////////////////////////////////// - // modifiers - - { - int 
start = nrOfFailedTestCases; - // state/bit management - constexpr unsigned nbits = 8; - constexpr unsigned rbits = 4; - fixpnt a, b, c, d; - for (unsigned i = 0; i < rbits; ++i) { - a.setbit(i, true); - } - b.setbits(0x0F); // same as the fixpnt a above - if ((a - b) != 0) ++nrOfFailedTestCases; - c = b; - // manually flip the bits of b: don't use flip() as we are going to confirm flip() is correct - for (unsigned i = 0; i < nbits; ++i) { - b.setbit(i, !b.test(i)); - } - c.flip(); // in-place 1's complement, so now b and c are the same - if (b != c) ++nrOfFailedTestCases; - d.setbits(0xFFFFFFF); - if (0 == d) ++nrOfFailedTestCases; - d.setzero(); - if (d != 0) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : modifiers\n"; - } - } - - ///////////////////////////////////////////////////////////////////////////// - // complements - { - int start = nrOfFailedTestCases; - constexpr unsigned nbits = 8; - constexpr unsigned rbits = 4; - fixpnt a, b; - a.setbits(0xFF); - b = onesComplement(a); - if (b != 0) ++nrOfFailedTestCases; - a = -1; - b = twosComplement(a); - if (b != 1) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : complements 1\n"; - } - } - { - int start = nrOfFailedTestCases; - constexpr unsigned nbits = 8; - constexpr unsigned rbits = 4; - fixpnt a, b; // testing poorly selected BlockType - a.setbits(0xFF); - b = onesComplement(a); - if (b != 0) ++nrOfFailedTestCases; - a = -1; - b = twosComplement(a); - if (b != 1) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL : complements 2\n"; - } - } - { - int start = nrOfFailedTestCases; - constexpr unsigned nbits = 8; - constexpr unsigned rbits = 4; - fixpnt a, b; // testing poorly selected BlockType - a.setbits(0xFF); - b = onesComplement(a); - if (b != 0) ++nrOfFailedTestCases; - a = -1; - b = twosComplement(a); - if (b != 1) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout 
<< "FAIL : complements 3\n"; - } - } - - //////////////////////////////////////////////////////////////////////////////////// - // parsing and assignment of text input values - { - constexpr unsigned nbits = 12; - constexpr unsigned rbits = 8; - fixpnt a, b; - a.assign("6.90234375"); - std::cout << to_binary(a, true) << " : " << a << '\n'; - if (a != 6.90234375) ++nrOfFailedTestCases; - a.assign("0b0110.1110'0111"); - std::cout << to_binary(a, true) << " : " << a << '\n'; - b.setbits(0x6E7); - if (a != b) ++nrOfFailedTestCases; - } - - /////////////////////////////////////////////////////////////////////////////////// - // arithmetic - - { - int start = nrOfFailedTestCases; - constexpr unsigned nbits = 16; - constexpr unsigned rbits = 8; - constexpr bool arithmetic = Modulo; - using blocktype = uint32_t; - fixpnt a, b, c, d; - a.maxpos(); - b.maxneg(); - c.minpos(); - d.minneg(); - if ((c + d) != 0) ++nrOfFailedTestCases; - - if ((a + c) != b) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL: min/max\n"; - std::cout << to_binary(c + d) << " vs " << to_binary(fixpnt(0)) << '\n'; - std::cout << to_binary(a + c) << " vs " << to_binary(b) << '\n'; - } - } - - /////////////////////////////////////////////////////////////////////////////////// - // logic, in particular, all the literal constant combinations - { - int start = nrOfFailedTestCases; - constexpr unsigned nbits = 8; - constexpr unsigned rbits = 4; - constexpr bool arithmetic = Modulo; - using blocktype = uint32_t; - fixpnt a, b, c, d; - a = 1; - b = 2l; - c = 3ll; - d = 0ull; - // unsigned literals - if (a != 1u) ++nrOfFailedTestCases; - if (b != 2ul) ++nrOfFailedTestCases; - if (c != 3ull) ++nrOfFailedTestCases; - if (1u != a) ++nrOfFailedTestCases; - if (2ul != b) ++nrOfFailedTestCases; - if (3ull != c) ++nrOfFailedTestCases; - if (d != c - b - a) ++nrOfFailedTestCases; - // signed literals - if (-a != -1) ++nrOfFailedTestCases; - if (-b != -2l) ++nrOfFailedTestCases; - if 
(-c != -3ll) ++nrOfFailedTestCases; - if (-1 != -a) ++nrOfFailedTestCases; - if (-2l != -b) ++nrOfFailedTestCases; - if (-3ll != -c) ++nrOfFailedTestCases; - - // less than unsigned literal - d = 4.0f; - if (d < 1u) ++nrOfFailedTestCases; - if (d < 2ul) ++nrOfFailedTestCases; - if (d < 3ull) ++nrOfFailedTestCases; - d = 0.0; - if (1u < d) ++nrOfFailedTestCases; - if (2ul < d) ++nrOfFailedTestCases; - if (3ull < d) ++nrOfFailedTestCases; - - // greater than unsigned literal - if (d > 1u) ++nrOfFailedTestCases; - if (d > 2ul) ++nrOfFailedTestCases; - if (d > 3ull) ++nrOfFailedTestCases; - d = 4ll; - if (1u > d) ++nrOfFailedTestCases; - if (2ul > d) ++nrOfFailedTestCases; - if (3ull > d) ++nrOfFailedTestCases; - - // less than or equal unsigned literal - if (d <= 1u) ++nrOfFailedTestCases; - if (d <= 2ul) ++nrOfFailedTestCases; - if (d <= 3ull) ++nrOfFailedTestCases; - d = 0.0f; - if (1u <= d) ++nrOfFailedTestCases; - if (2ul <= d) ++nrOfFailedTestCases; - if (3ull <= d) ++nrOfFailedTestCases; - - // greater than or equal unsigned literal - if (d >= 1u) ++nrOfFailedTestCases; - if (d >= 2ul) ++nrOfFailedTestCases; - if (d >= 3ull) ++nrOfFailedTestCases; - d = 4.0; - if (1u >= d) ++nrOfFailedTestCases; - if (2ul >= d) ++nrOfFailedTestCases; - if (3ull >= d) ++nrOfFailedTestCases; - - // comparisons with signed literals - // less than signed literal - d = 4.0f; - if (d < 1) ++nrOfFailedTestCases; - if (d < 2l) ++nrOfFailedTestCases; - if (d < 3ll) ++nrOfFailedTestCases; - d = 0.0; - if (1 < d) ++nrOfFailedTestCases; - if (2l < d) ++nrOfFailedTestCases; - if (3ll < d) ++nrOfFailedTestCases; - - // greater than signed literal - if (d > 1) ++nrOfFailedTestCases; - if (d > 2l) ++nrOfFailedTestCases; - if (d > 3ll) ++nrOfFailedTestCases; - d = 4ll; - if (1 > d) ++nrOfFailedTestCases; - if (2l > d) ++nrOfFailedTestCases; - if (3ll > d) ++nrOfFailedTestCases; - - // less than or equal signed literal - if (d <= 1) ++nrOfFailedTestCases; - if (d <= 2l) ++nrOfFailedTestCases; - 
if (d <= 3ll) ++nrOfFailedTestCases; - d = 0.0f; - if (1 <= d) ++nrOfFailedTestCases; - if (2l <= d) ++nrOfFailedTestCases; - if (3ll <= d) ++nrOfFailedTestCases; - - // greater than or equal signed literal - if (d >= 1) ++nrOfFailedTestCases; - if (d >= 2l) ++nrOfFailedTestCases; - if (d >= 3ll) ++nrOfFailedTestCases; - d = 4.0; - if (1 >= d) ++nrOfFailedTestCases; - if (2l >= d) ++nrOfFailedTestCases; - if (3ll >= d) ++nrOfFailedTestCases; - if (nrOfFailedTestCases - start > 0) { - std::cout << "FAIL: logic operators\n"; - } - } - -#ifdef SHOW_STATE_SPACE - { - constexpr unsigned nbits = 7; - constexpr unsigned rbits = 4; - constexpr bool arithmetic = Modulo; - constexpr unsigned NR_VALUES = (1 << nbits); - using blocktype = uint32_t; - - fixpnt a, b, c, d; - for (unsigned i = 0; i < NR_VALUES; ++i) { - a.setbits(i); - float f = float(a); - b = int(f); - c = f; - d = double(a); - if (a != c && a != d) ++nrOfFailedTestCases; - std::cout << setw(3) << i << ' ' << to_binary(a) << ' ' << setw(10) << a << ' ' << setw(3) << int(f) << ' ' << to_binary(b) << ' ' << b << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; - } - } - - { - constexpr unsigned nbits = 8; - constexpr unsigned rbits = 4; - constexpr bool arithmetic = Modulo; - using blocktype = uint32_t; - fixpnt a, b, c, d; - - for (int i = -16; i < 16; ++i) { - a = i; - std::cout << to_binary(i) << ' ' << a << ' ' << to_binary(a) << ' ' << to_binary(-a) << ' ' << -a << ' ' << to_binary(-i) << '\n'; - } - } -#endif // LATER + std::cout << "deci<> decimal fixed-point TBD\n"; ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return (nrOfFailedTestCases > 0 ? 
EXIT_FAILURE : EXIT_SUCCESS); @@ -426,12 +51,12 @@ catch (char const* msg) { std::cerr << msg << std::endl; return EXIT_FAILURE; } -catch (const sw::universal::fixpnt_arithmetic_exception& err) { - std::cerr << "Uncaught fixpnt arithmetic exception: " << err.what() << std::endl; +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Uncaught universal arithmetic exception: " << err.what() << std::endl; return EXIT_FAILURE; } -catch (const sw::universal::fixpnt_internal_exception& err) { - std::cerr << "Uncaught fixpnt internal exception: " << err.what() << std::endl; +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Uncaught universal internal exception: " << err.what() << std::endl; return EXIT_FAILURE; } catch (const std::runtime_error& err) { diff --git a/static/qd/CMakeLists.txt b/static/qd/CMakeLists.txt index 2072f6423..93f8d83a5 100644 --- a/static/qd/CMakeLists.txt +++ b/static/qd/CMakeLists.txt @@ -3,12 +3,11 @@ file (GLOB LOGIC_SRC "logic/*.cpp") file (GLOB CONVERSION_SRC "conversion/*.cpp") file (GLOB ARITHMETIC_SRC "arithmetic/*.cpp") file (GLOB MATH_SRC "./math/*.cpp") -#file (GLOB PERFORMANCE_SRC "./performance/*.cpp") - -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/api" "${API_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/logic" "${LOGIC_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/conversion" "${CONVERSION_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/arithmetic" "${ARITHMETIC_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/math" "${MATH_SRC}") -#compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/performance" "${PERFORMANCE_SRC}") +file (GLOB PERFORMANCE_SRC "./performance/*.cpp") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/api" "${API_SRC}") +compile_all("true" "qd" "Number 
Systems/static/floating-point/binary/qd/logic" "${LOGIC_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/conversion" "${CONVERSION_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/arithmetic" "${ARITHMETIC_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/math" "${MATH_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/performance" "${PERFORMANCE_SRC}") diff --git a/static/qd/api/api.cpp b/static/qd/api/api.cpp new file mode 100644 index 000000000..fde519878 --- /dev/null +++ b/static/qd/api/api.cpp @@ -0,0 +1,312 @@ +// api.cpp: application programming interface tests for quad-double (qd) number system +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +// minimum set of include files to reflect source code dependencies +// Configure the qd template environment +// enable/disable arithmetic exceptions +#define QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION 0 +#include +#include +#include +#include + +namespace sw { + namespace universal { + + template + void Progression(Real v) { + using namespace sw::universal; + + auto oldPrec = std::cout.precision(); + float f{ float(v) }; + std::cout << std::setprecision(7); + std::cout << to_binary(f, true) << " : " << f << '\n'; + + double d{ v }; + std::cout << std::setprecision(17); + std::cout << to_binary(d, true) << " : " << d << '\n'; + + qd a{ v }; + std::cout << std::setprecision(35); + std::cout << to_binary(a, true) << " : " << a << '\n'; + std::cout << std::setprecision(oldPrec); + } + + qd parse(const std::string& str) { + using namespace sw::universal; + + qd v(str); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(std::numeric_limits::digits10); + std::cout << "string: " << str << " = ( " << v[0] 
<< ", " << v[1] << ") "; + std::cout << std::setprecision(oldPrec); + return v; + } + + void print(std::ostream& ostr, qd const& v) { + std::ios_base::fmtflags fmt = ostr.flags(); + bool showpos = (fmt & std::ios_base::showpos) != 0; + bool uppercase = (fmt & std::ios_base::uppercase) != 0; + bool fixed = (fmt & std::ios_base::fixed) != 0; + bool scientific = (fmt & std::ios_base::scientific) != 0; + bool internal = (fmt & std::ios_base::internal) != 0; + bool left = (fmt & std::ios_base::left) != 0; + std::string str = v.to_string(ostr.precision(), ostr.width(), fixed, scientific, internal, left, showpos, uppercase, ostr.fill()); + ostr << str << '\n'; + } + + } +} + + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double (qd) API tests"; + int nrOfFailedTestCases = 0; + + auto oldPrec = std::cout.precision(); + + // important behavioral traits + { + using TestType = qd; + ReportTrivialityOfType(); + } + + // default behavior + std::cout << "+--------- Default qd has subnormals, but no supernormals\n"; + { + uint64_t big = (1ull << 53); + std::cout << to_binary(big) << " : " << big << '\n'; + qd a(big), b(1.0), c{}; + c = a + b; + ReportValue(a, "a"); + ReportValue(b, "b"); + ReportValue(c, "c"); + } + + // arithmetic behavior + std::cout << "+--------- Default qd has subnormals, but no supernormals\n"; + { + qd a(2.0), b(4.0); + ArithmeticOperators(a, b); + } + + std::cout << "+--------- fraction bit progressions \n"; + { + float fulp = ulp(1.0f); + Progression(1.0f + fulp); + Progression(1.0 + ulp(2.0)); + double v = ulp(1.0); + Progression( 1.0 - v/2.0 ); + std::cout << to_quad(qd(1.0 - v / 2.0)) << '\n'; + } + + // report on the dynamic range of some standard configurations + std::cout << "+--------- Dynamic range doubledouble configurations --------+\n"; + { + qd a; // uninitialized + + a.maxpos(); + std::cout << "maxpos doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.setbits(0x0080); // positive min normal 
+ std::cout << "minnorm doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.minpos(); + std::cout << "minpos doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.zero(); + std::cout << "zero : " << to_binary(a) << " : " << a << '\n'; + a.minneg(); + std::cout << "minneg doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.maxneg(); + std::cout << "maxneg doubledouble : " << to_binary(a) << " : " << a << '\n'; + + std::cout << "---\n"; + } + + // constexpr and specific values + std::cout << "+--------- constexpr and specific values --------+\n"; + { + using Real = qd; + + CONSTEXPRESSION Real a{}; // zero constexpr + std::cout << type_tag(a) << '\n'; + + Real b(1.0f); // constexpr of a native type conversion + std::cout << to_binary(b) << " : " << b << '\n'; + + CONSTEXPRESSION Real c(SpecificValue::minpos); // constexpr of a special value in the encoding + std::cout << to_binary(c) << " : " << c << " == minpos" << '\n'; + + CONSTEXPRESSION Real d(SpecificValue::maxpos); // constexpr of a special value in the encoding + std::cout << to_binary(d) << " : " << d << " == maxpos" << '\n'; + } + + // set bit patterns + std::cout << "+--------- set bit patterns API --------+\n"; + { + using Real = qd; + + Real a; // uninitialized + std::cout << type_tag(a) << '\n'; + + a.setbits(0x0000); + std::cout << to_binary(a) << " : " << a << '\n'; + + a.setbit(8); + std::cout << to_binary(a) << " : " << a << " : set bit 8 assuming 0-based" << '\n'; + a.setbits(0xffff); + a.setbit(8, false); + std::cout << to_binary(a) << " : " << a << " : reset bit 8" << '\n'; + + a.setbits(0xAAAA); + std::cout << to_binary(a) << " : " << a << '\n'; + + a.assign(std::string("0b1.0101'0101.0101'010")); + std::cout << to_binary(a) << " : " << a << '\n'; + + a.assign(std::string("0b0.1010'1010.1010'101")); + std::cout << to_binary(a) << " : " << a << '\n'; + } + + // parse decimal strings + std::cout << "+--------- parse API --------+\n"; + { + std::string qdstr; + qd v; + + v = 
parse("0.0"); + qdstr = v.to_string(25, 25, true, false, false, false, true, false, ' '); + std::cout << qdstr << '\n'; + + std::cout << std::setprecision(7); + print(std::cout, parse("0.5")); + print(std::cout, parse("1.0")); + print(std::cout, parse("2.0")); + + // 100 digits of e + // 10 2.7182818284 + // 20 2.71828182845904523536 + // 30 2.718281828459045235360287471352 + // 40 2.7182818284590452353602874713526624977572 + // 50 2.71828182845904523536028747135266249775724709369995 + // 60 2.718281828459045235360287471352662497757247093699959574966967 + // 70 2.7182818284590452353602874713526624977572470936999595749669676277240766 + // 80 2.71828182845904523536028747135266249775724709369995957496696762772407663035354759 + // 90 2.718281828459045235360287471352662497757247093699959574966967627724076630353547594571382178 + // 100 2.7182818284590452353602874713526624977572470936999595749669676277240766303535475945713821785251664274 + ReportValue(std::numbers::e, "e", 10, 25); + std::cout << std::setprecision(10); + print(std::cout, parse("2.7182818284")); // 10 digits + std::cout << std::setprecision(15); + print(std::cout, parse("2.71828182845904")); // 15 digits + std::cout << std::setprecision(20); + print(std::cout, parse("2.71828182845904523536")); // 20 digits + std::cout << std::setprecision(30); + print(std::cout, parse("2.718281828459045235360287471352")); // 30 digits + std::cout << std::setprecision(40); + print(std::cout, parse("2.7182818284590452353602874713526624977572")); // 40 digits + + std::cout << std::setprecision(37); + print(std::cout, parse("2.718281828459045235360287471352662498")); //37 digits + std::cout << std::setprecision(oldPrec); + } + + std::cout << "+--------- set specific values of interest --------+\n"; + { + qd a{ 0 }; // initialized + std::cout << "maxpos : " << a.maxpos() << " : " << scale(a) << '\n'; + std::cout << "minpos : " << a.minpos() << " : " << scale(a) << '\n'; + std::cout << "zero : " << a.zero() << " : " << scale(a) 
<< '\n'; + std::cout << "minneg : " << a.minneg() << " : " << scale(a) << '\n'; + std::cout << "maxneg : " << a.maxneg() << " : " << scale(a) << '\n'; + std::cout << dynamic_range() << std::endl; + } + + std::cout << "+--------- doubledouble subnormal behavior --------+\n"; + { + constexpr double minpos = std::numeric_limits::min(); + std::cout << to_binary(minpos) << " : " << minpos << '\n'; + double subnormal = minpos / 2.0; + std::cout << to_binary(subnormal) << " : " << subnormal << '\n'; + qd a(minpos); + for (int i = 0; i < 10/*106*/; ++i) { + std::string str = a.to_string(30, 40, false, true, false, false, false, false, ' '); + std::cout << to_binary(a) << " : " << a << " : " << str << '\n'; + a /= 2.0; + } + } + + std::cout << "+--------- special value properties doubledouble vs IEEE-754 --------+\n"; + { + float fa; + fa = NAN; + std::cout << "qNAN : " << to_binary(NAN) << '\n'; + std::cout << "sNAN : " << to_binary(-NAN) << '\n'; + if (fa < 0.0f && fa > 0.0f && fa != 0.0f) { + std::cout << "IEEE-754 is incorrectly implemented\n"; + } + else { + std::cout << "IEEE-754 NAN has no sign\n"; + } + + qd a(fa); + if ((a < 0.0f && a > 0.0f && a != 0.0f)) { + std::cout << "doubledouble (qd) is incorrectly implemented\n"; + ++nrOfFailedTestCases; + } + else { + std::cout << "qd NAN has no sign\n"; + } + } + + std::cout << "+--------- numeric_limits of doubledouble vs IEEE-754 --------+\n"; + { + std::cout << "qd(INFINITY): " << qd(INFINITY) << "\n"; + std::cout << "qd(-INFINITY): " << qd(-INFINITY) << "\n"; + + std::cout << "qd(std::numeric_limits::infinity()) : " << qd(std::numeric_limits::infinity()) << "\n"; + std::cout << "qd(-std::numeric_limits::infinity()) : " << qd(-std::numeric_limits::infinity()) << "\n"; + + std::cout << " 2 * std::numeric_limits::infinity() : " << 2 * std::numeric_limits::infinity() << "\n"; + std::cout << " 2 * std::numeric_limits::infinity() : " << 2 * std::numeric_limits::infinity() << "\n"; + std::cout << "-2 * 
std::numeric_limits::infinity() : " << -2 * std::numeric_limits::infinity() << "\n"; + +// std::cout << "sw::universal::nextafter(qd(0), std::numeric_limits::infinity()) : " << sw::universal::nextafter(qd(-0), std::numeric_limits::infinity()) << "\n"; + std::cout << "std::nextafter(float(0), std::numeric_limits::infinity()) : " << std::nextafter(float(-0), std::numeric_limits::infinity()) << "\n"; +// std::cout << "sw::universal::nextafter(qd(0), -std::numeric_limits::infinity()) : " << sw::universal::nextafter(qd(0), -std::numeric_limits::infinity()) << "\n"; + std::cout << "std::nextafter(float(0), -std::numeric_limits::infinity()) : " << std::nextafter(float(0), -std::numeric_limits::infinity()) << "\n"; + + std::cout << "qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) : " << qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) << "\n"; + std::cout << "qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) : " << qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) << "\n"; + } + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) 
{ + std::cerr << "caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/api/constants.cpp b/static/qd/api/constants.cpp new file mode 100644 index 000000000..74f801401 --- /dev/null +++ b/static/qd/api/constants.cpp @@ -0,0 +1,395 @@ +// constants.cpp: test suite runner for creating and verifying quad-double constants +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include +#include +#include + +namespace sw { + namespace universal { + + sw::universal::qd GenerateQuadDouble(const std::string& str) { + using namespace sw::universal; + qd v(str); + auto oldPrec = std::cout.precision(); + // 53 bits = 16 decimal digits, 17 to include last, 15 typical valid digits + std::cout << std::setprecision(std::numeric_limits::max_digits10); + std::cout << to_quad(v) << '\n'; + std::cout << std::setprecision(oldPrec); + return v; + } + + void report(const sw::universal::qd& v, int precision = 17) { + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision) << to_quad(v) << " : " << v << '\n'; + std::cout << std::setprecision(oldPrec); + } + + void EnumerateConstants() { + qd _zero("0.0"); report(_zero); + qd _one("1.0"); report(_one); + qd _ten("10.0"); report(_ten); + + qd _tenth("0.1"); report(_tenth); + qd _third("0.333333333333333333333333333333333333"); report(_third); + + qd _2pi("6.283185307179586476925286766559005768"); report(_2pi); + qd _pi("3.141592653589793238462643383279502884"); report(_pi); + qd _pi2("1.570796326794896619231321691639751442"); report(_pi2); + qd _pi4("0.785398163397448309615660845819875721"); report(_pi4); + qd _3pi4 = _pi2 + _pi4; report(_3pi4); + + qd _e("2.718281828459045235360287471352662498"); report(_e); + + qd _ln2("0.693147180559945309417232121458176568"); report(_ln2); + qd 
_ln10("2.302585092994045684017991454684364208"); report(_ln10); + + qd _lge("1.442695040888963407359924681001892137"); report(_lge); + qd _lg10("3.321928094887362347870319429489390176"); report(_lg10); + + qd _log2("0.301029995663981195213738894724493027"); report(_log2); + qd _loge("0.434294481903251827651128918916605082"); report(_loge); + + qd _sqrt2("1.414213562373095048801688724209698079"); report(_sqrt2); + + qd _inv_pi("0.318309886183790671537767526745028724"); report(_inv_pi); + qd _inv_pi2("0.636619772367581343075535053490057448"); report(_inv_pi2); + qd _inv_e("0.367879441171442321595523770161460867"); report(_inv_e); + qd _inv_sqrt2("0.707106781186547524400844362104849039"); report(_inv_sqrt2); + } + + int VerifyParse(const std::string& str) { + int nrFailedTestCases{ 0 }; + qd v{}; + if (!parse(str, v)) { + std::cerr << "failed to parse " << str << '\n'; + ++nrFailedTestCases; + } + else { + std::cout << std::setw(20) << str << " : " << v << '\n'; + std::cout << to_binary(v) << '\n'; + std::cout << "PASS\n"; + } + return nrFailedTestCases; + } + + int TestScientifiFormatParsing() { + // parsing scientific formats + int nrOfFailedTests{ 0 }; + nrOfFailedTests += VerifyParse("12.5e-2"); + nrOfFailedTests += VerifyParse("12.5e-1"); + nrOfFailedTests += VerifyParse("12.5e-0"); + nrOfFailedTests += VerifyParse("12.5e+1"); + nrOfFailedTests += VerifyParse("12.5e2"); + nrOfFailedTests += VerifyParse("12.5e-02"); + nrOfFailedTests += VerifyParse("12.5e-01"); + nrOfFailedTests += VerifyParse("12.5e00"); + nrOfFailedTests += VerifyParse("12.5e+01"); + nrOfFailedTests += VerifyParse("12.5e02"); + nrOfFailedTests += VerifyParse("12.5e-002"); + nrOfFailedTests += VerifyParse("12.5e-001"); + nrOfFailedTests += VerifyParse("12.5e000"); + nrOfFailedTests += VerifyParse("12.5e+001"); + nrOfFailedTests += VerifyParse("12.5e002"); + nrOfFailedTests += VerifyParse("12.5e-200"); + nrOfFailedTests += VerifyParse("12.5e-100"); + nrOfFailedTests += VerifyParse("12.5e000"); + 
nrOfFailedTests += VerifyParse("12.5e+100"); + nrOfFailedTests += VerifyParse("12.5e200"); + + return nrOfFailedTests; + } + + + void FindRepresentationForOneThird() { + double _third = 0.333'333'333'333'333'333'333'333'333'333'3; + double _third2 = _third * std::pow(2.0, -53.0); + double _short = 0.333'333'333'333'333'3; + ReportValue(_short, "0.333'333'333'333'333'3", 35, 32); + ReportValue(_third, "0.333'333'333'333'333'333'333'333'333'333'3", 35, 32); + + qd a{ 0 }, b, c; + std::cout << std::setprecision(64); + + a = _third; + b = _third2; + std::cout << std::setw(35) << "0.3333...." << " : " << a << '\n'; + std::cout << std::setw(35) << "0.3333...." << " : " << b << '\n'; + c = a + b; + std::cout << std::setw(35) << "0.3333...." << " : " << c << '\n'; + std::cout << to_quad(c) << '\n'; + + qd d(_third, _third2); + std::cout << std::setw(35) << "0.3333...." << " : " << d << '\n'; + std::cout << to_quad(d) << '\n'; + + // 212 bits represent 10log(2) * 212 = 63.8 digits of accuracy + // 1 10 20 30 40 50 60 70 80 90 100 + // ' ' ' ' ' ' ' ' ' ' ' + std::string ten3s = "0.3333333333"; + std::string twenty3s = "0.33333333333333333333"; + std::string thirty3s = "0.333333333333333333333333333333"; + std::string fourty3s = "0.3333333333333333333333333333333333333333"; + std::string fifty3s = "0.33333333333333333333333333333333333333333333333333"; + std::string sixty3s = "0.333333333333333333333333333333333333333333333333333333333333"; + std::string seventy3s = "0.3333333333333333333333333333333333333333333333333333333333333333333333"; + std::string eighty3s = "0.33333333333333333333333333333333333333333333333333333333333333333333333333333333"; + std::string ninety3s = "0.333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333"; + std::string onehundred3s = "0.3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333"; + // qd 
e("0.333'333'333'333'333'333'333'333'333'333'333'333'333'333'333'333'33"); // 50 digits + std::string sixty4_3s = "0.3333333333333333333333333333333333333333333333333333333333333333"; + qd e(seventy3s); + std::cout << std::setw(35) << "0.3333...." << " : " << e << '\n'; + std::cout << to_quad(e) << '\n'; // NOTE: this yields a better representation than sixty4_3s or even onehundred3s + + qd f(0.33333333333333331, 1.8503717077085941e-17, 1.0271626370065257e-33, 5.7018980481966837e-50); + std::cout << std::setw(35) << "0.3333...." << " : " << f << '\n'; + std::cout << to_quad(f) << '\n'; + + qd h(sixty4_3s); + std::cout << std::setw(35) << "0.3333...." << " : " << h << '\n'; + std::cout << to_quad(h) << '\n'; + + qd g(onehundred3s); + std::cout << std::setprecision(100); + std::cout << std::setw(35) << "0.3333...." << " : " << g << '\n'; + std::cout << to_quad(g) << '\n'; + } + + void GenerateConstants() { + // phi to one hundred digits + std::string hundred_digits_of_phi = "1.6180339887498948482045868343656381177203091798057628621354486227052604628189024497072072041893911374"; + + qd qd_phi_origin(hundred_digits_of_phi); + std::cout << "phi " << std::setprecision(64) << qd_phi_origin << '\n'; + std::cout << to_quad(qd_phi_origin) << '\n'; + qd qd_inv_phi_ = 1.0 / qd_phi_origin; + std::cout << "inv_phi " << std::setprecision(64) << qd_inv_phi_ << '\n'; + std::cout << to_quad(qd_inv_phi_) << '\n'; + + // e to one hundred digits + std::string hundred_digits_of_e = "2.7182818284590452353602874713526624977572470936999595749669676277240766303535475945713821785251664274"; + + qd qd_e_origin(hundred_digits_of_e); + std::cout << "e " << std::setprecision(64) << qd_e_origin << '\n'; + std::cout << to_quad(qd_e_origin) << '\n'; + qd qd_inv_e_ = 1.0 / qd_e_origin; + std::cout << "inv_e " << std::setprecision(64) << qd_inv_e_ << '\n'; + std::cout << to_quad(qd_inv_e_) << '\n'; + + // pi to one hundred digits + std::string hundred_digits_of_pi = 
"3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679"; + + qd qd_pi_origin(hundred_digits_of_pi); + std::cout << "pi " << std::setprecision(64) << qd_pi_origin << '\n'; + std::cout << to_quad(qd_pi_origin) << '\n'; + qd qd_2pi_ = qd_pi_origin * 2.0; + std::cout << "2pi " << std::setprecision(64) << qd_2pi_ << '\n'; + std::cout << to_quad(qd_2pi_) << '\n'; + qd qd_pi2_ = qd_pi_origin * 0.5; + std::cout << "pi2 " << std::setprecision(64) << qd_pi2_ << '\n'; + std::cout << to_quad(qd_pi2_) << '\n'; + qd qd_pi4_ = qd_pi_origin * 0.25; + std::cout << "pi4 " << std::setprecision(64) << qd_pi4_ << '\n'; + std::cout << to_quad(qd_pi4_) << '\n'; + qd qd_3pi4_ = qd_pi_origin * 0.75; + std::cout << "3pi4 " << std::setprecision(64) << qd_3pi4_ << '\n'; + std::cout << to_quad(qd_3pi4_) << '\n'; + + qd qd_inv_pi_ = 1.0 / qd_pi_origin; + std::cout << "1/pi " << std::setprecision(64) << qd_inv_pi_ << '\n'; + std::cout << to_quad(qd_inv_pi_) << '\n'; + qd qd_inv_pi2_ = 1.0 / qd_pi2_; + std::cout << "1/pi2 " << std::setprecision(64) << qd_inv_pi2_ << '\n'; + std::cout << to_quad(qd_inv_pi2_) << '\n'; + + // natural logarithm (base = e) + qd qd_ln2_("0.6931471805599453094172321214581765680755001343602552541206800094933936219696947156058633269964186875"); + std::cout << "ln(2) " << std::setprecision(64) << qd_ln2_ << '\n'; + std::cout << to_quad(qd_ln2_) << '\n'; + qd qd_ln10_("2.302585092994045684017991454684364207601101488628772976033327900967572609677352480235997205089598298"); + std::cout << "ln(10) " << std::setprecision(64) << qd_ln10_ << '\n'; + std::cout << to_quad(qd_ln10_) << '\n'; + + // binary logarithm (base = 2) + qd qd_lge_("1.442695040888963407359924681001892137426645954152985934135449406931109219181185079885526622893506344"); + std::cout << "lg(e) " << std::setprecision(64) << qd_lge_ << '\n'; + std::cout << to_quad(qd_lge_) << '\n'; + qd 
qd_lg10_("3.321928094887362347870319429489390175864831393024580612054756395815934776608625215850139743359370155"); + std::cout << "lg(10) " << std::setprecision(64) << qd_lg10_ << '\n'; + std::cout << to_quad(qd_lg10_) << '\n'; + + // common logarithm (base = 10) + qd qd_log2_("0.301029995663981195213738894724493026768189881462108541310427461127108189274424509486927252118186172"); + std::cout << "log(2) " << std::setprecision(64) << qd_log2_ << '\n'; + std::cout << to_quad(qd_log2_) << '\n'; + qd qd_loge_("0.4342944819032518276511289189166050822943970058036665661144537831658646492088707747292249493384317483"); + std::cout << "log(e) " << std::setprecision(64) << qd_loge_ << '\n'; + std::cout << to_quad(qd_loge_) << '\n'; + qd qd_log10_("1.0"); + std::cout << "log(10) " << std::setprecision(64) << qd_log10_ << '\n'; + std::cout << to_quad(qd_log10_) << '\n'; + + qd qd_sqrt2_("1.414213562373095048801688724209698078569671875376948073176679737990732478462107038850387534327641573"); + std::cout << "sqrt(2) " << std::setprecision(64) << qd_sqrt2_ << '\n'; + std::cout << to_quad(qd_sqrt2_) << '\n'; + qd qd_inv_sqrt2_ = 1.0 / qd_sqrt2_; + std::cout << "inv_sqrt(2) " << std::setprecision(64) << qd_inv_sqrt2_ << '\n'; + std::cout << to_quad(qd_inv_sqrt2_) << '\n'; + } + } +} + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double constants"; + std::string test_tag = "qd constants"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + // we need 64 digits of precision in the strings + + std::cout << "verifying constants\n"; + struct constant_kv { + std::string name; + std::string digits; + qd value; + } constant_symbol_table[] = { + { "qd_phi" , "1.6180339887498948482045868343656381177203091798057628621354486228", qd_phi }, + { "qd_inv_phi", "0.6180339887498948482045868343656381177203091798057628621354486227", qd_inv_phi }, + + { "qd_e" , "2.7182818284590452353602874713526624977572470936999595749669676277", qd_e }, + { "qd_inv_e" , "0.3678794411714423215955237701614608674458111310317678345078368017", qd_inv_e }, + + { "qd_2pi" , "6.2831853071795864769252867665590057683943387987502116419498891847", qd_2pi }, + { "qd_pi" , "3.1415926535897932384626433832795028841971693993751058209749445923", qd_pi }, + { "qd_pi2" , "1.5707963267948966192313216916397514420985846996875529104874722962", qd_pi2 }, + { "qd_pi4" , "0.7853981633974483096156608458198757210492923498437764552437361481", qd_pi4 }, + { "qd_3pi4" , "2.3561944901923449288469825374596271631478770495313293657312084443", qd_3pi4 }, + + { "qd_inv_pi" , "0.31830988618379067153776752674502872406891929148091289749533468812", qd_inv_pi }, + { "qd_inv_pi2", "0.63661977236758134307553505349005744813783858296182579499066937624", qd_inv_pi2 }, + + { "qd_ln2" , "0.69314718055994530941723212145817656807550013436025525412068000950", qd_ln2 }, + { "qd_lne" , 
"1.00000000000000000000000000000000000000000000000000000000000000000", qd_lne }, + { "qd_ln10" , "2.30258509299404568401799145468436420760110148862877297603332790097", qd_ln10 }, + + { "qd_lg2" , "1.0000000000000000000000000000000000000000000000000000000000000000", qd_lg2 }, + { "qd_lge" , "1.4426950408889634073599246810018921374266459541529859341354494069", qd_lge }, + { "qd_lg10" , "3.3219280948873623478703194294893901758648313930245806120547563956", qd_lg10 }, + + { "qd_log2" , "3.0102999566398119521373889472449302676818988146210854131042746113e-01", qd_log2 }, + { "qd_loge" , "4.3429448190325182765112891891660508229439700580366656611445378316e-01", qd_loge }, + { "qd_log10" , "1.00000000000000000000000000000000000000000000000000000000000000000", qd_log10 }, + + { "qd_sqrt2" , "1.4142135623730950488016887242096980785696718753769480731766797380", qd_sqrt2 }, + { "qd_inv_sqrt2", "7.0710678118654752440084436210484903928483593768847403658833986899e-01", qd_inv_sqrt2 }, + }; + + /* + * + * ETLO August 14, 2024 + * Need to verify if these are the most accurate quad-double approximations available. 
+ * +verifying constants +qd_phi : 1.61803398874989484820458683436564e+00 vs 1.61803398874989484820458683436564e+00 : ( 1.6180339887498949, -5.4321152036825061e-17, 2.6543252083815655e-33, -3.3049919975020983e-50) : 4.74778387287989937373662113478098e-66 +qd_inv_phi : 6.18033988749894848204586834365638e-01 vs 6.18033988749894848204586834365638e-01 : ( 0.6180339887498949, -5.4321152036825061e-17, 2.6543252083815655e-33, -3.3049919975021083e-50) : 2.84867032372793962424197268086859e-65 +qd_e : 2.71828182845904523536028747135266e+00 vs 2.71828182845904523536028747135266e+00 : ( 2.7182818284590451, 1.4456468917292502e-16, -2.1277171080381768e-33, 1.5156301598412188e-49) : -1.89911354915195974949464845391239e-65 +qd_inv_e : 3.67879441171442321595523770161461e-01 vs 3.67879441171442321595523770161461e-01 : ( 0.36787944117144233, -1.2428753672788363e-17, -5.830044851072742e-34, -2.8267977849017436e-50) : 0.00000000000000000000000000000000e+00 +qd_2pi : 6.28318530717958647692528676655901e+00 vs 6.28318530717958647692528676655901e+00 : ( 6.2831853071795862, 2.4492935982947064e-16, -5.9895396194366793e-33, 2.2249084417267317e-49) : 3.79822709830391949898929690782478e-65 +qd_pi : 3.14159265358979323846264338327950e+00 vs 3.14159265358979323846264338327950e+00 : ( 3.1415926535897931, 1.2246467991473532e-16, -2.9947698097183397e-33, 1.1124542208633653e-49) : -3.79822709830391949898929690782478e-65 +qd_pi2 : 1.57079632679489661923132169163975e+00 vs 1.57079632679489661923132169163975e+00 : ( 1.5707963267948966, 6.123233995736766e-17, -1.4973849048591698e-33, 5.5622711043168312e-50) : 2.84867032372793962424197268086859e-65 +qd_pi4 : 7.85398163397448309615660845819876e-01 vs 7.85398163397448309615660845819876e-01 : ( 0.78539816339744828, 3.061616997868383e-17, -7.4869245242958492e-34, 2.7811355521584156e-50) : 1.42433516186396981212098634043429e-65 +qd_3pi4 : 2.35619449019234492884698253745963e+00 vs 2.35619449019234492884698253745963e+00 : ( 2.3561944901923448, 
9.1848509936051484e-17, 3.9168984647504003e-33, -2.5867981632704857e-49) : 3.79822709830391949898929690782478e-65 +qd_inv_pi : 3.18309886183790671537767526745029e-01 vs 3.18309886183790671537767526745029e-01 : ( 0.31830988618379069, -1.9678676675182486e-17, -1.0721436282893004e-33, 8.053563926594112e-50) : 0.00000000000000000000000000000000e+00 +qd_inv_pi2 : 6.36619772367581343075535053490057e-01 vs 6.36619772367581343075535053490057e-01 : ( 0.63661977236758138, -3.9357353350364972e-17, -2.1442872565786008e-33, 1.6107127853188224e-49) : 0.00000000000000000000000000000000e+00 +qd_ln2 : 6.93147180559945309417232121458177e-01 vs 6.93147180559945309417232121458177e-01 : ( 0.69314718055994529, 2.3190468138462996e-17, 5.7077084384162121e-34, -3.5824322106018109e-50) : -4.74778387287989937373662113478098e-66 +qd_lne : 1.00000000000000000000000000000000e+00 vs 1.00000000000000000000000000000000e+00 : ( 1, 0, 0, 0) : 0.00000000000000000000000000000000e+00 +qd_ln10 : 2.30258509299404568401799145468436e+00 vs 2.30258509299404568401799145468436e+00 : ( 2.3025850929940459, -2.1707562233822494e-16, -9.9842624544657766e-33, -4.0233574544502064e-49) : 7.59645419660783899797859381564957e-65 +qd_lg2 : 1.00000000000000000000000000000000e+00 vs 1.00000000000000000000000000000000e+00 : ( 1, 0, 0, 0) : 0.00000000000000000000000000000000e+00 +qd_lge : 1.44269504088896340735992468100189e+00 vs 1.44269504088896340735992468100189e+00 : ( 1.4426950408889634, 2.0355273740931033e-17, -1.0614659956117258e-33, -1.3836716780181433e-50) : -3.79822709830391949898929690782478e-65 +qd_lg10 : 3.32192809488736234787031942948939e+00 vs 3.32192809488736234787031942948939e+00 : ( 3.3219280948873622, 1.661617516973592e-16, 1.2215512178458181e-32, 5.9551189702782473e-49) : -7.59645419660783899797859381564957e-65 +qd_log2 : 3.01029995663981195213738894724493e-01 vs 3.01029995663981195213738894724493e-01 : ( 0.3010299956639812, -2.8037281277851704e-18, 5.4719484023146385e-35, 5.1051389831070954e-51) : 
-4.15431088876991195201954349293336e-66 +qd_loge : 4.34294481903251827651128918916605e-01 vs 4.34294481903251827651128918916605e-01 : ( 0.43429448190325182, 1.0983196502167651e-17, 3.717181233110959e-34, 7.7344843465042927e-51) : 0.00000000000000000000000000000000e+00 +qd_log10 : 1.00000000000000000000000000000000e+00 vs 1.00000000000000000000000000000000e+00 : ( 1, 0, 0, 0) : 0.00000000000000000000000000000000e+00 +qd_sqrt2 : 1.41421356237309504880168872420970e+00 vs 1.41421356237309504880168872420970e+00 : ( 1.4142135623730951, -9.6672933134529135e-17, 4.1386753086994136e-33, 4.9355469914683519e-50) : -1.89911354915195974949464845391239e-65 +qd_inv_sqrt2 : 7.07106781186547524400844362104849e-01 vs 7.07106781186547524400844362104849e-01 : ( 0.70710678118654757, -4.8336466567264567e-17, 2.0693376543497068e-33, 2.4677734957341759e-50) : 1.42433516186396981212098634043429e-65 + */ + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(32); + for (auto e : constant_symbol_table) { + qd c(e.digits); + qd error = (c - e.value); + std::cout << std::left << std::setw(15) << e.name << " : " << c << " vs " << e.value << " : " << to_quad(c) << " : " << error << '\n'; + } + std::cout << std::setprecision(oldPrec); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore failures +#else // !MANUAL_TESTING + +#if REGRESSION_LEVEL_1 + +#endif + +#if REGRESSION_LEVEL_2 +#endif + +#if REGRESSION_LEVEL_3 +#endif + +#if REGRESSION_LEVEL_4 +#endif + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? 
EXIT_FAILURE : EXIT_SUCCESS); +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/api/experiments.cpp b/static/qd/api/experiments.cpp new file mode 100644 index 000000000..1b42d7d0f --- /dev/null +++ b/static/qd/api/experiments.cpp @@ -0,0 +1,211 @@ +// experiments.cpp: experiments with the quad-double floating-point number system +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +#include +#include +// minimum set of include files to reflect source code dependencies +// Configure the qd template environment +// enable/disable arithmetic exceptions +#define DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION 0 +#include +#include +#include +#include + +namespace sw { + namespace universal { + + + /* + enum _Fmtflags { // constants for formatting options + _Fmtmask = 0xffff, + _Fmtzero = 0 + }; + + static constexpr int skipws = 0x0001; + static constexpr int unitbuf = 0x0002; + static constexpr int uppercase = 0x0004; + static constexpr int showbase = 0x0008; + static constexpr int showpoint = 0x0010; + static constexpr int showpos = 0x0020; + static constexpr int left = 0x0040; + static constexpr int right = 0x0080; + static constexpr int internal = 0x0100; + static constexpr int dec = 0x0200; + static constexpr int oct = 0x0400; + static constexpr int hex = 0x0800; + static constexpr int scientific = 0x1000; + static constexpr int fixed = 0x2000; + + static constexpr int boolalpha = 0x4000; + static constexpr int adjustfield = left | right | internal; + static constexpr int basefield = dec | oct | hex; + static constexpr int floatfield = scientific | fixed; + */ + struct fmtCapture { + double v; + }; + + std::ostream& operator<<(std::ostream& ostr, const fmtCapture& v) { + std::ios_base::fmtflags fmt = ostr.flags(); + std::streamsize precision = ostr.precision(); + std::streamsize width = ostr.width(); +// char fillChar = ostr.fill(); +// bool showpos = fmt & std::ios_base::showpos; +// bool uppercase = fmt & std::ios_base::uppercase; + bool fixed = fmt & std::ios_base::fixed; + bool scientific = fmt & std::ios_base::scientific; + + bool left = fmt & std::ios_base::left; + bool right = fmt & std::ios_base::right; + bool internal = fmt & std::ios_base::internal; + + ostr << "width = " << width << '\n'; + ostr << "precision = " << precision << '\n'; + ostr << (fixed ? "fixed\n" : "not fixed\n"); + ostr << (scientific ? 
"scientific\n" : "not scientific\n"); + ostr << (left ? "left\n" : "not left\n"); + ostr << (internal ? "internal\n" : "not internal\n"); + ostr << (right ? "right\n" : "not right\n"); + + return ostr << v.v; + } + + } +} + + + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double (qd) experiments"; + int nrOfFailedTestCases = 0; + + auto oldPrec = std::cout.precision(); + + { + // what is the difference between ostream fmt scientific/fixed + + fmtCapture v; + v.v = 1.0e10; + std::cout << " 1 " << v << '\n'; + std::cout << " 2 " << std::fixed << v << '\n'; + std::cout << " 3 " << std::scientific << v << '\n'; + std::cout << " 4 " << std::defaultfloat << v << '\n'; + std::cout << " 5 " << std::setw(10) << v << '\n'; + + std::cout << " 6 " << std::fixed << std::scientific << v << '\n'; + std::cout << " 7 " << v << '\n'; + std::cout << " 8 " << std::scientific << std::fixed << v << '\n'; + std::cout << " 9 " << v << '\n'; + } + + + std::cout << std::setprecision(oldPrec); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) 
{ + std::cerr << "caught unknown exception" << std::endl; + return EXIT_FAILURE; +} + + +/* +void ThreeSumExperiments() { + using namespace sw::universal; + std::cout << "three_sum experiments\n"; + + double a{ 1.0 }; + double b{ ulp(a) / 2.0 }; + double c{ ulp(b) / 2.0 }; + ReportValue(a, "a = 1.0"); + ReportValue(b, "b = ulp(1.0)/2"); + ReportValue(c, "c = ulp(b)/2"); + + std::cout << "two_sum\n"; + double r{ 0 }; + double s = two_sum(a, b, r); + ReportValue(s, "sum"); + ReportValue(r, "residual"); + + std::cout << "three_sum\n"; + double aa{ a }, bb{ b }, cc{ c }; + ReportValue(a, "a"); + ReportValue(b, "b"); + ReportValue(c, "c"); + three_sum(a, b, c); + ReportValue(a, "a"); + ReportValue(b, "b"); + ReportValue(c, "c"); + + std::cout << "three_sum2\n"; + a = aa, b = bb, c = cc; // reload + ReportValue(a, "a"); + ReportValue(b, "b"); + ReportValue(c, "c"); + three_sum2(a, b, c); + ReportValue(a, "a"); + ReportValue(b, "b"); + // c is unchanged + + std::cout << "three_sum3\n"; + a = aa, b = bb, c = cc; // reload + ReportValue(a, "a"); + ReportValue(b, "b"); + ReportValue(c, "c"); + double sum = three_sum3(a, b, c); + ReportValue(sum, "three_sum3"); + +} + +three_sum experiments + a = 1.0 : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + b = ulp(1.0)/2 : 0b0.011'1100'1010.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.110223e-16 + c = ulp(b)/2 : 0b0.011'1001'0101.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.232595e-32 +two_sum + sum : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + residual : 0b0.011'1100'1010.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.110223e-16 +three_sum + in a : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + b : 0b0.011'1100'1010.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.110223e-16 + c : 
0b0.011'1001'0101.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.232595e-32 + out a : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + b : 0b0.011'1100'1010.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.110223e-16 + c : 0b0.011'1001'0101.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.232595e-32 +three_sum2 + in a : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + b : 0b0.011'1100'1010.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.110223e-16 + c : 0b0.011'1001'0101.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.232595e-32 + out a : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + b : 0b0.011'1100'1010.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.110223e-16 +three_sum3 + in a : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + b : 0b0.011'1100'1010.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.110223e-16 + c : 0b0.011'1001'0101.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1.232595e-32 + out three_sum3 : 0b0.011'1111'1111.0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000 : 1 + +*/ \ No newline at end of file diff --git a/static/qd/arithmetic/arithmetic.cpp b/static/qd/arithmetic/arithmetic.cpp new file mode 100644 index 000000000..ec9e784f7 --- /dev/null +++ b/static/qd/arithmetic/arithmetic.cpp @@ -0,0 +1,325 @@ +// arithmetic.cpp: test suite runner of arithmetic operations on quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sw { + namespace universal { + + constexpr int LABELWIDTH = 15; + constexpr int PRECISION = 25; + + void TestArithmeticOp(const sw::universal::qd& a, sw::universal::RandomsOp op, const sw::universal::qd& b) { + using namespace sw::universal; + bool binaryOp = true; + qd c; + switch (op) { + case RandomsOp::OPCODE_ADD: + c = a + b; + break; + case RandomsOp::OPCODE_SUB: + c = a - b; + break; + case RandomsOp::OPCODE_MUL: + c = a * b; + break; + case RandomsOp::OPCODE_DIV: + c = a / b; + break; + case RandomsOp::OPCODE_SQRT: + c = sqrt(a); + binaryOp = false; + break; + case RandomsOp::OPCODE_NOP: + case RandomsOp::OPCODE_ASSIGN: + case RandomsOp::OPCODE_IPA: // In Place Add + case RandomsOp::OPCODE_IPS: // In Place Sub + case RandomsOp::OPCODE_IPM: // In Place Mul + case RandomsOp::OPCODE_IPD: // In Place Div + case RandomsOp::OPCODE_EXP: + case RandomsOp::OPCODE_EXP2: + case RandomsOp::OPCODE_LOG: + case RandomsOp::OPCODE_LOG2: + case RandomsOp::OPCODE_LOG10: + case RandomsOp::OPCODE_SIN: + case RandomsOp::OPCODE_COS: + case RandomsOp::OPCODE_TAN: + case RandomsOp::OPCODE_ASIN: + case RandomsOp::OPCODE_ACOS: + case RandomsOp::OPCODE_ATAN: + case RandomsOp::OPCODE_SINH: + case RandomsOp::OPCODE_COSH: + case RandomsOp::OPCODE_TANH: + case RandomsOp::OPCODE_ASINH: + case RandomsOp::OPCODE_ACOSH: + case RandomsOp::OPCODE_ATANH: + case RandomsOp::OPCODE_POW: + case RandomsOp::OPCODE_HYPOT: + case RandomsOp::OPCODE_RAN: + std::cerr << "invalid operator: test ignored\n"; + break; + default: + std::cerr << "unknown operator: test ignored\n"; + break; + } + ReportValue(a, "a", LABELWIDTH, PRECISION); + if (binaryOp) ReportValue(b, "b", LABELWIDTH, PRECISION); + ReportValue(c, "c", LABELWIDTH, PRECISION); + } + + + + void TestReciprocalIdentity(sw::universal::qd const& a) { + + qd oneOverA = reciprocal(a); + + qd one(1.0); + qd error = one - a * oneOverA; + ReportValue(a,
"a", LABELWIDTH, PRECISION); + ReportValue(oneOverA, "1/a", LABELWIDTH, PRECISION); + ReportValue(error, "error", LABELWIDTH, PRECISION); + } + + void TestDivisionalIdentity(sw::universal::qd const& a) { + + qd oneOverA = 1.0 / a; + + qd one(1.0); + qd error = one - a * oneOverA; + ReportValue(a, "a", LABELWIDTH, PRECISION); + ReportValue(oneOverA, "1/a", LABELWIDTH, PRECISION); + ReportValue(error, "error", LABELWIDTH, PRECISION); + } + + void TestRandomReciprocalIdentities(int nrRandoms = 10) { + std::default_random_engine generator; + std::uniform_real_distribution< double > distr(-1048576.0, 1048576.0); + + for (int i = 0; i < nrRandoms; ++i) { + qd a = distr(generator); + TestReciprocalIdentity(a); + } + } + + void TestRandomDivisionalIdentities(int nrRandoms = 10) { + std::default_random_engine generator; + std::uniform_real_distribution< double > distr(-1048576.0, 1048576.0); + + for (int i = 0; i < nrRandoms; ++i) { + qd a = distr(generator); + TestDivisionalIdentity(a); + } + } + + void AdditionSubtraction() { + double a0 = 1.0; + double a1 = ulp(a0) / 2.0; + double a2 = ulp(a1) / 2.0; + double a3 = ulp(a2) / 2.0; + + ReportValue(a0, "a0 = 1.0"); + ReportValue(a1, "a1 = ulp(a0) / 2.0"); + ReportValue(a2, "a2 = ulp(a1) / 2.0"); + ReportValue(a3, "a3 = ulp(a2) / 2.0"); + renorm(a0, a1, a2, a3); // double check this is a normalized quad-double configuration + ReportValue(a0, "a0 = 1.0"); + ReportValue(a1, "a1 = ulp(a0) / 2.0"); + ReportValue(a2, "a2 = ulp(a1) / 2.0"); + ReportValue(a3, "a3 = ulp(a2) / 2.0"); + + double b0 = 1.0; + double b1 = ulp(b0) / 2.0; + double b2 = ulp(b1) / 2.0; + double b3 = ulp(b2) / 2.0; + + qd a(a0, a1, a2, a3); + qd b(b0, b1, b2, b3); + + qd accurate_sum = a.accurate_addition(a, b); + ReportValue(accurate_sum[0], "accurate_sum[0]"); + ReportValue(accurate_sum[1], "accurate_sum[1]"); + ReportValue(accurate_sum[2], "accurate_sum[2]"); + ReportValue(accurate_sum[3], "accurate_sum[3]"); + + qd approximate_sum = 
a.approximate_addition(a, b); + ReportValue(approximate_sum[0], "approximate_sum[0]"); + ReportValue(approximate_sum[1], "approximate_sum[1]"); + ReportValue(approximate_sum[2], "approximate_sum[2]"); + ReportValue(approximate_sum[3], "approximate_sum[3]"); + + std::cout << to_quad(accurate_sum) << '\n'; + std::cout << to_binary(accurate_sum, true) << '\n'; + + qd mina = -a; + qd doublea = a + a; + qd zero = a + mina; + std::cout << to_quad(a) << '\n'; + std::cout << to_quad(mina) << '\n'; + std::cout << to_quad(doublea) << '\n'; + std::cout << to_quad(zero) << '\n'; + qd zero2 = a - a; + std::cout << to_quad(zero2) << '\n'; + qd zero3 = -a + a; + std::cout << to_quad(zero3) << '\n'; + } + } +} + + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double arithmetic validation"; + std::string test_tag = "quad-double arithmetic"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + AdditionSubtraction(); + + { + qd a{ 1.0 }, b{ 2.0 }, c{}; + + a *= 2.0; + c = a * b; + + std::cout << to_binary(c) << '\n'; + std::cout << "product : " << c << '\n'; + } + + { + double a0 = 1.0; + double a1 = ulp(a0) / 2.0; + double a2 = ulp(a1) / 2.0; + double a3 = ulp(a2) / 2.0; + + qd a(a0, a1, a2, a3); + std::cout << to_binary(a) << '\n'; + a *= 2; + qd c{ a }; + std::cout << to_binary(c) << '\n'; + std::cout << "product : " << c << '\n'; + } + + std::cout << "division\n"; + { + double a0 = 1.0; + double a1 = ulp(a0) / 2.0; + double a2 = ulp(a1) / 2.0; + double a3 = ulp(a2) / 2.0; + + qd a(a0, a1, a2, a3); + qd b{ 2.0 }; + qd c = a * b; + std::cout << to_binary(c) << '\n'; + c = c / b; + std::cout << to_binary(c) << '\n'; + std::cout << "ratio : " << c << '\n'; + } + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore failures +#else // !MANUAL_TESTING + +#if REGRESSION_LEVEL_1 + + constexpr unsigned nrOfRandoms = 1000; + std::stringstream aqds; + aqds << test_tag << " " << nrOfRandoms << " random aqds"; + std::string description = aqds.str(); + nrOfFailedTestCases += ReportTestResult( + VerifyBinaryOperatorThroughRandoms(reportTestCases, RandomsOp::OPCODE_ADD, nrOfRandoms), + description, + test_tag + ); + std::stringstream subs; + subs << test_tag << " " << nrOfRandoms << " random subs"; + description = 
subs.str(); + nrOfFailedTestCases += ReportTestResult( + VerifyBinaryOperatorThroughRandoms(reportTestCases, RandomsOp::OPCODE_SUB, nrOfRandoms), + description, + test_tag + ); + std::stringstream muls; + muls << test_tag << " " << nrOfRandoms << " random muls"; + description = muls.str(); + nrOfFailedTestCases += ReportTestResult( + VerifyBinaryOperatorThroughRandoms(reportTestCases, RandomsOp::OPCODE_MUL, nrOfRandoms), + description, + test_tag + ); + std::stringstream divs; + divs << test_tag << " " << nrOfRandoms << " random divs"; + description = divs.str(); + nrOfFailedTestCases += ReportTestResult( + VerifyBinaryOperatorThroughRandoms(reportTestCases, RandomsOp::OPCODE_DIV, nrOfRandoms), + description, + test_tag + ); + +#endif + +#if REGRESSION_LEVEL_2 +#endif + +#if REGRESSION_LEVEL_3 +#endif + +#if REGRESSION_LEVEL_4 +#endif + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) 
{ + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/classify.cpp b/static/qd/math/classify.cpp new file mode 100644 index 000000000..e11853fd9 --- /dev/null +++ b/static/qd/math/classify.cpp @@ -0,0 +1,152 @@ +// classify.cpp: test suite runner for quad-double (qd) classification functions +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 0 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. +//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib classification function validation"; + std::string test_tag = "isfinite/isinf/isnan/isnormal/isdenorm/iszero/signbit"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + std::cout << "fpclassify(qnan) = " << fpclassify(std::numeric_limits::quiet_NaN()) << " == " << FP_NAN << "\n"; + std::cout << "fpclassify(snan) = " << fpclassify(std::numeric_limits::signaling_NaN()) << " == " << FP_NAN << "\n"; + std::cout << "fpclassify(-inf) = " << 
fpclassify(-std::numeric_limits::infinity()) << " == " << FP_INFINITE << "\n"; + std::cout << "fpclassify(-1.0) = " << fpclassify(qd(-1.0)) << " == " << FP_NORMAL << "\n"; + std::cout << "fpclassify(-0.0) = " << fpclassify(qd("-0.0")) << " == " << FP_ZERO << "\n"; + std::cout << "fpclassify(0.0) = " << fpclassify(qd("0.0")) << " == " << FP_ZERO << "\n"; + std::cout << "fpclassify(1.0) = " << fpclassify(qd(1.0)) << " == " << FP_NORMAL << "\n"; + std::cout << "fpclassify(inf) = " << fpclassify(std::numeric_limits::infinity()) << " == " << FP_INFINITE << "\n"; + std::cout << "\n"; + + std::cout << "isfinite(qnan) = " << isfinite(std::numeric_limits::quiet_NaN()) << "\n"; + std::cout << "isfinite(snan) = " << isfinite(std::numeric_limits::signaling_NaN()) << "\n"; + std::cout << "isfinite(-inf) = " << isfinite(-std::numeric_limits::infinity()) << "\n"; + std::cout << "isfinite(-1.0) = " << isfinite(qd(-1.0)) << "\n"; + std::cout << "isfinite(-0.0) = " << isfinite(qd("-0.0")) << "\n"; + std::cout << "isfinite(0.0) = " << isfinite(qd("0.0")) << "\n"; + std::cout << "isfinite(1.0) = " << isfinite(qd(1.0)) << "\n"; + std::cout << "isfinite(inf) = " << isfinite(std::numeric_limits::infinity()) << "\n"; + std::cout << "\n"; + + std::cout << "isinf(qnan) = " << isinf(std::numeric_limits::quiet_NaN()) << "\n"; + std::cout << "isinf(snan) = " << isinf(std::numeric_limits::signaling_NaN()) << "\n"; + std::cout << "isinf(-inf) = " << isinf(-std::numeric_limits::infinity()) << "\n"; + std::cout << "isinf(-1.0) = " << isinf(qd(-1.0)) << "\n"; + std::cout << "isinf(-0.0) = " << isinf(qd("-0.0")) << "\n"; + std::cout << "isinf(0.0) = " << isinf(qd("0.0")) << "\n"; + std::cout << "isinf(1.0) = " << isinf(qd(1.0)) << "\n"; + std::cout << "isinf(inf) = " << isinf(std::numeric_limits::infinity()) << "\n"; + std::cout << "\n"; + + std::cout << "isnan(qnan) = " << isnan(std::numeric_limits::quiet_NaN()) << "\n"; + std::cout << "isnan(snan) = " << isnan(std::numeric_limits::signaling_NaN()) 
<< "\n"; + std::cout << "isnan(-inf) = " << isnan(-std::numeric_limits::infinity()) << "\n"; + std::cout << "isnan(-1.0) = " << isnan(qd(-1.0)) << "\n"; + std::cout << "isnan(-0.0) = " << isnan(qd("-0.0")) << "\n"; + std::cout << "isnan(0.0) = " << isnan(qd("0.0")) << "\n"; + std::cout << "isnan(1.0) = " << isnan(qd(1.0)) << "\n"; + std::cout << "isnan(inf) = " << isnan(std::numeric_limits::infinity()) << "\n"; + std::cout << "\n"; + + std::cout << "isnormal(qnan) = " << isnormal(std::numeric_limits::quiet_NaN()) << "\n"; + std::cout << "isnormal(snan) = " << isnormal(std::numeric_limits::signaling_NaN()) << "\n"; + std::cout << "isnormal(-inf) = " << isnormal(-std::numeric_limits::infinity()) << "\n"; + std::cout << "isnormal(-1.0) = " << isnormal(qd(-1.0)) << "\n"; + std::cout << "isnormal(-0.0) = " << isnormal(qd("-0.0")) << "\n"; + std::cout << "isnormal(0.0) = " << isnormal(qd("0.0")) << "\n"; + std::cout << "isnormal(1.0) = " << isnormal(qd(1.0)) << "\n"; + std::cout << "isnormal(inf) = " << isnormal(std::numeric_limits::infinity()) << "\n"; + std::cout << "\n"; + + constexpr double minpos = std::numeric_limits::min(); + std::cout << to_binary(minpos) << " : " << minpos << '\n'; + double subnormal = minpos / 2.0; + std::cout << to_binary(subnormal) << " : " << subnormal << '\n'; + + std::cout << "isdenorm(qnan) = " << isdenorm(std::numeric_limits::quiet_NaN()) << "\n"; + std::cout << "isdenorm(snan) = " << isdenorm(std::numeric_limits::signaling_NaN()) << "\n"; + std::cout << "isdenorm(-inf) = " << isdenorm(-std::numeric_limits::infinity()) << "\n"; + std::cout << "isdenorm(-1.0) = " << isdenorm(qd(-1.0)) << "\n"; + std::cout << "isdenorm(-0.0) = " << isdenorm(qd("-0.0")) << "\n"; + std::cout << "isdenorm(0.0) = " << isdenorm(qd("0.0")) << "\n"; + std::cout << "isdenorm(subnorm) = " << isdenorm(subnormal) << "\n"; + std::cout << "isdenorm(1.0) = " << isdenorm(qd(1.0)) << "\n"; + std::cout << "isdenorm(inf) = " << isdenorm(std::numeric_limits::infinity()) << 
"\n"; + std::cout << "\n"; + + std::cout << "iszero(qnan) = " << iszero(std::numeric_limits::quiet_NaN()) << "\n"; + std::cout << "iszero(snan) = " << iszero(std::numeric_limits::signaling_NaN()) << "\n"; + std::cout << "iszero(-inf) = " << iszero(-std::numeric_limits::infinity()) << "\n"; + std::cout << "iszero(-1.0) = " << iszero(qd(-1.0)) << "\n"; + std::cout << "iszero(-0.0) = " << iszero(qd("-0.0")) << "\n"; + std::cout << "iszero(0.0) = " << iszero(qd("0.0")) << "\n"; + std::cout << "iszero(1.0) = " << iszero(qd(1.0)) << "\n"; + std::cout << "iszero(inf) = " << iszero(std::numeric_limits::infinity()) << "\n"; + std::cout << "\n"; + + std::cout << "signbit(-inf) = " << signbit(-std::numeric_limits::infinity()) << "\n"; + std::cout << "signbit(-1.0) = " << signbit(qd(-1.0)) << "\n"; + std::cout << "signbit(-0.0) = " << signbit(qd("-0.0")) << "\n"; + std::cout << "signbit(0.0) = " << signbit(qd("0.0")) << "\n"; + std::cout << "signbit(1.0) = " << signbit(qd(1.0)) << "\n"; + std::cout << "signbit(inf) = " << signbit(std::numeric_limits::infinity()) << "\n"; + std::cout << "\n"; + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) 
{ + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/error_and_gamma.cpp b/static/qd/math/error_and_gamma.cpp new file mode 100644 index 000000000..c670b09da --- /dev/null +++ b/static/qd/math/error_and_gamma.cpp @@ -0,0 +1,105 @@ +// error_and_gamma.cpp: test suite runner for error and gamma functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include + +// generate specific test case +template +void GenerateTestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + sw::universal::qd a, ref, v; + a = fa; + fref = std::erf(fa); + ref = fref; + v = sw::universal::erf(a); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> erf(" << fa << ") = " << std::setw(width) << fref << std::endl; + std::cout << " -> erf( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << (ref == v ? "PASS" : "FAIL") << std::endl << std::endl; + std::cout << std::setprecision(oldPrec); +} + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib error/gamma function validation"; + std::string test_tag = "error/gamma"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + GenerateTestCase(1.0); + + qd x(-3.0), increment(0.5); + for (int i = 0; i < 14; i++) { + std::cout << " erf( " << x << ") = " << erf(x) << '\n'; + x += increment; + } + x = -3.0; + for (int i = 0; i < 14; i++) { + std::cout << " erfc( " << x << ") = " << erfc(x) << '\n'; + x += increment; + } + + //nrOfFailedTestCases += ReportTestResult(VerifyLogFunction("Manual Testing", reportTestCases), "qd", test_tag); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) 
{ + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/exponent.cpp b/static/qd/math/exponent.cpp new file mode 100644 index 000000000..ba1bb1e25 --- /dev/null +++ b/static/qd/math/exponent.cpp @@ -0,0 +1,126 @@ +// exponent.cpp: test suite runner for exponentiation function for quad-double (qd) floats +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include + +// generate specific test case +template +void GenerateTestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + sw::universal::qd a, ref, v; + a = fa; + fref = std::exp(fa); + ref = fref; + v = sw::universal::exp(a); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> exp(" << fa << ") = " << std::setw(width) << fref << std::endl; + std::cout << " -> exp( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << (ref == v ? "PASS" : "FAIL") << std::endl << std::endl; + std::cout << std::setprecision(oldPrec); +} + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib exponentiation function validation"; + std::string test_tag = "exp/exp2/exp10/expm1"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + // generate individual testcases to hand trace/debug + GenerateTestCase(4.0); + + auto oldPrec = std::cout.precision(); + for (int i = 0; i < 30; ++i) { + std::string tag = "exp(" + std::to_string(i) + ")"; + double exponentRef = std::exp(double(i)); + qd exponent = exp(qd(i)); + qd error = exponentRef - exponent; + std::cout << std::setw(20) << tag << " : " << std::setprecision(32) << exponentRef << " : " << exponent << " : " << std::setw(25) << error << '\n'; + } + + for (int i = 0; i < 30; ++i) { + std::string tag = "exp2(" + std::to_string(i) + ")"; + double exponentRef = std::exp2(double(i)); + qd exponent = exp2(qd(i)); + qd error = exponentRef - exponent; + std::cout << std::setw(20) << tag << " : " << std::setprecision(32) << exponentRef << " : " << exponent << " : " << std::setw(25) << error << '\n'; + } + + for (int i = 0; i < 30; ++i) { + std::string tag = "exp10(" + std::to_string(i) + ")"; + double exponentRef = std::pow(10.0, double(i)); + qd exponent = exp10(qd(i)); + qd error = exponentRef - exponent; + std::cout << std::setw(20) << tag << " : " << std::setprecision(32) << exponentRef << " : " << exponent << " : " << std::setw(25) << error << '\n'; + } + + for (int i = 0; i < 30; ++i) { + std::string tag = "expm1(" + std::to_string(i) + ")"; + double exponentRef = std::expm1(double(i)); + qd exponent = expm1(qd(i)); + qd 
error = exponentRef - exponent; + std::cout << std::setw(20) << tag << " : " << std::setprecision(32) << exponentRef << " : " << exponent << " : " << std::setw(25) << error << '\n'; + } + + std::cout << std::setprecision(oldPrec); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/fractional.cpp b/static/qd/math/fractional.cpp new file mode 100644 index 000000000..7b94013bd --- /dev/null +++ b/static/qd/math/fractional.cpp @@ -0,0 +1,106 @@ +// fractional.cpp: test suite runner for fractional functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +#include +#include +#include + +namespace sw { + namespace universal { + + template + void GenerateTestCase(Ty fa, Ty fb) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + sw::universal::qd a, b, ref, v; + a = fa; + b = fb; + fref = std::remainder(fa, fb); + ref = fref; + v = sw::universal::remainder(a, b); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> remainder(" << fa << "," << fb << ") = " << std::setw(width) << fref << std::endl; + std::cout << " -> remainder( " << a << "," << b << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << (ref == v ? "PASS" : "FAIL") << std::endl << std::endl; + std::cout << std::setprecision(oldPrec); + } + } +} + + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + using std::fmod; + + std::string test_suite = "quad-double mathlib fractional function validation"; + std::string test_tag = "fmod/remainder"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + double a{ 1.5 }, b{ 1.25 }; + qd da(a), db(b); + + std::cout << "fmod( " << a << ", " << b << ") = " << fmod(a, b) << '\n'; + std::cout << "fmod( " << da << ", " << db << ") = " << fmod(da, db) << '\n'; + + std::cout << "remainder( " << a << ", " << b << ") = " << remainder(a, b) << '\n'; + std::cout << "remainder( " << da << ", " << db << ") = " << remainder(da, db) << '\n'; + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) 
{ + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/hyperbolic.cpp b/static/qd/math/hyperbolic.cpp new file mode 100644 index 000000000..cbdd4640f --- /dev/null +++ b/static/qd/math/hyperbolic.cpp @@ -0,0 +1,97 @@ +// hyperbolic.cpp: test suite runner for hyperbolic functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include + + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. +//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib hyperbolic function validation"; + std::string test_tag = "hyperbolic"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + std::cout << "ALL HYPERBOLIC FUNCTIONS ARE SHIMS TO DOUBLE\n"; + + { + std::cout << "double reference\n"; + double x = std::numbers::pi * 0.25; + std::cout << "sinh( " << x << " ) = " << sinh(x) << '\n'; + std::cout << "cosh( " << x << " ) = " << cosh(x) << '\n'; + std::cout << "tanh( " << x << " ) = " << tanh(x) << '\n'; + + std::cout << "asinh( " << x << " ) = " << asinh(x) << '\n'; + std::cout << "acosh( " << x << 
" ) = " << acosh(x) << '\n'; + std::cout << "atanh( " << x << " ) = " << atanh(x) << '\n'; + } + + { + std::cout << "quad-double reference\n"; + qd x = qd_pi4; + std::cout << "sinh( " << x << " ) = " << sinh(x) << '\n'; + std::cout << "cosh( " << x << " ) = " << cosh(x) << '\n'; + std::cout << "tanh( " << x << " ) = " << tanh(x) << '\n'; + + std::cout << "asinh( " << x << " ) = " << asinh(x) << '\n'; + std::cout << "acosh( " << x << " ) = " << acosh(x) << '\n'; + std::cout << "atanh( " << x << " ) = " << atanh(x) << '\n'; + } + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/hypot.cpp b/static/qd/math/hypot.cpp new file mode 100644 index 000000000..b7c79ba3d --- /dev/null +++ b/static/qd/math/hypot.cpp @@ -0,0 +1,77 @@ +// hypot.cpp: test suite runner for hypot functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +#include +#include +#include + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. +//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib hypothenuse function validation"; + std::string test_tag = "hypot"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + std::cout << "HYPOT FUNCTION IS A SHIM TO DOUBLE\n"; + + qd x{ 3.0 }, y{ 4.0 }; + + std::cout << "hypot( " << x << ", " << y << ") = " << hypot(x, y) << '\n'; + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? 
EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/logarithm.cpp b/static/qd/math/logarithm.cpp new file mode 100644 index 000000000..dc1401dcd --- /dev/null +++ b/static/qd/math/logarithm.cpp @@ -0,0 +1,403 @@ +// logarithm.cpp: test suite runner for log/log1p/log2/log10 functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include +#include + +namespace sw { + namespace universal { + + qd trace_log(const qd& a) { + if (a.isnan() || a.isinf()) return a; + + if (a.iszero()) return qd(SpecificValue::infneg); + + if (a.isone()) return 0.0; + + if (a[0] <= 0.0) { + std::cerr << "log: non-positive argument\n"; + errno = EDOM; + return qd(SpecificValue::qnan); + } + + /* Strategy. The Taylor series for log converges much more + slowly than that of exp, due to the lack of the factorial + term in the denominator. Hence this routine instead tries + to determine the root of the function + + f(x) = exp(x) - a + + using Newton iteration. 
The iteration is given by + + x' = x - f(x)/f'(x) + = x - (1 - a * exp(-x)) + = x + a * exp(-x) - 1. + + Two iteration is needed, since Newton's iteration + approximately doubles the number of digits per iteration. + */ + + qd x = std::log(a[0]); // Initial approximation + std::cout << "initial approximation :\n" << to_binary(x) << '\n'; + + // if a = e then x = 1 + e * 1 / e - 1.0; + + x = x + a * exp(-x) - 1.0; + std::cout << "1st Newton iteration :\n" << to_binary(x) << '\n'; + x = x + a * exp(-x) - 1.0; + std::cout << "2nd Newton iteration :\n" << to_binary(x) << '\n'; + x = x + a * exp(-x) - 1.0; + std::cout << "3rd Newton iteration :\n" << to_binary(x) << '\n'; + + return x; + } + + // generate specific test case + template + void GenerateLogTestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + qd a, ref, v; + a = fa; + fref = std::log(fa); + ref = fref; + v = sw::universal::log(a); + qd error = (v - ref); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> log(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? 
"PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void GenerateLog2TestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + qd a, ref, v; + a = fa; + fref = std::log2(fa); + ref = fref; + v = sw::universal::log2(a); + qd error = (v - ref); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> log2(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log2( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? "PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void GenerateLog10TestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + qd a, ref, v; + a = fa; + fref = std::log10(fa); + ref = fref; + v = sw::universal::log10(a); + auto oldPrec = std::cout.precision(); + qd error = (v - ref); + std::cout << std::setprecision(precision); + std::cout << " -> log10(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log10( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? 
"PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void GenerateLog1pTestCase(Ty fa) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + qd a, ref, v; + a = fa; + fref = std::log1p(fa); + ref = fref; + v = sw::universal::log1p(a); + auto oldPrec = std::cout.precision(); + qd error = (v - ref); + std::cout << std::setprecision(precision); + std::cout << " -> log1p(" << fa << ") = " << std::setw(width) << fref << '\n'; + std::cout << " -> log1p( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << " error : " << error << '\n'; + std::cout << (ref == v ? "PASS" : "FAIL") << '\n'; + std::cout << '\n'; + std::cout << std::setprecision(oldPrec); + } + + template + void ReportQuadDoubleFunctionError(const std::string& op, const TestType& a, const TestType& ref, const TestType& error) { + std::cerr << op << " : " << a << " != " << ref << " : error : " << error << '\n'; + } + + template + int VerifyLogFunction(bool reportTestCases, double maxError = 1.0e-15) { + using std::log; + int nrOfFailedTestCases{ 0 }; + constexpr double eulersNr = std::numbers::e; + for (int i = -64; i < 65; ++i) { + double da = std::pow(eulersNr, double(i)); + TestType a = da; + double dref = log(da); + TestType ref = dref; + TestType v = log(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) ReportQuadDoubleFunctionError("log", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + template + int VerifyLog2Function(bool reportTestCases, double maxError = 1.0e-15) { + using std::log2; + int nrOfFailedTestCases{ 0 }; + for (int i = -64; i < 65; ++i) { + double da = std::pow(2.0, double(i)); + TestType a = da; + double dref = log2(da); + TestType ref = dref; + TestType v = log2(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) 
ReportQuadDoubleFunctionError("log2", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + template + int VerifyLog10Function(bool reportTestCases, double maxError = 1.0e-15) { + using std::log10; + int nrOfFailedTestCases{ 0 }; + for (int i = -64; i < 65; ++i) { + double da = std::pow(2.0, double(i)); + TestType a = da; + double dref = log10(da); + TestType ref = dref; + TestType v = log10(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) ReportQuadDoubleFunctionError("log10", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + template + int VerifyLog1pFunction(bool reportTestCases, double maxError = 1.0e-15) { + using std::log1p; + int nrOfFailedTestCases{ 0 }; + for (int i = -64; i < 65; ++i) { + double da = std::pow(2.0, double(i)); + TestType a = da; + double dref = log1p(da); + TestType ref = dref; + TestType v = log1p(a); + TestType error = abs(v - ref); + if (error > maxError) { + ++nrOfFailedTestCases; + if (reportTestCases) ReportQuadDoubleFunctionError("log1p", v, ref, error); + } + } + + return nrOfFailedTestCases; + } + + + } +} + + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib logarithm function validation"; + std::string test_tag = "log/log1p/log2/log10"; + bool reportTestCases = true; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + std::cerr << test_tag << '\n'; + +#if MANUAL_TESTING + + auto defaultPrecision = std::cout.precision(); + + { + double a0 = 1.0; + double a1 = ulp(a0) / 2.0; + double a2 = ulp(a1) / 2.0; + double a3 = ulp(a2) / 2.0; + qd a(a0, a1, a2, a3); + std::cout << to_quad(a) << '\n'; + std::cout << std::setprecision(64) << a << std::setprecision(defaultPrecision) << '\n'; + std::cout << to_binary(a) << '\n'; + std::cout << color_print(a, true) << '\n'; + } + + { + volatile double residual; + double square; + double a{ 1.0e50 }; + for (int i = 0; i < 3; ++i) { + square = two_sqr(a, residual); + std::cout << "square : " << square << '\n'; + std::cout << "residual : " << residual << '\n'; + a *= 1.0e50; + } + } + { + qd x = trace_log(qd_e); + std::cout << x << '\n'; + + + x = exp(qd(1.0)); + std::cout << "exp( 1.0) : " << std::setprecision(64) << x << std::setprecision(defaultPrecision) << '\n'; + x = exp(qd(2.0)); + std::cout << "exp( 2.0) : " << std::setprecision(64) << x << std::setprecision(defaultPrecision) << '\n'; + x = exp(qd(4.0)); + std::cout << "exp( 4.0) : " << std::setprecision(64) << x << std::setprecision(defaultPrecision) << '\n'; + + + x = exp(qd(-1.0)); + std::cout << "exp(-1.0) : " << std::setprecision(64) << x << std::setprecision(defaultPrecision) << '\n'; + double a = 1.0 / std::numbers::e; + std::cout << "exp(-1.0) : " << std::setprecision(16) << a << 
std::setprecision(defaultPrecision) << '\n'; + } + + return 0; + + GenerateLogTestCase(1.0); + GenerateLogTestCase(std::numbers::e); + GenerateLogTestCase(pow(std::numbers::e, 2.0)); + + + GenerateLog2TestCase(1.0); + GenerateLog2TestCase(2.0); + GenerateLog2TestCase(4.0); + + { + std::stringstream s; + double maxError = 1.0e-14; + s << maxError; + std::string test_id = "log(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLogFunction(reportTestCases, maxError), "quad-double", test_id); + } + + { + std::stringstream s; + double maxError = 1.0e-29; + s << maxError; + std::string test_id = "log2(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLog2Function(reportTestCases, maxError), "quad-double", test_id); + } + + { + std::stringstream s; + double maxError = 1.0e-15; + s << maxError; + std::string test_id = "log10(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLog10Function(reportTestCases, maxError), "quad-double", test_id); + } + + { + std::stringstream s; + double maxError = 1.0e-14; + s << maxError; + std::string test_id = "log1p(error < " + s.str() + ")"; + nrOfFailedTestCases += ReportTestResult(VerifyLog1pFunction(reportTestCases, maxError), "quad-double", test_id); + } + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + +#if REGRESSION_LEVEL_1 + std::cout << "NOTE: quad-double log functions are LESS accurate than stdlib double: \ncurrently log() is accurate to just 14 digits, quad-double should have 32 digits of accuracy\n"; + nrOfFailedTestCases += ReportTestResult(VerifyLogFunction(reportTestCases, 1.0e-14), "quad-double", "log()"); + nrOfFailedTestCases += ReportTestResult(VerifyLog2Function(reportTestCases, 1.0e-14), "quad-double", "log2()"); + nrOfFailedTestCases += ReportTestResult(VerifyLog10Function(reportTestCases, 1.0e-14), "quad-double", "log10()"); + nrOfFailedTestCases += 
ReportTestResult(VerifyLog1pFunction(reportTestCases, 1.0e-14), "quad-double", "log1p()"); +#endif + +#if REGRESSION_LEVEL_2 +#endif + +#if REGRESSION_LEVEL_3 +#endif + +#if REGRESSION_LEVEL_4 +#endif + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/minmax.cpp b/static/qd/math/minmax.cpp new file mode 100644 index 000000000..d8bba6b85 --- /dev/null +++ b/static/qd/math/minmax.cpp @@ -0,0 +1,76 @@ +// minmax.cpp: test suite runner for minmax functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib minmax function validation"; + std::string test_tag = "min/max"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + qd x{ 3.0 }, y{ 4.0 }; + + std::cout << "MIN/MAX FUNCTIONS ARE SHIMS TO DOUBLE\n"; + std::cout << "min( " << x << ", " << y << ") = " << min(x, y) << '\n'; + std::cout << "max( " << x << ", " << y << ") = " << max(x, y) << '\n'; + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) 
{ + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/bfloat/arithmetic/addition.cpp b/static/qd/math/next.cpp similarity index 55% rename from static/bfloat/arithmetic/addition.cpp rename to static/qd/math/next.cpp index f2f5813bd..090bf5430 100644 --- a/static/bfloat/arithmetic/addition.cpp +++ b/static/qd/math/next.cpp @@ -1,19 +1,36 @@ -// addition.cpp: test suite runner for addition on bfloat16s +// next.cpp: test suite runner for nextafter/nextbefore functions for quad-double (qd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include -#include -#include +#include #include -#include -#include -#include + +// generate specific test case +template +void GenerateTestCase(Ty fa, Ty fb) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + sw::universal::qd a, b, ref, v; + a = fa; + b = fb; + fref = std::nextafter(fa, fb); + ref = fref; + v = sw::universal::nextafter(a, b); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> nextafter(" << fa << "," << fb << ") = " << std::setw(width) << fref << std::endl; + std::cout << " -> nextafter( " << a << "," << b << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << (ref == v ? "PASS" : "FAIL") << std::endl << std::endl; + std::cout << std::setprecision(oldPrec); +} // Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override -#define MANUAL_TESTING 0 +#define MANUAL_TESTING 1 // REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity // It is the responsibility of the regression test to organize the tests in a quartile progression. 
//#undef REGRESSION_LEVEL_OVERRIDE @@ -32,59 +49,33 @@ int main() try { using namespace sw::universal; - std::string test_suite = "Google Brain Float addition validation"; - std::string test_tag = "bfloat16 addition"; - bool reportTestCases = false; - int nrOfFailedTestCases = 0; + std::string test_suite = "quad-double mathlib nextafter/nextbefore function validation"; + std::string test_tag = "nextafter/nextbefore"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; ReportTestSuiteHeader(test_suite, reportTestCases); #if MANUAL_TESTING - // generate individual testcases to hand trace/debug - TestCase< bfloat16, float>(TestCaseOperator::ADD, 1.0f, 1.0f); - TestCase< cfloat<16, 8, uint16_t, true, true, false>, double>(TestCaseOperator::ADD, INFINITY, INFINITY); + GenerateTestCase(1.0, 2.0); + std::cout << "nextafter/nexttoward functions depend on increment/decrement of quad-double, which are TBD\n"; + qd x{ 1.0 }, y{ 2.0 }; + std::cout << "nextafter( " << x << ", " << y << ") = " << nextafter(x, y) << '\n'; + std::cout << "nexttoward( " << x << ", " << y << ") = " << nexttoward(x, y) << '\n'; - nrOfFailedTestCases += ReportTestResult( - VerifyCfloatAddition< cfloat<8, 2, uint8_t, true, true, false> >(reportTestCases), - "cfloat<8,2,uint8_t,t,t,f>", "addition" - ); - nrOfFailedTestCases += ReportTestResult( - VerifyBinaryOperatorThroughRandoms(reportTestCases, RandomsOp::OPCODE_ADD, 1000), - "bfloat16", "addition" - ); + //nrOfFailedTestCases += ReportTestResult(VerifyNextFunction("Manual Testing", reportTestCases), "qd", test_tag); ReportTestSuiteResults(test_suite, nrOfFailedTestCases); - return EXIT_SUCCESS; // ignore failures -#else // !MANUAL_TESTING - -#if REGRESSION_LEVEL_1 + return EXIT_SUCCESS; // ignore errors +#else - constexpr unsigned nrOfRandoms = 1000; - std::stringstream s; - s << test_tag << " " << nrOfRandoms << " random pairs"; - std::string description = s.str(); - nrOfFailedTestCases += ReportTestResult( - 
VerifyBinaryOperatorThroughRandoms(reportTestCases, RandomsOp::OPCODE_ADD, nrOfRandoms), - description, - test_tag - ); - -#endif - -#if REGRESSION_LEVEL_2 -#endif - -#if REGRESSION_LEVEL_3 -#endif - -#if REGRESSION_LEVEL_4 -#endif ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + #endif // MANUAL_TESTING } catch (char const* msg) { diff --git a/static/qd/math/pow.cpp b/static/qd/math/pow.cpp new file mode 100644 index 000000000..5d6ece9b0 --- /dev/null +++ b/static/qd/math/pow.cpp @@ -0,0 +1,112 @@ +// pow.cpp: test suite runner for pow function for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include + +// generate specific test case +template +void GenerateTestCase(Ty fa, Ty fb) { + unsigned precision = 25; + unsigned width = 30; + Ty fref; + sw::universal::qd a, b, ref, v; + a = fa; + b = fb; + fref = std::pow(fa, fb); + ref = fref; + v = sw::universal::pow(a, b); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> pow(" << fa << "," << fb << ") = " << std::setw(width) << fref << std::endl; + std::cout << " -> pow( " << a << "," << b << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << (ref == v ? "PASS" : "FAIL") << std::endl << std::endl; + std::cout << std::setprecision(oldPrec); +} + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib power function validation"; + std::string test_tag = "pow"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + // generate individual testcases to hand trace/debug + GenerateTestCase(4.0, 2.0); + + // integer pown is working + qd a{ 1.0 }; + for (int i = 0; i < 30; ++i) { + std::string tag = "pow(1.0, " + std::to_string(i) + ")"; + std::cout << std::setw(25) << tag << " : " << pow(a, i) << '\n'; + } + a = 2.0; + + for (int i = 0; i < 30; ++i) { + std::string tag = "pow(2.0, " + std::to_string(i) + ")"; + std::cout << std::setw(25) << tag << " : " << pow(a, i) << '\n'; + } + + // qd pow uses exp() function, which is currently incorrect + for (int i = 0; i < 30; ++i) { + std::string tag = "pow(2.0, " + std::to_string(i) + ")"; + std::cout << std::setw(25) << tag << " : " << pow(a, qd(i)) << '\n'; + } + nrOfFailedTestCases = 30; + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? 
EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/sqrt.cpp b/static/qd/math/sqrt.cpp new file mode 100644 index 000000000..b5518d598 --- /dev/null +++ b/static/qd/math/sqrt.cpp @@ -0,0 +1,121 @@ +// sqrt.cpp: test suite runner for sqrt function for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include + + +namespace sw { + namespace universal { + + template + void GenerateSqrtTestCase(Ty fa) { + unsigned precision = 25; + //unsigned width = 30; + Ty fref; + sw::universal::qd a, ref, v; + a = fa; + fref = std::sqrt(fa); + ref = fref; + v = sw::universal::sqrt(a); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(precision); + std::cout << " -> sqrt(" << fa << ") = " << fref << std::endl; + std::cout << " -> sqrt( " << a << ") = " << v << '\n' << to_binary(v) << '\n'; + std::cout << to_binary(ref) << "\n -> reference\n"; + std::cout << (ref == v ? 
"PASS" : "FAIL") << std::endl << std::endl; + std::cout << std::setprecision(oldPrec); + } + + template + int VerifySqrtFunction(bool reportTestCases, QuadDouble a) { + int nrOfFailedTestCases{ 0 }; + QuadDouble b{ a }; + for (int i = 0; i < 9; ++i) { + a *= a; + qd c = sqrt(a); + if (b != c) { + if (reportTestCases) std::cerr << "FAIL : " << b << " != " << c << '\n'; + ++nrOfFailedTestCases; + } + b *= b; + } + return nrOfFailedTestCases; + } + } +} + + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 0 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. +//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib sqrt function validation"; + std::string test_tag = "sqrt"; + bool reportTestCases = true; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + // generate individual testcases to hand trace/debug + GenerateSqrtTestCase(1.0); + GenerateSqrtTestCase(1024.0 * 1024.0); + constexpr double minpos = std::numeric_limits::min(); + GenerateSqrtTestCase(minpos); + constexpr double maxpos = std::numeric_limits::max(); + GenerateSqrtTestCase(maxpos); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // ignore errors +#else + + nrOfFailedTestCases += ReportTestResult(VerifySqrtFunction(reportTestCases, qd(2.0)), "sqrt(qd > 1.0)", test_tag); + nrOfFailedTestCases += 
ReportTestResult(VerifySqrtFunction(reportTestCases, qd(0.5)), "sqrt(qd < 1.0)", test_tag); + + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/trigonometry.cpp b/static/qd/math/trigonometry.cpp new file mode 100644 index 000000000..a3c432625 --- /dev/null +++ b/static/qd/math/trigonometry.cpp @@ -0,0 +1,336 @@ +// trigonometry.cpp: test suite runner for trigonometry functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. 
+#include +#include +#include +#include + +template +int VerifySinFunction(bool reportTestCases) { + using std::sin, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + const double d2pi = 6.283185307179586476925286766559; + //const double piOver4 = 0.78539816339744830961566084581988; + //const double piOver8 = 0.39269908169872415480783042290994; + //const double piOver16 = 0.19634954084936207740391521145497; + const double piOver32 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of dinc; NOTE(review): the piOver32 literal numerically equals pi/256, giving about 512 samples rather than 64 -- confirm the intended step + double dinc{ piOver32 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver32 }; + for (unsigned i = 0; i < samples; ++i) { + Real angle = Real(i) * increment; + double dangle = double(i) * dinc; + double ref = sin(dangle); + Real result = sin(angle); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cerr << "sin( " << angle << ") : " << sin(angle) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cerr << "sin( " << angle << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyCosFunction(bool reportTestCases) { + using std::cos, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + const double d2pi = 6.283185307179586476925286766559; + //const double piOver4 = 0.78539816339744830961566084581988; + //const double piOver8 = 0.39269908169872415480783042290994; + //const double piOver16 = 0.19634954084936207740391521145497; + const double piOver32 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of dinc; NOTE(review): the piOver32 literal numerically equals pi/256, giving about 512 samples rather than 64 -- confirm the intended step + double dinc{ piOver32 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver32 }; + for (unsigned i = 0; i < samples; ++i) { + Real angle = Real(i) * increment; + double dangle = double(i) * dinc; + double ref = cos(dangle); + Real result = cos(angle); + Real error = abs(result - 
Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cerr << "cos( " << angle << ") : " << cos(angle) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cerr << "cos( " << angle << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyTanFunction(bool reportTestCases) { + using std::tan, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + const double d2pi = 6.283185307179586476925286766559; + //const double piOver2 = 1.5707963267948966192313216916398; + //const double piOver4 = 0.78539816339744830961566084581988; + //const double piOver8 = 0.39269908169872415480783042290994; + //const double piOver16 = 0.19634954084936207740391521145497; + const double piOver32 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of dinc; NOTE(review): the piOver32 literal numerically equals pi/256, giving about 512 samples rather than 64 -- confirm the intended step + double dinc{ piOver32 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver32 }; + // tan(x) is unbounded at pi/2 and 3pi/2 + // they are at 1/4 and 3/4s of the sample sequence + for (unsigned i = 0; i < samples; ++i) { + Real angle = Real(i) * increment; + double dangle = double(i) * dinc; + double ref = tan(dangle); + Real result = tan(angle); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (i == samples / 4 || i == 3 * samples / 4) { + // at these two samples tan(x) is expected to have a much larger error: errors above 1e-01 are skipped, smaller ones are printed and counted as failures + // std::cout << samples << " : " << i << '\n'; + if (error > 1e-01) continue; + std::cerr << "error : " << error << '\n'; + } + if (reportTestCases) std::cerr << "tan( " << angle << ") : " << tan(angle) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cerr << "tan( " << angle << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyArcsinFunction(bool reportTestCases) { + using std::asin, std::sin, std::abs; + constexpr bool bTraceError{ false }; + int 
nrOfFailedTestCases{ 0 }; + + // walk the domain of arcsin = [-1, 1] to the range of [ -pi/2, pi/2 ] + int samples{ 64 }; + double dinc{ 2.0 / double(samples) }; + Real increment{ dinc }; + for (int i = -samples / 2; i < samples / 2; ++i) { + Real rx = Real(i) * increment; + double dx = double(i) * dinc; + // std::cout << "dx " << dx << '\n'; + double ref = asin(dx); + Real result = asin(rx); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cout << "arcsin( " << rx << ") : " << asin(rx) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cout << "arcsin( " << rx << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyArccosFunction(bool reportTestCases) { + using std::acos, std::cos, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + // walk the domain of arccos = [-1, 1] to the range of [0, pi] + int samples{ 64 }; + double dinc{ 2.0 / double(samples) }; + Real increment{ dinc }; + for (int i = -samples / 2; i < samples / 2; ++i) { + Real rx = Real(i) * increment; + double dx = double(i) * dinc; + // std::cout << "dx " << dx << '\n'; + double ref = acos(dx); + Real result = acos(rx); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cout << "arccos( " << rx << ") : " << acos(rx) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cout << "arccos( " << rx << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +template +int VerifyArctanFunction(bool reportTestCases) { + using std::atan, std::tan, std::abs; + constexpr bool bTraceError{ false }; + int nrOfFailedTestCases{ 0 }; + + // walk the domain of arctan = [ -inf, inf ] to the range of [ -pi/2, pi/2 ] + // we are going to use tan(x) to generate the values to inverse + const double d2pi = 6.283185307179586476925286766559; + 
const double piOver256 = 0.01227184630308512983774470071594; + + // walk the unit circle in steps of pi/256 + double dinc{ piOver256 }; + unsigned samples{ static_cast(d2pi / dinc) }; + Real increment{ piOver256 }; + // tan(x) has poles at pi/2 and 3pi/2 + // they are at 1/4 and 3/4s of the sample sequence; no special handling is applied to them here + for (unsigned i = 0; i < samples; ++i) { + + double dangle = double(i) * dinc; + double dx = tan(dangle); + + Real angle = Real(i) * increment; + Real rx = tan(angle); + + double ref = atan(dx); + Real result = atan(rx); + Real error = abs(result - Real(ref)); + if (error > 1e-10) { + if (reportTestCases) std::cout << "arctan( " << rx << ") : " << atan(rx) << " : error " << error << '\n'; + ++nrOfFailedTestCases; + } + else { + if constexpr (bTraceError) std::cout << "arctan( " << rx << ") : error " << error << '\n'; + } + } + + return nrOfFailedTestCases; +} + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression.
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif +// main: with MANUAL_TESTING set it prints sample evaluations and always returns EXIT_SUCCESS; otherwise it runs the regression checks and returns EXIT_FAILURE when any of them reported a failure +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib trigonometry function validation"; + std::string test_tag = "sin/cos/tan asin/acos/atan"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + std::cout << "TRIGONOMETRY FUNCTIONS ARE SHIMS TO DOUBLE\n"; + + std::cout << std::setw(10) << "sin(pi/4)" << " : " << sin(qd_pi4) << '\n'; + std::cout << std::setw(10) << "cos(pi/4)" << " : " << cos(qd_pi4) << '\n'; + std::cout << std::setw(10) << "tan(pi/4)" << " : " << tan(qd_pi4) << '\n'; + + { + qd a = sin(qd_pi4); + qd b = asin(a); + std::cout << "pi/4 : " << qd_pi4 << '\n'; + std::cout << "sin(pi/4) : " << a << '\n'; + std::cout << "asin(sin(pi/4)) : " << b << '\n'; + } +// std::cout << std::setw(10) << "asin(sin(pi/4))" << " : " << asin(sin(qd_pi4)) << '\n'; + std::cout << std::setw(10) << "acos(cos(pi/4))" << " : " << acos(cos(qd_pi4)) << '\n'; + std::cout << std::setw(10) << "atan(tan(pi/4))" << " : " << atan(tan(qd_pi4)) << '\n'; + + VerifySinFunction(reportTestCases); + + qd piOver4("0.78539816339744830961566084581988"); + qd piOver8("0.39269908169872415480783042290994"); + qd piOver16("0.19634954084936207740391521145497"); + qd piOver32("0.09817477042468103870195760572748"); + + qd a = sin(piOver4); + + std::cout << "pi/4 : " << std::setprecision(32) << piOver4 << '\n'; + std::cout << "pi/8 : " << std::setprecision(32) << piOver8 << '\n'; + std::cout << "pi/16 : " << std::setprecision(32) << piOver16 << '\n'; + std::cout << "pi/32 : " << std::setprecision(32) << piOver32 << '\n'; + + qd b{}; + b = asin(qd(0)); +
std::cout << b << '\n'; + b = asin(qd(-1.0)); + std::cout << b << '\n'; + b = asin(qd(1.0)); + std::cout << b << '\n'; + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // manual testing never fails the run +#else + +#if REGRESSION_LEVEL_1 + nrOfFailedTestCases += ReportTestResult(VerifySinFunction(reportTestCases) , " sin function", " sin(qd)"); + nrOfFailedTestCases += ReportTestResult(VerifyCosFunction(reportTestCases) , " cos function", " cos(qd)"); + nrOfFailedTestCases += ReportTestResult(VerifyTanFunction(reportTestCases) , " tan function", " tan(qd)"); + + nrOfFailedTestCases += ReportTestResult(VerifyArcsinFunction(reportTestCases), "arcsin function", "asin(qd)"); + nrOfFailedTestCases += ReportTestResult(VerifyArccosFunction(reportTestCases), "arccos function", "acos(qd)"); + nrOfFailedTestCases += ReportTestResult(VerifyArctanFunction(reportTestCases), "arctan function", "atan(qd)"); +#endif + +#if REGRESSION_LEVEL_2 +#endif + +#if REGRESSION_LEVEL_3 +#endif + +#if REGRESSION_LEVEL_4 +#endif + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...)
{ + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/math/truncate.cpp b/static/qd/math/truncate.cpp new file mode 100644 index 000000000..bce32c34f --- /dev/null +++ b/static/qd/math/truncate.cpp @@ -0,0 +1,78 @@ +// truncate.cpp: test suite runner for the truncation functions (trunc/round/floor/ceil) of quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. +//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double mathlib truncate function validation"; + std::string test_tag = "trunc/round/floor/ceil"; + bool reportTestCases = false; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + qd x{ 1.75 }; + + std::cout << "TRUNC/ROUND FUNCTIONS ARE SHIMS TO DOUBLE\n"; + std::cout << "trunc( " << x << ") = " << trunc(x) << '\n'; + std::cout << "round( " << x << ") = " << round(x) << '\n'; + std::cout << "floor( " << x << ") = " << floor(x) << '\n'; + std::cout << "ceil( " << x << ") = " << ceil(x) << '\n'; + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; // manual testing never fails the run
+#else + // no regression tests are defined for the truncate functions yet + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/tools/cmake/summary.cmake b/tools/cmake/summary.cmake index 15dfca677..a41394a2c 100644 --- a/tools/cmake/summary.cmake +++ b/tools/cmake/summary.cmake @@ -124,8 +124,8 @@ function(universal_print_configuration_summary) universal_status(" BUILD_NUMBER_BFLOATS : ${BUILD_NUMBER_BFLOATS}") universal_status(" BUILD_NUMBER_CFLOATS : ${BUILD_NUMBER_CFLOATS}") universal_status(" BUILD_NUMBER_DFLOATS : ${BUILD_NUMBER_DFLOATS}") - universal_status(" BUILD_NUMBER_DDS : ${BUILD_NUMBER_DDS}") - universal_status(" BUILD_NUMBER_QDS : ${BUILD_NUMBER_QDS}") + universal_status(" BUILD_NUMBER_DOUBLE_DOUBLE : ${BUILD_NUMBER_DOUBLE_DOUBLE}") + universal_status(" BUILD_NUMBER_QUAD_DOUBLE : ${BUILD_NUMBER_QUAD_DOUBLE}") universal_status(" BUILD_NUMBER_AREALS : ${BUILD_NUMBER_AREALS}") universal_status(" BUILD_NUMBER_UNUM1S : ${BUILD_NUMBER_UNUM1S}") universal_status(" BUILD_NUMBER_UNUM2S : ${BUILD_NUMBER_UNUM2S}")