From 0f5dd89e3a055f62091224d82fd888e753c79e43 Mon Sep 17 00:00:00 2001 From: Ravenwater Date: Tue, 12 Nov 2024 18:53:22 -0500 Subject: [PATCH] WIP: adaptive scaling algorithm to convert ieee-754 to rational --- .../universal/number/rational/attributes.hpp | 3 +- .../universal/number/rational/rational.hpp | 17 +++++ .../number/rational/rational_impl.hpp | 68 ++++++++++++++++--- include/universal/utility/find_msb.hpp | 35 +++++++++- static/rational/api/api.cpp | 63 +++++++++++++++-- 5 files changed, 167 insertions(+), 19 deletions(-) diff --git a/include/universal/number/rational/attributes.hpp b/include/universal/number/rational/attributes.hpp index bc0e09f9d..17654e4d8 100644 --- a/include/universal/number/rational/attributes.hpp +++ b/include/universal/number/rational/attributes.hpp @@ -25,7 +25,8 @@ bool sign(const rational& v) { // generate the maxneg through maxpos value range of a logarithmic number system configuration // the type of arithmetic, Modulo or Saturating, does not affect the range template -std::string rational_range(const rational& r) { +std::string rational_range(const rational& v) { + rational r{ v }; std::stringstream s; s << std::setw(45) << type_tag(r) << " : [ " << r.maxneg() << " ... " diff --git a/include/universal/number/rational/rational.hpp b/include/universal/number/rational/rational.hpp index 994d03ca6..32bddabd8 100644 --- a/include/universal/number/rational/rational.hpp +++ b/include/universal/number/rational/rational.hpp @@ -59,4 +59,21 @@ /// math functions #include +/////////////////////////////////////////////////////////////////////////////////////// +/// aliases for industry standard floating point configurations +namespace sw { namespace universal { + + // rational binary of 8bits + using rb8 = rational<8, uint8_t>; + // rational binary of 16bits + using rb16 = rational<16, uint16_t>; + // rational binary of 32bits + using rb32 = rational<32, uint32_t>; + // rational binary of 64bits + using rb64 = rational<64, uint64_t>; + // rational binary of 128bits + using rb128 = rational<128, uint32_t>; + +}} + #endif diff --git a/include/universal/number/rational/rational_impl.hpp b/include/universal/number/rational/rational_impl.hpp index 35cc93ce9..8d818b457 100644 --- a/include/universal/number/rational/rational_impl.hpp +++ b/include/universal/number/rational/rational_impl.hpp @@ -321,33 +321,80 @@ class rational { uint64_t e{ 0 }, f{ 0 }; bool s{ false }; extractFields(rhs, s, e, f, bits); + if (e == 0) { // subnormal } else { // normal - uint64_t _a = f | ieee754_parameter::hmask; + uint64_t a = f | ieee754_parameter::hmask; uint64_t b = ieee754_parameter::hmask; int exponent = static_cast(e - ieee754_parameter::bias); - uint64_t a{ 0 }; - if (exponent > 0) { - a = _a * (1ull << exponent); - } - else { - a = _a / (1ull << -exponent); - } + std::cout << "exponent = " << exponent << '\n'; + std::cout << "a = " << to_binary(a) << '\n'; + std::cout << "b = " << to_binary(b) << '\n'; if (a == b) { n = 1; d = 1; } else { - // gcd + // do we need to round the value or can we just throw the lower bits away? + // + // find the msb and shift it to the msb of the numerator + int msb = find_msb(a); + if (msb > nbits) { + int shift = 1 + msb - nbits; // one extra slot as we are shifting into a 2's complement encoding + a >>= shift; + b >>= shift; + } + /* + // normalize the ratio uint64_t r; while (a % b > 0ull) { r = a % b; a = b; b = r; } + */ + std::cout << "a = " << to_binary(a) << '\n'; + std::cout << "b = " << to_binary(b) << '\n'; + // and finally scale the ratio + msb = find_msb(a); + uint64_t maxUpShift = (nbits - msb - 1); + if (exponent >= 0) { + uint64_t scale = static_cast(exponent); + // find the new msb to direct how we need to scale while avoiding overflow + if (scale > maxUpShift) { + a <<= maxUpShift; + b >>= (scale - maxUpShift); + } + else { + a <<= scale; + } + } + else { + uint64_t scale = static_cast(-exponent); + // find the new msb to direct how we need to scale while avoiding underflow + uint64_t maxDownShift = find_msb(b); + if (scale > maxDownShift) { + if (maxUpShift < (scale - maxDownShift)) { + // overflow, saturate to maxpos + std::cerr << "overflow: scale = " << scale << '\n'; + n = 0; d = 0; + } + else { + a <<= maxUpShift; + b >>= maxDownShift; + } + } + else { + b >>= scale; + } + + } n = (s ? -static_cast(a) : static_cast(a)); - d = 1; + d = b; + normalize(); + std::cout << "n = " << to_binary(n) << '\n'; + std::cout << "d = " << to_binary(d) << '\n'; } } return *this; @@ -458,5 +505,4 @@ rational abs(const rational& v) { } - }} // namespace sw::universal diff --git a/include/universal/utility/find_msb.hpp b/include/universal/utility/find_msb.hpp index 20a4672b2..45c8d1f7b 100644 --- a/include/universal/utility/find_msb.hpp +++ b/include/universal/utility/find_msb.hpp @@ -20,7 +20,7 @@ namespace sw { namespace universal { /// /// find most significant bit that is set /// -/// value to +/// value to scan /// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(unsigned int x) { // find the first non-zero bit @@ -39,7 +39,7 @@ inline constexpr unsigned int find_msb(unsigned int x) { /// /// find most significant bit that is set /// -/// value to +/// value to scan /// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(unsigned long x) { // find the first non-zero bit @@ -55,6 +55,11 @@ inline constexpr unsigned int find_msb(unsigned long x) { return base + bval; } +/// +/// find most significant bit that is set +/// +/// value to scan +/// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(unsigned long long x) { // find the first non-zero bit unsigned int base = 0; @@ -72,7 +77,11 @@ inline constexpr unsigned int find_msb(unsigned long long x) { //////////////////////////// SIGNED integer types //////////////////////// - +/// +/// find most significant bit that is set +/// +/// value to scan +/// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(signed char x) { // find the first non-zero bit uint8_t tmp = uint8_t(x); @@ -86,6 +95,11 @@ inline constexpr unsigned int find_msb(signed char x) { return base + bval; } +/// +/// find most significant bit that is set +/// +/// value to scan +/// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(short x) { // find the first non-zero bit uint16_t tmp = uint16_t(x); @@ -100,6 +114,11 @@ inline constexpr unsigned int find_msb(short x) { return base + bval; } +/// +/// find most significant bit that is set +/// +/// value to scan +/// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(int x) { // find the first non-zero bit uint32_t tmp = uint32_t(x); @@ -115,6 +134,11 @@ inline constexpr unsigned int find_msb(int x) { return base + bval; } +/// +/// find most significant bit that is set +/// +/// value to scan +/// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(long x) { // find the first non-zero bit uint32_t tmp = uint32_t(x); @@ -131,6 +155,11 @@ inline constexpr unsigned int find_msb(long x) { return base + bval; } +/// +/// find most significant bit that is set +/// +/// value to scan +/// position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0 inline constexpr unsigned int find_msb(long long x) { // find the first non-zero bit uint64_t tmp = uint64_t(x); diff --git a/static/rational/api/api.cpp b/static/rational/api/api.cpp index 4172982c5..9900fc326 100644 --- a/static/rational/api/api.cpp +++ b/static/rational/api/api.cpp @@ -34,6 +34,59 @@ try { std::string test_suite = "rational<16,uint16_t> API tests"; int nrOfFailedTestCases = 0; + { + Conversion< rb16 >(1.0e4f); + Conversion< rb16 >(1.5e4f); + Conversion< rb16 >(1.75e4f); + Conversion< rb16 >(1.875e4f); + Conversion< rb16 >(1.9375e4f); + Conversion< rb16 >(3.2767e4f); + } + + return 0; + + { + /* + rational<8, uint8_t> : [ -128 ... -0.00787402 0 0.00787402 ... 127 ] + rational<16, uint16_t> : [ -32768 ... -3.05185e-05 0 3.05185e-05 ... 32767 ] + rational<32, uint32_t> : [ -2.14748e+09 ... -4.65661e-10 0 4.65661e-10 ... 2.14748e+09 ] + rational<64, uint64_t> : [ -9.22337e+18 ... -1.0842e-19 0 1.0842e-19 ... 9.22337e+18 ] + */ + + float f{ 32767 }; + f /= 10000; + std::cout << to_binary(f) << " : " << f << '\n'; + rb16 r; + r = 1.9375; + r = f; + for (int i = 0; i < 4; ++i) { + std::cout << to_binary(r) << " : " << r << '\n'; + r *= 10; + } + } + + return 0; + { + rb16 r; + r.maxpos(); + std::cout << std::setprecision(25); + std::cout << to_binary(float(r)) << " : " << float(r) << '\n'; + std::cout << to_binary(double(r)) << " : " << double(r) << '\n'; + // 0b0.10111110.00000000000000000000000 : 9.2233720368547758e+18 + // 0b0.10000111110.0000000000000000000000000000000000000000000000000000 : 9.2233720368547758e+18 + float f{ 9.223372036854775808e+18 }; + std::cout << to_binary(f) << " : " << f << '\n'; + double d{ 9.223372036854775808e+18 }; + std::cout << to_binary(d) << " : " << d << '\n'; + + int64_t i64{ 9223372036854775807 }; + std::cout << to_binary(i64) << " : " << i64 << '\n'; + + r = f; + std::cout << to_binary(r) << " : " << r << '\n'; + + } + // important behavioral traits { using TestType = rational<16,uint16_t>; @@ -43,10 +96,11 @@ try { // conversions std::cout << "+--------- Conversions\n"; { - Conversion< rational<8, uint8_t> >(1.875f); - Conversion< rational<16, uint16_t> >(1.875f); - Conversion< rational<32, uint32_t> >(1.875f); - Conversion< rational<64, uint64_t> >(1.875f); + Conversion< rb8 >(-1.875f); + Conversion< rb16 >(1.875e1); + Conversion< rb32 >(-1.875e5f); + Conversion< rb64 >(1.875e10); + Conversion< rb128 >(1.875e20); } // default behavior @@ -66,6 +120,7 @@ try { // report on the dynamic range of some standard configurations std::cout << "+--------- Dynamic ranges of standard rational<16,uint16_t> configurations --------+\n"; { + // default standard types: rb8, rb16, rb32, and rb64 ExtremeValues< rational<8, uint8_t> >(); ExtremeValues< rational<16, uint16_t> >(); ExtremeValues< rational<32, uint32_t> >();