From 2f46eccae492a293f78d39913f07f5002038f5c9 Mon Sep 17 00:00:00 2001
From: Koen De Vleeschauwer <kdv@kdvelectronics.eu>
Date: Fri, 3 Nov 2023 10:44:12 +0100
Subject: [PATCH] memwatch with float

---
 src/Makefile           |   3 +
 src/ftoa.c             | 181 +++++++++++++++++++++++++++++++++++++++++
 src/include/ftoa.h     |   4 +
 src/include/memwatch.h |   1 +
 src/memwatch.c         |  29 +++++--
 src/target/cortexm.c   |   5 +-
 6 files changed, 215 insertions(+), 8 deletions(-)
 create mode 100644 src/ftoa.c
 create mode 100644 src/include/ftoa.h

diff --git a/src/Makefile b/src/Makefile
index 64615b5b620..fc49839debd 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -135,6 +135,9 @@ endif
 ifeq ($(ENABLE_MEMWATCH), 1)
 CFLAGS += -DENABLE_MEMWATCH
 SRC += memwatch.c
+ifndef PC_HOSTED
+SRC += ftoa.c
+endif
 endif
 
 ifeq ($(ENABLE_RTT), 1)
diff --git a/src/ftoa.c b/src/ftoa.c
new file mode 100644
index 00000000000..68c89253456
--- /dev/null
+++ b/src/ftoa.c
@@ -0,0 +1,181 @@
+#include <stdint.h>
+#include <math.h>
+#include "ftoa.h"
+
+/* Convert IEEE single precision numbers into decimal ASCII strings, while
+   satisfying the following two properties:
+   1) Calling strtof or '(float) strtod' on the result must produce the
+   original float, independent of the rounding mode used by strtof/strtod.
+   2) Minimize the number of produced decimal digits. E.g. the float 0.7f
+   should convert to "0.7", not "0.69999999".
+
+   To solve this we use a dedicated single precision version of
+   Florian Loitsch's Grisu2 algorithm. See
+   http://florian.loitsch.com/publications/dtoa-pldi2010.pdf?attredirects=0
+
+   The code below is derived from Loitsch's C code, which
+   implements the same algorithm for IEEE double precision. See
+   http://florian.loitsch.com/publications/bench.tar.gz?attredirects=0
+
+   Adapted from https://github.com/bofh453/ftoa-fast/
+*/
+
+#define DIY_SIGNIFICAND_SIZE 64
+#define SP_SIGNIFICAND_MASK  0x7fffff
+#define SP_HIDDEN_BIT        0x800000 /* 2^23 */
+
+typedef union _f32 {
+	float f;        /* the value as a float */
+	unsigned int i; /* the same storage viewed as an integer; ftoa() shifts and masks it as a 32-bit pattern */
+} _f32;
+
+#if defined(__x86_64__) || defined(__amd64__)
+static uint64_t multiply(uint64_t x, uint32_t y)
+{
+	uint64_t y0 = ((uint64_t)y << 32), ac, tmp;
+	__asm__ __volatile__("mulq %3" : "=a"(tmp), "=d"(ac) : "%0"(x), "rm"(y0));
+	/* ac:tmp = x * (y << 32), so ac is (x * y) >> 32 and the top bit of tmp rounds it to nearest. */
+	return ac + (tmp >> 63);
+}
+#else
+static uint64_t multiply(uint64_t x, uint32_t y)
+{
+	/* Portable form of the same result: (x * y + 2^31) >> 32, built from 32x32->64 partial products. */
+	const uint64_t low = (x & 0xffffffff) * y;
+	return (x >> 32) * y + ((low + 0x80000000) >> 32);
+}
+#endif
+
+static int k_comp(int n)
+{
+	/* 0.30103 is log10(2) to five places; truncation rounds toward zero, so negative inputs are lowered by one. */
+	const float scaled = n * 0.30103f;
+	return (int)scaled - (n < 0 ? 1 : 0);
+}
+
+/* Cached powers of ten from 10**-37..10**40.
+   Produced using GNU MPFR's mpfr_pow_si.  */
+
+/* Significands: 64-bit values with the top bit set; index 37 holds 10**0. Read-only lookup data.  */
+static const uint64_t powers_ten[78] = {0x881cea14545c7575, 0xaa242499697392d3, 0xd4ad2dbfc3d07788, 0x84ec3c97da624ab5,
+	0xa6274bbdd0fadd62, 0xcfb11ead453994ba, 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3e,
+	0xfd87b5f28300ca0e, 0x9e74d1b791e07e48, 0xc612062576589ddb, 0xf79687aed3eec551, 0x9abe14cd44753b53,
+	0xc16d9a0095928a27, 0xf1c90080baf72cb1, 0x971da05074da7bef, 0xbce5086492111aeb, 0xec1e4a7db69561a5,
+	0x9392ee8e921d5d07, 0xb877aa3236a4b449, 0xe69594bec44de15b, 0x901d7cf73ab0acd9, 0xb424dc35095cd80f,
+	0xe12e13424bb40e13, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, 0xdbe6fecebdedd5bf, 0x89705f4136b4a597,
+	0xabcc77118461cefd, 0xd6bf94d5e57a42bc, 0x8637bd05af6c69b6, 0xa7c5ac471b478423, 0xd1b71758e219652c,
+	0x83126e978d4fdf3b, 0xa3d70a3d70a3d70a, 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000,
+	0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, 0xc350000000000000, 0xf424000000000000,
+	0x9896800000000000, 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000,
+	0xe8d4a51000000000, 0x9184e72a00000000, 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000,
+	0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000, 0xd8d726b7177a8000,
+	0x878678326eac9000, 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, 0xa56fa5b99019a5c8,
+	0xcecb8f27f4200f3a, 0x813f3978f8940984, 0xa18f07d736b90be5, 0xc9f2c9cd04674edf, 0xfc6f7c4045812296,
+	0x9dc5ada82b70b59e, 0xc5371912364ce305, 0xf684df56c3e01bc7, 0x9a130b963a6c115c, 0xc097ce7bc90715b3,
+	0xf0bdc21abb48db20, 0x96769950b50d88f4, 0xbc143fa4e250eb31, 0xeb194f8e1ae525fd};
+
+/* Binary exponents paired entry for entry with powers_ten[]. Read-only lookup data.  */
+static const int8_t powers_ten_e[78] = {-127, -124, -121, -117, -114, -111, -107, -104, -101, -98, -94, -91, -88, -84,
+	-81, -78, -74, -71, -68, -64, -61, -58, -54, -51, -48, -44, -41, -38, -34, -31, -28, -24, -21, -18, -14, -11, -8,
+	-4, -1, 2, 5, 9, 12, 15, 19, 22, 25, 29, 32, 35, 39, 42, 45, 49, 52, 55, 59, 62, 65, 69, 72, 75, 79, 82, 85, 89,
+	92, 95, 98, 102, 105, 108, 112, 115, 118, 122, 125, 127};
+
+/* Append c to s when it still fits within maxlen bytes; returns the new length. */
+static unsigned int ftoa_put(char *s, unsigned int len, unsigned int maxlen, char c)
+{
+	if (len < maxlen)
+		s[len++] = c;
+	return len;
+}
+
+/* Store the 3-character text, plus a NUL when a 4th byte fits; returns 3, or 0 if maxlen < 3. */
+static unsigned int ftoa_fixed(char *s, unsigned int maxlen, const char *text)
+{
+	if (maxlen < 3)
+		return 0;
+	/* Bytewise stores: s may be unaligned, and a 3-byte buffer must not receive a 4th byte. */
+	for (unsigned int i = 0; i < 3; i++)
+		s[i] = text[i];
+	if (maxlen > 3)
+		s[3] = '\0';
+	return 3;
+}
+
+/*
+ * Write f into s as the shortest decimal string that converts back to the same
+ * float, formatted as "[-]d.ddde+XX" (exponent sign '+' or '-', two digits).
+ * At most maxlen bytes are stored; characters that do not fit are dropped and
+ * the terminating NUL is stored only when there is room for it. NaN is written
+ * as "NaN" and +/-0 as "0.0" (nothing if maxlen < 3); infinities and denormals
+ * are not special-cased. Returns the number of characters stored, without NUL.
+ */
+unsigned int ftoa(char *s, float f, unsigned int maxlen)
+{
+	uint32_t w_lower, w_upper;
+	uint64_t D_upper, D_lower, delta, c_mk, one, p2;
+	_f32 f2;
+	int ve, mk = 0, kabs = 0;
+	unsigned int len = 0;
+	unsigned char digit, p1;
+
+	if (f != f)
+		return ftoa_fixed(s, maxlen, "NaN");
+	if (!f) /* +0 or -0 */
+		return ftoa_fixed(s, maxlen, "0.0");
+
+	/* The digits below are generated from |f|, so the sign has to be emitted here. */
+	if (f < 0)
+		len = ftoa_put(s, len, maxlen, '-');
+
+	/* Split |f| into exponent and significand; w_upper/w_lower are the significand +/- half a step, times 4. */
+	f2.f = fabsf(f);
+	ve = (f2.i >> 23) - 127 - 1;
+	f2.i = ((f2.i & SP_SIGNIFICAND_MASK) | SP_HIDDEN_BIT);
+	w_upper = (f2.i << 2) + 2;
+	w_lower = (f2.i << 2) - 1;
+	if (f2.i != SP_HIDDEN_BIT)
+		w_lower--;
+	w_upper <<= (DIY_SIGNIFICAND_SIZE - 58);
+	w_lower <<= (DIY_SIGNIFICAND_SIZE - 58);
+
+	/* Pick the cached power of ten for this exponent, scale both bounds by it, then shrink the interval by one. */
+	mk = k_comp(ve - 1);
+	ve = ve + powers_ten_e[37 - mk] - DIY_SIGNIFICAND_SIZE + 7;
+	one = ((uint64_t)1 << -ve) - 1;
+	c_mk = powers_ten[37 - mk];
+	D_upper = multiply(c_mk, w_upper);
+	D_lower = multiply(c_mk, w_lower);
+	D_upper--;
+	D_lower++;
+
+	/* D_upper is fixed point with -ve fractional bits: p1 is its integer part, p2 its fraction. */
+	delta = (D_upper - D_lower);
+	p1 = D_upper >> -ve;
+	p2 = D_upper & one;
+	digit = p1 / 10;
+	if (digit) {
+		len = ftoa_put(s, len, maxlen, (char)(0x30 + digit));
+		len = ftoa_put(s, len, maxlen, '.');
+		mk++;
+	}
+	p1 %= 10;
+	len = ftoa_put(s, len, maxlen, (char)(0x30 + p1));
+	if (!digit)
+		len = ftoa_put(s, len, maxlen, '.');
+	/* Emit fraction digits until the remainder no longer exceeds the interval width. */
+	do {
+		p2 *= 10;
+		len = ftoa_put(s, len, maxlen, (char)(0x30 + (p2 >> -ve)));
+		p2 &= one;
+		delta *= 10;
+	} while (p2 > delta);
+
+	len = ftoa_put(s, len, maxlen, 'e');
+	kabs = mk < 0 ? -mk : mk;
+	len = ftoa_put(s, len, maxlen, mk < 0 ? '-' : '+');
+	len = ftoa_put(s, len, maxlen, (char)((kabs / 10) + 0x30));
+	len = ftoa_put(s, len, maxlen, (char)((kabs % 10) + 0x30));
+	if (len < maxlen)
+		s[len] = 0;
+	return len;
+}
diff --git a/src/include/ftoa.h b/src/include/ftoa.h
new file mode 100644
index 00000000000..0754d3154b7
--- /dev/null
+++ b/src/include/ftoa.h
@@ -0,0 +1,4 @@
+#ifndef FTOA_H
+#define FTOA_H
+unsigned int ftoa(char *buf, float f, unsigned int buf_siz); /* returns the number of characters stored in buf */
+#endif /* FTOA_H */
diff --git a/src/include/memwatch.h b/src/include/memwatch.h
index 7bfc1d304f8..ad04107e1ce 100644
--- a/src/include/memwatch.h
+++ b/src/include/memwatch.h
@@ -12,6 +12,7 @@
 typedef enum memwatch_format {
 	MEMWATCH_FMT_SIGNED,
 	MEMWATCH_FMT_UNSIGNED,
+	MEMWATCH_FMT_FLOAT,
 	MEMWATCH_FMT_HEX
 } memwatch_format_e;
 
diff --git a/src/memwatch.c b/src/memwatch.c
index 7dbb902a9ef..777697488c3 100644
--- a/src/memwatch.c
+++ b/src/memwatch.c
@@ -8,6 +8,7 @@
 #include <unistd.h>
 #else
 #include "usb_serial.h"
+#include "ftoa.h"
 #endif
 
 memwatch_s memwatch_table[MEMWATCH_NUM];
@@ -32,7 +33,11 @@ static uint32_t rtt_write(const char *buf, uint32_t len)
 
 void poll_memwatch(target_s *cur_target)
 {
-	uint32_t val;
+	union val32_u {
+		uint32_t i;
+		volatile float f;
+	} val;
+
 	char buf[64];
 	char timestamp[64];
 	uint32_t len;
@@ -40,27 +45,37 @@ void poll_memwatch(target_s *cur_target)
 		return;
 
 	for (uint32_t i = 0; i < memwatch_cnt; i++) {
-		if (!target_mem_read(cur_target, &val, memwatch_table[i].addr, sizeof(val)) &&
-			(val != memwatch_table[i].value)) {
+		if (!target_mem_read(cur_target, &val.i, memwatch_table[i].addr, sizeof(val.i)) &&
+			(val.i != memwatch_table[i].value)) {
 			if (memwatch_timestamp)
 				snprintf(timestamp, sizeof(timestamp), "%" PRIu32 " ", platform_time_ms());
 			else
 				timestamp[0] = '\0';
 			switch (memwatch_table[i].format) {
 			case MEMWATCH_FMT_SIGNED:
-				len = snprintf(buf, sizeof(buf), "%s%s %" PRId32 "\r\n", timestamp, memwatch_table[i].name, val);
+				len = snprintf(buf, sizeof(buf), "%s%s %" PRId32 "\r\n", timestamp, memwatch_table[i].name, val.i);
 				break;
 			case MEMWATCH_FMT_UNSIGNED:
-				len = snprintf(buf, sizeof(buf), "%s%s %" PRIu32 "\r\n", timestamp, memwatch_table[i].name, val);
+				len = snprintf(buf, sizeof(buf), "%s%s %" PRIu32 "\r\n", timestamp, memwatch_table[i].name, val.i);
+				break;
+			case MEMWATCH_FMT_FLOAT:
+#if PC_HOSTED == 1
+				len = snprintf(buf, sizeof(buf), "%s%s %g\r\n", timestamp, memwatch_table[i].name, val.f);
+#else
+				char fbuf[32];
+				ftoa(fbuf, val.f, sizeof(fbuf));
+				fbuf[sizeof(fbuf) - 1] = '\0';
+				len = snprintf(buf, sizeof(buf), "%s%s %s\r\n", timestamp, memwatch_table[i].name, fbuf);
+#endif
 				break;
 			case MEMWATCH_FMT_HEX:
 			default:
-				len = snprintf(buf, sizeof(buf), "%s%s 0x%" PRIx32 "\r\n", timestamp, memwatch_table[i].name, val);
+				len = snprintf(buf, sizeof(buf), "%s%s 0x%" PRIx32 "\r\n", timestamp, memwatch_table[i].name, val.i);
 				break;
 			}
 			buf[sizeof(buf) - 1] = '\0';
 			rtt_write(buf, len);
-			memwatch_table[i].value = val;
+			memwatch_table[i].value = val.i;
 		}
 	}
 	return;
diff --git a/src/target/cortexm.c b/src/target/cortexm.c
index c484f4a41dc..fa7611720f1 100644
--- a/src/target/cortexm.c
+++ b/src/target/cortexm.c
@@ -84,7 +84,7 @@ static bool cortexm_redirect_stdout(target_s *target, int argc, const char **arg
 const command_s cortexm_cmd_list[] = {
 	{"vector_catch", cortexm_vector_catch, "Catch exception vectors"},
 #ifdef ENABLE_MEMWATCH
-	{"memwatch", cortexm_memwatch, "Read memory while target running: [/t] [[NAME] [/d|/u|/x] ADDRESS]..."},
+	{"memwatch", cortexm_memwatch, "Read memory while target running: [/t] [[NAME] [/d|/u|/f|/x] ADDRESS]..."},
 #endif
 #if PC_HOSTED == 0
 	{"redirect_stdout", cortexm_redirect_stdout, "Redirect semihosting stdout to USB UART"},
@@ -1418,6 +1418,9 @@ static bool cortexm_memwatch(target_s *target, int argc, const char **argv)
 			case 'u':
 				fmt = MEMWATCH_FMT_UNSIGNED;
 				break;
+			case 'f':
+				fmt = MEMWATCH_FMT_FLOAT;
+				break;
 			case 'x':
 				fmt = MEMWATCH_FMT_HEX;
 				break;