From bd45d9e29afe6a4a083c3dd9f2d37cb78344947f Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Wed, 18 Sep 2024 20:23:58 +0900
Subject: [PATCH 1/8] First pass at handling spaces in filenames.

---
 .../loadtests/vkloadtests/VulkanLoadTests.cpp |  5 ++
 utils/argparser.cpp                           | 52 ++++++++++++++++---
 2 files changed, 50 insertions(+), 7 deletions(-)
diff --git a/tests/loadtests/vkloadtests/VulkanLoadTests.cpp b/tests/loadtests/vkloadtests/VulkanLoadTests.cpp
index 50d2124bb5..116c5713a6 100644
--- a/tests/loadtests/vkloadtests/VulkanLoadTests.cpp
+++ b/tests/loadtests/vkloadtests/VulkanLoadTests.cpp
@@ -20,6 +20,8 @@
 
 #define _USE_MATH_DEFINES
 #include <math.h>
+#include <string>
+#include <regex>
 
 #include "VulkanLoadTests.h"
 #include "Texture.h"
@@ -355,6 +357,9 @@ VulkanLoadTests::showFile(std::string& filename)
         createViewer = Texture::create;
     }
     ktxTexture_Destroy(kTexture);
+
+    // Escape any spaces in filename.
+    filename = std::regex_replace( filename, std::regex(" "), "\\ " );
     std::string args = "--external " + filename;
     pViewer = createViewer(vkctx, w_width, w_height, args.c_str(), sBasePath);
     return pViewer;
diff --git a/utils/argparser.cpp b/utils/argparser.cpp
index 48620971f7..314b995ccb 100644
--- a/utils/argparser.cpp
+++ b/utils/argparser.cpp
@@ -21,7 +21,10 @@
  */
 
 #include <assert.h>
+#include <iostream>
 #include "argparser.h"
+#include <regex>
+#include <locale>
 
 using namespace std;
 
@@ -30,18 +33,53 @@ using namespace std;
  */
 argvector::argvector(const string& sArgs)
 {
-    const string sep(" \t\n\r\v\f");
+    const string sep(" \t");
+    //std::locale old = std::locale::global(std::locale("ja_JP.UTF-8"));
+    // std::regex does not support negative lookbehind assertions. Darn!
+    // RE to extract argument between string start and separator.
+    // - Match 0 is whole matching string including separator.
+    // - Match 1 is the argument.
+    // Use negative character class as it is easiest way to handle
+    // utf-8 file names with non-Latin characters. $ must not be last
+    // in the character class or it will be taken literally not as
+    // end of string.
+    // Use raw literal to avoid excess backslashes.
+    regex re(R"--(^([^$ \t]+)(?:[ \t]+|$))--");
+    //regex re(R"--(^([\\/:._[:alnum:]-]+)(?:[ \t]+|$))--");
     size_t pos;
 
     pos = sArgs.find_first_not_of(sep);
     assert(pos != string::npos);
 
-    do {
-        size_t epos = sArgs.find_first_of(sep, pos);
-        size_t len = epos == string::npos ? epos : epos - pos;
-        push_back(sArgs.substr(pos, len));
-        pos = sArgs.find_first_not_of(sep, epos);
-    } while (pos != string::npos);
+    auto first = sArgs.begin() + pos;
+    auto last = sArgs.end();
+    bool continuation = false;
+    bool needContinuation = false;
+    for (smatch sm; regex_search(first, last, sm, re);) {
+        std::cout << "prefix: " << sm.prefix() << '\n';
+        std::cout << "suffix: " << sm.suffix() << '\n';
+        std::cout << "match size: " << sm.size() << '\n';
+        for(uint32_t i = 0; i < sm.size(); i++) {
+            std::cout << "match " << i << ": " << "\"" << sm.str(i) << "\"" << '\n';
+        }
+        string arg;
+        // All this because std::regex does not support negative lookbehind.
+        if (*(sm[1].second - 1) == '\\') {
+            arg = string(sm[1].first, sm[1].second - 1) + " ";
+            needContinuation = true;
+        } else {
+            arg = sm.str(1);
+            needContinuation = false;
+        }
+        if (continuation) {
+            this->back() += arg;
+        } else {
+            push_back(arg);
+        }
+        continuation = needContinuation;
+        first = sm.suffix().first;
+    }
+    //std::locale::global(old);
 }
 
 /*

From 56ea7a1d6f16fcfa022f89b3127ea437ed9d747c Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Wed, 18 Sep 2024 21:43:33 +0900
Subject: [PATCH 2/8] Simplify backslash handling.

---
 utils/argparser.cpp | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/utils/argparser.cpp b/utils/argparser.cpp
index 314b995ccb..666b98f79a 100644
--- a/utils/argparser.cpp
+++ b/utils/argparser.cpp
@@ -34,18 +34,17 @@ using namespace std;
 argvector::argvector(const string& sArgs)
 {
     const string sep(" \t");
-    //std::locale old = std::locale::global(std::locale("ja_JP.UTF-8"));
     // std::regex does not support negative lookbehind assertions. Darn!
     // RE to extract argument between string start and separator.
     // - Match 0 is whole matching string including separator.
     // - Match 1 is the argument.
+    // - Match 2 is an empty string or a backslash.
     // Use negative character class as it is easiest way to handle
     // utf-8 file names with non-Latin characters. $ must not be last
     // in the character class or it will be taken literally not as
     // end of string.
     // Use raw literal to avoid excess backslashes.
-    regex re(R"--(^([^$ \t]+)(?:[ \t]+|$))--");
-    //regex re(R"--(^([\\/:._[:alnum:]-]+)(?:[ \t]+|$))--");
+    regex re(R"--(^([^$\\ \t]+)(\\?)(?:[ \t]+|$))--");
     size_t pos;
 
     pos = sArgs.find_first_not_of(sep);
@@ -54,22 +53,23 @@ argvector::argvector(const string& sArgs)
     auto first = sArgs.begin() + pos;
     auto last = sArgs.end();
     bool continuation = false;
-    bool needContinuation = false;
     for (smatch sm; regex_search(first, last, sm, re);) {
+        bool needContinuation = false;
+#if DEBUG_REGEX
         std::cout << "prefix: " << sm.prefix() << '\n';
         std::cout << "suffix: " << sm.suffix() << '\n';
         std::cout << "match size: " << sm.size() << '\n';
         for(uint32_t i = 0; i < sm.size(); i++) {
             std::cout << "match " << i << ": " << "\"" << sm.str(i) << "\"" << '\n';
         }
+#endif
         string arg;
-        // All this because std::regex does not support negative lookbehind.
-        if (*(sm[1].second - 1) == '\\') {
-            arg = string(sm[1].first, sm[1].second - 1) + " ";
+        // All this because std::regex does not support negative
+        // lookbehind assertions.
+        arg = sm.str(1);
+        if (*sm[2].first == '\\') {
+            arg += " ";
             needContinuation = true;
-        } else {
-            arg = sm.str(1);
-            needContinuation = false;
         }
         if (continuation) {
             this->back() += arg;
@@ -79,7 +79,6 @@ argvector::argvector(const string& sArgs)
         continuation = needContinuation;
         first = sm.suffix().first;
     }
-    //std::locale::global(old);
 }
 
 /*

From cfa4e56db2ff9c823cd27519a9451d24a0b932da Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Thu, 19 Sep 2024 16:59:16 +0900
Subject: [PATCH 3/8] Escape spaces in received filename.

---
 tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp b/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp
index edd4057c98..87ab152ab0 100644
--- a/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp
+++ b/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp
@@ -15,6 +15,9 @@
  *        OpenGL ES 3.x
  */
 
+#include <string>
+#include <regex>
+
 #include "GLLoadTests.h"
 #include "EncodeTexture.h"
 #include "DrawTexture.h"
@@ -71,6 +74,9 @@ GLLoadTests::showFile(std::string& filename)
         createViewer = DrawTexture::create;
     }
     ktxTexture_Destroy(kTexture);
+
+    // Escape any spaces in filename.
+    filename = std::regex_replace( filename, std::regex(" "), "\\ " );
     std::string args = "--external " + filename;
     pViewer = createViewer(w_width, w_height, args.c_str(), sBasePath);
     return pViewer;

From 8c7f63ff240f0655f33e7335b2213fe03b8a4b2e Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Sun, 22 Sep 2024 12:43:54 +0900
Subject: [PATCH 4/8] Don't crash when filename has non-latin characters.

---
 .../VulkanAppSDL/vulkantextoverlay.hpp        | 130 +++++++++++++++++-
 tests/loadtests/vkloadtests.cmake             |   6 +
 2 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
index 6ff559da44..23349a4686 100644
--- a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
+++ b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
@@ -29,10 +29,113 @@
 #define STB_FONT_HEIGHT STB_FONT_consolas_24_latin1_BITMAP_HEIGHT 
 #define STB_FIRST_CHAR STB_FONT_consolas_24_latin1_FIRST_CHAR
 #define STB_NUM_CHARS STB_FONT_consolas_24_latin1_NUM_CHARS
+#define STB_MISSING_GLPYH 0x80 // Actually a control character.
 
 // Max. number of chars the text overlay buffer can hold
 #define MAX_CHAR_COUNT 1024
 
+
+/**
+ * @internal
+ * @brief Given the lead byte of a UTF-8 sequence returns the expected length of the codepoint
+ * @param[in] leadByte The lead byte of a UTF-8 sequence
+ * @return The expected length of the codepoint */
+[[nodiscard]] constexpr inline int sequenceLength(uint8_t leadByte) noexcept {
+    if ((leadByte & 0b1000'0000u) == 0b0000'0000u)
+        return 1;
+    if ((leadByte & 0b1110'0000u) == 0b1100'0000u)
+        return 2;
+    if ((leadByte & 0b1111'0000u) == 0b1110'0000u)
+        return 3;
+    if ((leadByte & 0b1111'1000u) == 0b1111'0000u)
+        return 4;
+
+    return 0;
+}
+
+/**
+ * @internal
+ * @brief Checks if the codepoint was coded as a longer than required sequence
+ * @param[in] codepoint The unicode codepoint
+ * @param[in] length The UTF-8 sequence length
+ * @return True if the sequence length was inappropriate for the given codepoint */
+[[nodiscard]] constexpr inline bool isOverlongSequence(uint32_t codepoint, int length) noexcept {
+    if (codepoint < 0x80)
+        return length != 1;
+    else if (codepoint < 0x800)
+        return length != 2;
+    else if (codepoint < 0x10000)
+        return length != 3;
+    else
+        return false;
+}
+
+/**
+ * @internal
+ * @brief Checks if the codepoint is valid
+ * @param[in] codepoint The unicode codepoint
+ * @return True if the codepoint is a valid unicode codepoint */
+[[nodiscard]] constexpr inline bool isCodepointValid(uint32_t codepoint) noexcept {
+    return codepoint <= 0x0010FFFFu
+            && !(0xD800u <= codepoint && codepoint <= 0xDBFFu);
+}
+
+/**
+ * @internal
+ * @brief Safely checks and advances a UTF-8 sequence iterator to the start of the next unicode codepoint
+ * @param[in] it iterator to be advanced
+ * @param[in] end iterator pointing to the end of the range
+ * @return True if the advance operation was successful and the advanced codepoint was a valid UTF-8 sequence */
+template <typename Iterator>
+[[nodiscard]] constexpr bool advanceUTF8(Iterator& it, Iterator end,
+                                         uint32_t& codepoint) noexcept {
+    if (it == end)
+        return false;
+
+    const auto length = sequenceLength(*it);
+    if (length == 0)
+        return false;
+
+    if (std::distance(it, end) < length)
+        return false;
+
+    for (int i = 1; i < length; ++i) {
+        const auto trailByte = *(it + i);
+        if ((static_cast<uint8_t>(trailByte) & 0b1100'0000u) != 0b1000'0000u)
+            return false;
+    }
+
+    codepoint = 0;
+    switch (length) {
+    case 1:
+        codepoint |= *it++;
+        break;
+    case 2:
+        codepoint |= (*it++ & 0b0001'1111u) << 6u;
+        codepoint |= (*it++ & 0b0011'1111u);
+        break;
+    case 3:
+        codepoint |= (*it++ & 0b0000'1111u) << 12u;
+        codepoint |= (*it++ & 0b0011'1111u) << 6u;
+        codepoint |= (*it++ & 0b0011'1111u);
+        break;
+    case 4:
+        codepoint |= (*it++ & 0b0000'0111u) << 18u;
+        codepoint |= (*it++ & 0b0011'1111u) << 12u;
+        codepoint |= (*it++ & 0b0011'1111u) << 6u;
+        codepoint |= (*it++ & 0b0011'1111u);
+        break;
+    }
+
+    if (!isCodepointValid(codepoint))
+        return false;
+
+    if (isOverlongSequence(codepoint, length))
+        return false;
+
+    return true;
+}
+
 // Mostly self-contained text overlay class
 // todo : comment
 class VulkanTextOverlay
@@ -91,6 +194,7 @@ class VulkanTextOverlay
         // todo : throw error
         return 0;
     }
+
 public:
 
     enum TextAlign { alignLeft, alignCenter, alignRight };
@@ -573,12 +677,20 @@ class VulkanTextOverlay
 
         // Calculate text width
         float textWidth = 0;
-        for (auto letter : text)
+        uint32_t codepoint;
+        auto it = text.begin();
+        while (it != text.end())
         {
-            stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR];
+            if (!advanceUTF8(it, text.end(), codepoint))
+                break;
+            // TODO: Get a UTF8 font. Consider changing to Dear ImGUI
+            // https://github.com/ocornut/imgui.
+            // Placeholder to avoid crashing.
+            if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR)
+                codepoint = STB_MISSING_GLPYH;
+            stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR];
             textWidth += charData->advance * charW;
         }
-
         switch (align)
         {
         case alignRight:
@@ -592,9 +704,15 @@ class VulkanTextOverlay
         }
 
         // Generate a uv mapped quad per char in the new text
-        for (auto letter : text)
+        it = text.begin();
+        while (it != text.end())        {
         {
-            stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR];
+            if (!advanceUTF8(it, text.end(), codepoint))
+                break;
+            if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR)
+                codepoint = STB_MISSING_GLPYH;
+
+            stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR];
 
             mappedLocal->x = (x + (float)charData->x0 * charW);
             mappedLocal->y = (y + (float)charData->y0 * charH);
@@ -627,8 +745,8 @@ class VulkanTextOverlay
             if (numLetters == MAX_CHAR_COUNT)
                 break; // Truncate the text.
         }
+        }
     }
-
     // Unmap buffer and update command buffers
     void endTextUpdate()
     {
diff --git a/tests/loadtests/vkloadtests.cmake b/tests/loadtests/vkloadtests.cmake
index 3ebf51ee1b..b36f756187 100644
--- a/tests/loadtests/vkloadtests.cmake
+++ b/tests/loadtests/vkloadtests.cmake
@@ -165,6 +165,12 @@ add_executable( vkloadtests
 
 set_code_sign(vkloadtests)
 
+# If VulkanAppSDL is ever made into its own target change the target here.
+target_compile_features(vkloadtests
+PRIVATE
+    cxx_std_14
+)
+
 target_include_directories(vkloadtests
 PRIVATE
     ${SDL2_INCLUDE_DIRS}

From 70c8424321335749d42b0ace566b0bbc39a21667 Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Sun, 22 Sep 2024 12:53:20 +0900
Subject: [PATCH 5/8] Remove unneeded header.

---
 utils/argparser.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/utils/argparser.cpp b/utils/argparser.cpp
index 666b98f79a..9c2e260add 100644
--- a/utils/argparser.cpp
+++ b/utils/argparser.cpp
@@ -24,7 +24,6 @@
 #include <iostream>
 #include "argparser.h"
 #include <regex>
-#include <locale>
 
 using namespace std;
 

From b2b4cb3beed79f72c07a5e1f9fd4c623accbdb55 Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Sun, 22 Sep 2024 18:25:38 +0900
Subject: [PATCH 6/8] Skip debug build to get cache populated.

---
 scripts/build_macos.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_macos.sh b/scripts/build_macos.sh
index bd616ebcf5..b4555fa7e5 100755
--- a/scripts/build_macos.sh
+++ b/scripts/build_macos.sh
@@ -113,7 +113,7 @@ IFS=, ; for config in $CONFIGURATION
 do
   IFS=$oldifs # Because of ; IFS set above will still be present.
   # Build and test
-  #if [ "$config" = "Debug" ]; then continue; fi
+  if [ "$config" = "Debug" ]; then continue; fi
   echo "Build KTX-Software (macOS $ARCHS $config)"
   if [ -n "$MACOS_CERTIFICATES_P12" -a "$config" = "Release" ]; then
     cmake --build . --config $config | handle_compiler_output

From 5384f020768ad8d3dcbc879254ddde35811d40aa Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Tue, 24 Sep 2024 12:55:29 +0900
Subject: [PATCH 7/8] Reenable debug build.

---
 scripts/build_macos.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_macos.sh b/scripts/build_macos.sh
index b4555fa7e5..bd616ebcf5 100755
--- a/scripts/build_macos.sh
+++ b/scripts/build_macos.sh
@@ -113,7 +113,7 @@ IFS=, ; for config in $CONFIGURATION
 do
   IFS=$oldifs # Because of ; IFS set above will still be present.
   # Build and test
-  if [ "$config" = "Debug" ]; then continue; fi
+  #if [ "$config" = "Debug" ]; then continue; fi
   echo "Build KTX-Software (macOS $ARCHS $config)"
   if [ -n "$MACOS_CERTIFICATES_P12" -a "$config" = "Release" ]; then
     cmake --build . --config $config | handle_compiler_output

From eccfa1a2fd238b25ae0cccd394b9afac68d48598 Mon Sep 17 00:00:00 2001
From: Mark Callow <github@callow.im>
Date: Wed, 25 Sep 2024 16:05:34 +0900
Subject: [PATCH 8/8] Remove unnecessary braces.

---
 tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
index 23349a4686..0a0d236f00 100644
--- a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
+++ b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
@@ -705,7 +705,7 @@ class VulkanTextOverlay
 
         // Generate a uv mapped quad per char in the new text
         it = text.begin();
-        while (it != text.end())        {
+        while (it != text.end())
         {
             if (!advanceUTF8(it, text.end(), codepoint))
                 break;
@@ -745,7 +745,6 @@ class VulkanTextOverlay
             if (numLetters == MAX_CHAR_COUNT)
                 break; // Truncate the text.
         }
-        }
     }
     // Unmap buffer and update command buffers
     void endTextUpdate()