From bd45d9e29afe6a4a083c3dd9f2d37cb78344947f Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Wed, 18 Sep 2024 20:23:58 +0900 Subject: [PATCH 1/8] First pass at handling spaces in filenames. --- .../loadtests/vkloadtests/VulkanLoadTests.cpp | 5 ++ utils/argparser.cpp | 52 ++++++++++++++++--- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/tests/loadtests/vkloadtests/VulkanLoadTests.cpp b/tests/loadtests/vkloadtests/VulkanLoadTests.cpp index 50d2124bb5..116c5713a6 100644 --- a/tests/loadtests/vkloadtests/VulkanLoadTests.cpp +++ b/tests/loadtests/vkloadtests/VulkanLoadTests.cpp @@ -20,6 +20,8 @@ #define _USE_MATH_DEFINES #include +#include +#include #include "VulkanLoadTests.h" #include "Texture.h" @@ -355,6 +357,9 @@ VulkanLoadTests::showFile(std::string& filename) createViewer = Texture::create; } ktxTexture_Destroy(kTexture); + + // Escape any spaces in filename. + filename = std::regex_replace( filename, std::regex(" "), "\\ " ); std::string args = "--external " + filename; pViewer = createViewer(vkctx, w_width, w_height, args.c_str(), sBasePath); return pViewer; diff --git a/utils/argparser.cpp b/utils/argparser.cpp index 48620971f7..314b995ccb 100644 --- a/utils/argparser.cpp +++ b/utils/argparser.cpp @@ -21,7 +21,10 @@ */ #include +#include #include "argparser.h" +#include +#include using namespace std; @@ -30,18 +33,53 @@ using namespace std; */ argvector::argvector(const string& sArgs) { - const string sep(" \t\n\r\v\f"); + const string sep(" \t"); + //std::locale old = std::locale::global(std::locale("ja_JP.UTF-8")); + // std::regex does not support negative lookbehind assertions. Darn! + // RE to extract argument between string start and separator. + // - Match 0 is whole matching string including separator. + // - Match 1 is the argument. + // Use negative character class as it is easiest way to handle + // utf-8 file names with non-Latin characters. $ must not be last + // in the character class or it will be taken literally not as + // end of string. + // Use raw literal to avoid excess backslashes. + regex re(R"--(^([^$ \t]+)(?:[ \t]+|$))--"); + //regex re(R"--(^([\\/:._[:alnum:]-]+)(?:[ \t]+|$))--"); size_t pos; pos = sArgs.find_first_not_of(sep); assert(pos != string::npos); - do { - size_t epos = sArgs.find_first_of(sep, pos); - size_t len = epos == string::npos ? epos : epos - pos; - push_back(sArgs.substr(pos, len)); - pos = sArgs.find_first_not_of(sep, epos); - } while (pos != string::npos); + auto first = sArgs.begin() + pos; + auto last = sArgs.end(); + bool continuation = false; + bool needContinuation = false; + for (smatch sm; regex_search(first, last, sm, re);) { + std::cout << "prefix: " << sm.prefix() << '\n'; + std::cout << "suffix: " << sm.suffix() << '\n'; + std::cout << "match size: " << sm.size() << '\n'; + for(uint32_t i = 0; i < sm.size(); i++) { + std::cout << "match " << i << ": " << "\"" << sm.str(i) << "\"" << '\n'; + } + string arg; + // All this because std::regex does not support negative lookbehind. + if (*(sm[1].second - 1) == '\\') { + arg = string(sm[1].first, sm[1].second - 1) + " "; + needContinuation = true; + } else { + arg = sm.str(1); + needContinuation = false; + } + if (continuation) { + this->back() += arg; + } else { + push_back(arg); + } + continuation = needContinuation; + first = sm.suffix().first; + } + //std::locale::global(old); } /* From 56ea7a1d6f16fcfa022f89b3127ea437ed9d747c Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Wed, 18 Sep 2024 21:43:33 +0900 Subject: [PATCH 2/8] Simplify backslash handling. --- utils/argparser.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/utils/argparser.cpp b/utils/argparser.cpp index 314b995ccb..666b98f79a 100644 --- a/utils/argparser.cpp +++ b/utils/argparser.cpp @@ -34,18 +34,17 @@ using namespace std; argvector::argvector(const string& sArgs) { const string sep(" \t"); - //std::locale old = std::locale::global(std::locale("ja_JP.UTF-8")); // std::regex does not support negative lookbehind assertions. Darn! // RE to extract argument between string start and separator. // - Match 0 is whole matching string including separator. // - Match 1 is the argument. + // - Match 2 is an empty string or a backslash. // Use negative character class as it is easiest way to handle // utf-8 file names with non-Latin characters. $ must not be last // in the character class or it will be taken literally not as // end of string. // Use raw literal to avoid excess backslashes. - regex re(R"--(^([^$ \t]+)(?:[ \t]+|$))--"); - //regex re(R"--(^([\\/:._[:alnum:]-]+)(?:[ \t]+|$))--"); + regex re(R"--(^([^$\\ \t]+)(\\?)(?:[ \t]+|$))--"); size_t pos; pos = sArgs.find_first_not_of(sep); @@ -54,22 +53,23 @@ argvector::argvector(const string& sArgs) auto first = sArgs.begin() + pos; auto last = sArgs.end(); bool continuation = false; - bool needContinuation = false; for (smatch sm; regex_search(first, last, sm, re);) { + bool needContinuation = false; +#if DEBUG_REGEX std::cout << "prefix: " << sm.prefix() << '\n'; std::cout << "suffix: " << sm.suffix() << '\n'; std::cout << "match size: " << sm.size() << '\n'; for(uint32_t i = 0; i < sm.size(); i++) { std::cout << "match " << i << ": " << "\"" << sm.str(i) << "\"" << '\n'; } +#endif string arg; - // All this because std::regex does not support negative lookbehind. - if (*(sm[1].second - 1) == '\\') { - arg = string(sm[1].first, sm[1].second - 1) + " "; + // All this because std::regex does not support negative + // lookbehind assertions. + arg = sm.str(1); + if (*sm[2].first == '\\') { + arg += " "; needContinuation = true; - } else { - arg = sm.str(1); - needContinuation = false; } if (continuation) { this->back() += arg; @@ -79,7 +79,6 @@ argvector::argvector(const string& sArgs) continuation = needContinuation; first = sm.suffix().first; } - //std::locale::global(old); } /* From cfa4e56db2ff9c823cd27519a9451d24a0b932da Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Thu, 19 Sep 2024 16:59:16 +0900 Subject: [PATCH 3/8] Escape spaces in received filename. --- tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp b/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp index edd4057c98..87ab152ab0 100644 --- a/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp +++ b/tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp @@ -15,6 +15,9 @@ * OpenGL ES 3.x */ +#include +#include + #include "GLLoadTests.h" #include "EncodeTexture.h" #include "DrawTexture.h" @@ -71,6 +74,9 @@ GLLoadTests::showFile(std::string& filename) createViewer = DrawTexture::create; } ktxTexture_Destroy(kTexture); + + // Escape any spaces in filename. + filename = std::regex_replace( filename, std::regex(" "), "\\ " ); std::string args = "--external " + filename; pViewer = createViewer(w_width, w_height, args.c_str(), sBasePath); return pViewer; From 8c7f63ff240f0655f33e7335b2213fe03b8a4b2e Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Sun, 22 Sep 2024 12:43:54 +0900 Subject: [PATCH 4/8] Don't crash when filename has non-latin characters. --- .../VulkanAppSDL/vulkantextoverlay.hpp | 130 +++++++++++++++++- tests/loadtests/vkloadtests.cmake | 6 + 2 files changed, 130 insertions(+), 6 deletions(-) diff --git a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp index 6ff559da44..23349a4686 100644 --- a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp +++ b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp @@ -29,10 +29,113 @@ #define STB_FONT_HEIGHT STB_FONT_consolas_24_latin1_BITMAP_HEIGHT #define STB_FIRST_CHAR STB_FONT_consolas_24_latin1_FIRST_CHAR #define STB_NUM_CHARS STB_FONT_consolas_24_latin1_NUM_CHARS +#define STB_MISSING_GLPYH 0x80 // Actually a control character. // Max. number of chars the text overlay buffer can hold #define MAX_CHAR_COUNT 1024 + +/** + * @internal + * @brief Given the lead byte of a UTF-8 sequence returns the expected length of the codepoint + * @param[in] leadByte The lead byte of a UTF-8 sequence + * @return The expected length of the codepoint */ +[[nodiscard]] constexpr inline int sequenceLength(uint8_t leadByte) noexcept { + if ((leadByte & 0b1000'0000u) == 0b0000'0000u) + return 1; + if ((leadByte & 0b1110'0000u) == 0b1100'0000u) + return 2; + if ((leadByte & 0b1111'0000u) == 0b1110'0000u) + return 3; + if ((leadByte & 0b1111'1000u) == 0b1111'0000u) + return 4; + + return 0; +} + +/** + * @internal + * @brief Checks if the codepoint was coded as a longer than required sequence + * @param[in] codepoint The unicode codepoint + * @param[in] length The UTF-8 sequence length + * @return True if the sequence length was inappropriate for the given codepoint */ +[[nodiscard]] constexpr inline bool isOverlongSequence(uint32_t codepoint, int length) noexcept { + if (codepoint < 0x80) + return length != 1; + else if (codepoint < 0x800) + return length != 2; + else if (codepoint < 0x10000) + return length != 3; + else + return false; +} + +/** + * @internal + * @brief Checks if the codepoint is valid + * @param[in] codepoint The unicode codepoint + * @return True if the codepoint is a valid unicode codepoint */ +[[nodiscard]] constexpr inline bool isCodepointValid(uint32_t codepoint) noexcept { + return codepoint <= 0x0010FFFFu + && !(0xD800u <= codepoint && codepoint <= 0xDBFFu); +} + +/** + * @internal + * @brief Safely checks and advances a UTF-8 sequence iterator to the start of the next unicode codepoint + * @param[in] it iterator to be advanced + * @param[in] end iterator pointing to the end of the range + * @return True if the advance operation was successful and the advanced codepoint was a valid UTF-8 sequence */ +template +[[nodiscard]] constexpr bool advanceUTF8(Iterator& it, Iterator end, + uint32_t& codepoint) noexcept { + if (it == end) + return false; + + const auto length = sequenceLength(*it); + if (length == 0) + return false; + + if (std::distance(it, end) < length) + return false; + + for (int i = 1; i < length; ++i) { + const auto trailByte = *(it + i); + if ((static_cast(trailByte) & 0b1100'0000u) != 0b1000'0000u) + return false; + } + + codepoint = 0; + switch (length) { + case 1: + codepoint |= *it++; + break; + case 2: + codepoint |= (*it++ & 0b0001'1111u) << 6u; + codepoint |= (*it++ & 0b0011'1111u); + break; + case 3: + codepoint |= (*it++ & 0b0000'1111u) << 12u; + codepoint |= (*it++ & 0b0011'1111u) << 6u; + codepoint |= (*it++ & 0b0011'1111u); + break; + case 4: + codepoint |= (*it++ & 0b0000'0111u) << 18u; + codepoint |= (*it++ & 0b0011'1111u) << 12u; + codepoint |= (*it++ & 0b0011'1111u) << 6u; + codepoint |= (*it++ & 0b0011'1111u); + break; + } + + if (!isCodepointValid(codepoint)) + return false; + + if (isOverlongSequence(codepoint, length)) + return false; + + return true; +} + // Mostly self-contained text overlay class // todo : comment class VulkanTextOverlay @@ -91,6 +194,7 @@ class VulkanTextOverlay // todo : throw error return 0; } + public: enum TextAlign { alignLeft, alignCenter, alignRight }; @@ -573,12 +677,20 @@ class VulkanTextOverlay // Calculate text width float textWidth = 0; - for (auto letter : text) + uint32_t codepoint; + auto it = text.begin(); + while (it != text.end()) { - stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR]; + if (!advanceUTF8(it, text.end(), codepoint)) + break; + // TODO: Get a UTF8 font. Consider changing to Dear ImGUI + // https://github.com/ocornut/imgui. + // Placeholder to avoid crashing. + if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR) + codepoint = STB_MISSING_GLPYH; + stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR]; textWidth += charData->advance * charW; } - switch (align) { case alignRight: @@ -592,9 +704,15 @@ class VulkanTextOverlay } // Generate a uv mapped quad per char in the new text - for (auto letter : text) + it = text.begin(); + while (it != text.end()) { { - stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR]; + if (!advanceUTF8(it, text.end(), codepoint)) + break; + if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR) + codepoint = STB_MISSING_GLPYH; + + stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR]; mappedLocal->x = (x + (float)charData->x0 * charW); mappedLocal->y = (y + (float)charData->y0 * charH); @@ -627,8 +745,8 @@ class VulkanTextOverlay if (numLetters == MAX_CHAR_COUNT) break; // Truncate the text. } + } } - // Unmap buffer and update command buffers void endTextUpdate() { diff --git a/tests/loadtests/vkloadtests.cmake b/tests/loadtests/vkloadtests.cmake index 3ebf51ee1b..b36f756187 100644 --- a/tests/loadtests/vkloadtests.cmake +++ b/tests/loadtests/vkloadtests.cmake @@ -165,6 +165,12 @@ add_executable( vkloadtests set_code_sign(vkloadtests) +# If VulkanAppSDL is ever made into its own target change the target here. +target_compile_features(vkloadtests +PRIVATE + cxx_std_14 +) + target_include_directories(vkloadtests PRIVATE ${SDL2_INCLUDE_DIRS} From 70c8424321335749d42b0ace566b0bbc39a21667 Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Sun, 22 Sep 2024 12:53:20 +0900 Subject: [PATCH 5/8] Remove unneeded header. --- utils/argparser.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/argparser.cpp b/utils/argparser.cpp index 666b98f79a..9c2e260add 100644 --- a/utils/argparser.cpp +++ b/utils/argparser.cpp @@ -24,7 +24,6 @@ #include #include "argparser.h" #include -#include using namespace std; From b2b4cb3beed79f72c07a5e1f9fd4c623accbdb55 Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Sun, 22 Sep 2024 18:25:38 +0900 Subject: [PATCH 6/8] Skip debug build to get cache populated. --- scripts/build_macos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_macos.sh b/scripts/build_macos.sh index bd616ebcf5..b4555fa7e5 100755 --- a/scripts/build_macos.sh +++ b/scripts/build_macos.sh @@ -113,7 +113,7 @@ IFS=, ; for config in $CONFIGURATION do IFS=$oldifs # Because of ; IFS set above will still be present. # Build and test - #if [ "$config" = "Debug" ]; then continue; fi + if [ "$config" = "Debug" ]; then continue; fi echo "Build KTX-Software (macOS $ARCHS $config)" if [ -n "$MACOS_CERTIFICATES_P12" -a "$config" = "Release" ]; then cmake --build . --config $config | handle_compiler_output From 5384f020768ad8d3dcbc879254ddde35811d40aa Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Tue, 24 Sep 2024 12:55:29 +0900 Subject: [PATCH 7/8] Reenable debug build. --- scripts/build_macos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_macos.sh b/scripts/build_macos.sh index b4555fa7e5..bd616ebcf5 100755 --- a/scripts/build_macos.sh +++ b/scripts/build_macos.sh @@ -113,7 +113,7 @@ IFS=, ; for config in $CONFIGURATION do IFS=$oldifs # Because of ; IFS set above will still be present. # Build and test - if [ "$config" = "Debug" ]; then continue; fi + #if [ "$config" = "Debug" ]; then continue; fi echo "Build KTX-Software (macOS $ARCHS $config)" if [ -n "$MACOS_CERTIFICATES_P12" -a "$config" = "Release" ]; then cmake --build . --config $config | handle_compiler_output From eccfa1a2fd238b25ae0cccd394b9afac68d48598 Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Wed, 25 Sep 2024 16:05:34 +0900 Subject: [PATCH 8/8] Remove unnecessary braces. --- tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp index 23349a4686..0a0d236f00 100644 --- a/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp +++ b/tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp @@ -705,7 +705,7 @@ class VulkanTextOverlay // Generate a uv mapped quad per char in the new text it = text.begin(); - while (it != text.end()) { + while (it != text.end()) { if (!advanceUTF8(it, text.end(), codepoint)) break; @@ -745,7 +745,6 @@ class VulkanTextOverlay if (numLetters == MAX_CHAR_COUNT) break; // Truncate the text. } - } } // Unmap buffer and update command buffers void endTextUpdate()