Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: handle spaces and non-latin characters in filenames in loadtest apps. #949

Merged
merged 8 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 123 additions & 6 deletions tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,113 @@
#define STB_FONT_HEIGHT STB_FONT_consolas_24_latin1_BITMAP_HEIGHT
#define STB_FIRST_CHAR STB_FONT_consolas_24_latin1_FIRST_CHAR
#define STB_NUM_CHARS STB_FONT_consolas_24_latin1_NUM_CHARS
#define STB_MISSING_GLPYH 0x80 // Actually a control character.

// Max. number of chars the text overlay buffer can hold
#define MAX_CHAR_COUNT 1024


/**
* @internal
* @brief Given the lead byte of a UTF-8 sequence returns the expected length of the codepoint
* @param[in] leadByte The lead byte of a UTF-8 sequence
* @return The expected length of the codepoint */
[[nodiscard]] constexpr inline int sequenceLength(uint8_t leadByte) noexcept {
if ((leadByte & 0b1000'0000u) == 0b0000'0000u)
return 1;
if ((leadByte & 0b1110'0000u) == 0b1100'0000u)
return 2;
if ((leadByte & 0b1111'0000u) == 0b1110'0000u)
return 3;
if ((leadByte & 0b1111'1000u) == 0b1111'0000u)
return 4;

return 0;
}

/**
* @internal
* @brief Checks if the codepoint was coded as a longer than required sequence
* @param[in] codepoint The unicode codepoint
* @param[in] length The UTF-8 sequence length
* @return True if the sequence length was inappropriate for the given codepoint */
[[nodiscard]] constexpr inline bool isOverlongSequence(uint32_t codepoint, int length) noexcept {
if (codepoint < 0x80)
return length != 1;
else if (codepoint < 0x800)
return length != 2;
else if (codepoint < 0x10000)
return length != 3;
else
return false;
}

/**
* @internal
* @brief Checks if the codepoint is valid
* @param[in] codepoint The unicode codepoint
* @return True if the codepoint is a valid unicode codepoint */
[[nodiscard]] constexpr inline bool isCodepointValid(uint32_t codepoint) noexcept {
return codepoint <= 0x0010FFFFu
&& !(0xD800u <= codepoint && codepoint <= 0xDBFFu);
}

/**
* @internal
* @brief Safely checks and advances a UTF-8 sequence iterator to the start of the next unicode codepoint
* @param[in] it iterator to be advanced
* @param[in] end iterator pointing to the end of the range
* @return True if the advance operation was successful and the advanced codepoint was a valid UTF-8 sequence */
template <typename Iterator>
[[nodiscard]] constexpr bool advanceUTF8(Iterator& it, Iterator end,
uint32_t& codepoint) noexcept {
if (it == end)
return false;

const auto length = sequenceLength(*it);
if (length == 0)
return false;

if (std::distance(it, end) < length)
return false;

for (int i = 1; i < length; ++i) {
const auto trailByte = *(it + i);
if ((static_cast<uint8_t>(trailByte) & 0b1100'0000u) != 0b1000'0000u)
return false;
}

codepoint = 0;
switch (length) {
case 1:
codepoint |= *it++;
break;
case 2:
codepoint |= (*it++ & 0b0001'1111u) << 6u;
codepoint |= (*it++ & 0b0011'1111u);
break;
case 3:
codepoint |= (*it++ & 0b0000'1111u) << 12u;
codepoint |= (*it++ & 0b0011'1111u) << 6u;
codepoint |= (*it++ & 0b0011'1111u);
break;
case 4:
codepoint |= (*it++ & 0b0000'0111u) << 18u;
codepoint |= (*it++ & 0b0011'1111u) << 12u;
codepoint |= (*it++ & 0b0011'1111u) << 6u;
codepoint |= (*it++ & 0b0011'1111u);
break;
}

if (!isCodepointValid(codepoint))
return false;

if (isOverlongSequence(codepoint, length))
return false;

return true;
}

// Mostly self-contained text overlay class
// todo : comment
class VulkanTextOverlay
Expand Down Expand Up @@ -91,6 +194,7 @@ class VulkanTextOverlay
// todo : throw error
return 0;
}

public:

enum TextAlign { alignLeft, alignCenter, alignRight };
Expand Down Expand Up @@ -573,12 +677,20 @@ class VulkanTextOverlay

// Calculate text width
float textWidth = 0;
for (auto letter : text)
uint32_t codepoint;
auto it = text.begin();
while (it != text.end())
{
stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR];
if (!advanceUTF8(it, text.end(), codepoint))
break;
// TODO: Get a UTF8 font. Consider changing to Dear ImGUI
// https://github.com/ocornut/imgui.
// Placeholder to avoid crashing.
if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR)
codepoint = STB_MISSING_GLPYH;
stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR];
textWidth += charData->advance * charW;
}

switch (align)
{
case alignRight:
Expand All @@ -592,9 +704,15 @@ class VulkanTextOverlay
}

// Generate a uv mapped quad per char in the new text
for (auto letter : text)
it = text.begin();
while (it != text.end())
{
stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR];
if (!advanceUTF8(it, text.end(), codepoint))
break;
if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR)
codepoint = STB_MISSING_GLPYH;

stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR];

mappedLocal->x = (x + (float)charData->x0 * charW);
mappedLocal->y = (y + (float)charData->y0 * charH);
Expand Down Expand Up @@ -628,7 +746,6 @@ class VulkanTextOverlay
break; // Truncate the text.
}
}

// Unmap buffer and update command buffers
void endTextUpdate()
{
Expand Down
6 changes: 6 additions & 0 deletions tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
* OpenGL ES 3.x
*/

#include <string>
#include <regex>

#include "GLLoadTests.h"
#include "EncodeTexture.h"
#include "DrawTexture.h"
Expand Down Expand Up @@ -71,6 +74,9 @@ GLLoadTests::showFile(std::string& filename)
createViewer = DrawTexture::create;
}
ktxTexture_Destroy(kTexture);

// Escape any spaces in filename.
filename = std::regex_replace( filename, std::regex(" "), "\\ " );
std::string args = "--external " + filename;
pViewer = createViewer(w_width, w_height, args.c_str(), sBasePath);
return pViewer;
Expand Down
6 changes: 6 additions & 0 deletions tests/loadtests/vkloadtests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ add_executable( vkloadtests

set_code_sign(vkloadtests)

# If VulkanAppSDL is ever made into its own target change the target here.
target_compile_features(vkloadtests
PRIVATE
cxx_std_14
)

target_include_directories(vkloadtests
PRIVATE
${SDL2_INCLUDE_DIRS}
Expand Down
5 changes: 5 additions & 0 deletions tests/loadtests/vkloadtests/VulkanLoadTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#define _USE_MATH_DEFINES
#include <math.h>
#include <string>
#include <regex>

#include "VulkanLoadTests.h"
#include "Texture.h"
Expand Down Expand Up @@ -355,6 +357,9 @@ VulkanLoadTests::showFile(std::string& filename)
createViewer = Texture::create;
}
ktxTexture_Destroy(kTexture);

// Escape any spaces in filename.
filename = std::regex_replace( filename, std::regex(" "), "\\ " );
std::string args = "--external " + filename;
pViewer = createViewer(vkctx, w_width, w_height, args.c_str(), sBasePath);
return pViewer;
Expand Down
50 changes: 43 additions & 7 deletions utils/argparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
*/

#include <assert.h>
#include <iostream>
#include "argparser.h"
#include <regex>

using namespace std;

Expand All @@ -30,18 +32,52 @@ using namespace std;
*/
argvector::argvector(const string& sArgs)
{
const string sep(" \t\n\r\v\f");
const string sep(" \t");
// std::regex does not support negative lookbehind assertions. Darn!
// RE to extract argument between string start and separator.
// - Match 0 is whole matching string including separator.
// - Match 1 is the argument.
// - Match 2 is an empty string or a backslash.
// Use negative character class as it is easiest way to handle
// utf-8 file names with non-Latin characters. $ must not be last
// in the character class or it will be taken literally not as
// end of string.
// Use raw literal to avoid excess backslashes.
regex re(R"--(^([^$\\ \t]+)(\\?)(?:[ \t]+|$))--");
size_t pos;

pos = sArgs.find_first_not_of(sep);
assert(pos != string::npos);

do {
size_t epos = sArgs.find_first_of(sep, pos);
size_t len = epos == string::npos ? epos : epos - pos;
push_back(sArgs.substr(pos, len));
pos = sArgs.find_first_not_of(sep, epos);
} while (pos != string::npos);
auto first = sArgs.begin() + pos;
auto last = sArgs.end();
bool continuation = false;
for (smatch sm; regex_search(first, last, sm, re);) {
bool needContinuation = false;
#if DEBUG_REGEX
std::cout << "prefix: " << sm.prefix() << '\n';
std::cout << "suffix: " << sm.suffix() << '\n';
std::cout << "match size: " << sm.size() << '\n';
for(uint32_t i = 0; i < sm.size(); i++) {
std::cout << "match " << i << ": " << "\"" << sm.str(i) << "\"" << '\n';
}
#endif
string arg;
// All this because std::regex does not support negative
// lookbehind assertions.
arg = sm.str(1);
if (*sm[2].first == '\\') {
arg += " ";
needContinuation = true;
}
if (continuation) {
this->back() += arg;
} else {
push_back(arg);
}
continuation = needContinuation;
first = sm.suffix().first;
}
}

/*
Expand Down
Loading