From 2c82a88902cf8c1f2493345f3af47f2db11d893d Mon Sep 17 00:00:00 2001 From: Benoit Pierre Date: Sat, 19 Oct 2024 06:05:52 +0200 Subject: [PATCH 1/4] epubfmt: minor simplification --- crengine/src/epubfmt.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/crengine/src/epubfmt.cpp b/crengine/src/epubfmt.cpp index 4018a0133..eaf0d35a3 100644 --- a/crengine/src/epubfmt.cpp +++ b/crengine/src/epubfmt.cpp @@ -1753,18 +1753,13 @@ bool ImportEpubDocument( LVStreamRef stream, ldomDocument * m_doc, LVDocViewCall LVStreamRef stream = m_arc->OpenStream(cover_xhtml_path.c_str(), LVOM_READ); lString32 cover_image_href; if ( ExtractCoverFilenameFromCoverPageFragment(stream, cover_image_href, node_scheme, attr_scheme, ns_scheme) ) { + cover_image_href = DecodeHTMLUrlString(cover_image_href); lString32 codeBase = LVExtractPath( cover_xhtml_path ); if ( codeBase.length()>0 && codeBase.lastChar()!='/' ) codeBase.append(1, U'/'); lString32 cover_image_path = LVCombinePaths(codeBase, cover_image_href); CRLog::info("EPUB cover image file: %s", LCSTR(cover_image_path)); LVStreamRef stream = m_arc->OpenStream(cover_image_path.c_str(), LVOM_READ); - if ( stream.isNull() ) { - // Try again in case cover_image_path is percent-encoded - cover_image_path = LVCombinePaths(codeBase, DecodeHTMLUrlString(cover_image_href)); - CRLog::info("EPUB cover image file pct-decoded: %s", LCSTR(cover_image_path)); - stream = m_arc->OpenStream(cover_image_path.c_str(), LVOM_READ); - } if ( !stream.isNull() ) { LVImageSourceRef img = LVCreateStreamImageSource(stream); if ( !img.isNull() ) { From 0d0102e2f107af62a27cf6d7229fcb81ad9cf348 Mon Sep 17 00:00:00 2001 From: Benoit Pierre Date: Sat, 19 Oct 2024 06:05:53 +0200 Subject: [PATCH 2/4] lvstring: fix `DecodeHTMLUrlString` implementation Don't mangle non-encoded non-ASCII characters. 
--- crengine/src/lvstring.cpp | 97 ++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/crengine/src/lvstring.cpp b/crengine/src/lvstring.cpp index deed4336d..c98bad229 100644 --- a/crengine/src/lvstring.cpp +++ b/crengine/src/lvstring.cpp @@ -5842,64 +5842,59 @@ bool lString32::replaceIntParam(int index, int replaceNumber) return replaceParam( index, lString32::itoa(replaceNumber)); } -static int decodeHex( lChar32 ch ) -{ - if ( ch>='0' && ch<='9' ) - return ch-'0'; - else if ( ch>='a' && ch<='f' ) - return ch-'a'+10; - else if ( ch>='A' && ch<='F' ) - return ch-'A'+10; - return -1; -} - -static lChar8 decodeHTMLChar( const lChar32 * s ) -{ - if (s[0] == '%') { - int d1 = decodeHex( s[1] ); - if (d1 >= 0) { - int d2 = decodeHex( s[2] ); - if (d2 >= 0) { - return (lChar8)(d1*16 + d2); - } - } - } - return 0; -} - /// decodes path like "file%20name%C3%A7" to "file nameç" +/// NOTE: return the original string unchanged on error +/// (malformed escape sequence, bad UTF-8 encoding, …). lString32 DecodeHTMLUrlString( lString32 s ) { - const lChar32 * str = s.c_str(); - for ( int i=0; str[i]; i++ ) { - if ( str[i]=='%' ) { - lChar8 ch = decodeHTMLChar( str + i ); - if ( ch==0 ) { + for (const lChar32 *src = s.c_str(); *src; ++src) { + if (*src != '%') + continue; + // First escape found: decode into a copy of s, which can only get + // shorter (each "%XX" triplet yields at most one character). + lString32 res(s); + lChar32 *dst = res.modify() + (src - s.c_str()); + // Number of UTF-8 continuation bytes still expected for c. + int continuation = 0; + lChar32 c; + while (*src) { + if (*src != '%') { + if (continuation) + return s; // ERROR: truncated UTF-8 sequence. 
+ *dst++ = *src++; continue; } - // HTML encoded char found - lString8 res; - res.reserve(s.length()); - res.append(UnicodeToUtf8(str, i)); - res.append(1, ch); - i+=3; - - // continue conversion - for ( ; str[i]; i++ ) { - if ( str[i]=='%' ) { - ch = decodeHTMLChar( str + i ); - if ( ch==0 ) { - res.append(1, (lChar8)str[i]); - continue; - } - res.append(1, ch); - i+=2; - } else { - res.append(1, (lChar8)str[i]); - } + int hex = decodeHex(src + 1, 2); + // A decoded value of 0 (an embedded NUL) is rejected as well. + if (hex <= 0) + return s; // ERROR: malformed or invalid escape sequence. + src += 3; + if (continuation) { + if ((hex & 0xc0) != 0x80) + return s; // ERROR: bad UTF-8 continuation byte. + c = (c << 6) | (hex & 0x3f); + if (!--continuation) + *dst++ = c; + } else { + if (!(hex & 0x80)) + *dst++ = hex; + else if ((hex & 0xe0) == 0xc0) { + c = hex & 0x1f; + continuation = 1; + } else if ((hex & 0xf0) == 0xe0) { + c = hex & 0x0f; + continuation = 2; + } else if ((hex & 0xf8) == 0xf0) { + c = hex & 0x07; + continuation = 3; + } else + return s; // ERROR: bad UTF-8 sequence first byte. 
} - return Utf8ToUnicode(res); } + if (continuation) + return s; // ERROR: UTF-8 sequence truncated by the end of the string. + res.erase(dst - res.c_str(), res.length()); + return res; } return s; } From 4df360bf71190e2604ab7a3fb930536d43006c0b Mon Sep 17 00:00:00 2001 From: Benoit Pierre Date: Sat, 19 Oct 2024 06:05:54 +0200 Subject: [PATCH 3/4] lvimg: minor cleanup (dead code) --- crengine/include/lvimg.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/crengine/include/lvimg.h b/crengine/include/lvimg.h index 2d7a3f17f..00d0eee97 100644 --- a/crengine/include/lvimg.h +++ b/crengine/include/lvimg.h @@ -147,11 +147,4 @@ void LVDrawBatteryIcon( LVDrawBuf * drawbuf, const lvRect & batteryRc, int perce unsigned char * convertSVGtoPNG(const unsigned char *svg_data, int svg_data_size, float zoom_factor, int *png_data_len); -#define IMAGE_SOURCE_FROM_BYTES( imgvar , bufvar ) \ - extern unsigned char bufvar []; \ - extern int bufvar ## _size ; \ - LVImageSourceRef imgvar = LVCreateStreamImageSource( \ - LVCreateMemoryStream( bufvar , bufvar ## _size ) ) - - #endif From f3d74bfed558d1630b5d5b332f37a540cae5efe9 Mon Sep 17 00:00:00 2001 From: Benoit Pierre Date: Sat, 19 Oct 2024 06:05:55 +0200 Subject: [PATCH 4/4] lvimg: fix `lunasvgDrawImageHelper` implementation Correctly handle percent encoded URLs. --- crengine/src/lvimg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crengine/src/lvimg.cpp b/crengine/src/lvimg.cpp index b89e79c8b..8ffa10cad 100644 --- a/crengine/src/lvimg.cpp +++ b/crengine/src/lvimg.cpp @@ -2077,7 +2077,7 @@ static bool lunasvgDrawImageHelper(lunasvg::external_context_t * xcontext, const ldomDocument * doc = ((LVNodeImageSource *)xcontext->external_object)->GetSourceDocument(); if ( doc ) { ldomNode * node = ((LVNodeImageSource *)xcontext->external_object)->GetSourceNode(); - img = doc->getObjectImageSource(Utf8ToUnicode(url), node); + img = doc->getObjectImageSource(DecodeHTMLUrlString(Utf8ToUnicode(url)), node); } else { // We may be used by frontends without a ldomDocument to render SVG, and