diff --git a/CMakeLists.txt b/CMakeLists.txt index de8dec18..71be2335 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(liblcf VERSION 0.8 LANGUAGES CXX) # Compilation options option(BUILD_SHARED_LIBS "Build shared library, disable for building the static library (default: ON)" ON) -option(LIBLCF_WITH_ICU "ICU encoding detection (when OFF fallback to iconv, not recommended, default: ON)" ON) +option(LIBLCF_WITH_ICU "ICU encoding handling (disable only for testing purposes, default: ON)" ON) option(LIBLCF_WITH_XML "XML reading support (expat, default: ON)" ON) option(LIBLCF_UPDATE_MIMEDB "Whether to run update-mime-database after install (default: ON)" ON) option(LIBLCF_ENABLE_TOOLS "Whether to build the tools (default: ON)" ON) @@ -343,16 +343,13 @@ set_property(TARGET lcf PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON) # Name of the exported library set_property(TARGET lcf PROPERTY EXPORT_NAME liblcf) -# icu or fallback to iconv +# icu set(LCF_SUPPORT_ICU 0) if(LIBLCF_WITH_ICU) find_package(ICU COMPONENTS i18n uc data REQUIRED) target_link_libraries(lcf ICU::i18n ICU::uc ICU::data) list(APPEND LIBLCF_DEPS "icu-i18n") set(LCF_SUPPORT_ICU 1) -else() - find_package(Iconv REQUIRED) - target_link_libraries(lcf Iconv::Iconv) endif() # expat diff --git a/builds/cmake/Modules/FindIconv.cmake b/builds/cmake/Modules/FindIconv.cmake deleted file mode 100644 index 5185601a..00000000 --- a/builds/cmake/Modules/FindIconv.cmake +++ /dev/null @@ -1,133 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. - -#[=======================================================================[.rst: -FindIconv ---------- - -This module finds the ``iconv()`` POSIX.1 functions on the system. -These functions might be provided in the regular C library or externally -in the form of an additional library. - -The following variables are provided to indicate iconv support: - -.. 
variable:: Iconv_FOUND - - Variable indicating if the iconv support was found. - -.. variable:: Iconv_INCLUDE_DIRS - - The directories containing the iconv headers. - -.. variable:: Iconv_LIBRARIES - - The iconv libraries to be linked. - -.. variable:: Iconv_IS_BUILT_IN - - A variable indicating whether iconv support is stemming from the - C library or not. Even if the C library provides `iconv()`, the presence of - an external `libiconv` implementation might lead to this being false. - -Additionally, the following :prop_tgt:`IMPORTED` target is being provided: - -.. variable:: Iconv::Iconv - - Imported target for using iconv. - -The following cache variables may also be set: - -.. variable:: Iconv_INCLUDE_DIR - - The directory containing the iconv headers. - -.. variable:: Iconv_LIBRARY - - The iconv library (if not implicitly given in the C library). - -.. note:: - On POSIX platforms, iconv might be part of the C library and the cache - variables ``Iconv_INCLUDE_DIR`` and ``Iconv_LIBRARY`` might be empty. - -#]=======================================================================] - -include(CMakePushCheckState) -if(CMAKE_C_COMPILER_LOADED) - include(CheckCSourceCompiles) -elseif(CMAKE_CXX_COMPILER_LOADED) - include(CheckCXXSourceCompiles) -else() - # If neither C nor CXX are loaded, implicit iconv makes no sense. - set(Iconv_IS_BUILT_IN FALSE) -endif() - -# iconv can only be provided in libc on a POSIX system. -# If any cache variable is already set, we'll skip this test. -if(NOT DEFINED Iconv_IS_BUILT_IN) - if(UNIX AND NOT DEFINED Iconv_INCLUDE_DIR AND NOT DEFINED Iconv_LIBRARY) - cmake_push_check_state(RESET) - # We always suppress the message here: Otherwise on supported systems - # not having iconv in their C library (e.g. those using libiconv) - # would always display a confusing "Looking for iconv - not found" message - set(CMAKE_FIND_QUIETLY TRUE) - # The following code will not work, but it's sufficient to see if it compiles. 
- # Note: libiconv will define the iconv functions as macros, so CheckSymbolExists - # will not yield correct results. - set(Iconv_IMPLICIT_TEST_CODE - " - #include - #include - int main() { - char *a, *b; - size_t i, j; - iconv_t ic; - ic = iconv_open(\"to\", \"from\"); - iconv(ic, &a, &i, &b, &j); - iconv_close(ic); - } - " - ) - if(CMAKE_C_COMPILER_LOADED) - check_c_source_compiles("${Iconv_IMPLICIT_TEST_CODE}" Iconv_IS_BUILT_IN) - else() - check_cxx_source_compiles("${Iconv_IMPLICIT_TEST_CODE}" Iconv_IS_BUILT_IN) - endif() - cmake_pop_check_state() - else() - set(Iconv_IS_BUILT_IN FALSE) - endif() -endif() - -if(NOT Iconv_IS_BUILT_IN) - find_path(Iconv_INCLUDE_DIR - NAMES "iconv.h" - DOC "iconv include directory") - set(Iconv_LIBRARY_NAMES "iconv" "libiconv") -else() - set(Iconv_INCLUDE_DIR "" CACHE FILEPATH "iconv include directory") - set(Iconv_LIBRARY_NAMES "c") -endif() - -find_library(Iconv_LIBRARY - NAMES ${Iconv_LIBRARY_NAMES} - DOC "iconv library (potentially the C library)") - -mark_as_advanced(Iconv_INCLUDE_DIR) -mark_as_advanced(Iconv_LIBRARY) - -include(FindPackageHandleStandardArgs) -if(NOT Iconv_IS_BUILT_IN) - find_package_handle_standard_args(Iconv REQUIRED_VARS Iconv_LIBRARY Iconv_INCLUDE_DIR) -else() - find_package_handle_standard_args(Iconv REQUIRED_VARS Iconv_LIBRARY) -endif() - -if(Iconv_FOUND) - set(Iconv_INCLUDE_DIRS "${Iconv_INCLUDE_DIR}") - set(Iconv_LIBRARIES "${Iconv_LIBRARY}") - if(NOT TARGET Iconv::Iconv) - add_library(Iconv::Iconv INTERFACE IMPORTED) - endif() - set_property(TARGET Iconv::Iconv PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${Iconv_INCLUDE_DIRS}") - set_property(TARGET Iconv::Iconv PROPERTY INTERFACE_LINK_LIBRARIES "${Iconv_LIBRARIES}") -endif() diff --git a/builds/cmake/liblcf-config.cmake.in b/builds/cmake/liblcf-config.cmake.in index 84a70177..c892c0a5 100644 --- a/builds/cmake/liblcf-config.cmake.in +++ b/builds/cmake/liblcf-config.cmake.in @@ -4,8 +4,6 @@ include(CMakeFindDependencyMacro) if(@LCF_SUPPORT_ICU@) 
find_dependency(ICU COMPONENTS i18n uc data REQUIRED) -else() - find_dependency(Iconv REQUIRED) endif() if(@LCF_SUPPORT_XML@) diff --git a/configure.ac b/configure.ac index c376464a..f4d8a78e 100644 --- a/configure.ac +++ b/configure.ac @@ -22,7 +22,7 @@ AM_CONDITIONAL(CROSS_COMPILING,[test "x$cross_compiling" = "xyes"]) # Checks for libraries. AC_SUBST([LCF_SUPPORT_ICU],[0]) -AC_ARG_ENABLE([icu],[AS_HELP_STRING([--disable-icu],[Disable ICU encoding detection (fallback to iconv) [default=no]])]) +AC_ARG_ENABLE([icu],[AS_HELP_STRING([--disable-icu],[Disable ICU encoding handling (only for testing purposes) [default=no]])]) AS_IF([test "x$enable_icu" != "xno"],[ AX_PKG_CHECK_MODULES([ICU],[],[icu-i18n],[LCF_SUPPORT_ICU=1]) ]) diff --git a/src/encoder.cpp b/src/encoder.cpp index 859c11bc..8cfae2a8 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -13,7 +13,6 @@ #include "lcf/scope_guard.h" #include #include -#include #if LCF_SUPPORT_ICU # include @@ -27,9 +26,6 @@ #ifdef _WIN32 # include #else -# if !LCF_SUPPORT_ICU -# include -# endif # include #endif @@ -77,12 +73,12 @@ void Encoder::Decode(std::string& str) { Convert(str, _conv_storage, _conv_runtime); } +#if LCF_SUPPORT_ICU void Encoder::Init() { if (_encoding.empty()) { return; } -#if LCF_SUPPORT_ICU auto code_page = atoi(_encoding.c_str()); const auto& storage_encoding = code_page > 0 ?
ReaderUtil::CodepageToEncoding(code_page) @@ -110,27 +106,22 @@ void Encoder::Init() { _conv_runtime = conv_runtime; _conv_storage = conv_storage; -#else - _conv_runtime = const_cast("UTF-8"); - _conv_storage = const_cast(_encoding.c_str()); -#endif } void Encoder::Reset() { -#if LCF_SUPPORT_ICU - auto* conv = reinterpret_cast(_conv_runtime); - if (conv) ucnv_close(conv); - conv = reinterpret_cast(_conv_storage); - if (conv) ucnv_close(conv); -#endif -} + if (_conv_runtime) { + ucnv_close(_conv_runtime); + _conv_runtime = nullptr; + } + if (_conv_storage) { + ucnv_close(_conv_storage); + _conv_storage = nullptr; + } +} -void Encoder::Convert(std::string& str, void* conv_dst_void, void* conv_src_void) { -#if LCF_SUPPORT_ICU +void Encoder::Convert(std::string& str, UConverter* conv_dst, UConverter* conv_src) { const auto& src = str; - auto* conv_dst = reinterpret_cast(conv_dst_void); - auto* conv_src = reinterpret_cast(conv_src_void); auto status = U_ZERO_ERROR; _buffer.resize(src.size() * 4); @@ -151,36 +142,8 @@ void Encoder::Convert(std::string& str, void* conv_dst_void, void* conv_src_void } str.assign(_buffer.data(), dst_p); - return; -#else - auto* conv_dst = reinterpret_cast(conv_dst_void); - auto* conv_src = reinterpret_cast(conv_src_void); - iconv_t cd = iconv_open(conv_dst, conv_src); - if (cd == (iconv_t)-1) - return; - char *src = &str.front(); - size_t src_left = str.size(); - size_t dst_size = str.size() * 5 + 10; - _buffer.resize(dst_size); - char *dst = _buffer.data(); - size_t dst_left = dst_size; -# ifdef ICONV_CONST - char ICONV_CONST *p = src; -# else - char *p = src; -# endif - char *q = dst; - size_t status = iconv(cd, &p, &src_left, &q, &dst_left); - iconv_close(cd); - if (status == (size_t) -1 || src_left > 0) { - str.clear(); - return; - } - *q++ = '\0'; - str.assign(dst, dst_size - dst_left); - return; -#endif } +#endif } //namespace lcf diff --git a/src/lcf/encoder.h b/src/lcf/encoder.h index 14bcb779..f604e23f 100644 --- 
a/src/lcf/encoder.h +++ b/src/lcf/encoder.h @@ -12,6 +12,10 @@ #include #include +#if LCF_SUPPORT_ICU +class UConverter; +#endif + namespace lcf { class Encoder { @@ -23,20 +27,39 @@ class Encoder { ~Encoder(); + /** + * Converts from the specified encoding to UTF-8 + * + * @param str String to encode to UTF-8 + */ void Encode(std::string& str); + + /** + * Converts from UTF-8 to the specified encoding + * + * @param str String to decode from UTF-8 + */ void Decode(std::string& str); bool IsOk() const; const std::string& GetEncoding() const; private: +#if LCF_SUPPORT_ICU void Init(); void Reset(); - void Convert(std::string& str, void* conv_dst, void* conv_src); - private: + void Convert(std::string& str, UConverter* conv_dst, UConverter* conv_src); + + UConverter* _conv_storage = nullptr; + UConverter* _conv_runtime = nullptr; + std::vector _buffer; +#else + void Init() {} + void Reset() {} + void Convert(std::string&, void*, void*) {} void* _conv_storage = nullptr; void* _conv_runtime = nullptr; - std::vector _buffer; +#endif std::string _encoding; }; diff --git a/src/lcf/reader_util.h b/src/lcf/reader_util.h index 7de34b47..ed3413fb 100644 --- a/src/lcf/reader_util.h +++ b/src/lcf/reader_util.h @@ -104,27 +104,14 @@ namespace ReaderUtil { */ std::string Recode(StringView str_to_encode, StringView source_encoding); - /** - * Converts a string between encodings. - * - * @param str_to_encode the string to convert. - * @param src_enc the source encoding. - * @param dst_enc the destination encoding. - * @return the recoded string. - */ - std::string Recode(StringView str_to_encode, - StringView src_enc, - StringView dst_enc); - /** * Converts a UTF-8 string to lowercase and then decomposes it. - * + * * @param str the string to normalize. * @return the normalized string. */ std::string Normalize(StringView str); - /** * Helper function that returns an element from a vector using a 1-based * index as usually used by LCF data structures. 
diff --git a/src/reader_util.cpp b/src/reader_util.cpp index 9c8e6934..7b9887c6 100644 --- a/src/reader_util.cpp +++ b/src/reader_util.cpp @@ -16,18 +16,11 @@ # include # include # include -#else -# ifdef _MSC_VER -# error MSVC builds require ICU -# endif #endif #ifdef _WIN32 # include #else -# if !LCF_SUPPORT_ICU -# include -# endif # include #endif @@ -37,6 +30,7 @@ #include #include +#include "lcf/encoder.h" #include "lcf/inireader.h" #include "lcf/ldb/reader.h" #include "lcf/reader_util.h" @@ -48,39 +42,23 @@ namespace ReaderUtil { std::string ReaderUtil::CodepageToEncoding(int codepage) { if (codepage == 0) - return std::string(); + return {}; if (codepage == 932) { -#if LCF_SUPPORT_ICU return "ibm-943_P15A-2003"; -#else - return "SHIFT_JIS"; -#endif } if (codepage == 949) { -#if LCF_SUPPORT_ICU return "windows-949-2000"; -#else - return "cp949"; -#endif } - std::ostringstream out; -#if LCF_SUPPORT_ICU - out << "windows-" << codepage; -#else - out << "CP" << codepage; -#endif - // Looks like a valid codepage - std::string outs = out.str(); - return outs; + return "windows-" + std::to_string(codepage); } std::string ReaderUtil::DetectEncoding(lcf::rpg::Database& db) { std::vector encodings = DetectEncodings(db); if (encodings.empty()) { - return ""; + return {}; } return encodings.front(); @@ -137,7 +115,7 @@ std::vector ReaderUtil::DetectEncodings(lcf::rpg::Database& db) { return ReaderUtil::DetectEncodings(text.str()); #else - return std::vector(); + return {"windows-1252"}; #endif } @@ -145,14 +123,14 @@ std::string ReaderUtil::DetectEncoding(StringView string) { std::vector encodings = DetectEncodings(string); if (encodings.empty()) { - return ""; + return {}; } return encodings.front(); } std::vector ReaderUtil::DetectEncodings(StringView string) { -std::vector encodings; + std::vector encodings; #if LCF_SUPPORT_ICU if (!string.empty()) { UErrorCode status = U_ZERO_ERROR; @@ -195,6 +173,8 @@ std::vector encodings; } ucsdet_close(detector); } +#else + 
encodings.push_back("windows-1252"); #endif return encodings; @@ -208,7 +188,7 @@ std::string ReaderUtil::GetEncoding(StringView ini_file) { return ReaderUtil::CodepageToEncoding(atoi(encoding.c_str())); } } - return std::string(); + return {}; } std::string ReaderUtil::GetEncoding(std::istream& filestream) { @@ -219,7 +199,7 @@ std::string ReaderUtil::GetEncoding(std::istream& filestream) { return ReaderUtil::CodepageToEncoding(atoi(encoding.c_str())); } } - return std::string(); + return {}; } std::string ReaderUtil::GetLocaleEncoding() { @@ -280,93 +260,10 @@ std::string ReaderUtil::GetLocaleEncoding() { } std::string ReaderUtil::Recode(StringView str_to_encode, StringView source_encoding) { - return ReaderUtil::Recode(str_to_encode, source_encoding, "UTF-8"); -} - -std::string ReaderUtil::Recode(StringView str_to_encode, - StringView src_enc, - StringView dst_enc) { - - if (src_enc.empty() || dst_enc.empty() || str_to_encode.empty()) { - return ToString(str_to_encode); - } - - auto src_cp = SvAtoi(src_enc); - const auto& src_enc_str = src_cp > 0 - ? ReaderUtil::CodepageToEncoding(src_cp) - : ToString(src_enc); - - auto dst_cp = SvAtoi(dst_enc); - const auto& dst_enc_str = dst_cp > 0 - ? 
ReaderUtil::CodepageToEncoding(dst_cp) - : ToString(dst_enc); - -#if LCF_SUPPORT_ICU - auto status = U_ZERO_ERROR; - auto conv_from = ucnv_open(src_enc_str.c_str(), &status); - - if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) { - fprintf(stderr, "liblcf: ucnv_open() error for source encoding \"%s\": %s\n", src_enc_str.c_str(), u_errorName(status)); - return std::string(); - } - status = U_ZERO_ERROR; - auto conv_from_sg = makeScopeGuard([&]() { ucnv_close(conv_from); }); - - auto conv_to = ucnv_open(dst_enc_str.c_str(), &status); - - if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) { - fprintf(stderr, "liblcf: ucnv_open() error for dest encoding \"%s\": %s\n", dst_enc_str.c_str(), u_errorName(status)); - return std::string(); - } - auto conv_to_sg = makeScopeGuard([&]() { ucnv_close(conv_to); }); - status = U_ZERO_ERROR; - - std::string result(str_to_encode.size() * 4, '\0'); - auto* src = str_to_encode.data(); - auto* dst = &result.front(); - - ucnv_convertEx(conv_to, conv_from, - &dst, dst + result.size(), - &src, src + str_to_encode.size(), - nullptr, nullptr, nullptr, nullptr, - true, true, - &status); - - if (U_FAILURE(status)) { - fprintf(stderr, "liblcf: ucnv_convertEx() error when encoding \"%.*s\": %s\n", (int)str_to_encode.length(), str_to_encode.data(), u_errorName(status)); - return std::string(); - } - - result.resize(dst - result.c_str()); - result.shrink_to_fit(); - - return result; -#else - iconv_t cd = iconv_open(dst_enc_str.c_str(), src_enc_str.c_str()); - if (cd == (iconv_t)-1) - return ToString(str_to_encode); - char *src = const_cast(str_to_encode.data()); - size_t src_left = str_to_encode.size(); - size_t dst_size = str_to_encode.size() * 5 + 10; - char *dst = new char[dst_size]; - size_t dst_left = dst_size; -# ifdef ICONV_CONST - char ICONV_CONST *p = src; -# else - char *p = src; -# endif - char *q = dst; - size_t status = iconv(cd, &p, &src_left, &q, &dst_left); - iconv_close(cd); - if (status == 
(size_t) -1 || src_left > 0) { - delete[] dst; - return std::string(); - } - *q++ = '\0'; - std::string result(dst); - delete[] dst; - return result; -#endif + lcf::Encoder enc(ToString(source_encoding)); + std::string out = ToString(str_to_encode); + enc.Encode(out); + return out; } std::string ReaderUtil::Normalize(StringView str) {