From b590cac88efdc1536183408a0ae5c838d891d9ce Mon Sep 17 00:00:00 2001 From: recursive_tree Date: Sun, 13 Aug 2023 21:46:10 +0200 Subject: [PATCH] fix ccp richtext modifications --- src/Helpers/helpers.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Helpers/helpers.php b/src/Helpers/helpers.php index 3d1fa066..e185f908 100644 --- a/src/Helpers/helpers.php +++ b/src/Helpers/helpers.php @@ -128,6 +128,16 @@ function clean_ccp_html($html, $acceptable_tags = '
') if (empty($html)) return ''; + // CCP's rich text may arrive wrapped as u'...'. If so, keep only the text between the quotes. + $html = preg_match("/u'(.*)'/", $html, $match) ? $match[1] : $html; + + + // Decode escaped \uXXXX sequences into UTF-8 characters + // taken from https://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-cha + $html = preg_replace_callback('/\\\\u([0-9a-fA-F]{4})/', function ($match) { + return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UTF-16BE'); + }, $html); + // Handle Unicode cases. $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');