diff --git a/lib/Util/pQuery/pQuery.php b/lib/Util/pQuery/pQuery.php index b8108d9087..09c98c030f 100644 --- a/lib/Util/pQuery/pQuery.php +++ b/lib/Util/pQuery/pQuery.php @@ -18,10 +18,10 @@ class DomNode extends \pQuery\DomNode { var $childClass = 'MailPoet\Util\pQuery\DomNode'; function getInnerText() { - return html_entity_decode($this->toString(true, true, 1), ENT_QUOTES, 'UTF-8'); + return html_entity_decode($this->toString(true, true, 1), ENT_NOQUOTES, 'UTF-8'); } function getOuterText() { - return html_entity_decode($this->toString(), ENT_QUOTES, 'UTF-8'); + return html_entity_decode($this->toString(), ENT_NOQUOTES, 'UTF-8'); } -} \ No newline at end of file +} diff --git a/tests/unit/Util/PQueryTest.php b/tests/unit/Util/PQueryTest.php new file mode 100644 index 0000000000..64f5ee26fc --- /dev/null +++ b/tests/unit/Util/PQueryTest.php @@ -0,0 +1,65 @@ +'; + $domnode = pQuery::parseStr($html); + $inner_text = $domnode->getInnerText(); + expect($inner_text)->equals(""); + } + + function testQuotesAreCorrectlyEscaped() { + $html_characters = ['"', '"', ''']; + + foreach($html_characters as $char) { + $this->parseTest($char); + } + } + + function testEncodedHtmlNamesAreDecoded() { + $html_names = ['&', '<', '>', ' ', '¡', '¢', '£', '¤', '¥', '¦', '§', '¨', '©', 'ª', '«', '¬', '­', '®', '¯', '°', '±', '²', '³', '´', 'µ', '¶', '·', '¸', '¹', 'º', '»', '¼', '½', '¾', '¿', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×', 'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß', 'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ']; + + foreach($html_names as $char) { + $this->parseTest($char, $equals = false); + } + } + + function testEncodedHtmlNumbersAreDecoded() { + // Tested numbers are from https://www.ascii.cl/htmlcodes.htm + $html_numbers = array_merge(range(40, 126), range(160, 255), [32, 33, 35, 36, 37, 38, 338, 339, 352, 353, 376, 402, 8211, 8212, 8216, 8217, 8218, 8220, 8221, 8222, 8224, 8225, 8226, 8230, 8240, 8364, 8482]); + + foreach($html_numbers as $char) { + $this->parseTest('&#'.$char.';', $equals = false); + } + } + + function testItCanParseRealHtmlSnippets() { + $snippets = [ + '
', + '', + 'twitter', + 'read full post', + '
KIZOMBA SUR, el último evento del verano que no te debes perder 🌞 Aprovecha ahora esta oferta limitada y llama ➡️➡️+34 660 144 954
', + 'Kizomba Sur', + ' Darse de Baja | Gestionar suscripción
Loves Dance to Loves Kizomba
Avda. Marconi 2 11009 Cádiz ', + ' Meer lezen ', + '

Les petits Bollandistes : vies des Saints de l\'Acien et du Nouveau Testament, des Martyrs, des Pères, des Auteurs sacrés et ecclésiastiques (17 Volumes). Supplément aux vies des saints et spécialement aux Petits bollandistes : d\'après les documents hagiographiques les plus authentiques et les plus récents (3 volumes). (Complete Set, 20 volumes)

', + ]; + + foreach($snippets as $snippet) { + $this->parseTest($snippet); + } + } + + function parseTest($html, $equals = true) { + $parsed_html = pQuery::parseStr($html)->getInnerText(); + if($equals) { + expect($parsed_html)->equals($html); + } else { + expect($parsed_html)->notEquals($html); + } + } +}