fix unit tests - Support Microsoft encoding of non-breaking space in UTF-8

This commit is contained in:
Eric
2019-11-26 15:44:17 +01:00
parent a382d6ad35
commit 87c794b22e
2 changed files with 3 additions and 63 deletions

View File

@@ -241,6 +241,8 @@ class HTMLDOMSanitizer extends HTMLSanitizer
public function __construct()
{
parent::__construct();
// Build the href validation pattern from the url and email validation patterns, as the patterns are not used the same way in HTML content as in standard attribute values.
// e.g. "foo@bar.com" vs "mailto:foo@bar.com?subject=Title&body=Hello%20world"
if (!array_key_exists('href', self::$aAttrsWhiteList))
@@ -273,7 +275,7 @@ class HTMLDOMSanitizer extends HTMLSanitizer
// therefore we have to do the transformation upfront
$sHTML = preg_replace('@<o:p>(\s|&nbsp;)*</o:p>@', '<br>', $sHTML);
// Replace badly encoded non-breaking spaces
$sHTML = preg_replace('~\xc2\xa0~', '&nbsp;', $sHTML);
$sHTML = preg_replace('~\xc2\xa0~', ' ', $sHTML);
@$this->oDoc->loadHTML('<?xml encoding="UTF-8"?>'.$sHTML); // For loading HTML chunks where the character set is not specified