From e15d4bfab66c97db42d68121a28294dfb78db618 Mon Sep 17 00:00:00 2001 From: Pierre Goiffon Date: Tue, 23 Nov 2021 16:58:47 +0100 Subject: [PATCH 1/4] =?UTF-8?q?N=C2=B04360=20Security=20hardening?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/attributedef.class.inc.php | 7 + core/config.class.inc.php | 8 + core/htmlsanitizer.class.inc.php | 534 +++++++++++------- .../sanitizer/AbstractDOMSanitizerTest.php | 75 +++ test/core/sanitizer/HTMLDOMSanitizerTest.php | 260 +++++++++ test/core/sanitizer/InlineImageMock.php | 39 ++ test/core/sanitizer/SvgDOMSanitizerTest.php | 53 ++ test/core/sanitizer/input/scripts.html | 7 + test/core/sanitizer/input/scripts.svg | 8 + test/core/sanitizer/input/whitelist_test.html | 14 + test/core/sanitizer/output/scripts.html | 3 + test/core/sanitizer/output/scripts.svg | 4 + 12 files changed, 819 insertions(+), 193 deletions(-) create mode 100644 test/core/sanitizer/AbstractDOMSanitizerTest.php create mode 100644 test/core/sanitizer/HTMLDOMSanitizerTest.php create mode 100644 test/core/sanitizer/InlineImageMock.php create mode 100644 test/core/sanitizer/SvgDOMSanitizerTest.php create mode 100644 test/core/sanitizer/input/scripts.html create mode 100644 test/core/sanitizer/input/scripts.svg create mode 100644 test/core/sanitizer/input/whitelist_test.html create mode 100644 test/core/sanitizer/output/scripts.html create mode 100644 test/core/sanitizer/output/scripts.svg diff --git a/core/attributedef.class.inc.php b/core/attributedef.class.inc.php index 21c31fdf2..89a853e65 100644 --- a/core/attributedef.class.inc.php +++ b/core/attributedef.class.inc.php @@ -7337,6 +7337,13 @@ class AttributeImage extends AttributeBlob { $oDoc = parent::MakeRealValue($proposedValue, $oHostObj); + if (($oDoc instanceof ormDocument) + && (false === $oDoc->IsEmpty()) + && ($oDoc->GetMimeType() === 'image/svg+xml')) { + $sCleanSvg = HTMLSanitizer::Sanitize($oDoc->GetData(), 'svg_sanitizer'); + $oDoc = new ormDocument($sCleanSvg, $oDoc->GetMimeType(), $oDoc->GetFileName()); + } + // The validation of the MIME Type is done by CheckFormat below return $oDoc; } diff --git a/core/config.class.inc.php b/core/config.class.inc.php index 2f7508bae..dd8b7b114 100644 --- a/core/config.class.inc.php +++ b/core/config.class.inc.php @@ -1049,6 +1049,14 @@ class Config 'source_of_value' => '', 'show_in_conf_sample' => false, ), + 'svg_sanitizer' => array( + 'type' => 'string', + 'description' => 'The class to use for SVG sanitization : allow to provide a custom made sanitizer', + 'default' => 'SvgDOMSanitizer', + 'value' => '', + 'source_of_value' => '', + 'show_in_conf_sample' => false, + ), 'inline_image_max_display_width' => array( 'type' => 'integer', 'description' => 'The maximum width (in pixels) when displaying images inside an HTML formatted attribute. Images will be displayed using this this maximum width.', diff --git a/core/htmlsanitizer.class.inc.php b/core/htmlsanitizer.class.inc.php index 34a1747dc..4eada221d 100644 --- a/core/htmlsanitizer.class.inc.php +++ b/core/htmlsanitizer.class.inc.php @@ -34,43 +34,51 @@ abstract class HTMLSanitizer /** * Sanitize an HTML string with the configured sanitizer, falling back to HTMLDOMSanitizer in case of Exception or invalid configuration + * * @param string $sHTML + * @param string $sConfigKey + * * @return string + * @noinspection SelfClassReferencingInspection */ - public static function Sanitize($sHTML) + public static function Sanitize($sHTML, $sConfigKey = 'html_sanitizer') { - $sSanitizerClass = MetaModel::GetConfig()->Get('html_sanitizer'); - if(!class_exists($sSanitizerClass)) - { - IssueLog::Warning('The configured "html_sanitizer" class "'.$sSanitizerClass.'" is not a valid class. Will use HTMLDOMSanitizer as the default sanitizer.'); + $sSanitizerClass = MetaModel::GetConfig()->Get($sConfigKey); + if (!class_exists($sSanitizerClass)) { + IssueLog::Warning('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a valid class. Will use HTMLDOMSanitizer as the default sanitizer.'); $sSanitizerClass = 'HTMLDOMSanitizer'; + } else if (false === is_subclass_of($sSanitizerClass, HTMLSanitizer::class)) { + if ($sConfigKey === 'html_sanitizer') { + IssueLog::Warning('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a subclass of HTMLSanitizer. Will use HTMLDOMSanitizer as the default sanitizer.'); + $sSanitizerClass = 'HTMLDOMSanitizer'; + } + if ($sConfigKey === 'svg_sanitizer') { + IssueLog::Error('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a subclass of '.HTMLSanitizer::class.' ! Won\'t sanitize string.'); + + return $sHTML; + } } - else if(!is_subclass_of($sSanitizerClass, 'HTMLSanitizer')) - { - IssueLog::Warning('The configured "html_sanitizer" class "'.$sSanitizerClass.'" is not a subclass of HTMLSanitizer. Will use HTMLDOMSanitizer as the default sanitizer.'); - $sSanitizerClass = 'HTMLDOMSanitizer'; - } - - try - { + + try { $oSanitizer = new $sSanitizerClass(); $sCleanHTML = $oSanitizer->DoSanitize($sHTML); } - catch(Exception $e) - { - if($sSanitizerClass != 'HTMLDOMSanitizer') - { - IssueLog::Warning('Failed to sanitize an HTML string with "'.$sSanitizerClass.'". The following exception occured: '.$e->getMessage()); - IssueLog::Warning('Will try to sanitize with HTMLDOMSanitizer.'); - // try again with the HTMLDOMSanitizer - $oSanitizer = new HTMLDOMSanitizer(); - $sCleanHTML = $oSanitizer->DoSanitize($sHTML); - } - else - { - IssueLog::Error('Failed to sanitize an HTML string with "HTMLDOMSanitizer". The following exception occured: '.$e->getMessage()); - IssueLog::Error('The HTML will NOT be sanitized.'); - $sCleanHTML = $sHTML; + catch(Exception $e) { + if ($sConfigKey === 'html_sanitizer') { + if ($sSanitizerClass !== HTMLDOMSanitizer::class) { + IssueLog::Warning('Failed to sanitize an HTML string with "'.$sSanitizerClass.'". The following exception occured: '.$e->getMessage()); + IssueLog::Warning('Will try to sanitize with HTMLDOMSanitizer.'); + // try again with the HTMLDOMSanitizer + $oSanitizer = new HTMLDOMSanitizer(); + $sCleanHTML = $oSanitizer->DoSanitize($sHTML); + } else { + IssueLog::Error('Failed to sanitize an HTML string with "HTMLDOMSanitizer". The following exception occured: '.$e->getMessage()); + IssueLog::Error('The HTML will NOT be sanitized.'); + $sCleanHTML = $sHTML; + } + } else { + IssueLog::Error('Failed to sanitize string with "'.$sSanitizerClass.'", will return original value ! Exception: '.$e->getMessage()); + $sCleanHTML = $sHTML; } } return $sCleanHTML; @@ -94,67 +102,167 @@ class HTMLNullSanitizer extends HTMLSanitizer { return $sHTML; } - + } + /** - * A standard-compliant HTMLSanitizer based on the HTMLPurifier library by Edward Z. Yang - * Complete but quite slow - * http://htmlpurifier.org + * Common implementation for sanitizer using DOM parsing */ -/* -class HTMLPurifierSanitizer extends HTMLSanitizer +abstract class DOMSanitizer extends HTMLSanitizer { - protected static $oPurifier = null; - - public function __construct() - { - if (self::$oPurifier == null) - { - $sLibPath = APPROOT.'lib/htmlpurifier/HTMLPurifier.auto.php'; - if (!file_exists($sLibPath)) - { - throw new Exception("Missing library '$sLibPath', cannot use HTMLPurifierSanitizer."); - } - require_once($sLibPath); - - $oPurifierConfig = HTMLPurifier_Config::createDefault(); - $oPurifierConfig->set('Core.Encoding', 'UTF-8'); // defaults to 'UTF-8' - $oPurifierConfig->set('HTML.Doctype', 'XHTML 1.0 Strict'); // defaults to 'XHTML 1.0 Transitional' - $oPurifierConfig->set('URI.AllowedSchemes', array ( - 'http' => true, - 'https' => true, - 'data' => true, // This one is not present by default - )); - $sPurifierCache = APPROOT.'data/HTMLPurifier'; - if (!is_dir($sPurifierCache)) - { - mkdir($sPurifierCache); - } - if (!is_dir($sPurifierCache)) - { - throw new Exception("Could not create the cache directory '$sPurifierCache'"); - } - $oPurifierConfig->set('Cache.SerializerPath', $sPurifierCache); // no trailing slash - self::$oPurifier = new HTMLPurifier($oPurifierConfig); - } - } - + /** @var DOMDocument */ + protected $oDoc; + + abstract public function GetTagsWhiteList(); + + abstract public function GetTagsBlackList(); + + abstract public function GetAttrsWhiteList(); + + abstract public function GetAttrsBlackList(); + + abstract public function GetStylesWhiteList(); + public function DoSanitize($sHTML) { - $sCleanHtml = self::$oPurifier->purify($sHTML); - return $sCleanHtml; + $this->oDoc = new DOMDocument(); + $this->oDoc->preserveWhitespace = true; + + // MS outlook implements empty lines by the mean of

+ // We have to transform that into


(which is how Thunderbird implements empty lines) + // Unfortunately, DOMDocument::loadHTML does not take the tag namespaces into account (once loaded there is no way to know if the tag did have a namespace) + // therefore we have to do the transformation upfront + $sHTML = preg_replace('@(\s| )*@', '
', $sHTML); + + $this->LoadDoc($sHTML); + + $this->CleanNode($this->oDoc); + + $sCleanHtml = $this->PrintDoc(); + + return $sCleanHtml; + } + + abstract public function LoadDoc($sHTML); + + /** + * @return string cleaned source + * @uses \DOMSanitizer::oDoc + */ + abstract public function PrintDoc(); + + protected function CleanNode(DOMNode $oElement) + { + $aAttrToRemove = array(); + // Gather the attributes to remove + if ($oElement->hasAttributes()) { + foreach ($oElement->attributes as $oAttr) { + $sAttr = strtolower($oAttr->name); + if ((false === empty($this->GetAttrsBlackList())) + && (in_array($sAttr, $this->GetAttrsBlackList(), true))) { + $aAttrToRemove[] = $oAttr->name; + } else if ((false === empty($this->GetTagsWhiteList())) + && (false === in_array($sAttr, $this->GetTagsWhiteList()[strtolower($oElement->tagName)]))) { + $aAttrToRemove[] = $oAttr->name; + } else if (!$this->IsValidAttributeContent($sAttr, $oAttr->value)) { + // Invalid content + $aAttrToRemove[] = $oAttr->name; + } else if ($sAttr == 'style') { + // Special processing for style tags + $sCleanStyle = $this->CleanStyle($oAttr->value); + if ($sCleanStyle == '') { + // Invalid content + $aAttrToRemove[] = $oAttr->name; + } else { + $oElement->setAttribute($oAttr->name, $sCleanStyle); + } + } + } + // Now remove them + foreach($aAttrToRemove as $sName) + { + $oElement->removeAttribute($sName); + } + } + + if ($oElement->hasChildNodes()) + { + $aChildElementsToRemove = array(); + // Gather the child noes to remove + foreach($oElement->childNodes as $oNode) { + if ($oNode instanceof DOMElement) { + $sNodeTagName = strtolower($oNode->tagName); + } + if (($oNode instanceof DOMElement) + && (false === empty($this->GetTagsBlackList())) + && (in_array($sNodeTagName, $this->GetTagsBlackList(), true))) { + $aChildElementsToRemove[] = $oNode; + } else if (($oNode instanceof DOMElement) + && (false === empty($this->GetTagsWhiteList())) + && (false === array_key_exists($sNodeTagName, $this->GetTagsWhiteList()))) { + $aChildElementsToRemove[] = $oNode; + } else if ($oNode instanceof DOMComment) { + $aChildElementsToRemove[] = $oNode; + } else { + // Recurse + $this->CleanNode($oNode); + if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img')) { + InlineImage::ProcessImageTag($oNode); + } + } + } + // Now remove them + foreach($aChildElementsToRemove as $oDomElement) + { + $oElement->removeChild($oDomElement); + } + } + } + + protected function IsValidAttributeContent($sAttributeName, $sValue) + { + if ((false === empty($this->GetAttrsBlackList())) + && (in_array($sAttributeName, $this->GetAttrsBlackList(), true))) { + return true; + } + + if (array_key_exists($sAttributeName, $this->GetAttrsWhiteList())) { + return preg_match($this->GetAttrsWhiteList()[$sAttributeName], $sValue); + } + + return true; + } + + protected function CleanStyle($sStyle) + { + if (empty($this->GetStylesWhiteList())) { + return $sStyle; + } + + $aAllowedStyles = array(); + $aItems = explode(';', $sStyle); + { + foreach ($aItems as $sItem) { + $aElements = explode(':', trim($sItem)); + if (in_array(trim(strtolower($aElements[0])), $this->GetStylesWhiteList())) { + $aAllowedStyles[] = trim($sItem); + } + } + } + + return implode(';', $aAllowedStyles); } } -*/ -class HTMLDOMSanitizer extends HTMLSanitizer + +class HTMLDOMSanitizer extends DOMSanitizer { protected $oDoc; /** - * @var array * @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations + * @var array */ protected static $aTagsWhiteList = array( 'html' => array(), @@ -203,6 +311,7 @@ class HTMLDOMSanitizer extends HTMLSanitizer 'q' => array(), 'hr' => array('style'), 'pre' => array(), + 'center' => array(), ); protected static $aAttrsWhiteList = array( @@ -210,8 +319,8 @@ class HTMLDOMSanitizer extends HTMLSanitizer ); /** - * @var array * @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations + * @var array */ protected static $aStylesWhiteList = array( 'background-color', @@ -235,160 +344,199 @@ class HTMLDOMSanitizer extends HTMLSanitizer 'white-space', ); + public function GetTagsWhiteList() + { + return static::$aTagsWhiteList; + } + + public function GetTagsBlackList() + { + return []; + } + + public function GetAttrsWhiteList() + { + return static::$aAttrsWhiteList; + } + + public function GetAttrsBlackList() + { + return []; + } + + public function GetStylesWhiteList() + { + return static::$aStylesWhiteList; + } + public function __construct() { + parent::__construct(); + // Building href validation pattern from url and email validation patterns as the patterns are not used the same way in HTML content than in standard attributes value. // eg. "foo@bar.com" vs "mailto:foo@bar.com?subject=Title&body=Hello%20world" - if (!array_key_exists('href', self::$aAttrsWhiteList)) - { + if (!array_key_exists('href', self::$aAttrsWhiteList)) { // Regular urls $sUrlPattern = utils::GetConfig()->Get('url_validation_pattern'); // Mailto urls - $sMailtoPattern = '(mailto:(' . utils::GetConfig()->Get('email_validation_pattern') . ')(?:\?(?:subject|body)=([a-zA-Z0-9+\$_.-]*)(?:&(?:subject|body)=([a-zA-Z0-9+\$_.-]*))?)?)'; + $sMailtoPattern = '(mailto:('.utils::GetConfig()->Get('email_validation_pattern').')(?:\?(?:subject|body)=([a-zA-Z0-9+\$_.-]*)(?:&(?:subject|body)=([a-zA-Z0-9+\$_.-]*))?)?)'; // Notification placeholders // eg. $this->caller_id$, $this->hyperlink()$, $this->hyperlink(portal)$, $APP_URL$, $MODULES_URL$, ... // Note: Authorize both $xxx$ and %24xxx%24 as the latter one is encoded when used in HTML attributes (eg. a[href]) $sPlaceholderPattern = '(\$|%24)[\w-]*(->[\w]*(\([\w-]*?\))?)?(\$|%24)'; - $sPattern = $sUrlPattern . '|' . $sMailtoPattern . '|' . $sPlaceholderPattern; + $sPattern = $sUrlPattern.'|'.$sMailtoPattern.'|'.$sPlaceholderPattern; $sPattern = '/'.str_replace('/', '\/', $sPattern).'/i'; self::$aAttrsWhiteList['href'] = $sPattern; } } - public function DoSanitize($sHTML) + public function LoadDoc($sHTML) { - $this->oDoc = new DOMDocument(); - $this->oDoc->preserveWhitespace = true; - - // MS outlook implements empty lines by the mean of

- // We have to transform that into


(which is how Thunderbird implements empty lines) - // Unfortunately, DOMDocument::loadHTML does not take the tag namespaces into account (once loaded there is no way to know if the tag did have a namespace) - // therefore we have to do the transformation upfront - $sHTML = preg_replace('@(\s| )*@', '
', $sHTML); - @$this->oDoc->loadHTML(''.$sHTML); // For loading HTML chunks where the character set is not specified - - $this->CleanNode($this->oDoc); - + } + + public function PrintDoc() + { $oXPath = new DOMXPath($this->oDoc); $sXPath = "//body"; $oNodesList = $oXPath->query($sXPath); - - if ($oNodesList->length == 0) - { + + if ($oNodesList->length == 0) { // No body, save the whole document $sCleanHtml = $this->oDoc->saveHTML(); - } - else - { + } else { // Export only the content of the body tag $sCleanHtml = $this->oDoc->saveHTML($oNodesList->item(0)); // remove the body tag itself - $sCleanHtml = str_replace( array('', ''), '', $sCleanHtml); + $sCleanHtml = str_replace(array('', ''), '', $sCleanHtml); } - + return $sCleanHtml; } - - protected function CleanNode(DOMNode $oElement) +} + + +/** + * @since 2.6.5 2.7.6 3.0.0 N°4360 + */ +class SvgDOMSanitizer extends DOMSanitizer +{ + public function GetTagsWhiteList() { - $aAttrToRemove = array(); - // Gather the attributes to remove - if ($oElement->hasAttributes()) - { - foreach($oElement->attributes as $oAttr) - { - $sAttr = strtolower($oAttr->name); - if (!in_array($sAttr, self::$aTagsWhiteList[strtolower($oElement->tagName)])) - { - // Forbidden (or unknown) attribute - $aAttrToRemove[] = $oAttr->name; - } - else if (!$this->IsValidAttributeContent($sAttr, $oAttr->value)) - { - // Invalid content - $aAttrToRemove[] = $oAttr->name; - } - else if ($sAttr == 'style') - { - // Special processing for style tags - $sCleanStyle = $this->CleanStyle($oAttr->value); - if ($sCleanStyle == '') - { - // Invalid content - $aAttrToRemove[] = $oAttr->name; - } - else - { - $oElement->setAttribute($oAttr->name, $sCleanStyle); - } - } - } - // Now remove them - foreach($aAttrToRemove as $sName) - { - $oElement->removeAttribute($sName); - } - } - - if ($oElement->hasChildNodes()) - { - $aChildElementsToRemove = array(); - // Gather the child noes to remove - foreach($oElement->childNodes as $oNode) - { - if (($oNode instanceof DOMElement) && (!array_key_exists(strtolower($oNode->tagName), self::$aTagsWhiteList))) - { - $aChildElementsToRemove[] = $oNode; - } - else if ($oNode instanceof DOMComment) - { - $aChildElementsToRemove[] = $oNode; - } - else - { - // Recurse - $this->CleanNode($oNode); - if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img')) - { - InlineImage::ProcessImageTag($oNode); - } - } - } - // Now remove them - foreach($aChildElementsToRemove as $oDomElement) - { - $oElement->removeChild($oDomElement); - } - } + return []; } - protected function CleanStyle($sStyle) + /** + * @return string[] + * @link https://developer.mozilla.org/en-US/docs/Web/SVG/Element/script + */ + public function GetTagsBlackList() { - $aAllowedStyles = array(); - $aItems = explode(';', $sStyle); - { - foreach($aItems as $sItem) - { - $aElements = explode(':', trim($sItem)); - if (in_array(trim(strtolower($aElements[0])), static::$aStylesWhiteList)) - { - $aAllowedStyles[] = trim($sItem); - } - } - } - return implode(';', $aAllowedStyles); + return [ + 'script', + ]; } - - protected function IsValidAttributeContent($sAttributeName, $sValue) + + public function GetAttrsWhiteList() { - if (array_key_exists($sAttributeName, self::$aAttrsWhiteList)) - { - return preg_match(self::$aAttrsWhiteList[$sAttributeName], $sValue); - } - return true; + return []; } -} \ No newline at end of file + + /** + * @return string[] + * @link https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/Events#document_event_attributes + */ + public function GetAttrsBlackList() + { + return [ + 'onbegin', + 'onbegin', + 'onrepeat', + 'onabort', + 'onerror', + 'onerror', + 'onscroll', + 'onunload', + 'oncopy', + 'oncut', + 'onpaste', + 'oncancel', + 'oncanplay', + 'oncanplaythrough', + 'onchange', + 'onclick', + 'onclose', + 'oncuechange', + 'ondblclick', + 'ondrag', + 'ondragend', + 'ondragenter', + 'ondragleave', + 'ondragover', + 'ondragstart', + 'ondrop', + 'ondurationchange', + 'onemptied', + 'onended', + 'onerror', + 'onfocus', + 'oninput', + 'oninvalid', + 'onkeydown', + 'onkeypress', + 'onkeyup', + 'onload', + 'onloadeddata', + 'onloadedmetadata', + 'onloadstart', + 'onmousedown', + 'onmouseenter', + 'onmouseleave', + 'onmousemove', + 'onmouseout', + 'onmouseover', + 'onmouseup', + 'onmousewheel', + 'onpause', + 'onplay', + 'onplaying', + 'onprogress', + 'onratechange', + 'onreset', + 'onresize', + 'onscroll', + 'onseeked', + 'onseeking', + 'onselect', + 'onshow', + 'onstalled', + 'onsubmit', + 'onsuspend', + 'ontimeupdate', + 'ontoggle', + 'onvolumechange', + 'onwaiting', + 'onactivate', + 'onfocusin', + 'onfocusout', + ]; + } + + public function GetStylesWhiteList() + { + return []; + } + + public function LoadDoc($sHTML) + { + @$this->oDoc->loadXml($sHTML, LIBXML_NOBLANKS); + } + + public function PrintDoc() + { + return $this->oDoc->saveXML(); + } +} diff --git a/test/core/sanitizer/AbstractDOMSanitizerTest.php b/test/core/sanitizer/AbstractDOMSanitizerTest.php new file mode 100644 index 000000000..469de3f2a --- /dev/null +++ b/test/core/sanitizer/AbstractDOMSanitizerTest.php @@ -0,0 +1,75 @@ +ReadTestFile($sFileToTest, self::INPUT_DIRECTORY); + $sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY); + $sOutputHtml = $this->RemoveNewLines($sOutputHtml); + + $oSanitizer = new HTMLDOMSanitizer(); + $sRes = $oSanitizer->DoSanitize($sInputHtml); + + // Removing newlines as the parser gives different results depending on the PHP version + // Didn't manage to get it right : + // - no php.ini difference + // - playing with the parser preserveWhitespace/formatOutput parser options didn't help + // So we're removing new lines on both sides :/ + $sOutputHtml = $this->RemoveNewLines($sOutputHtml); + $sRes = $this->RemoveNewLines($sRes); + + $this->debug($sRes); + $this->assertEquals($sOutputHtml, $sRes); + } + + public function DoSanitizeProvider() + { + return array( + array( + 'scripts.html', + ), + ); + } + + /** + * @dataProvider WhiteListProvider + * + * @param string $sHtmlToTest HTML content + */ + public function testDoSanitizeWhiteList($sHtmlToTest) + { + $oSanitizer = new HTMLDOMSanitizer(); + $sRes = $oSanitizer->DoSanitize($sHtmlToTest); + + // Removing newlines as the parser gives different results depending on the PHP version + // Didn't manage to get it right : + // - no php.ini difference + // - playing with the parser preserveWhitespace/formatOutput parser options didn't help + // So we're removing new lines on both sides :/ + $sHtmlToTest = $this->RemoveNewLines($sHtmlToTest); + $sRes = $this->RemoveNewLines($sRes); + + $this->debug($sRes); + $this->assertEquals($sHtmlToTest, $sRes); + } + + public function WhiteListProvider() + { + // This is a copy of \HTMLDOMSanitizer::$aTagsWhiteList + // should stay a copy as we want to check we're not removing something by mistake as it was done with the CENTER tag (N°2558) + $aTagsWhiteList = array( + // we don't test HTML and BODY as the parser removes them if context isn't appropriate + 'a' => array('href', 'name', 'style', 'target', 'title'), + 'p' => array('style'), + 'blockquote' => array('style'), + 'br' => array(), + 'span' => array('style'), + 'div' => array('style'), + 'b' => array(), + 'i' => array(), + 'u' => array(), + 'em' => array(), + 'strong' => array(), + 'img' => array('src', 'style', 'alt', 'title'), + 'ul' => array('style'), + 'ol' => array('style'), + 'li' => array('style'), + 'h1' => array('style'), + 'h2' => array('style'), + 'h3' => array('style'), + 'h4' => array('style'), + 'nav' => array('style'), + 'section' => array('style'), + 'code' => array('style'), + 'table' => array('style', 'width', 'summary', 'align', 'border', 'cellpadding', 'cellspacing'), + 'thead' => array('style'), + 'tbody' => array('style'), + 'tr' => array('style', 'colspan', 'rowspan'), + 'td' => array('style', 'colspan', 'rowspan'), + 'th' => array('style', 'colspan', 'rowspan'), + 'fieldset' => array('style'), + 'legend' => array('style'), + 'font' => array('face', 'color', 'style', 'size'), + 'big' => array(), + 'small' => array(), + 'tt' => array(), + 'kbd' => array(), + 'samp' => array(), + 'var' => array(), + 'del' => array(), + 's' => array(), // strikethrough + 'ins' => array(), + 'cite' => array(), + 'q' => array(), + 'hr' => array('style'), + 'pre' => array(), + 'center' => array(), + ); + $aTestCaseArray = array(); + + $sInputText = $this->ReadTestFile('whitelist_test.html', self::INPUT_DIRECTORY); + foreach ($aTagsWhiteList as $sTag => $aTagAttributes) { + $sTestCaseText = $sInputText; + $sStartTag = "<$sTag"; + $iAttrCounter = 0; + foreach ($aTagAttributes as $sTagAttribute) { + $sStartTag .= $this->GetTagAttributeValue($sTagAttribute, $iAttrCounter); + $iAttrCounter++; + } + $sStartTag .= '>'; + $sTestCaseText = str_replace('##START_TAG##', $sStartTag, $sTestCaseText); + + $sClosingTag = $this->IsClosingTag($sTag) ? "" : ''; + $sTestCaseText = str_replace('##END_TAG##', $sClosingTag, $sTestCaseText); + + $aTestCaseArray[$sTag] = array($sTestCaseText); + } + + return $aTestCaseArray; + } + + /** + * @dataProvider RemoveBlackListedTagContentProvider + */ + public function testDoSanitizeRemoveBlackListedTagContent($html, $expected) + { + $oSanitizer = new HTMLDOMSanitizer(); + $sSanitizedHtml = $oSanitizer->DoSanitize($html); + + $this->assertEquals($expected, str_replace("\n", '', $sSanitizedHtml)); + } + + public function RemoveBlackListedTagContentProvider() + { + return array( + 'basic' => array( + 'html' => 'foobaz', + 'expected' => '

foobaz

', + ), + 'basic with body' => array( + 'html' => 'foobaz', + 'expected' => 'foobaz', + ), + 'basic with html and body tags' => array( + 'html' => 'foobaz', + 'expected' => 'foobaz', + ), + 'basic with attributes' => array( + 'html' => 'foobaz', + 'expected' => '

foobaz

', + ), + 'basic with comment' => array( + 'html' => 'foobaz', + 'expected' => '

foobaz

', + ), + 'basic with contentRemovable tag' => array( + 'html' => 'foobaz', + 'expected' => '

foobaz

', + ), + 'nested' => array( + 'html' => 'beforeoofafter', + 'expected' => '

beforeafter

', + ), + 'nested with not closed br' => array( + 'html' => 'beforeoofafter', + 'expected' => '

beforeafter

', + ), + 'nested with allowed' => array( + 'html' => 'beforeafter', + 'expected' => '

beforeafter

', + ), + 'nested with spaces' => array( + 'html' => 'beforeafter', + 'expected' => '

beforeafter

', + ), + 'nested with attributes' => array( + 'html' => 'beforeafter', + 'expected' => '

beforeafter

', + ), + 'nested with allowed and attributes and spaces ' => array( + 'html' => 'beforeafter', + 'expected' => 'beforeafter', + ), + 'nested with allowed and contentRemovable tags' => array( + 'html' => 'beforemiddleafter', + 'expected' => 'beforemiddleafter', + ), + + 'regression: if head present => body is not trimmed' => array( + 'html' => 'bar', + 'expected' => 'bar', + ), + ); + } + + /** + * @dataProvider CallInlineImageProcessImageTagProvider + */ + public function testDoSanitizeCallInlineImageProcessImageTag($sHtml, $iExpectedCount) + { + require_once APPROOT.'test/core/sanitizer/InlineImageMock.php'; + + $oSanitizer = new HTMLDOMSanitizer(); + $oSanitizer->DoSanitize($sHtml); + + $iCalledCount = \InlineImage::GetCallCounter(); + $this->assertEquals($iExpectedCount, $iCalledCount); + } + + public function CallInlineImageProcessImageTagProvider() + { + return array( + 'no image' => array( + 'html' => '

bar

', + 'expected' => 0, + ), + 'basic image' => array( + 'html' => '', + 'expected' => 1, + ), + 'nested images within forbidden tags' => array( + 'html' => '', + 'expected' => 2, + ), +// This test will be restored with the ticket n°2556 +// 'nested images within forbidden and removed tags' => array( +// 'html' => '', +// 'expected' => 2, +// ), + ); + } +} + diff --git a/test/core/sanitizer/InlineImageMock.php b/test/core/sanitizer/InlineImageMock.php new file mode 100644 index 000000000..9ebcda852 --- /dev/null +++ b/test/core/sanitizer/InlineImageMock.php @@ -0,0 +1,39 @@ + + * + */ + +/** + * Mock class used by @see \Combodo\iTop\Test\UnitTest\Core\HTMLDOMSanitizerTest + * + */ +class InlineImage +{ + private static $iCallCounter = 0; + + public static function ProcessImageTag(DOMNode $oNode) + { + self::$iCallCounter++; + } + + public static function GetCallCounter() + { + return self::$iCallCounter; + } +} \ No newline at end of file diff --git a/test/core/sanitizer/SvgDOMSanitizerTest.php b/test/core/sanitizer/SvgDOMSanitizerTest.php new file mode 100644 index 000000000..b45a95a39 --- /dev/null +++ b/test/core/sanitizer/SvgDOMSanitizerTest.php @@ -0,0 +1,53 @@ +ReadTestFile($sFileToTest, self::INPUT_DIRECTORY); + $sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY); + $sOutputHtml = $this->RemoveNewLines($sOutputHtml); + + $oSanitizer = new SvgDOMSanitizer(); + $sRes = $oSanitizer->DoSanitize($sInputHtml); + + // Removing newlines as the parser gives different results depending on the PHP version + // Didn't manage to get it right : + // - no php.ini difference + // - playing with the parser preserveWhitespace/formatOutput parser options didn't help + // So we're removing new lines on both sides :/ + $sOutputHtml = $this->RemoveNewLines($sOutputHtml); + $sRes = $this->RemoveNewLines($sRes); + + $this->debug($sRes); + $this->assertEquals($sOutputHtml, $sRes); + } + + public function DoSanitizeProvider() + { + return array( + array( + 'scripts.svg', + ), + ); + } +} + diff --git a/test/core/sanitizer/input/scripts.html b/test/core/sanitizer/input/scripts.html new file mode 100644 index 000000000..5dceaecae --- /dev/null +++ b/test/core/sanitizer/input/scripts.html @@ -0,0 +1,7 @@ +

Test with lots of JS scripts to filter !

+ +

+ + diff --git a/test/core/sanitizer/input/scripts.svg b/test/core/sanitizer/input/scripts.svg new file mode 100644 index 000000000..eba8dbd7d --- /dev/null +++ b/test/core/sanitizer/input/scripts.svg @@ -0,0 +1,8 @@ + + + + + + \ No newline at end of file diff --git a/test/core/sanitizer/input/whitelist_test.html b/test/core/sanitizer/input/whitelist_test.html new file mode 100644 index 000000000..556812da7 --- /dev/null +++ b/test/core/sanitizer/input/whitelist_test.html @@ -0,0 +1,14 @@ +##START_TAG## +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam luctus semper diam et fermentum. Cras nisi mauris, rutrum id turpis at, +sagittis tempus erat. Sed tempus vel purus id sagittis. Suspendisse ullamcorper eros vel semper malesuada. Vivamus malesuada tellus quis +nisi consequat, quis tristique magna eleifend. Quisque eget turpis lacinia, vehicula turpis vel, aliquet diam. Aenean eu nunc ac velit +condimentum posuere. Vivamus congue velit cursus eros mollis, vitae eleifend urna finibus. + +In accumsan sed sem nec sollicitudin. Sed pretium, neque et rhoncus volutpat, urna massa semper ex, et faucibus mauris sapien eu libero. +Sed vel accumsan nibh, tempus accumsan mi. Maecenas gravida imperdiet leo id euismod. Mauris pharetra mattis facilisis. Suspendisse +dictum vel orci ac luctus. Proin ultricies erat sit amet leo sollicitudin, quis lacinia felis volutpat. Praesent molestie quam et magna +tempor aliquet. Sed quam nisi, dictum ac gravida et, suscipit et augue. Fusce ac purus eget leo scelerisque bibendum. Proin in semper +erat, eu congue diam. Vivamus purus eros, consectetur laoreet gravida in, ultricies eget nibh. Mauris hendrerit euismod ex at facilisis. +Integer lacus eros, posuere finibus libero facilisis, eleifend gravida neque. Integer feugiat elit vel leo aliquet suscipit. Etiam +auctor ligula sed eros vulputate tristique ac eget magna. +##END_TAG## diff --git a/test/core/sanitizer/output/scripts.html b/test/core/sanitizer/output/scripts.html new file mode 100644 index 000000000..ac0043227 --- /dev/null +++ b/test/core/sanitizer/output/scripts.html @@ -0,0 +1,3 @@ +

Test with lots of JS scripts to filter !

+ +

diff --git a/test/core/sanitizer/output/scripts.svg b/test/core/sanitizer/output/scripts.svg new file mode 100644 index 000000000..99308ba27 --- /dev/null +++ b/test/core/sanitizer/output/scripts.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file From 81a2a9278c36c5c11d5a1d9fae81a1755798c338 Mon Sep 17 00:00:00 2001 From: Pierre Goiffon Date: Tue, 23 Nov 2021 17:38:30 +0100 Subject: [PATCH 2/4] =?UTF-8?q?:white=5Fcheck=5Fmark:=20N=C2=B04360=20Fix?= =?UTF-8?q?=20SvgDOMSanitizer=20expected=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/core/sanitizer/output/scripts.svg | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/core/sanitizer/output/scripts.svg b/test/core/sanitizer/output/scripts.svg index 99308ba27..41508b547 100644 --- a/test/core/sanitizer/output/scripts.svg +++ b/test/core/sanitizer/output/scripts.svg @@ -1,4 +1 @@ - - - - \ No newline at end of file + \ No newline at end of file From 65f9f86bccbe6434f0226e60f4af01abe307cba2 Mon Sep 17 00:00:00 2001 From: Pierre Goiffon Date: Tue, 23 Nov 2021 18:50:02 +0100 Subject: [PATCH 3/4] =?UTF-8?q?N=C2=B03635=20DictionariesConsistencyTest?= =?UTF-8?q?=20:=20now=20we=20can=20have=20multiple=20possible=20localized?= =?UTF-8?q?=20language=20desc=20The=20'Espa=C3=B1ol,=20Castella=C3=B1o'=20?= =?UTF-8?q?to=20'Espa=C3=B1ol,=20Castellano'=20was=20causing=20problem=20o?= =?UTF-8?q?n=20builds=20with=20other=20modules=20that=20we=20don't=20want?= =?UTF-8?q?=20to=20update=20!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/integration/DictionariesConsistencyTest.php | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test/integration/DictionariesConsistencyTest.php b/test/integration/DictionariesConsistencyTest.php index 3836ab4b1..e52500237 100644 --- a/test/integration/DictionariesConsistencyTest.php +++ b/test/integration/DictionariesConsistencyTest.php @@ -34,7 +34,10 @@ class DictionariesConsistencyTest extends ItopTestCase 'da' => array('DA DA', 'Danish', 'Dansk'), 'de' => array('DE DE', 'German', 'Deutsch'), 'en' => array('EN US', 'English', 'English'), - 'es_cr' => array('ES CR', 'Spanish', 'Español, Castellano'), + 'es_cr' => array('ES CR', 'Spanish', array( + 'Español, Castellaño', // old value + 'Español, Castellano', // new value since N°3635 + )), 'fr' => array('FR FR', 'French', 'Français'), 'hu' => array('HU HU', 'Hungarian', 'Magyar'), 'it' => array('IT IT', 'Italian', 'Italiano'), @@ -60,10 +63,10 @@ class DictionariesConsistencyTest extends ItopTestCase $sExpectedLanguageCode = $aPrefixToLanguageData[$sLangPrefix][0]; $sExpectedEnglishLanguageDesc = $aPrefixToLanguageData[$sLangPrefix][1]; - $sExpectedLocalizedLanguageDesc = $aPrefixToLanguageData[$sLangPrefix][2]; + $aExpectedLocalizedLanguageDesc = $aPrefixToLanguageData[$sLangPrefix][2]; $sDictPHP = file_get_contents($sDictFile); - if ($iCount = preg_match_all("@Dict::Add\('(.*)'\s*,\s*'(.*)'\s*,\s*'(.*)'@", $sDictPHP, $aMatches) === false) + if ($iCount = (preg_match_all("@Dict::Add\('(.*)'\s*,\s*'(.*)'\s*,\s*'(.*)'@", $sDictPHP, $aMatches) === false)) { static::fail("Pattern not working"); } @@ -84,7 +87,10 @@ class DictionariesConsistencyTest extends ItopTestCase } foreach ($aMatches[3] as $sLocalizedLanguageDesc) { - static::assertSame($sExpectedLocalizedLanguageDesc, $sLocalizedLanguageDesc, + if (false === is_array($aExpectedLocalizedLanguageDesc)) { + $aExpectedLocalizedLanguageDesc = array($aExpectedLocalizedLanguageDesc); + } + static::assertContains($sLocalizedLanguageDesc,$aExpectedLocalizedLanguageDesc, "Unexpected language description for Dict::Add in dictionary $sDictFile"); } } From 2d67594ccff7b312f1163521622ea72e67979ed4 Mon Sep 17 00:00:00 2001 From: Pierre Goiffon Date: Wed, 24 Nov 2021 11:32:10 +0100 Subject: [PATCH 4/4] =?UTF-8?q?N=C2=B04213=20Fix=20EnumSet=20rendering=20o?= =?UTF-8?q?n=20details=20form=20in=20portal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/ormtagset.class.inc.php | 2 +- .../bssetfieldrenderer.class.inc.php | 33 +++++++++++++++---- .../renderer/renderingoutput.class.inc.php | 4 ++- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/core/ormtagset.class.inc.php b/core/ormtagset.class.inc.php index ab6e4aa78..8f8155016 100644 --- a/core/ormtagset.class.inc.php +++ b/core/ormtagset.class.inc.php @@ -138,7 +138,7 @@ final class ormTagSet extends ormSet } /** - * @return array of tags indexed by code + * @return array index: code, value: corresponding {@see \TagSetFieldData} */ public function GetTags() { diff --git a/sources/renderer/bootstrap/fieldrenderer/bssetfieldrenderer.class.inc.php b/sources/renderer/bootstrap/fieldrenderer/bssetfieldrenderer.class.inc.php index 59d6269db..4a1c39e03 100644 --- a/sources/renderer/bootstrap/fieldrenderer/bssetfieldrenderer.class.inc.php +++ b/sources/renderer/bootstrap/fieldrenderer/bssetfieldrenderer.class.inc.php @@ -21,6 +21,7 @@ namespace Combodo\iTop\Renderer\Bootstrap\FieldRenderer; use MetaModel; +use utils; /** * Description of BsSetFieldRenderer @@ -105,18 +106,36 @@ EOF // ... in view mode else { - $aItems = $oOrmItemSet->GetTags(); + if ($oOrmItemSet instanceof \ormTagSet) { + $aItems = $oOrmItemSet->GetTags(); + $fExtractTagData = static function($oTag, &$sItemLabel, &$sItemDescription) { + $sItemLabel = $oTag->Get('label'); + $sItemDescription = $oTag->Get('description'); + }; + } else { + $aItems = $oOrmItemSet->GetValues(); + $oAttDef = MetaModel::GetAttributeDef($oOrmItemSet->GetClass(), $oOrmItemSet->GetAttCode()); + $fExtractTagData = static function($sEnumSetValue, &$sItemLabel, &$sItemDescription) use ($oAttDef) { + $sItemLabel = $oAttDef->GetValueLabel($sEnumSetValue); + $sItemDescription = ''; + }; + } + $oOutput->AddHtml('
') ->AddHtml(''); - foreach($aItems as $sItemCode => $oItem) + foreach($aItems as $sItemCode => $value) { - $sItemLabel = $oItem->Get('label'); - $sItemDescription = $oItem->Get('description'); + $fExtractTagData($value, $sItemLabel, $sItemDescription); + + $sDescriptionAttr = (empty($sItemDescription)) + ? '' + : ' data-description="'.utils::HtmlEntities($sItemDescription).'"'; + $oOutput->AddHtml('') + ->AddHtml('"') + ->AddHtml($sDescriptionAttr) + ->AddHtml('>') ->AddHtml($sItemLabel, true) ->AddHtml(''); } diff --git a/sources/renderer/renderingoutput.class.inc.php b/sources/renderer/renderingoutput.class.inc.php index 44ffa13d4..59892860d 100644 --- a/sources/renderer/renderingoutput.class.inc.php +++ b/sources/renderer/renderingoutput.class.inc.php @@ -20,6 +20,8 @@ namespace Combodo\iTop\Renderer; +use utils; + /** * Description of RenderingOutput * @@ -118,7 +120,7 @@ class RenderingOutput */ public function AddHtml($sHtml, $bEncodeHtmlEntities = false) { - $this->sHtml .= ($bEncodeHtmlEntities) ? htmlentities($sHtml, ENT_QUOTES, 'UTF-8') : $sHtml; + $this->sHtml .= ($bEncodeHtmlEntities) ? utils::HtmlEntities($sHtml) : $sHtml; return $this; }