From d3525190d5c0bdb07ccb9f3924f46b205f5f5bf5 Mon Sep 17 00:00:00 2001 From: bruno DA SILVA Date: Thu, 14 May 2020 10:49:31 +0200 Subject: [PATCH] =?UTF-8?q?N=C2=B02556=20-=20Html=20sanitization=20preserv?= =?UTF-8?q?e=20content=20of=20removed=20tags=20(except=20for=20a=20forbidd?= =?UTF-8?q?en=20list)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit forbidden list: see $aTagsContentRemovableList (cherry picked from commit 746b47bb0e61292c89b6aa994ffbc68dbe31d1c4) (cherry picked from commit 79909fadc082189ecf39b4a0cde09b70799227fb) --- core/htmlsanitizer.class.inc.php | 205 +++++++++++++++++------- test/core/HTMLDOMSanitizerTest.php | 115 +++++++++++++ test/core/sanitizer/InlineImageMock.php | 39 +++++ 3 files changed, 302 insertions(+), 57 deletions(-) create mode 100644 test/core/sanitizer/InlineImageMock.php diff --git a/core/htmlsanitizer.class.inc.php b/core/htmlsanitizer.class.inc.php index b2a7aa4c7..5bcae80eb 100644 --- a/core/htmlsanitizer.class.inc.php +++ b/core/htmlsanitizer.class.inc.php @@ -160,53 +160,65 @@ class HTMLDOMSanitizer extends HTMLSanitizer * @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations */ protected static $aTagsWhiteList = array( - 'html' => array(), - 'body' => array(), 'a' => array('href', 'name', 'style', 'target', 'title'), - 'p' => array('style'), - 'blockquote' => array('style'), - 'br' => array(), - 'span' => array('style'), - 'div' => array('style'), 'b' => array(), - 'i' => array(), - 'u' => array(), + 'big' => array(), + 'blockquote' => array('style'), + 'body' => array(), + 'br' => array(), + 'center' => array(), + 'cite' => array(), + 'code' => array('style', 'class'), + 'del' => array(), + 'div' => array('style'), 'em' => array(), - 'strong' => array(), - 'img' => array('src', 'style', 'alt', 'title'), - 'ul' => array('style'), - 'ol' => array('style'), - 'li' => array('style'), + 'fieldset' => array('style'), + 'font' => array('face', 'color', 'style', 'size'), 'h1' => array('style'), 'h2' => array('style'), 'h3' => array('style'), 'h4' => array('style'), + 'hr' => array('style'), + 'html' => array(), + 'i' => array(), + 'img' => array('src', 'style', 'alt', 'title'), + 'ins' => array(), + 'kbd' => array(), + 'legend' => array('style'), + 'li' => array('style'), 'nav' => array('style'), + 'ol' => array('style'), + 'p' => array('style'), + 'pre' => array(), + 'q' => array(), + 'samp' => array(), + 's' => array(), // strikethrough 'section' => array('style'), - 'code' => array('style', 'class'), + 'small' => array(), + 'span' => array('style'), + 'strong' => array(), 'table' => array('style', 'width', 'summary', 'align', 'border', 'cellpadding', 'cellspacing'), - 'thead' => array('style'), 'tbody' => array('style'), - 'tr' => array('style', 'colspan', 'rowspan'), 'td' => array('style', 'colspan', 'rowspan'), 'th' => array('style', 'colspan', 'rowspan'), - 'fieldset' => array('style'), - 'legend' => array('style'), - 'font' => array('face', 'color', 'style', 'size'), - 'big' => array(), - 'small' => array(), + 'thead' => array('style'), + 'tr' => array('style', 'colspan', 'rowspan'), 'tt' => array(), - 'kbd' => array(), - 'samp' => array(), + 'u' => array(), + 'ul' => array('style'), 'var' => array(), - 'del' => array(), - 's' => array(), // strikethrough - 'ins' => array(), - 'cite' => array(), - 'q' => array(), - 'hr' => array('style'), - 'pre' => array(), - 'center' => array(), + ); + + protected static $aTagsContentRemovableList = array( + 'applet', + 'basefont', + 'canvas', + 'code', + 'dialog', + 'embed', + 'object', + 'script', + 'style', ); protected static $aAttrsWhiteList = array( @@ -302,6 +314,108 @@ class HTMLDOMSanitizer extends HTMLSanitizer } protected function CleanNode(DOMNode $oElement) + { + $this->CleanNodeRemoveForbiddenTags($oElement); + $this->CleanNodeHandleImages($oElement); + $this->CleanNodeRemoveForbiddenAttributes($oElement); + } + + protected function CleanNodeRemoveForbiddenTags(DOMNode $oElement) + { + if ($oElement->hasChildNodes()) + { + $aValidatedNodes = array(); + do + { + $bChildRemoved = false; + + $aNodes = array(); + foreach($oElement->childNodes as $oNode) + { + $aNodes[] = $oNode; + } + + foreach($aNodes as $oNode) + { + if (($oNode instanceof DOMElement) && (!array_key_exists(strtolower($oNode->tagName), self::$aTagsWhiteList))) + { + $bChildRemoved = true; + $this->SmartRemoveChild($oElement, $oNode); + } + else if ($oNode instanceof DOMComment) + { + $oElement->removeChild($oNode); + } + else + { + //if the node is kept, we can recurse into it, bu we want to perform this only once (see the do/while above?) + $bAlreadyValidated = false; + /** @var \DOMNode $oValidatedNode */ + foreach ($aValidatedNodes as $oValidatedNode) + { + if ($oValidatedNode->isSameNode($oNode)) + { + $bAlreadyValidated = true; + break; + } + } + if (! $bAlreadyValidated) + { + $this->CleanNodeRemoveForbiddenTags($oNode); + $aValidatedNodes[] = $oNode; + } + } + } + } while ($bChildRemoved); + } + } + + /** + * Remove a node, but move its inner nodes in the parent. + * Note: invalid/forbidden tags may be moved up, so they have to be checked again. + * + * @param \DOMNode $oParent + * @param \DOMElement $oRemovable + */ + private function SmartRemoveChild(DOMNode $oParent, DOMElement $oRemovable) + { + if (!$oRemovable->hasChildNodes()) + { + $oParent->removeChild($oRemovable); + } + else if (in_array(strtolower($oRemovable->tagName), self::$aTagsContentRemovableList)) + { + $oParent->removeChild($oRemovable); + } + else + { + /** @var \DOMNode $oNode */ + foreach ($oRemovable->childNodes as $oNode) + { + $oNode = $oNode->cloneNode(true); + $oParent->insertBefore($oNode, $oRemovable); + } + + $oParent->removeChild($oRemovable); + } + } + + protected function CleanNodeHandleImages(DOMNode $oElement) + { + if ($oElement->hasChildNodes()) + { + foreach($oElement->childNodes as $oNode) + { + $this->CleanNodeHandleImages($oNode); + if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img')) + { + InlineImage::ProcessImageTag($oNode); + } + } + } + } + + protected function CleanNodeRemoveForbiddenAttributes(DOMNode $oElement) { $aAttrToRemove = array(); // Gather the attributes to remove @@ -341,35 +455,12 @@ class HTMLDOMSanitizer extends HTMLSanitizer $oElement->removeAttribute($sName); } } - + if ($oElement->hasChildNodes()) { - $aChildElementsToRemove = array(); - // Gather the child noes to remove foreach($oElement->childNodes as $oNode) { - if (($oNode instanceof DOMElement) && (!array_key_exists(strtolower($oNode->tagName), self::$aTagsWhiteList))) - { - $aChildElementsToRemove[] = $oNode; - } - else if ($oNode instanceof DOMComment) - { - $aChildElementsToRemove[] = $oNode; - } - else - { - // Recurse - $this->CleanNode($oNode); - if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img')) - { - InlineImage::ProcessImageTag($oNode); - } - } - } - // Now remove them - foreach($aChildElementsToRemove as $oDomElement) - { - $oElement->removeChild($oDomElement); + $this->CleanNodeRemoveForbiddenAttributes($oNode); } } } diff --git a/test/core/HTMLDOMSanitizerTest.php b/test/core/HTMLDOMSanitizerTest.php index 004b1cc74..d20656494 100644 --- a/test/core/HTMLDOMSanitizerTest.php +++ b/test/core/HTMLDOMSanitizerTest.php @@ -168,6 +168,83 @@ class HTMLDOMSanitizerTest extends ItopTestCase return $aTestCaseArray; } + /** + * Test the fix for ticket N°2556 + * + * @dataProvider PreserveBlackListedTagContentProvider + * + */ + public function testDoSanitizePreserveBlackListedTagContent($html, $expected) + { + $oSanitizer = new HTMLDOMSanitizer(); + $sSanitizedHtml = $oSanitizer->DoSanitize($html); + + $this->assertEquals($expected, str_replace("\n", '', $sSanitizedHtml)); + } + + public function PreserveBlackListedTagContentProvider() + { + return array( + 'basic' => array( + 'html' => '', + 'expected' => 'bar', + ), + 'basic with body' => array( + 'html' => '', + 'expected' => 'bar', + ), + 'basic with html and body tags' => array( + 'html' => '', + 'expected' => 'bar', + ), + 'basic with attributes' => array( + 'html' => '', + 'expected' => 'bar', + ), + 'basic with comment' => array( + 'html' => '', + 'expected' => 'bar', + ), + 'basic with contentRemovable tag' => array( + 'html' => '', + 'expected' => 'bar', + ), + 'nested' => array( + 'html' => 'oof', + 'expected' => 'foobazoofbaroof', + ), + 'nested with not closed br' => array( + 'html' => 'oof', + 'expected' => 'foobazoof
baroof', + ), + 'nested with allowed' => array( + 'html' => '', + 'expected' => '

baz

zab
oof', + ), + 'nested with spaces' => array( + 'html' => '', + 'expected' => 'baz oof', + ), + 'nested with attributes' => array( + 'html' => '', + 'expected' => 'bazoof', + ), + 'nested with allowed and attributes and spaces ' => array( + 'html' => '', + 'expected' => '
bazrab
oof', + ), + 'nested with allowed and contentRemovable tags' => array( + 'html' => '', + 'expected' => '
bazrab
oof', + ), + + 'regression: if head present => body is not trimmed' => array( + 'html' => 'bar', + 'expected' => 'bar', + ), + ); + } + /** * Generates an appropriate value for the given attribute, or use the counter if needed. * This is necessary as most of the attributes with empty or inappropriate values (like a numeric for a href) are removed by the parser @@ -202,5 +279,43 @@ class HTMLDOMSanitizerTest extends ItopTestCase return true; } + + /** + * @dataProvider CallInlineImageProcessImageTagProvider + */ + public function testDoSanitizeCallInlineImageProcessImageTag($sHtml, $iExpectedCount) + { + require_once APPROOT.'test/core/sanitizer/InlineImageMock.php'; + + $oSanitizer = new HTMLDOMSanitizer(); + $oSanitizer->DoSanitize($sHtml); + + $iCalledCount = \InlineImage::GetCallCounter(); + $this->assertEquals($iExpectedCount, $iCalledCount); + } + + public function CallInlineImageProcessImageTagProvider() + { + return array( + 'no image' => array( + 'html' => '

bar

', + 'expected' => 0, + ), + 'basic image' => array( + 'html' => '', + 'expected' => 1, + ), + 'nested images within forbidden tags' => array( + 'html' => '', + 'expected' => 5, + ), + 'nested images within forbidden and removed tags' => array( + 'html' => '', + 'expected' => 3, + ), + ); + } + + } diff --git a/test/core/sanitizer/InlineImageMock.php b/test/core/sanitizer/InlineImageMock.php new file mode 100644 index 000000000..d5ad33218 --- /dev/null +++ b/test/core/sanitizer/InlineImageMock.php @@ -0,0 +1,39 @@ + + * + */ + +/** + * Mock class used by @see \Combodo\iTop\Test\UnitTest\Core\HTMLDOMSanitizerTest + * + */ +class InlineImage +{ + private static $iCallCounter = 0; + + public static function ProcessImageTag(DOMNode $oNode) + { + self::$iCallCounter++; + } + + public static function GetCallCounter() + { + return self::$iCallCounter; + } +} \ No newline at end of file