N°2556 - HTML sanitization: preserve the content of removed tags (except for tags in a forbidden list)

forbidden list: see $aTagsContentRemovableList

(cherry picked from commit 746b47bb0e)
(cherry picked from commit 79909fadc0)
This commit is contained in:
bruno DA SILVA
2020-05-14 10:49:31 +02:00
committed by Pierre Goiffon
parent f20808d929
commit d3525190d5
3 changed files with 302 additions and 57 deletions

View File

@@ -160,53 +160,65 @@ class HTMLDOMSanitizer extends HTMLSanitizer
* @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations
*/
protected static $aTagsWhiteList = array(
// Keys are lower-case tag names: CleanNodeRemoveForbiddenTags() keeps a DOMElement only
// when strtolower($tagName) is a key of this array.
// The value arrays presumably list the attributes allowed on each tag - TODO confirm in
// CleanNodeRemoveForbiddenAttributes(), whose body is not fully visible here.
// NOTE(review): several keys appear twice in this listing (e.g. 'html', 'body', 'code',
// 'blockquote'). PHP keeps the last value of a duplicated key, and every duplicate shown
// here carries the same value. This looks like removed and added diff lines displayed
// together - confirm against the real file.
'html' => array(),
'body' => array(),
'a' => array('href', 'name', 'style', 'target', 'title'),
'p' => array('style'),
'blockquote' => array('style'),
'br' => array(),
'span' => array('style'),
'div' => array('style'),
'b' => array(),
'i' => array(),
'u' => array(),
'big' => array(),
'blockquote' => array('style'),
'body' => array(),
'br' => array(),
'center' => array(),
'cite' => array(),
'code' => array('style', 'class'),
'del' => array(),
'div' => array('style'),
'em' => array(),
'strong' => array(),
'img' => array('src', 'style', 'alt', 'title'),
'ul' => array('style'),
'ol' => array('style'),
'li' => array('style'),
'fieldset' => array('style'),
'font' => array('face', 'color', 'style', 'size'),
'h1' => array('style'),
'h2' => array('style'),
'h3' => array('style'),
'h4' => array('style'),
'hr' => array('style'),
'html' => array(),
'i' => array(),
'img' => array('src', 'style', 'alt', 'title'),
'ins' => array(),
'kbd' => array(),
'legend' => array('style'),
'li' => array('style'),
'nav' => array('style'),
'ol' => array('style'),
'p' => array('style'),
'pre' => array(),
'q' => array(),
'samp' => array(),
's' => array(), // strikethrough
'section' => array('style'),
'code' => array('style', 'class'),
'small' => array(),
'span' => array('style'),
'strong' => array(),
'table' => array('style', 'width', 'summary', 'align', 'border', 'cellpadding', 'cellspacing'),
'thead' => array('style'),
'tbody' => array('style'),
'tr' => array('style', 'colspan', 'rowspan'),
'td' => array('style', 'colspan', 'rowspan'),
'th' => array('style', 'colspan', 'rowspan'),
'fieldset' => array('style'),
'legend' => array('style'),
'font' => array('face', 'color', 'style', 'size'),
'big' => array(),
'small' => array(),
'thead' => array('style'),
'tr' => array('style', 'colspan', 'rowspan'),
'tt' => array(),
'kbd' => array(),
'samp' => array(),
'u' => array(),
'ul' => array('style'),
'var' => array(),
'del' => array(),
's' => array(), // strikethrough
'ins' => array(),
'cite' => array(),
'q' => array(),
'hr' => array('style'),
'pre' => array(),
'center' => array(),
);
/**
 * Lower-case names of the tags whose content is discarded together with the tag.
 *
 * SmartRemoveChild() checks this list when it removes a non-whitelisted element:
 * a listed element is dropped with everything it contains, any other element
 * has its child nodes kept in the parent.
 *
 * NOTE(review): 'code' is also a key of $aTagsWhiteList, and SmartRemoveChild() is
 * only called for non-whitelisted elements, so that entry looks ineffective - confirm.
 *
 * @var string[]
 */
protected static $aTagsContentRemovableList = [
    'applet',
    'basefont',
    'canvas',
    'code',
    'dialog',
    'embed',
    'object',
    'script',
    'style',
];
protected static $aAttrsWhiteList = array(
@@ -302,6 +314,108 @@ class HTMLDOMSanitizer extends HTMLSanitizer
}
/**
 * Sanitize the subtree rooted at $oElement by running three passes in sequence:
 * forbidden tag removal, image processing, then forbidden attribute removal.
 *
 * The image pass runs after the tag pass, so it only visits nodes that are still in the tree.
 *
 * @param \DOMNode $oElement root of the subtree to clean
 */
protected function CleanNode(DOMNode $oElement)
{
// 1. drop non-whitelisted elements and comments
$this->CleanNodeRemoveForbiddenTags($oElement);
// 2. hand every remaining <img> element to InlineImage::ProcessImageTag()
$this->CleanNodeHandleImages($oElement);
// 3. strip the attributes that must not be kept
$this->CleanNodeRemoveForbiddenAttributes($oElement);
}
/**
 * Recursively remove, below $oElement, every element whose lower-cased tag name is not
 * a key of self::$aTagsWhiteList, as well as every comment node.
 *
 * Forbidden elements are removed through SmartRemoveChild(), which may leave their inner
 * nodes in $oElement. The direct children are therefore scanned again until a complete
 * pass removes no element.
 *
 * @param \DOMNode $oElement node whose children are checked
 */
protected function CleanNodeRemoveForbiddenTags(DOMNode $oElement)
{
    if (!$oElement->hasChildNodes())
    {
        return;
    }

    // Kept children that were already recursed into: later passes must not process them again
    $aCleanedChildren = array();

    do
    {
        $bRescanNeeded = false;

        // Work on a snapshot: the live child list is modified while we loop
        $aChildren = iterator_to_array($oElement->childNodes, false);

        foreach ($aChildren as $oChild)
        {
            if ($oChild instanceof DOMComment)
            {
                $oElement->removeChild($oChild);
                continue;
            }

            $bIsForbiddenElement = ($oChild instanceof DOMElement)
                && !array_key_exists(strtolower($oChild->tagName), self::$aTagsWhiteList);
            if ($bIsForbiddenElement)
            {
                // Its content may have been moved up into $oElement: another pass is required
                $this->SmartRemoveChild($oElement, $oChild);
                $bRescanNeeded = true;
                continue;
            }

            // The node is kept: recurse into it, but only once across all the passes of the do/while
            $bAlreadyCleaned = false;
            /** @var \DOMNode $oCleanedChild */
            foreach ($aCleanedChildren as $oCleanedChild)
            {
                if ($oCleanedChild->isSameNode($oChild))
                {
                    $bAlreadyCleaned = true;
                    break;
                }
            }
            if ($bAlreadyCleaned)
            {
                continue;
            }

            $this->CleanNodeRemoveForbiddenTags($oChild);
            $aCleanedChildren[] = $oChild;
        }
    } while ($bRescanNeeded);
}
/**
 * Remove a node, but keep its inner nodes by moving them into the parent,
 * at the position of the removed node.
 *
 * The inner nodes are dropped together with the node when its tag is listed in
 * self::$aTagsContentRemovableList.
 *
 * Note: invalid/forbidden tags may be moved up, so they have to be checked again by the caller.
 *
 * @param \DOMNode $oParent parent of $oRemovable
 * @param \DOMElement $oRemovable child of $oParent to remove
 */
private function SmartRemoveChild(DOMNode $oParent, DOMElement $oRemovable)
{
    // Strict comparison: tag names and list entries are both strings
    $bDropContent = in_array(strtolower($oRemovable->tagName), self::$aTagsContentRemovableList, true);

    if (!$bDropContent)
    {
        // Move each child (instead of deep-cloning it and discarding the original) just before
        // the removed node, so that document order is preserved. Looping on firstChild avoids
        // iterating over the live child list while it shrinks.
        while ($oRemovable->firstChild !== null)
        {
            if ($oParent->insertBefore($oRemovable->firstChild, $oRemovable) === false)
            {
                // The node could not be moved: stop here rather than loop forever
                break;
            }
        }
    }

    $oParent->removeChild($oRemovable);
}
/**
 * Walk the subtree below $oElement depth-first and pass every <img> element
 * to InlineImage::ProcessImageTag().
 *
 * @param \DOMNode $oElement node whose descendants are visited
 */
protected function CleanNodeHandleImages(DOMNode $oElement)
{
    if (!$oElement->hasChildNodes())
    {
        return;
    }

    foreach ($oElement->childNodes as $oChild)
    {
        // Descendants first, then the child itself
        $this->CleanNodeHandleImages($oChild);

        $bIsImage = ($oChild instanceof DOMElement) && (strtolower($oChild->tagName) === 'img');
        if ($bIsImage)
        {
            InlineImage::ProcessImageTag($oChild);
        }
    }
}
protected function CleanNodeRemoveForbiddenAttributes(DOMNode $oElement)
{
$aAttrToRemove = array();
// Gather the attributes to remove
@@ -341,35 +455,12 @@ class HTMLDOMSanitizer extends HTMLSanitizer
$oElement->removeAttribute($sName);
}
}
if ($oElement->hasChildNodes())
{
$aChildElementsToRemove = array();
// Gather the child noes to remove
foreach($oElement->childNodes as $oNode)
{
if (($oNode instanceof DOMElement) && (!array_key_exists(strtolower($oNode->tagName), self::$aTagsWhiteList)))
{
$aChildElementsToRemove[] = $oNode;
}
else if ($oNode instanceof DOMComment)
{
$aChildElementsToRemove[] = $oNode;
}
else
{
// Recurse
$this->CleanNode($oNode);
if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img'))
{
InlineImage::ProcessImageTag($oNode);
}
}
}
// Now remove them
foreach($aChildElementsToRemove as $oDomElement)
{
$oElement->removeChild($oDomElement);
$this->CleanNodeRemoveForbiddenAttributes($oNode);
}
}
}

View File

@@ -168,6 +168,83 @@ class HTMLDOMSanitizerTest extends ItopTestCase
return $aTestCaseArray;
}
/**
 * Test the fix for ticket N°2556: sanitize the given HTML and compare the result,
 * with its line feeds removed, to the expected string.
 *
 * @dataProvider PreserveBlackListedTagContentProvider
 *
 * @param string $html HTML to sanitize
 * @param string $expected expected sanitized HTML, without line feeds
 */
public function testDoSanitizePreserveBlackListedTagContent($html, $expected)
{
    $oSanitizer = new HTMLDOMSanitizer();

    // Line feeds are stripped from the sanitizer output before the comparison
    $sActual = str_replace("\n", '', $oSanitizer->DoSanitize($html));

    $this->assertEquals($expected, $sActual);
}
/**
 * Data sets for testDoSanitizePreserveBlackListedTagContent().
 *
 * @return array one entry per case: 'html' is the input, 'expected' the sanitized output without line feeds
 */
public function PreserveBlackListedTagContentProvider()
{
    return [
        'basic' => [
            'html' => '<iframe>bar</iframe>',
            'expected' => 'bar',
        ],
        'basic with body' => [
            'html' => '<body><iframe>bar</iframe></body>',
            'expected' => 'bar',
        ],
        'basic with html and body tags' => [
            'html' => '<html><body lang="EN-GB" link="#0563C1" vlink="#954F72"><iframe>bar</iframe></body></html>',
            'expected' => 'bar',
        ],
        'basic with attributes' => [
            'html' => '<iframe baz="1">bar</iframe>',
            'expected' => 'bar',
        ],
        'basic with comment' => [
            'html' => '<iframe baz="1">bar<!-- foo --></iframe>',
            'expected' => 'bar',
        ],
        'basic with contentRemovable tag' => [
            'html' => '<iframe baz="1">bar<style>foo</style><script>boo</script></iframe>',
            'expected' => 'bar',
        ],
        'nested' => [
            'html' => '<iframe>foo<article>baz</article>oof<article><iframe>bar</iframe>oof</article></iframe>',
            'expected' => 'foobazoofbaroof',
        ],
        'nested with not closed br' => [
            'html' => '<iframe>foo<article>baz</article>oof<br><article><iframe>bar</iframe>oof</article></iframe>',
            'expected' => 'foobazoof<br>baroof',
        ],
        'nested with allowed' => [
            'html' => '<iframe><div><article><p>baz</p>zab</article></div>oof</iframe>',
            'expected' => '<div><p>baz</p>zab</div>oof',
        ],
        'nested with spaces' => [
            'html' => '<iframe><article>baz</article> oof</iframe>',
            'expected' => 'baz oof',
        ],
        'nested with attributes' => [
            'html' => '<iframe baz="1"><article baz="1" biz="2">baz</article>oof</iframe>',
            'expected' => 'bazoof',
        ],
        'nested with allowed and attributes and spaces ' => [
            'html' => '<html><body><iframe baz="1"><div baz="baz"><article baz="1" biz="2">baz</article>rab</div> oof</iframe></body></html>',
            'expected' => '<div>bazrab</div> oof',
        ],
        'nested with allowed and contentRemovable tags' => [
            'html' => '<html><body><iframe baz="1"><div ><article>baz</article>rab</div> oof<embed>embedTExt</embed></iframe><style>foo</style><script>boo</script></body></html>',
            'expected' => '<div>bazrab</div> oof',
        ],
        'regression: if head present => body is not trimmed' => [
            'html' => '<html><head></head><body lang="EN-GB" link="#0563C1" vlink="#954F72">bar</body></html>',
            'expected' => 'bar',
        ],
    ];
}
/**
* Generates an appropriate value for the given attribute, or use the counter if needed.
* This is necessary as most of the attributes with empty or inappropriate values (like a numeric for a href) are removed by the parser
@@ -202,5 +279,43 @@ class HTMLDOMSanitizerTest extends ItopTestCase
return true;
}
/**
 * Check how many times the sanitizer calls InlineImage::ProcessImageTag() for a given HTML input.
 *
 * The InlineImage mock keeps its call count in a static property that only ever grows.
 * The assertion is therefore made on the number of calls added by this sanitization,
 * so that the result does not depend on data sets previously run in the same process.
 *
 * @dataProvider CallInlineImageProcessImageTagProvider
 *
 * @param string $sHtml HTML to sanitize
 * @param int $iExpectedCount expected number of ProcessImageTag() calls triggered by $sHtml
 */
public function testDoSanitizeCallInlineImageProcessImageTag($sHtml, $iExpectedCount)
{
    require_once APPROOT.'test/core/sanitizer/InlineImageMock.php';

    // Baseline: calls already recorded by the mock before this data set
    $iCountBefore = \InlineImage::GetCallCounter();

    $oSanitizer = new HTMLDOMSanitizer();
    $oSanitizer->DoSanitize($sHtml);

    $iCalledCount = \InlineImage::GetCallCounter() - $iCountBefore;
    $this->assertEquals($iExpectedCount, $iCalledCount);
}
/**
 * Data sets for testDoSanitizeCallInlineImageProcessImageTag().
 *
 * @return array one entry per case: 'html' is the input, 'expected' the number of ProcessImageTag() calls
 */
public function CallInlineImageProcessImageTagProvider()
{
    return [
        'no image' => [
            'html' => '<p>bar</p>',
            'expected' => 0,
        ],
        'basic image' => [
            'html' => '<img />',
            'expected' => 1,
        ],
        'nested images within forbidden tags' => [
            'html' => '<html><body><img /><iframe baz="1"><div baz="baz"><article baz="1" biz="2">baz<img /><img /></article>rab</div> oof<img /></iframe><img /></body></html>',
            'expected' => 5,
        ],
        // The two images inside <object> are dropped with it, hence 3 instead of 5
        'nested images within forbidden and removed tags' => [
            'html' => '<html><body><img /><iframe baz="1"><div baz="baz"><object baz="1" biz="2">baz<img /><img /></object>rab</div> oof<img /></iframe><img /></body></html>',
            'expected' => 3,
        ],
    ];
}
}

View File

@@ -0,0 +1,39 @@
<?php
/**
* Copyright (C) 2010-2020 Combodo SARL
*
* This file is part of iTop.
*
* iTop is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* iTop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with iTop. If not, see <http://www.gnu.org/licenses/>
*
*/
/**
 * Mock of the InlineImage class, used by \Combodo\iTop\Test\UnitTest\Core\HTMLDOMSanitizerTest.
 *
 * It does nothing but count the calls made to ProcessImageTag().
 * The count is static, so it is shared by every caller within the same PHP process:
 * use ResetCallCounter() to start again from zero.
 */
class InlineImage
{
    /** @var int number of ProcessImageTag() calls since the class was loaded or last reset */
    private static $iCallCounter = 0;

    /**
     * Record one call; the node itself is left untouched.
     *
     * @param \DOMNode $oNode image node passed by the sanitizer
     */
    public static function ProcessImageTag(DOMNode $oNode)
    {
        self::$iCallCounter++;
    }

    /**
     * @return int number of calls recorded so far
     */
    public static function GetCallCounter()
    {
        return self::$iCallCounter;
    }

    /**
     * Set the call count back to zero, so that a test can start from a known state.
     */
    public static function ResetCallCounter()
    {
        self::$iCallCounter = 0;
    }
}