N°4360 Security hardening

This commit is contained in:
Pierre Goiffon
2021-11-23 16:58:47 +01:00
parent 3e8dd2f4a5
commit e15d4bfab6
12 changed files with 819 additions and 193 deletions

View File

@@ -7337,6 +7337,13 @@ class AttributeImage extends AttributeBlob
{
$oDoc = parent::MakeRealValue($proposedValue, $oHostObj);
if (($oDoc instanceof ormDocument)
&& (false === $oDoc->IsEmpty())
&& ($oDoc->GetMimeType() === 'image/svg+xml')) {
$sCleanSvg = HTMLSanitizer::Sanitize($oDoc->GetData(), 'svg_sanitizer');
$oDoc = new ormDocument($sCleanSvg, $oDoc->GetMimeType(), $oDoc->GetFileName());
}
// The validation of the MIME Type is done by CheckFormat below
return $oDoc;
}

View File

@@ -1049,6 +1049,14 @@ class Config
'source_of_value' => '',
'show_in_conf_sample' => false,
),
'svg_sanitizer' => array(
'type' => 'string',
'description' => 'The class to use for SVG sanitization : allow to provide a custom made sanitizer',
'default' => 'SvgDOMSanitizer',
'value' => '',
'source_of_value' => '',
'show_in_conf_sample' => false,
),
'inline_image_max_display_width' => array(
'type' => 'integer',
'description' => 'The maximum width (in pixels) when displaying images inside an HTML formatted attribute. Images will be displayed using this this maximum width.',

View File

@@ -34,43 +34,51 @@ abstract class HTMLSanitizer
/**
* Sanitize an HTML string with the configured sanitizer, falling back to HTMLDOMSanitizer in case of Exception or invalid configuration
*
* @param string $sHTML
* @param string $sConfigKey
*
* @return string
* @noinspection SelfClassReferencingInspection
*/
public static function Sanitize($sHTML)
public static function Sanitize($sHTML, $sConfigKey = 'html_sanitizer')
{
$sSanitizerClass = MetaModel::GetConfig()->Get('html_sanitizer');
if(!class_exists($sSanitizerClass))
{
IssueLog::Warning('The configured "html_sanitizer" class "'.$sSanitizerClass.'" is not a valid class. Will use HTMLDOMSanitizer as the default sanitizer.');
$sSanitizerClass = MetaModel::GetConfig()->Get($sConfigKey);
if (!class_exists($sSanitizerClass)) {
IssueLog::Warning('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a valid class. Will use HTMLDOMSanitizer as the default sanitizer.');
$sSanitizerClass = 'HTMLDOMSanitizer';
} else if (false === is_subclass_of($sSanitizerClass, HTMLSanitizer::class)) {
if ($sConfigKey === 'html_sanitizer') {
IssueLog::Warning('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a subclass of HTMLSanitizer. Will use HTMLDOMSanitizer as the default sanitizer.');
$sSanitizerClass = 'HTMLDOMSanitizer';
}
if ($sConfigKey === 'svg_sanitizer') {
IssueLog::Error('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a subclass of '.HTMLSanitizer::class.' ! Won\'t sanitize string.');
return $sHTML;
}
}
else if(!is_subclass_of($sSanitizerClass, 'HTMLSanitizer'))
{
IssueLog::Warning('The configured "html_sanitizer" class "'.$sSanitizerClass.'" is not a subclass of HTMLSanitizer. Will use HTMLDOMSanitizer as the default sanitizer.');
$sSanitizerClass = 'HTMLDOMSanitizer';
}
try
{
try {
$oSanitizer = new $sSanitizerClass();
$sCleanHTML = $oSanitizer->DoSanitize($sHTML);
}
catch(Exception $e)
{
if($sSanitizerClass != 'HTMLDOMSanitizer')
{
IssueLog::Warning('Failed to sanitize an HTML string with "'.$sSanitizerClass.'". The following exception occured: '.$e->getMessage());
IssueLog::Warning('Will try to sanitize with HTMLDOMSanitizer.');
// try again with the HTMLDOMSanitizer
$oSanitizer = new HTMLDOMSanitizer();
$sCleanHTML = $oSanitizer->DoSanitize($sHTML);
}
else
{
IssueLog::Error('Failed to sanitize an HTML string with "HTMLDOMSanitizer". The following exception occured: '.$e->getMessage());
IssueLog::Error('The HTML will NOT be sanitized.');
$sCleanHTML = $sHTML;
catch(Exception $e) {
if ($sConfigKey === 'html_sanitizer') {
if ($sSanitizerClass !== HTMLDOMSanitizer::class) {
IssueLog::Warning('Failed to sanitize an HTML string with "'.$sSanitizerClass.'". The following exception occured: '.$e->getMessage());
IssueLog::Warning('Will try to sanitize with HTMLDOMSanitizer.');
// try again with the HTMLDOMSanitizer
$oSanitizer = new HTMLDOMSanitizer();
$sCleanHTML = $oSanitizer->DoSanitize($sHTML);
} else {
IssueLog::Error('Failed to sanitize an HTML string with "HTMLDOMSanitizer". The following exception occured: '.$e->getMessage());
IssueLog::Error('The HTML will NOT be sanitized.');
$sCleanHTML = $sHTML;
}
} else {
IssueLog::Error('Failed to sanitize string with "'.$sSanitizerClass.'", will return original value ! Exception: '.$e->getMessage());
$sCleanHTML = $sHTML;
}
}
return $sCleanHTML;
@@ -94,67 +102,167 @@ class HTMLNullSanitizer extends HTMLSanitizer
{
return $sHTML;
}
}
/**
* A standard-compliant HTMLSanitizer based on the HTMLPurifier library by Edward Z. Yang
* Complete but quite slow
* http://htmlpurifier.org
* Common implementation for sanitizer using DOM parsing
*/
/*
class HTMLPurifierSanitizer extends HTMLSanitizer
abstract class DOMSanitizer extends HTMLSanitizer
{
protected static $oPurifier = null;
public function __construct()
{
if (self::$oPurifier == null)
{
$sLibPath = APPROOT.'lib/htmlpurifier/HTMLPurifier.auto.php';
if (!file_exists($sLibPath))
{
throw new Exception("Missing library '$sLibPath', cannot use HTMLPurifierSanitizer.");
}
require_once($sLibPath);
$oPurifierConfig = HTMLPurifier_Config::createDefault();
$oPurifierConfig->set('Core.Encoding', 'UTF-8'); // defaults to 'UTF-8'
$oPurifierConfig->set('HTML.Doctype', 'XHTML 1.0 Strict'); // defaults to 'XHTML 1.0 Transitional'
$oPurifierConfig->set('URI.AllowedSchemes', array (
'http' => true,
'https' => true,
'data' => true, // This one is not present by default
));
$sPurifierCache = APPROOT.'data/HTMLPurifier';
if (!is_dir($sPurifierCache))
{
mkdir($sPurifierCache);
}
if (!is_dir($sPurifierCache))
{
throw new Exception("Could not create the cache directory '$sPurifierCache'");
}
$oPurifierConfig->set('Cache.SerializerPath', $sPurifierCache); // no trailing slash
self::$oPurifier = new HTMLPurifier($oPurifierConfig);
}
}
/** @var DOMDocument */
protected $oDoc;
abstract public function GetTagsWhiteList();
abstract public function GetTagsBlackList();
abstract public function GetAttrsWhiteList();
abstract public function GetAttrsBlackList();
abstract public function GetStylesWhiteList();
public function DoSanitize($sHTML)
{
$sCleanHtml = self::$oPurifier->purify($sHTML);
return $sCleanHtml;
$this->oDoc = new DOMDocument();
$this->oDoc->preserveWhitespace = true;
// MS outlook implements empty lines by the mean of <p><o:p></o:p></p>
// We have to transform that into <p><br></p> (which is how Thunderbird implements empty lines)
// Unfortunately, DOMDocument::loadHTML does not take the tag namespaces into account (once loaded there is no way to know if the tag did have a namespace)
// therefore we have to do the transformation upfront
$sHTML = preg_replace('@<o:p>(\s|&nbsp;)*</o:p>@', '<br>', $sHTML);
$this->LoadDoc($sHTML);
$this->CleanNode($this->oDoc);
$sCleanHtml = $this->PrintDoc();
return $sCleanHtml;
}
abstract public function LoadDoc($sHTML);
/**
* @return string cleaned source
* @uses \DOMSanitizer::oDoc
*/
abstract public function PrintDoc();
protected function CleanNode(DOMNode $oElement)
{
$aAttrToRemove = array();
// Gather the attributes to remove
if ($oElement->hasAttributes()) {
foreach ($oElement->attributes as $oAttr) {
$sAttr = strtolower($oAttr->name);
if ((false === empty($this->GetAttrsBlackList()))
&& (in_array($sAttr, $this->GetAttrsBlackList(), true))) {
$aAttrToRemove[] = $oAttr->name;
} else if ((false === empty($this->GetTagsWhiteList()))
&& (false === in_array($sAttr, $this->GetTagsWhiteList()[strtolower($oElement->tagName)]))) {
$aAttrToRemove[] = $oAttr->name;
} else if (!$this->IsValidAttributeContent($sAttr, $oAttr->value)) {
// Invalid content
$aAttrToRemove[] = $oAttr->name;
} else if ($sAttr == 'style') {
// Special processing for style tags
$sCleanStyle = $this->CleanStyle($oAttr->value);
if ($sCleanStyle == '') {
// Invalid content
$aAttrToRemove[] = $oAttr->name;
} else {
$oElement->setAttribute($oAttr->name, $sCleanStyle);
}
}
}
// Now remove them
foreach($aAttrToRemove as $sName)
{
$oElement->removeAttribute($sName);
}
}
if ($oElement->hasChildNodes())
{
$aChildElementsToRemove = array();
// Gather the child noes to remove
foreach($oElement->childNodes as $oNode) {
if ($oNode instanceof DOMElement) {
$sNodeTagName = strtolower($oNode->tagName);
}
if (($oNode instanceof DOMElement)
&& (false === empty($this->GetTagsBlackList()))
&& (in_array($sNodeTagName, $this->GetTagsBlackList(), true))) {
$aChildElementsToRemove[] = $oNode;
} else if (($oNode instanceof DOMElement)
&& (false === empty($this->GetTagsWhiteList()))
&& (false === array_key_exists($sNodeTagName, $this->GetTagsWhiteList()))) {
$aChildElementsToRemove[] = $oNode;
} else if ($oNode instanceof DOMComment) {
$aChildElementsToRemove[] = $oNode;
} else {
// Recurse
$this->CleanNode($oNode);
if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img')) {
InlineImage::ProcessImageTag($oNode);
}
}
}
// Now remove them
foreach($aChildElementsToRemove as $oDomElement)
{
$oElement->removeChild($oDomElement);
}
}
}
protected function IsValidAttributeContent($sAttributeName, $sValue)
{
if ((false === empty($this->GetAttrsBlackList()))
&& (in_array($sAttributeName, $this->GetAttrsBlackList(), true))) {
return true;
}
if (array_key_exists($sAttributeName, $this->GetAttrsWhiteList())) {
return preg_match($this->GetAttrsWhiteList()[$sAttributeName], $sValue);
}
return true;
}
protected function CleanStyle($sStyle)
{
if (empty($this->GetStylesWhiteList())) {
return $sStyle;
}
$aAllowedStyles = array();
$aItems = explode(';', $sStyle);
{
foreach ($aItems as $sItem) {
$aElements = explode(':', trim($sItem));
if (in_array(trim(strtolower($aElements[0])), $this->GetStylesWhiteList())) {
$aAllowedStyles[] = trim($sItem);
}
}
}
return implode(';', $aAllowedStyles);
}
}
*/
class HTMLDOMSanitizer extends HTMLSanitizer
class HTMLDOMSanitizer extends DOMSanitizer
{
protected $oDoc;
/**
* @var array
* @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations
* @var array
*/
protected static $aTagsWhiteList = array(
'html' => array(),
@@ -203,6 +311,7 @@ class HTMLDOMSanitizer extends HTMLSanitizer
'q' => array(),
'hr' => array('style'),
'pre' => array(),
'center' => array(),
);
protected static $aAttrsWhiteList = array(
@@ -210,8 +319,8 @@ class HTMLDOMSanitizer extends HTMLSanitizer
);
/**
* @var array
* @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations
* @var array
*/
protected static $aStylesWhiteList = array(
'background-color',
@@ -235,160 +344,199 @@ class HTMLDOMSanitizer extends HTMLSanitizer
'white-space',
);
public function GetTagsWhiteList()
{
return static::$aTagsWhiteList;
}
public function GetTagsBlackList()
{
return [];
}
public function GetAttrsWhiteList()
{
return static::$aAttrsWhiteList;
}
public function GetAttrsBlackList()
{
return [];
}
public function GetStylesWhiteList()
{
return static::$aStylesWhiteList;
}
public function __construct()
{
parent::__construct();
// Building href validation pattern from url and email validation patterns as the patterns are not used the same way in HTML content than in standard attributes value.
// eg. "foo@bar.com" vs "mailto:foo@bar.com?subject=Title&body=Hello%20world"
if (!array_key_exists('href', self::$aAttrsWhiteList))
{
if (!array_key_exists('href', self::$aAttrsWhiteList)) {
// Regular urls
$sUrlPattern = utils::GetConfig()->Get('url_validation_pattern');
// Mailto urls
$sMailtoPattern = '(mailto:(' . utils::GetConfig()->Get('email_validation_pattern') . ')(?:\?(?:subject|body)=([a-zA-Z0-9+\$_.-]*)(?:&(?:subject|body)=([a-zA-Z0-9+\$_.-]*))?)?)';
$sMailtoPattern = '(mailto:('.utils::GetConfig()->Get('email_validation_pattern').')(?:\?(?:subject|body)=([a-zA-Z0-9+\$_.-]*)(?:&(?:subject|body)=([a-zA-Z0-9+\$_.-]*))?)?)';
// Notification placeholders
// eg. $this->caller_id$, $this->hyperlink()$, $this->hyperlink(portal)$, $APP_URL$, $MODULES_URL$, ...
// Note: Authorize both $xxx$ and %24xxx%24 as the latter one is encoded when used in HTML attributes (eg. a[href])
$sPlaceholderPattern = '(\$|%24)[\w-]*(->[\w]*(\([\w-]*?\))?)?(\$|%24)';
$sPattern = $sUrlPattern . '|' . $sMailtoPattern . '|' . $sPlaceholderPattern;
$sPattern = $sUrlPattern.'|'.$sMailtoPattern.'|'.$sPlaceholderPattern;
$sPattern = '/'.str_replace('/', '\/', $sPattern).'/i';
self::$aAttrsWhiteList['href'] = $sPattern;
}
}
public function DoSanitize($sHTML)
public function LoadDoc($sHTML)
{
$this->oDoc = new DOMDocument();
$this->oDoc->preserveWhitespace = true;
// MS outlook implements empty lines by the mean of <p><o:p></o:p></p>
// We have to transform that into <p><br></p> (which is how Thunderbird implements empty lines)
// Unfortunately, DOMDocument::loadHTML does not take the tag namespaces into account (once loaded there is no way to know if the tag did have a namespace)
// therefore we have to do the transformation upfront
$sHTML = preg_replace('@<o:p>(\s|&nbsp;)*</o:p>@', '<br>', $sHTML);
@$this->oDoc->loadHTML('<?xml encoding="UTF-8"?>'.$sHTML); // For loading HTML chunks where the character set is not specified
$this->CleanNode($this->oDoc);
}
public function PrintDoc()
{
$oXPath = new DOMXPath($this->oDoc);
$sXPath = "//body";
$oNodesList = $oXPath->query($sXPath);
if ($oNodesList->length == 0)
{
if ($oNodesList->length == 0) {
// No body, save the whole document
$sCleanHtml = $this->oDoc->saveHTML();
}
else
{
} else {
// Export only the content of the body tag
$sCleanHtml = $this->oDoc->saveHTML($oNodesList->item(0));
// remove the body tag itself
$sCleanHtml = str_replace( array('<body>', '</body>'), '', $sCleanHtml);
$sCleanHtml = str_replace(array('<body>', '</body>'), '', $sCleanHtml);
}
return $sCleanHtml;
}
protected function CleanNode(DOMNode $oElement)
}
/**
* @since 2.6.5 2.7.6 3.0.0 N°4360
*/
class SvgDOMSanitizer extends DOMSanitizer
{
public function GetTagsWhiteList()
{
$aAttrToRemove = array();
// Gather the attributes to remove
if ($oElement->hasAttributes())
{
foreach($oElement->attributes as $oAttr)
{
$sAttr = strtolower($oAttr->name);
if (!in_array($sAttr, self::$aTagsWhiteList[strtolower($oElement->tagName)]))
{
// Forbidden (or unknown) attribute
$aAttrToRemove[] = $oAttr->name;
}
else if (!$this->IsValidAttributeContent($sAttr, $oAttr->value))
{
// Invalid content
$aAttrToRemove[] = $oAttr->name;
}
else if ($sAttr == 'style')
{
// Special processing for style tags
$sCleanStyle = $this->CleanStyle($oAttr->value);
if ($sCleanStyle == '')
{
// Invalid content
$aAttrToRemove[] = $oAttr->name;
}
else
{
$oElement->setAttribute($oAttr->name, $sCleanStyle);
}
}
}
// Now remove them
foreach($aAttrToRemove as $sName)
{
$oElement->removeAttribute($sName);
}
}
if ($oElement->hasChildNodes())
{
$aChildElementsToRemove = array();
// Gather the child noes to remove
foreach($oElement->childNodes as $oNode)
{
if (($oNode instanceof DOMElement) && (!array_key_exists(strtolower($oNode->tagName), self::$aTagsWhiteList)))
{
$aChildElementsToRemove[] = $oNode;
}
else if ($oNode instanceof DOMComment)
{
$aChildElementsToRemove[] = $oNode;
}
else
{
// Recurse
$this->CleanNode($oNode);
if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img'))
{
InlineImage::ProcessImageTag($oNode);
}
}
}
// Now remove them
foreach($aChildElementsToRemove as $oDomElement)
{
$oElement->removeChild($oDomElement);
}
}
return [];
}
protected function CleanStyle($sStyle)
/**
* @return string[]
* @link https://developer.mozilla.org/en-US/docs/Web/SVG/Element/script
*/
public function GetTagsBlackList()
{
$aAllowedStyles = array();
$aItems = explode(';', $sStyle);
{
foreach($aItems as $sItem)
{
$aElements = explode(':', trim($sItem));
if (in_array(trim(strtolower($aElements[0])), static::$aStylesWhiteList))
{
$aAllowedStyles[] = trim($sItem);
}
}
}
return implode(';', $aAllowedStyles);
return [
'script',
];
}
protected function IsValidAttributeContent($sAttributeName, $sValue)
public function GetAttrsWhiteList()
{
if (array_key_exists($sAttributeName, self::$aAttrsWhiteList))
{
return preg_match(self::$aAttrsWhiteList[$sAttributeName], $sValue);
}
return true;
return [];
}
}
/**
* @return string[]
* @link https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/Events#document_event_attributes
*/
public function GetAttrsBlackList()
{
return [
'onbegin',
'onbegin',
'onrepeat',
'onabort',
'onerror',
'onerror',
'onscroll',
'onunload',
'oncopy',
'oncut',
'onpaste',
'oncancel',
'oncanplay',
'oncanplaythrough',
'onchange',
'onclick',
'onclose',
'oncuechange',
'ondblclick',
'ondrag',
'ondragend',
'ondragenter',
'ondragleave',
'ondragover',
'ondragstart',
'ondrop',
'ondurationchange',
'onemptied',
'onended',
'onerror',
'onfocus',
'oninput',
'oninvalid',
'onkeydown',
'onkeypress',
'onkeyup',
'onload',
'onloadeddata',
'onloadedmetadata',
'onloadstart',
'onmousedown',
'onmouseenter',
'onmouseleave',
'onmousemove',
'onmouseout',
'onmouseover',
'onmouseup',
'onmousewheel',
'onpause',
'onplay',
'onplaying',
'onprogress',
'onratechange',
'onreset',
'onresize',
'onscroll',
'onseeked',
'onseeking',
'onselect',
'onshow',
'onstalled',
'onsubmit',
'onsuspend',
'ontimeupdate',
'ontoggle',
'onvolumechange',
'onwaiting',
'onactivate',
'onfocusin',
'onfocusout',
];
}
public function GetStylesWhiteList()
{
return [];
}
public function LoadDoc($sHTML)
{
@$this->oDoc->loadXml($sHTML, LIBXML_NOBLANKS);
}
public function PrintDoc()
{
return $this->oDoc->saveXML();
}
}

View File

@@ -0,0 +1,75 @@
<?php
namespace Combodo\iTop\Test\UnitTest\Core\Sanitizer;
use Combodo\iTop\Test\UnitTest\ItopTestCase;
/**
* @runTestsInSeparateProcesses
* @preserveGlobalState disabled
* @backupGlobals disabled
*/
abstract class AbstractDOMSanitizerTest extends ItopTestCase
{
const INPUT_DIRECTORY = 'input';
const OUTPUT_DIRECTORY = 'output';
protected function setUp()
{
parent::setUp();
require_once(APPROOT.'application/utils.inc.php');
require_once(APPROOT.'core/htmlsanitizer.class.inc.php');
require_once(APPROOT.'test/core/sanitizer/InlineImageMock.php');
}
protected function ReadTestFile($sFileToTest, $sFolderName)
{
$sCurrentPath = __DIR__;
return file_get_contents($sCurrentPath.DIRECTORY_SEPARATOR
.$sFolderName.DIRECTORY_SEPARATOR
.$sFileToTest);
}
protected function RemoveNewLines($sText)
{
$sText = str_replace("\r\n", "\n", $sText);
$sText = str_replace("\r", "\n", $sText);
$sText = str_replace("\n", '', $sText);
return $sText;
}
/**
* Generates an appropriate value for the given attribute, or use the counter if needed.
* This is necessary as most of the attributes with empty or inappropriate values (like a numeric for a href) are removed by the parser
*
* @param string $sTagAttribute
* @param int $iAttributeCounter
*
* @return string attribute value
*/
protected function GetTagAttributeValue($sTagAttribute, $iAttributeCounter)
{
$sTagAttrValue = ' '.$sTagAttribute.'="';
if (in_array($sTagAttribute, array('href', 'src'))) {
return $sTagAttrValue.'http://www.combodo.com"';
}
if ($sTagAttribute === 'style') {
return $sTagAttrValue.'color: black"';
}
return $sTagAttrValue.$iAttributeCounter.'"';
}
protected function IsClosingTag($sTag)
{
if (in_array($sTag, array('br', 'img', 'hr'))) {
return false;
}
return true;
}
}

View File

@@ -0,0 +1,260 @@
<?php
namespace Combodo\iTop\Test\UnitTest\Core\Sanitizer;
use HTMLDOMSanitizer;
require_once __DIR__.'/AbstractDOMSanitizerTest.php';
/**
* @runTestsInSeparateProcesses
* @preserveGlobalState disabled
* @backupGlobals disabled
*/
class HTMLDOMSanitizerTest extends AbstractDOMSanitizerTest
{
/**
* @dataProvider DoSanitizeProvider
*
* @param string $sFileToTest filename
*/
public function testDoSanitize($sFileToTest)
{
$sInputHtml = $this->ReadTestFile($sFileToTest, self::INPUT_DIRECTORY);
$sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY);
$sOutputHtml = $this->RemoveNewLines($sOutputHtml);
$oSanitizer = new HTMLDOMSanitizer();
$sRes = $oSanitizer->DoSanitize($sInputHtml);
// Removing newlines as the parser gives different results depending on the PHP version
// Didn't manage to get it right :
// - no php.ini difference
// - playing with the parser preserveWhitespace/formatOutput parser options didn't help
// So we're removing new lines on both sides :/
$sOutputHtml = $this->RemoveNewLines($sOutputHtml);
$sRes = $this->RemoveNewLines($sRes);
$this->debug($sRes);
$this->assertEquals($sOutputHtml, $sRes);
}
public function DoSanitizeProvider()
{
return array(
array(
'scripts.html',
),
);
}
/**
* @dataProvider WhiteListProvider
*
* @param string $sHtmlToTest HTML content
*/
public function testDoSanitizeWhiteList($sHtmlToTest)
{
$oSanitizer = new HTMLDOMSanitizer();
$sRes = $oSanitizer->DoSanitize($sHtmlToTest);
// Removing newlines as the parser gives different results depending on the PHP version
// Didn't manage to get it right :
// - no php.ini difference
// - playing with the parser preserveWhitespace/formatOutput parser options didn't help
// So we're removing new lines on both sides :/
$sHtmlToTest = $this->RemoveNewLines($sHtmlToTest);
$sRes = $this->RemoveNewLines($sRes);
$this->debug($sRes);
$this->assertEquals($sHtmlToTest, $sRes);
}
public function WhiteListProvider()
{
// This is a copy of \HTMLDOMSanitizer::$aTagsWhiteList
// should stay a copy as we want to check we're not removing something by mistake as it was done with the CENTER tag (N°2558)
$aTagsWhiteList = array(
// we don't test HTML and BODY as the parser removes them if context isn't appropriate
'a' => array('href', 'name', 'style', 'target', 'title'),
'p' => array('style'),
'blockquote' => array('style'),
'br' => array(),
'span' => array('style'),
'div' => array('style'),
'b' => array(),
'i' => array(),
'u' => array(),
'em' => array(),
'strong' => array(),
'img' => array('src', 'style', 'alt', 'title'),
'ul' => array('style'),
'ol' => array('style'),
'li' => array('style'),
'h1' => array('style'),
'h2' => array('style'),
'h3' => array('style'),
'h4' => array('style'),
'nav' => array('style'),
'section' => array('style'),
'code' => array('style'),
'table' => array('style', 'width', 'summary', 'align', 'border', 'cellpadding', 'cellspacing'),
'thead' => array('style'),
'tbody' => array('style'),
'tr' => array('style', 'colspan', 'rowspan'),
'td' => array('style', 'colspan', 'rowspan'),
'th' => array('style', 'colspan', 'rowspan'),
'fieldset' => array('style'),
'legend' => array('style'),
'font' => array('face', 'color', 'style', 'size'),
'big' => array(),
'small' => array(),
'tt' => array(),
'kbd' => array(),
'samp' => array(),
'var' => array(),
'del' => array(),
's' => array(), // strikethrough
'ins' => array(),
'cite' => array(),
'q' => array(),
'hr' => array('style'),
'pre' => array(),
'center' => array(),
);
$aTestCaseArray = array();
$sInputText = $this->ReadTestFile('whitelist_test.html', self::INPUT_DIRECTORY);
foreach ($aTagsWhiteList as $sTag => $aTagAttributes) {
$sTestCaseText = $sInputText;
$sStartTag = "<$sTag";
$iAttrCounter = 0;
foreach ($aTagAttributes as $sTagAttribute) {
$sStartTag .= $this->GetTagAttributeValue($sTagAttribute, $iAttrCounter);
$iAttrCounter++;
}
$sStartTag .= '>';
$sTestCaseText = str_replace('##START_TAG##', $sStartTag, $sTestCaseText);
$sClosingTag = $this->IsClosingTag($sTag) ? "</$sTag>" : '';
$sTestCaseText = str_replace('##END_TAG##', $sClosingTag, $sTestCaseText);
$aTestCaseArray[$sTag] = array($sTestCaseText);
}
return $aTestCaseArray;
}
/**
* @dataProvider RemoveBlackListedTagContentProvider
*/
public function testDoSanitizeRemoveBlackListedTagContent($html, $expected)
{
$oSanitizer = new HTMLDOMSanitizer();
$sSanitizedHtml = $oSanitizer->DoSanitize($html);
$this->assertEquals($expected, str_replace("\n", '', $sSanitizedHtml));
}
public function RemoveBlackListedTagContentProvider()
{
return array(
'basic' => array(
'html' => 'foo<iframe>bar</iframe>baz',
'expected' => '<p>foobaz</p>',
),
'basic with body' => array(
'html' => '<body>foo<iframe>bar</iframe>baz</body>',
'expected' => 'foobaz',
),
'basic with html and body tags' => array(
'html' => '<html><body lang="EN-GB" link="#0563C1" vlink="#954F72">foo<iframe>bar</iframe>baz</body></html>',
'expected' => 'foobaz',
),
'basic with attributes' => array(
'html' => 'foo<iframe baz="1">bar</iframe>baz',
'expected' => '<p>foobaz</p>',
),
'basic with comment' => array(
'html' => 'foo<iframe baz="1">bar<!-- foo --></iframe>baz',
'expected' => '<p>foobaz</p>',
),
'basic with contentRemovable tag' => array(
'html' => 'foo<iframe baz="1">bar<style>foo</style><script>boo</script></iframe>baz',
'expected' => '<p>foobaz</p>',
),
'nested' => array(
'html' => 'before<iframe>foo<article>baz</article>oof<article><iframe>bar</iframe>oof</article></iframe>after',
'expected' => '<p>beforeafter</p>',
),
'nested with not closed br' => array(
'html' => 'before<iframe>foo<article>baz</article>oof<br><article><iframe>bar</iframe>oof</article></iframe>after',
'expected' => '<p>beforeafter</p>',
),
'nested with allowed' => array(
'html' => 'before<iframe><div><article><p>baz</p>zab</article></div>oof</iframe>after',
'expected' => '<p>beforeafter</p>',
),
'nested with spaces' => array(
'html' => 'before<iframe><article>baz</article> oof</iframe>after',
'expected' => '<p>beforeafter</p>',
),
'nested with attributes' => array(
'html' => 'before<iframe baz="1"><article baz="1" biz="2">baz</article>oof</iframe>after',
'expected' => '<p>beforeafter</p>',
),
'nested with allowed and attributes and spaces ' => array(
'html' => '<html><body>before<iframe baz="1"><div baz="baz"><article baz="1" biz="2">baz</article>rab</div> oof</iframe>after</body></html>',
'expected' => 'beforeafter',
),
'nested with allowed and contentRemovable tags' => array(
'html' => '<html><body>before<iframe baz="1"><div ><article>baz</article>rab</div> oof<embed>embedTExt</embed></iframe>middle<style>foo</style>after<script>boo</script></body></html>',
'expected' => 'beforemiddleafter',
),
'regression: if head present => body is not trimmed' => array(
'html' => '<html><head></head><body lang="EN-GB" link="#0563C1" vlink="#954F72">bar</body></html>',
'expected' => 'bar',
),
);
}
/**
* @dataProvider CallInlineImageProcessImageTagProvider
*/
public function testDoSanitizeCallInlineImageProcessImageTag($sHtml, $iExpectedCount)
{
require_once APPROOT.'test/core/sanitizer/InlineImageMock.php';
$oSanitizer = new HTMLDOMSanitizer();
$oSanitizer->DoSanitize($sHtml);
$iCalledCount = \InlineImage::GetCallCounter();
$this->assertEquals($iExpectedCount, $iCalledCount);
}
public function CallInlineImageProcessImageTagProvider()
{
return array(
'no image' => array(
'html' => '<p>bar</p>',
'expected' => 0,
),
'basic image' => array(
'html' => '<img />',
'expected' => 1,
),
'nested images within forbidden tags' => array(
'html' => '<html><body><img /><iframe baz="1"><div baz="baz"><article baz="1" biz="2">baz<img /><img /></article>rab</div> oof<img /></iframe><img /></body></html>',
'expected' => 2,
),
// This test will be restored with the ticket n°2556
// 'nested images within forbidden and removed tags' => array(
// 'html' => '<html><body><img /><iframe baz="1"><div baz="baz"><object baz="1" biz="2">baz<img /><img /></object>rab</div> oof<img /></iframe><img /></body></html>',
// 'expected' => 2,
// ),
);
}
}

View File

@@ -0,0 +1,39 @@
<?php
/**
* Copyright (C) 2010-2021 Combodo SARL
*
* This file is part of iTop.
*
* iTop is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* iTop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with iTop. If not, see <http: *www.gnu.org/licenses/>
*
*/
/**
* Mock class used by @see \Combodo\iTop\Test\UnitTest\Core\HTMLDOMSanitizerTest
*
*/
class InlineImage
{
private static $iCallCounter = 0;
public static function ProcessImageTag(DOMNode $oNode)
{
self::$iCallCounter++;
}
public static function GetCallCounter()
{
return self::$iCallCounter;
}
}

View File

@@ -0,0 +1,53 @@
<?php
namespace Combodo\iTop\Test\UnitTest\Core\Sanitizer;
use SvgDOMSanitizer;
require_once __DIR__.'/AbstractDOMSanitizerTest.php';
/**
* @runTestsInSeparateProcesses
* @preserveGlobalState disabled
* @backupGlobals disabled
*/
class SvgDOMSanitizerTest extends AbstractDOMSanitizerTest
{
/**
* @dataProvider DoSanitizeProvider
*
* @param string $sFileToTest filename
*/
public function testDoSanitize($sFileToTest)
{
$sInputHtml = $this->ReadTestFile($sFileToTest, self::INPUT_DIRECTORY);
$sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY);
$sOutputHtml = $this->RemoveNewLines($sOutputHtml);
$oSanitizer = new SvgDOMSanitizer();
$sRes = $oSanitizer->DoSanitize($sInputHtml);
// Removing newlines as the parser gives different results depending on the PHP version
// Didn't manage to get it right :
// - no php.ini difference
// - playing with the parser preserveWhitespace/formatOutput parser options didn't help
// So we're removing new lines on both sides :/
$sOutputHtml = $this->RemoveNewLines($sOutputHtml);
$sRes = $this->RemoveNewLines($sRes);
$this->debug($sRes);
$this->assertEquals($sOutputHtml, $sRes);
}
public function DoSanitizeProvider()
{
return array(
array(
'scripts.svg',
),
);
}
}

View File

@@ -0,0 +1,7 @@
<h1>Test with lots of JS scripts to filter !</h1>
<p><img src="http://toto.invalid/" onerror="alert('hello world !');"></p>
<script>
alert("hello world !");
</script>

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" version="1.1" baseProfile="full" onload="alert('hello world !');">
<rect width="300" height="100" style="fill:rgb(0,0,255);stroke-width:3;stroke:rgb(0,0,0)"/>
<script type="text/javascript">
alert("XSS");
</script>
</svg>

After

Width:  |  Height:  |  Size: 418 B

View File

@@ -0,0 +1,14 @@
##START_TAG##
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam luctus semper diam et fermentum. Cras nisi mauris, rutrum id turpis at,
sagittis tempus erat. Sed tempus vel purus id sagittis. Suspendisse ullamcorper eros vel semper malesuada. Vivamus malesuada tellus quis
nisi consequat, quis tristique magna eleifend. Quisque eget turpis lacinia, vehicula turpis vel, aliquet diam. Aenean eu nunc ac velit
condimentum posuere. Vivamus congue velit cursus eros mollis, vitae eleifend urna finibus.
In accumsan sed sem nec sollicitudin. Sed pretium, neque et rhoncus volutpat, urna massa semper ex, et faucibus mauris sapien eu libero.
Sed vel accumsan nibh, tempus accumsan mi. Maecenas gravida imperdiet leo id euismod. Mauris pharetra mattis facilisis. Suspendisse
dictum vel orci ac luctus. Proin ultricies erat sit amet leo sollicitudin, quis lacinia felis volutpat. Praesent molestie quam et magna
tempor aliquet. Sed quam nisi, dictum ac gravida et, suscipit et augue. Fusce ac purus eget leo scelerisque bibendum. Proin in semper
erat, eu congue diam. Vivamus purus eros, consectetur laoreet gravida in, ultricies eget nibh. Mauris hendrerit euismod ex at facilisis.
Integer lacus eros, posuere finibus libero facilisis, eleifend gravida neque. Integer feugiat elit vel leo aliquet suscipit. Etiam
auctor ligula sed eros vulputate tristique ac eget magna.
##END_TAG##

View File

@@ -0,0 +1,3 @@
<h1>Test with lots of JS scripts to filter !</h1>
<p><img src="http://toto.invalid/"></p>

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" version="1.1" baseProfile="full">
<rect width="300" height="100" style="fill:rgb(0,0,255);stroke-width:3;stroke:rgb(0,0,0)"/>
</svg>

After

Width:  |  Height:  |  Size: 312 B