diff --git a/core/attributedef.class.inc.php b/core/attributedef.class.inc.php
index 21c31fdf2..89a853e65 100644
--- a/core/attributedef.class.inc.php
+++ b/core/attributedef.class.inc.php
@@ -7337,6 +7337,13 @@ class AttributeImage extends AttributeBlob
{
$oDoc = parent::MakeRealValue($proposedValue, $oHostObj);
+ if (($oDoc instanceof ormDocument)
+ && (false === $oDoc->IsEmpty())
+ && ($oDoc->GetMimeType() === 'image/svg+xml')) {
+ $sCleanSvg = HTMLSanitizer::Sanitize($oDoc->GetData(), 'svg_sanitizer');
+ $oDoc = new ormDocument($sCleanSvg, $oDoc->GetMimeType(), $oDoc->GetFileName());
+ }
+
// The validation of the MIME Type is done by CheckFormat below
return $oDoc;
}
diff --git a/core/config.class.inc.php b/core/config.class.inc.php
index 2f7508bae..dd8b7b114 100644
--- a/core/config.class.inc.php
+++ b/core/config.class.inc.php
@@ -1049,6 +1049,14 @@ class Config
'source_of_value' => '',
'show_in_conf_sample' => false,
),
+ 'svg_sanitizer' => array(
+ 'type' => 'string',
+ 'description' => 'The class to use for SVG sanitization : allow to provide a custom made sanitizer',
+ 'default' => 'SvgDOMSanitizer',
+ 'value' => '',
+ 'source_of_value' => '',
+ 'show_in_conf_sample' => false,
+ ),
'inline_image_max_display_width' => array(
'type' => 'integer',
'description' => 'The maximum width (in pixels) when displaying images inside an HTML formatted attribute. Images will be displayed using this this maximum width.',
diff --git a/core/htmlsanitizer.class.inc.php b/core/htmlsanitizer.class.inc.php
index 34a1747dc..4eada221d 100644
--- a/core/htmlsanitizer.class.inc.php
+++ b/core/htmlsanitizer.class.inc.php
@@ -34,43 +34,51 @@ abstract class HTMLSanitizer
/**
* Sanitize an HTML string with the configured sanitizer, falling back to HTMLDOMSanitizer in case of Exception or invalid configuration
+ *
* @param string $sHTML
+ * @param string $sConfigKey
+ *
* @return string
+ * @noinspection SelfClassReferencingInspection
*/
- public static function Sanitize($sHTML)
+ public static function Sanitize($sHTML, $sConfigKey = 'html_sanitizer')
{
- $sSanitizerClass = MetaModel::GetConfig()->Get('html_sanitizer');
- if(!class_exists($sSanitizerClass))
- {
- IssueLog::Warning('The configured "html_sanitizer" class "'.$sSanitizerClass.'" is not a valid class. Will use HTMLDOMSanitizer as the default sanitizer.');
+ $sSanitizerClass = MetaModel::GetConfig()->Get($sConfigKey);
+ if (!class_exists($sSanitizerClass)) {
+ IssueLog::Warning('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a valid class. Will use HTMLDOMSanitizer as the default sanitizer.');
$sSanitizerClass = 'HTMLDOMSanitizer';
+ } else if (false === is_subclass_of($sSanitizerClass, HTMLSanitizer::class)) {
+ if ($sConfigKey === 'html_sanitizer') {
+ IssueLog::Warning('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a subclass of HTMLSanitizer. Will use HTMLDOMSanitizer as the default sanitizer.');
+ $sSanitizerClass = 'HTMLDOMSanitizer';
+ }
+ if ($sConfigKey === 'svg_sanitizer') {
+ IssueLog::Error('The configured "'.$sConfigKey.'" class "'.$sSanitizerClass.'" is not a subclass of '.HTMLSanitizer::class.' ! Won\'t sanitize string.');
+
+ return $sHTML;
+ }
}
- else if(!is_subclass_of($sSanitizerClass, 'HTMLSanitizer'))
- {
- IssueLog::Warning('The configured "html_sanitizer" class "'.$sSanitizerClass.'" is not a subclass of HTMLSanitizer. Will use HTMLDOMSanitizer as the default sanitizer.');
- $sSanitizerClass = 'HTMLDOMSanitizer';
- }
-
- try
- {
+
+ try {
$oSanitizer = new $sSanitizerClass();
$sCleanHTML = $oSanitizer->DoSanitize($sHTML);
}
- catch(Exception $e)
- {
- if($sSanitizerClass != 'HTMLDOMSanitizer')
- {
- IssueLog::Warning('Failed to sanitize an HTML string with "'.$sSanitizerClass.'". The following exception occured: '.$e->getMessage());
- IssueLog::Warning('Will try to sanitize with HTMLDOMSanitizer.');
- // try again with the HTMLDOMSanitizer
- $oSanitizer = new HTMLDOMSanitizer();
- $sCleanHTML = $oSanitizer->DoSanitize($sHTML);
- }
- else
- {
- IssueLog::Error('Failed to sanitize an HTML string with "HTMLDOMSanitizer". The following exception occured: '.$e->getMessage());
- IssueLog::Error('The HTML will NOT be sanitized.');
- $sCleanHTML = $sHTML;
+ catch(Exception $e) {
+ if ($sConfigKey === 'html_sanitizer') {
+ if ($sSanitizerClass !== HTMLDOMSanitizer::class) {
+ IssueLog::Warning('Failed to sanitize an HTML string with "'.$sSanitizerClass.'". The following exception occured: '.$e->getMessage());
+ IssueLog::Warning('Will try to sanitize with HTMLDOMSanitizer.');
+ // try again with the HTMLDOMSanitizer
+ $oSanitizer = new HTMLDOMSanitizer();
+ $sCleanHTML = $oSanitizer->DoSanitize($sHTML);
+ } else {
+ IssueLog::Error('Failed to sanitize an HTML string with "HTMLDOMSanitizer". The following exception occured: '.$e->getMessage());
+ IssueLog::Error('The HTML will NOT be sanitized.');
+ $sCleanHTML = $sHTML;
+ }
+ } else {
+ IssueLog::Error('Failed to sanitize string with "'.$sSanitizerClass.'", will return original value ! Exception: '.$e->getMessage());
+ $sCleanHTML = $sHTML;
}
}
return $sCleanHTML;
@@ -94,67 +102,167 @@ class HTMLNullSanitizer extends HTMLSanitizer
{
return $sHTML;
}
-
+
}
+
/**
- * A standard-compliant HTMLSanitizer based on the HTMLPurifier library by Edward Z. Yang
- * Complete but quite slow
- * http://htmlpurifier.org
+ * Common implementation for sanitizer using DOM parsing
*/
-/*
-class HTMLPurifierSanitizer extends HTMLSanitizer
+abstract class DOMSanitizer extends HTMLSanitizer
{
- protected static $oPurifier = null;
-
- public function __construct()
- {
- if (self::$oPurifier == null)
- {
- $sLibPath = APPROOT.'lib/htmlpurifier/HTMLPurifier.auto.php';
- if (!file_exists($sLibPath))
- {
- throw new Exception("Missing library '$sLibPath', cannot use HTMLPurifierSanitizer.");
- }
- require_once($sLibPath);
-
- $oPurifierConfig = HTMLPurifier_Config::createDefault();
- $oPurifierConfig->set('Core.Encoding', 'UTF-8'); // defaults to 'UTF-8'
- $oPurifierConfig->set('HTML.Doctype', 'XHTML 1.0 Strict'); // defaults to 'XHTML 1.0 Transitional'
- $oPurifierConfig->set('URI.AllowedSchemes', array (
- 'http' => true,
- 'https' => true,
- 'data' => true, // This one is not present by default
- ));
- $sPurifierCache = APPROOT.'data/HTMLPurifier';
- if (!is_dir($sPurifierCache))
- {
- mkdir($sPurifierCache);
- }
- if (!is_dir($sPurifierCache))
- {
- throw new Exception("Could not create the cache directory '$sPurifierCache'");
- }
- $oPurifierConfig->set('Cache.SerializerPath', $sPurifierCache); // no trailing slash
- self::$oPurifier = new HTMLPurifier($oPurifierConfig);
- }
- }
-
+ /** @var DOMDocument */
+ protected $oDoc;
+
+ abstract public function GetTagsWhiteList();
+
+ abstract public function GetTagsBlackList();
+
+ abstract public function GetAttrsWhiteList();
+
+ abstract public function GetAttrsBlackList();
+
+ abstract public function GetStylesWhiteList();
+
public function DoSanitize($sHTML)
{
- $sCleanHtml = self::$oPurifier->purify($sHTML);
- return $sCleanHtml;
+ $this->oDoc = new DOMDocument();
+ $this->oDoc->preserveWhitespace = true;
+
+ // MS outlook implements empty lines by the mean of
+ // We have to transform that into
(which is how Thunderbird implements empty lines)
+ // Unfortunately, DOMDocument::loadHTML does not take the tag namespaces into account (once loaded there is no way to know if the tag did have a namespace)
+ // therefore we have to do the transformation upfront
+ $sHTML = preg_replace('@(\s| )*@', ' ', $sHTML);
+
+ $this->LoadDoc($sHTML);
+
+ $this->CleanNode($this->oDoc);
+
+ $sCleanHtml = $this->PrintDoc();
+
+ return $sCleanHtml;
+ }
+
+ abstract public function LoadDoc($sHTML);
+
+ /**
+ * @return string cleaned source
+ * @uses \DOMSanitizer::oDoc
+ */
+ abstract public function PrintDoc();
+
+ protected function CleanNode(DOMNode $oElement)
+ {
+ $aAttrToRemove = array();
+ // Gather the attributes to remove
+ if ($oElement->hasAttributes()) {
+ foreach ($oElement->attributes as $oAttr) {
+ $sAttr = strtolower($oAttr->name);
+ if ((false === empty($this->GetAttrsBlackList()))
+ && (in_array($sAttr, $this->GetAttrsBlackList(), true))) {
+ $aAttrToRemove[] = $oAttr->name;
+ } else if ((false === empty($this->GetTagsWhiteList()))
+ && (false === in_array($sAttr, $this->GetTagsWhiteList()[strtolower($oElement->tagName)]))) {
+ $aAttrToRemove[] = $oAttr->name;
+ } else if (!$this->IsValidAttributeContent($sAttr, $oAttr->value)) {
+ // Invalid content
+ $aAttrToRemove[] = $oAttr->name;
+ } else if ($sAttr == 'style') {
+ // Special processing for style tags
+ $sCleanStyle = $this->CleanStyle($oAttr->value);
+ if ($sCleanStyle == '') {
+ // Invalid content
+ $aAttrToRemove[] = $oAttr->name;
+ } else {
+ $oElement->setAttribute($oAttr->name, $sCleanStyle);
+ }
+ }
+ }
+ // Now remove them
+ foreach($aAttrToRemove as $sName)
+ {
+ $oElement->removeAttribute($sName);
+ }
+ }
+
+ if ($oElement->hasChildNodes())
+ {
+ $aChildElementsToRemove = array();
+ // Gather the child noes to remove
+ foreach($oElement->childNodes as $oNode) {
+ if ($oNode instanceof DOMElement) {
+ $sNodeTagName = strtolower($oNode->tagName);
+ }
+ if (($oNode instanceof DOMElement)
+ && (false === empty($this->GetTagsBlackList()))
+ && (in_array($sNodeTagName, $this->GetTagsBlackList(), true))) {
+ $aChildElementsToRemove[] = $oNode;
+ } else if (($oNode instanceof DOMElement)
+ && (false === empty($this->GetTagsWhiteList()))
+ && (false === array_key_exists($sNodeTagName, $this->GetTagsWhiteList()))) {
+ $aChildElementsToRemove[] = $oNode;
+ } else if ($oNode instanceof DOMComment) {
+ $aChildElementsToRemove[] = $oNode;
+ } else {
+ // Recurse
+ $this->CleanNode($oNode);
+ if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img')) {
+ InlineImage::ProcessImageTag($oNode);
+ }
+ }
+ }
+ // Now remove them
+ foreach($aChildElementsToRemove as $oDomElement)
+ {
+ $oElement->removeChild($oDomElement);
+ }
+ }
+ }
+
+ protected function IsValidAttributeContent($sAttributeName, $sValue)
+ {
+ if ((false === empty($this->GetAttrsBlackList()))
+ && (in_array($sAttributeName, $this->GetAttrsBlackList(), true))) {
+ return true;
+ }
+
+ if (array_key_exists($sAttributeName, $this->GetAttrsWhiteList())) {
+ return preg_match($this->GetAttrsWhiteList()[$sAttributeName], $sValue);
+ }
+
+ return true;
+ }
+
+ protected function CleanStyle($sStyle)
+ {
+ if (empty($this->GetStylesWhiteList())) {
+ return $sStyle;
+ }
+
+ $aAllowedStyles = array();
+ $aItems = explode(';', $sStyle);
+ {
+ foreach ($aItems as $sItem) {
+ $aElements = explode(':', trim($sItem));
+ if (in_array(trim(strtolower($aElements[0])), $this->GetStylesWhiteList())) {
+ $aAllowedStyles[] = trim($sItem);
+ }
+ }
+ }
+
+ return implode(';', $aAllowedStyles);
}
}
-*/
-class HTMLDOMSanitizer extends HTMLSanitizer
+
+class HTMLDOMSanitizer extends DOMSanitizer
{
protected $oDoc;
/**
- * @var array
* @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations
+ * @var array
*/
protected static $aTagsWhiteList = array(
'html' => array(),
@@ -203,6 +311,7 @@ class HTMLDOMSanitizer extends HTMLSanitizer
'q' => array(),
'hr' => array('style'),
'pre' => array(),
+ 'center' => array(),
);
protected static $aAttrsWhiteList = array(
@@ -210,8 +319,8 @@ class HTMLDOMSanitizer extends HTMLSanitizer
);
/**
- * @var array
* @see https://www.itophub.io/wiki/page?id=2_6_0%3Aadmin%3Arich_text_limitations
+ * @var array
*/
protected static $aStylesWhiteList = array(
'background-color',
@@ -235,160 +344,199 @@ class HTMLDOMSanitizer extends HTMLSanitizer
'white-space',
);
+ public function GetTagsWhiteList()
+ {
+ return static::$aTagsWhiteList;
+ }
+
+ public function GetTagsBlackList()
+ {
+ return [];
+ }
+
+ public function GetAttrsWhiteList()
+ {
+ return static::$aAttrsWhiteList;
+ }
+
+ public function GetAttrsBlackList()
+ {
+ return [];
+ }
+
+ public function GetStylesWhiteList()
+ {
+ return static::$aStylesWhiteList;
+ }
+
public function __construct()
{
+ parent::__construct();
+
// Building href validation pattern from url and email validation patterns as the patterns are not used the same way in HTML content than in standard attributes value.
// eg. "foo@bar.com" vs "mailto:foo@bar.com?subject=Title&body=Hello%20world"
- if (!array_key_exists('href', self::$aAttrsWhiteList))
- {
+ if (!array_key_exists('href', self::$aAttrsWhiteList)) {
// Regular urls
$sUrlPattern = utils::GetConfig()->Get('url_validation_pattern');
// Mailto urls
- $sMailtoPattern = '(mailto:(' . utils::GetConfig()->Get('email_validation_pattern') . ')(?:\?(?:subject|body)=([a-zA-Z0-9+\$_.-]*)(?:&(?:subject|body)=([a-zA-Z0-9+\$_.-]*))?)?)';
+ $sMailtoPattern = '(mailto:('.utils::GetConfig()->Get('email_validation_pattern').')(?:\?(?:subject|body)=([a-zA-Z0-9+\$_.-]*)(?:&(?:subject|body)=([a-zA-Z0-9+\$_.-]*))?)?)';
// Notification placeholders
// eg. $this->caller_id$, $this->hyperlink()$, $this->hyperlink(portal)$, $APP_URL$, $MODULES_URL$, ...
// Note: Authorize both $xxx$ and %24xxx%24 as the latter one is encoded when used in HTML attributes (eg. a[href])
$sPlaceholderPattern = '(\$|%24)[\w-]*(->[\w]*(\([\w-]*?\))?)?(\$|%24)';
- $sPattern = $sUrlPattern . '|' . $sMailtoPattern . '|' . $sPlaceholderPattern;
+ $sPattern = $sUrlPattern.'|'.$sMailtoPattern.'|'.$sPlaceholderPattern;
$sPattern = '/'.str_replace('/', '\/', $sPattern).'/i';
self::$aAttrsWhiteList['href'] = $sPattern;
}
}
- public function DoSanitize($sHTML)
+ public function LoadDoc($sHTML)
{
- $this->oDoc = new DOMDocument();
- $this->oDoc->preserveWhitespace = true;
-
- // MS outlook implements empty lines by the mean of
- // We have to transform that into
(which is how Thunderbird implements empty lines)
- // Unfortunately, DOMDocument::loadHTML does not take the tag namespaces into account (once loaded there is no way to know if the tag did have a namespace)
- // therefore we have to do the transformation upfront
- $sHTML = preg_replace('@(\s| )*@', ' ', $sHTML);
-
@$this->oDoc->loadHTML(''.$sHTML); // For loading HTML chunks where the character set is not specified
-
- $this->CleanNode($this->oDoc);
-
+ }
+
+ public function PrintDoc()
+ {
$oXPath = new DOMXPath($this->oDoc);
$sXPath = "//body";
$oNodesList = $oXPath->query($sXPath);
-
- if ($oNodesList->length == 0)
- {
+
+ if ($oNodesList->length == 0) {
// No body, save the whole document
$sCleanHtml = $this->oDoc->saveHTML();
- }
- else
- {
+ } else {
// Export only the content of the body tag
$sCleanHtml = $this->oDoc->saveHTML($oNodesList->item(0));
// remove the body tag itself
- $sCleanHtml = str_replace( array('', ''), '', $sCleanHtml);
+ $sCleanHtml = str_replace(array('', ''), '', $sCleanHtml);
}
-
+
return $sCleanHtml;
}
-
- protected function CleanNode(DOMNode $oElement)
+}
+
+
+/**
+ * @since 2.6.5 2.7.6 3.0.0 N°4360
+ */
+class SvgDOMSanitizer extends DOMSanitizer
+{
+ public function GetTagsWhiteList()
{
- $aAttrToRemove = array();
- // Gather the attributes to remove
- if ($oElement->hasAttributes())
- {
- foreach($oElement->attributes as $oAttr)
- {
- $sAttr = strtolower($oAttr->name);
- if (!in_array($sAttr, self::$aTagsWhiteList[strtolower($oElement->tagName)]))
- {
- // Forbidden (or unknown) attribute
- $aAttrToRemove[] = $oAttr->name;
- }
- else if (!$this->IsValidAttributeContent($sAttr, $oAttr->value))
- {
- // Invalid content
- $aAttrToRemove[] = $oAttr->name;
- }
- else if ($sAttr == 'style')
- {
- // Special processing for style tags
- $sCleanStyle = $this->CleanStyle($oAttr->value);
- if ($sCleanStyle == '')
- {
- // Invalid content
- $aAttrToRemove[] = $oAttr->name;
- }
- else
- {
- $oElement->setAttribute($oAttr->name, $sCleanStyle);
- }
- }
- }
- // Now remove them
- foreach($aAttrToRemove as $sName)
- {
- $oElement->removeAttribute($sName);
- }
- }
-
- if ($oElement->hasChildNodes())
- {
- $aChildElementsToRemove = array();
- // Gather the child noes to remove
- foreach($oElement->childNodes as $oNode)
- {
- if (($oNode instanceof DOMElement) && (!array_key_exists(strtolower($oNode->tagName), self::$aTagsWhiteList)))
- {
- $aChildElementsToRemove[] = $oNode;
- }
- else if ($oNode instanceof DOMComment)
- {
- $aChildElementsToRemove[] = $oNode;
- }
- else
- {
- // Recurse
- $this->CleanNode($oNode);
- if (($oNode instanceof DOMElement) && (strtolower($oNode->tagName) == 'img'))
- {
- InlineImage::ProcessImageTag($oNode);
- }
- }
- }
- // Now remove them
- foreach($aChildElementsToRemove as $oDomElement)
- {
- $oElement->removeChild($oDomElement);
- }
- }
+ return [];
}
- protected function CleanStyle($sStyle)
+ /**
+ * @return string[]
+ * @link https://developer.mozilla.org/en-US/docs/Web/SVG/Element/script
+ */
+ public function GetTagsBlackList()
{
- $aAllowedStyles = array();
- $aItems = explode(';', $sStyle);
- {
- foreach($aItems as $sItem)
- {
- $aElements = explode(':', trim($sItem));
- if (in_array(trim(strtolower($aElements[0])), static::$aStylesWhiteList))
- {
- $aAllowedStyles[] = trim($sItem);
- }
- }
- }
- return implode(';', $aAllowedStyles);
+ return [
+ 'script',
+ ];
}
-
- protected function IsValidAttributeContent($sAttributeName, $sValue)
+
+ public function GetAttrsWhiteList()
{
- if (array_key_exists($sAttributeName, self::$aAttrsWhiteList))
- {
- return preg_match(self::$aAttrsWhiteList[$sAttributeName], $sValue);
- }
- return true;
+ return [];
}
-}
\ No newline at end of file
+
+ /**
+ * @return string[]
+ * @link https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/Events#document_event_attributes
+ */
+ public function GetAttrsBlackList()
+ {
+ return [
+ 'onbegin',
+ 'onbegin',
+ 'onrepeat',
+ 'onabort',
+ 'onerror',
+ 'onerror',
+ 'onscroll',
+ 'onunload',
+ 'oncopy',
+ 'oncut',
+ 'onpaste',
+ 'oncancel',
+ 'oncanplay',
+ 'oncanplaythrough',
+ 'onchange',
+ 'onclick',
+ 'onclose',
+ 'oncuechange',
+ 'ondblclick',
+ 'ondrag',
+ 'ondragend',
+ 'ondragenter',
+ 'ondragleave',
+ 'ondragover',
+ 'ondragstart',
+ 'ondrop',
+ 'ondurationchange',
+ 'onemptied',
+ 'onended',
+ 'onerror',
+ 'onfocus',
+ 'oninput',
+ 'oninvalid',
+ 'onkeydown',
+ 'onkeypress',
+ 'onkeyup',
+ 'onload',
+ 'onloadeddata',
+ 'onloadedmetadata',
+ 'onloadstart',
+ 'onmousedown',
+ 'onmouseenter',
+ 'onmouseleave',
+ 'onmousemove',
+ 'onmouseout',
+ 'onmouseover',
+ 'onmouseup',
+ 'onmousewheel',
+ 'onpause',
+ 'onplay',
+ 'onplaying',
+ 'onprogress',
+ 'onratechange',
+ 'onreset',
+ 'onresize',
+ 'onscroll',
+ 'onseeked',
+ 'onseeking',
+ 'onselect',
+ 'onshow',
+ 'onstalled',
+ 'onsubmit',
+ 'onsuspend',
+ 'ontimeupdate',
+ 'ontoggle',
+ 'onvolumechange',
+ 'onwaiting',
+ 'onactivate',
+ 'onfocusin',
+ 'onfocusout',
+ ];
+ }
+
+ public function GetStylesWhiteList()
+ {
+ return [];
+ }
+
+ public function LoadDoc($sHTML)
+ {
+ @$this->oDoc->loadXml($sHTML, LIBXML_NOBLANKS);
+ }
+
+ public function PrintDoc()
+ {
+ return $this->oDoc->saveXML();
+ }
+}
diff --git a/test/core/sanitizer/AbstractDOMSanitizerTest.php b/test/core/sanitizer/AbstractDOMSanitizerTest.php
new file mode 100644
index 000000000..469de3f2a
--- /dev/null
+++ b/test/core/sanitizer/AbstractDOMSanitizerTest.php
@@ -0,0 +1,75 @@
+ReadTestFile($sFileToTest, self::INPUT_DIRECTORY);
+ $sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY);
+ $sOutputHtml = $this->RemoveNewLines($sOutputHtml);
+
+ $oSanitizer = new HTMLDOMSanitizer();
+ $sRes = $oSanitizer->DoSanitize($sInputHtml);
+
+ // Removing newlines as the parser gives different results depending on the PHP version
+ // Didn't manage to get it right :
+ // - no php.ini difference
+ // - playing with the parser preserveWhitespace/formatOutput parser options didn't help
+ // So we're removing new lines on both sides :/
+ $sOutputHtml = $this->RemoveNewLines($sOutputHtml);
+ $sRes = $this->RemoveNewLines($sRes);
+
+ $this->debug($sRes);
+ $this->assertEquals($sOutputHtml, $sRes);
+ }
+
+ public function DoSanitizeProvider()
+ {
+ return array(
+ array(
+ 'scripts.html',
+ ),
+ );
+ }
+
+ /**
+ * @dataProvider WhiteListProvider
+ *
+ * @param string $sHtmlToTest HTML content
+ */
+ public function testDoSanitizeWhiteList($sHtmlToTest)
+ {
+ $oSanitizer = new HTMLDOMSanitizer();
+ $sRes = $oSanitizer->DoSanitize($sHtmlToTest);
+
+ // Removing newlines as the parser gives different results depending on the PHP version
+ // Didn't manage to get it right :
+ // - no php.ini difference
+ // - playing with the parser preserveWhitespace/formatOutput parser options didn't help
+ // So we're removing new lines on both sides :/
+ $sHtmlToTest = $this->RemoveNewLines($sHtmlToTest);
+ $sRes = $this->RemoveNewLines($sRes);
+
+ $this->debug($sRes);
+ $this->assertEquals($sHtmlToTest, $sRes);
+ }
+
+ public function WhiteListProvider()
+ {
+ // This is a copy of \HTMLDOMSanitizer::$aTagsWhiteList
+ // should stay a copy as we want to check we're not removing something by mistake as it was done with the CENTER tag (N°2558)
+ $aTagsWhiteList = array(
+ // we don't test HTML and BODY as the parser removes them if context isn't appropriate
+ 'a' => array('href', 'name', 'style', 'target', 'title'),
+ 'p' => array('style'),
+ 'blockquote' => array('style'),
+ 'br' => array(),
+ 'span' => array('style'),
+ 'div' => array('style'),
+ 'b' => array(),
+ 'i' => array(),
+ 'u' => array(),
+ 'em' => array(),
+ 'strong' => array(),
+ 'img' => array('src', 'style', 'alt', 'title'),
+ 'ul' => array('style'),
+ 'ol' => array('style'),
+ 'li' => array('style'),
+ 'h1' => array('style'),
+ 'h2' => array('style'),
+ 'h3' => array('style'),
+ 'h4' => array('style'),
+ 'nav' => array('style'),
+ 'section' => array('style'),
+ 'code' => array('style'),
+ 'table' => array('style', 'width', 'summary', 'align', 'border', 'cellpadding', 'cellspacing'),
+ 'thead' => array('style'),
+ 'tbody' => array('style'),
+ 'tr' => array('style', 'colspan', 'rowspan'),
+ 'td' => array('style', 'colspan', 'rowspan'),
+ 'th' => array('style', 'colspan', 'rowspan'),
+ 'fieldset' => array('style'),
+ 'legend' => array('style'),
+ 'font' => array('face', 'color', 'style', 'size'),
+ 'big' => array(),
+ 'small' => array(),
+ 'tt' => array(),
+ 'kbd' => array(),
+ 'samp' => array(),
+ 'var' => array(),
+ 'del' => array(),
+ 's' => array(), // strikethrough
+ 'ins' => array(),
+ 'cite' => array(),
+ 'q' => array(),
+ 'hr' => array('style'),
+ 'pre' => array(),
+ 'center' => array(),
+ );
+ $aTestCaseArray = array();
+
+ $sInputText = $this->ReadTestFile('whitelist_test.html', self::INPUT_DIRECTORY);
+ foreach ($aTagsWhiteList as $sTag => $aTagAttributes) {
+ $sTestCaseText = $sInputText;
+ $sStartTag = "<$sTag";
+ $iAttrCounter = 0;
+ foreach ($aTagAttributes as $sTagAttribute) {
+ $sStartTag .= $this->GetTagAttributeValue($sTagAttribute, $iAttrCounter);
+ $iAttrCounter++;
+ }
+ $sStartTag .= '>';
+ $sTestCaseText = str_replace('##START_TAG##', $sStartTag, $sTestCaseText);
+
+ $sClosingTag = $this->IsClosingTag($sTag) ? "$sTag>" : '';
+ $sTestCaseText = str_replace('##END_TAG##', $sClosingTag, $sTestCaseText);
+
+ $aTestCaseArray[$sTag] = array($sTestCaseText);
+ }
+
+ return $aTestCaseArray;
+ }
+
+ /**
+ * @dataProvider RemoveBlackListedTagContentProvider
+ */
+ public function testDoSanitizeRemoveBlackListedTagContent($html, $expected)
+ {
+ $oSanitizer = new HTMLDOMSanitizer();
+ $sSanitizedHtml = $oSanitizer->DoSanitize($html);
+
+ $this->assertEquals($expected, str_replace("\n", '', $sSanitizedHtml));
+ }
+
+ public function RemoveBlackListedTagContentProvider()
+ {
+ return array(
+ 'basic' => array(
+ 'html' => 'foobaz',
+ 'expected' => '
foobaz
',
+ ),
+ 'basic with body' => array(
+ 'html' => 'foobaz',
+ 'expected' => 'foobaz',
+ ),
+ 'basic with html and body tags' => array(
+ 'html' => 'foobaz',
+ 'expected' => 'foobaz',
+ ),
+ 'basic with attributes' => array(
+ 'html' => 'foobaz',
+ 'expected' => '
oof',
+ 'expected' => 2,
+ ),
+// This test will be restored with the ticket n°2556
+// 'nested images within forbidden and removed tags' => array(
+// 'html' => '
rab
oof',
+// 'expected' => 2,
+// ),
+ );
+ }
+}
+
diff --git a/test/core/sanitizer/InlineImageMock.php b/test/core/sanitizer/InlineImageMock.php
new file mode 100644
index 000000000..9ebcda852
--- /dev/null
+++ b/test/core/sanitizer/InlineImageMock.php
@@ -0,0 +1,39 @@
+
+ *
+ */
+
+/**
+ * Mock class used by @see \Combodo\iTop\Test\UnitTest\Core\HTMLDOMSanitizerTest
+ *
+ */
+class InlineImage
+{
+ private static $iCallCounter = 0;
+
+ public static function ProcessImageTag(DOMNode $oNode)
+ {
+ self::$iCallCounter++;
+ }
+
+ public static function GetCallCounter()
+ {
+ return self::$iCallCounter;
+ }
+}
\ No newline at end of file
diff --git a/test/core/sanitizer/SvgDOMSanitizerTest.php b/test/core/sanitizer/SvgDOMSanitizerTest.php
new file mode 100644
index 000000000..b45a95a39
--- /dev/null
+++ b/test/core/sanitizer/SvgDOMSanitizerTest.php
@@ -0,0 +1,53 @@
+ReadTestFile($sFileToTest, self::INPUT_DIRECTORY);
+ $sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY);
+ $sOutputHtml = $this->RemoveNewLines($sOutputHtml);
+
+ $oSanitizer = new SvgDOMSanitizer();
+ $sRes = $oSanitizer->DoSanitize($sInputHtml);
+
+ // Removing newlines as the parser gives different results depending on the PHP version
+ // Didn't manage to get it right :
+ // - no php.ini difference
+ // - playing with the parser preserveWhitespace/formatOutput parser options didn't help
+ // So we're removing new lines on both sides :/
+ $sOutputHtml = $this->RemoveNewLines($sOutputHtml);
+ $sRes = $this->RemoveNewLines($sRes);
+
+ $this->debug($sRes);
+ $this->assertEquals($sOutputHtml, $sRes);
+ }
+
+ public function DoSanitizeProvider()
+ {
+ return array(
+ array(
+ 'scripts.svg',
+ ),
+ );
+ }
+}
+
diff --git a/test/core/sanitizer/input/scripts.html b/test/core/sanitizer/input/scripts.html
new file mode 100644
index 000000000..5dceaecae
--- /dev/null
+++ b/test/core/sanitizer/input/scripts.html
@@ -0,0 +1,7 @@
+
Test with lots of JS scripts to filter !
+
+
+
+
diff --git a/test/core/sanitizer/input/scripts.svg b/test/core/sanitizer/input/scripts.svg
new file mode 100644
index 000000000..eba8dbd7d
--- /dev/null
+++ b/test/core/sanitizer/input/scripts.svg
@@ -0,0 +1,8 @@
+
+
+
\ No newline at end of file
diff --git a/test/core/sanitizer/input/whitelist_test.html b/test/core/sanitizer/input/whitelist_test.html
new file mode 100644
index 000000000..556812da7
--- /dev/null
+++ b/test/core/sanitizer/input/whitelist_test.html
@@ -0,0 +1,14 @@
+##START_TAG##
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam luctus semper diam et fermentum. Cras nisi mauris, rutrum id turpis at,
+sagittis tempus erat. Sed tempus vel purus id sagittis. Suspendisse ullamcorper eros vel semper malesuada. Vivamus malesuada tellus quis
+nisi consequat, quis tristique magna eleifend. Quisque eget turpis lacinia, vehicula turpis vel, aliquet diam. Aenean eu nunc ac velit
+condimentum posuere. Vivamus congue velit cursus eros mollis, vitae eleifend urna finibus.
+
+In accumsan sed sem nec sollicitudin. Sed pretium, neque et rhoncus volutpat, urna massa semper ex, et faucibus mauris sapien eu libero.
+Sed vel accumsan nibh, tempus accumsan mi. Maecenas gravida imperdiet leo id euismod. Mauris pharetra mattis facilisis. Suspendisse
+dictum vel orci ac luctus. Proin ultricies erat sit amet leo sollicitudin, quis lacinia felis volutpat. Praesent molestie quam et magna
+tempor aliquet. Sed quam nisi, dictum ac gravida et, suscipit et augue. Fusce ac purus eget leo scelerisque bibendum. Proin in semper
+erat, eu congue diam. Vivamus purus eros, consectetur laoreet gravida in, ultricies eget nibh. Mauris hendrerit euismod ex at facilisis.
+Integer lacus eros, posuere finibus libero facilisis, eleifend gravida neque. Integer feugiat elit vel leo aliquet suscipit. Etiam
+auctor ligula sed eros vulputate tristique ac eget magna.
+##END_TAG##
diff --git a/test/core/sanitizer/output/scripts.html b/test/core/sanitizer/output/scripts.html
new file mode 100644
index 000000000..ac0043227
--- /dev/null
+++ b/test/core/sanitizer/output/scripts.html
@@ -0,0 +1,3 @@
+
Test with lots of JS scripts to filter !
+
+
diff --git a/test/core/sanitizer/output/scripts.svg b/test/core/sanitizer/output/scripts.svg
new file mode 100644
index 000000000..99308ba27
--- /dev/null
+++ b/test/core/sanitizer/output/scripts.svg
@@ -0,0 +1,4 @@
+
+
\ No newline at end of file