Files
iTop/test/core/HTMLDOMSanitizerTest.php
2019-12-03 11:09:30 +01:00

215 lines
5.9 KiB
PHP

<?php
namespace Combodo\iTop\Test\UnitTest\Core;
use Combodo\iTop\Test\UnitTest\ItopTestCase;
use HTMLDOMSanitizer;
/**
 * Unit tests for HTMLDOMSanitizer.
 *
 * Two checks are performed:
 * - a fixture based one: an input file is sanitized and compared with the expected output file of the same name
 * - a whitelist based one: for every tag of a local copy of the whitelist, a test document using this tag
 *   (with all of its listed attributes) must come out of the sanitizer unchanged
 *
 * @runTestsInSeparateProcesses
 * @preserveGlobalState disabled
 * @backupGlobals disabled
 */
class HTMLDOMSanitizerTest extends ItopTestCase
{
    const INPUT_DIRECTORY = 'sanitizer/input';
    const OUTPUT_DIRECTORY = 'sanitizer/output';

    /**
     * Sanitizes a file of the input directory and compares the result with the file
     * of the same name in the output directory (newlines being ignored on both sides).
     *
     * @dataProvider DoSanitizeProvider
     *
     * @param string $sFileToTest filename
     */
    public function testDoSanitize($sFileToTest)
    {
        $sInputHtml = $this->ReadTestFile($sFileToTest, self::INPUT_DIRECTORY);
        $sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY);

        $oSanitizer = new HTMLDOMSanitizer();
        $sRes = $oSanitizer->DoSanitize($sInputHtml);

        // Removing newlines as the parser gives different results depending on the PHP version
        // Didn't manage to get it right :
        // - no php.ini difference
        // - playing with the parser preserveWhitespace/formatOutput parser options didn't help
        // So we're removing new lines on both sides :/
        $sOutputHtml = $this->RemoveNewLines($sOutputHtml);
        $sRes = $this->RemoveNewLines($sRes);

        $this->debug($sRes);
        $this->assertEquals($sOutputHtml, $sRes);
    }

    /**
     * Reads a test file located in a sub folder of the directory containing this test class.
     *
     * @param string $sFileToTest filename
     * @param string $sFolderName folder path, relative to the directory of this file
     *
     * @return string file content
     *
     * @throws \RuntimeException if the file cannot be read
     */
    private function ReadTestFile($sFileToTest, $sFolderName)
    {
        $sFilePath = __DIR__.DIRECTORY_SEPARATOR
            .$sFolderName.DIRECTORY_SEPARATOR
            .$sFileToTest;

        $sContent = file_get_contents($sFilePath);
        if ($sContent === false)
        {
            // Fail loudly instead of letting "false" flow into the sanitizer or the assertions
            throw new \RuntimeException('Unable to read test file: '.$sFilePath);
        }

        return $sContent;
    }

    /**
     * Removes every line break (CRLF, CR and LF) from the given text.
     *
     * @param string $sText
     *
     * @return string text without any carriage return nor line feed character
     */
    private function RemoveNewLines($sText)
    {
        return str_replace(array("\r", "\n"), '', $sText);
    }

    /**
     * @return array[] one entry per test case, each containing a filename present in both the input and output directories
     */
    public function DoSanitizeProvider()
    {
        return array(
            array(
                'utf-8_wrong_character_email_truncated.txt',
            ),
        );
    }

    /**
     * Checks that the sanitizer leaves the given HTML untouched (newlines being ignored on both sides).
     *
     * @dataProvider WhiteListProvider
     *
     * @param string $sHtmlToTest HTML content
     */
    public function testDoSanitizeWhiteList($sHtmlToTest)
    {
        $oSanitizer = new HTMLDOMSanitizer();
        $sRes = $oSanitizer->DoSanitize($sHtmlToTest);

        // Removing newlines as the parser gives different results depending on the PHP version
        // Didn't manage to get it right :
        // - no php.ini difference
        // - playing with the parser preserveWhitespace/formatOutput parser options didn't help
        // So we're removing new lines on both sides :/
        $sHtmlToTest = $this->RemoveNewLines($sHtmlToTest);
        $sRes = $this->RemoveNewLines($sRes);

        $this->debug($sRes);
        $this->assertEquals($sHtmlToTest, $sRes);
    }

    /**
     * Builds one test case per whitelisted tag, from the whitelist_test.html template of the input directory :
     * the ##START_TAG## placeholder is replaced by the opening tag (with all of its listed attributes)
     * and the ##END_TAG## placeholder by the closing tag (or an empty string for tags without closing tag).
     *
     * @return array[] test cases indexed by tag name, each containing the generated HTML
     */
    public function WhiteListProvider()
    {
        // This is a copy of \HTMLDOMSanitizer::$aTagsWhiteList
        // should stay a copy as we want to check we're not removing something by mistake as it was done with the CENTER tag (N°2558)
        $aTagsWhiteList = array(
            // we don't test HTML and BODY as the parser removes them if context isn't appropriate
            'a' => array('href', 'name', 'style', 'target', 'title'),
            'p' => array('style'),
            'blockquote' => array('style'),
            'br' => array(),
            'span' => array('style'),
            'div' => array('style'),
            'b' => array(),
            'i' => array(),
            'u' => array(),
            'em' => array(),
            'strong' => array(),
            'img' => array('src', 'style', 'alt', 'title'),
            'ul' => array('style'),
            'ol' => array('style'),
            'li' => array('style'),
            'h1' => array('style'),
            'h2' => array('style'),
            'h3' => array('style'),
            'h4' => array('style'),
            'nav' => array('style'),
            'section' => array('style'),
            'code' => array('style'),
            'table' => array('style', 'width', 'summary', 'align', 'border', 'cellpadding', 'cellspacing'),
            'thead' => array('style'),
            'tbody' => array('style'),
            'tr' => array('style', 'colspan', 'rowspan'),
            'td' => array('style', 'colspan', 'rowspan'),
            'th' => array('style', 'colspan', 'rowspan'),
            'fieldset' => array('style'),
            'legend' => array('style'),
            'font' => array('face', 'color', 'style', 'size'),
            'big' => array(),
            'small' => array(),
            'tt' => array(),
            'kbd' => array(),
            'samp' => array(),
            'var' => array(),
            'del' => array(),
            's' => array(), // strikethrough
            'ins' => array(),
            'cite' => array(),
            'q' => array(),
            'hr' => array('style'),
            'pre' => array(),
            'center' => array(),
        );

        $aTestCaseArray = array();
        $sInputText = $this->ReadTestFile('whitelist_test.html', self::INPUT_DIRECTORY);

        foreach ($aTagsWhiteList as $sTag => $aTagAttributes)
        {
            // Opening tag, carrying every attribute listed for this tag
            $sStartTag = "<$sTag";
            foreach (array_values($aTagAttributes) as $iAttrCounter => $sTagAttribute)
            {
                $sStartTag .= $this->GetTagAttributeValue($sTagAttribute, $iAttrCounter);
            }
            $sStartTag .= '>';

            $sClosingTag = $this->IsClosingTag($sTag) ? "</$sTag>" : '';

            $sTestCaseText = str_replace('##START_TAG##', $sStartTag, $sInputText);
            $sTestCaseText = str_replace('##END_TAG##', $sClosingTag, $sTestCaseText);

            $aTestCaseArray[$sTag] = array($sTestCaseText);
        }

        return $aTestCaseArray;
    }

    /**
     * Generates the attribute fragment (leading space, name and quoted value) to append to an opening tag.
     * An appropriate value is used for the given attribute, or the counter if no specific value is needed.
     * This is necessary as most of the attributes with empty or inappropriate values (like a numeric for a href) are removed by the parser
     *
     * @param string $sTagAttribute
     * @param int $iAttributeCounter
     *
     * @return string attribute fragment, for example ' style="color: black"'
     */
    private function GetTagAttributeValue($sTagAttribute, $iAttributeCounter)
    {
        $sTagAttrValue = ' '.$sTagAttribute.'="';

        if (in_array($sTagAttribute, array('href', 'src'), true))
        {
            return $sTagAttrValue.'http://www.combodo.com"';
        }

        if ($sTagAttribute === 'style')
        {
            return $sTagAttrValue.'color: black"';
        }

        return $sTagAttrValue.$iAttributeCounter.'"';
    }

    /**
     * Tells whether a closing tag must be generated for the given tag in the whitelist test cases.
     *
     * @param string $sTag tag name
     *
     * @return bool false for br, img and hr, true for any other tag
     */
    private function IsClosingTag($sTag)
    {
        return !in_array($sTag, array('br', 'img', 'hr'), true);
    }
}