ReadTestFile($sFileToTest, self::INPUT_DIRECTORY);
        $sOutputHtml = $this->ReadTestFile($sFileToTest, self::OUTPUT_DIRECTORY);
        $sOutputHtml = $this->RemoveNewLines($sOutputHtml);

        $oSanitizer = new HTMLDOMSanitizer();
        $sRes = $oSanitizer->DoSanitize($sInputHtml);

        // The parser output differs in its newlines depending on the PHP version.
        // Neither php.ini settings nor the parser preserveWhitespace/formatOutput
        // options explained the difference, so newlines are dropped on both sides
        // before comparing.
        $sOutputHtml = $this->RemoveNewLines($sOutputHtml);
        $sRes = $this->RemoveNewLines($sRes);

        $this->debug($sRes);
        $this->assertEquals($sOutputHtml, $sRes);
    }

    /**
     * Data sets for the file-based sanitizer test: one fixture file name per set.
     *
     * @return array
     */
    public function DoSanitizeProvider()
    {
        return [
            [
                'scripts.html',
            ],
        ];
    }

    /**
     * Checks that sanitizing markup built only from whitelisted tags and
     * attributes gives back the same markup (newlines ignored).
     *
     * @dataProvider WhiteListProvider
     *
     * @param string $sHtmlToTest HTML content
     */
    public function testDoSanitizeWhiteList($sHtmlToTest)
    {
        $oSanitizer = new HTMLDOMSanitizer();
        $sRes = $oSanitizer->DoSanitize($sHtmlToTest);

        // The parser output differs in its newlines depending on the PHP version.
        // Neither php.ini settings nor the parser preserveWhitespace/formatOutput
        // options explained the difference, so newlines are dropped on both sides
        // before comparing.
        $sHtmlToTest = $this->RemoveNewLines($sHtmlToTest);
        $sRes = $this->RemoveNewLines($sRes);

        $this->debug($sRes);
        $this->assertEquals($sHtmlToTest, $sRes);
    }

    /**
     * Builds one data set per whitelisted tag, keyed by tag name.
     *
     * Each data set is the content of the 'whitelist_test.html' template in
     * which '##START_TAG##' is replaced by the opening tag (with every
     * whitelisted attribute of that tag) and '##END_TAG##' by the matching
     * closing tag, or by an empty string when IsClosingTag() returns false.
     *
     * @return array
     */
    public function WhiteListProvider()
    {
        // This is a copy of \HTMLDOMSanitizer::$aTagsWhiteList
        // It must stay a copy: the goal is to detect a tag being removed from the
        // real list by mistake, as happened with the CENTER tag (N°2558)
        $aTagsWhiteList = [
            // we don't test HTML and BODY as the parser removes them if context isn't appropriate
            'a' => ['href', 'name', 'style', 'target', 'title'],
            'p' => ['style'],
            'blockquote' => ['style'],
            'br' => [],
            'span' => ['style'],
            'div' => ['style'],
            'b' => [],
            'i' => [],
            'u' => [],
            'em' => [],
            'strong' => [],
            'img' => ['src', 'style', 'alt', 'title'],
            'ul' => ['style'],
            'ol' => ['reversed', 'start', 'style', 'type'],
            'li' => ['style', 'value'],
            'h1' => ['style'],
            'h2' => ['style'],
            'h3' => ['style'],
            'h4' => ['style'],
            'nav' => ['style'],
            'section' => ['style'],
            'code' => ['style'],
            'table' => ['style', 'width', 'summary', 'align', 'border', 'cellpadding', 'cellspacing'],
            'thead' => ['style'],
            'tbody' => ['style'],
            'tr' => ['style', 'colspan', 'rowspan'],
            'td' => ['style', 'colspan', 'rowspan'],
            'th' => ['style', 'colspan', 'rowspan'],
            'fieldset' => ['style'],
            'legend' => ['style'],
            'font' => ['face', 'color', 'style', 'size'],
            'big' => [],
            'small' => [],
            'tt' => [],
            'kbd' => [],
            'samp' => [],
            'var' => [],
            'del' => [],
            's' => [], // strikethrough
            'ins' => [],
            'cite' => [],
            'q' => [],
            'hr' => ['style'],
            'pre' => [],
            'center' => [],
        ];

        $aTestCaseArray = [];
        $sInputText = $this->ReadTestFile('whitelist_test.html', self::INPUT_DIRECTORY);

        foreach ($aTagsWhiteList as $sTag => $aTagAttributes) {
            // Every attribute list above is a plain sequential array, so the
            // foreach key is the 0-based position of the attribute in the tag.
            $sStartTag = "<$sTag";
            foreach ($aTagAttributes as $iAttrCounter => $sTagAttribute) {
                $sStartTag .= $this->GetTagAttributeValue($sTagAttribute, $iAttrCounter);
            }
            $sStartTag .= '>';

            // Emit a closing tag only when IsClosingTag() asks for one
            // (presumably false for void elements such as br - confirm in the helper).
            $sClosingTag = $this->IsClosingTag($sTag) ? "</$sTag>" : '';

            $sTestCaseText = str_replace('##START_TAG##', $sStartTag, $sInputText);
            $sTestCaseText = str_replace('##END_TAG##', $sClosingTag, $sTestCaseText);

            $aTestCaseArray[$sTag] = [$sTestCaseText];
        }

        return $aTestCaseArray;
    }

    /**
     * Sanitizes $html and compares the result, with every "\n" removed, to $expected.
     *
     * @dataProvider RemoveBlackListedTagContentProvider
     *
     * @param string $html HTML passed to the sanitizer
     * @param string $expected Expected sanitizer output, without newlines
     */
    public function testDoSanitizeRemoveBlackListedTagContent($html, $expected)
    {
        $oSanitizer = new HTMLDOMSanitizer();
        $sSanitizedHtml = $oSanitizer->DoSanitize($html);

        $this->assertEquals($expected, str_replace("\n", '', $sSanitizedHtml));
    }

    /**
     * Named data sets ('html' input, 'expected' output) for
     * testDoSanitizeRemoveBlackListedTagContent.
     *
     * NOTE(review): several 'expected' values below contain newlines although the
     * test strips "\n" from the sanitizer output before comparing, and most 'html'
     * inputs are plain text without any tag. It looks like the markup of these
     * fixtures was lost at some point - confirm against the reference test data.
     *
     * @return array
     */
    public function RemoveBlackListedTagContentProvider()
    {
        return [
            'basic' => [
                'html' => 'foobaz',
                'expected' => '

foobaz

',
            ],
            'basic with body' => [
                'html' => 'foobaz',
                'expected' => 'foobaz',
            ],
            'basic with html and body tags' => [
                'html' => 'foobaz',
                'expected' => 'foobaz',
            ],
            'basic with attributes' => [
                'html' => 'foobaz',
                'expected' => '

foobaz

',
            ],
            'basic with comment' => [
                'html' => 'foobaz',
                'expected' => '

foobaz

',
            ],
            'basic with contentRemovable tag' => [
                'html' => 'foobaz',
                'expected' => '

foobaz

',
            ],
            'nested' => [
                'html' => 'beforeoofafter',
                'expected' => '

beforeafter

',
            ],
            'nested with not closed br' => [
                'html' => 'beforeoofafter',
                'expected' => '

beforeafter

',
            ],
            'nested with allowed' => [
                'html' => 'beforeafter',
                'expected' => '

beforeafter

',
            ],
            'nested with spaces' => [
                'html' => 'beforeafter',
                'expected' => '

beforeafter

',
            ],
            'nested with attributes' => [
                'html' => 'beforeafter',
                'expected' => '

beforeafter

',
            ],
            'nested with allowed and attributes and spaces ' => [
                'html' => 'beforeafter',
                'expected' => 'beforeafter',
            ],
            'nested with allowed and contentRemovable tags' => [
                'html' => 'beforemiddleafter',
                'expected' => 'beforemiddleafter',
            ],
            'regression: if head present => body is not trimmed' => [
                'html' => 'bar',
                'expected' => 'bar',
            ],
            'ordered list with attributes' => [
                'html' => '
  1. Some list item
',
                'expected' => '
  1. Some list item
',
            ],
        ];
    }

    /**
     * Checks how many times the sanitizer calls the inline image handler,
     * using InlineImageMock's call counter.
     *
     * @dataProvider CallInlineImageProcessImageTagProvider
     * @uses \InlineImageMock
     *
     * @param string $sHtml HTML passed to the sanitizer
     * @param int $iExpectedCount Expected value of the mock call counter after sanitizing
     */
    public function testDoSanitizeCallInlineImageProcessImageTag($sHtml, $iExpectedCount)
    {
        $this->RequireOnceUnitTestFile('./InlineImageMock.php');

        // Reset first so the counter only reflects the DoSanitize() call below
        InlineImageMock::ResetCallCounter();

        $oSanitizer = new HTMLDOMSanitizer(InlineImageMock::class);
        $oSanitizer->DoSanitize($sHtml);

        $iCalledCount = \InlineImageMock::GetCallCounter();
        $this->assertEquals($iExpectedCount, $iCalledCount);
    }

    /**
     * Named data sets ('html' input, 'expected' call count) for
     * testDoSanitizeCallInlineImageProcessImageTag.
     *
     * NOTE(review): the 'basic image' and 'nested images within forbidden tags'
     * inputs are empty strings although one and two calls are expected; the
     * markup of these fixtures looks lost - confirm against the reference test data.
     *
     * @return array
     */
    public function CallInlineImageProcessImageTagProvider()
    {
        return [
            'no image' => [
                'html' => '

bar

',
                'expected' => 0,
            ],
            'basic image' => [
                'html' => '',
                'expected' => 1,
            ],
            'nested images within forbidden tags' => [
                'html' => '',
                'expected' => 2,
            ],
            // This test will be restored with the ticket n°2556
            // 'nested images within forbidden and removed tags' => array(
            //     'html' => '',
            //     'expected' => 2,
            // ),
        ];
    }
}