Re-dump autoloader and composer.lock

This commit is contained in:
Stephen Abello
2025-09-18 10:26:38 +02:00
parent 7e515e7216
commit edbe4974ac
613 changed files with 5661 additions and 4259 deletions

View File

@@ -20,7 +20,7 @@ namespace Pelago\Emogrifier\Caching;
*
* @internal
*/
class SimpleStringCache
final class SimpleStringCache
{
/**
* @var array<string, string>

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Pelago\Emogrifier\Css;
use Pelago\Emogrifier\Utilities\Preg;
use Sabberworm\CSS\CSSList\AtRuleBlockList as CssAtRuleBlockList;
use Sabberworm\CSS\CSSList\Document as SabberwormCssDocument;
use Sabberworm\CSS\Parser as CssParser;
@@ -21,7 +22,7 @@ use Sabberworm\CSS\Settings as ParserSettings;
*
* @internal
*/
class CssDocument
final class CssDocument
{
/**
* @var SabberwormCssDocument
@@ -61,7 +62,8 @@ class CssDocument
*/
private function hasNestedAtRule(string $css): bool
{
return \preg_match('/@(?:media|supports|(?:-webkit-|-moz-|-ms-|-o-)?+(keyframes|document))\\b/', $css) === 1;
return (new Preg())
->match('/@(?:media|supports|(?:-webkit-|-moz-|-ms-|-o-)?+(keyframes|document))\\b/', $css) !== 0;
}
/**
@@ -140,7 +142,8 @@ class CssDocument
$allowedMediaTypes
);
$mediaTypesMatcher = \implode('|', $escapedAllowedMediaTypes);
$isAllowed = \preg_match('/^\\s*+(?:only\\s++)?+(?:' . $mediaTypesMatcher . ')/i', $mediaType) > 0;
$isAllowed
= (new Preg())->match('/^\\s*+(?:only\\s++)?+(?:' . $mediaTypesMatcher . ')/i', $mediaType) !== 0;
} else {
$isAllowed = true;
}

View File

@@ -12,7 +12,7 @@ use Sabberworm\CSS\RuleSet\DeclarationBlock;
*
* @internal
*/
class StyleRule
final class StyleRule
{
/**
* @var DeclarationBlock
@@ -43,7 +43,7 @@ class StyleRule
$selectors = $this->declarationBlock->getSelectors();
return \array_map(
static function (Selector $selector): string {
return (string)$selector;
return (string) $selector;
},
$selectors
);

View File

@@ -7,13 +7,15 @@ namespace Pelago\Emogrifier;
use Pelago\Emogrifier\Css\CssDocument;
use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
use Pelago\Emogrifier\Utilities\CssConcatenator;
use Pelago\Emogrifier\Utilities\DeclarationBlockParser;
use Pelago\Emogrifier\Utilities\Preg;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\CssSelector\Exception\ParseException;
/**
* This class provides functions for converting CSS styles into inline style attributes in your HTML code.
*/
class CssInliner extends AbstractHtmlProcessor
final class CssInliner extends AbstractHtmlProcessor
{
/**
* @var int
@@ -23,12 +25,7 @@ class CssInliner extends AbstractHtmlProcessor
/**
* @var int
*/
private const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 1;
/**
* @var int
*/
private const CACHE_KEY_COMBINED_STYLES = 2;
private const CACHE_KEY_COMBINED_STYLES = 1;
/**
* Regular expression component matching a static pseudo class in a selector, without the preceding ":",
@@ -39,7 +36,7 @@ class CssInliner extends AbstractHtmlProcessor
* @var string
*/
private const PSEUDO_CLASS_MATCHER
= 'empty|(?:first|last|nth(?:-last)?+|only)-(?:child|of-type)|not\\([[:ascii:]]*\\)';
= 'empty|(?:first|last|nth(?:-last)?+|only)-(?:child|of-type)|not\\([[:ascii:]]*\\)|root';
/**
* This regular expression component matches an `...of-type` pseudo class name, without the preceding ":". These
@@ -56,11 +53,23 @@ class CssInliner extends AbstractHtmlProcessor
*/
private const COMBINATOR_MATCHER = '(?:\\s++|\\s*+[>+~]\\s*+)(?=[[:alpha:]_\\-.#*:\\[])';
/**
* options array key for `querySelectorAll`
*
* @var string
*/
private const QSA_ALWAYS_THROW_PARSE_EXCEPTION = 'alwaysThrowParseException';
/**
* @var array<string, bool>
*/
private $excludedSelectors = [];
/**
* @var array<non-empty-string, bool>
*/
private $excludedCssSelectors = [];
/**
* @var array<string, bool>
*/
@@ -69,13 +78,11 @@ class CssInliner extends AbstractHtmlProcessor
/**
* @var array{
* 0: array<string, int>,
* 1: array<string, array<string, string>>,
* 2: array<string, string>
* 1: array<string, string>
* }
*/
private $caches = [
self::CACHE_KEY_SELECTOR => [],
self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
self::CACHE_KEY_COMBINED_STYLES => [],
];
@@ -164,7 +171,11 @@ class CssInliner extends AbstractHtmlProcessor
* @return $this
*
* @throws ParseException in debug mode, if an invalid selector is encountered
* @throws \RuntimeException in debug mode, if an internal PCRE error occurs
* @throws \RuntimeException
* in debug mode, if an internal PCRE error occurs
* or `CssSelectorConverter::toXPath` returns an invalid XPath expression
* @throws \UnexpectedValueException
* if a selector query result includes a node which is not a `DOMElement`
*/
public function inlineCss(string $css = ''): self
{
@@ -183,23 +194,12 @@ class CssInliner extends AbstractHtmlProcessor
$excludedNodes = $this->getNodesToExclude();
$cssRules = $this->collateCssRules($parsedCss);
$cssSelectorConverter = $this->getCssSelectorConverter();
foreach ($cssRules['inlinable'] as $cssRule) {
try {
$nodesMatchingCssSelectors = $this->getXPath()
->query($cssSelectorConverter->toXPath($cssRule['selector']));
/** @var \DOMElement $node */
foreach ($nodesMatchingCssSelectors as $node) {
if (\in_array($node, $excludedNodes, true)) {
continue;
}
$this->copyInlinableCssToStyleAttribute($node, $cssRule);
}
} catch (ParseException $e) {
if ($this->debug) {
throw $e;
foreach ($this->querySelectorAll($cssRule['selector']) as $node) {
if (\in_array($node, $excludedNodes, true)) {
continue;
}
$this->copyInlinableCssToStyleAttribute($this->ensureNodeIsElement($node), $cssRule);
}
}
@@ -301,6 +301,36 @@ class CssInliner extends AbstractHtmlProcessor
return $this;
}
/**
* Registers a CSS selector whose rules are to be excluded from emogrification.
*
* The selector is recorded as a key of `$excludedCssSelectors`, so registering the same selector more than once
* has no additional effect.
*
* NOTE(review): the rule filtering visible elsewhere in this class looks selectors up by exact key after collapsing
* whitespace runs to a single space, so `$selector` presumably has to be passed in that normalized form - confirm.
*
* @param non-empty-string $selector the selector to exclude, e.g., `.editor`
*
* @return $this
*/
public function addExcludedCssSelector(string $selector): self
{
// The selector itself is the array key; the value `true` is only a marker.
$this->excludedCssSelectors[$selector] = true;
return $this;
}
/**
 * Stops excluding the given CSS selector from emogrification.
 *
 * Calling this with a selector that is not currently excluded leaves the exclusion list untouched.
 *
 * @param non-empty-string $selector the selector to no longer exclude, e.g., `.editor`
 *
 * @return $this
 */
public function removeExcludedCssSelector(string $selector): self
{
    // `unset` on a missing array key is a silent no-op, so no prior existence check is required.
    unset($this->excludedCssSelectors[$selector]);

    return $this;
}
/**
* Sets the debug mode.
*
@@ -357,7 +387,6 @@ class CssInliner extends AbstractHtmlProcessor
{
$this->caches = [
self::CACHE_KEY_SELECTOR => [],
self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
self::CACHE_KEY_COMBINED_STYLES => [],
];
}
@@ -417,11 +446,13 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function normalizeStyleAttributes(\DOMElement $node): void
{
$normalizedOriginalStyle = \preg_replace_callback(
'/-?+[_a-zA-Z][\\w\\-]*+(?=:)/S',
$declarationBlockParser = new DeclarationBlockParser();
$normalizedOriginalStyle = (new Preg())->throwExceptions($this->debug)->replaceCallback(
'/-{0,2}+[_a-zA-Z][\\w\\-]*+(?=:)/S',
/** @param array<array-key, string> $propertyNameMatches */
static function (array $propertyNameMatches): string {
return \strtolower($propertyNameMatches[0]);
static function (array $propertyNameMatches) use ($declarationBlockParser): string {
return $declarationBlockParser->normalizePropertyName($propertyNameMatches[0]);
},
$node->getAttribute('style')
);
@@ -429,55 +460,13 @@ class CssInliner extends AbstractHtmlProcessor
// In order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles.
$nodePath = $node->getNodePath();
if (\is_string($nodePath) && !isset($this->styleAttributesForNodes[$nodePath])) {
$this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle);
$this->styleAttributesForNodes[$nodePath] = $declarationBlockParser->parse($normalizedOriginalStyle);
$this->visitedNodes[$nodePath] = $node;
}
$node->setAttribute('style', $normalizedOriginalStyle);
}
/**
 * Splits a CSS declaration block into a map of property names to property values.
 *
 * For instance, the declaration block
 *
 *   "color: #000; font-weight: bold;"
 *
 * yields
 *
 *   "color" => "#000"
 *   "font-weight" => "bold"
 *
 * Declarations are separated at each ";" that is not immediately followed by "base64" or "charset".
 * A piece that does not have the form `name: value` (with `name` consisting of ASCII letters and hyphens only)
 * is ignored. Property names are lower-cased; if a name occurs more than once, the last value wins.
 * Results are memoized per input string in the instance cache.
 *
 * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
 *
 * @return array<string, string>
 *         the CSS declarations with the property names as array keys and the property values as array values
 */
private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array
{
    if (!isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
        $parsedDeclarations = [];
        foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $rawDeclaration) {
            /** @var list<string> $nameAndValue */
            $nameAndValue = [];
            $isDeclaration = \preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($rawDeclaration), $nameAndValue);
            if ($isDeclaration) {
                $parsedDeclarations[\strtolower($nameAndValue[1])] = $nameAndValue[2];
            }
        }

        $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $parsedDeclarations;
    }

    return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
}
/**
* Returns CSS content.
*
@@ -509,34 +498,74 @@ class CssInliner extends AbstractHtmlProcessor
*
* @return list<\DOMElement>
*
* @throws ParseException
* @throws \UnexpectedValueException
* @throws ParseException in debug mode, if an invalid selector is encountered
* @throws \RuntimeException in debug mode, if `CssSelectorConverter::toXPath` returns an invalid XPath expression
* @throws \UnexpectedValueException if the selector query result includes a node which is not a `DOMElement`
*/
private function getNodesToExclude(): array
{
$excludedNodes = [];
foreach (\array_keys($this->excludedSelectors) as $selectorToExclude) {
try {
$matchingNodes = $this->getXPath()
->query($this->getCssSelectorConverter()->toXPath($selectorToExclude));
foreach ($matchingNodes as $node) {
if (!$node instanceof \DOMElement) {
$path = $node->getNodePath() ?? '$node';
throw new \UnexpectedValueException($path . ' is not a DOMElement.', 1617975914);
}
$excludedNodes[] = $node;
}
} catch (ParseException $e) {
if ($this->debug) {
throw $e;
}
foreach ($this->querySelectorAll($selectorToExclude) as $node) {
$excludedNodes[] = $this->ensureNodeIsElement($node);
}
}
return $excludedNodes;
}
/**
 * Finds the nodes of the document that match the given CSS selector(s).
 *
 * The selector is converted to an XPath expression, which is then evaluated with the document's XPath object.
 * Outside debug mode, failures do not propagate: an empty node list is returned instead, and for a
 * `\RuntimeException` the message is additionally passed to `trigger_error`.
 *
 * @param array{}|array{alwaysThrowParseException: bool} $options
 *        This is an array of option values to control behaviour:
 *        - `QSA_ALWAYS_THROW_PARSE_EXCEPTION` - `bool` - throw any `ParseException` regardless of debug setting.
 *
 * @return \DOMNodeList<\DOMNode> the HTML elements that match the provided CSS `$selectors`
 *
 * @throws ParseException
 *         in debug mode (or with `QSA_ALWAYS_THROW_PARSE_EXCEPTION` option), if an invalid selector is encountered
 * @throws \RuntimeException in debug mode, if `CssSelectorConverter::toXPath` returns an invalid XPath expression
 */
private function querySelectorAll(string $selectors, array $options = []): \DOMNodeList
{
    try {
        // Obtain the XPath object first, then convert the selector - the same order as a single chained call.
        $xPath = $this->getXPath();
        $xPathExpression = $this->getCssSelectorConverter()->toXPath($selectors);
        $matchedNodes = $xPath->query($xPathExpression);

        if ($matchedNodes !== false) {
            return $matchedNodes;
        }

        // Thrown inside the `try` on purpose, so that the `\RuntimeException` handler below applies to it.
        throw new \RuntimeException('query failed with selector \'' . $selectors . '\'', 1726533051);
    } catch (ParseException $parseException) {
        $mustRethrow = $this->debug || ($options[self::QSA_ALWAYS_THROW_PARSE_EXCEPTION] ?? false);
        if (!$mustRethrow) {
            return new \DOMNodeList();
        }

        throw $parseException;
    } catch (\RuntimeException $runtimeException) {
        if (!$this->debug) {
            // `RuntimeException` indicates a bug in CssSelector so pass the message to the error handler.
            \trigger_error($runtimeException->getMessage());

            return new \DOMNodeList();
        }

        throw $runtimeException;
    }
}
/**
 * Narrows a DOM node to a `DOMElement`, rejecting every other kind of node.
 *
 * @return \DOMElement the very `$node` that was passed in
 *
 * @throws \UnexpectedValueException if `$node` is not a `DOMElement`
 */
private function ensureNodeIsElement(\DOMNode $node): \DOMElement
{
    if ($node instanceof \DOMElement) {
        return $node;
    }

    // Identify the offending node by its path; fall back to a literal placeholder if no path is available.
    $nodeDescription = $node->getNodePath() ?? '$node';

    throw new \UnexpectedValueException($nodeDescription . ' is not a DOMElement.', 1617975914);
}
/**
* @return CssSelectorConverter
*/
@@ -577,6 +606,7 @@ class CssInliner extends AbstractHtmlProcessor
{
$matches = $parsedCss->getStyleRulesData(\array_keys($this->allowedMediaTypes));
$preg = (new Preg())->throwExceptions($this->debug);
$cssRules = [
'inlinable' => [],
'uninlinable' => [],
@@ -588,7 +618,21 @@ class CssInliner extends AbstractHtmlProcessor
$mediaQuery = $cssRule->getContainingAtRule();
$declarationsBlock = $cssRule->getDeclarationAsText();
foreach ($cssRule->getSelectors() as $selector) {
$selectors = $cssRule->getSelectors();
// Maybe exclude CSS selectors
if (\count($this->excludedCssSelectors) > 0) {
// Normalize spaces, line breaks & tabs
$selectorsNormalized = \array_map(static function (string $selector) use ($preg): string {
return $preg->replace('@\\s++@u', ' ', $selector);
}, $selectors);
$selectors = \array_filter($selectorsNormalized, function (string $selector): bool {
return !isset($this->excludedCssSelectors[$selector]);
});
}
foreach ($selectors as $selector) {
// don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
// only allow structural pseudo-classes
$hasPseudoElement = \strpos($selector, '::') !== false;
@@ -634,15 +678,17 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function hasUnsupportedPseudoClass(string $selector): bool
{
if (\preg_match('/:(?!' . self::PSEUDO_CLASS_MATCHER . ')[\\w\\-]/i', $selector)) {
$preg = (new Preg())->throwExceptions($this->debug);
if ($preg->match('/:(?!' . self::PSEUDO_CLASS_MATCHER . ')[\\w\\-]/i', $selector) !== 0) {
return true;
}
if (!\preg_match('/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i', $selector)) {
if ($preg->match('/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i', $selector) === 0) {
return false;
}
foreach (\preg_split('/' . self::COMBINATOR_MATCHER . '/', $selector) as $selectorPart) {
foreach ($preg->split('/' . self::COMBINATOR_MATCHER . '/', $selector) as $selectorPart) {
if ($this->selectorPartHasUnsupportedOfTypePseudoClass($selectorPart)) {
return true;
}
@@ -661,11 +707,13 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function selectorPartHasUnsupportedOfTypePseudoClass(string $selectorPart): bool
{
if (\preg_match('/^[\\w\\-]/', $selectorPart)) {
$preg = (new Preg())->throwExceptions($this->debug);
if ($preg->match('/^[\\w\\-]/', $selectorPart) !== 0) {
return false;
}
return (bool)\preg_match('/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i', $selectorPart);
return $preg->match('/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i', $selectorPart) !== 0;
}
/**
@@ -693,19 +741,20 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function getCssSelectorPrecedence(string $selector): int
{
$selectorKey = \md5($selector);
$selectorKey = $selector;
if (isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
}
$preg = (new Preg())->throwExceptions($this->debug);
$precedence = 0;
foreach ($this->selectorPrecedenceMatchers as $matcher => $value) {
if (\trim($selector) === '') {
break;
}
$number = 0;
$selector = \preg_replace('/' . $matcher . '\\w+/', '', $selector, -1, $number);
$precedence += ($value * (int)$number);
$count = 0;
$selector = $preg->replace('/' . $matcher . '\\w+/', '', $selector, -1, $count);
$precedence += ($value * $count);
}
$this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
@@ -729,7 +778,8 @@ class CssInliner extends AbstractHtmlProcessor
private function copyInlinableCssToStyleAttribute(\DOMElement $node, array $cssRule): void
{
$declarationsBlock = $cssRule['declarationsBlock'];
$newStyleDeclarations = $this->parseCssDeclarationsBlock($declarationsBlock);
$declarationBlockParser = new DeclarationBlockParser();
$newStyleDeclarations = $declarationBlockParser->parse($declarationsBlock);
if ($newStyleDeclarations === []) {
return;
}
@@ -737,7 +787,7 @@ class CssInliner extends AbstractHtmlProcessor
// if it has a style attribute, get it, process it, and append (overwrite) new stuff
if ($node->hasAttribute('style')) {
// break it up into an associative array
$oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$oldStyleDeclarations = $declarationBlockParser->parse($node->getAttribute('style'));
} else {
$oldStyleDeclarations = [];
}
@@ -757,6 +807,8 @@ class CssInliner extends AbstractHtmlProcessor
* @param array<string, string> $newStyles
*
* @return string
*
* @throws \UnexpectedValueException if an empty property name is encountered (which should not happen)
*/
private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles): string
{
@@ -784,9 +836,16 @@ class CssInliner extends AbstractHtmlProcessor
$combinedStyles = \array_merge($oldStyles, $newStyles);
$declarationBlockParser = new DeclarationBlockParser();
$style = '';
foreach ($combinedStyles as $attributeName => $attributeValue) {
$style .= \strtolower(\trim($attributeName)) . ': ' . \trim($attributeValue) . '; ';
$trimmedAttributeName = \trim($attributeName);
if ($trimmedAttributeName === '') {
throw new \UnexpectedValueException('An empty property name was encountered.', 1727046078);
}
$propertyName = $declarationBlockParser->normalizePropertyName($trimmedAttributeName);
$propertyValue = \trim($attributeValue);
$style .= $propertyName . ': ' . $propertyValue . '; ';
}
$trimmedStyle = \rtrim($style);
@@ -804,7 +863,7 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function attributeValueIsImportant(string $attributeValue): bool
{
return (bool)\preg_match('/!\\s*+important$/i', $attributeValue);
return (new Preg())->throwExceptions($this->debug)->match('/!\\s*+important$/i', $attributeValue) !== 0;
}
/**
@@ -812,9 +871,10 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function fillStyleAttributesWithMergedStyles(): void
{
$declarationBlockParser = new DeclarationBlockParser();
foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
$node = $this->visitedNodes[$nodePath];
$currentStyleAttributes = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$currentStyleAttributes = $declarationBlockParser->parse($node->getAttribute('style'));
$node->setAttribute(
'style',
$this->generateStyleStringFromDeclarationsArrays(
@@ -853,14 +913,16 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function removeImportantAnnotationFromNodeInlineStyle(\DOMElement $node): void
{
$inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$style = $node->getAttribute('style');
$inlineStyleDeclarations = (new DeclarationBlockParser())->parse((bool) $style ? $style : '');
/** @var array<string, string> $regularStyleDeclarations */
$regularStyleDeclarations = [];
/** @var array<string, string> $importantStyleDeclarations */
$importantStyleDeclarations = [];
foreach ($inlineStyleDeclarations as $property => $value) {
if ($this->attributeValueIsImportant($value)) {
$importantStyleDeclarations[$property] = $this->pregReplace('/\\s*+!\\s*+important$/i', '', $value);
$importantStyleDeclarations[$property]
= (new Preg())->throwExceptions($this->debug)->replace('/\\s*+!\\s*+important$/i', '', $value);
} else {
$regularStyleDeclarations[$property] = $value;
}
@@ -944,12 +1006,14 @@ class CssInliner extends AbstractHtmlProcessor
*
* @return bool
*
* @throws ParseException
* @throws ParseException in debug mode, if an invalid selector is encountered
* @throws \RuntimeException in debug mode, if `CssSelectorConverter::toXPath` returns an invalid XPath expression
*/
private function existsMatchForCssSelector(string $cssSelector): bool
{
try {
$nodesMatchingSelector = $this->getXPath()->query($this->getCssSelectorConverter()->toXPath($cssSelector));
$nodesMatchingSelector
= $this->querySelectorAll($cssSelector, [self::QSA_ALWAYS_THROW_PARSE_EXCEPTION => true]);
} catch (ParseException $e) {
if ($this->debug) {
throw $e;
@@ -957,7 +1021,7 @@ class CssInliner extends AbstractHtmlProcessor
return true;
}
return $nodesMatchingSelector !== false && $nodesMatchingSelector->length !== 0;
return $nodesMatchingSelector->length !== 0;
}
/**
@@ -972,9 +1036,11 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function removeUnmatchablePseudoComponents(string $selector): string
{
$preg = (new Preg())->throwExceptions($this->debug);
// The regex allows nested brackets via `(?2)`.
// A space is temporarily prepended because the callback can't determine if the match was at the very start.
$selectorWithoutNots = \ltrim(\preg_replace_callback(
$selectorWithoutNots = \ltrim((new Preg())->throwExceptions($this->debug)->replaceCallback(
'/([\\s>+~]?+):not(\\([^()]*+(?:(?2)[^()]*+)*+\\))/i',
/** @param array<array-key, string> $matches */
function (array $matches): string {
@@ -989,10 +1055,11 @@ class CssInliner extends AbstractHtmlProcessor
);
if (
!\preg_match(
$preg->match(
'/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i',
$selectorWithoutUnmatchablePseudoComponents
)
=== 0
) {
return $selectorWithoutUnmatchablePseudoComponents;
}
@@ -1000,7 +1067,7 @@ class CssInliner extends AbstractHtmlProcessor
function (string $selectorPart): string {
return $this->removeUnsupportedOfTypePseudoClasses($selectorPart);
},
\preg_split(
$preg->split(
'/(' . self::COMBINATOR_MATCHER . ')/',
$selectorWithoutUnmatchablePseudoComponents,
-1,
@@ -1041,7 +1108,7 @@ class CssInliner extends AbstractHtmlProcessor
*/
private function removeSelectorComponents(string $matcher, string $selector): string
{
return \preg_replace(
return (new Preg())->throwExceptions($this->debug)->replace(
['/([\\s>+~]|^)' . $matcher . '/i', '/' . $matcher . '/i'],
['$1*', ''],
$selector
@@ -1141,58 +1208,4 @@ class CssInliner extends AbstractHtmlProcessor
return $node;
}
/**
 * Thin wrapper around `preg_replace` for a single string subject.
 *
 * If `preg_replace` does not return a string, the failure is handed to `logOrThrowPregLastError()` and - provided
 * that call returns rather than throws - the unmodified `$subject` is returned as a fallback.
 *
 * This method only supports strings, not arrays of strings.
 *
 * @param non-empty-string $pattern
 * @param string $replacement
 * @param string $subject
 *
 * @return string
 *
 * @throws \RuntimeException
 */
private function pregReplace(string $pattern, string $replacement, string $subject): string
{
    $replaced = \preg_replace($pattern, $replacement, $subject);
    if (\is_string($replaced)) {
        return $replaced;
    }

    $this->logOrThrowPregLastError();

    return $subject;
}
/**
 * Reports the most recent PCRE error.
 *
 * The error code from `preg_last_error` is translated into the name of the matching `..._ERROR` constant of the
 * `pcre` extension (based on code posted at
 * {@see https://www.php.net/manual/en/function.preg-last-error.php#124124}); if no constant has that value, the
 * numeric code itself is used. The resulting message is thrown as an exception in debug mode and passed to
 * `trigger_error` otherwise.
 *
 * @throws \RuntimeException
 */
private function logOrThrowPregLastError(): void
{
    // Build a map from error code to constant name, considering only constants whose name ends in `_ERROR`.
    $errorNamesByCode = [];
    foreach (\get_defined_constants(true)['pcre'] as $constantName => $constantValue) {
        if (\substr($constantName, -6) === '_ERROR') {
            $errorNamesByCode[$constantValue] = $constantName;
        }
    }

    $errorCode = \preg_last_error();
    $errorLabel = (string)($errorNamesByCode[$errorCode] ?? $errorCode);
    $message = 'PCRE regex execution error `' . $errorLabel . '`';

    if (!$this->debug) {
        \trigger_error($message);

        return;
    }

    throw new \RuntimeException($message, 1592870147);
}
}

View File

@@ -4,12 +4,12 @@ declare(strict_types=1);
namespace Pelago\Emogrifier\HtmlProcessor;
use Pelago\Emogrifier\Utilities\Preg;
/**
* Base class for HTML processor that e.g., can remove, add or modify nodes or attributes.
*
* The "vanilla" subclass is the HtmlNormalizer.
*
* @psalm-consistent-constructor
*/
abstract class AbstractHtmlProcessor
{
@@ -71,9 +71,7 @@ abstract class AbstractHtmlProcessor
*
* Please use `::fromHtml` or `::fromDomDocument` instead.
*/
private function __construct()
{
}
private function __construct() {}
/**
* Builds a new instance from the given HTML.
@@ -184,7 +182,7 @@ abstract class AbstractHtmlProcessor
$htmlWithPossibleErroneousClosingTags = $this->getDomDocument()->saveHTML($this->getBodyElement());
$bodyNodeHtml = $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
return \preg_replace('%</?+body(?:\\s[^>]*+)?+>%', '', $bodyNodeHtml);
return (new Preg())->replace('%</?+body(?:\\s[^>]*+)?+>%', '', $bodyNodeHtml);
}
/**
@@ -196,7 +194,24 @@ abstract class AbstractHtmlProcessor
*/
private function removeSelfClosingTagsClosingTags(string $html): string
{
return \preg_replace('%</' . self::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '>%', '', $html);
return (new Preg())->replace('%</' . self::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '>%', '', $html);
}
/**
 * Provides the document's `<html>` element.
 *
 * The first element with the tag name `html` is used. Its absence is treated as an error.
 *
 * @throws \UnexpectedValueException if the document has no `<html>` element
 */
protected function getHtmlElement(): \DOMElement
{
    $firstHtmlNode = $this->getDomDocument()->getElementsByTagName('html')->item(0);
    if ($firstHtmlNode instanceof \DOMElement) {
        return $firstHtmlNode;
    }

    throw new \UnexpectedValueException('There is no HTML element although there should be one.', 1569930853);
}
/**
@@ -292,7 +307,7 @@ abstract class AbstractHtmlProcessor
private function normalizeDocumentType(string $html): string
{
// Limit to replacing the first occurrence: as an optimization; and in case an example exists as unescaped text.
return \preg_replace(
return (new Preg())->replace(
'/<!DOCTYPE\\s++html(?=[\\s>])/i',
'<!DOCTYPE html',
$html,
@@ -317,17 +332,17 @@ abstract class AbstractHtmlProcessor
// We are trying to insert the meta tag to the right spot in the DOM.
// If we just prepended it to the HTML, we would lose attributes set to the HTML tag.
$hasHeadTag = \preg_match('/<head[\\s>]/i', $html);
$hasHeadTag = (new Preg())->match('/<head[\\s>]/i', $html) !== 0;
$hasHtmlTag = \stripos($html, '<html') !== false;
if ($hasHeadTag) {
$reworkedHtml = \preg_replace(
$reworkedHtml = (new Preg())->replace(
'/<head(?=[\\s>])([^>]*+)>/i',
'<head$1>' . self::CONTENT_TYPE_META_TAG,
$html
);
} elseif ($hasHtmlTag) {
$reworkedHtml = \preg_replace(
$reworkedHtml = (new Preg())->replace(
'/<html(.*?)>/is',
'<html$1><head>' . self::CONTENT_TYPE_META_TAG . '</head>',
$html
@@ -350,7 +365,11 @@ abstract class AbstractHtmlProcessor
*/
private function hasContentTypeMetaTagInHead(string $html): bool
{
\preg_match('%^.*?(?=<meta(?=\\s)[^>]*\\shttp-equiv=(["\']?+)Content-Type\\g{-1}[\\s/>])%is', $html, $matches);
(new Preg())->match(
'%^.*?(?=<meta(?=\\s)[^>]*\\shttp-equiv=(["\']?+)Content-Type\\g{-1}[\\s/>])%is',
$html,
$matches
);
if (isset($matches[0])) {
$htmlBefore = $matches[0];
try {
@@ -380,9 +399,10 @@ abstract class AbstractHtmlProcessor
*/
private function hasEndOfHeadElement(string $html): bool
{
$headEndTagMatchCount
= \preg_match('%<(?!' . self::TAGNAME_ALLOWED_BEFORE_BODY_MATCHER . '[\\s/>])\\w|</head>%i', $html);
if (\is_int($headEndTagMatchCount) && $headEndTagMatchCount > 0) {
if (
(new Preg())->match('%<(?!' . self::TAGNAME_ALLOWED_BEFORE_BODY_MATCHER . '[\\s/>])\\w|</head>%i', $html)
!== 0
) {
// An exception to the implicit end of the `<head>` is any content within a `<template>` element, as well in
// comments. As an optimization, this is only checked for if a potential `<head>` end tag is found.
$htmlWithoutCommentsOrTemplates = $this->removeHtmlTemplateElements($this->removeHtmlComments($html));
@@ -407,12 +427,7 @@ abstract class AbstractHtmlProcessor
*/
private function removeHtmlComments(string $html): string
{
$result = \preg_replace(self::HTML_COMMENT_PATTERN, '', $html);
if (!\is_string($result)) {
throw new \RuntimeException('Internal PCRE error', 1616521475);
}
return $result;
return (new Preg())->throwExceptions(true)->replace(self::HTML_COMMENT_PATTERN, '', $html);
}
/**
@@ -427,12 +442,7 @@ abstract class AbstractHtmlProcessor
*/
private function removeHtmlTemplateElements(string $html): string
{
$result = \preg_replace(self::HTML_TEMPLATE_ELEMENT_PATTERN, '', $html);
if (!\is_string($result)) {
throw new \RuntimeException('Internal PCRE error', 1616519652);
}
return $result;
return (new Preg())->throwExceptions(true)->replace(self::HTML_TEMPLATE_ELEMENT_PATTERN, '', $html);
}
/**
@@ -445,7 +455,7 @@ abstract class AbstractHtmlProcessor
*/
private function ensurePhpUnrecognizedSelfClosingTagsAreXml(string $html): string
{
return \preg_replace(
return (new Preg())->replace(
'%<' . self::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '\\b[^>]*+(?<!/)(?=>)%',
'$0/',
$html
@@ -463,10 +473,6 @@ abstract class AbstractHtmlProcessor
return;
}
$htmlElement = $this->getDomDocument()->getElementsByTagName('html')->item(0);
if (!$htmlElement instanceof \DOMElement) {
throw new \UnexpectedValueException('There is no HTML element although there should be one.', 1569930853);
}
$htmlElement->appendChild($this->getDomDocument()->createElement('body'));
$this->getHtmlElement()->appendChild($this->getDomDocument()->createElement('body'));
}
}

View File

@@ -4,6 +4,9 @@ declare(strict_types=1);
namespace Pelago\Emogrifier\HtmlProcessor;
use Pelago\Emogrifier\Utilities\DeclarationBlockParser;
use Pelago\Emogrifier\Utilities\Preg;
/**
* This HtmlProcessor can convert style HTML attributes to the corresponding other visual HTML attributes,
* e.g. it converts style="width: 100px" to width="100".
@@ -12,12 +15,12 @@ namespace Pelago\Emogrifier\HtmlProcessor;
*
* To trigger the conversion, call the convertCssToVisualAttributes method.
*/
class CssToAttributeConverter extends AbstractHtmlProcessor
final class CssToAttributeConverter extends AbstractHtmlProcessor
{
/**
* This multi-level array contains simple mappings of CSS properties to
* HTML attributes. If a mapping only applies to certain HTML nodes or
* only for certain values, the mapping is an object with a whitelist
* only for certain values, the mapping is an object with an allowlist
* of nodes and values.
*
* @var array<string, array{attribute: string, nodes?: array<int, string>, values?: array<int, string>}>
@@ -42,11 +45,6 @@ class CssToAttributeConverter extends AbstractHtmlProcessor
],
];
/**
* @var array<string, array<string, string>>
*/
private static $parsedCssCache = [];
/**
* Maps the CSS from the style nodes to visual HTML attributes.
*
@@ -54,9 +52,10 @@ class CssToAttributeConverter extends AbstractHtmlProcessor
*/
public function convertCssToVisualAttributes(): self
{
$declarationBlockParser = new DeclarationBlockParser();
/** @var \DOMElement $node */
foreach ($this->getAllNodesWithStyleAttribute() as $node) {
$inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$inlineStyleDeclarations = $declarationBlockParser->parse($node->getAttribute('style'));
$this->mapCssToHtmlAttributes($inlineStyleDeclarations, $node);
}
@@ -73,48 +72,6 @@ class CssToAttributeConverter extends AbstractHtmlProcessor
return $this->getXPath()->query('//*[@style]');
}
/**
 * Splits a CSS declaration block into a map of property names to property values.
 *
 * For instance, the declaration block
 *
 *   "color: #000; font-weight: bold;"
 *
 * yields
 *
 *   "color" => "#000"
 *   "font-weight" => "bold"
 *
 * Declarations are separated at each ";" that is not immediately followed by "base64" or "charset".
 * A piece that does not have the form `name: value` (with `name` consisting of ASCII letters and hyphens only)
 * is ignored. Property names are lower-cased; if a name occurs more than once, the last value wins.
 * Results are memoized per input string in a static cache.
 *
 * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
 *
 * @return array<string, string>
 *         the CSS declarations with the property names as array keys and the property values as array values
 */
private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array
{
    if (!isset(self::$parsedCssCache[$cssDeclarationsBlock])) {
        $parsedDeclarations = [];
        foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $rawDeclaration) {
            /** @var array<int, string> $nameAndValue */
            $nameAndValue = [];
            $isDeclaration = \preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($rawDeclaration), $nameAndValue);
            if ($isDeclaration) {
                $parsedDeclarations[\strtolower($nameAndValue[1])] = $nameAndValue[2];
            }
        }

        self::$parsedCssCache[$cssDeclarationsBlock] = $parsedDeclarations;
    }

    return self::$parsedCssCache[$cssDeclarationsBlock];
}
/**
* Applies $styles to $node.
*
@@ -228,12 +185,14 @@ class CssToAttributeConverter extends AbstractHtmlProcessor
*/
private function mapWidthOrHeightProperty(\DOMElement $node, string $value, string $property): void
{
$preg = new Preg();
// only parse values in px and %, but not values like "auto"
if (!\preg_match('/^(\\d+)(\\.(\\d+))?(px|%)$/', $value)) {
if ($preg->match('/^(\\d+)(\\.(\\d+))?(px|%)$/', $value) === 0) {
return;
}
$number = \preg_replace('/[^0-9.%]/', '', $value);
$number = $preg->replace('/[^0-9.%]/', '', $value);
$node->setAttribute($property, $number);
}
@@ -289,8 +248,7 @@ class CssToAttributeConverter extends AbstractHtmlProcessor
*/
private function parseCssShorthandValue(string $value): array
{
/** @var array<int, string> $values */
$values = \preg_split('/\\s+/', $value);
$values = (new Preg())->split('/\\s+/', $value);
$css = [];
$css['top'] = $values[0];

View File

@@ -11,6 +11,4 @@ namespace Pelago\Emogrifier\HtmlProcessor;
* - add HEAD and BODY elements (if they are missing)
* - reformat the HTML
*/
class HtmlNormalizer extends AbstractHtmlProcessor
{
}
final class HtmlNormalizer extends AbstractHtmlProcessor {}

View File

@@ -6,11 +6,12 @@ namespace Pelago\Emogrifier\HtmlProcessor;
use Pelago\Emogrifier\CssInliner;
use Pelago\Emogrifier\Utilities\ArrayIntersector;
use Pelago\Emogrifier\Utilities\Preg;
/**
* This class can remove things from HTML.
*/
class HtmlPruner extends AbstractHtmlProcessor
final class HtmlPruner extends AbstractHtmlProcessor
{
/**
* We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only
@@ -84,9 +85,10 @@ class HtmlPruner extends AbstractHtmlProcessor
{
$classesToKeepIntersector = new ArrayIntersector($classesToKeep);
$preg = new Preg();
/** @var \DOMElement $element */
foreach ($elements as $element) {
$elementClasses = \preg_split('/\\s++/', \trim($element->getAttribute('class')));
$elementClasses = $preg->split('/\\s++/', \trim($element->getAttribute('class')));
$elementClassesToKeep = $classesToKeepIntersector->intersectWith($elementClasses);
if ($elementClassesToKeep !== []) {
$element->setAttribute('class', \implode(' ', $elementClassesToKeep));
@@ -124,9 +126,11 @@ class HtmlPruner extends AbstractHtmlProcessor
*/
public function removeRedundantClassesAfterCssInlined(CssInliner $cssInliner): self
{
$preg = new Preg();
$classesToKeepAsKeys = [];
foreach ($cssInliner->getMatchingUninlinableSelectors() as $selector) {
\preg_match_all('/\\.(-?+[_a-zA-Z][\\w\\-]*+)/', $selector, $matches);
$preg->matchAll('/\\.(-?+[_a-zA-Z][\\w\\-]*+)/', $selector, $matches);
$classesToKeepAsKeys += \array_fill_keys($matches[1], true);
}

View File

@@ -17,7 +17,7 @@ namespace Pelago\Emogrifier\Utilities;
*
* @internal
*/
class ArrayIntersector
final class ArrayIntersector
{
/**
* the array with which the object was constructed, with all its keys exchanged with their associated values

View File

@@ -37,7 +37,7 @@ namespace Pelago\Emogrifier\Utilities;
*
* @internal
*/
class CssConcatenator
final class CssConcatenator
{
/**
* Array of media rules in order. Each element is an object with the following properties:
@@ -89,7 +89,7 @@ class CssConcatenator
$lastDeclarationsBlockWithoutSemicolon = \rtrim(\rtrim($lastRuleBlock->declarationsBlock), ';');
$lastRuleBlock->declarationsBlock = $lastDeclarationsBlockWithoutSemicolon . ';' . $declarationsBlock;
} else {
$mediaRule->ruleBlocks[] = (object)\compact('selectorsAsKeys', 'declarationsBlock');
$mediaRule->ruleBlocks[] = (object) \compact('selectorsAsKeys', 'declarationsBlock');
}
}
}
@@ -121,7 +121,7 @@ class CssConcatenator
return $lastMediaRule;
}
$newMediaRule = (object)[
$newMediaRule = (object) [
'media' => $media,
'ruleBlocks' => [],
];