Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add masterminds/html5 #210

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
"symfony/css-selector": "^2.7 || ^3.0 || ^4.0 || ^5.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0 || ^7.5"
"phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0 || ^7.5",
"masterminds/html5": "^2.7"
},
"suggest": {
"masterminds/html5": "To use a HTML5 parser instead of native DOM parser."
},
"autoload": {
"psr-4": {
Expand Down
93 changes: 90 additions & 3 deletions src/CssToInlineStyles.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

namespace TijsVerkoyen\CssToInlineStyles;

use LogicException;
use Masterminds\HTML5;
use Symfony\Component\CssSelector\CssSelector;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\CssSelector\Exception\ExceptionInterface;
Expand All @@ -13,11 +15,28 @@ class CssToInlineStyles
{
private $cssConverter;

public function __construct()
/** @var HTML5|null */
private $html5Parser;

/** @var bool */
private $isHtml5Document = false;

/**
 * Prepares the CSS selector converter and, when requested, the HTML5 parser.
 *
 * @param bool|null $useHtml5Parser Whether to use an HTML5 parser or the native DOM parser
 *
 * @throws LogicException When the HTML5 parser is requested but the html5-php library is not installed
 */
public function __construct($useHtml5Parser = null)
{
    // Only instantiate the converter when the installed symfony/css-selector provides it.
    $converterClass = 'Symfony\Component\CssSelector\CssSelectorConverter';
    if (class_exists($converterClass)) {
        $this->cssConverter = new CssSelectorConverter();
    }

    // Any falsy value (null, false, ...) keeps the native DOM parser.
    if (! $useHtml5Parser) {
        return;
    }

    $parserAvailable = class_exists(HTML5::class);
    if (! $parserAvailable) {
        throw new LogicException('Using the HTML5 parser requires the html5-php library. Try running "composer require masterminds/html5".');
    }

    $parserOptions = ['disable_html_ns' => true];
    $this->html5Parser = new HTML5($parserOptions);
}

/**
Expand Down Expand Up @@ -110,27 +129,95 @@ public function getInlineStyles(\DOMElement $element)
* @return \DOMDocument
*/
protected function createDomDocumentFromHtml($html)
{
    // Reset the flag left over from a previous conversion; parseHtml5() sets it again when used.
    $this->isHtml5Document = false;

    // Pick the HTML5 parser only when it is configured and the markup qualifies for it.
    return $this->canParseHtml5String($html)
        ? $this->parseHtml5($html)
        : $this->parseXhtml($html);
}

/**
 * Parses the markup with the configured HTML5 parser and marks the
 * current document as an HTML5 document.
 *
 * @param string $html
 * @return \DOMDocument
 */
protected function parseHtml5($html)
{
    // Record which parser handled the document before parsing starts;
    // getHtmlFromDocument() reads this flag to choose the serializer.
    $this->isHtml5Document = true;

    $encodedHtml = $this->convertToHtmlEntities($html);

    return $this->html5Parser->parse($encodedHtml);
}

/**
 * Parses the markup with PHP's native DOM extension.
 *
 * The markup is loaded exactly once, and the previous libxml error-handling
 * mode is restored even when loading throws.
 *
 * @param string $html
 * @return \DOMDocument
 */
protected function parseXhtml($html)
{
    $document = new \DOMDocument('1.0', 'UTF-8');

    // Buffer libxml parse errors internally instead of emitting PHP warnings,
    // remembering the caller's setting so it can be put back afterwards.
    $internalErrors = libxml_use_internal_errors(true);
    try {
        $document->loadHTML($this->convertToHtmlEntities($html));
    } finally {
        libxml_use_internal_errors($internalErrors);
    }

    $document->formatOutput = true;

    return $document;
}

/**
 * Tells whether the given markup should be handed to the HTML5 parser.
 *
 * That is the case when an HTML5 parser is configured, the markup contains
 * a "<!doctype html>" declaration (matched case-insensitively), and whatever
 * precedes that declaration is empty or accepted by isValidHtml5Heading().
 *
 * @param string $content
 * @return bool
 */
protected function canParseHtml5String($content)
{
    // No HTML5 parser configured: always fall back to the native parser.
    if ($this->html5Parser === null) {
        return false;
    }

    $doctypeOffset = stripos($content, '<!doctype html>');
    if ($doctypeOffset === false) {
        return false;
    }

    // Inspect whatever precedes the doctype declaration.
    $preamble = substr($content, 0, $doctypeOffset);
    if ($preamble === '') {
        return true;
    }

    return $this->isValidHtml5Heading($preamble);
}

/**
 * Checks the text found in front of the doctype declaration.
 *
 * The text is accepted when it consists only of an optional byte order
 * mark, whitespace and HTML comments.
 *
 * @param string $heading
 * @return bool
 */
protected function isValidHtml5Heading($heading)
{
    $matched = preg_match('/^\x{FEFF}?\s*(<!--[^>]*?-->\s*)*$/u', $heading);

    // preg_match() yields 0 for "no match" and false on error; only 1 counts.
    return $matched === 1;
}

/**
 * Replaces every non-ASCII character of a UTF-8 string with a numeric
 * HTML character reference, leaving ASCII (including markup) untouched.
 *
 * mb_encode_numericentity() is used because the "HTML-ENTITIES"
 * pseudo-encoding of mb_convert_encoding() is deprecated as of PHP 8.2.
 *
 * @param string $html
 * @return string
 */
protected function convertToHtmlEntities($html)
{
    // Map: [first code point, last code point, offset, mask] covering
    // everything from U+0080 up to the end of the Unicode range.
    return mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
}

/**
* @param \DOMDocument $document
*
* @return string
*/
protected function getHtmlFromDocument(\DOMDocument $document)
{
$parser = $document;
if (null !== $this->html5Parser && $this->isHtml5Document) {
$parser = $this->html5Parser;
}

// retrieve the document element
// we do it this way to preserve the utf-8 encoding
$htmlElement = $document->documentElement;
$html = $document->saveHTML($htmlElement);
$html = $parser->saveHTML($htmlElement);
$html = trim($html);

// retrieve the doctype
Expand Down
66 changes: 66 additions & 0 deletions tests/HTML5ParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?php

namespace TijsVerkoyen\CssToInlineStyles\tests;

use PHPUnit\Framework\TestCase;
use TijsVerkoyen\CssToInlineStyles\CssToInlineStyles;

/**
 * Covers conversions performed with the HTML5 parser enabled, including
 * the fallback to the native parser for markup without an HTML5 doctype.
 */
class HTML5ParserTest extends TestCase
{
    /**
     * Converter under test, created with the HTML5 parser enabled.
     *
     * @var CssToInlineStyles
     */
    protected $cssToInlineStyles;

    /**
     * Creates a fresh converter before every test.
     *
     * @before
     */
    protected function prepare()
    {
        $this->cssToInlineStyles = new CssToInlineStyles(true);
    }

    /**
     * Drops the converter after every test.
     *
     * @after
     */
    protected function clear()
    {
        $this->cssToInlineStyles = null;
    }

    /**
     * An HTML5 document keeps its doctype and compact serialization.
     */
    public function testBasicHtml()
    {
        $markup = '<!doctype html><html><head><style>body{color:blue}</style></head><body><p>foo</p></body></html>';
        $stylesheet = 'p { color: red; }';
        $expected = <<<EOF
<!doctype html>
<html><head><style>body{color:blue}</style></head><body style="color: blue;"><p style="color: red;">foo</p></body></html>
EOF;

        $actual = $this->cssToInlineStyles->convert($markup, $stylesheet);

        $this->assertEquals($expected, $actual);
    }

    /**
     * The same converter instance handles a document without a doctype
     * first and an HTML5 document afterwards.
     */
    public function testSwitchingParser()
    {
        $stylesheet = 'p { color: red; }';

        // HTML4
        $legacyMarkup = '<html><head><style>body{color:blue}</style></head><body><p>foo</p></body></html>';
        $expectedLegacy = <<<EOF
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head><style>body{color:blue}</style></head>
<body style="color: blue;"><p style="color: red;">foo</p></body>
</html>
EOF;

        $actualLegacy = $this->cssToInlineStyles->convert($legacyMarkup, $stylesheet);
        $this->assertEquals($expectedLegacy, $actualLegacy);

        // HTML5
        $html5Markup = '<!doctype html>' . $legacyMarkup;
        $expectedHtml5 = <<<EOF
<!doctype html>
<html><head><style>body{color:blue}</style></head><body style="color: blue;"><p style="color: red;">foo</p></body></html>
EOF;

        $actualHtml5 = $this->cssToInlineStyles->convert($html5Markup, $stylesheet);
        $this->assertEquals($expectedHtml5, $actualHtml5);
    }
}