-
-
Notifications
You must be signed in to change notification settings - Fork 9
/
HtmlSanitizer.php
140 lines (115 loc) · 4.95 KB
/
HtmlSanitizer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\HtmlSanitizer;
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor;
/**
 * Sanitizes untrusted HTML according to an HtmlSanitizerConfig.
 *
 * The input is length-limited, checked for UTF-8 validity, parsed into a DOM
 * and then walked by a DomVisitor that only keeps the elements configured for
 * the requested context (document head or body). Text contexts bypass the
 * parser entirely and are simply HTML-encoded.
 *
 * @author Titouan Galopin <[email protected]>
 */
final class HtmlSanitizer implements HtmlSanitizerInterface
{
    private ParserInterface $parser;

    /**
     * DOM visitors built so far, indexed by context, so that each context's
     * element configuration is computed at most once per sanitizer instance.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param HtmlSanitizerConfig  $config Rules describing what is allowed, blocked or dropped
     * @param ParserInterface|null $parser HTML parser to use; defaults to MastermindsParser when null
     */
    public function __construct(
        private HtmlSanitizerConfig $config,
        ?ParserInterface $parser = null,
    ) {
        // $config is a promoted property and is already assigned at this point.
        $this->parser = $parser ?? new MastermindsParser();
    }

    /**
     * Sanitizes the input as content of the document body.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the input for insertion inside the given element.
     *
     * The element name is normalised with StringSanitizer::htmlLower() and
     * looked up in W3CReference::CONTEXTS_MAP; elements missing from the map
     * are handled with the body context.
     */
    public function sanitizeFor(string $element, string $input): string
    {
        $context = W3CReference::CONTEXTS_MAP[StringSanitizer::htmlLower($element)] ?? W3CReference::CONTEXT_BODY;

        return $this->sanitizeWithContext($context, $input);
    }

    /**
     * Runs the sanitization pipeline for one context.
     *
     * Returns an empty string when the (possibly truncated) input is not valid
     * UTF-8, when the parser yields nothing, or when the visitor renders nothing.
     */
    private function sanitizeWithContext(string $context, string $input): string
    {
        // Text context: early return with HTML encoding. The length limit below
        // is not applied on this path.
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build (once) the DOM visitor for it
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        // Prevent DOS attack induced by extremely long HTML strings.
        // A limit of -1 disables the check.
        $maxLength = $this->config->getMaxInputLength();
        if (-1 !== $maxLength && \strlen($input) > $maxLength) {
            $input = $this->truncateOnCharacterBoundary($input, $maxLength);
        }

        // Only operate on valid UTF-8 strings. This is necessary to prevent cross
        // site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Remove NULL character
        $input = str_replace("\0", '', $input);

        // Parse as HTML
        $parsed = $this->parser->parse($input);
        if (!$parsed) {
            return '';
        }

        // Visit the DOM tree and render the sanitized nodes
        return $this->domVisitors[$context]->visit($parsed)?->render() ?? '';
    }

    /**
     * Cuts the input to at most $maxLength bytes without leaving half a character behind.
     *
     * A plain byte cut can land in the middle of a multi-byte UTF-8 sequence;
     * the resulting string would then fail the UTF-8 validity check and an
     * otherwise valid input would be sanitized to an empty string. When the
     * cut string ends with a lead byte that is missing some of its
     * continuation bytes, that dangling fragment is removed as well.
     */
    private function truncateOnCharacterBoundary(string $input, int $maxLength): string
    {
        $truncated = substr($input, 0, $maxLength);

        // An incomplete sequence is at most 3 bytes long (4-byte lead + 2 continuations),
        // so only the tail needs to be inspected. The pattern is byte-oriented (no "u"
        // modifier) and anchored with \z so a trailing newline is never skipped over.
        $incompleteTail = '/(?:[\xC2-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF4][\x80-\xBF]{0,2})\z/';
        if (1 === preg_match($incompleteTail, substr($truncated, -3), $match)) {
            $truncated = substr($truncated, 0, -\strlen($match[0]));
        }

        return $truncated;
    }

    /**
     * Tells whether the string is valid UTF-8 (an empty string is considered valid).
     */
    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8.
        return '' === $html || 1 === preg_match('//u', $html);
    }

    /**
     * Builds the DOM visitor holding the element rules that apply to a context.
     *
     * In the head context only configured elements listed in
     * W3CReference::HEAD_ELEMENTS are kept; in every other context only
     * configured elements that are NOT listed there are kept. Rules are applied
     * in the order allowed, blocked, dropped, so a later category overrides an
     * earlier one for the same element.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        $isHead = W3CReference::CONTEXT_HEAD === $context;
        $elementsConfig = [];

        foreach ($this->config->getAllowedElements() as $element => $allowedAttributes) {
            // Keep the element when its "is a head element" status matches the context
            if ($isHead === \array_key_exists($element, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$element] = $allowedAttributes;
            }
        }

        foreach ($this->config->getBlockedElements() as $element => $_) {
            if ($isHead === \array_key_exists($element, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$element] = HtmlSanitizerAction::Block;
            }
        }

        foreach ($this->config->getDroppedElements() as $element => $_) {
            if ($isHead === \array_key_exists($element, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$element] = HtmlSanitizerAction::Drop;
            }
        }

        return new DomVisitor($this->config, $elementsConfig);
    }
}