From 43a5621de575506b053bedc4cdceaf3aefdd8b7b Mon Sep 17 00:00:00 2001 From: Sean Rand Date: Wed, 1 May 2013 13:42:24 +0200 Subject: [PATCH] Leave encoding alone, only decode special HTML entities Using html_entity_decode caused too much UTF-8-related breakage; htmlspecialchars_decode should be safer and does the job just as well. --- helpers/ContentLoader.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers/ContentLoader.php b/helpers/ContentLoader.php index 2130436ed2..126e1ef622 100644 --- a/helpers/ContentLoader.php +++ b/helpers/ContentLoader.php @@ -106,7 +106,7 @@ public function fetch($source) { // sanitize content html $content = htmLawed( - html_entity_decode($item->getContent(), ENT_COMPAT, 'UTF-8'), + htmlspecialchars_decode($item->getContent()), array( "safe" => 1, "deny_attribute" => '* -alt -title -src -href', @@ -116,7 +116,7 @@ public function fetch($source) { "elements" => 'div,p,ul,li,a,img,dl,dt,h1,h2,h3,h4,h5,h6,ol,br,table,tr,td,blockquote,pre,ins,del,th,thead,tbody,b,i,strong,em,tt' ) ); - $title = html_entity_decode($item->getTitle(), ENT_COMPAT, 'UTF-8'); + $title = htmlspecialchars_decode($item->getTitle()); $title = htmLawed($title, array("deny_attribute" => "*", "elements" => "-*")); \F3::get('logger')->log('item content sanitized', \DEBUG);