diff --git a/classes/handler/public.php b/classes/handler/public.php index f73427cbf..318cecd72 100755 --- a/classes/handler/public.php +++ b/classes/handler/public.php @@ -329,7 +329,7 @@ class Handler_Public extends Handler { if (!$og_image) { $tmpdoc = new DOMDocument(); - if (@$tmpdoc->loadHTML(mb_substr($content, 0, 131070))) { + if (@$tmpdoc->loadHTML('' . mb_substr($content, 0, 131070))) { $tmpxpath = new DOMXPath($tmpdoc); $imgs = $tmpxpath->query("//img"); diff --git a/classes/rssutils.php b/classes/rssutils.php index 82a6963e8..5aff2f8a3 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -1200,12 +1200,8 @@ class RSSUtils { static function cache_media($html, $site_url) { libxml_use_internal_errors(true); - $charset_hack = ' - - '; - $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $html); + $doc->loadHTML('' . $html); $xpath = new DOMXPath($doc); $entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])'); diff --git a/include/functions.php b/include/functions.php index 5ebd4e0ff..d59e79126 100755 --- a/include/functions.php +++ b/include/functions.php @@ -562,7 +562,7 @@ libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($html); + $doc->loadHTML('' . $html); $xpath = new DOMXPath($doc); $base = $xpath->query('/html/head/base[@href]'); @@ -1518,14 +1518,10 @@ // plugins work on original source URLs used before caching function rewrite_cached_urls($str) { - $charset_hack = ' - - '; - $res = trim($str); if (!$res) return ''; $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $res); + $doc->loadHTML('' . $res); $xpath = new DOMXPath($doc); $entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video/source[@src]|//audio/source[@src])'); @@ -1580,16 +1576,10 @@ $res = trim($str); if (!$res) return ''; - $charset_hack = ' - - '; - - $res = trim($res); if (!$res) return ''; - libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $res); + $doc->loadHTML('' . $res); $xpath = new DOMXPath($doc); $rewrite_base_url = $site_url ? $site_url : get_self_url_prefix(); @@ -2115,7 +2105,7 @@ libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($content); + $doc->loadHTML('' . $content); $xpath = new DOMXPath($doc); $entries = $xpath->query('/html/head/link[@rel="alternate" and '. '(contains(@type,"rss") or contains(@type,"atom"))]|/html/head/link[@rel="feed"]'); @@ -2136,7 +2126,7 @@ } function is_html($content) { - return preg_match("/loadHTML($charset_hack . $article["content"]); + @$doc->loadHTML('' . $article["content"]); if ($doc) { $xpath = new DOMXPath($doc); diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 117646c30..32c54a2c7 100755 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -172,14 +172,10 @@ class Af_Readability extends Plugin { if (!$tmpdoc->loadHTML($tmp)) return false; + // this is the worst hack yet :( if (strtolower($tmpdoc->encoding) != 'utf-8') { - $tmpxpath = new DOMXPath($tmpdoc); - - foreach ($tmpxpath->query("//meta") as $elem) { - $elem->parentNode->removeChild($elem); - } - - $tmp = $tmpdoc->saveHTML(); + $tmp = preg_replace("//i", "", $tmp); + $tmp = mb_convert_encoding($tmp, 'utf-8', $tmpdoc->encoding); } try { @@ -210,7 +206,6 @@ class Af_Readability extends Plugin { } catch (Exception $e) { return false; } - } return false; diff --git a/plugins/af_tumblr_1280/init.php b/plugins/af_tumblr_1280/init.php index 8aba0e652..5d7f366a4 100755 --- a/plugins/af_tumblr_1280/init.php +++ b/plugins/af_tumblr_1280/init.php @@ -25,12 +25,8 @@ class Af_Tumblr_1280 extends Plugin { if (!function_exists("curl_init") || ini_get("open_basedir")) return $article; - $charset_hack = ' - - '; - $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $article["content"]); + $doc->loadHTML('' . $article["content"]); $found = false; @@ -92,4 +88,4 @@ class Af_Tumblr_1280 extends Plugin { return 2; } -} \ No newline at end of file +} diff --git a/plugins/af_zz_imgproxy/init.php b/plugins/af_zz_imgproxy/init.php index 2cd8fcaf0..b172d4563 100755 --- a/plugins/af_zz_imgproxy/init.php +++ b/plugins/af_zz_imgproxy/init.php @@ -155,7 +155,7 @@ class Af_Zz_ImgProxy extends Plugin { $proxy_all = $this->host->get($this, "proxy_all"); $doc = new DOMDocument(); - if (@$doc->loadHTML($article["content"])) { + if (@$doc->loadHTML('' . $article["content"])) { $xpath = new DOMXPath($doc); $imgs = $xpath->query("//img[@src]"); diff --git a/plugins/cache_starred_images/init.php b/plugins/cache_starred_images/init.php index a1916e226..714d4cb9b 100755 --- a/plugins/cache_starred_images/init.php +++ b/plugins/cache_starred_images/init.php @@ -190,12 +190,8 @@ class Cache_Starred_Images extends Plugin implements IHandler { return; } - $charset_hack = ' - - '; - $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $content); + $doc->loadHTML('' . $content); $xpath = new DOMXPath($doc); $entries = $xpath->query('(//img[@src])|(//video/source[@src])');