fetch_file_contents: decompress gzipped data

af_readability: remove utf8 preamble hack
This commit is contained in:
Andrew Dolgov 2018-12-21 17:50:16 +03:00
parent 782eda45db
commit a5517fe857
3 changed files with 20 additions and 2 deletions

View File

@@ -1612,7 +1612,7 @@ class RSSUtils {
  }
  }
- private static function is_gzipped($feed_data) {
+ static function is_gzipped($feed_data) {
  return mb_strpos($feed_data, "\x1f" . "\x8b" . "\x08", 0, "US-ASCII") === 0;
  }

View File

@@ -437,6 +437,14 @@
  curl_close($ch);
+ $is_gzipped = RSSUtils::is_gzipped($contents);
+ if ($is_gzipped) {
+ $tmp = @gzdecode($contents);
+ if ($tmp) $contents = $tmp;
+ }
  return $contents;
  } else {
@@ -522,6 +530,15 @@
  return false;
  }
+ $is_gzipped = RSSUtils::is_gzipped($data);
+ if ($is_gzipped) {
+ $tmp = @gzdecode($data);
+ if ($tmp) $data = $tmp;
+ }
  return $data;
  }

View File

@@ -148,6 +148,7 @@ class Af_Readability extends Plugin {
  }
  public function extract_content($url) {
  global $fetch_effective_url;
  $tmp = fetch_file_contents([
@@ -158,7 +159,7 @@ class Af_Readability extends Plugin {
  if ($tmp && mb_strlen($tmp) < 1024 * 500) {
  $tmpdoc = new DOMDocument("1.0", "UTF-8");
- if (!$tmpdoc->loadHTML('<?xml encoding="utf-8" ?>\n' . $tmp))
+ if (!$tmpdoc->loadHTML($tmp))
  return false;
  if (strtolower($tmpdoc->encoding) != 'utf-8') {