mirror of
https://tt-rss.org/git/tt-rss.git
synced 2024-06-28 12:10:52 +02:00
af_readability: add a workaround for meta charset html pages
This commit is contained in:
parent
6322fc6870
commit
b7d1306b19
|
@@ -101,6 +101,19 @@ class Af_Readability extends Plugin {
 $tmp = fetch_file_contents($article["link"]);
 
 if ($tmp) {
+	$tmpdoc = new DOMDocument("1.0", "UTF-8");
+	$tmpdoc->loadHTML($tmp);
+
+	if ($tmpdoc->encoding != 'UTF-8') {
+		$tmpxpath = new DOMXPath($tmpdoc);
+
+		foreach ($tmpxpath->query("//meta") as $elem) {
+			$elem->parentNode->removeChild($elem);
+		}
+
+		$tmp = $tmpdoc->saveHTML();
+	}
+
 	$r = new Readability($tmp, $article["link"]);
 
 	if ($r->init()) {
Loading…
Reference in New Issue
Block a user