fetch_file_contents: support retarded schema-less urls

af_comics_dilbert: fix for new dilbert.com shenanigans
This commit is contained in:
Andrew Dolgov 2014-12-03 22:43:25 +03:00
parent 2681f684c7
commit 9fd581336e
2 changed files with 19 additions and 6 deletions

View File

@ -357,6 +357,9 @@
$url = ltrim($url, ' ');
$url = str_replace(' ', '%20', $url);
if (strpos($url, "//") === 0)
$url = 'http:' . $url;
if (!defined('NO_CURL') && function_exists('curl_init')) {
$fetch_curl_used = true;

View File

@ -6,23 +6,33 @@ class Af_Comics_Dilbert extends Af_ComicFilter {
}
function process(&$article) {
$owner_uid = $article["owner_uid"];
if (strpos($article["guid"], "dilbert.com") !== FALSE) {
$res = fetch_file_contents($article["link"], false, false, false,
false, false, 0,
"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)");
global $fetch_last_error_content;
if (!$res && $fetch_last_error_content)
$res = $fetch_last_error_content;
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
@$doc->loadHTML($res);
$basenode = false;
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$basenode = $xpath->query('//div[@class="STR_Image"]')->item(0);
/* $entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$matches = array();
foreach ($entries as $entry) {
if (preg_match("/dyn\/str_strip\/.*zoom\.gif$/", $entry->getAttribute("src"), $matches)) {
if (preg_match("/dyn\/str_strip\/.*strip\.gif$/", $entry->getAttribute("src"), $matches)) {
$entry->setAttribute("src",
rewrite_relative_url("http://dilbert.com/",
@ -31,7 +41,7 @@ class Af_Comics_Dilbert extends Af_ComicFilter {
$basenode = $entry;
break;
}
}
} */
if ($basenode) {
$article["content"] = $doc->saveXML($basenode);