From 2aef804f4b2cee7baf569f707eb74c27005d7daa Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 20 Aug 2018 12:12:32 +0300 Subject: [PATCH] split transparent rewriting of locally cached media URLs to execute after both sanitize() and HOOK_RENDER_ARTICLE to allow plugins to work on original source URLs consistently --- classes/api.php | 4 +++ classes/article.php | 2 ++ classes/feeds.php | 2 ++ include/functions.php | 81 ++++++++++++++++++++++++++++++------------- 4 files changed, 65 insertions(+), 24 deletions(-) mode change 100644 => 100755 classes/api.php diff --git a/classes/api.php b/classes/api.php old mode 100644 new mode 100755 index 4c321d77e..5dbf8dc1f --- a/classes/api.php +++ b/classes/api.php @@ -379,6 +379,8 @@ class API extends Handler { $article = $p->hook_render_article_api(array("article" => $article)); } + $article['content'] = rewrite_cached_urls($article['content']); + array_push($articles, $article); } @@ -799,6 +801,8 @@ class API extends Handler { $headline_row = $p->hook_render_article_api(array("headline" => $headline_row)); } + $headline_row['content'] = rewrite_cached_urls($headline_row['content']); + array_push($headlines, $headline_row); } } else if (is_numeric($result) && $result == -1) { diff --git a/classes/article.php b/classes/article.php index c8ee5b931..71dfdabc4 100755 --- a/classes/article.php +++ b/classes/article.php @@ -610,6 +610,8 @@ class Article extends Handler_Protected { $line = $p->hook_render_article($line); } + $line['content'] = rewrite_cached_urls($line['content']); + $num_comments = (int) $line["num_comments"]; $entry_comments = ""; diff --git a/classes/feeds.php b/classes/feeds.php index 47a6b56b8..6bf14f45e 100755 --- a/classes/feeds.php +++ b/classes/feeds.php @@ -477,6 +477,8 @@ class Feeds extends Handler_Protected { $line = $p->hook_render_article_cdm($line); } + $line['content'] = rewrite_cached_urls($line['content']); + if ($vfeed_group_enabled && $line["feed_title"]) { if ($feed_id != 
/**
 * Rewrites media URLs in an HTML fragment to their locally cached versions.
 *
 * Inspects <img src>, <video><source src> and <audio><source src> elements. When a
 * file named sha1(src) exists under CACHE_DIR/images, the element's src attribute is
 * pointed at public.php?op=cached_url instead of the original address.
 *
 * This is called separately, after sanitize() and the plugin render-article hooks, so
 * that plugins get to work on the original source URLs used before caching.
 *
 * @param string $str HTML fragment; media URLs are expected to be absolute already.
 * @return string The trimmed input when nothing was rewritten (empty string for blank
 *                input); otherwise the document re-serialized by DOMDocument::saveHTML().
 */
function rewrite_cached_urls($str) {
	$res = trim((string) $str);

	// compare against '' explicitly: a truthiness check would also discard the literal content "0"
	if ($res === '') return '';

	// DOMDocument::loadHTML() assumes ISO-8859-1 unless the markup declares a charset,
	// so a UTF-8 declaration is prepended before parsing.
	// NOTE(review): the markup inside this literal was missing from the reviewed copy and
	// has been restored to the usual UTF-8 meta declaration - confirm against upstream.
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();

	// libxml reports HTML5 elements (video, audio, source) as invalid tags; collect those
	// reports internally instead of emitting PHP warnings, then restore the caller's setting
	$prev_libxml_errors = libxml_use_internal_errors(true);
	$loaded = $doc->loadHTML($charset_hack . $res);
	libxml_clear_errors();
	libxml_use_internal_errors($prev_libxml_errors);

	// unparseable input is handed back untouched rather than failing the render
	if (!$loaded) return $res;

	$xpath = new DOMXPath($doc);

	// every matched node carries a src attribute because of the [@src] predicates
	$entries = $xpath->query('(//img[@src]|//video/source[@src]|//audio/source[@src])');

	$need_saving = false;

	foreach ($entries as $entry) {

		// should be already absolutized because this is called after sanitize()
		$src = $entry->getAttribute('src');
		$hash = sha1($src);

		if (!file_exists(CACHE_DIR . '/images/' . $hash)) continue;

		$parent_tag = $entry->parentNode ? $entry->parentNode->nodeName : '';

		// this is strictly cosmetic
		if ($entry->tagName == 'img') {
			$suffix = ".png";
		} else if ($parent_tag == "video") {
			$suffix = ".mp4";
		} else if ($parent_tag == "audio") {
			$suffix = ".ogg";
		} else {
			$suffix = "";
		}

		$entry->setAttribute('src',
			get_self_url_prefix() . '/public.php?op=cached_url&hash=' . $hash . $suffix);

		// browsers pick srcset candidates over src, which would bypass the cached copy;
		// sanitize() used to drop these attributes for cached entries before the split
		if ($entry->hasAttribute('srcset')) {
			$entry->removeAttribute('srcset');
		}

		if ($entry->hasAttribute('sizes')) {
			$entry->removeAttribute('sizes');
		}

		$need_saving = true;
	}

	if ($need_saving) {
		// remove doctype (only when the parser actually produced one)
		if ($doc->doctype) $doc->removeChild($doc->doctype);

		$res = $doc->saveHTML();
	}

	return $res;
}