From 3e4701116d9a7a2b93646f2c9aed80b63175d206 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Fri, 16 Aug 2019 15:29:24 +0300 Subject: [PATCH] af_readability: add missing file --- classes/backend.php | 2 +- classes/handler/public.php | 20 ++--- classes/pluginhandler.php | 5 +- classes/pluginhost.php | 2 +- classes/rpc.php | 2 +- include/functions.php | 2 +- .../andreskrey/Readability/Configuration.php | 26 ------ .../Readability/Nodes/DOM/DOMNodeList.php | 82 +++++++++++++++++++ .../Readability/Nodes/NodeTrait.php | 51 +++++++++--- .../Readability/Nodes/NodeUtility.php | 20 +++++ .../andreskrey/Readability/Readability.php | 56 ++++++++++--- 11 files changed, 202 insertions(+), 66 deletions(-) create mode 100644 plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php diff --git a/classes/backend.php b/classes/backend.php index 5bd724728..122e28c65 100644 --- a/classes/backend.php +++ b/classes/backend.php @@ -88,7 +88,7 @@ class Backend extends Handler { } function help() { - $topic = basename(clean($_REQUEST["topic"])); // only one for now + $topic = clean_filename($_REQUEST["topic"]); // only one for now if ($topic == "main") { $info = get_hotkeys_info(); diff --git a/classes/handler/public.php b/classes/handler/public.php index 05a84494b..91413b976 100755 --- a/classes/handler/public.php +++ b/classes/handler/public.php @@ -1203,30 +1203,30 @@ class Handler_Public extends Handler { public function pluginhandler() { $host = new PluginHost(); - $plugin = basename(clean($_REQUEST["plugin"])); + $plugin_name = clean_filename($_REQUEST["plugin"]); $method = clean($_REQUEST["pmethod"]); - $host->load($plugin, PluginHost::KIND_USER, 0); + $host->load($plugin_name, PluginHost::KIND_USER, 0); $host->load_data(); - $pclass = $host->get_plugin($plugin); + $plugin = $host->get_plugin($plugin_name); - if ($pclass) { - if (method_exists($pclass, $method)) { - if ($pclass->is_public_method($method)) { - $pclass->$method(); + if ($plugin) { + if (method_exists($plugin, $method)) { + if ($plugin->is_public_method($method)) { + $plugin->$method(); } else { - user_error("pluginhandler: Requested private method '$method' of plugin '$plugin'."); + user_error("PluginHandler[PUBLIC]: Requested private method '$method' of plugin '$plugin_name'.", E_USER_WARNING); header("Content-Type: text/json"); print error_json(6); } } else { - user_error("pluginhandler: Requested unknown method '$method' of plugin '$plugin'."); + user_error("PluginHandler[PUBLIC]: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING); header("Content-Type: text/json"); print error_json(13); } } else { - user_error("pluginhandler: Requested method '$method' of unknown plugin '$plugin'."); + user_error("PluginHandler[PUBLIC]: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING); header("Content-Type: text/json"); print error_json(14); } diff --git a/classes/pluginhandler.php b/classes/pluginhandler.php index d10343e09..9682e440f 100644 --- a/classes/pluginhandler.php +++ b/classes/pluginhandler.php @@ -5,15 +5,18 @@ class PluginHandler extends Handler_Protected { } function catchall($method) { - $plugin = PluginHost::getInstance()->get_plugin(clean($_REQUEST["plugin"])); + $plugin_name = clean($_REQUEST["plugin"]); + $plugin = PluginHost::getInstance()->get_plugin($plugin_name); if ($plugin) { if (method_exists($plugin, $method)) { $plugin->$method(); } else { + user_error("PluginHandler: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING); print error_json(13); } } else { + user_error("PluginHandler: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING); print error_json(14); } } diff --git a/classes/pluginhost.php b/classes/pluginhost.php index 9330e9e5e..eab808ae9 100755 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -186,7 +186,7 @@ class PluginHost { foreach ($plugins as $class) { $class = trim($class); - $class_file = strtolower(basename($class)); + $class_file = strtolower(clean_filename($class)); if (!is_dir(__DIR__."/../plugins/$class_file") && !is_dir(__DIR__."/../plugins.local/$class_file")) continue; diff --git a/classes/rpc.php b/classes/rpc.php index 8736cbb65..84c9cfe92 100755 --- a/classes/rpc.php +++ b/classes/rpc.php @@ -572,7 +572,7 @@ class RPC extends Handler_Protected { function log() { $msg = clean($_REQUEST['msg']); - $file = basename(clean($_REQUEST['file'])); + $file = clean_filename($_REQUEST['file']); $line = (int) clean($_REQUEST['line']); $context = clean($_REQUEST['context']); diff --git a/include/functions.php b/include/functions.php index c326ac468..e78f0de9d 100644 --- a/include/functions.php +++ b/include/functions.php @@ -593,7 +593,7 @@ } function clean_filename($filename) { - return basename(preg_replace("/\.\.|[\/\\\]/", "", $filename)); + return basename(preg_replace("/\.\.|[\/\\\]/", "", clean($filename))); } function make_password($length = 12) { diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php index 6c17bc757..0632399c6 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php @@ -166,32 +166,6 @@ class Configuration return $this; } - /** - * @deprecated Use getCharThreshold. Will be removed in version 2.0 - * - * @return int - */ - public function getWordThreshold() - { - @trigger_error('getWordThreshold was replaced with getCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED); - - return $this->charThreshold; - } - - /** - * @param int $charThreshold - * - * @return $this - */ - public function setWordThreshold($charThreshold) - { - @trigger_error('setWordThreshold was replaced with setCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED); - - $this->charThreshold = $charThreshold; - - return $this; - } - /** * @return bool */ diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php new file mode 100644 index 000000000..5149c0b98 --- /dev/null +++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php @@ -0,0 +1,82 @@ +length is hidden + * from the user and cannot be extended, changed, or tweaked. + */ +class DOMNodeList implements \Countable, \IteratorAggregate +{ + /** + * @var array + */ + protected $items = []; + + /** + * @var int + */ + protected $length = 0; + + /** + * To allow access to length in the same way that DOMNodeList allows. + * + * {@inheritdoc} + */ + public function __get($name) + { + switch ($name) { + case 'length': + return $this->length; + default: + trigger_error(sprintf('Undefined property: %s::%s', static::class, $name)); + } + } + + /** + * @param DOMNode|DOMElement|DOMComment $node + * + * @return DOMNodeList + */ + public function add($node) + { + $this->items[] = $node; + $this->length++; + + return $this; + } + + /** + * @param int $offset + * + * @return DOMNode|DOMElement|DOMComment + */ + public function item(int $offset) + { + return $this->items[$offset]; + } + + /** + * @return int|void + */ + public function count(): int + { + return $this->length; + } + + /** + * To make it compatible with iterator_to_array() function. + * + * {@inheritdoc} + */ + public function getIterator(): \ArrayIterator + { + return new \ArrayIterator($this->items); + } +} diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php index d7060ccbb..5198bbb5f 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php @@ -181,11 +181,11 @@ trait NodeTrait /** * Override for native hasAttribute. * - * @see getAttribute - * * @param $attributeName * * @return bool + * + * @see getAttribute */ public function hasAttribute($attributeName) { @@ -317,10 +317,14 @@ trait NodeTrait * * @param bool $filterEmptyDOMText Filter empty DOMText nodes? * + * @deprecated Use NodeUtility::filterTextNodes, function will be removed in version 3.0 + * * @return array */ public function getChildren($filterEmptyDOMText = false) { + @trigger_error('getChildren was replaced with NodeUtility::filterTextNodes and will be removed in version 3.0', E_USER_DEPRECATED); + $ret = iterator_to_array($this->childNodes); if ($filterEmptyDOMText) { // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number @@ -418,12 +422,12 @@ trait NodeTrait public function hasSingleTagInsideElement($tag) { // There should be exactly 1 element child with given tag - if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) { + if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) { return false; } // And there should be no text nodes with real content - return array_reduce($children, function ($carry, $child) { + return array_reduce(iterator_to_array($children), function ($carry, $child) { if (!$carry === false) { return false; } @@ -443,7 +447,7 @@ trait NodeTrait { $result = false; if ($this->hasChildNodes()) { - foreach ($this->getChildren() as $child) { + foreach ($this->childNodes as $child) { if (in_array($child->nodeName, $this->divToPElements)) { $result = true; } else { @@ -500,18 +504,22 @@ trait NodeTrait ); } + /** + * In the original JS project they check if the node has the style display=none, which unfortunately + * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none". + * + * Might be a good idea to check for classes or other attributes like 'aria-hidden' + * + * @return bool + */ public function isProbablyVisible() { - /* - * In the original JS project they check if the node has the style display=none, which unfortunately - * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none". - * - * Might be a good idea to check for classes or other attributes like 'aria-hidden' - */ - return !preg_match('/display:( )?none/', $this->getAttribute('style')) && !$this->hasAttribute('hidden'); } + /** + * @return bool + */ public function isWhitespace() { return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) || @@ -557,4 +565,23 @@ trait NodeTrait $count -= ($count - $nodes->length); } } + + /** + * Mimics JS's firstElementChild property. PHP only has firstChild which could be any type of DOMNode. Use this + * function to get the first one that is an DOMElement node. + * + * @return \DOMElement|null + */ + public function getFirstElementChild() + { + if ($this->childNodes instanceof \Traversable) { + foreach ($this->childNodes as $node) { + if ($node instanceof \DOMElement) { + return $node; + } + } + } + + return null; + } } diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php index 7a1f18ee4..cbf78bae0 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php @@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes; use andreskrey\Readability\Nodes\DOM\DOMDocument; use andreskrey\Readability\Nodes\DOM\DOMElement; use andreskrey\Readability\Nodes\DOM\DOMNode; +use andreskrey\Readability\Nodes\DOM\DOMNodeList; /** * Class NodeUtility. @@ -157,4 +158,23 @@ class NodeUtility return ($originalNode) ? $originalNode->nextSibling : $originalNode; } + + /** + * Remove all empty DOMNodes from DOMNodeLists. + * + * @param \DOMNodeList $list + * + * @return DOMNodeList + */ + public static function filterTextNodes(\DOMNodeList $list) + { + $newList = new DOMNodeList(); + foreach ($list as $node) { + if ($node->nodeType !== XML_TEXT_NODE || mb_strlen(trim($node->nodeValue))) { + $newList->add($node); + } + } + + return $newList; + } } diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php index 7b7eed6bf..6bcbf78d7 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php @@ -56,6 +56,13 @@ class Readability */ protected $author = null; + /** + * Website name. + * + * @var string|null + */ + protected $siteName = null; + /** * Direction of the text. * @@ -287,10 +294,10 @@ class Readability $values = []; // property is a space-separated list of values - $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image)\s*/i'; + $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image|site_name)(?!:)\s*/i'; // name is a single value - $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image)\s*$/i'; + $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image|site_name)(?!:)\s*$/i'; // Find description tags. foreach ($this->dom->getElementsByTagName('meta') as $meta) { @@ -332,7 +339,6 @@ class Readability * This could be easily replaced with an ugly set of isset($values['key']) or a bunch of ??s. * Will probably replace it with ??s after dropping support of PHP5.6 */ - $key = current(array_intersect([ 'dc:title', 'dcterm:title', @@ -373,11 +379,18 @@ class Readability // get main image $key = current(array_intersect([ + 'image', 'og:image', 'twitter:image' ], array_keys($values))); $this->setImage(isset($values[$key]) ? $values[$key] : null); + + $key = current(array_intersect([ + 'og:site_name' + ], array_keys($values))); + + $this->setSiteName(isset($values[$key]) ? $values[$key] : null); } /** @@ -722,7 +735,7 @@ class Readability */ if ($node->hasSingleTagInsideElement('p') && $node->getLinkDensity() < 0.25) { $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128))); - $pNode = $node->getChildren(true)[0]; + $pNode = NodeUtility::filterTextNodes($node->childNodes)->item(0); $node->parentNode->replaceChild($pNode, $node); $node = $pNode; $elementsToScore[] = $node; @@ -1082,7 +1095,7 @@ class Readability // If the top candidate is the only child, use parent instead. This will help sibling // joining logic when adjacent content is actually located in parent's sibling node. $parentOfTopCandidate = $topCandidate->parentNode; - while ($parentOfTopCandidate->nodeName !== 'body' && count($parentOfTopCandidate->getChildren(true)) === 1) { + while ($parentOfTopCandidate->nodeName !== 'body' && count(NodeUtility::filterTextNodes($parentOfTopCandidate->childNodes)) === 1) { $topCandidate = $parentOfTopCandidate; $parentOfTopCandidate = $topCandidate->parentNode; } @@ -1102,14 +1115,16 @@ class Readability $siblingScoreThreshold = max(10, $topCandidate->contentScore * 0.2); // Keep potential top candidate's parent node to try to get text direction of it later. $parentOfTopCandidate = $topCandidate->parentNode; - $siblings = $parentOfTopCandidate->getChildren(); + $siblings = $parentOfTopCandidate->childNodes; $hasContent = false; $this->logger->info('[Rating] Adding top candidate siblings...'); - /** @var DOMElement $sibling */ - foreach ($siblings as $sibling) { + /* @var DOMElement $sibling */ + // Can't foreach here because down there we might change the tag name and that causes the foreach to skip items + for ($i = 0; $i < $siblings->length; $i++) { + $sibling = $siblings[$i]; $append = false; if ($sibling === $topCandidate) { @@ -1147,7 +1162,6 @@ class Readability * We have a node that isn't a common block level element, like a form or td tag. * Turn it into a div so it doesn't get filtered out later by accident. */ - $sibling = NodeUtility::setNodeTag($sibling, 'div'); } @@ -1266,11 +1280,11 @@ class Readability // Remove single-cell tables foreach ($article->shiftingAwareGetElementsByTagName('table') as $table) { /** @var DOMNode $table */ - $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->childNodes[0] : $table; + $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->getFirstElementChild() : $table; if ($tbody->hasSingleTagInsideElement('tr')) { - $row = $tbody->firstChild; + $row = $tbody->getFirstElementChild(); if ($row->hasSingleTagInsideElement('td')) { - $cell = $row->firstChild; + $cell = $row->getFirstElementChild(); $cell = NodeUtility::setNodeTag($cell, (array_reduce(iterator_to_array($cell->childNodes), function ($carry, $node) { return $node->isPhrasingContent() && $carry; }, true)) ? 'p' : 'div'); @@ -1597,7 +1611,7 @@ class Readability $node->removeAttribute('class'); } - for ($node = $node->firstChild; $node !== null; $node = $node->nextSibling) { + for ($node = $node->getFirstElementChild(); $node !== null; $node = $node->nextSibling) { $this->_cleanClasses($node); } } @@ -1756,6 +1770,22 @@ class Readability $this->author = $author; } + /** + * @return string|null + */ + public function getSiteName() + { + return $this->siteName; + } + + /** + * @param string $siteName + */ + protected function setSiteName($siteName) + { + $this->siteName = $siteName; + } + /** * @return null|string */