parser: clean() attribute values by default (except content)

This commit is contained in:
Andrew Dolgov 2018-12-26 10:16:11 +03:00
parent 949bfa3457
commit 55ef85adc0
4 changed files with 55 additions and 55 deletions

View File

@ -8,7 +8,7 @@ class FeedItem_Atom extends FeedItem_Common {
if ($id) {
return $id->nodeValue;
} else {
return $this->get_link();
return clean($this->get_link());
}
}
@ -44,9 +44,9 @@ class FeedItem_Atom extends FeedItem_Common {
$base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link);
if ($base)
return rewrite_relative_url($base, trim($link->getAttribute("href")));
return rewrite_relative_url($base, clean(trim($link->getAttribute("href"))));
else
return trim($link->getAttribute("href"));
return clean(trim($link->getAttribute("href")));
}
}
@ -56,7 +56,7 @@ class FeedItem_Atom extends FeedItem_Common {
$title = $this->elem->getElementsByTagName("title")->item(0);
if ($title) {
return trim($title->nodeValue);
return clean(trim($title->nodeValue));
}
}
@ -113,7 +113,7 @@ class FeedItem_Atom extends FeedItem_Common {
$categories = $this->xpath->query("dc:subject", $this->elem);
foreach ($categories as $cat) {
array_push($cats, trim($cat->nodeValue));
array_push($cats, clean(trim($cat->nodeValue)));
}
return $cats;
@ -129,9 +129,9 @@ class FeedItem_Atom extends FeedItem_Common {
if ($link->getAttribute("rel") == "enclosure") {
$enc = new FeedEnclosure();
$enc->type = $link->getAttribute("type");
$enc->link = $link->getAttribute("href");
$enc->length = $link->getAttribute("length");
$enc->type = clean($link->getAttribute("type"));
$enc->link = clean($link->getAttribute("href"));
$enc->length = clean($link->getAttribute("length"));
array_push($encs, $enc);
}
@ -147,12 +147,12 @@ class FeedItem_Atom extends FeedItem_Common {
$lang = $this->elem->getAttributeNS(self::NS_XML, "lang");
if (!empty($lang)) {
return $lang;
return clean($lang);
} else {
// Fall back to the language declared on the feed, if any.
foreach ($this->doc->childNodes as $child) {
if (method_exists($child, "getAttributeNS")) {
return $child->getAttributeNS(self::NS_XML, "lang");
return clean($child->getAttributeNS(self::NS_XML, "lang"));
}
}
}

View File

@ -31,20 +31,20 @@ abstract class FeedItem_Common extends FeedItem {
if ($author) {
$name = $author->getElementsByTagName("name")->item(0);
if ($name) return $name->nodeValue;
if ($name) return clean($name->nodeValue);
$email = $author->getElementsByTagName("email")->item(0);
if ($email) return $email->nodeValue;
if ($email) return clean($email->nodeValue);
if ($author->nodeValue)
return $author->nodeValue;
return clean($author->nodeValue);
}
$author = $this->xpath->query("dc:creator", $this->elem)->item(0);
if ($author) {
return $author->nodeValue;
return clean($author->nodeValue);
}
}
@ -53,15 +53,15 @@ abstract class FeedItem_Common extends FeedItem {
//might give a wrong result if a default namespace was declared (possible with XPath 2.0)
$com_url = $this->xpath->query("comments", $this->elem)->item(0);
if($com_url)
return $com_url->nodeValue;
if ($com_url)
return clean($com_url->nodeValue);
//Atom Threading Extension (RFC 4685) stuff. Could be used in RSS feeds, so it's in common.
//'text/html' for type is too restrictive?
$com_url = $this->xpath->query("atom:link[@rel='replies' and contains(@type,'text/html')]/@href", $this->elem)->item(0);
if($com_url)
return $com_url->nodeValue;
if ($com_url)
return clean($com_url->nodeValue);
}
function get_comments_count() {
@ -70,7 +70,7 @@ abstract class FeedItem_Common extends FeedItem {
$comments = $this->xpath->query($query, $this->elem)->item(0);
if ($comments) {
return $comments->nodeValue;
return clean($comments->nodeValue);
}
}
@ -83,19 +83,19 @@ abstract class FeedItem_Common extends FeedItem {
foreach ($enclosures as $enclosure) {
$enc = new FeedEnclosure();
$enc->type = $enclosure->getAttribute("type");
$enc->link = $enclosure->getAttribute("url");
$enc->length = $enclosure->getAttribute("length");
$enc->height = $enclosure->getAttribute("height");
$enc->width = $enclosure->getAttribute("width");
$enc->type = clean($enclosure->getAttribute("type"));
$enc->link = clean($enclosure->getAttribute("url"));
$enc->length = clean($enclosure->getAttribute("length"));
$enc->height = clean($enclosure->getAttribute("height"));
$enc->width = clean($enclosure->getAttribute("width"));
$medium = $enclosure->getAttribute("medium");
$medium = clean($enclosure->getAttribute("medium"));
if (!$enc->type && $medium) {
$enc->type = strtolower("$medium/generic");
}
$desc = $this->xpath->query("media:description", $enclosure)->item(0);
if ($desc) $enc->title = strip_tags($desc->nodeValue);
if ($desc) $enc->title = clean($desc->nodeValue);
array_push($encs, $enc);
}
@ -108,23 +108,23 @@ abstract class FeedItem_Common extends FeedItem {
$content = $this->xpath->query("media:content", $enclosure)->item(0);
if ($content) {
$enc->type = $content->getAttribute("type");
$enc->link = $content->getAttribute("url");
$enc->length = $content->getAttribute("length");
$enc->height = $content->getAttribute("height");
$enc->width = $content->getAttribute("width");
$enc->type = clean($content->getAttribute("type"));
$enc->link = clean($content->getAttribute("url"));
$enc->length = clean($content->getAttribute("length"));
$enc->height = clean($content->getAttribute("height"));
$enc->width = clean($content->getAttribute("width"));
$medium = $content->getAttribute("medium");
$medium = clean($content->getAttribute("medium"));
if (!$enc->type && $medium) {
$enc->type = strtolower("$medium/generic");
}
$desc = $this->xpath->query("media:description", $content)->item(0);
if ($desc) {
$enc->title = strip_tags($desc->nodeValue);
$enc->title = clean($desc->nodeValue);
} else {
$desc = $this->xpath->query("media:description", $enclosure)->item(0);
if ($desc) $enc->title = strip_tags($desc->nodeValue);
if ($desc) $enc->title = clean($desc->nodeValue);
}
array_push($encs, $enc);
@ -137,9 +137,9 @@ abstract class FeedItem_Common extends FeedItem {
$enc = new FeedEnclosure();
$enc->type = "image/generic";
$enc->link = $enclosure->getAttribute("url");
$enc->height = $enclosure->getAttribute("height");
$enc->width = $enclosure->getAttribute("width");
$enc->link = clean($enclosure->getAttribute("url"));
$enc->height = clean($enclosure->getAttribute("height"));
$enc->width = clean($enclosure->getAttribute("width"));
array_push($encs, $enc);
}

View File

@ -4,9 +4,9 @@ class FeedItem_RSS extends FeedItem_Common {
$id = $this->elem->getElementsByTagName("guid")->item(0);
if ($id) {
return $id->nodeValue;
return clean($id->nodeValue);
} else {
return $this->get_link();
return clean($this->get_link());
}
}
@ -33,20 +33,20 @@ class FeedItem_RSS extends FeedItem_Common {
|| $link->getAttribute("rel") == "alternate"
|| $link->getAttribute("rel") == "standout")) {
return trim($link->getAttribute("href"));
return clean(trim($link->getAttribute("href")));
}
}
$link = $this->elem->getElementsByTagName("guid")->item(0);
if ($link && $link->hasAttributes() && $link->getAttribute("isPermaLink") == "true") {
return trim($link->nodeValue);
return clean(trim($link->nodeValue));
}
$link = $this->elem->getElementsByTagName("link")->item(0);
if ($link) {
return trim($link->nodeValue);
return clean(trim($link->nodeValue));
}
}
@ -54,7 +54,7 @@ class FeedItem_RSS extends FeedItem_Common {
$title = $this->xpath->query("title", $this->elem)->item(0);
if ($title) {
return trim($title->nodeValue);
return clean(trim($title->nodeValue));
}
// if the document has a default namespace then querying for
@ -62,7 +62,7 @@ class FeedItem_RSS extends FeedItem_Common {
$title = $this->elem->getElementsByTagName("title")->item(0);
if ($title) {
return trim($title->nodeValue);
return clean(trim($title->nodeValue));
}
}
@ -106,7 +106,7 @@ class FeedItem_RSS extends FeedItem_Common {
$categories = $this->xpath->query("dc:subject", $this->elem);
foreach ($categories as $cat) {
array_push($cats, trim($cat->nodeValue));
array_push($cats, clean(trim($cat->nodeValue)));
}
return $cats;
@ -120,11 +120,11 @@ class FeedItem_RSS extends FeedItem_Common {
foreach ($enclosures as $enclosure) {
$enc = new FeedEnclosure();
$enc->type = $enclosure->getAttribute("type");
$enc->link = $enclosure->getAttribute("url");
$enc->length = $enclosure->getAttribute("length");
$enc->height = $enclosure->getAttribute("height");
$enc->width = $enclosure->getAttribute("width");
$enc->type = clean($enclosure->getAttribute("type"));
$enc->link = clean($enclosure->getAttribute("url"));
$enc->length = clean($enclosure->getAttribute("length"));
$enc->height = clean($enclosure->getAttribute("height"));
$enc->width = clean($enclosure->getAttribute("width"));
array_push($encs, $enc);
}
@ -141,7 +141,7 @@ class FeedItem_RSS extends FeedItem_Common {
return "";
}
return $languages[0]->textContent;
return clean($languages[0]->textContent);
}
}

View File

@ -246,11 +246,11 @@ class FeedParser {
}
function get_link() {
return $this->link;
return clean($this->link);
}
function get_title() {
return $this->title;
return clean($this->title);
}
function get_items() {
@ -266,7 +266,7 @@ class FeedParser {
foreach ($links as $link) {
if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) {
array_push($rv, trim($link->getAttribute('href')));
array_push($rv, clean(trim($link->getAttribute('href'))));
}
}
break;
@ -275,7 +275,7 @@ class FeedParser {
foreach ($links as $link) {
if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) {
array_push($rv, trim($link->getAttribute('href')));
array_push($rv, clean(trim($link->getAttribute('href'))));
}
}
break;