diff --git a/classes/article.php b/classes/article.php
index 943528f2a..fc81838ed 100755
--- a/classes/article.php
+++ b/classes/article.php
@@ -305,19 +305,9 @@ class Article extends Handler_Protected {
 				post_int_id = ? AND owner_uid = ?");
 			$sth->execute([$int_id, $_SESSION['uid']]);
 
+			$tags = FeedItem_Common::normalize_categories($tags);
+
 			foreach ($tags as $tag) {
-				$tag = Article::sanitize_tag($tag);
-
-				if (!Article::tag_is_valid($tag)) {
-					continue;
-				}
-
-				if (preg_match("/^[0-9]*$/", $tag)) {
-					continue;
-				}
-
-				// print "";
-
 				if ($tag != '') {
 					$sth = $this->pdo->prepare("INSERT INTO ttrss_tags
 								(post_int_id, owner_uid, tag_name)
@@ -331,7 +321,6 @@ class Article extends Handler_Protected {
 
 			/* update tag cache */
 
-			sort($tags_to_cache);
 			$tags_str = join(",", $tags_to_cache);
 
 			$sth = $this->pdo->prepare("UPDATE ttrss_user_entries
@@ -802,27 +791,6 @@ class Article extends Handler_Protected {
 		return $rv;
 	}
 
-	static function sanitize_tag($tag) {
-		$tag = trim($tag);
-
-		$tag = mb_strtolower($tag, 'utf-8');
-
-		$tag = preg_replace('/[,\'\"\+\>\<]/', "", $tag);
-
-		if (DB_TYPE == "mysql") {
-			$tag = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $tag);
-		}
-
-		return $tag;
-	}
-
-	static function tag_is_valid($tag) {
-		if (!$tag || is_numeric($tag) || mb_strlen($tag) > 250)
-			return false;
-
-		return true;
-	}
-
 	static function get_article_image($enclosures, $content, $site_url) {
 		$article_image = "";
 
diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php
index a962b59f2..a03080981 100755
--- a/classes/feeditem/atom.php
+++ b/classes/feeditem/atom.php
@@ -103,20 +103,20 @@ class FeedItem_Atom extends FeedItem_Common {
 
 	function get_categories() {
 		$categories = $this->elem->getElementsByTagName("category");
-		$cats = array();
+		$cats = [];
 
 		foreach ($categories as $cat) {
 			if ($cat->hasAttribute("term"))
-				array_push($cats, trim($cat->getAttribute("term")));
+				array_push($cats, $cat->getAttribute("term"));
 		}
 
 		$categories = $this->xpath->query("dc:subject", $this->elem);
 
 		foreach ($categories as $cat) {
-			array_push($cats, clean(trim($cat->nodeValue)));
+			array_push($cats, $cat->nodeValue);
 		}
 
-		return $cats;
+		return $this->normalize_categories($cats);
 	}
 
 	function get_enclosures() {
diff --git a/classes/feeditem/common.php b/classes/feeditem/common.php
index 3193ed273..f208f4a48 100755
--- a/classes/feeditem/common.php
+++ b/classes/feeditem/common.php
@@ -162,4 +162,53 @@ abstract class FeedItem_Common extends FeedItem {
 		}
 	}
 
+	/**
+	 * Turns raw category strings into a sorted, de-duplicated list of tags.
+	 *
+	 * Every input string is split on commas; each piece is lowercased, trimmed,
+	 * passed through clean(), stripped of commas and quotes, prefixed with "t:"
+	 * when numeric, and cut to 250 characters. Empty results are dropped.
+	 *
+	 * @param array $cats raw category strings, each possibly comma-separated
+	 * @return array normalized tags, sorted and unique (keys are not sequential)
+	 */
+	static function normalize_categories($cats) {
+
+		$tmp = [];
+
+		foreach ($cats as $rawcat) {
+			$tmp = array_merge($tmp, explode(",", $rawcat));
+		}
+
+		$tmp = array_map(function($srccat) {
+			$cat = clean(trim(mb_strtolower($srccat, 'utf-8')));
+
+			// strip separators and quotes first so that quoted digits are
+			// still caught by the numeric check below
+			$cat = preg_replace('/[,\'\"]/', "", $cat);
+
+			// we don't support numeric tags
+			if (is_numeric($cat))
+				$cat = 't:' . $cat;
+
+			if (DB_TYPE == "mysql") {
+				$cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat);
+			}
+
+			if (mb_strlen($cat) > 250)
+				$cat = mb_substr($cat, 0, 250);
+
+			return $cat;
+		}, $tmp);
+
+		// drop empty tags ("a,,b", trailing commas, whitespace-only input);
+		// the feed update code stores whatever this function returns
+		$tmp = array_filter($tmp, function($cat) {
+			return (string)$cat !== '';
+		});
+
+		asort($tmp);
+
+		return array_unique($tmp);
+	}
 }
diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php
index 916c73ec4..1f7953c51 100755
--- a/classes/feeditem/rss.php
+++ b/classes/feeditem/rss.php
@@ -97,19 +97,19 @@ class FeedItem_RSS extends FeedItem_Common {
 
 	function get_categories() {
 		$categories = $this->elem->getElementsByTagName("category");
-		$cats = array();
+		$cats = [];
 
 		foreach ($categories as $cat) {
-			array_push($cats, trim($cat->nodeValue));
+			array_push($cats, $cat->nodeValue);
 		}
 
 		$categories = $this->xpath->query("dc:subject", $this->elem);
 
 		foreach ($categories as $cat) {
-			array_push($cats, clean(trim($cat->nodeValue)));
+			array_push($cats, $cat->nodeValue);
 		}
 
-		return $cats;
+		return $this->normalize_categories($cats);
 	}
 
 	function get_enclosures() {
diff --git a/classes/rssutils.php b/classes/rssutils.php
index 3db758b48..1482e6ea9 100755
--- a/classes/rssutils.php
+++ b/classes/rssutils.php
@@ -626,28 +626,8 @@ class RSSUtils {
 				Debug::log("author $entry_author", Debug::$LOG_VERBOSE);
 				Debug::log("looking for tags...", Debug::$LOG_VERBOSE);
 
-				// parse entries into tags
-
-				$additional_tags = array();
-
-				$additional_tags_src = $item->get_categories();
-
-				if (is_array($additional_tags_src)) {
-					foreach ($additional_tags_src as $tobj) {
-						array_push($additional_tags, $tobj);
-					}
-				}
-
-				$entry_tags = array_unique($additional_tags);
-
-				for ($i = 0; $i < count($entry_tags); $i++) {
-					$entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
-
-					// we don't support numeric tags, let's prefix them
-					if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
-				}
-
-				Debug::log("tags found: " . join(",", $entry_tags), Debug::$LOG_VERBOSE);
+				$entry_tags = $item->get_categories();
+				Debug::log("tags found: " . join(", ", $entry_tags), Debug::$LOG_VERBOSE);
 
 				Debug::log("done collecting data.", Debug::$LOG_VERBOSE);
 
@@ -1107,9 +1087,7 @@ class RSSUtils {
 						$manual_tags = trim_array(explode(",", $f["param"]));
 
 						foreach ($manual_tags as $tag) {
-							if (Article::tag_is_valid($tag)) {
-								array_push($entry_tags, $tag);
-							}
+							array_push($entry_tags, $tag);
 						}
 					}
 				}
@@ -1122,19 +1100,17 @@ class RSSUtils {
 			$filtered_tags = array();
 			$tags_to_cache = array();
 
-			if ($entry_tags && is_array($entry_tags)) {
-				foreach ($entry_tags as $tag) {
-					if (array_search($tag, $boring_tags) === false) {
-						array_push($filtered_tags, $tag);
-					}
+			foreach ($entry_tags as $tag) {
+				if (array_search($tag, $boring_tags) === false) {
+					array_push($filtered_tags, $tag);
 				}
 			}
 
 			$filtered_tags = array_unique($filtered_tags);
 
-			if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
-				Debug::log("filtered article tags:", Debug::$LOG_VERBOSE);
-				print_r($filtered_tags);
+			if (Debug::get_loglevel() >= Debug::$LOG_VERBOSE) {
+				Debug::log("filtered tags: " . implode(", ", $filtered_tags), Debug::$LOG_VERBOSE);
+
 			}
 
 			// Save article tags in the database
@@ -1149,12 +1125,9 @@ class RSSUtils {
 							(owner_uid,tag_name,post_int_id)
 							VALUES (?, ?, ?)");
 
+				$filtered_tags = FeedItem_Common::normalize_categories($filtered_tags);
+
 				foreach ($filtered_tags as $tag) {
-
-					$tag = Article::sanitize_tag($tag);
-
-					if (!Article::tag_is_valid($tag)) continue;
-
 					$tsth->execute([$tag, $entry_int_id, $owner_uid]);
 
 					if (!$tsth->fetch()) {
@@ -1165,9 +1138,6 @@ class RSSUtils {
 				}
 
 				/* update the cache */
-
-				$tags_to_cache = array_unique($tags_to_cache);
-
 				$tags_str = join(",", $tags_to_cache);
 
 				$tsth = $pdo->prepare("UPDATE ttrss_user_entries