remove magpie, fix article filter plugins

This commit is contained in:
Andrew Dolgov 2012-12-24 13:45:34 +04:00
parent e935c2bc54
commit 19b3992b78
18 changed files with 108 additions and 3226 deletions

View File

@ -93,17 +93,6 @@
1440 => __("Daily"),
10080 => __("Weekly"));
$update_methods = array(
0 => __("Default"),
1 => __("Magpie"),
2 => __("SimplePie"));
if (DEFAULT_UPDATE_METHOD == "1") {
$update_methods[0] .= ' (SimplePie)';
} else {
$update_methods[0] .= ' (Magpie)';
}
$access_level_names = array(
0 => __("User"),
5 => __("Power User"),

View File

@ -57,7 +57,11 @@ class PluginHost {
}
/**
 * Return the plugins registered for the given hook type.
 *
 * Looks the type up in $this->hooks and falls back to an empty array when
 * nothing has been registered for it, so callers can always iterate over
 * the result with foreach.
 *
 * @param mixed $type hook identifier used as the key into $this->hooks
 * @return array registered entries for $type, or an empty array
 */
function get_hooks($type) {
	// The isset() guard must run before the lookup: an unconditional
	// return of $this->hooks[$type] would read an undefined index for
	// hook types that have no registrations and make the guard unreachable.
	if (isset($this->hooks[$type])) {
		return $this->hooks[$type];
	}

	return array();
}
function load($classlist) {

View File

@ -475,7 +475,6 @@ class Pref_Feeds extends Handler_Protected {
function editfeed() {
global $purge_intervals;
global $update_intervals;
global $update_methods;
$feed_id = db_escape_string($_REQUEST["id"]);
@ -547,20 +546,10 @@ class Pref_Feeds extends Handler_Protected {
print_select_hash("update_interval", $update_interval, $update_intervals,
'dojoType="dijit.form.Select"');
/* Update method */
$update_method = db_fetch_result($result, 0, "update_method",
'dojoType="dijit.form.Select"');
print " " . __('using') . " ";
print_select_hash("update_method", $update_method, $update_methods,
'dojoType="dijit.form.Select"');
/* Purge intl */
$purge_interval = db_fetch_result($result, 0, "purge_interval");
/* Purge intl */
print "<hr/>";
print __('Article purging:') . " ";
@ -746,7 +735,6 @@ class Pref_Feeds extends Handler_Protected {
function editfeeds() {
global $purge_intervals;
global $update_intervals;
global $update_methods;
$feed_ids = db_escape_string($_REQUEST["ids"]);
@ -805,13 +793,6 @@ class Pref_Feeds extends Handler_Protected {
$this->batch_edit_cbox("update_interval");
/* Update method */
print " " . __('using') . " ";
print_select_hash("update_method", $update_method, $update_methods,
'disabled="1" dojoType="dijit.form.Select"');
$this->batch_edit_cbox("update_method");
/* Purge intl */
if (FORCE_ARTICLE_PURGE == 0) {
@ -929,7 +910,6 @@ class Pref_Feeds extends Handler_Protected {
db_escape_string($_POST["cache_images"]));
$cache_content = checkbox_to_sql_bool(
db_escape_string($_POST["cache_content"]));
$update_method = (int) db_escape_string($_POST["update_method"]);
$always_display_enclosures = checkbox_to_sql_bool(
db_escape_string($_POST["always_display_enclosures"]));
@ -970,8 +950,7 @@ class Pref_Feeds extends Handler_Protected {
always_display_enclosures = $always_display_enclosures,
mark_unread_on_update = $mark_unread_on_update,
update_on_checksum_change = $update_on_checksum_change,
update_method = '$update_method'
WHERE id = '$feed_id' AND owner_uid = " . $_SESSION["uid"]);
WHERE id = '$feed_id' AND owner_uid = " . $_SESSION["uid"]);
} else {
$feed_data = array();
@ -1045,10 +1024,6 @@ class Pref_Feeds extends Handler_Protected {
$qpart = "rtl_content = $rtl_content";
break;
case "update_method":
$qpart = "update_method = '$update_method'";
break;
case "cat_id":
$qpart = $category_qpart_nocomma;
break;

View File

@ -684,8 +684,6 @@ class RPC extends Handler_Protected {
db_query($this->link, "BEGIN");
$update_method = 0;
if ($cat_id == "0" || !$cat_id) {
$cat_qpart = "NULL";
} else {
@ -701,7 +699,7 @@ class RPC extends Handler_Protected {
"INSERT INTO ttrss_feeds
(owner_uid,feed_url,title,cat_id,auth_login,auth_pass,update_method)
VALUES ('".$_SESSION["uid"]."', '$feed',
'[Unknown]', $cat_qpart, '$login', '$pass', '$update_method')");
'[Unknown]', $cat_qpart, '$login', '$pass', 0)");
}
db_query($this->link, "COMMIT");

View File

@ -74,11 +74,6 @@
// *** Feed settings ***
// *********************
define('DEFAULT_UPDATE_METHOD', 1);
// Which feed parsing library to use as default:
// 0 - Magpie (deprecated)
// 1 - SimplePie
define('FORCE_ARTICLE_PURGE', 0);
// When this option is not 0, users ability to control feed purging
// intervals is disabled and all articles (which are not starred)

View File

@ -3,6 +3,7 @@
define('SCHEMA_VERSION', 99);
$fetch_last_error = false;
$pluginhost = false;
function __autoload($class) {
$class_file = str_replace("_", "/", strtolower(basename($class)));
@ -101,11 +102,6 @@
require_once 'db-prefs.php';
require_once 'version.php';
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8');
define('SELF_USER_AGENT', 'Tiny Tiny RSS/' . VERSION . ' (http://tt-rss.org/)');
define('MAGPIE_USER_AGENT', SELF_USER_AGENT);
ini_set('user_agent', SELF_USER_AGENT);
require_once 'lib/pubsubhubbub/publisher.php';
@ -1629,8 +1625,6 @@
if (!$url || !validate_feed_url($url)) return array("code" => 2);
$update_method = 0;
$contents = @fetch_file_contents($url, false, $auth_login, $auth_pass);
if (!$contents) {
@ -1664,7 +1658,7 @@
"INSERT INTO ttrss_feeds
(owner_uid,feed_url,title,cat_id, auth_login,auth_pass,update_method)
VALUES ('".$_SESSION["uid"]."', '$url',
'[Unknown]', $cat_qpart, '$auth_login', '$auth_pass', '$update_method')");
'[Unknown]', $cat_qpart, '$auth_login', '$auth_pass', 0)");
$result = db_query($link,
"SELECT id FROM ttrss_feeds WHERE feed_url = '$url'

View File

@ -1,4 +1,4 @@
<?php # This file has been generated at: Thu Nov 22 16:05:39 MSK 2012
<?php # This file has been generated at: Mon Dec 24 13:34:00 MSK 2012
__("Title");
__("Title or Content");

View File

@ -163,8 +163,6 @@
$override_url = false) {
require_once "lib/simplepie/simplepie.inc";
require_once "lib/magpierss/rss_fetch.inc";
require_once 'lib/magpierss/rss_utils.inc';
$debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
@ -173,7 +171,7 @@
}
$result = db_query($link, "SELECT id,update_interval,auth_login,
feed_url,auth_pass,cache_images,update_method,last_updated,cache_content,
feed_url,auth_pass,cache_images,last_updated,cache_content,
mark_unread_on_update, owner_uid, update_on_checksum_change,
pubsub_state
FROM ttrss_feeds WHERE id = '$feed'");
@ -185,7 +183,6 @@
return false;
}
$update_method = db_fetch_result($result, 0, "update_method");
$last_updated = db_fetch_result($result, 0, "last_updated");
$owner_uid = db_fetch_result($result, 0, "owner_uid");
$mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result,
@ -200,27 +197,6 @@
$auth_login = db_fetch_result($result, 0, "auth_login");
$auth_pass = db_fetch_result($result, 0, "auth_pass");
if ($update_method == 0)
$update_method = DEFAULT_UPDATE_METHOD + 1;
// 1 - Magpie
// 2 - SimplePie
// 3 - Twitter OAuth
if ($update_method == 2)
$use_simplepie = true;
else
$use_simplepie = false;
if ($debug_enabled) {
_debug("update method: $update_method (feed setting: $update_method) (use simplepie: $use_simplepie)\n");
}
if ($update_method == 1) {
$auth_login = urlencode($auth_login);
$auth_pass = urlencode($auth_pass);
}
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
$cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
$fetch_url = db_fetch_result($result, 0, "feed_url");
@ -248,43 +224,33 @@
$cache_age = (is_null($last_updated) || $last_updated == '1970-01-01 00:00:00') ?
-1 : get_feed_update_interval($link, $feed) * 60;
if ($update_method == 1) {
$simplepie_cache_dir = CACHE_DIR . "/simplepie";
define('MAGPIE_CACHE_AGE', $cache_age);
define('MAGPIE_CACHE_ON', !$no_cache);
define('MAGPIE_FETCH_TIME_OUT', $no_cache ? 15 : 60);
define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie");
$rss = @fetch_rss($fetch_url);
} else {
$simplepie_cache_dir = CACHE_DIR . "/simplepie";
if (!is_dir($simplepie_cache_dir)) {
mkdir($simplepie_cache_dir);
}
$rss = new SimplePie();
$rss->set_useragent(SELF_USER_AGENT);
$rss->set_timeout($no_cache ? 15 : 60);
$rss->set_feed_url($fetch_url);
$rss->set_output_encoding('UTF-8');
//$rss->force_feed(true);
if ($debug_enabled) {
_debug("feed update interval (sec): " .
get_feed_update_interval($link, $feed)*60);
}
$rss->enable_cache(!$no_cache);
if (!$no_cache) {
$rss->set_cache_location($simplepie_cache_dir);
$rss->set_cache_duration($cache_age);
}
$rss->init();
if (!is_dir($simplepie_cache_dir)) {
mkdir($simplepie_cache_dir);
}
$rss = new SimplePie();
$rss->set_useragent(SELF_USER_AGENT);
$rss->set_timeout($no_cache ? 15 : 60);
$rss->set_feed_url($fetch_url);
$rss->set_output_encoding('UTF-8');
//$rss->force_feed(true);
if ($debug_enabled) {
_debug("feed update interval (sec): " .
get_feed_update_interval($link, $feed)*60);
}
$rss->enable_cache(!$no_cache);
if (!$no_cache) {
$rss->set_cache_location($simplepie_cache_dir);
$rss->set_cache_duration($cache_age);
}
$rss->init();
// print_r($rss);
if ($debug_enabled) {
@ -293,13 +259,7 @@
$feed = db_escape_string($feed);
if ($update_method == 2) {
$fetch_ok = !$rss->error();
} else {
$fetch_ok = !!$rss;
}
if ($fetch_ok) {
if (!$rss->error()) {
if ($debug_enabled) {
_debug("update_rss_feed: processing feed data...");
@ -326,19 +286,7 @@
$owner_uid = db_fetch_result($result, 0, "owner_uid");
if ($use_simplepie) {
$site_url = db_escape_string(trim($rss->get_link()));
} else {
$site_url = db_escape_string(trim($rss->channel["link"]));
}
// weird, weird Magpie
if (!$use_simplepie) {
if (!$site_url) $site_url = db_escape_string($rss->channel["link_"]);
}
$site_url = rewrite_relative_url($fetch_url, $site_url);
$site_url = substr($site_url, 0, 250);
$site_url = db_escape_string(mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 250));
if ($debug_enabled) {
_debug("update_rss_feed: checking favicon...");
@ -353,11 +301,7 @@
if (!$registered_title || $registered_title == "[Unknown]") {
if ($use_simplepie) {
$feed_title = db_escape_string($rss->get_title());
} else {
$feed_title = db_escape_string($rss->channel["title"]);
}
$feed_title = db_escape_string($rss->get_title());
if ($debug_enabled) {
_debug("update_rss_feed: registering title: $feed_title");
@ -372,16 +316,8 @@
site_url = '$site_url' WHERE id = '$feed'");
}
// print "I: " . $rss->channel["image"]["url"];
if (!$use_simplepie) {
$icon_url = db_escape_string(trim($rss->image["url"]));
} else {
$icon_url = db_escape_string(trim($rss->get_image_url()));
}
$icon_url = rewrite_relative_url($fetch_url, $icon_url);
$icon_url = substr($icon_url, 0, 250);
$icon_url = db_escape_string(mb_substr(
rewrite_relative_url($fetch_url, $rss->get_image_url()), 0, 250));
if ($icon_url && $orig_icon_url != $icon_url) {
db_query($link, "UPDATE ttrss_feeds SET icon_url = '$icon_url' WHERE id = '$feed'");
@ -399,24 +335,11 @@
_debug("update_rss_feed: " . count($filters) . " filters loaded.");
}
if ($use_simplepie) {
$iterator = $rss->get_items();
} else {
$iterator = $rss->items;
if (!$iterator || !is_array($iterator)) $iterator = $rss->entries;
if (!$iterator || !is_array($iterator)) $iterator = $rss;
}
if (!is_array($iterator)) {
/* db_query($link, "UPDATE ttrss_feeds
SET last_error = 'Parse error: can\'t find any articles.'
WHERE id = '$feed'"); */
// clear any errors and mark feed as updated if fetched okay
// even if it's blank
$items = $rss->get_items();
if (!is_array($items)) {
if ($debug_enabled) {
_debug("update_rss_feed: entry iterator is not an array, no articles?");
_debug("update_rss_feed: no articles found.");
}
db_query($link, "UPDATE ttrss_feeds
@ -430,34 +353,13 @@
if ($debug_enabled) _debug("update_rss_feed: checking for PUSH hub...");
$feed_hub_url = false;
if ($use_simplepie) {
$links = $rss->get_links('hub');
if ($links && is_array($links)) {
foreach ($links as $l) {
$feed_hub_url = $l;
break;
}
}
$links = $rss->get_links('hub');
} else {
$atom = $rss->channel['atom'];
if ($atom) {
if ($atom['link@rel'] == 'hub') {
$feed_hub_url = $atom['link@href'];
}
if (!$feed_hub_url && $atom['link#'] > 1) {
for ($i = 2; $i <= $atom['link#']; $i++) {
if ($atom["link#$i@rel"] == 'hub') {
$feed_hub_url = $atom["link#$i@href"];
break;
}
}
}
} else {
$feed_hub_url = $rss->channel['link_hub'];
if ($links && is_array($links)) {
foreach ($links as $l) {
$feed_hub_url = $l;
break;
}
}
@ -487,25 +389,14 @@
_debug("update_rss_feed: processing articles...");
}
foreach ($iterator as $item) {
foreach ($items as $item) {
if ($_REQUEST['xdebug'] == 2) {
print_r($item);
}
if ($use_simplepie) {
$entry_guid = $item->get_id();
if (!$entry_guid) $entry_guid = $item->get_link();
if (!$entry_guid) $entry_guid = make_guid_from_title($item->get_title());
} else {
$entry_guid = $item["id"];
if (!$entry_guid) $entry_guid = $item["guid"];
if (!$entry_guid) $entry_guid = $item["about"];
if (!$entry_guid) $entry_guid = $item["link"];
if (!$entry_guid) $entry_guid = make_guid_from_title($item["title"]);
}
$entry_guid = $item->get_id();
if (!$entry_guid) $entry_guid = $item->get_link();
if (!$entry_guid) $entry_guid = make_guid_from_title($item->get_title());
if ($cache_content) {
$entry_guid = "ccache:$entry_guid";
@ -523,21 +414,9 @@
$entry_timestamp = "";
if ($use_simplepie) {
$entry_timestamp = strtotime($item->get_date());
} else {
$rss_2_date = $item['pubdate'];
$rss_1_date = $item['dc']['date'];
$atom_date = $item['issued'];
if (!$atom_date) $atom_date = $item['updated'];
$entry_timestamp = strtotime($item->get_date());
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
}
if ($entry_timestamp == "" || $entry_timestamp == -1 || !$entry_timestamp) {
if ($entry_timestamp == -1 || !$entry_timestamp) {
$entry_timestamp = time();
$no_orig_date = 'true';
} else {
@ -550,21 +429,8 @@
_debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]");
}
if ($use_simplepie) {
$entry_title = $item->get_title();
} else {
$entry_title = trim(strip_tags($item["title"]));
}
if ($use_simplepie) {
$entry_link = $item->get_link();
} else {
// strange Magpie workaround
$entry_link = $item["link_"];
if (!$entry_link) $entry_link = $item["link"];
}
$entry_link = rewrite_relative_url($site_url, $entry_link);
$entry_title = $item->get_title();
$entry_link = rewrite_relative_url($site_url, $item->get_link());
if ($debug_enabled) {
_debug("update_rss_feed: title $entry_title");
@ -573,37 +439,8 @@
if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
$entry_link = strip_tags($entry_link);
if ($use_simplepie) {
$entry_content = $item->get_content();
if (!$entry_content) $entry_content = $item->get_description();
} else {
$entry_content = $item["content:escaped"];
if (!$entry_content) $entry_content = $item["content:encoded"];
if (!$entry_content && is_array($entry_content)) $entry_content = $item["content"]["encoded"];
if (!$entry_content) $entry_content = $item["content"];
if (is_array($entry_content)) $entry_content = $entry_content[0];
// Magpie bugs are getting ridiculous
if (trim($entry_content) == "Array") $entry_content = false;
if (!$entry_content) $entry_content = $item["atom_content"];
if (!$entry_content) $entry_content = $item["summary"];
if (!$entry_content ||
strlen($entry_content) < strlen($item["description"])) {
$entry_content = $item["description"];
};
// WTF
if (is_array($entry_content)) {
$entry_content = $entry_content["encoded"];
if (!$entry_content) $entry_content = $entry_content["escaped"];
}
}
$entry_content = $item->get_content();
if (!$entry_content) $entry_content = $item->get_description();
if ($cache_images && is_writable(CACHE_DIR . '/images'))
$entry_content = cache_images($entry_content, $site_url, $debug_enabled);
@ -616,60 +453,30 @@
$entry_cached_content = "";
if ($use_simplepie) {
$entry_comments = strip_tags($item->data["comments"]);
if ($item->get_author()) {
$entry_author_item = $item->get_author();
$entry_author = $entry_author_item->get_name();
if (!$entry_author) $entry_author = $entry_author_item->get_email();
$entry_comments = $item->data["comments"];
$entry_author = db_escape_string($entry_author);
}
} else {
$entry_comments = strip_tags($item["comments"]);
if ($item->get_author()) {
$entry_author_item = $item->get_author();
$entry_author = $entry_author_item->get_name();
if (!$entry_author) $entry_author = $entry_author_item->get_email();
$entry_author = db_escape_string(strip_tags($item['dc']['creator']));
if ($item['author']) {
if (is_array($item['author'])) {
if (!$entry_author) {
$entry_author = db_escape_string(strip_tags($item['author']['name']));
}
if (!$entry_author) {
$entry_author = db_escape_string(strip_tags($item['author']['email']));
}
}
if (!$entry_author) {
$entry_author = db_escape_string(strip_tags($item['author']));
}
}
$entry_author = db_escape_string($entry_author);
}
if (preg_match('/^[\t\n\r ]*$/', $entry_author)) $entry_author = '';
$entry_guid = db_escape_string(strip_tags($entry_guid));
$entry_guid = mb_substr($entry_guid, 0, 250);
$entry_guid = db_escape_string(mb_substr($entry_guid, 0, 250));
$result = db_query($link, "SELECT id FROM ttrss_entries
WHERE guid = '$entry_guid'");
$entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250);
$entry_author = mb_substr($entry_author, 0, 250);
$entry_comments = db_escape_string(mb_substr($entry_comments, 0, 250));
$entry_author = db_escape_string(mb_substr($entry_author, 0, 250));
if ($use_simplepie) {
$num_comments = $item->get_item_tags('http://purl.org/rss/1.0/modules/slash/', 'comments');
$num_comments = $item->get_item_tags('http://purl.org/rss/1.0/modules/slash/', 'comments');
if (is_array($num_comments) && is_array($num_comments[0])) {
$num_comments = (int) $num_comments[0]["data"];
} else {
$num_comments = 0;
}
if (is_array($num_comments) && is_array($num_comments[0])) {
$num_comments = (int) $num_comments[0]["data"];
} else {
$num_comments = (int) $item["slash"]["comments"];
$num_comments = 0;
}
if ($debug_enabled) {
@ -681,58 +488,17 @@
$additional_tags = array();
if ($use_simplepie) {
$additional_tags_src = $item->get_categories();
$additional_tags_src = $item->get_categories();
if (is_array($additional_tags_src)) {
foreach ($additional_tags_src as $tobj) {
array_push($additional_tags, $tobj->get_term());
}
if (is_array($additional_tags_src)) {
foreach ($additional_tags_src as $tobj) {
array_push($additional_tags, $tobj->get_term());
}
}
if ($debug_enabled) {
_debug("update_rss_feed: category tags:");
print_r($additional_tags);
}
} else {
$t_ctr = $item['category#'];
if ($t_ctr == 0) {
$additional_tags = array();
} else if ($t_ctr > 0) {
$additional_tags = array($item['category']);
if ($item['category@term']) {
array_push($additional_tags, $item['category@term']);
}
for ($i = 0; $i <= $t_ctr; $i++ ) {
if ($item["category#$i"]) {
array_push($additional_tags, $item["category#$i"]);
}
if ($item["category#$i@term"]) {
array_push($additional_tags, $item["category#$i@term"]);
}
}
}
// parse <dc:subject> elements
$t_ctr = $item['dc']['subject#'];
if ($t_ctr > 0) {
array_push($additional_tags, $item['dc']['subject']);
for ($i = 0; $i <= $t_ctr; $i++ ) {
if ($item['dc']["subject#$i"]) {
array_push($additional_tags, $item['dc']["subject#$i"]);
}
}
}
if ($debug_enabled) {
_debug("update_rss_feed: category tags:");
print_r($additional_tags);
}
if ($debug_enabled) {
@ -767,28 +533,26 @@
// TODO: less memory-hungry implementation
global $pluginhost;
foreach ($pluginhost->get_hooks($pluginhost::HOOK_ARTICLE_FILTER) as $p) {
if ($debug_enabled) {
_debug("update_rss_feed: applying plugin filters...");
}
$article = array("owner_uid" => $owner_uid,
"title" => $entry_title,
"content" => $entry_content,
"link" => $entry_link,
"tags" => $entry_tags,
"author" => $entry_author);
foreach ($filter_plugins as $plugin) {
$article = $plugin->hook_article_filter($article);
}
$entry_title = $article["title"];
$entry_content = $article["content"];
$entry_tags = $article["tags"];
$entry_author = $article["author"];
if ($debug_enabled) {
_debug("update_rss_feed: applying plugin filters..");
}
$article = array("owner_uid" => $owner_uid,
"title" => $entry_title,
"content" => $entry_content,
"link" => $entry_link,
"tags" => $entry_tags,
"author" => $entry_author);
foreach ($pluginhost->get_hooks($pluginhost::HOOK_ARTICLE_FILTER) as $plugin) {
$article = $plugin->hook_article_filter($article);
}
$entry_title = $article["title"];
$entry_content = $article["content"];
$entry_tags = $article["tags"];
$entry_author = $article["author"];
$entry_content = db_escape_string($entry_content, false);
$entry_title = db_escape_string($entry_title);
$entry_author = db_escape_string($entry_author);
@ -1111,67 +875,16 @@
$enclosures = array();
if ($use_simplepie) {
$encs = $item->get_enclosures();
if (is_array($encs)) {
foreach ($encs as $e) {
$e_item = array(
$e->link, $e->type, $e->length);
array_push($enclosures, $e_item);
}
}
} else {
// <enclosure>
$e_ctr = $item['enclosure#'];
if ($e_ctr > 0) {
$e_item = array($item['enclosure@url'],
$item['enclosure@type'],
$item['enclosure@length']);
$encs = $item->get_enclosures();
if (is_array($encs)) {
foreach ($encs as $e) {
$e_item = array(
$e->link, $e->type, $e->length);
array_push($enclosures, $e_item);
for ($i = 0; $i <= $e_ctr; $i++ ) {
if ($item["enclosure#$i@url"]) {
$e_item = array($item["enclosure#$i@url"],
$item["enclosure#$i@type"],
$item["enclosure#$i@length"]);
array_push($enclosures, $e_item);
}
}
}
// <media:content>
// can there be many of those? yes -fox
$m_ctr = $item['media']['content#'];
if ($m_ctr > 0) {
$e_item = array($item['media']['content@url'],
$item['media']['content@medium'],
$item['media']['content@length']);
array_push($enclosures, $e_item);
for ($i = 0; $i <= $m_ctr; $i++ ) {
if ($item["media"]["content#$i@url"]) {
$e_item = array($item["media"]["content#$i@url"],
$item["media"]["content#$i@medium"],
$item["media"]["content#$i@length"]);
array_push($enclosures, $e_item);
}
}
}
}
if ($debug_enabled) {
_debug("update_rss_feed: article enclosures:");
print_r($enclosures);
@ -1315,11 +1028,7 @@
} else {
if ($use_simplepie) {
$error_msg = mb_substr($rss->error(), 0, 250);
} else {
$error_msg = mb_substr(magpie_error(), 0, 250);
}
$error_msg = mb_substr($rss->error(), 0, 250);
if ($debug_enabled) {
_debug("update_rss_feed: error fetching feed: $error_msg");
@ -1332,9 +1041,7 @@
last_updated = NOW() WHERE id = '$feed'");
}
if ($use_simplepie) {
unset($rss);
}
unset($rss);
if ($debug_enabled) {
_debug("update_rss_feed: done");
@ -1407,7 +1114,7 @@
}
function expire_cached_files($debug) {
foreach (array("magpie", "simplepie", "images", "export") as $dir) {
foreach (array("simplepie", "images", "export") as $dir) {
$cache_dir = CACHE_DIR . "/$dir";
if ($debug) _debug("Expiring $cache_dir");

View File

@ -1,3 +1,3 @@
<?php # This file has been generated at: Sun Dec 23 14:53:06 MSK 2012
<?php # This file has been generated at: Mon Dec 24 13:27:42 MSK 2012
define('GENERATED_CONFIG_CHECK', 26);
$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'SINGLE_USER_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_MODULES', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'DEFAULT_UPDATE_METHOD', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'SPHINX_ENABLED', 'SPHINX_INDEX', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_EXPIRE_TIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_HOST', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'CHECK_FOR_NEW_VERSION', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'FEEDBACK_URL', 'CONFIG_VERSION'); ?>
$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'SINGLE_USER_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_MODULES', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'SPHINX_ENABLED', 'SPHINX_INDEX', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_EXPIRE_TIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_HOST', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'CHECK_FOR_NEW_VERSION', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'FEEDBACK_URL', 'CONFIG_VERSION'); ?>

View File

@ -1 +0,0 @@
kellan <kellan@protest.net>

View File

@ -1,48 +0,0 @@
NAME
MagpieRSS - a simple RSS integration tool
SYNOPSIS
require_once(rss_fetch.inc);
$url = $_GET['url'];
$rss = fetch_rss( $url );
echo "Channel Title: " . $rss->channel['title'] . "<p>";
echo "<ul>";
foreach ($rss->items as $item) {
$href = $item['link'];
$title = $item['title'];
echo "<li><a href=$href>$title</a></li>";
}
echo "</ul>";
DESCRIPTION
MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like",
and simple to use.
Some features include:
* supports RSS 0.9 - 1.0, with limited RSS 2.0 support
* supports namespaces, and modules, including mod_content and mod_event
* open minded [1]
* simple, functional interface, to object oriented backend parser
* automatic caching of parsed RSS objects makes it easy to integrate
* supports conditional GET with Last-Modified, and ETag
* uses constants for easy override of default behaviour
* heavily commented
1. By open minded I mean Magpie will accept any tag it finds in good faith that
it was supposed to be here. For strict validation, look elsewhere.
GETTING STARTED
COPYRIGHT:
Copyright(c) 2002 kellan@protest.net. All rights reserved.
This software is released under the GNU General Public License.
Please read the disclaimer at the top of the Snoopy.class.inc file.

File diff suppressed because it is too large Load Diff

View File

@ -1,200 +0,0 @@
<?php
/*
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_cache.inc, a simple, rolling(no GC), cache
* for RSS objects, keyed on URL.
* Author: Kellan Elliott-McCrea <kellan@protest.net>
* Version: 0.51
* License: GPL
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* http://lists.sourceforge.net/lists/listinfo/magpierss-general
*
*/
/**
 * Simple file-based cache for parsed RSS objects, keyed on the feed URL.
 *
 * Each entry is stored as a serialized blob in BASE_CACHE under a file named
 * after the md5 of the URL. Entries are never garbage-collected; staleness is
 * decided by comparing the file's mtime against MAX_AGE.
 */
class RSSCache {
    var $BASE_CACHE = './cache';    // where the cache files are stored
    var $MAX_AGE    = 3600;         // when are files stale, default one hour
    var $ERROR      = "";           // last error message registered via error()

    /**
     * Set up the cache and try to create the cache directory.
     *
     * @param string $base cache directory; empty keeps the default './cache'
     * @param mixed  $age  max age in seconds; empty keeps the default 3600
     */
    function __construct ($base='', $age='') {
        if ( $base ) {
            $this->BASE_CACHE = $base;
        }
        if ( $age ) {
            $this->MAX_AGE = $age;
        }

        // attempt to make the cache directory
        if ( ! file_exists( $this->BASE_CACHE ) ) {
            $status = @mkdir( $this->BASE_CACHE, 0755 );

            // if make failed
            if ( ! $status ) {
                $this->error(
                    "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                );
            }
        }
    }

    /**
     * PHP 4-style constructor, kept so existing code that calls
     * RSSCache() explicitly (e.g. from a subclass) keeps working.
     * Same-name constructors are no longer invoked by `new` on PHP 8,
     * which is why the real work lives in __construct().
     */
    function RSSCache ($base='', $age='') {
        $this->__construct($base, $age);
    }

/*=======================================================================*\
    Function:   set
    Purpose:    add an item to the cache, keyed on url
    Input:      url from which the rss file was fetched, object to store
    Output:     path of the cache file on success, 0 on failure
\*=======================================================================*/
    function set ($url, $rss) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );
        $fp = @fopen( $cache_file, 'w' );

        if ( ! $fp ) {
            $this->error(
                "Cache unable to open file for writing: $cache_file"
            );
            return 0;
        }

        $data = $this->serialize( $rss );
        fwrite( $fp, $data );
        fclose( $fp );

        return $cache_file;
    }

/*=======================================================================*\
    Function:   get
    Purpose:    fetch an item from the cache
    Input:      url from which the rss file was fetched
    Output:     cached object on HIT, 0 on MISS or empty/unreadable file
\*=======================================================================*/
    function get ($url) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );

        if ( ! file_exists( $cache_file ) ) {
            $this->debug(
                "Cache doesn't contain: $url (cache file: $cache_file)"
            );
            return 0;
        }

        $fp = @fopen($cache_file, 'r');
        if ( ! $fp ) {
            $this->error(
                "Failed to open cache file for reading: $cache_file"
            );
            return 0;
        }

        // Read the size once and always release the handle, including for
        // empty files (previously the handle was never closed).
        $rss = 0;
        $filesize = filesize($cache_file);
        if ( $filesize ) {
            $data = fread( $fp, $filesize );
            $rss = $this->unserialize( $data );
        }
        fclose( $fp );

        return $rss;
    }

/*=======================================================================*\
    Function:   check_cache
    Purpose:    check a url for membership in the cache
                and whether the object is older than MAX_AGE (ie. STALE)
    Input:      url from which the rss file was fetched
    Output:     'HIT', 'STALE' or 'MISS'
\*=======================================================================*/
    function check_cache ( $url ) {
        $this->ERROR = "";
        $filename = $this->file_name( $url );

        if ( file_exists( $filename ) ) {
            // find how long ago the file was added to the cache
            // and whether that is longer than MAX_AGE
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            if ( $this->MAX_AGE > $age ) {
                // object exists and is current
                return 'HIT';
            }
            else {
                // object exists but is old
                return 'STALE';
            }
        }
        else {
            // object does not exist
            return 'MISS';
        }
    }

/*=======================================================================*\
    Function:   cache_age
    Purpose:    report how long ago an entry was written
    Input:      cache key (the url the rss file was fetched from)
    Output:     age in seconds, or -1 when the entry does not exist
\*=======================================================================*/
    function cache_age( $cache_key ) {
        // Use the parameter itself: the previous body read an undefined
        // $url, so the lookup never matched a real entry.
        $filename = $this->file_name( $cache_key );
        if ( file_exists( $filename ) ) {
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            return $age;
        }
        else {
            return -1;
        }
    }

/*=======================================================================*\
    Function:   serialize
\*=======================================================================*/
    function serialize ( $rss ) {
        return serialize( $rss );
    }

/*=======================================================================*\
    Function:   unserialize
    Note:       NOTE(review): unserialize() must only see data written by
                set(); keep the cache directory unwritable by untrusted
                parties.
\*=======================================================================*/
    function unserialize ( $data ) {
        return unserialize( $data );
    }

/*=======================================================================*\
    Function:   file_name
    Purpose:    map url to location in cache
    Input:      url from which the rss file was fetched
    Output:     a file name
\*=======================================================================*/
    function file_name ($url) {
        $filename = md5( $url );
        return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
    }

/*=======================================================================*\
    Function:   error
    Purpose:    register error
\*=======================================================================*/
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;
        // MAGPIE_DEBUG is defined outside this class; treat it as off when
        // it has not been defined instead of failing on the bare constant.
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }
    }

    /**
     * Emit a debug message through error(), only when MAGPIE_DEBUG is on.
     */
    function debug ($debugmsg, $lvl=E_USER_NOTICE) {
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            $this->error("MagpieRSS [debug] $debugmsg", $lvl);
        }
    }
}
?>

View File

@ -1,462 +0,0 @@
<?php
/*
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_fetch.inc, a simple functional interface
to fetching and parsing RSS files, via the
function fetch_rss()
* Author: Kellan Elliott-McCrea <kellan@protest.net>
* License: GPL
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* magpierss-general@lists.sourceforge.net
*
*/
// Make MAGPIE_DIR available on hosts whose include_path does not contain
// the current directory (with thanks to rajiv and smarty).
defined('DIR_SEP') or define('DIR_SEP', DIRECTORY_SEPARATOR);
defined('MAGPIE_DIR') or define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);

require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// bundled 3rd party libraries live below extlib/
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
/*
* CONSTANTS - redefine these in your script to change the
* behaviour of fetch_rss(); currently, most options affect the cache
*
* MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
* For me a built in cache was essential to creating a "PHP-like"
* feel to Magpie, see rss_cache.inc for rationale
*
*
* MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
* This should be a location that the webserver can write to. If this
* directory does not already exist Magpie will try to be smart and create
* it. This will often fail for permissions reasons.
*
*
* MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
*
*
* MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
* instead of returning stale object?
*
* MAGPIE_DEBUG - Display debugging notices?
*
*/
/*=======================================================================*\
Function: fetch_rss:
Purpose: return RSS object for the given url
maintain the cache
Input: url of RSS file
Output: parsed RSS object (see rss_parse.inc)
NOTES ON CACHEING:
If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
NOTES ON RETRIEVING REMOTE FILES:
If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
return a cached object, and touch the cache object upon receiving a
304.
NOTES ON FAILED REQUESTS:
If there is an HTTP error while fetching an RSS object, the cached
version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
\*=======================================================================*/
// Library version string; init() embeds it in the default User-Agent.
define('MAGPIE_VERSION', '0.72');
// Most recent error message, written by error() and magpie_error().
$MAGPIE_ERROR = "";
/**
 * Fetch a feed and return it as a parsed RSS object, maintaining the cache.
 *
 * With caching off the URL is fetched and parsed directly. With caching on:
 *   1. a fresh cache entry (HIT) is returned as-is;
 *   2. a STALE entry triggers a conditional GET using its stored validators;
 *   3. a 304 refreshes and returns the cached object;
 *   4. a successful fetch is parsed, cached and returned;
 *   5. on any failure the stale object is returned, unless
 *      MAGPIE_CACHE_FRESH_ONLY is set, in which case an error is raised.
 *
 * @param string $url location of the feed
 * @return mixed parsed RSS object, or false on failure
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // cache disabled: fetch and parse, with nothing to fall back on
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }
        error("Failed to fetch $url and cache is off");
        return false;
    }

    // cache enabled
    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = 0;       // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = 0;       // cached (possibly stale) RSS object
    $errormsg        = 0;       // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // Strict comparison: before PHP 8 the loose test 0 == 'HIT' is true,
    // which treated an unusable cache as a hit.
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // stale entry: set up a conditional GET from whichever validators exist
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss ) {
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = $rss->etag;
            }
            if ( !empty($rss->last_modified) ) {
                // the HTTP request header is If-Modified-Since;
                // "If-Last-Modified" does not exist and is ignored by servers
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        if ($resp->status == '304' ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            // parse into a separate variable so that a parse failure does
            // not clobber the stale object we may still want to return
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $fresh );
                return $fresh;
            }
            // _response_to_rss() has already reported the parse error
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habbit to tacking
                # on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .= "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // fetch failed: fall back to the stale object unless the caller asked
    // for fresh data only
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end fetch_rss()
/*=======================================================================*\
Function: error
Purpose: set MAGPIE_ERROR, and trigger error
\*=======================================================================*/
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // tack on PHP's own message when one is visible in this scope
    if ( isset($php_errormsg) ) {
        $errormsg .= " ($php_errormsg)";
    }

    // nothing to report
    if ( !$errormsg ) {
        return;
    }

    // record the prefixed message globally, then raise it
    $MAGPIE_ERROR = "MagpieRSS: $errormsg";
    trigger_error( $MAGPIE_ERROR, $lvl);
}
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    // raise the message as a PHP error carrying the debug prefix
    $message = 'MagpieRSS [debug] ' . $debugmsg;
    trigger_error($message, $lvl);
}
/*=======================================================================*\
Function: magpie_error
Purpose: accessor for the magpie error variable
\*=======================================================================*/
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    // a non-empty argument overwrites the stored message; otherwise this
    // is a plain getter
    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
/*=======================================================================*\
Function: _fetch_remote_file
Purpose: retrieve an arbitrary remote file
Input: url of the remote file
headers to send along with the request (optional)
Output: an HTTP response object (see Snoopy.class.inc)
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is the HTTP client; configure it from the MAGPIE_* constants
    $http = new Snoopy();
    $http->agent        = MAGPIE_USER_AGENT;
    $http->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $http->use_gzip     = MAGPIE_USE_GZIP;

    // extra request headers are only forwarded when given as an array
    if ( is_array($headers) ) {
        $http->rawheaders = $headers;
    }

    // the client object itself is the response
    @$http->fetch($url);
    return $http;
}
/*=======================================================================*\
Function: _response_to_rss
Purpose: parse an HTTP response object into an RSS object
Input: an HTTP response object (see Snoopy)
Output: parsed RSS object (see rss_parse)
\*=======================================================================*/
/**
 * Parse an HTTP response object into an RSS object.
 *
 * On success the response's ETag and Last-Modified header values are copied
 * onto the RSS object. On failure error() is called and false is returned.
 *
 * @param object $resp HTTP response; its ->results and ->headers are read
 * @return mixed parsed MagpieRSS object, or false when parsing failed
 */
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parse failure: build an error message and bail out
    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";
        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);
        return false;
    }

    // find Etag, and Last-Modified
    foreach ($resp->headers as $h) {
        // lines without a colon (e.g. the status line) carry no field value
        if (strpos($h, ':') === false) {
            continue;
        }
        list($field, $val) = explode(':', $h, 2);

        // Header names are case-insensitive. Values are trimmed so that a
        // trailing CRLF is not stored and later replayed in a request header.
        $field = strtolower(trim($field));
        $val   = trim($val);

        if ( $field == 'etag' ) {
            $rss->etag = $val;
        }
        elseif ( $field == 'last-modified' ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
/*=======================================================================*\
Function: init
Purpose: setup constants with default values
check for user overrides
\*=======================================================================*/
function init () {
    // run once per request
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // defaults that apply only where the user has not defined the constant
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60, // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'ISO-8859-1',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
    );
    foreach ($defaults as $constant => $value) {
        if ( !defined($constant) ) {
            define($constant, $value);
        }
    }

    // the default user agent advertises whether caching is in use
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $tail = MAGPIE_CACHE_ON ? ')' : '; No cache)';
        $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $tail;
        if ( defined('MAGPIE_USER_AGENT_EXT') ) {
            $ua .= MAGPIE_USER_AGENT_EXT;
        }
        define('MAGPIE_USER_AGENT', $ua);
    }

    if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {
        define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout
    }

    // use gzip encoding to fetch rss files if supported?
    if ( !defined('MAGPIE_USE_GZIP') ) {
        define('MAGPIE_USE_GZIP', true);
    }
}
// NOTE: the following code should really be in Snoopy, or at least
// somewhere other then rss_fetch!
/*=======================================================================*\
HTTP STATUS CODE PREDICATES
These functions attempt to classify an HTTP status code
based on RFC 2616 and RFC 2518.
All of them take an HTTP status code as input, and return true or false
All this code is adapted from LWP's HTTP::Status.
\*=======================================================================*/
/*=======================================================================*\
Function: is_info
Purpose: return true if Informational status code
\*=======================================================================*/
function is_info ($sc) {
    // 1xx
    if ( !($sc >= 100) ) {
        return false;
    }
    return $sc < 200;
}
/*=======================================================================*\
Function: is_success
Purpose: return true if Successful status code
\*=======================================================================*/
function is_success ($sc) {
    // 2xx
    if ( !($sc >= 200) ) {
        return false;
    }
    return $sc < 300;
}
/*=======================================================================*\
Function: is_redirect
Purpose: return true if Redirection status code
\*=======================================================================*/
function is_redirect ($sc) {
    // 3xx
    if ( !($sc >= 300) ) {
        return false;
    }
    return $sc < 400;
}
/*=======================================================================*\
Function: is_error
Purpose: return true if Error status code
\*=======================================================================*/
function is_error ($sc) {
    // 4xx and 5xx
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 600;
}
/*=======================================================================*\
Function: is_client_error
Purpose: return true if Error status code, and its a client error
\*=======================================================================*/
function is_client_error ($sc) {
    // 4xx
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 500;
}
/*=======================================================================*\
Function: is_server_error
Purpose: return true if Error status code, and its a server error
\*=======================================================================*/
function is_server_error ($sc) {
    // 5xx
    if ( !($sc >= 500) ) {
        return false;
    }
    return $sc < 600;
}
?>

View File

@ -1,999 +0,0 @@
<?php
/**
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_parse.inc - parse an RSS or Atom feed
* return as a simple object.
*
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, Atom 0.3, and Atom 1.0
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* magpierss-general@lists.sourceforge.net
*
* @author Kellan Elliott-McCrea <kellan@protest.net>
* @version 0.8
* @license GPL
*
*/
// Feed family identifiers; feed_start_element() stores one of them in
// MagpieRSS::$feed_type after inspecting the root element.
define('RSS', 'RSS');
define('ATOM', 'Atom');
/**
 * Replace the named HTML 4 Latin-1 entities (&nbsp; ... &yuml;) with the
 * equivalent numeric character references (&#160; ... &#255;), which an XML
 * parser accepts without a DTD.
 *
 * Both the terminated form ("&nbsp;") and the sloppy unterminated form
 * ("&nbsp") are converted. The terminating semicolon is consumed, so
 * "&nbsp;" yields "&#160;" rather than "&#160;;".
 *
 * @param string $string markup possibly containing named entities
 * @return string the same markup with those entities made numeric
 */
function _convert_entities ($string) {
    static $map = null;

    if ($map === null) {
        # Source: http://www.w3.org/TR/REC-html40/sgml/entities.html
        # Listed in code point order; the first name is code point 160.
        $names = array(
            'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect', 'uml', 'copy',
            'ordf', 'laquo', 'not', 'shy', 'reg', 'macr', 'deg', 'plusmn', 'sup2', 'sup3',
            'acute', 'micro', 'para', 'middot', 'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12',
            'frac34', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
            'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml', 'ETH', 'Ntilde',
            'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
            'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring',
            'aelig', 'ccedil', 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
            'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave',
            'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
        );

        $map = array();
        foreach ($names as $offset => $name) {
            $numeric = '&#' . (160 + $offset) . ';';
            // strtr() prefers the longest key, so the terminated form wins
            // whenever the semicolon is present
            $map['&' . $name . ';'] = $numeric;
            $map['&' . $name]       = $numeric;
        }
    }

    return strtr($string, $map);
}
require_once (MAGPIE_DIR . 'rss_utils.inc');
/**
* Hybrid parser, and object, takes RSS as a string and returns a simple object.
*
* see: rss_fetch.inc for a simpler interface with integrated caching support
*
*/
class MagpieRSS {
// XML parser handle; assigned in the constructor from create_parser()
var $parser;
var $current_item = array(); // item currently being parsed
var $items = array(); // collection of parsed items
var $channel = array(); // hash of channel fields
// fields collected while inside <textinput> / <image> of an RSS feed
var $textinput = array();
var $image = array();
// RSS or ATOM, and the version string; both set by feed_start_element()
// from the root element
var $feed_type;
var $feed_version;
var $encoding = ''; // output encoding of parsed rss
var $_source_encoding = ''; // only set if we have to parse xml prolog
// NOTE(review): presumably filled by the error/warning helpers defined
// further down in this class - confirm
var $ERROR = "";
var $WARNING = "";
// define some constants
var $_ATOM_CONTENT_CONSTRUCTS = array(
'content', 'summary', 'title', /* common */
'info', 'tagline', 'copyright', /* Atom 0.3 */
'rights', 'subtitle', /* Atom 1.0 */
);
var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div');
var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
// parser variables, useless if you're not a parser, treat as private
var $stack = array(); // parser stack
// state flags toggled by the start/end element handlers
var $inchannel = false;
var $initem = false;
var $incontent = array(); // non-empty if in namespaced XML content field
var $exclude_top = false; // true when Atom 1.0 type="xhtml"
var $intextinput = false;
var $inimage = false;
// namespace prefix of the element being handled, or false for none
var $current_namespace = false;
/**
* Set up XML parser, parse source, and return populated RSS object..
*
* @param string $source string containing the RSS to be parsed
*
* NOTE: Probably a good idea to leave the encoding options alone unless
* you know what you're doing as PHP's character set support is
* a little weird.
*
* NOTE: A lot of this is unnecessary but harmless with PHP5
*
*
* @param string $output_encoding output the parsed RSS in this character
* set defaults to ISO-8859-1 as this is PHP's
* default.
*
* NOTE: might be changed to UTF-8 in future
* versions.
*
* @param string $input_encoding the character set of the incoming RSS source.
* Leave blank and Magpie will try to figure it
* out.
*
*
* @param bool $detect_encoding if false Magpie won't attempt to detect
* source encoding. (caveat emptor)
*
*/
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
    $input_encoding=null, $detect_encoding=true)
{
    # if PHP xml isn't compiled in, die
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
            "http://www.php.net/manual/en/ref.xml.php",
            E_USER_ERROR );
    }

    list($parser, $source) = $this->create_parser($source,
        $output_encoding, $input_encoding, $detect_encoding);

    # PHP 8 hands back an XMLParser object instead of a resource, so accept
    # either kind of handle
    if (!is_resource($parser) and !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
            "http://www.php.net/manual/en/ref.xml.php",
            E_USER_ERROR );
    }

    $this->parser = $parser;

    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
        'feed_start_element', 'feed_end_element' );
    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    # turn the predefined entities into numeric references before parsing
    $source=str_replace("&lt;","&#60;",$source);
    $source=str_replace("&gt;","&#62;",$source);
    $source=str_replace("&amp;","&#38;",$source);

    $status = xml_parse( $this->parser, $source );

    # try to force convert everything to UTF-8 and parse again
    # to salvage at least some data from the feed
    if (! $status) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            xml_parser_free( $this->parser );

            if (preg_match('/<\?xml.*?encoding="([^ ]+)".*?\?>/',
                $source, $matches)) {
                $enc = $matches[1];
            } else {
                # detect from the document itself (this used to read an
                # undefined variable, so detection never saw the feed)
                $enc = mb_detect_encoding($source);
            }

            # try fix XML, pass 1
            # (skipped when the encoding could not be determined)
            if ($enc) {
                $source = mb_convert_encoding($source, "UTF-8", $enc);
            }

            list($parser, $source) = $this->create_parser($source,
                $output_encoding, $input_encoding, $detect_encoding);
            $this->parser = $parser;

            xml_set_object( $this->parser, $this );
            xml_set_element_handler($this->parser,
                'feed_start_element', 'feed_end_element' );
            xml_set_character_data_handler( $this->parser, 'feed_cdata' );

            $status = xml_parse( $this->parser, $source);

            # try to fix XML, pass 2
            if (! $status) {
                $errorcode = xml_get_error_code( $this->parser );
                if ( $errorcode != XML_ERROR_NONE ) {
                    # release the pass 1 parser before replacing it
                    xml_parser_free( $this->parser );

                    $source = _convert_entities($source);

                    list($parser, $source) = $this->create_parser($source,
                        $output_encoding, $input_encoding, $detect_encoding);
                    $this->parser = $parser;

                    xml_set_object( $this->parser, $this );
                    xml_set_element_handler($this->parser,
                        'feed_start_element', 'feed_end_element' );
                    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

                    $status = xml_parse( $this->parser, $source);
                }
            }
        }
    }

    # still failing after the salvage attempts: record where and why
    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col = xml_get_current_column_number($this->parser);
            $errormsg = "$xml_error at line $error_line, column $error_col";
            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );
    $this->normalize();
}
/**
 * XML start-tag handler.
 *
 * Identifies the feed type from the root element, tracks entry into
 * channel / item / textinput / image containers, flattens tags that occur
 * inside content constructs back into text, and otherwise pushes the
 * element onto the parser stack and records its attributes.
 *
 * @param mixed  $p       XML parser handle (unused)
 * @param string $element tag name as reported by the parser
 * @param array  $attrs   tag attributes; keys are lower-cased in place
 */
function feed_start_element($p, $element, &$attrs) {
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    // check for a namespace, and split if found
    // Don't munge content tags
    if ( empty($this->incontent) ) {
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            // explode() stands in for split(), which PHP 7 removed; the
            // delimiter is a literal colon so the result is the same
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }
    }

    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        // a root element without a version attribute leaves the version null
        $declared_version = isset($attrs['version']) ? $attrs['version'] : null;

        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->feed_version = $declared_version;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            if (isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
                $this->feed_version = '1.0';
            }
            else { // Atom 0.3, probably.
                $this->feed_version = $declared_version;
            }
            $this->inchannel = true;
        }
        return;
    }

    // if we're inside a namespaced content construct, treat tags as text
    if ( !empty($this->incontent) )
    {
        if ((count($this->incontent) > 1) or !$this->exclude_top) {
            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                array_map('map_attrs',
                    array_keys($attrs),
                    array_values($attrs) )
                );
            if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; }
            $this->append_content( "<{$element}{$attrs_str}>" );
        }
        array_push($this->incontent, $el); // stack for parsing content XML
    }
    elseif ( $el == 'channel' ) {
        $this->inchannel = true;
    }
    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }
    // if we're in the default namespace of an RSS feed,
    // record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }
    // set stack[0] to current element
    else {
        // Atom support many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.
        $atom_link = false;
        if ($this->feed_type == ATOM and $el == 'link') {
            $atom_link = true;
            if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
                $el = $el . "_" . $attrs['rel']; // pseudo-element names for Atom link elements
            }
        }
        # handle atom content constructs
        elseif ( $this->feed_type == ATOM and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            // assume that everything accepts namespaced XML
            // (that will pass through some non-validating feeds;
            // but so what? this isn't a validating parser)
            $this->incontent = array();
            array_push($this->incontent, $el); // start a stack

            if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') {
                $this->exclude_top = true;
            } else {
                $this->exclude_top = false;
            }
        }
        # Handle inline XHTML body elements --CWJ
        elseif (($this->current_namespace=='xhtml' or
            (isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/1999/xhtml'))
            and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS) )
        {
            $this->current_namespace = 'xhtml';
            $this->incontent = array();
            array_push($this->incontent, $el); // start a stack
            $this->exclude_top = false;
        }

        array_unshift($this->stack, $el);
        $elpath = join('_', array_reverse($this->stack));

        // repeated elements get a "#n" suffix from the second one onwards
        $n = $this->element_count($elpath);
        $this->element_count($elpath, $n+1);

        if ($n > 0) {
            array_shift($this->stack);
            array_unshift($this->stack, $el.'#'.($n+1));
            $elpath = join('_', array_reverse($this->stack));
        }

        // this makes the baby Jesus cry, but we can't do it in normalize()
        // because we've made the element name for Atom links unpredictable
        // by tacking on the relation to the end. -CWJ
        if ($atom_link and isset($attrs['href'])) {
            $this->append($elpath, $attrs['href']);
        }

        // add attributes
        if (count($attrs) > 0) {
            $this->append($elpath.'@', join(',', array_keys($attrs)));
            foreach ($attrs as $attr => $value) {
                $this->append($elpath.'@'.$attr, $value);
            }
        }
    }
}
function feed_cdata ($p, $text) {
    // inside a content construct the text belongs to that construct
    if ($this->incontent) {
        $this->append_content( $text );
        return;
    }

    // otherwise it belongs to the element path currently on the stack
    $path = join('_', array_reverse($this->stack));
    $this->append($path, $text);
}
/**
 * XML end-tag handler.
 *
 * Closes content constructs (re-emitting inlined end tags as text), stores
 * a finished item, clears the container flags, and pops the parser stack
 * for ordinary elements.
 *
 * @param mixed  $p  XML parser handle (unused)
 * @param string $el tag name as reported by the parser
 */
function feed_end_element ($p, $el) {
    $el = strtolower($el);

    if ( $this->incontent ) {
        $opener = array_pop($this->incontent);

        // Don't get bamboozled by namespace voodoo
        // (explode() stands in for split(), which PHP 7 removed; the
        // delimiter is a literal colon so the result is the same)
        if (strpos($el, ':')) { list($ns, $closer) = explode(':', $el); }
        else { $ns = false; $closer = $el; }

        // Don't get bamboozled by our munging of <atom:content>, either
        if ($this->feed_type == ATOM and $closer == 'content') {
            $closer = 'atom_content';
        }

        // balance tags properly
        // note: i don't think this is actually neccessary
        if ($opener != $closer) {
            array_push($this->incontent, $opener);
            $this->append_content("<$el />");
        } elseif ($this->incontent) { // are we in the content construct still?
            if ((count($this->incontent) > 1) or !$this->exclude_top) {
                $this->append_content("</$el>");
            }
        } else { // shift the opening of the content construct off the normal stack
            array_shift( $this->stack );
        }
    }
    elseif ( $el == 'item' or $el == 'entry' )
    {
        $this->items[] = $this->current_item;
        $this->current_item = array();
        $this->initem = false;
        $this->current_category = 0;
    }
    elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
    {
        $this->intextinput = false;
    }
    elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
    {
        $this->inimage = false;
    }
    elseif ($el == 'channel' or $el == 'feed' )
    {
        $this->inchannel = false;
    }
    else {
        array_shift( $this->stack );
    }

    if ( !$this->incontent ) { // Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ
        $this->current_namespace = false;
    }
}
function concat (&$str1, $str2="") {
    // an unset / null target counts as the empty string
    $current = isset($str1) ? $str1 : "";
    $str1 = $current . $str2;
}
function append_content($text) {
    // pick the container being filled; items win over the channel
    if ( $this->initem ) {
        $bucket = 'current_item';
    }
    elseif ( $this->inchannel ) {
        $bucket = 'channel';
    }
    else {
        return;
    }

    // the bottom of the content stack names the field being collected
    $field = reset($this->incontent);

    if ($this->current_namespace) {
        $this->concat( $this->{$bucket}[$this->current_namespace][$field], $text );
    } else {
        $this->concat( $this->{$bucket}[$field], $text );
    }
}
// smart append - field and namespace aware
function append($el, $text) {
    if (!$el) {
        return;
    }

    $ns = $this->current_namespace;

    // state flag => destination property, in the order the flags are
    // checked (the order differs between namespaced and plain elements)
    if ( $ns ) {
        $targets = array(
            'initem'      => 'current_item',
            'inchannel'   => 'channel',
            'intextinput' => 'textinput',
            'inimage'     => 'image',
        );
    }
    else {
        $targets = array(
            'initem'      => 'current_item',
            'intextinput' => 'textinput',
            'inimage'     => 'image',
            'inchannel'   => 'channel',
        );
    }

    foreach ($targets as $flag => $bucket) {
        if ( !$this->{$flag} ) {
            continue;
        }
        // first active container takes the text
        if ( $ns ) {
            $this->concat( $this->{$bucket}[ $ns ][ $el ], $text );
        }
        else {
            $this->concat( $this->{$bucket}[ $el ], $text );
        }
        return;
    }
}
// smart count - field and namespace aware
/**
 * Read, and optionally first set, the occurrence counter kept under the
 * "<path>#" key for an element path. The counter lives in the current item
 * or the channel, inside the current namespace when one is active.
 *
 * @param string   $el  element path; an empty path returns null immediately
 * @param int|null $set new counter value, or null to only read
 * @return int|null stored counter, or 0 when none exists or when the parser
 *                  is in neither an item nor the channel (previously an
 *                  undefined variable was returned in that case)
 */
function element_count ($el, $set = NULL) {
    if (!$el) {
        return;
    }

    // pick the container holding the counter; items win over the channel
    if ( $this->initem ) {
        $bucket = 'current_item';
    }
    elseif ( $this->inchannel ) {
        $bucket = 'channel';
    }
    else {
        return 0;
    }

    $key = $el.'#';

    if ( $this->current_namespace ) {
        $ns = $this->current_namespace;
        if (!is_null($set)) { $this->{$bucket}[ $ns ][ $key ] = $set; }
        return (isset($this->{$bucket}[ $ns ][ $key ]) ?
            $this->{$bucket}[ $ns ][ $key ] : 0);
    }

    if (!is_null($set)) { $this->{$bucket}[ $key ] = $set; }
    return (isset($this->{$bucket}[ $key ]) ?
        $this->{$bucket}[ $key ] : 0);
}
function normalize_enclosure (&$source, $from, &$dest, $to, $i) {
    $id_from = $this->element_id($from, $i);
    $id_to = $this->element_id($to, $i);

    // "<id>@" holds the comma separated attribute names of the element
    $attr_list_key = "{$id_from}@";
    if (!isset($source[$attr_list_key])) {
        return;
    }

    foreach (explode(',', $source[$attr_list_key]) as $attr) {
        $value = $source["{$id_from}@{$attr}"];

        $atom_href = ($from=='link_enclosure' and $attr=='href'); // from Atom
        $rss_url   = ($from=='enclosure' and $attr=='url');       // from RSS

        if ($atom_href) {
            // the location attribute is stored under the other format's
            // name and also becomes the element's own value
            $dest["{$id_to}@url"] = $value;
            $dest["{$id_to}"] = $value;
        }
        elseif ($rss_url) {
            $dest["{$id_to}@href"] = $value;
            $dest["{$id_to}"] = $value;
        }
        else {
            // any other attribute is copied under its own name
            $dest["{$id_to}@{$attr}"] = $value;
        }
    }
}
function normalize_atom_person (&$source, $person, &$dest, $to, $i) {
    $id = $this->element_id($person, $i);
    $id_to = $this->element_id($to, $i);

    // Atom 0.3 <=> Atom 1.0: the person's address is "uri" from 1.0 on and
    // "url" before; copy it to the other spelling
    list($used, $norm) = ($this->feed_version >= 1.0)
        ? array('uri', 'url')
        : array('url', 'uri');
    if (isset($source["{$id}_{$used}"])) {
        $dest["{$id_to}_{$norm}"] = $source["{$id}_{$used}"];
    }

    // Atom to RSS 2.0 and Dublin Core
    // RSS 2.0 person strings should be valid e-mail addresses if possible.
    $has_email = isset($source["{$id}_email"]);
    $has_name = isset($source["{$id}_name"]);

    if ($has_name) {
        $rss_author = $source["{$id}_name"]
            . ($has_email ? " <" . $source["{$id}_email"] . ">" : '');
    }
    elseif ($has_email) {
        $rss_author = $source["{$id}_email"];
    }
    else {
        // neither name nor e-mail: nothing to write
        return;
    }

    $source[$id] = $rss_author; // goes to top-level author or contributor
    $dest[$id_to] = $rss_author; // goes to dc:creator or dc:contributor
}
// Normalize Atom 1.0 and RSS 2.0 categories to Dublin Core...
function normalize_category (&$source, $from, &$dest, $to, $i) {
    $cat_id = $this->element_id($from, $i);
    $dc_id = $this->element_id($to, $i);

    $term_key   = "{$cat_id}@term";
    $scheme_key = "{$cat_id}@scheme";
    $domain_key = "{$cat_id}@domain";

    // first normalize category elements: Atom 1.0 <=> RSS 2.0
    // category identifier
    if ( isset($source[$term_key]) ) {
        $source[$cat_id] = $source[$term_key];
    }
    elseif ( $this->feed_type == RSS ) {
        $source[$term_key] = $source[$cat_id];
    }

    // URI to taxonomy: mirror whichever spelling is present
    if ( isset($source[$scheme_key]) ) {
        $source[$domain_key] = $source[$scheme_key];
    }
    elseif ( isset($source[$domain_key]) ) {
        $source[$scheme_key] = $source[$domain_key];
    }

    // Now put the identifier into dc:subject
    $dest[$dc_id] = $source[$cat_id];
}
// ... or vice versa
// Copies the $i-th $from element of $source into $dest twice: as the $to
// element's own value and as that element's "@term" attribute.
// element_id() (defined elsewhere in this class) supplies both keys.
function normalize_dc_subject (&$source, $from, &$dest, $to, $i) {
$dc_id = $this->element_id($from, $i);
$cat_id = $this->element_id($to, $i);
$dest[$cat_id] = $source[$dc_id]; // RSS 2.0
$dest["{$cat_id}@term"] = $source[$dc_id]; // Atom 1.0
}
// simplify the logic for normalize(). Makes sure that count of elements and
// each of multiple elements is normalized properly. If you need to mess
// with things like attributes or change formats or the like, pass it a
// callback to handle each element.
function normalize_element (&$source, $from, &$dest, $to, $via = NULL) {
    $count_key = "{$from}#";
    $has_count = isset($source[$count_key]);

    // nothing to do when the element never occurred
    if (!isset($source[$from]) and !$has_count) {
        return;
    }

    // "<name>#" is the occurrence counter; without one there is exactly one
    if ($has_count) {
        $n = $source[$count_key];
        $dest["{$to}#"] = $source[$count_key];
    }
    else {
        $n = 1;
    }

    $i = 1;
    while ($i <= $n) {
        if ($via !== NULL) {
            // custom callback for ninja attacks
            $this->{$via}($source, $from, $dest, $to, $i);
        }
        else {
            // just make it the same
            $from_id = $this->element_id($from, $i);
            $to_id = $this->element_id($to, $i);
            $dest[$to_id] = $source[$from_id];
        }
        $i++;
    }
}
/**
 * Cross-populate Atom and RSS element names on the parsed feed.
 *
 * For Atom feeds the Atom 0.3 / Atom 1.0 names are mirrored onto each other
 * and then copied onto their RSS equivalents; for RSS feeds the RSS names are
 * copied onto their Atom equivalents. Works in place on $this->channel,
 * $this->image and every entry of $this->items. When an item carries a
 * parseable date, its epoch value is stored under 'date_timestamp'.
 */
function normalize () {
    // if atom populate rss fields and normalize 0.3 and 1.0 feeds
    if ( $this->is_atom() ) {
        // Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!)
        if ($this->feed_version < 1.0) {
            $this->normalize_element($this->channel, 'tagline', $this->channel, 'subtitle');
            $this->normalize_element($this->channel, 'copyright', $this->channel, 'rights');
            $this->normalize_element($this->channel, 'modified', $this->channel, 'updated');
        } else {
            $this->normalize_element($this->channel, 'subtitle', $this->channel, 'tagline');
            $this->normalize_element($this->channel, 'rights', $this->channel, 'copyright');
            $this->normalize_element($this->channel, 'updated', $this->channel, 'modified');
        }
        $this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person');
        $this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person');

        // Atom elements to RSS elements
        $this->normalize_element($this->channel, 'subtitle', $this->channel, 'description');
        if ( isset($this->channel['logo']) ) {
            $this->normalize_element($this->channel, 'logo', $this->image, 'url');
            $this->normalize_element($this->channel, 'link', $this->image, 'link');
            $this->normalize_element($this->channel, 'title', $this->image, 'title');
        }

        for ( $i = 0; $i < count($this->items); $i++) {
            // Work on a copy; it is written back at the end of the iteration.
            $item = $this->items[$i];

            // Atom 1.0 elements <=> Atom 0.3 elements
            if ($this->feed_version < 1.0) {
                $this->normalize_element($item, 'modified', $item, 'updated');
                $this->normalize_element($item, 'issued', $item, 'published');
            } else {
                $this->normalize_element($item, 'updated', $item, 'modified');
                $this->normalize_element($item, 'published', $item, 'issued');
            }

            // "If an atom:entry element does not contain
            // atom:author elements, then the atom:author elements
            // of the contained atom:source element are considered
            // to apply. In an Atom Feed Document, the atom:author
            // elements of the containing atom:feed element are
            // considered to apply to the entry if there are no
            // atom:author elements in the locations described
            // above." <http://atompub.org/2005/08/17/draft-ietf-atompub-format-11.html#rfc.section.4.2.1>
            if (!isset($item["author#"])) {
                // Reset for every item: otherwise an entry with no author
                // source at all would reuse whatever the previous entry
                // matched (or read undefined variables on the first entry).
                $source = NULL;
                $author = NULL;
                if (isset($item["source_author#"])) { // from aggregation source
                    $source = $item;
                    $author = "source_author";
                } elseif (isset($this->channel["author#"])) { // from containing feed
                    $source = $this->channel;
                    $author = "author";
                }

                // Only inherit authors when a fallback was actually found.
                if (isset($author)) {
                    $item["author#"] = $source["{$author}#"];
                    for ($au = 1; $au <= $item["author#"]; $au++) {
                        $id_to = $this->element_id('author', $au);
                        $id_from = $this->element_id($author, $au);
                        $item[$id_to] = $source[$id_from];
                        // Carry over the per-author sub-fields that are present.
                        foreach (array('name', 'email', 'uri', 'url') as $what) {
                            if (isset($source["{$id_from}_{$what}"])) {
                                $item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"];
                            }
                        }
                    }
                }
            }

            // Atom elements to RSS elements
            $this->normalize_element($item, 'author', $item['dc'], 'creator', 'normalize_atom_person');
            $this->normalize_element($item, 'contributor', $item['dc'], 'contributor', 'normalize_atom_person');
            $this->normalize_element($item, 'summary', $item, 'description');
            $this->normalize_element($item, 'atom_content', $item['content'], 'encoded');
            $this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) { // Atom 1.0 categories to dc:subject and RSS 2.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp: prefer 'published', fall back to
            // 'updated', and skip the step when the entry has neither.
            $atom_date = NULL;
            if ( isset($item['published']) ) {
                $atom_date = $item['published'];
            }
            elseif ( isset($item['updated']) ) {
                $atom_date = $item['updated'];
            }
            if ( $atom_date ) {
                $epoch = @parse_w3cdtf($atom_date);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ( $this->is_rss() ) {
        // RSS elements to Atom elements
        $this->normalize_element($this->channel, 'description', $this->channel, 'tagline'); // Atom 0.3
        $this->normalize_element($this->channel, 'description', $this->channel, 'subtitle'); // Atom 1.0 (yay wordsmithing!)
        $this->normalize_element($this->image, 'url', $this->channel, 'logo');

        for ( $i = 0; $i < count($this->items); $i++) {
            // Work on a copy; it is written back at the end of the iteration.
            $item = $this->items[$i];

            // RSS elements to Atom elements
            $this->normalize_element($item, 'description', $item, 'summary');
            $this->normalize_element($item['content'], 'encoded', $item, 'atom_content');
            $this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) { // RSS 2.0 categories to dc:subject and Atom 1.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp: RSS 1.0 uses dc:date (W3CDTF),
            // otherwise pubDate is handed to strtotime().
            if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                $epoch = @parse_w3cdtf($item['dc']['date']);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif ( isset($item['pubdate']) ) {
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
function is_rss () {
    // Yields the stored feed version for RSS feeds, and false for any other feed type.
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
function is_atom() {
    // Yields the stored feed version for Atom feeds, and false for any other feed type.
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
/**
 * return XML parser, and possibly re-encoded source
 *
 * @param string $source   raw feed XML
 * @param string $out_enc  target encoding for parsed text; falsy leaves the
 *                         parser's target encoding untouched
 * @param string $in_enc   input encoding to assume; falsy means unspecified
 * @param bool   $detect   whether the input encoding should be auto-detected
 * @return array           array($parser, $source); $source may have been
 *                         re-encoded by the PHP4 code path
 */
function create_parser($source, $out_enc, $in_enc, $detect) {
    // Compare the whole version number: looking only at the first character
    // of phpversion() sent every release other than 5.x (7, 8, 10, ...)
    // down the PHP4 fallback path.
    if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }
    if ($out_enc) {
        // Remember the output encoding and make the parser emit it.
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }
    return array($parser, $source);
}
/**
 * Build the XML parser used on PHP5.
 *
 * An explicit input encoding is only honoured when detection is switched
 * off and an encoding was actually supplied. In every other case the parser
 * is created with an empty encoding string, which leaves input-encoding
 * detection to PHP5 (libxml2).
 *
 */
function php5_create_parser($in_enc, $detect) {
    // Empty string = let the parser work out the input encoding itself.
    $encoding = '';
    if (!$detect && $in_enc) {
        $encoding = $in_enc;
    }
    return xml_parser_create($encoding);
}
/**
 * Instantiate an XML parser under PHP4
 *
 * Unfortunately PHP4's support for character encodings
 * and especially XML and character encodings sucks. As
 * long as the documents you parse only contain characters
 * from the ISO-8859-1 character set (a superset of ASCII,
 * and a subset of UTF-8) you're fine. However once you
 * step out of that comfy little world things get mad, bad,
 * and dangerous to know.
 *
 * The following code is based on SJM's work with FoF
 * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
 *
 * @param string $source  raw feed XML
 * @param string $in_enc  input encoding to assume; falsy means "look it up"
 * @param bool   $detect  when false, $in_enc is handed to the parser as-is
 * @return array          array($parser, $source); $source is the UTF-8
 *                        re-encoded text when a conversion was needed
 */
function php4_create_parser($source, $in_enc, $detect) {
    if ( !$detect ) {
        return array(xml_parser_create($in_enc), $source);
    }

    if (!$in_enc) {
        // Read the encoding out of the XML declaration. The "?" after "<"
        // has to be escaped: unescaped it merely made the "<" optional, so
        // the pattern matched any "xml ... encoding=" text in the document.
        if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            $this->source_encoding = $in_enc;
        }
        else {
            // No declaration found: assume UTF-8.
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    // the detected encoding is not one of the simple encodings PHP knows
    // attempt to use the iconv extension to
    // cast the XML to a known encoding
    // @see http://php.net/iconv
    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc,'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // iconv didn't work, try mb_convert_encoding
    // @see http://php.net/mbstring
    if(function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // else: no conversion available, report it and parse the source untouched
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
        "You may see strange artifacts, and mangled characters.",
        E_USER_NOTICE);
    return array(xml_parser_create(), $source);
}
function known_encoding($enc) {
    // Lookup is done on the upper-cased name; that upper-cased name is
    // returned on a hit, false on a miss.
    $candidate = strtoupper($enc);
    return in_array($candidate, $this->_KNOWN_ENCODINGS) ? $candidate : false;
}
function error ($errormsg, $lvl=E_USER_WARNING) {
    // Tack PHP's own message onto ours when $php_errormsg is available here.
    // NOTE(review): nothing in this method assigns $php_errormsg, so this
    // only fires if PHP itself populates it in this scope - confirm.
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    // Debug mode raises a real PHP error; otherwise the message is only logged.
    if ( !MAGPIE_DEBUG ) {
        error_log( $errormsg, 0);
    }
    else {
        trigger_error( $errormsg, $lvl);
    }

    // Notice-level reports are kept as a warning, everything else as an error.
    $notice_mask = E_USER_NOTICE|E_NOTICE;
    if ( ($lvl & $notice_mask) == 0 ) {
        $this->ERROR = $errormsg;
    }
    else {
        $this->WARNING = $errormsg;
    }
}
// Key under which the Nth occurrence of a repeated element is stored:
// the bare element name for the first occurrence, "name#N" from the
// second one on.
// can be called as static MagpieRSS::element_id()
function element_id ($el, $counter) {
    if ($counter > 1) {
        return $el . '#' . $counter;
    }
    return $el . '';
}
} // end class RSS
// Render one attribute as name="value"; the value is inserted verbatim,
// without any escaping.
function map_attrs($k, $v) {
    return $k . '="' . $v . '"';
}
// patch to support medieval versions of PHP4.1.x,
// courtesy, Ryan Currie, ryan@digibliss.com
// Only defined when PHP does not already provide array_change_key_case().
if (!function_exists('array_change_key_case')) {
    define("CASE_UPPER",1);
    define("CASE_LOWER",0);

    // Return a copy of $array with every key lower-cased (default) or
    // upper-cased (CASE_UPPER); values are left untouched.
    function array_change_key_case($array,$case=CASE_LOWER) {
        // Always pick a converter: any non-zero $case is treated as
        // upper-case instead of leaving $cmd undefined for unknown values.
        $cmd = $case ? 'strtoupper' : 'strtolower';

        // Start from an empty array so an empty input returns array()
        // rather than an undefined variable.
        $output = array();
        foreach($array as $key=>$value) {
            $output[$cmd($key)]=$value;
        }
        return $output;
    }
}
?>

View File

@ -1,67 +0,0 @@
<?php
/*
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_utils.inc, utility methods for working with RSS
* Author: Kellan Elliott-McCrea <kellan@protest.net>
* Version: 0.51
* License: GPL
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* magpierss-general@lists.sourceforge.net
*/
/*======================================================================*\
    Function: parse_w3cdtf
    Purpose:  parse a W3CDTF date into unix epoch
    Input:    a date string such as 2004-01-01T12:00:00+01:00; seconds
              and the timezone designator are optional
    Output:   the unix epoch as an integer, or -1 when the string does
              not look like a W3CDTF date-time
    NOTE: http://www.w3.org/TR/NOTE-datetime
\*======================================================================*/
function parse_w3cdtf ( $date_str ) {
    # regex to match w3cdtf
    $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

    if ( !preg_match( $pat, $date_str, $match ) ) {
        return -1;
    }

    # optional groups that did not take part in the match are either
    # missing from $match or empty strings, so seconds default to 0
    # instead of reaching gmmktime() as null or ''
    $seconds = (isset($match[7]) && $match[7] !== '') ? (int) $match[7] : 0;

    # calc epoch for current date assuming GMT
    $epoch = gmmktime( (int) $match[4], (int) $match[5], $seconds,
                       (int) $match[2], (int) $match[3], (int) $match[1]);

    $offset = 0;
    $is_zulu = isset($match[11]) && $match[11] == 'Z';
    if ( !$is_zulu ) {
        # not zulu time (aka GMT): apply the numeric offset, if any
        $tz_mod  = isset($match[8]) ? $match[8] : '';
        $tz_hour = (isset($match[9])  && $match[9]  !== '') ? (int) $match[9]  : 0;
        $tz_min  = (isset($match[10]) && $match[10] !== '') ? (int) $match[10] : 0;

        $offset = (($tz_hour*60)+$tz_min)*60;

        # is timezone ahead of GMT?  then subtract offset
        if ( $tz_mod == '+' ) {
            $offset = $offset * -1;
        }
    }

    return $epoch + $offset;
}
?>

View File

@ -204,7 +204,6 @@ class Updater extends Plugin {
CACHE_DIR,
CACHE_DIR . "/export",
CACHE_DIR . "/images",
CACHE_DIR . "/magpie",
CACHE_DIR . "/simplepie",
ICONS_DIR,
LOCK_DIRECTORY);

View File

@ -23,7 +23,6 @@
require_once "config.php";
require_once "db.php";
require_once "db-prefs.php";
require_once "lib/magpierss/rss_fetch.inc";
define('MAX_JOBS', 2);
define('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL);