From df659891ac0597c3f58b7714ddd4103ccad66b07 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 4 Aug 2015 13:32:52 +0300 Subject: [PATCH] implement per-feed stemming language setting --- classes/pref/feeds.php | 35 ++++++++++++++++++++++++++++++++++- include/functions2.php | 2 +- include/rssfuncs.php | 6 +++++- schema/ttrss_schema_mysql.sql | 1 + schema/ttrss_schema_pgsql.sql | 1 + schema/versions/mysql/128.sql | 5 +++++ schema/versions/pgsql/128.sql | 5 +++++ update.php | 10 +++++----- 8 files changed, 57 insertions(+), 8 deletions(-) diff --git a/classes/pref/feeds.php b/classes/pref/feeds.php index efa2c2af9..01197d92c 100644 --- a/classes/pref/feeds.php +++ b/classes/pref/feeds.php @@ -1,5 +1,7 @@ dbh->fetch_result($result, 0, "feed_language"); + + print "
"; + + print __('Language:') . " "; + print_select("feed_language", $feed_language, $this::$feed_languages, + 'dojoType="dijit.form.Select"'); + } + print ""; print "
".__("Update")."
"; @@ -807,6 +821,18 @@ class Pref_Feeds extends Handler_Protected { } + /* FTS Stemming Language */ + + if (DB_TYPE == "pgsql") { + print "
"; + + print __('Language:') . " "; + print_select("feed_language", "", $this::$feed_languages, + 'disabled="1" dojoType="dijit.form.Select"'); + + $this->batch_edit_cbox("feed_language"); + } + print ""; print "
".__("Update")."
"; @@ -938,6 +964,8 @@ class Pref_Feeds extends Handler_Protected { $mark_unread_on_update = checkbox_to_sql_bool( $this->dbh->escape_string($_POST["mark_unread_on_update"])); + $feed_language = $this->dbh->escape_string(trim($_POST["feed_language"])); + if (strlen(FEED_CRYPT_KEY) > 0) { require_once "crypt.php"; $auth_pass = substr(encrypt_string($auth_pass), 0, 250); @@ -976,7 +1004,8 @@ class Pref_Feeds extends Handler_Protected { hide_images = $hide_images, include_in_digest = $include_in_digest, always_display_enclosures = $always_display_enclosures, - mark_unread_on_update = $mark_unread_on_update + mark_unread_on_update = $mark_unread_on_update, + feed_language = '$feed_language' WHERE id = '$feed_id' AND owner_uid = " . $_SESSION["uid"]); PluginHost::getInstance()->run_hooks(PluginHost::HOOK_PREFS_SAVE_FEED, @@ -1051,6 +1080,10 @@ class Pref_Feeds extends Handler_Protected { $qpart = $category_qpart_nocomma; break; + case "feed_language": + $qpart = "feed_language = '$feed_language'"; + break; + } if ($qpart) { diff --git a/include/functions2.php b/include/functions2.php index a9bb49df8..551d55d73 100644 --- a/include/functions2.php +++ b/include/functions2.php @@ -402,7 +402,7 @@ if (DB_TYPE == "pgsql") { array_push($query_keywords, - "(tsvector_combined @@ '$search_query_leftover'::tsquery)"); + "(tsvector_combined @@ to_tsquery('english', '$search_query_leftover'))"); } } diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 5ebddf9ef..c8e2ce28f 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -306,6 +306,7 @@ feed_url,auth_pass,cache_images, mark_unread_on_update, owner_uid, pubsub_state, auth_pass_encrypted, + feed_language, (SELECT max(date_entered) FROM ttrss_entries, ttrss_user_entries where ref_id = id AND feed_id = '$feed') AS last_article_timestamp FROM ttrss_feeds WHERE id = '$feed'"); @@ -340,6 +341,8 @@ $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images")); $fetch_url = db_fetch_result($result, 0, "feed_url"); + $feed_language = db_escape_string(mb_strtolower(db_fetch_result($result, 0, "feed_language"))); + if (!$feed_language) $feed_language = 'english'; $feed = db_escape_string($feed); @@ -463,6 +466,7 @@ // We use local pluginhost here because we need to load different per-user feed plugins $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss); + _debug("language: $feed_language", $debug_enabled); _debug("processing feed data...", $debug_enabled); // db_query("BEGIN"); @@ -988,7 +992,7 @@ $tsvector_combined = db_escape_string(mb_substr($entry_title . ' ' . strip_tags($entry_content), 0, 1000000)); - $tsvector_qpart = "tsvector_combined = to_tsvector('simple', '$tsvector_combined'),"; + $tsvector_qpart = "tsvector_combined = to_tsvector('$feed_language', '$tsvector_combined'),"; } else { $tsvector_qpart = ""; diff --git a/schema/ttrss_schema_mysql.sql b/schema/ttrss_schema_mysql.sql index 09234369e..8a6f7d681 100644 --- a/schema/ttrss_schema_mysql.sql +++ b/schema/ttrss_schema_mysql.sql @@ -127,6 +127,7 @@ create table ttrss_feeds (id integer not null auto_increment primary key, view_settings varchar(250) not null default '', pubsub_state integer not null default 0, favicon_last_checked datetime default null, + feed_language varchar(100) not null default '', foreign key (owner_uid) references ttrss_users(id) ON DELETE CASCADE, foreign key (cat_id) references ttrss_feed_categories(id) ON DELETE SET NULL, foreign key (parent_feed) references ttrss_feeds(id) ON DELETE SET NULL) ENGINE=InnoDB DEFAULT CHARSET=UTF8; diff --git a/schema/ttrss_schema_pgsql.sql b/schema/ttrss_schema_pgsql.sql index 4cdc15f9a..9dafa693e 100644 --- a/schema/ttrss_schema_pgsql.sql +++ b/schema/ttrss_schema_pgsql.sql @@ -96,6 +96,7 @@ create table ttrss_feeds (id serial not null primary key, view_settings varchar(250) not null default '', pubsub_state integer not null default 0, favicon_last_checked timestamp default null, + feed_language varchar(100) not null default '', auth_pass_encrypted boolean not null default false); create index ttrss_feeds_owner_uid_index on ttrss_feeds(owner_uid); diff --git a/schema/versions/mysql/128.sql b/schema/versions/mysql/128.sql index 0545cb3ae..0a4d7ab7c 100644 --- a/schema/versions/mysql/128.sql +++ b/schema/versions/mysql/128.sql @@ -1,5 +1,10 @@ BEGIN; +alter table ttrss_feeds add column feed_language varchar(100); +update ttrss_feeds set feed_language = ''; +alter table ttrss_feeds change feed_language feed_language varchar(100) not null; +alter table ttrss_feeds alter column feed_language set default ''; + UPDATE ttrss_version SET schema_version = 128; COMMIT; diff --git a/schema/versions/pgsql/128.sql b/schema/versions/pgsql/128.sql index d85ce7fe9..3aba67220 100644 --- a/schema/versions/pgsql/128.sql +++ b/schema/versions/pgsql/128.sql @@ -3,6 +3,11 @@ BEGIN; alter table ttrss_entries add column tsvector_combined tsvector; create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined); +alter table ttrss_feeds add column feed_language varchar(100); +update ttrss_feeds set feed_language = ''; +alter table ttrss_feeds alter column feed_language set not null; +alter table ttrss_feeds alter column feed_language set default ''; + UPDATE ttrss_version SET schema_version = 128; COMMIT; diff --git a/update.php b/update.php index 06578aaa4..8fc28973f 100755 --- a/update.php +++ b/update.php @@ -33,7 +33,7 @@ "update-schema", "convert-filters", "force-update", - "update-search-idx", + "gen-search-idx", "list-plugins", "help"); @@ -81,7 +81,7 @@ print " --log FILE - log messages to FILE\n"; print " --indexes - recreate missing schema indexes\n"; print " --update-schema - update database schema\n"; - print " --update-search-idx - update PostgreSQL fulltext search index\n"; + print " --gen-search-idx - generate basic PostgreSQL fulltext search index\n"; print " --convert-filters - convert type1 filters to type2\n"; print " --force-update - force update of all feeds\n"; print " --list-plugins - list all available plugins\n"; @@ -332,8 +332,8 @@ } - if (isset($options["update-search-idx"])) { - echo "Generating search index...\n"; + if (isset($options["gen-search-idx"])) { + echo "Generating search index (stemming set to English)...\n"; $result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries"); $count = db_fetch_result($result, 0, "count"); @@ -353,7 +353,7 @@ $tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']), 0, 1000000)); - db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = " . $line["id"]); + db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('english', '$tsvector_combined') WHERE id = " . $line["id"]); } $offset += $limit;