From 3318d324105ee222a54afc94076878c12b588c24 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Fri, 19 Jun 2015 10:12:47 +0300 Subject: [PATCH] move language detection to a plugin, remove config.php constant --- config.php-dist | 6 --- include/functions.php | 2 - include/rssfuncs.php | 29 ++---------- include/sanity_config.php | 4 +- plugins/af_lang_detect/init.php | 46 +++++++++++++++++++ .../languagedetect/LanguageDetect.php | 0 .../Text/LanguageDetect/Exception.php | 0 .../Text/LanguageDetect/ISO639.php | 0 .../Text/LanguageDetect/Parser.php | 0 .../languagedetect/data/lang.dat | 0 .../languagedetect/data/unicode_blocks.dat | 0 11 files changed, 53 insertions(+), 34 deletions(-) create mode 100644 plugins/af_lang_detect/init.php rename {lib => plugins/af_lang_detect}/languagedetect/LanguageDetect.php (100%) rename {lib => plugins/af_lang_detect}/languagedetect/Text/LanguageDetect/Exception.php (100%) rename {lib => plugins/af_lang_detect}/languagedetect/Text/LanguageDetect/ISO639.php (100%) rename {lib => plugins/af_lang_detect}/languagedetect/Text/LanguageDetect/Parser.php (100%) rename {lib => plugins/af_lang_detect}/languagedetect/data/lang.dat (100%) rename {lib => plugins/af_lang_detect}/languagedetect/data/unicode_blocks.dat (100%) diff --git a/config.php-dist b/config.php-dist index c0729b61b..311b94df0 100644 --- a/config.php-dist +++ b/config.php-dist @@ -180,12 +180,6 @@ define('CHECK_FOR_UPDATES', true); // Check for updates automatically if running Git version - define('DETECT_ARTICLE_LANGUAGE', false); - // Detect article language when updating feeds, presently this is only - // used for hyphenation. This may increase amount of CPU time used by - // update processes, disable if necessary (i.e. you are being billed - // for CPU time). - define('ENABLE_GZIP_OUTPUT', false); // Selectively gzip output to improve wire performance. This requires // PHP Zlib extension on the server. diff --git a/include/functions.php b/include/functions.php index edc196f31..5c10ac6ac 100644 --- a/include/functions.php +++ b/include/functions.php @@ -99,8 +99,6 @@ require_once "lib/accept-to-gettext.php"; require_once "lib/gettext/gettext.inc"; - require_once "lib/languagedetect/LanguageDetect.php"; - function startup_gettext() { # Get locale from Accept-Language header diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 4efc843c4..7a3ea7402 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -443,13 +443,6 @@ $rss->init(); } - if (DETECT_ARTICLE_LANGUAGE) { - require_once "lib/languagedetect/LanguageDetect.php"; - - $lang = new Text_LanguageDetect(); - $lang->setNameMode(2); - } - // print_r($rss); $feed = db_escape_string($feed); @@ -645,21 +638,6 @@ print "\n"; } - $entry_language = ""; - - if (DETECT_ARTICLE_LANGUAGE) { - $entry_language = $lang->detect($entry_title . " " . $entry_content, 1); - - if (count($entry_language) > 0) { - $possible = array_keys($entry_language); - $entry_language = $possible[0]; - - _debug("detected language: $entry_language", $debug_enabled); - } else { - $entry_language = ""; - } - } - $entry_comments = $item->get_comments_url(); $entry_author = $item->get_author(); @@ -695,17 +673,19 @@ _debug("done collecting data.", $debug_enabled); - $result = db_query("SELECT id, content_hash FROM ttrss_entries + $result = db_query("SELECT id, content_hash, lang FROM ttrss_entries WHERE guid = '".db_escape_string($entry_guid)."' OR guid = '$entry_guid_hashed'"); if (db_num_rows($result) != 0) { $base_entry_id = db_fetch_result($result, 0, "id"); $entry_stored_hash = db_fetch_result($result, 0, "content_hash"); $article_labels = get_article_labels($base_entry_id, $owner_uid); + $entry_language = db_fetch_result($result, 0, "lang"); } else { $base_entry_id = false; $entry_stored_hash = ""; $article_labels = array(); + $entry_language = ""; } $article = array("owner_uid" => $owner_uid, // read only @@ -719,7 +699,7 @@ "author" => $entry_author, "force_catchup" => false, // ugly hack for the time being "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed - "language" => $entry_language, // read only + "language" => $entry_language, "feed" => array("id" => $feed, "fetch_url" => $fetch_url, "site_url" => $site_url) @@ -783,6 +763,7 @@ $entry_force_catchup = $article["force_catchup"]; $article_labels = $article["labels"]; $entry_score_modifier = (int) $article["score_modifier"]; + $entry_language = db_escape_string($article["language"]); if ($debug_enabled) { _debug("article labels:", $debug_enabled); diff --git a/include/sanity_config.php b/include/sanity_config.php index 6120e5fe0..eaa1668b7 100644 --- a/include/sanity_config.php +++ b/include/sanity_config.php @@ -1,3 +1,3 @@ - +$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'FEED_CRYPT_KEY', 'SINGLE_USER_MODE', 'SIMPLE_UPDATE_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'SPHINX_SERVER', 'SPHINX_INDEX', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_SERVER', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'SMTP_SECURE', 'CHECK_FOR_UPDATES', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'LOG_DESTINATION', 'CONFIG_VERSION'); ?> diff --git a/plugins/af_lang_detect/init.php b/plugins/af_lang_detect/init.php new file mode 100644 index 000000000..3f2eb29f8 --- /dev/null +++ b/plugins/af_lang_detect/init.php @@ -0,0 +1,46 @@ +host = $host; + + $host->add_hook($host::HOOK_ARTICLE_FILTER, $this); + + require_once __DIR__ . "/languagedetect/LanguageDetect.php"; + + $this->lang = new Text_LanguageDetect(); + $this->lang->setNameMode(2); + } + + function hook_article_filter($article) { + + if ($this->lang) { + $entry_language = $this->lang->detect($article['title'] . " " . $article['content'], 1); + + if (count($entry_language) > 0) { + $possible = array_keys($entry_language); + $entry_language = $possible[0]; + + _debug("detected language: $entry_language"); + + $article["language"] = $entry_language; + } + } + + return $article; + } + + function api_version() { + return 2; + } + +} +?> diff --git a/lib/languagedetect/LanguageDetect.php b/plugins/af_lang_detect/languagedetect/LanguageDetect.php similarity index 100% rename from lib/languagedetect/LanguageDetect.php rename to plugins/af_lang_detect/languagedetect/LanguageDetect.php diff --git a/lib/languagedetect/Text/LanguageDetect/Exception.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php similarity index 100% rename from lib/languagedetect/Text/LanguageDetect/Exception.php rename to plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php diff --git a/lib/languagedetect/Text/LanguageDetect/ISO639.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php similarity index 100% rename from lib/languagedetect/Text/LanguageDetect/ISO639.php rename to plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php diff --git a/lib/languagedetect/Text/LanguageDetect/Parser.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php similarity index 100% rename from lib/languagedetect/Text/LanguageDetect/Parser.php rename to plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php diff --git a/lib/languagedetect/data/lang.dat b/plugins/af_lang_detect/languagedetect/data/lang.dat similarity index 100% rename from lib/languagedetect/data/lang.dat rename to plugins/af_lang_detect/languagedetect/data/lang.dat diff --git a/lib/languagedetect/data/unicode_blocks.dat b/plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat similarity index 100% rename from lib/languagedetect/data/unicode_blocks.dat rename to plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat