1
0
mirror of https://tt-rss.org/git/tt-rss.git synced 2024-06-28 12:10:52 +02:00

some bayes updates

This commit is contained in:
Andrew Dolgov 2015-06-17 19:20:17 +03:00
parent 08cfcba417
commit e0ae194ac9

View File

@ -38,7 +38,7 @@ class Af_Sort_Bayes extends Plugin {
$dst_category = "UGLY"; $dst_category = "UGLY";
$nbs = new NaiveBayesianStorage($_SESSION["uid"]); $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
$nb = new NaiveBayesianNgram($nbs); $nb = new NaiveBayesian($nbs);
$result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " . $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
$article_id . " AND owner_uid = " . $_SESSION["uid"]); $article_id . " AND owner_uid = " . $_SESSION["uid"]);
@ -240,7 +240,7 @@ class Af_Sort_Bayes extends Plugin {
$owner_uid = $article["owner_uid"]; $owner_uid = $article["owner_uid"];
$nbs = new NaiveBayesianStorage($owner_uid); $nbs = new NaiveBayesianStorage($owner_uid);
$nb = new NaiveBayesianNgram($nbs); $nb = new NaiveBayesian($nbs);
$categories = $nbs->getCategories(); $categories = $nbs->getCategories();
@ -267,23 +267,27 @@ class Af_Sort_Bayes extends Plugin {
$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"])); $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
if ($count_neutral >= 5000) { if ($count_neutral >= 10000) {
// enable automatic categorization // enable automatic categorization
$result = $nb->categorize($bayes_content); $result = $nb->categorize($bayes_content);
print_r($result);
if (count($result) == 3) { if (count($result) == 3) {
$prob_good = $result[$id_good]; $prob_good = $result[$id_good];
$prob_bad = $result[$id_bad]; $prob_bad = $result[$id_bad];
if ($prob_good > 0.90) { if ($prob_good > 0.90) {
$dst_category = $id_good; // should we autofile as good or not? idk $dst_category = $id_good;
$article["score_modifier"] += $this->score_modifier; $article["score_modifier"] += $this->score_modifier;
} else if ($prob_bad > 0.90) { } else if ($prob_bad > 0.90) {
$dst_category = $id_bad; // should we autofile as good or not? idk $dst_category = $id_bad;
$article["score_modifier"] -= $this->score_modifier; $article["score_modifier"] -= $this->score_modifier;
} }
} }
_debug("bayes, dst category: $dst_category");
} }
$nb->train($article["guid_hashed"], $dst_category, $bayes_content); $nb->train($article["guid_hashed"], $dst_category, $bayes_content);
@ -304,7 +308,7 @@ class Af_Sort_Bayes extends Plugin {
$this->dbh->query("COMMIT"); $this->dbh->query("COMMIT");
$nbs = new NaiveBayesianStorage($_SESSION["uid"]); $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
$nb = new NaiveBayesianNgram($nbs); $nb = new NaiveBayesian($nbs);
$nb->updateProbabilities(); $nb->updateProbabilities();
} }