mirror of
https://tt-rss.org/git/tt-rss.git
synced 2024-06-24 11:56:36 +02:00
limit maximum data length for bayesian processing
This commit is contained in:
parent
bc0e565a77
commit
132e42a9a7
|
@ -8,6 +8,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
private $score_modifier = 50;
|
private $score_modifier = 50;
|
||||||
private $sql_prefix = "ttrss_plugin_af_sort_bayes";
|
private $sql_prefix = "ttrss_plugin_af_sort_bayes";
|
||||||
private $auto_categorize_threshold = 10000;
|
private $auto_categorize_threshold = 10000;
|
||||||
|
private $max_document_length = 3000; // classifier can't rescale output for very long strings apparently
|
||||||
|
|
||||||
function about() {
|
function about() {
|
||||||
return array(1.0,
|
return array(1.0,
|
||||||
|
@ -47,7 +48,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
if ($this->dbh->num_rows($result) != 0) {
|
if ($this->dbh->num_rows($result) != 0) {
|
||||||
$guid = $this->dbh->fetch_result($result, 0, "guid");
|
$guid = $this->dbh->fetch_result($result, 0, "guid");
|
||||||
$title = $this->dbh->fetch_result($result, 0, "title");
|
$title = $this->dbh->fetch_result($result, 0, "title");
|
||||||
$content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
|
$content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length);
|
||||||
$score = $this->dbh->fetch_result($result, 0, "score");
|
$score = $this->dbh->fetch_result($result, 0, "score");
|
||||||
|
|
||||||
$this->dbh->query("BEGIN");
|
$this->dbh->query("BEGIN");
|
||||||
|
@ -302,7 +303,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
|
|
||||||
$dst_category = $id_ugly;
|
$dst_category = $id_ugly;
|
||||||
|
|
||||||
$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
|
$bayes_content = mb_substr(mb_strtolower($article["title"] . " " . strip_tags($article["content"])), 0, $this->max_document_length);
|
||||||
|
|
||||||
if ($count_neutral >= $this->auto_categorize_threshold) {
|
if ($count_neutral >= $this->auto_categorize_threshold) {
|
||||||
// enable automatic categorization
|
// enable automatic categorization
|
||||||
|
@ -358,7 +359,8 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
if ($this->dbh->num_rows($result) != 0) {
|
if ($this->dbh->num_rows($result) != 0) {
|
||||||
$guid = $this->dbh->fetch_result($result, 0, "guid");
|
$guid = $this->dbh->fetch_result($result, 0, "guid");
|
||||||
$title = $this->dbh->fetch_result($result, 0, "title");
|
$title = $this->dbh->fetch_result($result, 0, "title");
|
||||||
$content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
|
|
||||||
|
$content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length);
|
||||||
|
|
||||||
print "<h2>" . $title . "</h2>";
|
print "<h2>" . $title . "</h2>";
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,7 @@
|
||||||
class NaiveBayesianStorage {
|
class NaiveBayesianStorage {
|
||||||
var $con = null;
|
var $con = null;
|
||||||
var $owner_uid = null;
|
var $owner_uid = null;
|
||||||
|
var $max_document_length = 3000; // classifier can't rescale output for very long strings apparently
|
||||||
|
|
||||||
function NaiveBayesianStorage($owner_uid) {
|
function NaiveBayesianStorage($owner_uid) {
|
||||||
$this->con = Db::get();
|
$this->con = Db::get();
|
||||||
|
@ -239,7 +240,8 @@
|
||||||
$this->con->escape_string($ref['document_id']) . "'");
|
$this->con->escape_string($ref['document_id']) . "'");
|
||||||
|
|
||||||
if ($this->con->num_rows($rs) != 0) {
|
if ($this->con->num_rows($rs) != 0) {
|
||||||
$ref['content'] = mb_strtolower($this->con->fetch_result($rs, 0, 'title') . ' ' . strip_tags($this->con->fetch_result($rs, 0, 'content')));
|
$ref['content'] = mb_substr(mb_strtolower($this->con->fetch_result($rs, 0, 'title') . ' ' . strip_tags($this->con->fetch_result($rs, 0, 'content'))), 0,
|
||||||
|
$this->max_document_length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user