rspamd: add neural module config (#3833)

Co-authored-by: Georg Lauterbach <44545919+georglauterbach@users.noreply.github.com> Co-authored-by: Brennan Kinney <5098581+polarathene@users.noreply.github.com>
2024-02-01 17:34:33 +01:00 · 2024-02-01 17:34:33 +01:00 · 45935f5fb8
parent 05fbcf6889
commit 45935f5fb8
7 changed files with 105 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -70,6 +70,7 @@ The most noteworthy change of this release is the update of the container's base
  - Refactored helper methods for sending e-mails with specific `Message-ID` headers and the helpers for retrieving + filtering logs, which together help isolate logs relevant to specific mail when multiple mails have been processed within a single test. ([#3786](https://github.com/docker-mailserver/docker-mailserver/pull/3786))
 - **Rspamd**:
  - The `rewrite_subject` action, is now disabled by default. It has been replaced with the new `SPAM_SUBJECT` environment variable, which implements the functionality via a Sieve script instead in favor of being anti-spam service agnostic ([3820](https://github.com/docker-mailserver/docker-mailserver/pull/3820))
+  - `RSPAMD_NEURAL` was added and is disabled by default. If switched on, it will enable the experimental Rspamd Neural network module to add a layer of analysis to spam detection using neural network technology. ([3833](https://github.com/docker-mailserver/docker-mailserver/pull/3833))

 ### Fixes

--- a/docs/content/config/environment.md
+++ b/docs/content/config/environment.md
@ -451,6 +451,17 @@ Can be used to control the score when the [`HFILTER_HOSTNAME_UNKNOWN` symbol](#r

 Default: 6 (which corresponds to the `add_header` action)

+
+##### RSPAMD_NEURAL
+
+Can be used to enable or disable the [Neural network module][rspamd-docs-neural-network]. This is an experimental anti-spam weighting method; the configuration added here uses three neural networks. As far as we can tell, it trains itself by using other modules to find out what spam is. It will take a while (a week or more) to train its first neural network. The config trains new networks all the time and discards old networks.
+Since it is experimental, it is switched off by default.
+
+- **0** => Disabled
+- 1 => Enabled
+
+[rspamd-docs-neural-network]: https://www.rspamd.com/doc/modules/neural.html
+
 #### Reports

 ##### PFLOGSUMM_TRIGGER
--- a/mailserver.env
+++ b/mailserver.env
@ -183,6 +183,12 @@ RSPAMD_HFILTER=1
 # Default: 6
 RSPAMD_HFILTER_HOSTNAME_UNKNOWN_SCORE=6

+# Can be used to enable or disable the (still experimental) neural module.
+#
+# - **0** => Disabled
+# - 1     => Enabled
+RSPAMD_NEURAL=0
+
 # Amavis content filter (used for ClamAV & SpamAssassin)
 # 0 => Disabled
 # 1 => Enabled
--- a/target/rspamd/local.d/neural.conf
+++ b/target/rspamd/local.d/neural.conf
@ -0,0 +1,42 @@
+# See https://github.com/rspamd/rspamd/issues/3099
+# Three neural network rules. They share the same training hyper-parameters
+# (max_iterations, learning_rate) and the same score thresholds (spam_score,
+# ham_score); they differ only in max_trains, max_usages and ann_expire.
+# Each rule reports its verdict through its own symbol_spam / symbol_ham pair.
+rules {
+  # Largest training set (max_trains = 1000), longest expiry (300d).
+  "NEURAL_WEEK_1000" {
+    train {
+      max_trains = 1000;
+      max_usages = 50;
+      max_iterations = 25;
+      learning_rate = 0.01;
+      spam_score = 8;
+      ham_score = -2;
+    }
+    symbol_spam = "NEURAL_WEEK_SPAM";
+    symbol_ham = "NEURAL_WEEK_HAM";
+    ann_expire = 300d;
+  }
+  # Medium training set (max_trains = 200), medium expiry (100d).
+  "NEURAL_DAYS_200" {
+    train {
+      max_trains = 200;
+      max_usages = 10;
+      max_iterations = 25;
+      learning_rate = 0.01;
+      spam_score = 8;
+      ham_score = -2;
+    }
+    symbol_spam = "NEURAL_DAYS_SPAM";
+    symbol_ham = "NEURAL_DAYS_HAM";
+    ann_expire = 100d;
+  }
+  # Smallest training set (max_trains = 50), shortest expiry (13d).
+  "NEURAL_HALF_DAY_50" {
+    train {
+      max_trains = 50;
+      max_usages = 4;
+      max_iterations = 25;
+      learning_rate = 0.01;
+      spam_score = 8;
+      ham_score = -2;
+    }
+    symbol_spam = "NEURAL_HALF_DAY_SPAM";
+    symbol_ham = "NEURAL_HALF_DAY_HAM";
+    ann_expire = 13d;
+  }
+}
--- a/target/rspamd/local.d/neural_group.conf
+++ b/target/rspamd/local.d/neural_group.conf
@ -0,0 +1,26 @@
+# Weights for the neural network symbols. The symbol names below match the
+# symbol_spam / symbol_ham values set for the three rules in neural.conf.
+# Spam symbols carry positive weights and ham symbols negative ones; the
+# "long" pair has the largest magnitude and the "short" pair the smallest.
+symbols = {
+  "NEURAL_WEEK_SPAM" {
+    weight = 3.0; # sample weight
+    description = "Neural network spam (long)";
+  }
+  "NEURAL_WEEK_HAM" {
+    weight = -3.0; # sample weight
+    description = "Neural network ham (long)";
+  }
+  "NEURAL_DAYS_SPAM" {
+    weight = 2.5; # sample weight
+    description = "Neural network spam (medium)";
+  }
+  "NEURAL_DAYS_HAM" {
+    weight = -1.5; # sample weight
+    description = "Neural network ham (medium)";
+  }
+  "NEURAL_HALF_DAY_SPAM" {
+    weight = 2.0; # sample weight
+    description = "Neural network spam (short)";
+  }
+  "NEURAL_HALF_DAY_HAM" {
+    weight = -1.0; # sample weight
+    description = "Neural network ham (short)";
+  }
+}
--- a/target/scripts/startup/setup.d/security/rspamd.sh
+++ b/target/scripts/startup/setup.d/security/rspamd.sh
@ -20,6 +20,7 @@ function _setup_rspamd() {
    __rspamd__setup_learning
    __rspamd__setup_greylisting
    __rspamd__setup_hfilter_group
+    __rspamd__setup_neural
    __rspamd__setup_check_authenticated
    _rspamd_handle_user_modules_adjustments   # must run last

@ -186,7 +187,6 @@ function __rspamd__setup_default_modules() {
  local DISABLE_MODULES=(
    clickhouse
    elastic
-    neural
    reputation
    spamassassin
    url_redirector
@ -283,6 +283,23 @@ function __rspamd__setup_hfilter_group() {
  fi
 }

+
+# Sets up Rspamd's neural module (documented at
+# https://www.rspamd.com/doc/modules/neural.html) according to the value of
+# 'RSPAMD_NEURAL'. Unless the variable passes the zero-or-one check and equals
+# 1, the 'neural.conf' and 'neural_group.conf' files in 'RSPAMD_LOCAL_D' are
+# deleted and the module is switched off. The module is experimental, which
+# is why a warning is logged when it is enabled.
+function __rspamd__setup_neural() {
+  if ! _env_var_expect_zero_or_one 'RSPAMD_NEURAL' || [[ ${RSPAMD_NEURAL} -ne 1 ]]; then
+    __rspamd__log 'debug' 'Neural module is disabled'
+
+    # Remove the neural configuration files one after the other.
+    local NEURAL_CONFIG_FILE
+    for NEURAL_CONFIG_FILE in 'neural.conf' 'neural_group.conf'; do
+      rm -f "${RSPAMD_LOCAL_D}/${NEURAL_CONFIG_FILE}"
+    done
+
+    __rspamd__helper__enable_disable_module 'neural' 'false'
+    return
+  fi
+
+  __rspamd__log 'debug' 'Enabling Neural module'
+  __rspamd__log 'warn' 'The Neural module is still experimental (in Rspamd) and hence not tested in DMS'
+}
+
+
 # If 'RSPAMD_CHECK_AUTHENTICATED' is enabled, then content checks for all users, i.e.
 # also for authenticated users, are performed.
 #
--- a/target/scripts/startup/variables-stack.sh
+++ b/target/scripts/startup/variables-stack.sh
@ -71,6 +71,7 @@ function __environment_variables_general_setup() {
  VARS[RSPAMD_GREYLISTING]="${RSPAMD_GREYLISTING:=0}"
  VARS[RSPAMD_HFILTER]="${RSPAMD_HFILTER:=1}"
  VARS[RSPAMD_HFILTER_HOSTNAME_UNKNOWN_SCORE]="${RSPAMD_HFILTER_HOSTNAME_UNKNOWN_SCORE:=6}"
+  VARS[RSPAMD_NEURAL]="${RSPAMD_NEURAL:=0}"
  VARS[RSPAMD_LEARN]="${RSPAMD_LEARN:=0}"
  VARS[SA_KILL]=${SA_KILL:="10.0"}
  VARS[SPAM_SUBJECT]=${SPAM_SUBJECT:=}