mirror of
https://github.com/djcb/mu.git
synced 2024-06-28 07:41:04 +02:00
166 lines
5.8 KiB
Scheme
166 lines
5.8 KiB
Scheme
;;
|
|
;; Copyright (C) 2011-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
|
;;
|
|
;; This program is free software; you can redistribute it and/or modify it
|
|
;; under the terms of the GNU General Public License as published by the
|
|
;; Free Software Foundation; either version 3, or (at your option) any
|
|
;; later version.
|
|
;;
|
|
;; This program is distributed in the hope that it will be useful,
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;; GNU General Public License for more details.
|
|
;;
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
|
;; along with this program; if not, write to the Free Software Foundation,
|
|
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
(define-module (mu stats)
|
|
:use-module (oop goops)
|
|
:use-module (mu)
|
|
:use-module (srfi srfi-1)
|
|
:use-module (ice-9 i18n)
|
|
:use-module (ice-9 r5rs)
|
|
:export ( mu:tabulate
|
|
mu:top-n-most-frequent
|
|
mu:count
|
|
mu:average
|
|
mu:stddev
|
|
mu:correl
|
|
mu:max
|
|
mu:min
|
|
mu:weekday-numbers->names
|
|
mu:month-numbers->names))
|
|
|
|
|
|
(define* (mu:tabulate func #:optional (expr #t))
  "Execute FUNC for each message matching EXPR, and return an alist
which maps each result of FUNC to its frequency. If the result of FUNC
is a list, each of its elements is counted separately.

FUNC is a function that takes a <mu-message> instance as its
argument. For example, to tabulate messages by weekday, one could use:
   (mu:tabulate (lambda (msg) (tm:wday (localtime (date msg))))), and
get back a list like
   ((1 . 2) (2 . 5) (3 . 4) (4 . 4) (5 . 12) (6 . 7) (7 . 2))."
  (let ((freqs '()))
    ;; Record one more occurrence of KEY in the frequency alist.
    (define (count! key)
      (let ((seen (assoc-ref freqs key)))
        (set! freqs (assoc-set! freqs key (if seen (1+ seen) 1)))))
    (mu:for-each-message
     (lambda (msg)
       (let ((result (func msg)))
         (if (list? result)
             (for-each count! result)
             (count! result))))
     expr)
    freqs))
|
|
|
|
(define* (top-n func less n #:optional (expr #t))
  "Take the results of (mu:tabulate FUNC EXPR), sort them using LESS (a
function taking two arguments A and B (cons cells, (VAL . FREQ)), which
returns #t if A < B, #f otherwise), and return the first N entries. If
there are fewer than N entries, return all of them."
  (let ((sorted (sort (mu:tabulate func expr) less)))
    ;; `take' signals an error when asked for more elements than the
    ;; list holds, so clamp to what is actually available.
    (if (< (length sorted) n)
        sorted
        (take sorted n))))
|
|
|
|
(define* (mu:top-n-most-frequent func n #:optional (expr #t))
  "Tabulate FUNC over the messages matching EXPR (see mu:tabulate) and
return the N entries with the highest frequency, most frequent first."
  (let ((more-frequent?
         ;; entries are (VAL . FREQ) pairs; order by descending FREQ
         (lambda (left right)
           (> (cdr left) (cdr right)))))
    (top-n func more-frequent? n expr)))
|
|
|
|
(define* (mu:count #:optional (expr #t))
  "Return the number of messages matching EXPR. When EXPR is omitted,
/all/ messages are counted."
  (let ((total 0))
    ;; the message itself is irrelevant; we only tally the visits
    (mu:for-each-message
     (lambda (_)
       (set! total (1+ total)))
     expr)
    total))
|
|
|
|
(define (average lst)
  "Return the arithmetic mean of the numbers in list LST, or #f when
LST is empty (the mean is undefined then)."
  (if (not (null? lst))
      (let ((total (apply + lst))
            (count (length lst)))
        (/ total count))
      #f))
|
|
|
|
(define (stddev lst)
  "Calculate the (population) standard deviation of a list LST of
numbers, or #f if undefined (that is, when `average' yields #f for LST)."
  (let ((avg (average lst)))
    ;; Use `and' rather than a one-armed `if': the latter yields the
    ;; unspecified value when AVG is #f, which counts as true and would
    ;; lead to an error further on instead of the documented #f.
    (and avg
         (let ((sosq (apply + (map (lambda (x)
                                     (let ((delta (- x avg)))
                                       (* delta delta)))
                                   lst))))
           (sqrt (/ sosq (length lst)))))))
|
|
|
|
|
|
(define* (mu:average func #:optional (expr #t))
  "Apply FUNC to every message matching EXPR (or #t for all) and return
the average of the resulting values. Returns #f if undefined."
  (let* ((messages (mu:message-list expr))
         (samples (map func messages)))
    (average samples)))
|
|
|
|
(define* (mu:stddev func #:optional (expr #t))
  "Apply FUNC to every message matching EXPR (or #t for all) and return
the standard deviation of the resulting values. This is the
'population' stddev, not the 'sample' stddev. Returns #f if undefined."
  (let* ((messages (mu:message-list expr))
         (samples (map func messages)))
    (stddev samples)))
|
|
|
|
(define* (mu:max func #:optional (expr #t))
  "Get the maximum value of FUNC applied to all messages matching
EXPR (or #t for all). Returns #f if undefined, i.e. when no message
matches."
  (let ((vals (map func (mu:message-list expr))))
    ;; `max' needs at least one argument, so guard the empty case
    ;; explicitly to honour the documented #f result.
    (if (null? vals)
        #f
        (apply max vals))))
|
|
|
|
(define* (mu:min func #:optional (expr #t))
  "Get the minimum value of FUNC applied to all messages matching
EXPR (or #t for all). Returns #f if undefined, i.e. when no message
matches."
  (let ((vals (map func (mu:message-list expr))))
    ;; `min' needs at least one argument, so guard the empty case
    ;; explicitly to honour the documented #f result.
    (if (null? vals)
        #f
        (apply min vals))))
|
|
|
|
|
|
(define (correl lst)
  "Calculate Pearson's correlation coefficient for a list LST of cons
pairs, where the car and cdr of each pair are values from data set 1
and 2, respectively. Returns #f if the coefficient is undefined: LST is
empty, or one of the data sets has no variance."
  (let* ((n   (length lst))
         (sx  (apply + (map car lst)))
         (sy  (apply + (map cdr lst)))
         (sxy (apply + (map (lambda (cell) (* (car cell) (cdr cell))) lst)))
         (sxx (apply + (map (lambda (cell) (* (car cell) (car cell))) lst)))
         (syy (apply + (map (lambda (cell) (* (cdr cell) (cdr cell))) lst)))
         ;; n^2 times the variance of each data set
         (vx  (- (* n sxx) (* sx sx)))
         (vy  (- (* n syy) (* sy sy))))
    ;; With a zero variance term the denominator is zero and the
    ;; coefficient is undefined; return #f instead of dividing by zero.
    (and (positive? vx)
         (positive? vy)
         (/ (- (* n sxy) (* sx sy))
            (sqrt (* vx vy))))))
|
|
|
|
(define* (mu:correl func1 func2 #:optional (expr #t))
  "Determine Pearson's correlation coefficient between the values of
functions FUNC1 and FUNC2 applied to all messages matching EXPR (or #t
for all). Returns #f when no message matches."
  (let ((data
         (map (lambda (msg)
                (cons (func1 msg) (func2 msg)))
              (mu:message-list expr))))
    ;; The empty list is a true value in Scheme, so test for emptiness
    ;; explicitly; without data the coefficient is undefined.
    (if (null? data)
        #f
        (correl data))))
|
|
|
|
|
|
;; Abbreviated day names for the current locale, as a 7-element list:
;; element I holds what `locale-day-short' returns for day I+1.
(define day-names
  (list-tabulate 7
                 (lambda (idx)
                   (locale-day-short (+ idx 1)))))
|
|
|
|
(define (mu:weekday-numbers->names table)
  "Given TABLE, a list of pairs whose car is a day number (0-6), return
a new list of pairs in which each car is replaced by the corresponding
abbreviated day name for the current locale. The cdrs are kept as they
are."
  (map (lambda (entry)
         (let ((day-number (car entry))
               (payload (cdr entry)))
           (cons (list-ref day-names day-number) payload)))
       table))
|
|
|
|
;; Abbreviated month names for the current locale, as a 12-element list:
;; element I holds what `locale-month-short' returns for month I+1.
(define month-names
  (list-tabulate 12
                 (lambda (idx)
                   (locale-month-short (+ idx 1)))))
|
|
|
|
(define (mu:month-numbers->names table)
  "Given TABLE, a list of pairs whose car is a month number (0-11),
return a new list of pairs in which each car is replaced by the
corresponding abbreviated month name. The cdrs are kept as they are."
  (map (lambda (entry)
         (let ((month-number (car entry))
               (payload (cdr entry)))
           (cons (list-ref month-names month-number) payload)))
       table))
|