mirror of https://github.com/djcb/mu.git
* guile: add find-dups script, to find duplicate messages
This commit is contained in:
parent
bd12c4bb36
commit
d2dd0a8699
|
@ -22,7 +22,8 @@ EXTRA_DIST= \
|
|||
msgs-per-hour.scm \
|
||||
msgs-per-month.scm \
|
||||
msgs-per-day.scm \
|
||||
msgs-per-year-month.scm
|
||||
msgs-per-year-month.scm \
|
||||
find-dups.scm
|
||||
|
||||
muguiledistscriptdir = $(pkgdatadir)/scripts/
|
||||
muguiledistscript_SCRIPTS = $(EXTRA_DIST)
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
#!/bin/sh
|
||||
exec guile -e main -s $0 $@
|
||||
!#
|
||||
;;
|
||||
;; Copyright (C) 2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
;;
|
||||
;; This program is free software; you can redistribute it and/or modify it
|
||||
;; under the terms of the GNU General Public License as published by the
|
||||
;; Free Software Foundation; either version 3, or (at your option) any
|
||||
;; later version.
|
||||
;;
|
||||
;; This program is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with this program; if not, write to the Free Software Foundation,
|
||||
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
;; INFO: find duplicate messages
|
||||
;; INFO: --muhome=<muhome>: path to mu home dir
|
||||
|
||||
(use-modules (mu) (mu script) (mu stats))
|
||||
(use-modules (ice-9 getopt-long) (ice-9 optargs)
|
||||
(ice-9 popen) (ice-9 format) (ice-9 rdelim))
|
||||
|
||||
(define (md5sum path)
|
||||
(let* ((port (open-pipe* OPEN_READ "md5sum" path))
|
||||
(md5 (read-delimited " " port)))
|
||||
(close-pipe port)
|
||||
md5))
|
||||
|
||||
|
||||
(define (find-dups)
|
||||
(let ((id-table (make-hash-table 20000)))
|
||||
;; fill the hash with <msgid-size> => <list of paths>
|
||||
(mu:for-each-message
|
||||
(lambda (msg)
|
||||
(let* ((id (format #f "~a-~d" (mu:message-id msg)
|
||||
(mu:size msg)))
|
||||
(lst (hash-ref id-table id)))
|
||||
(if lst
|
||||
(set! lst (cons (mu:path msg) lst))
|
||||
(set! lst (list (mu:path msg))))
|
||||
(hash-set! id-table id lst))))
|
||||
;; list all the paths with multiple elements; check the md5sum to
|
||||
;; make 100%-minus-ε sure they are really the same file.
|
||||
(hash-for-each
|
||||
(lambda (id paths)
|
||||
(if (> (length paths) 1)
|
||||
(let ((hash (make-hash-table 10)))
|
||||
(for-each
|
||||
(lambda (path)
|
||||
(let* ((md5 (md5sum path)) (lst (hash-ref hash md5)))
|
||||
(if lst
|
||||
(set! lst (cons path lst))
|
||||
(set! lst (list path)))
|
||||
(hash-set! hash md5 lst)))
|
||||
paths)
|
||||
;; hash now maps the md5sum to the messages...
|
||||
(hash-for-each
|
||||
(lambda (md5 mpaths)
|
||||
(if (> (length mpaths) 1)
|
||||
(begin
|
||||
(format #t "md5sum: ~a:\n" md5)
|
||||
(let ((num 1))
|
||||
(for-each
|
||||
(lambda (path)
|
||||
(format #t "\t~d. ~a\n" num path)
|
||||
(set! num (+ 1 num)))
|
||||
mpaths)))))
|
||||
hash))))
|
||||
id-table)))
|
||||
|
||||
|
||||
|
||||
(define (main args)
|
||||
"Run some statistics function.
|
||||
Interpret argument-list ARGS (like command-line
|
||||
arguments). Possible arguments are:
|
||||
--muhome (path to alternative mu home directory)."
|
||||
(setlocale LC_ALL "")
|
||||
(let* ((optionspec '( (muhome (value #t))
|
||||
(help (single-char #\h) (value #f))))
|
||||
(options (getopt-long args optionspec))
|
||||
(help (option-ref options 'help #f))
|
||||
(muhome (option-ref options 'muhome #f)))
|
||||
(mu:initialize muhome)
|
||||
(find-dups)))
|
||||
|
||||
|
||||
;; Local Variables:
|
||||
;; mode: scheme
|
||||
;; End:
|
Loading…
Reference in New Issue