diff --git a/man/mu-index.1 b/man/mu-index.1 index eaa2a640..fa3b914d 100644 --- a/man/mu-index.1 +++ b/man/mu-index.1 @@ -1,4 +1,4 @@ -.TH MU-INDEX 1 "November 2010" "User Manuals" +.TH MU-INDEX 1 "January 2011" "User Manuals" .SH NAME @@ -10,11 +10,11 @@ mu index \- index e-mail messages stored in Maildirs .SH DESCRIPTION -\fBmu index\fR is the \fBmu\fR sub-command for scanning the contents of -Maildir directories and storing the results in a Xapian database which can -then be searched using +\fBmu index\fR is the \fBmu\fR command for scanning the contents of Maildir +directories and storing the results in a Xapian database. The data can then be +queried using .BR mu-find(1) -\. +\. .B index understands Maildirs as defined by Daniel Bernstein for qmail(7). In addition, @@ -34,17 +34,14 @@ with spam-messages. The first run of \fBmu index\fR may take a few minutes if you have a lot of mail (ten thousands of messages). Fortunately, such a full scan needs to be -done only once, after that it suffices to index the changes, which goes much -faster. Also note that a substantial amount of the time goes to printing the -progress information; if you turn that off (with \fB\-q\fR or -\fB\-\-quiet\fR), it goes a lot faster. See the 'Note on performance' below -for more information. +done only once; after that it suffices to index the changes, which goes much +faster. See the 'Note on performance' below for more information. -The optional phase two of the indexing-process is the removal of messages from -the database for which there is no longer a corresponding file in the +The optional 'phase two' of the indexing-process is the removal of messages +from the database for which there is no longer a corresponding file in the Maildir. If you do not want this, you can use \fB\-n\fR, \fB\-\-nocleanup\fR. 
-When \fBmu index\fR catches on of the signals \fBSIGINT\fR, \fBSIGHUP\fR or +When \fBmu index\fR catches one of the signals \fBSIGINT\fR, \fBSIGHUP\fR or \fBSIGTERM\fR (e.g,, when you press Ctrl-C during the indexing process), it tries to shutdown gracefully; it tries to save and commit data, and close the database etc. If it receives another signal (e.g,, when pressing Ctrl-C once @@ -52,16 +49,14 @@ more), \fBmu index\fR will terminate immediately. .SH OPTIONS -Note, some of the important options are described in the \fBmu(1)\fR man-page -and not here, as they apply to multiple mu-commands. +Note, some of the general options are described in the \fBmu(1)\fR man-page +and not here, as they apply to multiple mu commands. .TP \fB\-m\fR, \fB\-\-maildir\fR=\fI\fR starts searching at \fI\fR. By default, \fBmu\fR uses whatever the -\fBMAILDIR\fR environment variable is set to; if that is not set, it tries -\fI~/Maildir\fR \. In either case, the path must be \fBabsolute\fR. - -Also please see the note on mixing sub-maildirs below. +\fBMAILDIR\fR environment variable is set to; if it is not set, it tries +\fI~/Maildir\fR. See the note on mixing sub-maildirs below. .TP \fB\-\-reindex\fR @@ -83,23 +78,25 @@ messages (using \fB\-\-maildir\fR). For this reason, it is necessary to run format. \fBmu index\fR will issue a warning about this. .TP -\fB\-\-autoupgrade\fR automatically use \fB\-y\fR, \fB\-\-empty\fR +\fB\-\-autoupgrade\fR +automatically use \fB\-y\fR, \fB\-\-empty\fR when \fBmu\fR notices that the database version is not up-to-date. This option is for use in cron scripts and the like, so they won't require any user interaction, even when mu introduces a new database version. .TP +\fB\-\-xbatchsize\fR=\fIbatchsize\fR +set the maximum number of messages to process in a single Xapian +transaction.
In practice, this option is only useful if you find that \fBmu\fR +is running out of memory while indexing; in that case, you can set the batch +size to (for example) 1000, which will reduce memory consumption, but also +reduce performance. .B NOTE: -It is not a good idea to run multiple instances of -.B mu index +It is generally not a good idea to run multiple instances of \fBmu index\fR concurrently. No data loss should occur, but one or more of the instances may experience errors due to database locks. -Also note that, before indexing is completed, searches for messages may fail, -even if they have already been indexed, as some of the esssential database -information will only be written in batches during the indexing process. - Furthermore, it is not recommended tot mix maildirs and sub-maildirs within the hierarchy in the same database; for example, it's better not to index both with \fB\-\-maildir\fR=~/MyMaildir and \fB\-\-maildir\fR=~/MyMaildir/foo, as @@ -179,6 +176,4 @@ Dirk-Jan C. 
Binnema .SH "SEE ALSO" -.BR maildir(5) -.BR mu(1) -.BR mu-find(1) +.BR maildir(5) ", " mu(1) ", " mu-find(1) diff --git a/src/mu-config.h b/src/mu-config.h index 2d311749..38b8a154 100644 --- a/src/mu-config.h +++ b/src/mu-config.h @@ -27,10 +27,28 @@ G_BEGIN_DECLS +enum _MuConfigCmd { + MU_CONFIG_CMD_INDEX, + MU_CONFIG_CMD_FIND, + MU_CONFIG_CMD_CLEANUP, + MU_CONFIG_CMD_MKDIR, + MU_CONFIG_CMD_VIEW, + MU_CONFIG_CMD_EXTRACT, + MU_CONFIG_CMD_NONE, + + MU_CONFIG_CMD_UNKNOWN +}; +typedef enum _MuConfigCmd MuConfigCmd; + + + /* struct with all configuration options for mu; it will be filled * from the config file, and/or command line arguments */ struct _MuConfigOptions { + + MuConfigCmd cmd; /* the command, or MU_CONFIG_CMD_NONE */ + const char *cmdstr; /* cmd string, for user info */ /* general options */ gboolean quiet; /* don't give any output */ @@ -47,6 +65,9 @@ struct _MuConfigOptions { gboolean rebuild; /* empty the database before indexing */ gboolean autoupgrade; /* automatically upgrade db * when needed */ + int xbatchsize; /* batchsize for xapian commits, or 0 for default + * */ + /* options for querying */ gboolean xquery; /* give the Xapian query instead of search results */ diff --git a/src/mu-index.c b/src/mu-index.c index 01f757ad..d509fb1c 100644 --- a/src/mu-index.c +++ b/src/mu-index.c @@ -41,14 +41,14 @@ struct _MuIndex { }; MuIndex* -mu_index_new (const char *xpath, GError **err) +mu_index_new (const char *xpath, guint xbatchsize, GError **err) { MuIndex *index; g_return_val_if_fail (xpath, NULL); index = g_new0 (MuIndex, 1); - index->_xapian = mu_store_new (xpath, err); + index->_xapian = mu_store_new (xpath, xbatchsize, err); if (!index->_xapian) { g_warning ("%s: failed to open xapian store (%s)", diff --git a/src/mu-index.h b/src/mu-index.h index 86b86402..861ed004 100644 --- a/src/mu-index.h +++ b/src/mu-index.h @@ -51,7 +51,7 @@ typedef struct _MuIndexStats MuIndexStats; * * @return a new MuIndex instance, or NULL in case of error */ -MuIndex*
mu_index_new (const char* muhome, GError **err) +MuIndex* mu_index_new (const char* muhome, guint batchsize, GError **err) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; diff --git a/src/mu-store.cc b/src/mu-store.cc index c53a82fc..2dfbb599 100644 --- a/src/mu-store.cc +++ b/src/mu-store.cc @@ -32,8 +32,8 @@ #include "mu-str.h" #include "mu-msg-flags.h" -/* number of new messages after which we commit to the database */ -#define MU_STORE_TRX_SIZE 6666 +/* by default, use transactions of 30000 messages */ +#define MU_STORE_DEFAULT_TRX_SIZE 30000 /* http://article.gmane.org/gmane.comp.search.xapian.general/3656 */ #define MU_STORE_MAX_TERM_LENGTH 240 @@ -47,6 +47,7 @@ struct _MuStore { bool _in_transaction; int _processed; size_t _trx_size; + guint _batchsize; /* batch size of a xapian transaction */ }; @@ -115,7 +116,7 @@ check_version (MuStore *store) } MuStore* -mu_store_new (const char* xpath, GError **err) +mu_store_new (const char* xpath, guint batchsize, GError **err) { MuStore *store (0); @@ -131,14 +132,15 @@ mu_store_new (const char* xpath, GError **err) } /* keep count of processed docs */ - store->_trx_size = MU_STORE_TRX_SIZE; store->_in_transaction = false; - store->_processed = 0; + store->_processed = 0; + store->_trx_size = batchsize ? batchsize : MU_STORE_DEFAULT_TRX_SIZE; add_synonyms (store); - MU_WRITE_LOG ("%s: opened %s", __FUNCTION__, xpath); - + MU_WRITE_LOG ("%s: opened %s (batch size: %u)", + __FUNCTION__, xpath, (guint)store->_trx_size); + return store; } MU_XAPIAN_CATCH_BLOCK_G_ERROR(err,MU_ERROR_XAPIAN); diff --git a/src/mu-store.h b/src/mu-store.h index 9e2163a1..5f48112b 100644 --- a/src/mu-store.h +++ b/src/mu-store.h @@ -36,12 +36,13 @@ typedef struct _MuStore MuStore; * create a new Xapian store, a place to store documents * * @param path the path to the database - * @param err to receive error info or NULL. err->code can be found in + * @param batchsize number of messages per Xapian transaction, or 0 for the default + * @param err to receive error info or NULL.
err->code can be found in * mu-error.h * * @return a new MuStore object, or NULL in case of error */ -MuStore* mu_store_new (const char *path, GError **err) +MuStore* mu_store_new (const char *path, guint batchsize, GError **err) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; diff --git a/src/tests/test-mu-cmd.c b/src/tests/test-mu-cmd.c index 62ac3fce..71062f7e 100644 --- a/src/tests/test-mu-cmd.c +++ b/src/tests/test-mu-cmd.c @@ -113,7 +113,7 @@ test_mu_index (void) xpath = g_strdup_printf ("%s%c%s", muhome, G_DIR_SEPARATOR, "xapian"); - store = mu_store_new (xpath, NULL); + store = mu_store_new (xpath, 0, NULL); g_assert (store); g_assert_cmpuint (mu_store_count (store), ==, 4); diff --git a/src/tests/test-mu-store.c b/src/tests/test-mu-store.c index 299e91e5..bf8c36b5 100644 --- a/src/tests/test-mu-store.c +++ b/src/tests/test-mu-store.c @@ -42,7 +42,7 @@ test_mu_store_new_destroy (void) g_assert (tmpdir); err = NULL; - store = mu_store_new (tmpdir, &err); + store = mu_store_new (tmpdir, 12345, &err); g_assert (store); g_assert (err == NULL); @@ -68,7 +68,7 @@ test_mu_store_version (void) g_assert (tmpdir); err = NULL; - store = mu_store_new (tmpdir, &err); + store = mu_store_new (tmpdir, 789, &err); g_assert (store); g_assert (err == NULL); @@ -94,7 +94,7 @@ test_mu_store_store_and_count (void) tmpdir = test_mu_common_get_random_tmpdir(); g_assert (tmpdir); - store = mu_store_new (tmpdir, NULL); + store = mu_store_new (tmpdir, 1, NULL); g_assert (store); g_assert_cmpuint (0,==,mu_store_count (store)); @@ -142,7 +142,7 @@ test_mu_store_store_remove_and_count (void) tmpdir = test_mu_common_get_random_tmpdir(); g_assert (tmpdir); - store = mu_store_new (tmpdir, NULL); + store = mu_store_new (tmpdir, 0, NULL); g_assert (store); g_assert_cmpuint (0,==,mu_store_count (store));