mirror of https://github.com/djcb/mu.git
* <many>: add option to change the batch size for xapian transactions
This commit is contained in:
parent
0b88f86e65
commit
169196498e
|
@ -1,4 +1,4 @@
|
|||
.TH MU-INDEX 1 "November 2010" "User Manuals"
|
||||
.TH MU-INDEX 1 "January 2011" "User Manuals"
|
||||
|
||||
.SH NAME
|
||||
|
||||
|
@ -10,11 +10,11 @@ mu index \- index e-mail messages stored in Maildirs
|
|||
|
||||
.SH DESCRIPTION
|
||||
|
||||
\fBmu index\fR is the \fBmu\fR sub-command for scanning the contents of
|
||||
Maildir directories and storing the results in a Xapian database which can
|
||||
then be searched using
|
||||
\fBmu index\fR is the \fBmu\fR command for scanning the contents of Maildir
|
||||
directories and storing the results in a Xapian database. The data can then be
|
||||
queried using
|
||||
.BR mu-find(1)
|
||||
\.
|
||||
\.
|
||||
|
||||
.B index
|
||||
understands Maildirs as defined by Daniel Bernstein for qmail(7). In addition,
|
||||
|
@ -34,17 +34,14 @@ with spam-messages.
|
|||
|
||||
The first run of \fBmu index\fR may take a few minutes if you have a lot of
|
||||
mail (tens of thousands of messages). Fortunately, such a full scan needs to be
|
||||
done only once, after that it suffices to index the changes, which goes much
|
||||
faster. Also note that a substantial amount of the time goes to printing the
|
||||
progress information; if you turn that off (with \fB\-q\fR or
|
||||
\fB\-\-quiet\fR), it goes a lot faster. See the 'Note on performance' below
|
||||
for more information.
|
||||
done only once; after that it suffices to index the changes, which goes much
|
||||
faster. See the 'Note on performance' below for more information.
|
||||
|
||||
The optional phase two of the indexing-process is the removal of messages from
|
||||
the database for which there is no longer a corresponding file in the
|
||||
The optional 'phase two' of the indexing-process is the removal of messages
|
||||
from the database for which there is no longer a corresponding file in the
|
||||
Maildir. If you do not want this, you can use \fB\-n\fR, \fB\-\-nocleanup\fR.
|
||||
|
||||
When \fBmu index\fR catches on of the signals \fBSIGINT\fR, \fBSIGHUP\fR or
|
||||
When \fBmu index\fR catches one of the signals \fBSIGINT\fR, \fBSIGHUP\fR or
|
||||
\fBSIGTERM\fR (e.g., when you press Ctrl-C during the indexing process), it
|
||||
tries to shut down gracefully; it tries to save and commit data, and close the
|
||||
database etc. If it receives another signal (e.g., when pressing Ctrl-C once
|
||||
|
@ -52,16 +49,14 @@ more), \fBmu index\fR will terminate immediately.
|
|||
|
||||
.SH OPTIONS
|
||||
|
||||
Note, some of the important options are described in the \fBmu(1)\fR man-page
|
||||
and not here, as they apply to multiple mu-commands.
|
||||
Note, some of the general options are described in the \fBmu(1)\fR man-page
|
||||
and not here, as they apply to multiple mu commands.
|
||||
|
||||
.TP
|
||||
\fB\-m\fR, \fB\-\-maildir\fR=\fI<maildir>\fR
|
||||
starts searching at \fI<maildir>\fR. By default, \fBmu\fR uses whatever the
|
||||
\fBMAILDIR\fR environment variable is set to; if that is not set, it tries
|
||||
\fI~/Maildir\fR \. In either case, the path must be \fBabsolute\fR.
|
||||
|
||||
Also please see the note on mixing sub-maildirs below.
|
||||
\fBMAILDIR\fR environment variable is set to; if it is not set, it tries
|
||||
\fI~/Maildir\fR. See the note on mixing sub-maildirs below.
|
||||
|
||||
.TP
|
||||
\fB\-\-reindex\fR
|
||||
|
@ -83,23 +78,25 @@ messages (using \fB\-\-maildir\fR). For this reason, it is necessary to run
|
|||
format. \fBmu index\fR will issue a warning about this.
|
||||
|
||||
.TP
|
||||
\fB\-\-autoupgrade\fR automatically use \fB\-y\fR, \fB\-\-empty\fR
|
||||
\fB\-\-autoupgrade\fR
|
||||
automatically use \fB\-y\fR, \fB\-\-empty\fR
|
||||
when \fBmu\fR notices that the database version is not up-to-date. This option
|
||||
is for use in cron scripts and the like, so they won't require any user
|
||||
interaction, even when mu introduces a new database version.
|
||||
|
||||
.TP
|
||||
\fB\-\-xbatchsize\fR=\fI<batch size>\fR
|
||||
set the maximum number of messages to process in a single Xapian
|
||||
transaction. In practice, this option is only useful if you find that \fBmu\fR
|
||||
is running out of memory while indexing; in that case, you can set the batch
|
||||
size to (for example) 1000, which will reduce memory consumption, but also
|
||||
reduce performance.
|
||||
|
||||
.B NOTE:
|
||||
It is not a good idea to run multiple instances of
|
||||
.B mu index
|
||||
It is generally not a good idea to run multiple instances of \fBmu index\fR
|
||||
concurrently. No data loss should occur, but one or more of the instances may
|
||||
experience errors due to database locks.
|
||||
|
||||
Also note that, before indexing is completed, searches for messages may fail,
|
||||
even if they have already been indexed, as some of the essential database
|
||||
information will only be written in batches during the indexing process.
|
||||
|
||||
Furthermore, it is not recommended to mix maildirs and sub-maildirs within
|
||||
the hierarchy in the same database; for example, it's better not to index both
|
||||
with \fB\-\-maildir\fR=~/MyMaildir and \fB\-\-maildir\fR=~/MyMaildir/foo, as
|
||||
|
@ -179,6 +176,4 @@ Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
|||
|
||||
.SH "SEE ALSO"
|
||||
|
||||
.BR maildir(5)
|
||||
.BR mu(1)
|
||||
.BR mu-find(1)
|
||||
.BR maildir(5) mu(1) mu-find(1)
|
||||
|
|
|
@ -27,10 +27,28 @@
|
|||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
/* identifiers for the command to run; this is the type of the 'cmd'
 * field in struct _MuConfigOptions, where MU_CONFIG_CMD_NONE is used
 * when there is no command */
enum _MuConfigCmd {
|
||||
MU_CONFIG_CMD_INDEX,
|
||||
MU_CONFIG_CMD_FIND,
|
||||
MU_CONFIG_CMD_CLEANUP,
|
||||
MU_CONFIG_CMD_MKDIR,
|
||||
MU_CONFIG_CMD_VIEW,
|
||||
MU_CONFIG_CMD_EXTRACT,
|
||||
MU_CONFIG_CMD_NONE,
|
||||
|
||||
MU_CONFIG_CMD_UNKNOWN
|
||||
};
|
||||
typedef enum _MuConfigCmd MuConfigCmd;
|
||||
|
||||
|
||||
|
||||
/* struct with all configuration options for mu; it will be filled
|
||||
* from the config file, and/or command line arguments */
|
||||
|
||||
struct _MuConfigOptions {
|
||||
|
||||
MuConfigCmd cmd; /* the command, or MU_CONFIG_CMD_NONE */
|
||||
const char *cmdstr; /* cmd string, for user info */
|
||||
|
||||
/* general options */
|
||||
gboolean quiet; /* don't give any output */
|
||||
|
@ -47,6 +65,9 @@ struct _MuConfigOptions {
|
|||
gboolean rebuild; /* empty the database before indexing */
|
||||
gboolean autoupgrade; /* automatically upgrade db
|
||||
* when needed */
|
||||
int xbatchsize; /* batchsize for xapian commits, or 0 for default
|
||||
* */
|
||||
|
||||
/* options for querying */
|
||||
gboolean xquery; /* give the Xapian query instead of
|
||||
search results */
|
||||
|
|
|
@ -41,14 +41,14 @@ struct _MuIndex {
|
|||
};
|
||||
|
||||
MuIndex*
|
||||
mu_index_new (const char *xpath, GError **err)
|
||||
mu_index_new (const char *xpath, guint xbatchsize, GError **err)
|
||||
{
|
||||
MuIndex *index;
|
||||
|
||||
g_return_val_if_fail (xpath, NULL);
|
||||
|
||||
index = g_new0 (MuIndex, 1);
|
||||
index->_xapian = mu_store_new (xpath, err);
|
||||
index->_xapian = mu_store_new (xpath, xbatchsize, err);
|
||||
|
||||
if (!index->_xapian) {
|
||||
g_warning ("%s: failed to open xapian store (%s)",
|
||||
|
|
|
@ -51,7 +51,7 @@ typedef struct _MuIndexStats MuIndexStats;
|
|||
*
|
||||
* @return a new MuIndex instance, or NULL in case of error
|
||||
*/
|
||||
MuIndex* mu_index_new (const char* muhome, GError **err)
|
||||
MuIndex* mu_index_new (const char* muhome, guint batchsize, GError **err)
|
||||
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
|
||||
|
||||
|
||||
|
|
|
@ -32,8 +32,8 @@
|
|||
#include "mu-str.h"
|
||||
#include "mu-msg-flags.h"
|
||||
|
||||
/* number of new messages after which we commit to the database */
|
||||
#define MU_STORE_TRX_SIZE 6666
|
||||
/* by default, use transactions of 30000 messages */
|
||||
#define MU_STORE_DEFAULT_TRX_SIZE 30000
|
||||
|
||||
/* http://article.gmane.org/gmane.comp.search.xapian.general/3656 */
|
||||
#define MU_STORE_MAX_TERM_LENGTH 240
|
||||
|
@ -47,6 +47,7 @@ struct _MuStore {
|
|||
bool _in_transaction;
|
||||
int _processed;
|
||||
size_t _trx_size;
|
||||
guint _batchsize; /* batch size of a xapian transaction */
|
||||
};
|
||||
|
||||
|
||||
|
@ -115,7 +116,7 @@ check_version (MuStore *store)
|
|||
}
|
||||
|
||||
MuStore*
|
||||
mu_store_new (const char* xpath, GError **err)
|
||||
mu_store_new (const char* xpath, guint batchsize, GError **err)
|
||||
{
|
||||
MuStore *store (0);
|
||||
|
||||
|
@ -131,14 +132,15 @@ mu_store_new (const char* xpath, GError **err)
|
|||
}
|
||||
|
||||
/* keep count of processed docs */
|
||||
store->_trx_size = MU_STORE_TRX_SIZE;
|
||||
store->_in_transaction = false;
|
||||
store->_processed = 0;
|
||||
store->_processed = 0;
|
||||
store->_trx_size = batchsize ? batchsize : MU_STORE_DEFAULT_TRX_SIZE;
|
||||
|
||||
add_synonyms (store);
|
||||
|
||||
MU_WRITE_LOG ("%s: opened %s", __FUNCTION__, xpath);
|
||||
|
||||
MU_WRITE_LOG ("%s: opened %s (batch size: %u)",
|
||||
__FUNCTION__, xpath, store->_trx_size);
|
||||
|
||||
return store;
|
||||
|
||||
} MU_XAPIAN_CATCH_BLOCK_G_ERROR(err,MU_ERROR_XAPIAN);
|
||||
|
|
|
@ -36,12 +36,13 @@ typedef struct _MuStore MuStore;
|
|||
* create a new Xapian store, a place to store documents
|
||||
*
|
||||
* @param path the path to the database
|
||||
* @param err to receive error info or NULL. err->code can be found in
|
||||
* @param batchsize size of batch before committing
|
||||
* @param err to receive error info or NULL. err->code can be found in
|
||||
* mu-error.h
|
||||
*
|
||||
* @return a new MuStore object, or NULL in case of error
|
||||
*/
|
||||
MuStore* mu_store_new (const char *path, GError **err)
|
||||
MuStore* mu_store_new (const char *path, guint batchsize, GError **err)
|
||||
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
|
||||
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@ test_mu_index (void)
|
|||
xpath = g_strdup_printf ("%s%c%s", muhome, G_DIR_SEPARATOR,
|
||||
"xapian");
|
||||
|
||||
store = mu_store_new (xpath, NULL);
|
||||
store = mu_store_new (xpath, 0, NULL);
|
||||
g_assert (store);
|
||||
|
||||
g_assert_cmpuint (mu_store_count (store), ==, 4);
|
||||
|
|
|
@ -42,7 +42,7 @@ test_mu_store_new_destroy (void)
|
|||
g_assert (tmpdir);
|
||||
|
||||
err = NULL;
|
||||
store = mu_store_new (tmpdir, &err);
|
||||
store = mu_store_new (tmpdir, 12345, &err);
|
||||
g_assert (store);
|
||||
g_assert (err == NULL);
|
||||
|
||||
|
@ -68,7 +68,7 @@ test_mu_store_version (void)
|
|||
g_assert (tmpdir);
|
||||
|
||||
err = NULL;
|
||||
store = mu_store_new (tmpdir, &err);
|
||||
store = mu_store_new (tmpdir, 789, &err);
|
||||
g_assert (store);
|
||||
g_assert (err == NULL);
|
||||
|
||||
|
@ -94,7 +94,7 @@ test_mu_store_store_and_count (void)
|
|||
tmpdir = test_mu_common_get_random_tmpdir();
|
||||
g_assert (tmpdir);
|
||||
|
||||
store = mu_store_new (tmpdir, NULL);
|
||||
store = mu_store_new (tmpdir, 1, NULL);
|
||||
g_assert (store);
|
||||
|
||||
g_assert_cmpuint (0,==,mu_store_count (store));
|
||||
|
@ -142,7 +142,7 @@ test_mu_store_store_remove_and_count (void)
|
|||
tmpdir = test_mu_common_get_random_tmpdir();
|
||||
g_assert (tmpdir);
|
||||
|
||||
store = mu_store_new (tmpdir, NULL);
|
||||
store = mu_store_new (tmpdir, 0, NULL);
|
||||
g_assert (store);
|
||||
|
||||
g_assert_cmpuint (0,==,mu_store_count (store));
|
||||
|
|
Loading…
Reference in New Issue