mu/lib/mu-store-write.cc

802 lines
19 KiB
C++

/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */
/*
** Copyright (C) 2008-2011 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
** Free Software Foundation; either version 3, or (at your option) any
** later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#if HAVE_CONFIG_H
#include "config.h"
#endif /*HAVE_CONFIG_H*/
#include <cstdio>
#include <xapian.h>
#include <cstring>
#include <stdexcept>
#include "mu-store.h"
#include "mu-store-priv.hh" /* _MuStore */
#include "mu-msg.h"
#include "mu-msg-part.h"
#include "mu-store.h"
#include "mu-util.h"
#include "mu-str.h"
#include "mu-date.h"
#include "mu-flags.h"
#include "mu-contacts.h"
void
_MuStore::begin_transaction ()
{
try {
db_writable()->begin_transaction();
in_transaction (true);
} MU_XAPIAN_CATCH_BLOCK;
}
void
_MuStore::commit_transaction () {
try {
in_transaction (false);
db_writable()->commit_transaction();
} MU_XAPIAN_CATCH_BLOCK;
}
void
_MuStore::rollback_transaction () {
try {
in_transaction (false);
db_writable()->cancel_transaction();
} MU_XAPIAN_CATCH_BLOCK;
}
/* get the Xapian prefix for a message field as a one-character
 * std::string. The strings for all field ids are created once, on the
 * first call, and kept in a static table; this avoids allocating them
 * over and over for each message.
 *
 * note: mfid is used as an index into the table without range check */
G_GNUC_CONST static const std::string&
prefix (MuMsgFieldId mfid)
{
	static std::string pfx_table[MU_MSG_FIELD_ID_NUM];
	static bool table_ready = false;

	if (G_LIKELY(table_ready))
		return pfx_table[mfid];

	/* first call: fill the table */
	for (int id = 0; id < MU_MSG_FIELD_ID_NUM; ++id)
		pfx_table[id] = std::string
			(1, mu_msg_field_xapian_prefix ((MuMsgFieldId)id));
	table_ready = true;

	return pfx_table[mfid];
}
/* callback for mu_flags_foreach: make the prefixed, lower-cased flag
 * character a synonym for the prefixed flag name. Existing synonyms
 * for the flag-name term are removed first. */
static void
add_synonym_for_flag (MuFlags flag, Xapian::WritableDatabase *db)
{
	static const std::string pfx (prefix(MU_MSG_FIELD_ID_FLAGS));

	const std::string name_term (pfx + mu_flag_name (flag));
	const char        shortcut = (char)(tolower(mu_flag_char(flag)));
	const std::string char_term (pfx + std::string(1, shortcut));

	db->clear_synonyms (name_term);
	db->add_synonym (name_term, char_term);
}
/* callback for mu_msg_prio_foreach: make the prefixed priority
 * character a synonym for the prefixed priority name. Existing
 * synonyms for both terms are removed first. */
static void
add_synonym_for_prio (MuMsgPrio prio, Xapian::WritableDatabase *db)
{
	static const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO));

	const std::string name_term (pfx + mu_msg_prio_name (prio));
	const std::string char_term
		(pfx + (std::string(1, mu_msg_prio_char (prio))));

	db->clear_synonyms (name_term);
	db->clear_synonyms (char_term);

	db->add_synonym (name_term, char_term);
}
/* register the synonyms for all flags and all priorities in the
 * writable database of the store; the database pointer is handed to
 * the callbacks as their user-data */
static void
add_synonyms (MuStore *store)
{
	Xapian::WritableDatabase *wdb = store->db_writable();

	mu_flags_foreach ((MuFlagsForeachFunc)add_synonym_for_flag, wdb);
	mu_msg_prio_foreach ((MuMsgPrioForeachFunc)add_synonym_for_prio, wdb);
}
/**
 * create a new writable store and register the flag/priority synonyms
 * in its database
 *
 * @param xpath path of the database; must not be NULL
 * @param contacts_cache passed on to the _MuStore constructor
 * @param rebuild passed on (as a bool) to the _MuStore constructor
 * @param err handed to the catch-block macros for error reporting
 *
 * @return the new store, or NULL in case of error
 */
MuStore*
mu_store_new_writable (const char* xpath, const char *contacts_cache,
		       gboolean rebuild, GError **err)
{
	g_return_val_if_fail (xpath, NULL);

	try {
		try {
			MuStore *store;
			store = new _MuStore (xpath, contacts_cache,
					      rebuild ? true : false);
			try {
				add_synonyms (store);
			} catch (...) {
				/* the catch blocks below only report the
				 * error and return NULL; without this, the
				 * store created above would be leaked.
				 * NOTE(review): assumes plain destruction is
				 * the right way to dispose of a store that
				 * was never handed out -- confirm against
				 * ~_MuStore */
				delete store;
				throw;
			}
			return store;

		} MU_STORE_CATCH_BLOCK_RETURN(err,NULL);

	} MU_XAPIAN_CATCH_BLOCK_G_ERROR_RETURN (err, MU_ERROR_XAPIAN, NULL);
}
/**
 * set the batch size of the store; mu_store_add_msg and
 * mu_store_update_msg commit their transaction each time the
 * processed-counter reaches a multiple of this size
 *
 * @param store a store; must not be NULL
 * @param batchsize the new batch size, passed on to the store
 */
void
mu_store_set_batch_size (MuStore *store, guint batchsize)
{
	g_return_if_fail (store);

	store->set_batch_size (batchsize);
}
/**
 * store a key/value pair as metadata in the writable database
 *
 * @param store a store; must not be NULL
 * @param key the metadata key; must not be NULL
 * @param val the metadata value; must not be NULL
 * @param err handed to the catch-block macros for error reporting
 *
 * @return TRUE if the database call returned without throwing, FALSE
 * otherwise
 */
gboolean
mu_store_set_metadata (MuStore *store, const char *key, const char *val,
		       GError **err)
{
	g_return_val_if_fail (store, FALSE);
	g_return_val_if_fail (key, FALSE);
	g_return_val_if_fail (val, FALSE);

	try {
		try {
			const std::string mkey (key), mval (val);
			store->db_writable()->set_metadata (mkey, mval);
			return TRUE;

		} MU_STORE_CATCH_BLOCK_RETURN(err, FALSE);

	} MU_XAPIAN_CATCH_BLOCK_G_ERROR_RETURN(err, MU_ERROR_XAPIAN, FALSE);
}
/**
 * clear the store (see _MuStore::clear)
 *
 * @param store a store; must not be NULL
 * @param err handed to the catch-block macros for error reporting;
 * Xapian errors are passed through the same G_ERROR macro (with
 * MU_ERROR_XAPIAN) that mu_store_set_metadata uses, rather than the
 * variant that is not given err at all
 *
 * @return TRUE if clearing returned without throwing, FALSE otherwise
 */
gboolean
mu_store_clear (MuStore *store, GError **err)
{
	g_return_val_if_fail (store, FALSE);

	try {
		try {
			store->clear();
			return TRUE;

		} MU_STORE_CATCH_BLOCK_RETURN(err, FALSE);

	} MU_XAPIAN_CATCH_BLOCK_G_ERROR_RETURN(err, MU_ERROR_XAPIAN, FALSE);
}
/**
 * flush pending changes: commit the running transaction (if any) and
 * then commit the writable database. Exceptions are dealt with by
 * MU_XAPIAN_CATCH_BLOCK (defined elsewhere).
 *
 * @param store a store; must not be NULL
 */
void
mu_store_flush (MuStore *store)
{
	g_return_if_fail (store);

	try {
		const bool pending = store->in_transaction();
		if (pending)
			store->commit_transaction ();

		store->db_writable()->commit ();

	} MU_XAPIAN_CATCH_BLOCK;
}
/* add the date field of msg as a value to doc: the numeric field is
 * taken as a time_t and converted to a string with
 * mu_date_time_t_to_str_s (second argument FALSE, i.e. UTC according
 * to the original comment). A date of 0 is not stored. */
static void
add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
{
	const time_t stamp = (time_t)mu_msg_get_field_numeric (msg, mfid);
	if (stamp == 0)
		return; /* no date */

	const char *datestr = mu_date_time_t_to_str_s (stamp, FALSE /*UTC*/);
	doc.add_value ((Xapian::valueno)mfid, datestr);
}
/* pre-calculate; optimization */
G_GNUC_CONST static const std::string&
flag_val (char flagchar)
{
static const std::string
pfx (prefix(MU_MSG_FIELD_ID_FLAGS)),
draftstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_DRAFT))),
flaggedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_FLAGGED))),
passedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_PASSED))),
repliedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_REPLIED))),
seenstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_SEEN))),
trashedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_TRASHED))),
newstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_NEW))),
signedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_SIGNED))),
cryptstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_ENCRYPTED))),
attachstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_HAS_ATTACH))),
unreadstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_UNREAD)));
switch (flagchar) {
case 'D': return draftstr;
case 'F': return flaggedstr;
case 'P': return passedstr;
case 'R': return repliedstr;
case 'S': return seenstr;
case 'T': return trashedstr;
case 'N': return newstr;
case 'z': return signedstr;
case 'x': return cryptstr;
case 'a': return attachstr;
case 'u': return unreadstr;
default:
g_return_val_if_reached (flaggedstr);
return flaggedstr;
}
}
/* get the (pre-calculated) term for a message priority: the
 * priority-prefix followed by the priority character. The terms are
 * created once, on the first call. For an unknown priority,
 * g_return_val_if_reached is triggered with the term for 'normal'. */
G_GNUC_CONST static const std::string&
prio_val (MuMsgPrio prio)
{
	static const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO));

	static const std::string low_term
		(pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_LOW)));
	static const std::string normal_term
		(pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_NORMAL)));
	static const std::string high_term
		(pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_HIGH)));

	if (prio == MU_MSG_PRIO_LOW)
		return low_term;
	if (prio == MU_MSG_PRIO_NORMAL)
		return normal_term;
	if (prio == MU_MSG_PRIO_HIGH)
		return high_term;

	g_return_val_if_reached (normal_term);
	return normal_term;
}
/* add a numeric field of msg to doc: the number is always stored as a
 * (sortable-serialised) value; for the flags field, a term is added
 * for each character in the flags string, and for the priority field
 * a term for the priority */
static void
add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
{
	const gint64 num = mu_msg_get_field_numeric (msg, mfid);

	doc.add_value ((Xapian::valueno)mfid,
		       std::string (Xapian::sortable_serialise((double)num)));

	switch (mfid) {

	case MU_MSG_FIELD_ID_FLAGS: {
		const char *flagchars = mu_flags_to_str_s
			((MuFlags)num,(MuFlagType)MU_FLAG_TYPE_ANY);
		g_return_if_fail (flagchars);
		for (const char *c = flagchars; *c; ++c)
			doc.add_term (flag_val(*c));
		break;
	}

	case MU_MSG_FIELD_ID_PRIO:
		doc.add_term (prio_val((MuMsgPrio)num));
		break;

	default:
		break; /* other numeric fields only have the value */
	}
}
/* add a string (from a string or string-list field) to doc, as
 * configured for the field: optionally as a value, as indexed text
 * and/or as a term.
 *
 * note that the steps must stay in this order: the value gets the
 * unchanged string, the indexed text the (optionally) normalized one,
 * and the term the (optionally) normalized-and-escaped one. The
 * *_in_place_try helpers get val itself and the strchunk. */
static void
add_terms_values_str (Xapian::Document& doc, char *val,
		      MuMsgFieldId mfid, GStringChunk *strchunk)
{
	/* the value is what is shown in search results, so store the
	 * original */
	if (mu_msg_field_xapian_value(mfid))
		doc.add_value ((Xapian::valueno)mfid, val);

	/* from here on, work on what becomes the search terms */
	char *text = val;
	if (mu_msg_field_normalize (mfid))
		text = mu_str_normalize_in_place_try (text, TRUE, strchunk);

	if (mu_msg_field_xapian_index (mfid)) {
		Xapian::TermGenerator indexer;
		indexer.set_document (doc);
		indexer.index_text_without_positions (text, 1, prefix(mfid));
	}

	if (mu_msg_field_xapian_escape (mfid))
		text = mu_str_xapian_escape_in_place_try
			(text, TRUE /*esc_space*/, strchunk);

	if (!mu_msg_field_xapian_term(mfid))
		return;

	/* the term text is cut off at MAX_TERM_LENGTH (prefix not
	 * counted) */
	const std::string termtext (text, 0, _MuStore::MAX_TERM_LENGTH);
	doc.add_term (prefix(mfid) + termtext);
}
/* add a string field of msg to doc; does nothing if the message has no
 * string for the field. add_terms_values_str gets a copy of the string
 * that lives on strchunk, not the message's own string. */
static void
add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
			 MuMsgFieldId mfid, GStringChunk *strchunk)
{
	const char *orig = mu_msg_get_field_string (msg, mfid);
	if (!orig)
		return; /* nothing to do */

	char *copy = g_string_chunk_insert (strchunk, orig);
	add_terms_values_str (doc, copy, mfid, strchunk);
}
/* add a string-list field of msg to doc; does nothing if the message
 * has no list for the field. When the field has a value, the elements
 * are joined (using ',') into a single value; when it has terms, each
 * element is copied to strchunk and handed to add_terms_values_str. */
static void
add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg,
			      MuMsgFieldId mfid, GStringChunk *strchunk)
{
	const GSList *lst = mu_msg_get_field_string_list (msg, mfid);
	if (!lst)
		return;

	if (mu_msg_field_xapian_value (mfid)) {
		gchar *joined = mu_str_from_list (lst, ',');
		if (joined)
			doc.add_value ((Xapian::valueno)mfid, joined);
		g_free (joined);
	}

	if (!mu_msg_field_xapian_term (mfid))
		return;

	for (const GSList *cur = lst; cur; cur = cur->next) {
		char *copy = g_string_chunk_insert
			(strchunk, (const gchar*)cur->data);
		add_terms_values_str (doc, copy, mfid, strchunk);
	}
}
/* user-data for the message-part callbacks (each_part and
 * maybe_index_text_part) */
struct PartData {
	PartData (Xapian::Document& doc, MuMsgFieldId mfid,
		  GStringChunk *strchunk)
		: _doc (doc),
		  _mfid (mfid),
		  _strchunk (strchunk)
		{}

	/* the document the terms are added to.
	 * NOTE(review): held by value (a copy of the constructor
	 * argument); presumably relies on Xapian::Document copies
	 * sharing their data -- confirm */
	Xapian::Document  _doc;
	/* the field id the part iteration was started for */
	MuMsgFieldId      _mfid;
	/* string chunk for the strings created by the callbacks */
	GStringChunk     *_strchunk;
};
/* index the text of a non-body message part: if the part is an
 * attachment or a message and text can be obtained for it, the
 * normalized text is indexed (without positions) under the
 * embedded-text prefix */
static void
maybe_index_text_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
{
	/* only attachments/messages are handled here; inlines are
	 * indexed as body parts */
	const int wanted = MU_MSG_PART_TYPE_ATTACHMENT |
		MU_MSG_PART_TYPE_MESSAGE;
	if (!(part->part_type & wanted))
		return;

	char *txt = mu_msg_part_get_text (msg, part, MU_MSG_OPTION_NONE);
	if (!txt)
		return;

	Xapian::TermGenerator indexer;
	indexer.set_document(pdata->_doc);

	/* the normalized text is allocated on strchunk, so only txt
	 * needs freeing */
	char *normalized = mu_str_normalize (txt, TRUE, pdata->_strchunk);
	indexer.index_text_without_positions
		(normalized, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT));

	g_free (txt);
}
/* callback for mu_msg_part_foreach: add a mime-type term (if the part
 * has a type) and a filename term (if the part has a filename) to the
 * document, and then index the text of the part where applicable (see
 * maybe_index_text_part) */
static void
each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
{
	static const std::string
		file_pfx (prefix(MU_MSG_FIELD_ID_FILE)),
		mime_pfx (prefix(MU_MSG_FIELD_ID_MIME));

	/* save the mime type of any part */
	if (part->type) {
		/* note, we use '_' instead of '/' to separate
		 * type/subtype -- Xapian doesn't treat '/' as
		 * desired, so we use '_' and pre-process queries; see
		 * mu_query_preprocess */
		char mimetype[MuStore::MAX_TERM_LENGTH + 1];
		snprintf (mimetype, sizeof(mimetype), "%s_%s",
			  part->type, part->subtype);

		const std::string termtext
			(mimetype, 0, MuStore::MAX_TERM_LENGTH);
		pdata->_doc.add_term (mime_pfx + termtext);
	}

	/* now, let's create a term if there's a filename */
	char *fname = mu_msg_part_get_filename (part, FALSE);
	if (fname) {
		/* the escaped name is allocated on strchunk, so only
		 * fname needs freeing */
		char *escaped = mu_str_xapian_escape
			(fname, TRUE /*esc space*/, pdata->_strchunk);
		g_free (fname);

		const std::string termtext
			(escaped, 0, MuStore::MAX_TERM_LENGTH);
		pdata->_doc.add_term (file_pfx + termtext);
	}

	maybe_index_text_part (msg, part, pdata);
}
/* add the terms for the parts of msg to doc, by running each_part for
 * every message part (with MU_MSG_OPTION_RECURSE_RFC822) */
static void
add_terms_values_attach (Xapian::Document& doc, MuMsg *msg,
			 MuMsgFieldId mfid, GStringChunk *strchunk)
{
	PartData partdata (doc, mfid, strchunk);

	mu_msg_part_foreach (msg,
			     MU_MSG_OPTION_RECURSE_RFC822,
			     (MuMsgPartForeachFunc)each_part,
			     &partdata);
}
/* index the body of msg (without positions) under the prefix for
 * mfid. The text body is used when there is one, otherwise the html
 * body; messages with the 'encrypted' flag and messages without any
 * body are skipped. */
static void
add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
		       MuMsgFieldId mfid, GStringChunk *strchunk)
{
	if (mu_msg_get_flags(msg) & MU_FLAG_ENCRYPTED)
		return; /* ignore encrypted bodies */

	const char *body = mu_msg_get_body_text (msg, MU_MSG_OPTION_NONE);
	if (!body) /* FIXME: html->txt fallback needed */
		body = mu_msg_get_body_html (msg, MU_MSG_OPTION_NONE);
	if (!body)
		return; /* no body... */

	Xapian::TermGenerator indexer;
	indexer.set_document(doc);

	/* the normalized text is allocated on strchunk, no need for
	 * freeing */
	char *normalized = mu_str_normalize (body, TRUE, strchunk);
	indexer.index_text_without_positions (normalized, 1, prefix(mfid));
}
/* user-data for the field- and contact-callbacks that fill a document
 * from a message.
 *
 * note: new_doc_from_message initializes this with a brace-enclosed
 * list, so the order of the members matters */
typedef struct _MsgDoc {
	Xapian::Document *_doc;      /* the document being filled */
	MuMsg            *_msg;      /* the message the data is taken from */
	MuStore          *_store;    /* the store (for the contacts cache) */
	GStringChunk     *_strchunk; /* for strings created along the way */

	/* callback data, to determine whether this message is 'personal' */
	gboolean          _personal;
	GSList           *_my_addresses;
} MsgDoc;
/* add the terms/values for a field that needs no special treatment,
 * based on its type: numeric, string or string-list. Any other type
 * triggers g_return_if_reached. */
static void
add_terms_values_default (MuMsgFieldId mfid, MsgDoc *msgdoc)
{
	Xapian::Document& doc = *msgdoc->_doc;

	if (mu_msg_field_is_numeric (mfid)) {
		add_terms_values_number (doc, msgdoc->_msg, mfid);
		return;
	}

	if (mu_msg_field_is_string (mfid)) {
		add_terms_values_string (doc, msgdoc->_msg, mfid,
					 msgdoc->_strchunk);
		return;
	}

	if (mu_msg_field_is_string_list(mfid)) {
		add_terms_values_string_list (doc, msgdoc->_msg, mfid,
					      msgdoc->_strchunk);
		return;
	}

	g_return_if_reached ();
}
/* callback for mu_msg_field_foreach: add the terms and values for one
 * message field to the document. Fields that are neither indexed, nor
 * a term, nor a value are skipped; some fields have their own handler
 * and all others go to add_terms_values_default. */
static void
add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
{
	/* note: contact-stuff (To/Cc/From) will be handled in
	 * each_contact_info, not here */
	const bool in_xapian =
		mu_msg_field_xapian_index(mfid) ||
		mu_msg_field_xapian_term(mfid)  ||
		mu_msg_field_xapian_value(mfid);
	if (!in_xapian)
		return;

	Xapian::Document& doc = *msgdoc->_doc;

	switch (mfid) {

	case MU_MSG_FIELD_ID_DATE:
		add_terms_values_date (doc, msgdoc->_msg, mfid);
		break;

	case MU_MSG_FIELD_ID_BODY_TEXT:
		add_terms_values_body (doc, msgdoc->_msg, mfid,
				       msgdoc->_strchunk);
		break;

	/* note: add_terms_values_attach (via each_part) adds the
	 * file, mime and embedded-text terms in one go... */
	case MU_MSG_FIELD_ID_FILE:
		add_terms_values_attach (doc, msgdoc->_msg, mfid,
					 msgdoc->_strchunk);
		break;

	/* ...so there is nothing left to do for these two */
	case MU_MSG_FIELD_ID_MIME:
	case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
	/* nothing to do here either; already taken care of elsewhere */
	case MU_MSG_FIELD_ID_UID:
		break;

	default:
		add_terms_values_default (mfid, msgdoc);
		break;
	}
}
/* get the Xapian prefix for the type of a contact (to, from, cc or
 * bcc). For any other type, a warning is logged and a reference to an
 * empty string is returned. */
static const std::string&
xapian_pfx (MuMsgContact *contact)
{
	static const std::string empty;

	MuMsgFieldId mfid;

	/* map the contact type to its field id */
	switch (contact->type) {
	case MU_MSG_CONTACT_TYPE_TO:
		mfid = MU_MSG_FIELD_ID_TO;
		break;
	case MU_MSG_CONTACT_TYPE_FROM:
		mfid = MU_MSG_FIELD_ID_FROM;
		break;
	case MU_MSG_CONTACT_TYPE_CC:
		mfid = MU_MSG_FIELD_ID_CC;
		break;
	case MU_MSG_CONTACT_TYPE_BCC:
		mfid = MU_MSG_FIELD_ID_BCC;
		break;
	default:
		g_warning ("unsupported contact type %u",
			   (unsigned)contact->type);
		return empty;
	}

	/* a reference to the cached string; nothing is copied */
	return prefix(mfid);
}
/* callback for mu_msg_contact_foreach: add the contact to the
 * document. A non-empty name is normalized and indexed (without
 * positions); a non-empty address is escaped and added as a term, and
 * also added to the contacts cache of the store, if there is one.
 * Reply-to contacts and contacts of unsupported types are skipped. */
static void
each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
{
	/* for now, don't store reply-to addresses */
	if (mu_msg_contact_type (contact) == MU_MSG_CONTACT_TYPE_REPLY_TO)
		return;

	const std::string& pfx = xapian_pfx(contact);
	if (pfx.empty())
		return; /* unsupported contact type */

	if (!mu_str_is_empty(contact->name)) {
		/* note: the normalized name lives on the stringchunk,
		 * no need for freeing */
		char *normalized = mu_str_normalize
			(contact->name, TRUE, msgdoc->_strchunk);

		Xapian::TermGenerator indexer;
		indexer.set_document (*msgdoc->_doc);
		indexer.index_text_without_positions (normalized, 1, pfx);
	}

	if (mu_str_is_empty(contact->address))
		return; /* no address; we're done */

	/* the address is escaped (spaces are left alone), not
	 * normalized; the escaped string lives on the stringchunk, no
	 * need for freeing */
	char *escaped = mu_str_xapian_escape (contact->address,
					      FALSE /*dont esc space*/,
					      msgdoc->_strchunk);

	/* note: here the length limit includes the prefix */
	const std::string term (pfx + escaped);
	msgdoc->_doc->add_term (term.substr (0, MuStore::MAX_TERM_LENGTH));

	/* store it also in our contacts cache */
	if (msgdoc->_store->contacts())
		mu_contacts_add (msgdoc->_store->contacts(),
				 contact->address, contact->name,
				 msgdoc->_personal,
				 mu_msg_get_date(msgdoc->_msg));
}
/* callback for mu_msg_contact_foreach: flag the message as 'personal'
 * (msgdoc->_personal) when the address of the contact is equal,
 * ignoring ASCII case, to one of the addresses in
 * msgdoc->_my_addresses. Nothing is done when the message is already
 * flagged, or when the contact has no address. */
static void
each_contact_check_if_personal (MuMsgContact *contact, MsgDoc *msgdoc)
{
	if (msgdoc->_personal || !contact->address)
		return;

	for (const GSList *cur = msgdoc->_my_addresses; cur;
	     cur = g_slist_next (cur)) {
		if (g_ascii_strcasecmp (contact->address,
					(const char*)cur->data) == 0) {
			msgdoc->_personal = TRUE;
			return; /* one match is enough; stop searching */
		}
	}
}
#define MU_STRING_CHUNK_SIZE 8192
Xapian::Document
new_doc_from_message (MuStore *store, MuMsg *msg)
{
Xapian::Document doc;
MsgDoc docinfo = {&doc, msg, store, 0, FALSE, NULL};
docinfo._strchunk = g_string_chunk_new (MU_STRING_CHUNK_SIZE);
mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo);
/* determine whether this is 'personal' email, ie. one of my
* e-mail addresses is explicitly mentioned -- it's not a
* mailing list message. Callback will update docinfo->_personal */
if (store->my_addresses()) {
docinfo._my_addresses = store->my_addresses();
mu_msg_contact_foreach
(msg,
(MuMsgContactForeachFunc)each_contact_check_if_personal,
&docinfo);
}
/* also store the contact-info as separate terms, and add it
* to the cache */
mu_msg_contact_foreach (msg, (MuMsgContactForeachFunc)each_contact_info,
&docinfo);
g_string_chunk_free (docinfo._strchunk);
return doc;
}
/**
 * add a message to the store. A transaction is started if none is
 * running, and committed each time the processed-counter of the store
 * reaches a multiple of its batch size. When an exception was caught
 * and a transaction is running, that transaction is rolled back.
 *
 * @param store a store; must not be NULL
 * @param msg the message to add; must not be NULL
 * @param err handed to MU_XAPIAN_CATCH_BLOCK_G_ERROR for error
 * reporting
 *
 * @return the docid of the stored document, or MU_STORE_INVALID_DOCID
 * in case of error
 */
unsigned
mu_store_add_msg (MuStore *store, MuMsg *msg, GError **err)
{
	g_return_val_if_fail (store, MU_STORE_INVALID_DOCID);
	g_return_val_if_fail (msg, MU_STORE_INVALID_DOCID);

	try {
		Xapian::Document doc (new_doc_from_message(store, msg));
		const std::string uidterm
			(store->get_uid_term (mu_msg_get_path(msg)));

		if (!store->in_transaction())
			store->begin_transaction();

		doc.add_term (uidterm);
		// MU_WRITE_LOG ("adding: %s", uidterm.c_str());

		/* note, this will replace any other messages for this path */
		const Xapian::docid id =
			store->db_writable()->replace_document (uidterm, doc);

		const bool batch_done =
			(store->inc_processed() % store->batch_size()) == 0;
		if (batch_done)
			store->commit_transaction();

		return id;

	} MU_XAPIAN_CATCH_BLOCK_G_ERROR (err, MU_ERROR_XAPIAN_STORE_FAILED);

	/* only get here after an exception was caught */
	if (store->in_transaction())
		store->rollback_transaction();

	return MU_STORE_INVALID_DOCID;
}
/**
 * replace the document with the given docid by a new document for
 * msg. A transaction is started if none is running, and committed
 * each time the processed-counter of the store reaches a multiple of
 * its batch size. When an exception was caught and a transaction is
 * running, that transaction is rolled back.
 *
 * @param store a store; must not be NULL
 * @param docid the id of the document to replace; must not be 0
 * @param msg the message to store; must not be NULL
 * @param err handed to MU_XAPIAN_CATCH_BLOCK_G_ERROR for error
 * reporting
 *
 * @return docid, or MU_STORE_INVALID_DOCID in case of error
 */
unsigned
mu_store_update_msg (MuStore *store, unsigned docid, MuMsg *msg, GError **err)
{
	g_return_val_if_fail (store, MU_STORE_INVALID_DOCID);
	g_return_val_if_fail (msg, MU_STORE_INVALID_DOCID);
	g_return_val_if_fail (docid != 0, MU_STORE_INVALID_DOCID);

	try {
		Xapian::Document doc (new_doc_from_message(store, msg));

		if (!store->in_transaction())
			store->begin_transaction();

		const std::string uidterm
			(store->get_uid_term(mu_msg_get_path(msg)));
		doc.add_term (uidterm);

		store->db_writable()->replace_document (docid, doc);

		const bool batch_done =
			(store->inc_processed() % store->batch_size()) == 0;
		if (batch_done)
			store->commit_transaction();

		return docid;

	} MU_XAPIAN_CATCH_BLOCK_G_ERROR (err, MU_ERROR_XAPIAN_STORE_FAILED);

	/* only get here after an exception was caught */
	if (store->in_transaction())
		store->rollback_transaction();

	return MU_STORE_INVALID_DOCID;
}
/**
 * create a message from the file at path and add it to the store (see
 * mu_store_add_msg)
 *
 * @param store a store; must not be NULL
 * @param path path of the message file; must not be NULL
 * @param maildir passed on to mu_msg_new_from_file
 * @param err passed on to mu_msg_new_from_file and mu_store_add_msg
 *
 * @return the docid of the stored message, or MU_STORE_INVALID_DOCID
 * in case of error
 */
unsigned
mu_store_add_path (MuStore *store, const char *path, const char *maildir,
		   GError **err)
{
	MuMsg *msg;
	unsigned docid;

	/* this function returns a docid, not a gboolean; fail with the
	 * same value the other docid-returning functions use */
	g_return_val_if_fail (store, MU_STORE_INVALID_DOCID);
	g_return_val_if_fail (path, MU_STORE_INVALID_DOCID);

	msg = mu_msg_new_from_file (path, maildir, err);
	if (!msg)
		return MU_STORE_INVALID_DOCID;

	docid = mu_store_add_msg (store, msg, err);
	mu_msg_unref (msg); /* the message is no longer needed here */

	return docid;
}
/**
 * get the writable database of the store, as a pointer to the
 * XapianWritableDatabase type
 *
 * @param store a store; must not be NULL
 *
 * @return the writable database, or NULL if store is NULL
 */
XapianWritableDatabase*
mu_store_get_writable_database (MuStore *store)
{
	g_return_val_if_fail (store, NULL);

	XapianWritableDatabase *wdb =
		(XapianWritableDatabase*)store->db_writable();
	return wdb;
}
/**
 * remove the document for the message with the given path from the
 * store, and increase the processed-counter of the store
 *
 * @param store a store; must not be NULL
 * @param msgpath path of the message to remove; must not be NULL
 *
 * @return TRUE if the removal returned without throwing, FALSE
 * otherwise
 */
gboolean
mu_store_remove_path (MuStore *store, const char *msgpath)
{
	g_return_val_if_fail (store, FALSE);
	g_return_val_if_fail (msgpath, FALSE);

	try {
		/* documents are looked up by the uid-term for their path */
		const std::string uidterm (store->get_uid_term(msgpath));

		store->db_writable()->delete_document (uidterm);
		store->inc_processed();

		return TRUE;

	} MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE);
}
/**
 * store a timestamp for a path: the timestamp is written as a decimal
 * number and stored as metadata, with msgpath as the key (see
 * mu_store_set_metadata)
 *
 * @param store a store; must not be NULL
 * @param msgpath the path to store the timestamp for; must not be NULL
 * @param stamp the timestamp
 * @param err passed on to mu_store_set_metadata
 *
 * @return the return value of mu_store_set_metadata, or FALSE if a
 * precondition failed
 */
gboolean
mu_store_set_timestamp (MuStore *store, const char* msgpath,
			time_t stamp, GError **err)
{
	/* a 64-bit unsigned number has at most 20 digits; +1 for the
	 * terminating NUL */
	char buf[21];

	g_return_val_if_fail (store, FALSE);
	g_return_val_if_fail (msgpath, FALSE);

	/* bounded write, so the buffer cannot be overrun */
	snprintf (buf, sizeof(buf), "%" G_GUINT64_FORMAT, (guint64)stamp);

	return mu_store_set_metadata (store, msgpath, buf, err);
}