/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */ /* ** Copyright (C) 2008-2011 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify it ** under the terms of the GNU General Public License as published by the ** Free Software Foundation; either version 3, or (at your option) any ** later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software Foundation, ** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ** */ #if HAVE_CONFIG_H #include "config.h" #endif /*HAVE_CONFIG_H*/ #include #include #include #include #include "mu-store.h" #include "mu-store-priv.hh" /* _MuStore */ #include "mu-msg.h" #include "mu-msg-part.h" #include "mu-store.h" #include "mu-util.h" #include "mu-str.h" #include "mu-date.h" #include "mu-flags.h" #include "mu-contacts.h" void _MuStore::begin_transaction () { try { db_writable()->begin_transaction(); in_transaction (true); } MU_XAPIAN_CATCH_BLOCK; } void _MuStore::commit_transaction () { try { in_transaction (false); db_writable()->commit_transaction(); } MU_XAPIAN_CATCH_BLOCK; } void _MuStore::rollback_transaction () { try { in_transaction (false); db_writable()->cancel_transaction(); } MU_XAPIAN_CATCH_BLOCK; } /* we cache these prefix strings, so we don't have to allocate them all * the time; this should save 10-20 string allocs per message */ G_GNUC_CONST static const std::string& prefix (MuMsgFieldId mfid) { static std::string fields[MU_MSG_FIELD_ID_NUM]; static bool initialized = false; if (G_UNLIKELY(!initialized)) { for (int i = 0; i != MU_MSG_FIELD_ID_NUM; ++i) fields[i] = std::string (1, mu_msg_field_xapian_prefix ((MuMsgFieldId)i)); initialized = true; } return fields[mfid]; } static void add_synonym_for_flag (MuFlags flag, Xapian::WritableDatabase *db) { static const std::string pfx(prefix(MU_MSG_FIELD_ID_FLAGS)); db->clear_synonyms (pfx + mu_flag_name (flag)); db->add_synonym (pfx + mu_flag_name (flag), pfx + (std::string(1, (char)(tolower(mu_flag_char(flag)))))); } static void add_synonym_for_prio (MuMsgPrio prio, Xapian::WritableDatabase *db) { static const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO)); std::string s1 (pfx + mu_msg_prio_name (prio)); std::string s2 (pfx + (std::string(1, mu_msg_prio_char (prio)))); db->clear_synonyms (s1); db->clear_synonyms (s2); db->add_synonym (s1, s2); } static void add_synonyms (MuStore *store) { mu_flags_foreach ((MuFlagsForeachFunc)add_synonym_for_flag, store->db_writable()); mu_msg_prio_foreach ((MuMsgPrioForeachFunc)add_synonym_for_prio, store->db_writable()); } MuStore* mu_store_new_writable (const char* xpath, const char *contacts_cache, gboolean rebuild, GError **err) { g_return_val_if_fail (xpath, NULL); try { try { MuStore *store; store = new _MuStore (xpath, contacts_cache, rebuild ? true : false); add_synonyms (store); return store; } MU_STORE_CATCH_BLOCK_RETURN(err,NULL); } MU_XAPIAN_CATCH_BLOCK_G_ERROR_RETURN (err, MU_ERROR_XAPIAN, NULL); } void mu_store_set_batch_size (MuStore *store, guint batchsize) { g_return_if_fail (store); store->set_batch_size (batchsize); } gboolean mu_store_set_metadata (MuStore *store, const char *key, const char *val, GError **err) { g_return_val_if_fail (store, FALSE); g_return_val_if_fail (key, FALSE); g_return_val_if_fail (val, FALSE); try { try { store->db_writable()->set_metadata (key, val); return TRUE; } MU_STORE_CATCH_BLOCK_RETURN(err, FALSE); } MU_XAPIAN_CATCH_BLOCK_G_ERROR_RETURN(err, MU_ERROR_XAPIAN, FALSE); } gboolean mu_store_clear (MuStore *store, GError **err) { g_return_val_if_fail (store, FALSE); try { try { store->clear(); return TRUE; } MU_STORE_CATCH_BLOCK_RETURN(err, FALSE); } MU_XAPIAN_CATCH_BLOCK_RETURN(FALSE); } void mu_store_flush (MuStore *store) { g_return_if_fail (store); try { if (store->in_transaction()) store->commit_transaction (); store->db_writable()->commit (); } MU_XAPIAN_CATCH_BLOCK; } static void add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { time_t t; const char *datestr; t = (time_t)mu_msg_get_field_numeric (msg, mfid); if (t != 0) { datestr = mu_date_time_t_to_str_s (t, FALSE /*UTC*/); doc.add_value ((Xapian::valueno)mfid, datestr); } } /* pre-calculate; optimization */ G_GNUC_CONST static const std::string& flag_val (char flagchar) { static const std::string pfx (prefix(MU_MSG_FIELD_ID_FLAGS)), draftstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_DRAFT))), flaggedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_FLAGGED))), passedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_PASSED))), repliedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_REPLIED))), seenstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_SEEN))), trashedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_TRASHED))), newstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_NEW))), signedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_SIGNED))), cryptstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_ENCRYPTED))), attachstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_HAS_ATTACH))), unreadstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_UNREAD))); switch (flagchar) { case 'D': return draftstr; case 'F': return flaggedstr; case 'P': return passedstr; case 'R': return repliedstr; case 'S': return seenstr; case 'T': return trashedstr; case 'N': return newstr; case 'z': return signedstr; case 'x': return cryptstr; case 'a': return attachstr; case 'u': return unreadstr; default: g_return_val_if_reached (flaggedstr); return flaggedstr; } } /* pre-calculate; optimization */ G_GNUC_CONST static const std::string& prio_val (MuMsgPrio prio) { static const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO)); static const std::string low (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_LOW))), norm (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_NORMAL))), high (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_HIGH))); switch (prio) { case MU_MSG_PRIO_LOW: return low; case MU_MSG_PRIO_NORMAL: return norm; case MU_MSG_PRIO_HIGH: return high; default: g_return_val_if_reached (norm); return norm; } } static void add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { gint64 num = mu_msg_get_field_numeric (msg, mfid); const std::string numstr (Xapian::sortable_serialise((double)num)); doc.add_value ((Xapian::valueno)mfid, numstr); if (mfid == MU_MSG_FIELD_ID_FLAGS) { const char *cur = mu_flags_to_str_s ((MuFlags)num,(MuFlagType)MU_FLAG_TYPE_ANY); g_return_if_fail (cur); while (*cur) { doc.add_term (flag_val(*cur)); ++cur; } } else if (mfid == MU_MSG_FIELD_ID_PRIO) doc.add_term (prio_val((MuMsgPrio)num)); } /* for string and string-list */ static void add_terms_values_str (Xapian::Document& doc, char *val, MuMsgFieldId mfid, GStringChunk *strchunk) { /* the value is what we display in search results; the * unchanged original */ if (mu_msg_field_xapian_value(mfid)) doc.add_value ((Xapian::valueno)mfid, val); /* now, let's create some search terms... */ if (mu_msg_field_normalize (mfid)) val = mu_str_normalize_in_place_try (val, TRUE, strchunk); if (mu_msg_field_xapian_index (mfid)) { Xapian::TermGenerator termgen; termgen.set_document (doc); termgen.index_text_without_positions (val, 1, prefix(mfid)); } if (mu_msg_field_xapian_escape (mfid)) val= mu_str_xapian_escape_in_place_try (val, TRUE /*esc_space*/, strchunk); if (mu_msg_field_xapian_term(mfid)) doc.add_term (prefix(mfid) + std::string(val, 0, _MuStore::MAX_TERM_LENGTH)); } static void add_terms_values_string (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid, GStringChunk *strchunk) { const char *orig; char *val; if (!(orig = mu_msg_get_field_string (msg, mfid))) return; /* nothing to do */ val = g_string_chunk_insert (strchunk, orig); add_terms_values_str (doc, val, mfid, strchunk); } static void add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid, GStringChunk *strchunk) { const GSList *lst; lst = mu_msg_get_field_string_list (msg, mfid); if (!lst) return; if (mu_msg_field_xapian_value (mfid)) { gchar *str; str = mu_str_from_list (lst, ','); if (str) doc.add_value ((Xapian::valueno)mfid, str); g_free (str); } if (mu_msg_field_xapian_term (mfid)) { for (; lst; lst = g_slist_next ((GSList*)lst)) { char *val; val = g_string_chunk_insert (strchunk, (const gchar*)lst->data); add_terms_values_str (doc, val, mfid, strchunk); } } } struct PartData { PartData (Xapian::Document& doc, MuMsgFieldId mfid, GStringChunk *strchunk): _doc (doc), _mfid(mfid), _strchunk(strchunk) {} Xapian::Document _doc; MuMsgFieldId _mfid; GStringChunk *_strchunk; }; /* index non-body text parts */ static void maybe_index_text_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) { char *txt, *norm; Xapian::TermGenerator termgen; /* only deal with attachments/messages; inlines are indexed as * body parts */ if (!(part->part_type & MU_MSG_PART_TYPE_ATTACHMENT) && !(part->part_type & MU_MSG_PART_TYPE_MESSAGE)) return; txt = mu_msg_part_get_text (msg, part, MU_MSG_OPTION_NONE); if (!txt) return; termgen.set_document(pdata->_doc); /* allocated on strchunk, no need to free */ norm = mu_str_normalize (txt, TRUE, pdata->_strchunk); termgen.index_text_without_positions (norm, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT)); g_free (txt); } static void each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) { char *fname; static const std::string file (prefix(MU_MSG_FIELD_ID_FILE)), mime (prefix(MU_MSG_FIELD_ID_MIME)); /* save the mime type of any part */ if (part->type) { /* note, we use '_' instead of '/' to separate * type/subtype -- Xapian doesn't treat '/' as * desired, so we use '_' and pre-process queries; see * mu_query_preprocess */ char ctype[MuStore::MAX_TERM_LENGTH + 1]; snprintf (ctype, sizeof(ctype), "%s_%s", part->type, part->subtype); pdata->_doc.add_term (mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH)); } /* now, let's create a term it there's a filename. allocated * on strchunk, no need to free*/ if ((fname = mu_msg_part_get_filename (part, FALSE))) { char *val; val = mu_str_xapian_escape (fname, TRUE /*esc space*/, pdata->_strchunk); g_free (fname); pdata->_doc.add_term (file + std::string(val, 0, MuStore::MAX_TERM_LENGTH)); } maybe_index_text_part (msg, part, pdata); } static void add_terms_values_attach (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid, GStringChunk *strchunk) { PartData pdata (doc, mfid, strchunk); mu_msg_part_foreach (msg, MU_MSG_OPTION_RECURSE_RFC822, (MuMsgPartForeachFunc)each_part, &pdata); } static void add_terms_values_body (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid, GStringChunk *strchunk) { const char *str; char *norm; if (mu_msg_get_flags(msg) & MU_FLAG_ENCRYPTED) return; /* ignore encrypted bodies */ str = mu_msg_get_body_text (msg, MU_MSG_OPTION_NONE); if (!str) /* FIXME: html->txt fallback needed */ str = mu_msg_get_body_html (msg, MU_MSG_OPTION_NONE); if (!str) return; /* no body... */ Xapian::TermGenerator termgen; termgen.set_document(doc); /* norm is allocated on strchunk, no need for freeing */ norm = mu_str_normalize (str, TRUE, strchunk); termgen.index_text_without_positions (norm, 1, prefix(mfid)); } struct _MsgDoc { Xapian::Document *_doc; MuMsg *_msg; MuStore *_store; GStringChunk *_strchunk; /* callback data, to determine whether this message is 'personal' */ gboolean _personal; GSList *_my_addresses; }; typedef struct _MsgDoc MsgDoc; static void add_terms_values_default (MuMsgFieldId mfid, MsgDoc *msgdoc) { if (mu_msg_field_is_numeric (mfid)) add_terms_values_number (*msgdoc->_doc, msgdoc->_msg, mfid); else if (mu_msg_field_is_string (mfid)) add_terms_values_string (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk); else if (mu_msg_field_is_string_list(mfid)) add_terms_values_string_list (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk); else g_return_if_reached (); } static void add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc) { /* note: contact-stuff (To/Cc/From) will handled in * each_contact_info, not here */ if (!mu_msg_field_xapian_index(mfid) && !mu_msg_field_xapian_term(mfid) && !mu_msg_field_xapian_value(mfid)) return; switch (mfid) { case MU_MSG_FIELD_ID_DATE: add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid); break; case MU_MSG_FIELD_ID_BODY_TEXT: add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk); break; /* note: add_terms_values_attach handles _FILE, _MIME and * _ATTACH_TEXT msgfields */ case MU_MSG_FIELD_ID_FILE: add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk); break; case MU_MSG_FIELD_ID_MIME: case MU_MSG_FIELD_ID_EMBEDDED_TEXT: break; /////////////////////////////////////////// case MU_MSG_FIELD_ID_UID: break; /* already taken care of elsewhere */ default: return add_terms_values_default (mfid, msgdoc); } } static const std::string& xapian_pfx (MuMsgContact *contact) { static const std::string empty; /* use ptr to string to prevent copy... */ switch (contact->type) { case MU_MSG_CONTACT_TYPE_TO: return prefix(MU_MSG_FIELD_ID_TO); case MU_MSG_CONTACT_TYPE_FROM: return prefix(MU_MSG_FIELD_ID_FROM); case MU_MSG_CONTACT_TYPE_CC: return prefix(MU_MSG_FIELD_ID_CC); case MU_MSG_CONTACT_TYPE_BCC: return prefix(MU_MSG_FIELD_ID_BCC); default: g_warning ("unsupported contact type %u", (unsigned)contact->type); return empty; } } static void each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) { /* for now, don't store reply-to addresses */ if (mu_msg_contact_type (contact) == MU_MSG_CONTACT_TYPE_REPLY_TO) return; const std::string pfx (xapian_pfx(contact)); if (pfx.empty()) return; /* unsupported contact type */ if (!mu_str_is_empty(contact->name)) { Xapian::TermGenerator termgen; termgen.set_document (*msgdoc->_doc); /* note: norm is added to stringchunk, no need for freeing */ char *norm = mu_str_normalize (contact->name, TRUE, msgdoc->_strchunk); termgen.index_text_without_positions (norm, 1, pfx); } /* don't normalize e-mail address, but do lowercase it */ if (!mu_str_is_empty(contact->address)) { char *escaped; /* note: escaped is added to stringchunk, no need for * freeing */ escaped = mu_str_xapian_escape (contact->address, FALSE /*dont esc space*/, msgdoc->_strchunk); msgdoc->_doc->add_term (std::string (pfx + escaped, 0, MuStore::MAX_TERM_LENGTH)); /* store it also in our contacts cache */ if (msgdoc->_store->contacts()) mu_contacts_add (msgdoc->_store->contacts(), contact->address, contact->name, msgdoc->_personal, mu_msg_get_date(msgdoc->_msg)); } } static void each_contact_check_if_personal (MuMsgContact *contact, MsgDoc *msgdoc) { GSList *cur; if (msgdoc->_personal || !contact->address) return; for (cur = msgdoc->_my_addresses; cur; cur = g_slist_next (cur)) { if (g_ascii_strcasecmp (contact->address, (const char*)cur->data) == 0) msgdoc->_personal = TRUE; } } #define MU_STRING_CHUNK_SIZE 8192 Xapian::Document new_doc_from_message (MuStore *store, MuMsg *msg) { Xapian::Document doc; MsgDoc docinfo = {&doc, msg, store, 0, FALSE, NULL}; docinfo._strchunk = g_string_chunk_new (MU_STRING_CHUNK_SIZE); mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo); /* determine whether this is 'personal' email, ie. one of my * e-mail addresses is explicitly mentioned -- it's not a * mailing list message. Callback will update docinfo->_personal */ if (store->my_addresses()) { docinfo._my_addresses = store->my_addresses(); mu_msg_contact_foreach (msg, (MuMsgContactForeachFunc)each_contact_check_if_personal, &docinfo); } /* also store the contact-info as separate terms, and add it * to the cache */ mu_msg_contact_foreach (msg, (MuMsgContactForeachFunc)each_contact_info, &docinfo); g_string_chunk_free (docinfo._strchunk); return doc; } unsigned mu_store_add_msg (MuStore *store, MuMsg *msg, GError **err) { g_return_val_if_fail (store, MU_STORE_INVALID_DOCID); g_return_val_if_fail (msg, MU_STORE_INVALID_DOCID); try { Xapian::docid id; Xapian::Document doc (new_doc_from_message(store, msg)); const std::string term (store->get_uid_term (mu_msg_get_path(msg))); if (!store->in_transaction()) store->begin_transaction(); doc.add_term (term); // MU_WRITE_LOG ("adding: %s", term.c_str()); /* note, this will replace any other messages for this path */ id = store->db_writable()->replace_document (term, doc); if (store->inc_processed() % store->batch_size() == 0) store->commit_transaction(); return id; } MU_XAPIAN_CATCH_BLOCK_G_ERROR (err, MU_ERROR_XAPIAN_STORE_FAILED); if (store->in_transaction()) store->rollback_transaction(); return MU_STORE_INVALID_DOCID; } unsigned mu_store_update_msg (MuStore *store, unsigned docid, MuMsg *msg, GError **err) { g_return_val_if_fail (store, MU_STORE_INVALID_DOCID); g_return_val_if_fail (msg, MU_STORE_INVALID_DOCID); g_return_val_if_fail (docid != 0, MU_STORE_INVALID_DOCID); try { Xapian::Document doc (new_doc_from_message(store, msg)); if (!store->in_transaction()) store->begin_transaction(); const std::string term (store->get_uid_term(mu_msg_get_path(msg))); doc.add_term (term); store->db_writable()->replace_document (docid, doc); if (store->inc_processed() % store->batch_size() == 0) store->commit_transaction(); return docid; } MU_XAPIAN_CATCH_BLOCK_G_ERROR (err, MU_ERROR_XAPIAN_STORE_FAILED); if (store->in_transaction()) store->rollback_transaction(); return MU_STORE_INVALID_DOCID; } unsigned mu_store_add_path (MuStore *store, const char *path, const char *maildir, GError **err) { MuMsg *msg; unsigned docid; g_return_val_if_fail (store, FALSE); g_return_val_if_fail (path, FALSE); msg = mu_msg_new_from_file (path, maildir, err); if (!msg) return MU_STORE_INVALID_DOCID; docid = mu_store_add_msg (store, msg, err); mu_msg_unref (msg); return docid; } XapianWritableDatabase* mu_store_get_writable_database (MuStore *store) { g_return_val_if_fail (store, NULL); return (XapianWritableDatabase*)store->db_writable(); } gboolean mu_store_remove_path (MuStore *store, const char *msgpath) { g_return_val_if_fail (store, FALSE); g_return_val_if_fail (msgpath, FALSE); try { const std::string term (store->get_uid_term(msgpath)); store->db_writable()->delete_document (term); store->inc_processed(); return TRUE; } MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE); } gboolean mu_store_set_timestamp (MuStore *store, const char* msgpath, time_t stamp, GError **err) { char buf[21]; g_return_val_if_fail (store, FALSE); g_return_val_if_fail (msgpath, FALSE); sprintf (buf, "%" G_GUINT64_FORMAT, (guint64)stamp); return mu_store_set_metadata (store, msgpath, buf, err); }