From 93c6ff1f85823707b9805c527daa722a5bc70896 Mon Sep 17 00:00:00 2001 From: djcb Date: Tue, 14 Aug 2018 21:57:33 +0300 Subject: [PATCH] mu: protect against overly long keys We got some errors when some of the key values exceeded the Xapian maximum; in particular the message-id. So make all the key-methods check, and truncate the message-id if necessary. --- lib/mu-msg-file.c | 10 ++++----- lib/mu-msg-iter.cc | 2 -- lib/mu-store-priv.hh | 6 +----- lib/mu-store-write.cc | 48 ++++++++++++++++++++++++------------------- lib/mu-store.h | 2 ++ mu/mu-cmd-server.c | 8 ++++++++ 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/lib/mu-msg-file.c b/lib/mu-msg-file.c index e727ab12..d9678874 100644 --- a/lib/mu-msg-file.c +++ b/lib/mu-msg-file.c @@ -30,6 +30,7 @@ #include "mu-util.h" #include "mu-str.h" #include "mu-maildir.h" +#include "mu-store.h" #include "mu-msg-priv.h" static gboolean init_file_metadata (MuMsgFile *self, const char* path, @@ -655,13 +656,12 @@ get_msgid (MuMsgFile *self, gboolean *do_free) const char *msgid; msgid = g_mime_message_get_message_id (self->_mime_msg); - if (msgid) + if (msgid && strlen(msgid) < MU_STORE_MAX_TERM_LENGTH) { return (char*)msgid; - else { /* if there is none, fake it */ + } else { /* if there is none, fake it */ *do_free = TRUE; - return g_strdup_printf ( - "%s@fake-msgid", - mu_util_get_hash (self->_path)); + return g_strdup_printf ("%s@fake-msgid", + mu_util_get_hash (self->_path)); } } diff --git a/lib/mu-msg-iter.cc b/lib/mu-msg-iter.cc index 2a1f1966..5f811d4d 100644 --- a/lib/mu-msg-iter.cc +++ b/lib/mu-msg-iter.cc @@ -397,10 +397,8 @@ mu_msg_iter_get_thread_id (MuMsgIter *iter) try { const std::string thread_id ( iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_THREAD_ID).c_str()); - return thread_id.empty() ? NULL : g_strdup (thread_id.c_str()); - } MU_XAPIAN_CATCH_BLOCK_RETURN (NULL); } diff --git a/lib/mu-store-priv.hh b/lib/mu-store-priv.hh index 8d103271..c919fd5c 100644 --- a/lib/mu-store-priv.hh +++ b/lib/mu-store-priv.hh @@ -46,7 +46,6 @@ private: const std::string _what; }; - struct _MuStore { public: /* create a read-write MuStore */ @@ -196,7 +195,7 @@ public: mu_store_set_metadata (this, MU_STORE_VERSION_KEY, vers, NULL); } - static unsigned max_term_length() { return MAX_TERM_LENGTH; } + static unsigned max_term_length() { return MU_STORE_MAX_TERM_LENGTH; } void begin_transaction (); void commit_transaction (); @@ -237,9 +236,6 @@ public: /* by default, use transactions of 30000 messages */ static const unsigned DEFAULT_BATCH_SIZE = 30000; - /* http://article.gmane.org/gmane.comp.search.xapian.general/3656 */ - static const unsigned MAX_TERM_LENGTH = 240; - private: /* transaction handling */ bool _in_transaction; diff --git a/lib/mu-store-write.cc b/lib/mu-store-write.cc index 36af013a..ad3681c3 100644 --- a/lib/mu-store-write.cc +++ b/lib/mu-store-write.cc @@ -289,6 +289,17 @@ prio_val (MuMsgPrio prio) } +static void // add term, truncate if needed. +add_term (Xapian::Document& doc, const std::string& term) +{ + if (term.length() < MU_STORE_MAX_TERM_LENGTH) + doc.add_term(term); + else + doc.add_term(term.substr(0, MU_STORE_MAX_TERM_LENGTH)); +} + + + static void add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { @@ -302,12 +313,12 @@ add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) ((MuFlags)num,(MuFlagType)MU_FLAG_TYPE_ANY); g_return_if_fail (cur); while (*cur) { - doc.add_term (flag_val(*cur)); + add_term (doc, flag_val(*cur)); ++cur; } } else if (mfid == MU_MSG_FIELD_ID_PRIO) - doc.add_term (prio_val((MuMsgPrio)num)); + add_term (doc, prio_val((MuMsgPrio)num)); } @@ -323,11 +334,9 @@ add_terms_values_str (Xapian::Document& doc, const char *val, MuMsgFieldId mfid) termgen.index_text (flat, 1, prefix(mfid)); } - if (mu_msg_field_xapian_term(mfid)) { - //std::cerr << ":" << prefix(mfid) + flat << std::endl; - doc.add_term((prefix(mfid) + flat) - .substr(0, MuStore::MAX_TERM_LENGTH)); - } + if (mu_msg_field_xapian_term(mfid)) + add_term(doc, prefix(mfid) + flat); + } static void @@ -414,17 +423,15 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) /* save the mime type of any part */ if (part->type) { - char ctype[MuStore::MAX_TERM_LENGTH + 1]; - snprintf (ctype, sizeof(ctype), "%s/%s", part->type, part->subtype); - pdata->_doc.add_term - (mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH)); + char ctype[MU_STORE_MAX_TERM_LENGTH + 1]; + snprintf(ctype, sizeof(ctype), "%s/%s", part->type, part->subtype); + add_term(pdata->_doc, mime + ctype); } if ((fname = mu_msg_part_get_filename (part, FALSE))) { const auto flat = Mux::utf8_flatten (fname); g_free (fname); - pdata->_doc.add_term - (file + std::string(flat, 0, MuStore::MAX_TERM_LENGTH)); + add_term(pdata->_doc, file + flat); } maybe_index_text_part (msg, part, pdata); @@ -565,8 +572,8 @@ add_address_subfields (Xapian::Document& doc, const char *addr, name_part = g_strndup(addr, at - addr); // foo domain_part = at + 1; - doc.add_term (pfx + std::string(name_part, 0, _MuStore::MAX_TERM_LENGTH)); - doc.add_term (pfx + std::string(domain_part, 0, _MuStore::MAX_TERM_LENGTH)); + add_term(doc, pfx + name_part); + add_term(doc, pfx + domain_part); g_free (name_part); } @@ -591,8 +598,7 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) if (!mu_str_is_empty(contact->address)) { const auto flat = Mux::utf8_flatten(contact->address); - msgdoc->_doc->add_term - (std::string (pfx + flat, 0, MuStore::MAX_TERM_LENGTH)); + add_term(*msgdoc->_doc, pfx + flat); add_address_subfields (*msgdoc->_doc, contact->address, pfx); /* store it also in our contacts cache */ if (msgdoc->_store->contacts()) @@ -664,8 +670,8 @@ update_threading_info (Xapian::WritableDatabase* db, // one first until the last one, which is the direct parent of // the current message. of course, it may be empty. // - // NOTE: there may be cases where the the list is truncated; - // we happily ignore that case. + // NOTE: there may be cases where the list is truncated; we happily + // ignore that case. refs = mu_msg_get_references (msg); std::string thread_id; @@ -674,7 +680,7 @@ update_threading_info (Xapian::WritableDatabase* db, else thread_id = mu_util_get_hash (mu_msg_get_msgid (msg)); - doc.add_term (prefix(MU_MSG_FIELD_ID_THREAD_ID) + thread_id); + add_term (doc, prefix(MU_MSG_FIELD_ID_THREAD_ID) + thread_id); doc.add_value((Xapian::valueno)MU_MSG_FIELD_ID_THREAD_ID, thread_id); } @@ -693,7 +699,7 @@ add_or_update_msg (MuStore *store, unsigned docid, MuMsg *msg, GError **err) if (!store->in_transaction()) store->begin_transaction(); - doc.add_term (term); + add_term (doc, term); // update the threading info if this message has a message id if (mu_msg_get_msgid (msg)) diff --git a/lib/mu-store.h b/lib/mu-store.h index c4073765..7df551de 100644 --- a/lib/mu-store.h +++ b/lib/mu-store.h @@ -30,6 +30,8 @@ G_BEGIN_DECLS struct _MuStore; typedef struct _MuStore MuStore; +/* http://article.gmane.org/gmane.comp.search.xapian.general/3656 */ +#define MU_STORE_MAX_TERM_LENGTH (240) /** * create a new writable Xapian store, a place to store documents diff --git a/mu/mu-cmd-server.c b/mu/mu-cmd-server.c index be227e52..4a55f13e 100644 --- a/mu/mu-cmd-server.c +++ b/mu/mu-cmd-server.c @@ -50,6 +50,7 @@ #include "mu-maildir.h" #include "mu-query.h" #include "mu-index.h" +#include "mu-store.h" #include "mu-msg-part.h" #include "mu-contacts.h" @@ -275,6 +276,13 @@ get_docids_from_msgids (MuQuery *query, const char *msgid, MuMsgIter *iter; GSList *lst; + if (!msgid || strlen(msgid) > MU_STORE_MAX_TERM_LENGTH - 1) { + mu_util_g_set_error (err, MU_ERROR_IN_PARAMETERS, + "invalid message-id '%s' (length=%zu)", + msgid, strlen(msgid)); + return NULL; + } + xprefix = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_MSGID); /*XXX this is a bit dodgy */ tmp = g_ascii_strdown(msgid, -1);