diff --git a/lib/mu-msg-file.c b/lib/mu-msg-file.c
index 9b795995..d28da180 100644
--- a/lib/mu-msg-file.c
+++ b/lib/mu-msg-file.c
@@ -541,8 +541,8 @@ get_references (MuMsgFile *self)
 			msgid = g_mime_references_get_message_id (cur);
 			/* don't include duplicates */
 			if (msgid && !contains (msgids, msgid))
-				/* explicitly ensure it's utf8-safe, as GMime
-				 * does not ensure that */
+				/* explicitly ensure it's utf8-safe,
+				 * as GMime does not ensure that */
 				msgids = g_slist_prepend (msgids,
 							  g_strdup((msgid)));
 		}
@@ -625,6 +625,27 @@ recipient_type (MuMsgFieldId mfid)
 	}
 }
 
+/* get the message-id for the message in SELF. If GMime reports one,
+ * that pointer is returned as-is and *DO_FREE is left as the caller
+ * set it. If there is none, a fake message-id is built from
+ * mu_util_get_hash of the message's path; that string is newly
+ * allocated, so *DO_FREE is set to TRUE and the caller must free it */
+static gchar*
+get_msgid (MuMsgFile *self, gboolean *do_free)
+{
+	const char *msgid;
+
+	msgid = g_mime_message_get_message_id (self->_mime_msg);
+	if (msgid)
+		return (char*)msgid;
+	else { /* if there is none, fake it */
+		*do_free = TRUE;
+		return g_strdup_printf (
+			"%s@fake-msgid",
+			mu_util_get_hash (self->_path));
+	}
+}
+
 
 char*
 mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid,
@@ -659,7 +680,7 @@ mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid,
 						    self->_path, do_free);
 
 	case MU_MSG_FIELD_ID_MSGID:
-		return (char*)g_mime_message_get_message_id (self->_mime_msg);
+		return get_msgid (self, do_free);
 
 	case MU_MSG_FIELD_ID_MAILDIR:
 		return self->_maildir;
diff --git a/lib/mu-msg-iter.cc b/lib/mu-msg-iter.cc
index 9ab01a6f..d8a8171f 100644
--- a/lib/mu-msg-iter.cc
+++ b/lib/mu-msg-iter.cc
@@ -125,22 +125,36 @@ public:
 
 	MuMsgIterFlags flags() const { return _flags; }
 
+private:
+	// backing store for the string handed out by msgid()
+	mutable std::string _msgid;
+public:
+	// message-id of the document the cursor points at; it is kept
+	// in _msgid so that a pointer into it (mu_msg_iter_get_msgid)
+	// is not left dangling when this function returns
+	const std::string& msgid () const {
+		const Xapian::Document doc (cursor().get_document());
+		_msgid = doc.get_value(MU_MSG_FIELD_ID_MSGID);
+		return _msgid;
+	}
+
+	// Xapian docid of the document the cursor points at
+	unsigned docid () const {
+		const Xapian::Document doc (cursor().get_document());
+		return doc.get_docid();
+	}
+
+
 	bool looks_like_dup () const {
 		try {
 			const Xapian::Document doc (cursor().get_document());
-			const std::string msgid (doc.get_value(MU_MSG_FIELD_ID_MSGID));
-			unsigned docid (doc.get_docid());
-
-			if (msgid.empty())
-				return false;
-
 			// is this message in the preferred map? if
 			// so, it's not a duplicate, otherwise, it
 			// isn't
-			msgid_docid_map::const_iterator pref_iter (_preferred_map.find (msgid));
+			msgid_docid_map::const_iterator pref_iter (_preferred_map.find (msgid()));
 			if (pref_iter != _preferred_map.end()) {
 				//std::cerr << "in the set!" << std::endl;
-				if ((*pref_iter).second == docid)
+				if ((*pref_iter).second == docid())
 					return false; // in the set: not a dup!
 				else
 					return true;
@@ -148,10 +162,10 @@ public:
 
 			// otherwise, simply check if we've already seen this message-id,
 			// and, if so, it's considered a dup
-			if (_msg_uid_set.find (msgid) != _msg_uid_set.end()) {
+			if (_msg_uid_set.find (msgid()) != _msg_uid_set.end()) {
 				return true;
 			} else {
-				_msg_uid_set.insert (msgid);
+				_msg_uid_set.insert (msgid());
 				return false;
 			}
 		} catch (...) {
@@ -159,7 +173,8 @@ public:
 		}
 	}
 
-	static void each_preferred (const char *msgid, gpointer docidp, msgid_docid_map *preferred_map) {
+	static void each_preferred (const char *msgid, gpointer docidp,
+				    msgid_docid_map *preferred_map) {
 		(*preferred_map)[msgid] = GPOINTER_TO_SIZE(docidp);
 	}
 
@@ -346,7 +361,7 @@ mu_msg_iter_get_docid (MuMsgIter *iter)
 	g_return_val_if_fail (!mu_msg_iter_is_done(iter), (unsigned int)-1);
 
 	try {
-		return iter->cursor().get_document().get_docid();
+		return iter->docid();
 
 	} MU_XAPIAN_CATCH_BLOCK_RETURN ((unsigned int)-1);
 }
@@ -360,10 +375,9 @@ mu_msg_iter_get_msgid (MuMsgIter *iter)
 	g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL);
 
 	try {
-		const std::string msgid (
-			iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_MSGID).c_str());
-
-		return msgid.empty() ? NULL : msgid.c_str();
+		// msgid() returns a reference to a string owned by the
+		// iterator, so this pointer stays valid after returning
+		return iter->msgid().c_str();
 
 	} MU_XAPIAN_CATCH_BLOCK_RETURN (NULL);
 }
diff --git a/lib/mu-msg.h b/lib/mu-msg.h
index 50ba117c..3c14579b 100644
--- a/lib/mu-msg.h
+++ b/lib/mu-msg.h
@@ -261,9 +261,10 @@ const char* mu_msg_get_subject (MuMsg *msg);
  *
  * @param msg a valid MuMsg* instance
  *
- * @return the Message-Id of this message (without the enclosing <>)
- * or NULL in case of error or if there is none. the returned string
- * should *not* be modified or freed.
+ * @return the Message-Id of this message (without the enclosing <>),
+ * or a fake message-id for messages that don't have them, or NULL in
+ * case of error. the returned string should *not* be modified or
+ * freed.
  */
 const char* mu_msg_get_msgid (MuMsg *msg);
 
diff --git a/lib/mu-store-write.cc b/lib/mu-store-write.cc
index c08806fa..56e02843 100644
--- a/lib/mu-store-write.cc
+++ b/lib/mu-store-write.cc
@@ -460,6 +460,7 @@ add_terms_values_attach (Xapian::Document& doc, MuMsg *msg,
 				 (MuMsgPartForeachFunc)each_part, &pdata);
 }
 
+
 /* escape the body -- for now, only replace '-' with '_' */
 static void
 body_escape_in_place (char *body)
@@ -559,7 +560,6 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
 	case MU_MSG_FIELD_ID_MIME:
 	case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
 		break;
-
 	///////////////////////////////////////////
 	case MU_MSG_FIELD_ID_THREAD_ID:
 	case MU_MSG_FIELD_ID_UID: