* add basic support for skipping dups, unreadable messages in mu-msg-iter

This commit is contained in:
djcb 2012-12-17 22:29:39 +02:00
parent 35195f8c47
commit 76adc694c0
4 changed files with 48 additions and 37 deletions

View File

@ -65,7 +65,6 @@ public:
threads = (flags & MU_MSG_ITER_FLAG_THREADS); threads = (flags & MU_MSG_ITER_FLAG_THREADS);
descending = (flags & MU_MSG_ITER_FLAG_DESCENDING); descending = (flags & MU_MSG_ITER_FLAG_DESCENDING);
_matches = _enq.get_mset (0, maxnum); _matches = _enq.get_mset (0, maxnum);
/* when threading, we calculate the threads for the /* when threading, we calculate the threads for the
@ -117,9 +116,11 @@ public:
MuMsgIterFlags flags() const { return _flags; } MuMsgIterFlags flags() const { return _flags; }
void remember_msgid (const std::string& msgid) { _msgid_set.insert (msgid); } bool msg_uid_seen_before (const std::string& msg_uid) {
bool msgid_seen (const std::string& msgid) const { if (_msg_uid_set.count (msg_uid) > 0)
return _msgid_set.find (msgid) != _msgid_set.end(); return true;
_msg_uid_set.insert (msg_uid);
return false;
} }
private: private:
@ -131,10 +132,29 @@ private:
MuMsg *_msg; MuMsg *_msg;
MuMsgIterFlags _flags; MuMsgIterFlags _flags;
std::set <std::string> _msgid_set;
/* Ordering predicate for the set of message uids seen so far.
 *
 * A std::set comparator must implement a strict weak ordering, i.e.
 * return true only when s1 sorts strictly *before* s2. Returning a raw
 * three-way comparison result (non-zero for both "less" and "greater")
 * would make every pair of distinct strings compare as "less" in both
 * directions, which breaks lookups and insertions in the set.
 */
struct ltstr {
	/* @return true iff s1 is ordered (byte-wise) before s2 */
	bool operator () (const std::string &s1, const std::string &s2) const {
		return s1.compare (s2) < 0;
	}
};
std::set <std::string, ltstr> _msg_uid_set;
}; };
/* Check whether the message file for the iterator's current match can
 * be read.
 *
 * The path is taken from the MU_MSG_FIELD_ID_PATH value of the document
 * at the iterator's cursor.
 *
 * @param iter a message iterator
 *
 * @return TRUE if the stored path is non-empty and access(2) grants
 * R_OK on it, FALSE otherwise
 */
static gboolean
is_msg_file_readable (MuMsgIter *iter)
{
	const std::string msgpath
		(iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_PATH));

	/* an empty path can never be readable; otherwise ask the OS */
	if (!msgpath.empty() && access (msgpath.c_str(), R_OK) == 0)
		return TRUE;

	return FALSE;
}
MuMsgIter * MuMsgIter *
mu_msg_iter_new (XapianEnquire *enq, size_t maxnum, mu_msg_iter_new (XapianEnquire *enq, size_t maxnum,
@ -150,10 +170,16 @@ mu_msg_iter_new (XapianEnquire *enq, size_t maxnum,
sortfield == MU_MSG_FIELD_ID_NONE, sortfield == MU_MSG_FIELD_ID_NONE,
FALSE); FALSE);
try { try {
return new MuMsgIter ((Xapian::Enquire&)*enq, MuMsgIter *iter;
iter = new MuMsgIter ((Xapian::Enquire&)*enq,
maxnum, maxnum,
sortfield, sortfield,
flags); flags);
if ((flags & MU_MSG_ITER_FLAG_SKIP_UNREADABLE) &&
!is_msg_file_readable (iter))
mu_msg_iter_next (iter);
return iter;
} catch (const Xapian::DatabaseModifiedError &dbmex) { } catch (const Xapian::DatabaseModifiedError &dbmex) {
mu_util_g_set_error (err, MU_ERROR_XAPIAN_MODIFIED, mu_util_g_set_error (err, MU_ERROR_XAPIAN_MODIFIED,
@ -210,30 +236,18 @@ mu_msg_iter_reset (MuMsgIter *iter)
} }
static gboolean static gboolean
is_msg_file_readable (MuMsgIter *iter) msg_seen_before (MuMsgIter *iter)
{ {
std::string path // we *only* consider the msgid; we could also look eg. the
(iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_PATH)); // date, but it seems that Gmail may change the date / time
// zone etc. with still being the same message.
if (path.empty()) const std::string msg_uid
return FALSE;
return (access (path.c_str(), R_OK) == 0) ? TRUE : FALSE;
}
static gboolean
has_duplicate_msgid (MuMsgIter *iter)
{
std::string msgid
(iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_MSGID)); (iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_MSGID));
if (msgid.empty()) // (iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_DATE)));
if (msg_uid.empty())
return FALSE; return FALSE;
else if (iter->msgid_seen (msgid)) else
return TRUE; return iter->msg_uid_seen_before (msg_uid);
iter->remember_msgid (msgid);
return FALSE;
} }
@ -252,16 +266,14 @@ mu_msg_iter_next (MuMsgIter *iter)
if (iter->cursor() == iter->matches().end()) if (iter->cursor() == iter->matches().end())
return FALSE; return FALSE;
/* filter out non-existing messages? */ /* filter out non-existing messages? */
else if ((iter->flags() & else if ((iter->flags() &
MU_MSG_ITER_FLAG_SKIP_UNREADABLE) && MU_MSG_ITER_FLAG_SKIP_UNREADABLE) &&
!is_msg_file_readable (iter)) !is_msg_file_readable (iter))
return mu_msg_iter_next (iter); /*skip!*/ return mu_msg_iter_next (iter); /*skip!*/
/* filter out msgid duplicates? */ /* filter out msgid duplicates? */
else if ((iter->flags() & else if ((iter->flags() & MU_MSG_ITER_FLAG_SKIP_DUPS) &&
MU_MSG_ITER_FLAG_SKIP_MSGID_DUPS) && msg_seen_before (iter))
has_duplicate_msgid (iter))
return mu_msg_iter_next (iter); /*skip!*/ return mu_msg_iter_next (iter); /*skip!*/
else else
return TRUE; return TRUE;

View File

@ -46,9 +46,8 @@ enum _MuMsgIterFlags {
/* ignore results for which there is no existing /* ignore results for which there is no existing
* readable message-file? */ * readable message-file? */
MU_MSG_ITER_FLAG_SKIP_UNREADABLE = 1 << 2, MU_MSG_ITER_FLAG_SKIP_UNREADABLE = 1 << 2,
/* ignore result which have a message id already seen in these /* ignore duplicate messages? */
* results? */ MU_MSG_ITER_FLAG_SKIP_DUPS = 1 << 3
MU_MSG_ITER_FLAG_SKIP_MSGID_DUPS = 1 << 3
}; };
typedef unsigned MuMsgIterFlags; typedef unsigned MuMsgIterFlags;

View File

@ -394,8 +394,8 @@ msg_iter_flags (MuQueryFlags flags)
iflags |= MU_MSG_ITER_FLAG_DESCENDING; iflags |= MU_MSG_ITER_FLAG_DESCENDING;
if (flags & MU_QUERY_FLAG_SKIP_UNREADABLE) if (flags & MU_QUERY_FLAG_SKIP_UNREADABLE)
iflags |= MU_MSG_ITER_FLAG_SKIP_UNREADABLE; iflags |= MU_MSG_ITER_FLAG_SKIP_UNREADABLE;
if (flags & MU_QUERY_FLAG_SKIP_MSGID_DUPS) if (flags & MU_QUERY_FLAG_SKIP_DUPS)
iflags |= MU_MSG_ITER_FLAG_SKIP_MSGID_DUPS; iflags |= MU_MSG_ITER_FLAG_SKIP_DUPS;
return iflags; return iflags;
} }

View File

@ -71,7 +71,7 @@ enum _MuQueryFlags {
MU_QUERY_FLAG_THREADS = 1 << 0, /* <** add threading info */ MU_QUERY_FLAG_THREADS = 1 << 0, /* <** add threading info */
MU_QUERY_FLAG_DESCENDING = 1 << 1, /* <** sort z->a */ MU_QUERY_FLAG_DESCENDING = 1 << 1, /* <** sort z->a */
MU_QUERY_FLAG_SKIP_UNREADABLE = 1 << 2, /* <** skip unreadable msgs */ MU_QUERY_FLAG_SKIP_UNREADABLE = 1 << 2, /* <** skip unreadable msgs */
MU_QUERY_FLAG_SKIP_MSGID_DUPS = 1 << 3 /* <** skip msgid dups */ MU_QUERY_FLAG_SKIP_DUPS = 1 << 3 /* <** skip duplicate msgs */
}; };
typedef enum _MuQueryFlags MuQueryFlags; typedef enum _MuQueryFlags MuQueryFlags;