* mu-msg-iter: add basic implementation of checking for msgid dups, msg file

existence
This commit is contained in:
djcb 2012-12-16 14:17:58 +02:00
parent 17f3ef0c4b
commit 50f5c7affb
2 changed files with 98 additions and 18 deletions

View File

@ -19,12 +19,17 @@
*/ */
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include <iostream> #include <iostream>
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include <algorithm> #include <algorithm>
#include <xapian.h> #include <xapian.h>
#include <string> #include <string>
#include <set>
#include "mu-util.h" #include "mu-util.h"
#include "mu-msg.h" #include "mu-msg.h"
@ -52,8 +57,14 @@ private:
struct _MuMsgIter { struct _MuMsgIter {
public: public:
_MuMsgIter (Xapian::Enquire &enq, size_t maxnum, _MuMsgIter (Xapian::Enquire &enq, size_t maxnum,
gboolean threads, MuMsgFieldId sortfield, bool revert): MuMsgFieldId sortfield, MuMsgIterFlags flags):
_enq(enq), _thread_hash (0), _msg(0) { _enq(enq), _thread_hash (0), _msg(0), _flags(flags) {
bool threads, revert;
threads = (flags & MU_MSG_ITER_FLAG_THREADS);
revert = (flags & MU_MSG_ITER_FLAG_REVERT);
_matches = _enq.get_mset (0, maxnum); _matches = _enq.get_mset (0, maxnum);
@ -97,13 +108,20 @@ public:
GHashTable *thread_hash () { return _thread_hash; } GHashTable *thread_hash () { return _thread_hash; }
MuMsg *msg() { return _msg; } MuMsg *msg() const { return _msg; }
MuMsg *set_msg (MuMsg *msg) { MuMsg *set_msg (MuMsg *msg) {
if (_msg) if (_msg)
mu_msg_unref (_msg); mu_msg_unref (_msg);
return _msg = msg; return _msg = msg;
} }
MuMsgIterFlags flags() const { return _flags; }
void remember_msgid (const std::string& msgid) { _msgid_set.insert (msgid); }
bool msgid_seen (const std::string& msgid) const {
return _msgid_set.find (msgid) != _msgid_set.end();
}
private: private:
const Xapian::Enquire _enq; const Xapian::Enquire _enq;
Xapian::MSet _matches; Xapian::MSet _matches;
@ -111,25 +129,31 @@ private:
GHashTable *_thread_hash; GHashTable *_thread_hash;
MuMsg *_msg; MuMsg *_msg;
MuMsgIterFlags _flags;
std::set <std::string> _msgid_set;
}; };
MuMsgIter* MuMsgIter *
mu_msg_iter_new (XapianEnquire *enq, size_t maxnum, mu_msg_iter_new (XapianEnquire *enq, size_t maxnum,
gboolean threads, MuMsgFieldId sortfield, gboolean revert, MuMsgFieldId sortfield, MuMsgIterFlags flags,
GError **err) GError **err)
{ {
g_return_val_if_fail (enq, NULL); g_return_val_if_fail (enq, NULL);
/* sortfield should be set to .._NONE when we're not threading */ /* sortfield should be set to .._NONE when we're not threading */
g_return_val_if_fail (threads || sortfield == MU_MSG_FIELD_ID_NONE, g_return_val_if_fail ((flags & MU_MSG_ITER_FLAG_THREADS)
|| sortfield == MU_MSG_FIELD_ID_NONE,
NULL); NULL);
g_return_val_if_fail (mu_msg_field_id_is_valid (sortfield) || g_return_val_if_fail (mu_msg_field_id_is_valid (sortfield) ||
sortfield == MU_MSG_FIELD_ID_NONE, sortfield == MU_MSG_FIELD_ID_NONE,
FALSE); FALSE);
try { try {
return new MuMsgIter ((Xapian::Enquire&)*enq, maxnum, threads, return new MuMsgIter ((Xapian::Enquire&)*enq,
sortfield, revert ? true : false); maxnum,
sortfield,
flags);
} catch (const Xapian::DatabaseModifiedError &dbmex) { } catch (const Xapian::DatabaseModifiedError &dbmex) {
mu_util_g_set_error (err, MU_ERROR_XAPIAN_MODIFIED, mu_util_g_set_error (err, MU_ERROR_XAPIAN_MODIFIED,
@ -185,6 +209,32 @@ mu_msg_iter_reset (MuMsgIter *iter)
return TRUE; return TRUE;
} }
/*
 * Check whether the message file belonging to the result under the
 * iterator's cursor can be read.
 *
 * The path is taken from the MU_MSG_FIELD_ID_PATH value of the current
 * Xapian document. Returns TRUE when that path is non-empty and
 * access(2) grants read permission on it; FALSE otherwise (including
 * when no path is stored).
 */
static gboolean
is_msg_file_readable (MuMsgIter *iter)
{
	const std::string msgpath
		(iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_PATH));

	/* an empty path can never be readable; don't bother the OS */
	if (!msgpath.empty() && access (msgpath.c_str(), R_OK) == 0)
		return TRUE;

	return FALSE;
}
/*
 * Check whether the message-id of the result under the iterator's
 * cursor was already encountered earlier in this iteration.
 *
 * The id is taken from the MU_MSG_FIELD_ID_MSGID value of the current
 * Xapian document. A not-yet-seen id is recorded in the iterator as a
 * side effect, so a later result carrying the same id is reported as
 * a duplicate. Returns TRUE for a duplicate, FALSE otherwise.
 */
static gboolean
has_duplicate_msgid (MuMsgIter *iter)
{
	const std::string id
		(iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_MSGID));

	/* results without a message-id are never considered duplicates,
	 * and the empty id is not recorded */
	if (id.empty())
		return FALSE;

	if (!iter->msgid_seen (id)) {
		/* first occurrence: remember it for subsequent results */
		iter->remember_msgid (id);
		return FALSE;
	}

	return TRUE;
}
gboolean gboolean
mu_msg_iter_next (MuMsgIter *iter) mu_msg_iter_next (MuMsgIter *iter)
@ -198,7 +248,20 @@ mu_msg_iter_next (MuMsgIter *iter)
try { try {
iter->cursor_next(); iter->cursor_next();
return iter->cursor() == iter->matches().end() ? FALSE:TRUE;
if (iter->cursor() == iter->matches().end())
	return FALSE;
/* filter out results without an existing, readable message file?
 * note the negation: a result is skipped when its file is NOT
 * readable; readable ones must be kept */
else if ((iter->flags() & MU_MSG_ITER_FLAG_MSG_READABLE) &&
	 !is_msg_file_readable (iter))
	return mu_msg_iter_next (iter); /*skip!*/
/* filter out results whose message-id was already seen? */
else if ((iter->flags() & MU_MSG_ITER_FLAG_NO_MSGID_DUPS) &&
	 has_duplicate_msgid (iter))
	return mu_msg_iter_next (iter); /*skip!*/
else
	return TRUE;
} MU_XAPIAN_CATCH_BLOCK_RETURN(FALSE); } MU_XAPIAN_CATCH_BLOCK_RETURN(FALSE);
} }

View File

@ -37,26 +37,43 @@ struct _MuMsgIter;
typedef struct _MuMsgIter MuMsgIter; typedef struct _MuMsgIter MuMsgIter;
/* Options for a MuMsgIter. Every option occupies its own bit, so
 * several of them can be OR-ed together into a MuMsgIterFlags value. */
enum _MuMsgIterFlags {
	/* no options set */
	MU_MSG_ITER_FLAG_NONE          = 0x0,
	/* calculate the threads? */
	MU_MSG_ITER_FLAG_THREADS       = 0x1,
	/* revert the sort order (only for threads) */
	MU_MSG_ITER_FLAG_REVERT        = 0x2,
	/* ignore results for which there is no existing, readable
	 * message file? */
	MU_MSG_ITER_FLAG_MSG_READABLE  = 0x4,
	/* ignore results that have a message-id already seen in these
	 * results? */
	MU_MSG_ITER_FLAG_NO_MSGID_DUPS = 0x8
};
/* holds a bitwise-OR combination of the enumerators above */
typedef unsigned MuMsgIterFlags;
/** /**
* create a new MuMsgIter -- basically, an iterator over the search * create a new MuMsgIter -- basically, an iterator over the search
* results * results
* *
* @param enq a Xapian::Enquire* cast to XapianEnquire* (because this * @param enq a Xapian::Enquire* cast to XapianEnquire* (because this
* is C, not C++),providing access to search results * is C, not C++),providing access to search results
* @param batchsize how many results to retrieve at once * @param maxnum the maximum number of results
* @param threads whether to calculate threads * @param sorting field for threads; when threads are not wanted, set it to
* @param sorting field when using threads; note, when 'threads' is * MU_MSG_FIELD_ID_NONE
* FALSE, this should be MU_MSG_FIELD_ID_NONE * @param flags flags for this iterator (see MuMsgIterFlags)
* @param if TRUE, revert the sorting order
* @param err receives error information. if the error is MU_ERROR_XAPIAN_MODIFIED, * @param err receives error information. if the error is
* the database should be reloaded. * MU_ERROR_XAPIAN_MODIFIED, the database should be reloaded.
* *
* @return a new MuMsgIter, or NULL in case of error * @return a new MuMsgIter, or NULL in case of error
*/ */
MuMsgIter *mu_msg_iter_new (XapianEnquire *enq, MuMsgIter *mu_msg_iter_new (XapianEnquire *enq,
size_t batchsize, gboolean threads, size_t maxnum,
MuMsgFieldId threadsortfield, MuMsgFieldId threadsortfield,
gboolean revert, MuMsgIterFlags flags,
GError **err) G_GNUC_WARN_UNUSED_RESULT; GError **err) G_GNUC_WARN_UNUSED_RESULT;
/** /**