mu/lib/mu-msg-iter.cc

436 lines
11 KiB
C++

/* -*- mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
**
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#include <stdlib.h>
#include <unistd.h>
#include <iostream>
#include <string.h>
#include <errno.h>
#include <algorithm>
#include <xapian.h>
#include <string>
#include <set>
#include <map>
#include "utils/mu-util.h"
#include "mu-msg.h"
#include "mu-msg-iter.h"
#include "mu-threader.h"
/* 'less-than' functor for std::string keys; the strings are compared
 * as NUL-terminated C strings, using g_strcmp0 */
struct ltstr {
	bool operator () (const std::string &lhs,
			  const std::string &rhs) const {
		const int cmp = g_strcmp0 (lhs.c_str(), rhs.c_str());
		return cmp < 0;
	}
};
/* maps a message-id to a docid; the keys are ordered using ltstr */
typedef std::map <std::string, unsigned, ltstr> msgid_docid_map;
/* Xapian::KeyMaker which uses, as the sort key for a document, the
 * 'threadpath' found for the document's docid in the thread-info hash
 * table; documents without thread info (or without a threadpath) get
 * an empty key */
class ThreadKeyMaker: public Xapian::KeyMaker {
public:
	/* threadinfo: hash table, looked up by docid, with
	 * MuMsgIterThreadInfo values; it is not owned by this object */
	ThreadKeyMaker (GHashTable *threadinfo): _threadinfo(threadinfo) {}

	virtual std::string operator()(const Xapian::Document &doc) const {
		const gpointer key = GUINT_TO_POINTER(doc.get_docid());
		const MuMsgIterThreadInfo *info =
			static_cast<const MuMsgIterThreadInfo*>
			(g_hash_table_lookup (_threadinfo, key));

		if (!info || !info->threadpath)
			return std::string ();

		return std::string (info->threadpath);
	}
private:
	GHashTable *_threadinfo;
};
/* The message iterator: holds the Xapian match set for a query and a
 * cursor into it, plus the state needed for threading and for skipping
 * duplicate / unreadable messages. */
struct _MuMsgIter {
public:
	/* Run the query and put the cursor on the first match.
	 *
	 * enq:       the enquire object to get the matches from
	 * maxnum:    the maximum number of matches to keep
	 * sortfield: the field to sort by, or MU_MSG_FIELD_ID_NONE
	 * flags:     threading / sort direction / skipping options
	 *
	 * Exceptions thrown by Xapian propagate to the caller.
	 */
	_MuMsgIter (Xapian::Enquire &enq, size_t maxnum,
		    MuMsgFieldId sortfield, MuMsgIterFlags flags):
		_enq(enq), _thread_hash (0), _msg(0), _flags(flags),
		_skip_unreadable(flags & MU_MSG_ITER_FLAG_SKIP_UNREADABLE),
		_skip_dups (flags & MU_MSG_ITER_FLAG_SKIP_DUPS) {

		const bool descending = (flags & MU_MSG_ITER_FLAG_DESCENDING);
		const bool threads    = (flags & MU_MSG_ITER_FLAG_THREADS);

		// first, we get _all_ matches (G_MAXINT); the threads are
		// based on those, and then we return at most <maxnum> of them
		_matches = _enq.get_mset (0, G_MAXINT);

		// make sure the cursor always belongs to _matches, also
		// when there are no matches at all
		_cursor = _matches.begin();
		if (_matches.empty())
			return;

		if (threads) {
			_matches.fetch();

			// NOTE: temporarily turn-off skipping duplicates,
			// since we need threadinfo for *all*
			_skip_dups = false;
			_thread_hash = mu_threader_calculate
				(this, _matches.size(), sortfield, descending);
			_skip_dups = (flags & MU_MSG_ITER_FLAG_SKIP_DUPS);

			ThreadKeyMaker keymaker(_thread_hash);
			try {
				enq.set_sort_by_key (&keymaker, false);
				_matches = _enq.get_mset (0, maxnum);
				// 'keymaker' lives on the stack; don't leave
				// the enquire object pointing to it
				enq.set_sort_by_relevance ();
			} catch (...) {
				// the destructor does not run when the
				// constructor throws, so clean up here
				set_msg (NULL);
				if (_thread_hash)
					g_hash_table_destroy (_thread_hash);
				_thread_hash = 0;
				enq.set_sort_by_relevance ();
				throw;
			}
		} else if (sortfield != MU_MSG_FIELD_ID_NONE) {
			enq.set_sort_by_value ((Xapian::valueno)sortfield,
					       descending);
			_matches = _enq.get_mset (0, maxnum);
		} else if (maxnum < _matches.size()) {
			// no sorting requested, but <maxnum> still applies
			_matches = _enq.get_mset (0, maxnum);
		}

		_cursor = _matches.begin();
	}

	/* Free the thread-info hash (if any) and unref the current
	 * message (if any). */
	~_MuMsgIter () {
		if (_thread_hash)
			g_hash_table_destroy (_thread_hash);
		set_msg (NULL);
	}

	const Xapian::Enquire& enquire() const { return _enq; }
	Xapian::MSet& matches() { return _matches; }

	Xapian::MSet::const_iterator cursor () const { return _cursor; }
	void set_cursor (Xapian::MSetIterator cur) { _cursor = cur; }
	void cursor_next () { ++_cursor; }

	GHashTable *thread_hash () { return _thread_hash; }

	MuMsg *msg() const { return _msg; }

	/* Replace the current message with 'msg' (may be NULL); the
	 * previous message, if any, is unref'd. Returns 'msg'. */
	MuMsg *set_msg (MuMsg *msg) {
		if (_msg)
			mu_msg_unref (_msg);
		return _msg = msg;
	}

	MuMsgIterFlags flags() const { return _flags; }

	/* the message-id value of the document at the cursor */
	const std::string msgid () const {
		const Xapian::Document doc (cursor().get_document());
		return doc.get_value(MU_MSG_FIELD_ID_MSGID);
	}

	/* the docid of the document at the cursor */
	unsigned docid () const {
		const Xapian::Document doc (cursor().get_document());
		return doc.get_docid();
	}

	/* Is the message at the cursor a duplicate? As a side effect,
	 * a message-id that is not in the preferred map and was not seen
	 * before is remembered, so later messages with the same
	 * message-id are reported as duplicates. If anything throws, the
	 * message is treated as a duplicate. */
	bool looks_like_dup () const {
		try {
			// get the message-id only once
			const std::string mid (msgid());

			// is this message-id in the preferred map? if so,
			// only the document with the preferred docid is
			// _not_ a duplicate
			const msgid_docid_map::const_iterator pref_iter
				(_preferred_map.find (mid));
			if (pref_iter != _preferred_map.end())
				return pref_iter->second != docid();

			// otherwise, it's a dup if we've already seen this
			// message-id; insert() tells us whether the
			// message-id was new
			return !_msg_uid_set.insert (mid).second;

		} catch (...) {
			return true;
		}
	}

	/* GHFunc-style callback: copy one (msgid, docid) pair into
	 * the preferred map */
	static void each_preferred (const char *msgid, gpointer docidp,
				    msgid_docid_map *preferred_map) {
		(*preferred_map)[msgid] = GPOINTER_TO_SIZE(docidp);
	}

	/* With a NULL 'preferred_hash', clear the preferred map;
	 * otherwise, add the entries of 'preferred_hash' to it. Existing
	 * entries are not removed first, but entries with the same
	 * message-id are overwritten. */
	void set_preferred_map (GHashTable *preferred_hash) {
		if (!preferred_hash)
			_preferred_map.clear();
		else
			g_hash_table_foreach (preferred_hash,
					      (GHFunc)each_preferred, &_preferred_map);
	}

	bool skip_dups () const { return _skip_dups; }
	bool skip_unreadable () const { return _skip_unreadable; }

private:
	// not copyable (declared, but not defined): a copy would free
	// _thread_hash and unref _msg a second time
	_MuMsgIter (const _MuMsgIter&);
	_MuMsgIter& operator= (const _MuMsgIter&);

	const Xapian::Enquire		_enq;
	Xapian::MSet			_matches;
	Xapian::MSet::const_iterator	_cursor;

	// docid => MuMsgIterThreadInfo; 0 when there's no thread info
	GHashTable			*_thread_hash;
	// the message for the current cursor position, or 0
	MuMsg				*_msg;
	MuMsgIterFlags			_flags;

	// the message-ids seen so far by looks_like_dup
	mutable std::set <std::string, ltstr> _msg_uid_set;
	bool				_skip_unreadable;

	// the 'preferred map' (msgid->docid) is used when checking
	// for duplicates; if a message is in the preferred map, it
	// will not be excluded (but other messages with the same
	// msgid will)
	msgid_docid_map			_preferred_map;
	bool				_skip_dups;
};
/* Is the file for the message at the iterator's cursor readable?
 * Returns FALSE when the document has no path value, or when
 * access(2) with R_OK fails for that path; TRUE otherwise. */
static gboolean
is_msg_file_readable (MuMsgIter *iter)
{
	const std::string path
		(iter->cursor().get_document().get_value(MU_MSG_FIELD_ID_PATH));

	if (path.empty())
		return FALSE;

	if (access (path.c_str(), R_OK) != 0)
		return FALSE;

	return TRUE;
}
/* Create a new iterator for the matches of 'enq'.
 *
 * enq:       the enquire object (must not be NULL)
 * maxnum:    the maximum number of matches
 * sortfield: a valid field id, or MU_MSG_FIELD_ID_NONE
 * flags:     iterator options
 * err:       receives the error information when something fails
 *
 * Returns the new iterator, to be freed with mu_msg_iter_destroy, or
 * NULL in case of error. If the first match must be skipped (an
 * unreadable file or a duplicate, depending on 'flags'), the iterator
 * is already moved past it.
 */
MuMsgIter*
mu_msg_iter_new (XapianEnquire *enq, size_t maxnum,
		 MuMsgFieldId sortfield, MuMsgIterFlags flags,
		 GError **err)
{
	g_return_val_if_fail (enq, NULL);
	/* sortfield should be set to .._NONE when we're not threading */
	g_return_val_if_fail (mu_msg_field_id_is_valid (sortfield) ||
			      sortfield == MU_MSG_FIELD_ID_NONE,
			      NULL);
	try {
		MuMsgIter *iter (new MuMsgIter ((Xapian::Enquire&)*enq,
						maxnum,
						sortfield,
						flags));
		try {
			// without matches, there is no first message
			// to inspect
			const bool empty =
				(iter->cursor() == iter->matches().end());

			// note: we check if it's a dup even for the first
			// message, since we need its uid in the set for
			// checking later messages
			if (!empty &&
			    ((iter->skip_unreadable() &&
			      !is_msg_file_readable (iter)) ||
			     (iter->skip_dups() && iter->looks_like_dup ())))
				mu_msg_iter_next (iter); /* skip! */

		} catch (...) {
			// don't leak the iterator; the error itself is
			// reported by the handlers below
			delete iter;
			throw;
		}

		return iter;

	} catch (const Xapian::DatabaseModifiedError &dbmex) {
		mu_util_g_set_error (err, MU_ERROR_XAPIAN_MODIFIED,
				     "database was modified; please reopen");
		return 0;
	} MU_XAPIAN_CATCH_BLOCK_G_ERROR_RETURN (err, MU_ERROR_XAPIAN, 0);
}
/* Delete the iterator; exceptions thrown while doing so are handled by
 * MU_XAPIAN_CATCH_BLOCK and do not reach the caller. */
void
mu_msg_iter_destroy (MuMsgIter *iter)
{
	try {
		delete iter;
	} MU_XAPIAN_CATCH_BLOCK;
}
/* Hand the hash table with 'preferred' messages to the iterator, for
 * use when checking for duplicates; 'iter' must not be NULL, while
 * 'preferred_hash' is passed on unchecked. */
void
mu_msg_iter_set_preferred (MuMsgIter *iter, GHashTable *preferred_hash)
{
	g_return_if_fail (iter);

	iter->set_preferred_map (preferred_hash);
}
/* Get the message for the current position of the iterator. The
 * iterator remembers the returned message and unrefs it when another
 * message is set or the iterator is destroyed. Returns NULL when
 * 'iter' is NULL or done, or when the message could not be created. */
MuMsg*
mu_msg_iter_get_msg_floating (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, NULL);
	g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL);

	try {
		/* NOTE(review): presumably mu_msg_new_from_doc takes
		 * ownership of the document -- to be confirmed */
		Xapian::Document *docp =
			new Xapian::Document(iter->cursor().get_document());
		GError *err = NULL;
		MuMsg *msg = iter->set_msg
			(mu_msg_new_from_doc((XapianDocument*)docp, &err));

		if (!msg)
			MU_HANDLE_G_ERROR(err);

		return msg;

	} MU_XAPIAN_CATCH_BLOCK_RETURN (NULL);
}
/* Drop the current message and move the cursor back to the first
 * match. Returns TRUE if that worked, FALSE when 'iter' is NULL or an
 * exception was caught. */
gboolean
mu_msg_iter_reset (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, FALSE);

	iter->set_msg (NULL);

	try {
		const Xapian::MSetIterator first (iter->matches().begin());
		iter->set_cursor (first);
		return TRUE;

	} MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE);
}
/* Drop the current message and move the iterator to the next match,
 * passing over the matches that must be skipped (unreadable files
 * and/or duplicates, depending on the iterator's flags).
 *
 * Returns TRUE if the iterator now points at a message; FALSE when
 * 'iter' is NULL, when there are no more messages or when an
 * exception was caught.
 */
gboolean
mu_msg_iter_next (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, FALSE);

	iter->set_msg (NULL);

	if (mu_msg_iter_is_done(iter))
		return FALSE;

	try {
		/* loop rather than recurse, so a long run of skipped
		 * messages cannot exhaust the stack */
		for (;;) {
			iter->cursor_next();

			if (iter->cursor() == iter->matches().end())
				return FALSE;

			if ((iter->skip_unreadable() &&
			     !is_msg_file_readable (iter)) ||
			    (iter->skip_dups() && iter->looks_like_dup ()))
				continue; /* skip! */

			return TRUE;
		}
	} MU_XAPIAN_CATCH_BLOCK_RETURN(FALSE);
}
/* Is the iterator past the last match? Returns TRUE as well when
 * 'iter' is NULL or when an exception was caught. */
gboolean
mu_msg_iter_is_done (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, TRUE);

	try {
		if (iter->cursor() == iter->matches().end())
			return TRUE;

		return FALSE;

	} MU_XAPIAN_CATCH_BLOCK_RETURN (TRUE);
}
/* Is the iterator's cursor at the beginning of the matches? Returns
 * FALSE when 'iter' is NULL. */
gboolean
mu_msg_iter_is_first (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, FALSE);

	const bool at_begin = (iter->cursor() == iter->matches().begin());

	return at_begin;
}
/* Is the iterator's cursor at the last match, i.e., is the position
 * after it the end of the matches? Returns FALSE when 'iter' is NULL
 * or already done. */
gboolean
mu_msg_iter_is_last (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, FALSE);

	if (mu_msg_iter_is_done (iter))
		return FALSE;

	const bool at_last = (iter->cursor() + 1 == iter->matches().end());

	return at_last;
}
/* Get the docid for the current position of the iterator; returns
 * (unsigned int)-1 when 'iter' is NULL or done, or when an exception
 * was caught.
 *
 * NOTE(review): the Xapian documentation describes docid 0 as invalid,
 * so 0 may be usable as the error value, too -- to be confirmed. */
unsigned
mu_msg_iter_get_docid (MuMsgIter *iter)
{
	const unsigned int invalid_docid = (unsigned int)-1;

	g_return_val_if_fail (iter, invalid_docid);
	g_return_val_if_fail (!mu_msg_iter_is_done(iter), invalid_docid);

	try {
		return iter->docid();

	} MU_XAPIAN_CATCH_BLOCK_RETURN (invalid_docid);
}
/* Get the message-id for the current position of the iterator, as a
 * newly allocated copy (made with g_strdup); returns NULL when 'iter'
 * is NULL or done, or when an exception was caught. */
char*
mu_msg_iter_get_msgid (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, NULL);
	g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL);

	try {
		const std::string msgid (iter->msgid());
		return g_strdup (msgid.c_str());

	} MU_XAPIAN_CATCH_BLOCK_RETURN (NULL);
}
/* Get the references for the current position of the iterator: the
 * document's refs value, split at each ',' with g_strsplit. Returns
 * NULL when that value is empty, when 'iter' is NULL or done, or when
 * an exception was caught. */
char**
mu_msg_iter_get_refs (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, NULL);
	g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL);

	try {
		const Xapian::Document doc (iter->cursor().get_document());
		const std::string refs (doc.get_value(MU_MSG_FIELD_ID_REFS));

		return refs.empty() ? NULL : g_strsplit (refs.c_str(),",", -1);

	} MU_XAPIAN_CATCH_BLOCK_RETURN (NULL);
}
/* Get the thread-id for the current position of the iterator, as a
 * newly allocated copy (made with g_strdup); returns NULL when the
 * thread-id is empty, when 'iter' is NULL or done, or when an
 * exception was caught. */
char*
mu_msg_iter_get_thread_id (MuMsgIter *iter)
{
	g_return_val_if_fail (iter, NULL);
	g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL);

	try {
		const Xapian::Document doc (iter->cursor().get_document());
		/* the value is copied through c_str(), so it ends at
		 * the first NUL byte */
		const std::string thread_id
			(doc.get_value(MU_MSG_FIELD_ID_THREAD_ID).c_str());

		if (thread_id.empty())
			return NULL;

		return g_strdup (thread_id.c_str());

	} MU_XAPIAN_CATCH_BLOCK_RETURN (NULL);
}
/* Get the thread info for the current position of the iterator.
 *
 * Returns the MuMsgIterThreadInfo found for the current docid in the
 * iterator's thread-info hash table; NULL when 'iter' is NULL or done,
 * when the iterator has no thread info at all, when there is none for
 * this docid (a warning is logged in that case), or when an exception
 * was caught.
 */
const MuMsgIterThreadInfo*
mu_msg_iter_get_thread_info (MuMsgIter *iter)
{
	/* check 'iter' explicitly, like the other accessors do */
	g_return_val_if_fail (iter, NULL);
	g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL);

	/* maybe we don't have thread info */
	if (!iter->thread_hash())
		return NULL;

	try {
		const MuMsgIterThreadInfo *ti;
		unsigned int docid;

		docid = mu_msg_iter_get_docid (iter);
		ti    = (const MuMsgIterThreadInfo*)g_hash_table_lookup
			(iter->thread_hash(), GUINT_TO_POINTER(docid));
		if (!ti)
			g_warning ("no ti for %u\n", docid);

		return ti;

	} MU_XAPIAN_CATCH_BLOCK_RETURN (NULL);
}