mirror of https://github.com/djcb/mu.git
* mu-query/mu-msg-iter: when showing related messages (--include-related),
favor the ones that were in the original set
This commit is contained in:
parent
a0d8d4f5da
commit
70356a62f5
|
@ -30,12 +30,22 @@
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#include "mu-util.h"
|
#include "mu-util.h"
|
||||||
#include "mu-msg.h"
|
#include "mu-msg.h"
|
||||||
#include "mu-msg-iter.h"
|
#include "mu-msg-iter.h"
|
||||||
#include "mu-threader.h"
|
#include "mu-threader.h"
|
||||||
|
|
||||||
|
|
||||||
|
struct ltstr {
|
||||||
|
bool operator () (const std::string &s1,
|
||||||
|
const std::string &s2) const {
|
||||||
|
return g_strcmp0 (s1.c_str(), s2.c_str()) < 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
typedef std::map <std::string, unsigned, ltstr> msgid_docid_map;
|
||||||
|
|
||||||
class ThreadKeyMaker: public Xapian::KeyMaker {
|
class ThreadKeyMaker: public Xapian::KeyMaker {
|
||||||
public:
|
public:
|
||||||
ThreadKeyMaker (GHashTable *threadinfo): _threadinfo(threadinfo) {}
|
ThreadKeyMaker (GHashTable *threadinfo): _threadinfo(threadinfo) {}
|
||||||
|
@ -56,7 +66,7 @@ public:
|
||||||
MuMsgFieldId sortfield, MuMsgIterFlags flags):
|
MuMsgFieldId sortfield, MuMsgIterFlags flags):
|
||||||
_enq(enq), _thread_hash (0), _msg(0), _flags(flags),
|
_enq(enq), _thread_hash (0), _msg(0), _flags(flags),
|
||||||
_skip_unreadable(flags & MU_MSG_ITER_FLAG_SKIP_UNREADABLE),
|
_skip_unreadable(flags & MU_MSG_ITER_FLAG_SKIP_UNREADABLE),
|
||||||
_skip_dups (flags & MU_MSG_ITER_FLAG_SKIP_DUPS) {
|
_skip_dups (flags & MU_MSG_ITER_FLAG_SKIP_DUPS){
|
||||||
|
|
||||||
bool descending = (flags & MU_MSG_ITER_FLAG_DESCENDING);
|
bool descending = (flags & MU_MSG_ITER_FLAG_DESCENDING);
|
||||||
bool threads = (flags & MU_MSG_ITER_FLAG_THREADS);
|
bool threads = (flags & MU_MSG_ITER_FLAG_THREADS);
|
||||||
|
@ -71,24 +81,22 @@ public:
|
||||||
if (threads) {
|
if (threads) {
|
||||||
_matches.fetch();
|
_matches.fetch();
|
||||||
_cursor = _matches.begin();
|
_cursor = _matches.begin();
|
||||||
{ // temporarily turn-off skipping dups
|
// NOTE: temporarily turn-off skipping duplicates, since we
|
||||||
_skip_dups = FALSE;
|
// need threadinfo for *all*
|
||||||
_thread_hash = mu_threader_calculate
|
_skip_dups = false;
|
||||||
(this, _matches.size(), sortfield, descending);
|
_thread_hash = mu_threader_calculate
|
||||||
_skip_dups = (flags & MU_MSG_ITER_FLAG_SKIP_DUPS);
|
(this, _matches.size(), sortfield, descending);
|
||||||
}
|
_skip_dups = (flags & MU_MSG_ITER_FLAG_SKIP_DUPS);
|
||||||
ThreadKeyMaker keymaker(_thread_hash);
|
ThreadKeyMaker keymaker(_thread_hash);
|
||||||
enq.set_sort_by_key (&keymaker, false);
|
enq.set_sort_by_key (&keymaker, false);
|
||||||
_matches = _enq.get_mset (0, maxnum);
|
_matches = _enq.get_mset (0, maxnum);
|
||||||
|
|
||||||
|
|
||||||
} else if (sortfield != MU_MSG_FIELD_ID_NONE) {
|
} else if (sortfield != MU_MSG_FIELD_ID_NONE) {
|
||||||
enq.set_sort_by_value ((Xapian::valueno)sortfield,
|
enq.set_sort_by_value ((Xapian::valueno)sortfield,
|
||||||
descending);
|
descending);
|
||||||
_matches = _enq.get_mset (0, maxnum);
|
_matches = _enq.get_mset (0, maxnum);
|
||||||
_cursor = _matches.begin();
|
_cursor = _matches.begin();
|
||||||
}
|
}
|
||||||
|
|
||||||
_cursor = _matches.begin();
|
_cursor = _matches.begin();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,12 +128,30 @@ public:
|
||||||
bool looks_like_dup () const {
|
bool looks_like_dup () const {
|
||||||
try {
|
try {
|
||||||
const Xapian::Document doc (cursor().get_document());
|
const Xapian::Document doc (cursor().get_document());
|
||||||
const std::string msg_uid
|
const std::string msgid (doc.get_value(MU_MSG_FIELD_ID_MSGID));
|
||||||
(doc.get_value(MU_MSG_FIELD_ID_MSGID));
|
unsigned docid (doc.get_docid());
|
||||||
if (_msg_uid_set.find (msg_uid) != _msg_uid_set.end()) {
|
|
||||||
|
if (msgid.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// is this message in the preferred map? if
|
||||||
|
// so, it's a duplicate unless its docid is the
|
||||||
|
// preferred one for this msgid
|
||||||
|
msgid_docid_map::const_iterator pref_iter (_preferred_map.find (msgid));
|
||||||
|
if (pref_iter != _preferred_map.end()) {
|
||||||
|
//std::cerr << "in the set!" << std::endl;
|
||||||
|
if ((*pref_iter).second == docid)
|
||||||
|
return false; // in the set: not a dup!
|
||||||
|
else
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise, simply check if we've already seen this message-id,
|
||||||
|
// and, if so, it's considered a dup
|
||||||
|
if (_msg_uid_set.find (msgid) != _msg_uid_set.end()) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
_msg_uid_set.insert (msg_uid);
|
_msg_uid_set.insert (msgid);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
@ -133,6 +159,17 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void each_preferred (const char *msgid, gpointer docidp, msgid_docid_map *preferred_map) {
|
||||||
|
(*preferred_map)[msgid] = GPOINTER_TO_SIZE(docidp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_preferred_map (GHashTable *preferred_hash) {
|
||||||
|
if (!preferred_hash)
|
||||||
|
_preferred_map.clear();
|
||||||
|
else
|
||||||
|
g_hash_table_foreach (preferred_hash,
|
||||||
|
(GHFunc)each_preferred, &_preferred_map);
|
||||||
|
}
|
||||||
|
|
||||||
bool skip_dups () const { return _skip_dups; }
|
bool skip_dups () const { return _skip_dups; }
|
||||||
bool skip_unreadable () const { return _skip_unreadable; }
|
bool skip_unreadable () const { return _skip_unreadable; }
|
||||||
|
@ -147,15 +184,15 @@ private:
|
||||||
|
|
||||||
MuMsgIterFlags _flags;
|
MuMsgIterFlags _flags;
|
||||||
|
|
||||||
struct ltstr {
|
|
||||||
bool operator () (const std::string &s1,
|
|
||||||
const std::string &s2) const {
|
|
||||||
return g_strcmp0 (s1.c_str(), s2.c_str()) < 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
mutable std::set <std::string, ltstr> _msg_uid_set;
|
mutable std::set <std::string, ltstr> _msg_uid_set;
|
||||||
|
bool _skip_unreadable;
|
||||||
|
|
||||||
bool _skip_unreadable, _skip_dups;
|
// the 'preferred map' (msgid->docid) is used when checking
|
||||||
|
// for duplicates; if a message is in the preferred map, it
|
||||||
|
// will not be excluded (but other messages with the same
|
||||||
|
// msgid will)
|
||||||
|
msgid_docid_map _preferred_map;
|
||||||
|
bool _skip_dups;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -212,21 +249,17 @@ mu_msg_iter_destroy (MuMsgIter *iter)
|
||||||
try { delete iter; } MU_XAPIAN_CATCH_BLOCK;
|
try { delete iter; } MU_XAPIAN_CATCH_BLOCK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
mu_msg_iter_set_skip_duplicates (MuMsgIter *iter, gboolean skip_duplicates,
|
mu_msg_iter_set_preferred (MuMsgIter *iter, GHashTable *preferred_hash)
|
||||||
GHashTable *preferred_set)
|
|
||||||
{
|
{
|
||||||
g_return_if_fail (iter);
|
g_return_if_fail (iter);
|
||||||
g_return_if_fail (!skip_duplicates && preferred_set);
|
iter->set_preferred_map (preferred_hash);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MuMsg*
|
MuMsg*
|
||||||
mu_msg_iter_get_msg_floating (MuMsgIter *iter)
|
mu_msg_iter_get_msg_floating (MuMsgIter *iter)
|
||||||
{
|
{
|
||||||
|
@ -306,7 +339,7 @@ mu_msg_iter_is_done (MuMsgIter *iter)
|
||||||
|
|
||||||
|
|
||||||
/* hmmm.... is it impossible to get a 0 docid, or just very improbable? */
|
/* hmmm.... is it impossible to get a 0 docid, or just very improbable? */
|
||||||
unsigned int
|
unsigned
|
||||||
mu_msg_iter_get_docid (MuMsgIter *iter)
|
mu_msg_iter_get_docid (MuMsgIter *iter)
|
||||||
{
|
{
|
||||||
g_return_val_if_fail (iter, (unsigned int)-1);
|
g_return_val_if_fail (iter, (unsigned int)-1);
|
||||||
|
@ -368,8 +401,6 @@ mu_msg_iter_get_thread_id (MuMsgIter *iter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const MuMsgIterThreadInfo*
|
const MuMsgIterThreadInfo*
|
||||||
mu_msg_iter_get_thread_info (MuMsgIter *iter)
|
mu_msg_iter_get_thread_info (MuMsgIter *iter)
|
||||||
{
|
{
|
||||||
|
@ -385,7 +416,7 @@ mu_msg_iter_get_thread_info (MuMsgIter *iter)
|
||||||
(iter->thread_hash(), GUINT_TO_POINTER(docid));
|
(iter->thread_hash(), GUINT_TO_POINTER(docid));
|
||||||
|
|
||||||
if (!ti)
|
if (!ti)
|
||||||
g_printerr ("no ti for %u\n", docid);
|
g_warning ("no ti for %u\n", docid);
|
||||||
|
|
||||||
return ti;
|
return ti;
|
||||||
|
|
||||||
|
|
|
@ -51,8 +51,6 @@ enum _MuMsgIterFlags {
|
||||||
};
|
};
|
||||||
typedef unsigned MuMsgIterFlags;
|
typedef unsigned MuMsgIterFlags;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a new MuMsgIter -- basically, an iterator over the search
|
* create a new MuMsgIter -- basically, an iterator over the search
|
||||||
* results
|
* results
|
||||||
|
@ -128,6 +126,18 @@ MuMsg* mu_msg_iter_get_msg_floating (MuMsgIter *iter)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provide a preferred_hash, which is a hashtable msgid->docid to
|
||||||
|
* indicate the messages which should /not/ be seen as duplicates.
|
||||||
|
*
|
||||||
|
* @param iter a valid MuMsgIter iterator
|
||||||
|
* @param preferred_hash a hashtable msgid->docid of messages /not/ to
|
||||||
|
* mark as duplicates, or NULL
|
||||||
|
*/
|
||||||
|
void mu_msg_iter_set_preferred (MuMsgIter *iter, GHashTable *preferred_hash);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the document id for the current message
|
* get the document id for the current message
|
||||||
*
|
*
|
||||||
|
|
|
@ -397,20 +397,34 @@ get_enquire (MuQuery *self, const char *searchexpr, MuMsgFieldId sortfieldid,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* record all threadids for the messages
|
* record all threadids for the messages; also 'orig_set' receives all
|
||||||
|
* original matches (a map msgid-->docid), so we can make sure the
|
||||||
|
* originals are not seen as 'duplicates' later (when skipping
|
||||||
|
* duplicates). We want to favor the originals over the related
|
||||||
|
* messages, when skipping duplicates.
|
||||||
*/
|
*/
|
||||||
static GHashTable*
|
static GHashTable*
|
||||||
get_thread_ids (MuMsgIter *iter)
|
get_thread_ids (MuMsgIter *iter, GHashTable **orig_set)
|
||||||
{
|
{
|
||||||
GHashTable *ids;
|
GHashTable *ids;
|
||||||
ids = g_hash_table_new_full (g_str_hash, g_str_equal,
|
ids = g_hash_table_new_full (g_str_hash, g_str_equal,
|
||||||
(GDestroyNotify)g_free, NULL);
|
(GDestroyNotify)g_free, NULL);
|
||||||
|
*orig_set = g_hash_table_new_full (g_str_hash, g_str_equal,
|
||||||
|
(GDestroyNotify)g_free, NULL);
|
||||||
|
|
||||||
while (!mu_msg_iter_is_done (iter)) {
|
while (!mu_msg_iter_is_done (iter)) {
|
||||||
const char *thread_id;
|
const char *thread_id, *msgid;
|
||||||
|
unsigned docid;
|
||||||
|
/* record the thread id for the message */
|
||||||
if ((thread_id = mu_msg_iter_get_thread_id (iter)))
|
if ((thread_id = mu_msg_iter_get_thread_id (iter)))
|
||||||
g_hash_table_insert (ids, g_strdup (thread_id),
|
g_hash_table_insert (ids, g_strdup (thread_id),
|
||||||
GSIZE_TO_POINTER(TRUE));
|
GSIZE_TO_POINTER(TRUE));
|
||||||
|
/* record the original set */
|
||||||
|
docid = mu_msg_iter_get_docid(iter);
|
||||||
|
if (docid != 0 && (msgid = mu_msg_iter_get_msgid (iter)))
|
||||||
|
g_hash_table_insert (*orig_set, g_strdup (msgid),
|
||||||
|
GSIZE_TO_POINTER(docid));
|
||||||
|
|
||||||
if (!mu_msg_iter_next (iter))
|
if (!mu_msg_iter_next (iter))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -420,7 +434,7 @@ get_thread_ids (MuMsgIter *iter)
|
||||||
|
|
||||||
|
|
||||||
static Xapian::Query
|
static Xapian::Query
|
||||||
get_related_query (MuMsgIter *iter)
|
get_related_query (MuMsgIter *iter, GHashTable **orig_set)
|
||||||
{
|
{
|
||||||
GHashTable *hash;
|
GHashTable *hash;
|
||||||
GList *id_list, *cur;
|
GList *id_list, *cur;
|
||||||
|
@ -428,7 +442,9 @@ get_related_query (MuMsgIter *iter)
|
||||||
static std::string pfx (1, mu_msg_field_xapian_prefix
|
static std::string pfx (1, mu_msg_field_xapian_prefix
|
||||||
(MU_MSG_FIELD_ID_THREAD_ID));
|
(MU_MSG_FIELD_ID_THREAD_ID));
|
||||||
|
|
||||||
hash = get_thread_ids (iter);
|
/* orig_set receives the hash msgid->docid of the set of
|
||||||
|
* original matches */
|
||||||
|
hash = get_thread_ids (iter, orig_set);
|
||||||
/* id_list now gets a list of all thread-ids seen in the query
|
/* id_list now gets a list of all thread-ids seen in the query
|
||||||
* results; either in the Message-Id field or in
|
* results; either in the Message-Id field or in
|
||||||
* References. */
|
* References. */
|
||||||
|
@ -451,10 +467,12 @@ static void
|
||||||
include_related (MuQuery *self, MuMsgIter **iter, int maxnum,
|
include_related (MuQuery *self, MuMsgIter **iter, int maxnum,
|
||||||
MuMsgFieldId sortfieldid, MuQueryFlags flags)
|
MuMsgFieldId sortfieldid, MuQueryFlags flags)
|
||||||
{
|
{
|
||||||
|
GHashTable *orig_set;
|
||||||
Xapian::Enquire enq (self->db());
|
Xapian::Enquire enq (self->db());
|
||||||
MuMsgIter *rel_iter;
|
MuMsgIter *rel_iter;
|
||||||
|
|
||||||
enq.set_query(get_related_query (*iter));
|
orig_set = NULL;
|
||||||
|
enq.set_query(get_related_query (*iter, &orig_set));
|
||||||
enq.set_cutoff(0,0);
|
enq.set_cutoff(0,0);
|
||||||
|
|
||||||
rel_iter= mu_msg_iter_new (
|
rel_iter= mu_msg_iter_new (
|
||||||
|
@ -465,6 +483,12 @@ include_related (MuQuery *self, MuMsgIter **iter, int maxnum,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
mu_msg_iter_destroy (*iter);
|
mu_msg_iter_destroy (*iter);
|
||||||
|
|
||||||
|
// set the preferred set for the iterator (ie., the set not
|
||||||
|
// considered to be duplicates) to be the original matches
|
||||||
|
mu_msg_iter_set_preferred (rel_iter, orig_set);
|
||||||
|
g_hash_table_destroy (orig_set);
|
||||||
|
|
||||||
*iter = rel_iter;
|
*iter = rel_iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue