* implement sorting of threads on arbitrary fields (WIP)

This commit is contained in:
Dirk-Jan C. Binnema 2011-07-02 11:27:08 +03:00
parent 50edc719fa
commit a2bc4540e0
11 changed files with 174 additions and 58 deletions

View File

@ -299,7 +299,7 @@ mu_container_from_list (GSList *lst)
}
struct _SortFuncData {
GCompareDataFunc func;
MuMsgFieldId mfid;
gboolean invert;
gpointer user_data;
};
@ -315,11 +315,18 @@ sort_func_wrapper (MuContainer *a, MuContainer *b, SortFuncData *data)
* is */
for (a1 = a; a1->msg == NULL && a1->child != NULL; a1 = a1->child);
for (b1 = b; b1->msg == NULL && b1->child != NULL; b1 = b1->child);
if (a1 == b1)
return 0;
else if (!a1->msg)
return 1;
else if (!b1->msg)
return -1;
if (data->invert)
return data->func (b1, a1, data->user_data);
return mu_msg_cmp (b1->msg, a1->msg, data->mfid);
else
return data->func (a1, b1, data->user_data);
return mu_msg_cmp (a1->msg, b1->msg, data->mfid);
}
static MuContainer*
@ -348,15 +355,15 @@ mu_container_sort_real (MuContainer *c, SortFuncData *sfdata)
MuContainer*
mu_container_sort (MuContainer *c, GCompareDataFunc func, gpointer user_data,
gboolean invert)
mu_container_sort (MuContainer *c, MuMsgFieldId mfid, gpointer user_data,
gboolean invert)
{
SortFuncData sfdata = { func, invert, user_data };
SortFuncData sfdata = { mfid, invert, user_data };
g_return_val_if_fail (c, NULL);
g_return_val_if_fail (func, NULL);
g_return_val_if_fail (mu_msg_field_id_is_valid(mfid), NULL);
return mu_container_sort_real (c, &sfdata);
}

View File

@ -171,14 +171,15 @@ typedef int (*MuContainerCmpFunc) (MuContainer *c1, MuContainer *c2,
* container is empty, the first non-empty 'leftmost' child is used.
*
* @param c a container
* @param func a sorting function
* @param mfid the field to sort by
* @param user_data a user pointer to pass to the sorting function
* @param invert if TRUE, invert the sorting order
*
* @return a sorted container
*/
MuContainer *mu_container_sort (MuContainer *c, GCompareDataFunc func,
gpointer user_data, gboolean invert);
MuContainer* mu_container_sort (MuContainer *c, MuMsgFieldId mfid,
gpointer user_data, gboolean invert);
/**
* create a hashtable with maps document-ids to information about them,

View File

@ -59,26 +59,32 @@ typedef enum _FieldFlags FieldFlags;
* this struct describes the fields of an e-mail
/*/
struct _MuMsgField {
MuMsgFieldId _id; /* the id of the field */
MuMsgFieldType _type; /* the type of the field */
const char *_name; /* the name of the field */
const char _shortcut; /* the shortcut for use in
* --fields and sorting */
const char _xprefix; /* the Xapian-prefix */
FieldFlags _flags; /* the flags that tells us
* what to do */
MuMsgFieldId _id; /* the id of the field */
MuMsgFieldType _type; /* the type of the field */
const char *_name; /* the name of the field */
const char _shortcut; /* the shortcut for use in
* --fields and sorting */
const char _xprefix; /* the Xapian-prefix */
GCompareDataFunc _cmpfunc; /* sort function */
FieldFlags _flags; /* the flags that tells us
* what to do */
};
typedef struct _MuMsgField MuMsgField;
static int cmp_num (int a, int b);
static int cmp_str (const char* s1, const char* s2);
/* the name and shortcut fields must be lower case, or they might be
* misinterpreted by the query-preprocesser which turns queries into
* lowercase */
static const MuMsgField FIELD_DATA[] = {
{
MU_MSG_FIELD_ID_ATTACH,
MU_MSG_FIELD_TYPE_STRING,
"attach" , 'a', 'A',
"attach" , 'a', 'A',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_NORMALIZE |
FLAG_DONT_CACHE
},
@ -87,6 +93,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_BCC,
MU_MSG_FIELD_TYPE_STRING,
"bcc" , 'h', 'H', /* 'hidden */
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_CONTACT |
FLAG_XAPIAN_VALUE
},
@ -95,6 +102,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_BODY_TEXT,
MU_MSG_FIELD_TYPE_STRING,
"body", 'b', 'B',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_NORMALIZE |
FLAG_DONT_CACHE
},
@ -103,6 +111,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_BODY_HTML,
MU_MSG_FIELD_TYPE_STRING,
"bodyhtml", 'h', 0,
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_DONT_CACHE
},
@ -110,6 +119,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_CC,
MU_MSG_FIELD_TYPE_STRING,
"cc", 'c', 'C',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
},
@ -117,6 +127,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_DATE,
MU_MSG_FIELD_TYPE_TIME_T,
"date", 'd', 'D',
(GCompareDataFunc)cmp_num,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY
},
@ -125,6 +136,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_FLAGS,
MU_MSG_FIELD_TYPE_INT,
"flag", 'g', 'G', /* flaGs */
(GCompareDataFunc)cmp_num,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_PREFIX_ONLY
},
@ -133,6 +145,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_FROM,
MU_MSG_FIELD_TYPE_STRING,
"from", 'f', 'F',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
},
@ -140,6 +153,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_PATH,
MU_MSG_FIELD_TYPE_STRING,
"path", 'l', 'L', /* 'l' for location */
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY
},
@ -148,6 +162,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_MAILDIR,
MU_MSG_FIELD_TYPE_STRING,
"maildir", 'm', 'M',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_NORMALIZE | FLAG_XAPIAN_ESCAPE |
FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY
@ -156,7 +171,8 @@ static const MuMsgField FIELD_DATA[] = {
{
MU_MSG_FIELD_ID_PRIO,
MU_MSG_FIELD_TYPE_INT,
"prio", 'p', 'P',
"prio", 'p', 'P',
(GCompareDataFunc)cmp_num,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_PREFIX_ONLY
},
@ -165,6 +181,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_SIZE,
MU_MSG_FIELD_TYPE_BYTESIZE,
"size", 'z', 'Z', /* siZe */
(GCompareDataFunc)cmp_num,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_PREFIX_ONLY
},
@ -173,6 +190,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_SUBJECT,
MU_MSG_FIELD_TYPE_STRING,
"subject", 's', 'S',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE |
FLAG_NORMALIZE
},
@ -181,6 +199,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_TO,
MU_MSG_FIELD_TYPE_STRING,
"to", 't', 'T',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
},
@ -188,6 +207,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_MSGID,
MU_MSG_FIELD_TYPE_STRING,
"msgid", 'i', 'I', /* 'i' for Id */
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_ESCAPE | FLAG_XAPIAN_PREFIX_ONLY
},
@ -196,6 +216,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_TIMESTAMP,
MU_MSG_FIELD_TYPE_TIME_T,
"timestamp", 0, 0,
(GCompareDataFunc)cmp_num,
FLAG_GMIME
},
@ -203,6 +224,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_REFS,
MU_MSG_FIELD_TYPE_STRING_LIST,
NULL, 'r', 'R',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_PREFIX_ONLY
},
@ -211,6 +233,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_TAGS,
MU_MSG_FIELD_TYPE_STRING_LIST,
"tag", 'x', 'X',
(GCompareDataFunc)cmp_str,
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_PREFIX_ONLY
}
@ -378,6 +401,15 @@ mu_msg_field_name (MuMsgFieldId id)
return mu_msg_field(id)->_name;
}
/*
 * Look up the comparison (sort) function registered for a message field.
 *
 * id: the field to look up.
 *
 * Returns the field's _cmpfunc entry, or NULL (after the usual
 * g_return_val_if_fail warning) when id is not a valid field id.
 */
GCompareDataFunc
mu_msg_field_cmp_func (MuMsgFieldId id)
{
	const MuMsgField *field;

	g_return_val_if_fail (mu_msg_field_id_is_valid(id),NULL);

	field = mu_msg_field(id);

	return field->_cmpfunc;
}
char
mu_msg_field_shortcut (MuMsgFieldId id)
{
@ -403,3 +435,24 @@ mu_msg_field_type (MuMsgFieldId id)
MU_MSG_FIELD_TYPE_NONE);
return mu_msg_field(id)->_type;
}
/*
 * Three-way comparison of two integers.
 *
 * Returns a negative value when a < b, 0 when a == b and a positive
 * value when a > b.
 *
 * The result is built from two comparisons instead of `a - b`: the
 * subtraction overflows (undefined behaviour for signed int) when the
 * operands are far apart, e.g. INT_MAX and -1, and then yields the wrong
 * sign. Only the sign of the result is meaningful to sort callers.
 */
static int
cmp_num (int a, int b)
{
	return (a > b) - (a < b);
}
/*
 * Three-way comparison of two (possibly NULL) UTF-8 strings.
 *
 * Identical pointers (including two NULLs) compare equal, and a NULL
 * string sorts before any non-NULL string. Two distinct non-NULL strings
 * are ordered by g_utf8_collate().
 */
static int
cmp_str (const char* s1, const char *s2)
{
	if (s1 && s2)
		return (s1 == s2) ? 0 : g_utf8_collate (s1, s2);

	/* at least one side is NULL from here on */
	if (s1)
		return 1;	/* only s2 is NULL */

	return s2 ? -1 : 0;	/* only s1 is NULL, or both are */
}

View File

@ -250,7 +250,7 @@ gboolean mu_msg_field_xapian_escape (MuMsgFieldId id) G_GNUC_PURE;
/**
* should this field be normalized? ie. should it be downcased and
* accents removed?
* accents removed when storing as Xapian term?
*
* @param field a MuMsgField
*
@ -258,6 +258,19 @@ gboolean mu_msg_field_xapian_escape (MuMsgFieldId id) G_GNUC_PURE;
*/
gboolean mu_msg_field_normalize (MuMsgFieldId id) G_GNUC_PURE;
/**
* get the comparison function for this field, i.e. the one that can be
* used for sorting messages by this field
*
* @param field a MuMsgField
*
* @return the comparison function (or NULL in case of error )
*/
GCompareDataFunc mu_msg_field_cmp_func (MuMsgFieldId id) G_GNUC_PURE;
/**
* should this field be stored as contact information? This means that
* e-mail address will be stored as terms, and names will be indexed

View File

@ -50,16 +50,20 @@ private:
struct _MuMsgIter {
_MuMsgIter (Xapian::Enquire &enq, size_t maxnum, gboolean threads):
_MuMsgIter (Xapian::Enquire &enq, size_t maxnum,
gboolean threads, MuMsgFieldId sortfield):
_enq(enq), _msg(0), _threadhash (0) {
_matches = _enq.get_mset (0, maxnum);
if (threads && !_matches.empty()) {
_matches.fetch();
_threadhash = mu_threader_calculate
(this, _matches.size());
(this, _matches.size(), sortfield);
ThreadKeyMaker keymaker(_threadhash);
enq.set_sort_by_key (&keymaker, false);
_matches = _enq.get_mset (0, maxnum);
}
@ -91,12 +95,20 @@ struct _MuMsgIter {
MuMsgIter*
mu_msg_iter_new (XapianEnquire *enq, size_t maxnum, gboolean threads)
mu_msg_iter_new (XapianEnquire *enq, size_t maxnum, gboolean threads,
MuMsgFieldId sortfield)
{
g_return_val_if_fail (enq, NULL);
/* sortfield should be set to .._NONE when we're not threading */
g_return_val_if_fail (threads || sortfield == MU_MSG_FIELD_ID_NONE,
NULL);
g_return_val_if_fail (mu_msg_field_id_is_valid (sortfield) ||
sortfield == MU_MSG_FIELD_ID_NONE,
FALSE);
try {
return new MuMsgIter ((Xapian::Enquire&)*enq, maxnum, threads);
return new MuMsgIter ((Xapian::Enquire&)*enq, maxnum, threads,
sortfield);
} MU_XAPIAN_CATCH_BLOCK_RETURN(NULL);
}

View File

@ -45,11 +45,15 @@ typedef struct _MuMsgIter MuMsgIter;
* is C, not C++),providing access to search results
* @param batchsize how many results to retrieve at once
* @param threads whether to calculate threads
* @param threadsortfield sorting field when using threads; note, when
* 'threads' is FALSE, this should be MU_MSG_FIELD_ID_NONE
*
* @return a new MuMsgIter, or NULL in case of error
*/
MuMsgIter *mu_msg_iter_new (XapianEnquire *enq,
size_t batchsize, gboolean threads) G_GNUC_WARN_UNUSED_RESULT;
size_t batchsize,
gboolean threads,
MuMsgFieldId threadsortfield) G_GNUC_WARN_UNUSED_RESULT;
/**
* get the next message (which you got from

View File

@ -597,3 +597,26 @@ mu_msg_contact_foreach (MuMsg *msg, MuMsgContactForeachFunc func,
}
}
/*
 * Compare two messages on a single field, for sorting.
 *
 * m1, m2: the messages to compare; both must be non-NULL.
 * mfid:   the field to compare on; must be a valid field id.
 *
 * The comparison itself is delegated to the function returned by
 * mu_msg_field_cmp_func() for mfid. String fields are passed as
 * strings; numeric fields are truncated to guint and packed into
 * pointers. Any other field type (e.g. lists) compares as equal.
 *
 * Returns the comparison function's result, or 0 when a precondition
 * fails or the field type is not handled.
 */
int
mu_msg_cmp (MuMsg *m1, MuMsg *m2, MuMsgFieldId mfid)
{
	GCompareDataFunc cmp;

	g_return_val_if_fail (m1, 0);
	g_return_val_if_fail (m2, 0);
	g_return_val_if_fail (mu_msg_field_id_is_valid(mfid), 0);

	cmp = mu_msg_field_cmp_func (mfid);

	if (mu_msg_field_is_string (mfid)) {
		gconstpointer lhs = get_str_field (m1, mfid);
		gconstpointer rhs = get_str_field (m2, mfid);

		return cmp (lhs, rhs, NULL);
	}

	/* TODO: special-case 64-bit nums */
	if (mu_msg_field_is_numeric (mfid)) {
		gpointer lhs = GUINT_TO_POINTER((guint)get_num_field(m1, mfid));
		gpointer rhs = GUINT_TO_POINTER((guint)get_num_field(m2, mfid));

		return cmp (lhs, rhs, NULL);
	}

	return 0; /* TODO: handle lists */
}

View File

@ -348,6 +348,19 @@ const GSList* mu_msg_get_references (MuMsg *msg);
const GSList* mu_msg_get_tags (MuMsg *self);
/**
* compare two messages for sorting
*
* @param m1 a message
* @param m2 another message
* @param mfid the field to use for the comparison
*
* @return negative if m1 is smaller, positive if m1 is greater, 0 if
* they are equal
*/
int mu_msg_cmp (MuMsg *m1, MuMsg *m2, MuMsgFieldId mfid);
enum _MuMsgContactType { /* Reply-To:? */
MU_MSG_CONTACT_TYPE_TO = 0,
MU_MSG_CONTACT_TYPE_FROM,

View File

@ -356,14 +356,18 @@ mu_query_run (MuQuery *self, const char* searchexpr, gboolean threads,
Xapian::Enquire enq (self->_db);
if (sortfieldid != MU_MSG_FIELD_ID_NONE)
/* note, when our result will be *threaded*, we sort
* there, and don't let Xapian do any sorting */
if (!threads && sortfieldid != MU_MSG_FIELD_ID_NONE)
enq.set_sort_by_value ((Xapian::valueno)sortfieldid,
ascending ? true : false);
enq.set_query(query);
enq.set_cutoff(0,0);
return mu_msg_iter_new ((XapianEnquire*)&enq,
self->_db.get_doccount(), threads);
return mu_msg_iter_new (
(XapianEnquire*)&enq,
self->_db.get_doccount(), threads,
threads ? sortfieldid : MU_MSG_FIELD_ID_NONE);
} MU_XAPIAN_CATCH_BLOCK_RETURN(NULL);
}

View File

@ -55,18 +55,19 @@ static MuContainer* prune_empty_containers (MuContainer *root);
/* static void group_root_set_by_subject (GSList *root_set); */
GHashTable* create_doc_id_thread_path_hash (MuContainer *root, size_t match_num);
static gint cmp_dates (MuContainer *c1, MuContainer *c2);
/* msg threading algorithm, based on JWZ's algorithm,
* http://www.jwz.org/doc/threading.html */
GHashTable*
mu_threader_calculate (MuMsgIter *iter, size_t matchnum)
mu_threader_calculate (MuMsgIter *iter, size_t matchnum, MuMsgFieldId sortfield)
{
GHashTable *id_table, *thread_ids;
MuContainer *root_set;
g_return_val_if_fail (iter, FALSE);
g_return_val_if_fail (mu_msg_field_id_is_valid (sortfield) ||
sortfield == MU_MSG_FIELD_ID_NONE,
FALSE);
/* step 1 */
id_table = create_containers (iter);
@ -79,8 +80,9 @@ mu_threader_calculate (MuMsgIter *iter, size_t matchnum)
root_set = prune_empty_containers (root_set);
/* sort root set */
root_set = mu_container_sort (root_set, (GCompareDataFunc)cmp_dates,
NULL, FALSE);
if (sortfield != MU_MSG_FIELD_ID_NONE)
root_set = mu_container_sort (root_set, sortfield,
NULL, FALSE);
/* step 5: group root set by subject */
//group_root_set_by_subject (root_set);
@ -417,22 +419,3 @@ prune_empty_containers (MuContainer *root_set)
return root_set;
}
/*
 * Order two containers by the date of the message each one holds.
 *
 * A container without a message sorts after one that has a message;
 * two containers without a message compare equal. Otherwise the result
 * is the difference between the two message dates.
 *
 * Returns negative, 0 or positive when c1 sorts before, equal to, or
 * after c2.
 */
G_GNUC_UNUSED static gint
cmp_dates (MuContainer *c1, MuContainer *c2)
{
	MuMsg *m1, *m2;

	m1 = c1->msg;
	m2 = c2->msg;

	if (!m1)
		return m2 ? 1 : 0;

	/* m1 is known to be non-NULL here, so a missing m2 means c1 sorts
	 * first; this mirrors the (!m1 && m2) case above and keeps the
	 * comparator antisymmetric */
	if (!m2)
		return -1;

	/* NOTE(review): the difference is narrowed to gint; confirm that
	 * the date range cannot overflow it */
	return mu_msg_get_date (m1) - mu_msg_get_date (m2);
}

View File

@ -40,11 +40,14 @@ G_BEGIN_DECLS
* to a MuMsgIterThreadInfo structure (see mu-msg-iter.h)
*
* @param iter an iter; note this function will mu_msg_iter_reset this iterator
* @param matches the number of matches in the set
* @param matches the number of matches in the set *
* @param sortfield the field to sort results by, or
* MU_MSG_FIELD_ID_NONE if no sorting should be performed
*
* @return a hashtable; free with g_hash_table_destroy when done with it
*/
GHashTable *mu_threader_calculate (MuMsgIter *iter, size_t matches);
GHashTable *mu_threader_calculate (MuMsgIter *iter, size_t matches,
MuMsgFieldId sortfield);
G_END_DECLS