mirror of https://github.com/djcb/mu.git
* update threading implementation (WIP still)
This commit is contained in:
parent
1891e68936
commit
eb9b0c6de8
|
@ -39,8 +39,10 @@
|
|||
#include "mu-bookmarks.h"
|
||||
#include "mu-runtime.h"
|
||||
|
||||
|
||||
#include "mu-util.h"
|
||||
#include "mu-cmd.h"
|
||||
#include "mu-msg-threader.h"
|
||||
|
||||
enum _OutputFormat {
|
||||
FORMAT_JSON,
|
||||
|
@ -552,24 +554,52 @@ print_summary (MuMsgIter *iter)
|
|||
|
||||
|
||||
static void
|
||||
indent (MuMsgIter *iter)
|
||||
thread_indent (MuMsgIter *iter, gboolean color)
|
||||
{
|
||||
const MuMsgIterThreadInfo *ti;
|
||||
const char* threadpath;
|
||||
int i;
|
||||
gboolean is_root, first_child, empty_parent, is_dup;
|
||||
|
||||
threadpath = mu_msg_iter_get_thread_path (iter);
|
||||
if (!threadpath)
|
||||
|
||||
ti = mu_msg_iter_get_thread_info (iter);
|
||||
if (!ti) {
|
||||
g_warning ("cannot get thread-info for %s",
|
||||
mu_msg_get_subject(mu_msg_iter_get_msg(iter, NULL)));
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
threadpath = ti->threadpath;
|
||||
/* fputs (threadpath, stdout); */
|
||||
/* fputs (" ", stdout); */
|
||||
|
||||
is_root = ti->prop & MU_MSG_ITER_THREAD_PROP_ROOT;
|
||||
first_child = ti->prop & MU_MSG_ITER_THREAD_PROP_FIRST_CHILD;
|
||||
empty_parent = ti->prop & MU_MSG_ITER_THREAD_PROP_EMPTY_PARENT;
|
||||
is_dup = ti->prop & MU_MSG_ITER_THREAD_PROP_DUP;
|
||||
|
||||
/* count the colons... */
|
||||
for (i = 0; *threadpath; ++threadpath)
|
||||
i += (*threadpath == ':') ? 1 : 0;
|
||||
|
||||
|
||||
/* indent */
|
||||
while (i --> 0)
|
||||
fputs (" ", stdout);
|
||||
fputs (" ", stdout);
|
||||
|
||||
if (color)
|
||||
fputs (MU_COLOR_YELLOW, stdout);
|
||||
|
||||
if (!is_root) {
|
||||
if (is_dup)
|
||||
fputs ("==>", stdout);
|
||||
else if (first_child)
|
||||
fputs (empty_parent ? "*-> " : "`-> ", stdout);
|
||||
else
|
||||
fputs ("|-> ", stdout);
|
||||
}
|
||||
|
||||
if (color)
|
||||
fputs (MU_COLOR_DEFAULT, stdout);
|
||||
}
|
||||
|
||||
|
||||
|
@ -582,7 +612,7 @@ output_plain_fields (MuMsgIter *iter, const char *fields, gboolean color,
|
|||
size_t len;
|
||||
|
||||
if (threads)
|
||||
indent (iter);
|
||||
thread_indent (iter, color);
|
||||
|
||||
for (myfields = fields, len = 0; *myfields; ++myfields) {
|
||||
|
||||
|
|
|
@ -39,11 +39,11 @@ class ThreadKeyMaker: public Xapian::KeyMaker {
|
|||
public:
|
||||
ThreadKeyMaker (GHashTable *threadinfo): _threadinfo(threadinfo) {}
|
||||
virtual std::string operator()(const Xapian::Document &doc) const {
|
||||
const char *key;
|
||||
key = (const char*)g_hash_table_lookup
|
||||
MuMsgIterThreadInfo *ti;
|
||||
ti = (MuMsgIterThreadInfo*)g_hash_table_lookup
|
||||
(_threadinfo,
|
||||
GUINT_TO_POINTER(doc.get_docid()));
|
||||
return std::string (key ? key : "");
|
||||
return std::string (ti && ti->threadpath ? ti->threadpath : "");
|
||||
}
|
||||
private:
|
||||
GHashTable *_threadinfo;
|
||||
|
@ -58,7 +58,8 @@ struct _MuMsgIter {
|
|||
|
||||
if (threads && !_matches.empty()) {
|
||||
_matches.fetch();
|
||||
_threadhash = mu_msg_threader_calculate (this, _matches.size());
|
||||
_threadhash = mu_msg_threader_calculate
|
||||
(this, _matches.size());
|
||||
ThreadKeyMaker keymaker(_threadhash);
|
||||
enq.set_sort_by_key (&keymaker, false);
|
||||
_matches = _enq.get_mset (0, maxnum);
|
||||
|
@ -221,19 +222,26 @@ mu_msg_iter_get_docid (MuMsgIter *iter)
|
|||
}
|
||||
|
||||
|
||||
const char*
|
||||
mu_msg_iter_get_thread_path (MuMsgIter *iter)
|
||||
const MuMsgIterThreadInfo*
|
||||
mu_msg_iter_get_thread_info (MuMsgIter *iter)
|
||||
{
|
||||
g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL);
|
||||
g_return_val_if_fail (iter->_threadhash, NULL);
|
||||
|
||||
try {
|
||||
const MuMsgIterThreadInfo *ti;
|
||||
unsigned int docid;
|
||||
|
||||
docid = mu_msg_iter_get_docid (iter);
|
||||
ti = (const MuMsgIterThreadInfo*)g_hash_table_lookup
|
||||
(iter->_threadhash,
|
||||
GUINT_TO_POINTER(docid));
|
||||
|
||||
if (!ti)
|
||||
g_printerr ("no ti for %u\n", docid);
|
||||
|
||||
return (const char*)g_hash_table_lookup
|
||||
(iter->_threadhash, GUINT_TO_POINTER(docid));
|
||||
|
||||
return ti;
|
||||
|
||||
} MU_XAPIAN_CATCH_BLOCK_RETURN (NULL);
|
||||
}
|
||||
|
||||
|
|
|
@ -128,14 +128,28 @@ unsigned int mu_msg_iter_get_docid (MuMsgIter *iter);
|
|||
gboolean mu_msg_iter_calculate_threads (MuMsgIter *iter);
|
||||
|
||||
|
||||
enum _MuMsgIterThreadProp {
|
||||
MU_MSG_ITER_THREAD_PROP_ROOT = 1 << 0,
|
||||
MU_MSG_ITER_THREAD_PROP_FIRST_CHILD = 1 << 1,
|
||||
MU_MSG_ITER_THREAD_PROP_EMPTY_PARENT = 1 << 2,
|
||||
MU_MSG_ITER_THREAD_PROP_DUP = 1 << 3
|
||||
};
|
||||
typedef guint8 MuMsgIterThreadProp;
|
||||
|
||||
struct _MuMsgIterThreadInfo {
|
||||
gchar *threadpath;
|
||||
MuMsgIterThreadProp prop;
|
||||
};
|
||||
typedef struct _MuMsgIterThreadInfo MuMsgIterThreadInfo;
|
||||
|
||||
/**
|
||||
* get a sortable string describing the path of a thread
|
||||
* get the MuMsgIterThreadInfo struct for this message
|
||||
*
|
||||
* @param iter a valid MuMsgIter iterator
|
||||
*
|
||||
* @return a thread path
|
||||
* @return an info struct
|
||||
*/
|
||||
const char* mu_msg_iter_get_thread_path (MuMsgIter *iter);
|
||||
const MuMsgIterThreadInfo* mu_msg_iter_get_thread_info (MuMsgIter *iter);
|
||||
|
||||
/**
|
||||
* get some message field
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
**
|
||||
*/
|
||||
#include <math.h> /* for log, ceil */
|
||||
|
||||
#include "mu-msg-threader.h"
|
||||
#include "mu-str.h"
|
||||
|
@ -40,28 +41,24 @@
|
|||
* Msg5 (child of Msg4) => 00001:00001:00000
|
||||
* Msg6 => 00002
|
||||
*
|
||||
* the padding-0's are added to make them easy to sort using strcmp
|
||||
* the padding-0's are added to make them easy to sort using strcmp;
|
||||
* the numbers are hexadecimal, and the length of the 'segments'
|
||||
* (the parts separated by the ':') is equal to ceil(log_16(matchnum))
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Container data structure, as seen in the JWZ-doc; one difference
|
||||
* is that I use GSLists for the children, rather than 'next'
|
||||
* pointers
|
||||
*
|
||||
* the _state is for pruning; when traversing the tree, i mark
|
||||
* containers with NUKE or SPLICE, and then do it afterwards; that
|
||||
* way, I don't have to change the very list I am iterating over...
|
||||
*
|
||||
* */
|
||||
enum _ContainerState { NUKE, SPLICE, OKAY };
|
||||
typedef enum _ContainerState ContainerState;
|
||||
|
||||
struct _Container {
|
||||
MuMsg *_msg;
|
||||
unsigned int _docid;
|
||||
struct _Container *_parent;
|
||||
GSList *_children;
|
||||
ContainerState _state;
|
||||
gboolean _dup;
|
||||
};
|
||||
typedef struct _Container Container;
|
||||
|
||||
|
@ -76,18 +73,17 @@ static gboolean container_traverse (Container *c,
|
|||
ContainerTraverseFunc func,
|
||||
gpointer user_data);
|
||||
static gboolean container_add_child (Container *c, Container *child);
|
||||
static void container_promote_child (Container *c, Container *child);
|
||||
static void container_splice_child (Container *c, Container *child);
|
||||
static gboolean container_is_root (Container *c);
|
||||
static void container_dump_tree (Container *c);
|
||||
static void container_remove_child (Container *c, Container *child);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* step 1 */ static GHashTable* create_containers (MuMsgIter *iter);
|
||||
/* step 2 */ static GSList *find_root_set (GHashTable *ids);
|
||||
static void prune_empty_containers (GSList *root_set);
|
||||
/* step 2 */ static Container *find_root (GHashTable *ids);
|
||||
static void prune_empty_containers (Container *root);
|
||||
/* static void group_root_set_by_subject (GSList *root_set); */
|
||||
GHashTable* create_doc_id_thread_path_hash (GSList *root_set, size_t match_num);
|
||||
GHashTable* create_doc_id_thread_path_hash (Container *root, size_t match_num);
|
||||
static void sort_by_date (Container *root);
|
||||
|
||||
/* msg threading algorithm, based on JWZ's algorithm,
|
||||
* http://www.jwz.org/doc/threading.html */
|
||||
|
@ -95,38 +91,41 @@ GHashTable*
|
|||
mu_msg_threader_calculate (MuMsgIter *iter, size_t matchnum)
|
||||
{
|
||||
GHashTable *id_table, *thread_ids;
|
||||
GSList *root_set;
|
||||
Container *root;
|
||||
|
||||
g_return_val_if_fail (iter, FALSE);
|
||||
|
||||
/* step 1 */
|
||||
id_table = create_containers (iter);
|
||||
|
||||
/* step 2 */
|
||||
root_set = find_root_set (id_table);
|
||||
|
||||
/* step 2 -- JWZ calls this the 'root-set'; in our case, the
|
||||
* root_set is the list of children for the dummy
|
||||
* root-container */
|
||||
root = find_root (id_table);
|
||||
/* step 3: skip until the end; we still need the containers */
|
||||
|
||||
//container_dump_tree (root);
|
||||
|
||||
/* step 4: prune empty containers */
|
||||
prune_empty_containers (root_set);
|
||||
prune_empty_containers (root);
|
||||
|
||||
/* recalculate root set */
|
||||
g_slist_free (root_set);
|
||||
root_set = find_root_set (id_table);
|
||||
sort_by_date (root);
|
||||
|
||||
//container_dump_tree (root);
|
||||
|
||||
|
||||
/* step 5: group root set by subject */
|
||||
// group_root_set_by_subject (root_set);
|
||||
//group_root_set_by_subject (root_set);
|
||||
|
||||
/* sort */
|
||||
|
||||
|
||||
mu_msg_iter_reset (iter); /* go all the way back */
|
||||
|
||||
/* finally, deliver the docid => thread-path hash */
|
||||
thread_ids = create_doc_id_thread_path_hash (root_set, matchnum);
|
||||
thread_ids = create_doc_id_thread_path_hash (root, matchnum);
|
||||
g_hash_table_destroy (id_table); /* step 3*/
|
||||
|
||||
g_slist_free (root_set);
|
||||
container_destroy (root);
|
||||
|
||||
return thread_ids;
|
||||
}
|
||||
|
||||
|
@ -155,21 +154,31 @@ static Container*
|
|||
find_or_create (GHashTable *id_table, const char* msgid, unsigned docid)
|
||||
{
|
||||
Container *c;
|
||||
|
||||
c = g_hash_table_lookup (id_table, msgid);
|
||||
|
||||
c = g_hash_table_lookup (id_table, msgid);
|
||||
if (!c) {
|
||||
c = container_new (NULL, docid);
|
||||
g_hash_table_insert (id_table, (gpointer)msgid, c);
|
||||
if (docid != 0)
|
||||
g_print ("*1 %s => %u\n", msgid, docid);
|
||||
} else if (c->_docid == 0) {
|
||||
c->_docid = docid;
|
||||
g_print ("*2 %s => %u\n", msgid, docid);
|
||||
} else if (docid != 0) { /* duplicate message-id */
|
||||
/* Container *c2; */
|
||||
/* char *fake_msgid; */
|
||||
/* g_print ("duplicate message-id %s\n", msgid); /\* FIXME: leak *\/ */
|
||||
/* c2 = container_new (NULL, docid); */
|
||||
/* c2->_parent = c; /\* make it a child of the other one...*\/ */
|
||||
/* c2->_dup = TRUE; */
|
||||
/* fake_msgid = g_strdup_printf ("%s_%u", msgid, docid); */
|
||||
/* g_hash_table_insert (id_table, fake_msgid, c2); */
|
||||
/* g_print ("*3 %s => %u\n", fake_msgid, docid); */
|
||||
}
|
||||
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void /* 1B */
|
||||
handle_references (GHashTable *id_table, Container *c)
|
||||
{
|
||||
|
@ -183,13 +192,14 @@ handle_references (GHashTable *id_table, Container *c)
|
|||
for (cur = refs; cur && cur->next; cur = g_slist_next (cur)) {
|
||||
Container *c1, *c2; /* two consecutive refs in the list;
|
||||
* we register them as parent, child */
|
||||
|
||||
c1 = find_or_create (id_table, (gchar*)cur->data, 0);
|
||||
c2 = find_or_create (id_table, (gchar*)cur->next->data, 0);
|
||||
|
||||
container_add_child (c1, c2);
|
||||
}
|
||||
|
||||
/* now cur points to the final ref, which refers to our own
|
||||
/* now cur points to the final ref, which refers to our direct
|
||||
* parent... register it */
|
||||
if (cur) {
|
||||
Container *parent;
|
||||
|
@ -212,14 +222,14 @@ create_containers (MuMsgIter *iter)
|
|||
|
||||
for (mu_msg_iter_reset (iter); !mu_msg_iter_is_done (iter);
|
||||
mu_msg_iter_next (iter)) {
|
||||
|
||||
|
||||
Container *c;
|
||||
MuMsg *msg;
|
||||
unsigned docid;
|
||||
const char *msgid;
|
||||
|
||||
/* 1.A */
|
||||
msg = mu_msg_iter_get_msg (iter, NULL);
|
||||
msg = mu_msg_iter_get_msg (iter, NULL);
|
||||
msgid = mu_msg_get_msgid (msg);
|
||||
docid = mu_msg_iter_get_docid (iter);
|
||||
|
||||
|
@ -238,88 +248,62 @@ create_containers (MuMsgIter *iter)
|
|||
handle_references (id_table, c);
|
||||
}
|
||||
|
||||
|
||||
return id_table;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
filter_root_set (const gchar *msgid, Container *c, GSList **lst)
|
||||
filter_root_set (const gchar *msgid, Container *c, Container *root)
|
||||
{
|
||||
if (!c->_parent && c->_state != NUKE)
|
||||
*lst = g_slist_prepend (*lst, c);
|
||||
if (!c->_parent) /* this *before* adding it to the dummy root */
|
||||
container_add_child (root, c);
|
||||
}
|
||||
|
||||
|
||||
/* 2. Find the root - this is dummy container which takes the
|
||||
* until-now parentless Container-objects as children. JWZ calls this
|
||||
* the 'root_set'
|
||||
|
||||
/* 2. Find the root set. Walk over the elements of id_table, and
|
||||
gather a list of the Container objects that have no parents, but do
|
||||
have children */
|
||||
static GSList*
|
||||
find_root_set (GHashTable *ids)
|
||||
Walk over the elements of id_table, and gather a list of the
|
||||
Container objects that have no parents, but do have children */
|
||||
static Container*
|
||||
find_root (GHashTable *ids)
|
||||
{
|
||||
GSList *lst;
|
||||
|
||||
lst = NULL;
|
||||
g_hash_table_foreach (ids, (GHFunc)filter_root_set, &lst);
|
||||
|
||||
return lst;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
do_pruning (GSList *containers)
|
||||
{
|
||||
GSList *cur;
|
||||
Container *root;
|
||||
|
||||
/* now, do stuff to our children... */
|
||||
for (cur = containers; cur; cur = g_slist_next(cur)) {
|
||||
root = container_new (NULL, 0);
|
||||
|
||||
Container *child;
|
||||
child = (Container *)cur->data;
|
||||
|
||||
if (child->_state == SPLICE) {
|
||||
/* g_printerr ("SPLICE %p\n", (void*)child); */
|
||||
container_promote_child (child->_parent, child);
|
||||
//container_remove_child (child->_parent, child);
|
||||
|
||||
} else if (child->_state == NUKE) {
|
||||
/* g_printerr ("NUKE %p\n", (void*)child); */
|
||||
container_remove_child (child->_parent, child);
|
||||
}
|
||||
|
||||
child->_state = OKAY;
|
||||
}
|
||||
lst = NULL;
|
||||
g_hash_table_foreach (ids, (GHFunc)filter_root_set, root);
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
/* this function will mark 'containers', and then do the pruning on
|
||||
* their children */
|
||||
static void
|
||||
prune_empty_nonroot (GSList *containers)
|
||||
prune_empty_containers (Container *container)
|
||||
{
|
||||
GSList *cur;
|
||||
|
||||
for (cur = containers; cur; cur = g_slist_next (cur)) {
|
||||
|
||||
Container *container;
|
||||
|
||||
container = (Container*)cur->data;
|
||||
|
||||
if (container->_children) {
|
||||
prune_empty_nonroot (container->_children); /* recurse! */
|
||||
do_pruning (container->_children);
|
||||
}
|
||||
|
||||
for (cur = container->_children; cur; cur = g_slist_next (cur)) {
|
||||
|
||||
Container *c;
|
||||
c = (Container*)cur->data;
|
||||
|
||||
prune_empty_containers (c); /* recurse! */
|
||||
|
||||
/* don't touch containers with messages */
|
||||
if (container->_msg)
|
||||
if (c->_msg)
|
||||
continue;
|
||||
|
||||
|
||||
/* A. If it is an msg-less container with no children, nuke it. */
|
||||
if (!container->_children)
|
||||
container->_state = NUKE;
|
||||
|
||||
if (!c->_children) {
|
||||
container_remove_child (c->_parent, c);
|
||||
continue;
|
||||
}
|
||||
/* B. If the Container has no Message, but does have
|
||||
* children, remove this container but promote its
|
||||
* children to this level (that is, splice them in to
|
||||
|
@ -327,40 +311,17 @@ prune_empty_nonroot (GSList *containers)
|
|||
*
|
||||
* Do not promote the children if doing so would
|
||||
* promote them to the root set -- unless there is
|
||||
* only one child, in which case, do. */
|
||||
else if ((container->_parent ||
|
||||
g_slist_length(container->_children) == 1))
|
||||
container->_state = SPLICE;
|
||||
* only one child, in which case, do.
|
||||
*/
|
||||
if (container_is_root(container) &&
|
||||
g_slist_length(c->_children) != 1)
|
||||
continue;
|
||||
|
||||
container_splice_child (container, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* 4. Prune empty containers */
|
||||
static void
|
||||
prune_empty_containers (GSList *root_set)
|
||||
{
|
||||
GSList *cur;
|
||||
|
||||
/* everything below the root_set will be pruned */
|
||||
prune_empty_nonroot (root_set);
|
||||
|
||||
/* no, clear up the root_set itself... */
|
||||
for (cur = root_set; cur; cur = g_slist_next(cur)) {
|
||||
|
||||
Container *c;
|
||||
c = (Container *)cur->data;
|
||||
|
||||
if (c->_state == SPLICE) {
|
||||
/* make child parent-less, so the become part of the root_set */
|
||||
GSList *iter; /* there should be only 1... */
|
||||
for (iter = c->_children; iter; iter = g_slist_next(iter))
|
||||
((Container*)iter->data)->_parent = NULL;
|
||||
c->_children = NULL;
|
||||
c->_state = NUKE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
/* 5. group root set by subject */
|
||||
|
@ -410,82 +371,195 @@ group_root_set_by_subject (GSList *root_set)
|
|||
|
||||
#endif
|
||||
|
||||
struct _ThreadInfo {
|
||||
GHashTable *hash;
|
||||
GQueue *idqueue;
|
||||
unsigned prev_level;
|
||||
};
|
||||
typedef struct _ThreadInfo ThreadInfo;
|
||||
|
||||
static void
|
||||
accumulate_path (int seq, char **threadpath)
|
||||
static gint
|
||||
cmp_dates (Container *c1, Container *c2)
|
||||
{
|
||||
if (*threadpath) {
|
||||
char *path;
|
||||
path = g_strdup_printf ("%s:%05d", *threadpath, seq);
|
||||
g_free (*threadpath);
|
||||
*threadpath = path;
|
||||
} else
|
||||
*threadpath = g_strdup_printf ("%05d", seq);
|
||||
MuMsg *m1, *m2;
|
||||
m1 = c1->_msg;
|
||||
m2 = c2->_msg;
|
||||
|
||||
if (!m1)
|
||||
return m2 ? 1 : 0;
|
||||
if (!m2)
|
||||
return m1 ? 0 : 1;
|
||||
|
||||
return mu_msg_get_date (m1) - mu_msg_get_date (m2);
|
||||
}
|
||||
|
||||
/* let's make a GtkTreePath compatible thread path */
|
||||
static gboolean
|
||||
add_thread_path (Container *c, guint level, ThreadInfo *ti)
|
||||
static void
|
||||
sort_by_date (Container *container)
|
||||
{
|
||||
gchar *threadpath;
|
||||
GSList *cur;
|
||||
|
||||
for (cur = container->_children; cur; cur = g_slist_next(cur)) {
|
||||
Container *c;
|
||||
c = (Container*)cur->data;
|
||||
sort_by_date (c); /* recurse */
|
||||
}
|
||||
|
||||
container->_children = g_slist_sort (container->_children,
|
||||
(GCompareFunc)cmp_dates);
|
||||
}
|
||||
|
||||
static MuMsgIterThreadInfo*
|
||||
thread_info_new (gchar *threadpath, gboolean root,
|
||||
gboolean first_child, gboolean empty_parent, gboolean is_dup)
|
||||
{
|
||||
MuMsgIterThreadInfo *ti;
|
||||
|
||||
ti = g_slice_new (MuMsgIterThreadInfo);
|
||||
ti->threadpath = threadpath;
|
||||
|
||||
ti->prop = 0;
|
||||
ti->prop |= root ? MU_MSG_ITER_THREAD_PROP_ROOT : 0;
|
||||
ti->prop |= first_child ? MU_MSG_ITER_THREAD_PROP_FIRST_CHILD : 0;
|
||||
ti->prop |= empty_parent ? MU_MSG_ITER_THREAD_PROP_EMPTY_PARENT : 0;
|
||||
ti->prop |= is_dup ? MU_MSG_ITER_THREAD_PROP_DUP : 0;
|
||||
|
||||
if (is_dup)
|
||||
g_print ("dup: %s\n", threadpath);
|
||||
|
||||
return ti;
|
||||
}
|
||||
|
||||
static void
|
||||
thread_info_destroy (MuMsgIterThreadInfo *ti)
|
||||
{
|
||||
if (ti) {
|
||||
g_free (ti->threadpath);
|
||||
g_slice_free (MuMsgIterThreadInfo, ti);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct _ThreadInfo {
|
||||
GHashTable *hash;
|
||||
GQueue *idqueue;
|
||||
unsigned prev_level;
|
||||
const char* format;
|
||||
};
|
||||
typedef struct _ThreadInfo ThreadInfo;
|
||||
|
||||
|
||||
struct _TP {
|
||||
char *threadpath;
|
||||
const char *frmt;
|
||||
};
|
||||
typedef struct _TP TP;
|
||||
|
||||
|
||||
static void
|
||||
accumulate_path (int seq, TP *tp)
|
||||
{
|
||||
char segm[16];
|
||||
snprintf (segm, sizeof(segm), tp->frmt, seq);
|
||||
|
||||
if (tp->threadpath) {
|
||||
char *path;
|
||||
path = g_strdup_printf ("%s:%s", tp->threadpath, segm);
|
||||
g_free (tp->threadpath);
|
||||
tp->threadpath = path;
|
||||
} else
|
||||
tp->threadpath = g_strdup (segm);
|
||||
}
|
||||
|
||||
static void
|
||||
add_to_thread_info_hash (GHashTable *thread_info_hash, Container *c,
|
||||
char *threadpath)
|
||||
{
|
||||
gboolean is_root, first_child, empty_parent;
|
||||
|
||||
/* 'root' means we're a child of the dummy root-container */
|
||||
is_root = container_is_root (c);
|
||||
|
||||
first_child = is_root ? FALSE : (c->_parent->_children->data == c);
|
||||
empty_parent = is_root ? FALSE : (!c->_parent->_msg);
|
||||
|
||||
g_hash_table_insert (thread_info_hash,
|
||||
GUINT_TO_POINTER(c->_docid),
|
||||
thread_info_new (threadpath,
|
||||
container_is_root (c),
|
||||
first_child,
|
||||
empty_parent,
|
||||
c->_dup));
|
||||
}
|
||||
|
||||
/* devise a printf format string ("%0<N>x") whose width N is the
 * minimum number of hexadecimal digits needed to print every sequence
 * number in the range 0 .. matchnum-1, so that the zero-padded
 * segments sort correctly with strcmp.
 *
 * The width is computed with integer arithmetic; the earlier
 * ceil(log(matchnum)/log(16)) was undefined for matchnum == 0 (log(0)
 * is -infinity, and converting that to unsigned is undefined) and was
 * subject to floating-point rounding. The width is always at least 1.
 *
 * @param matchnum the number of matches
 *
 * @return the format string; this points to static memory which is
 * overwritten by the next call (so, not reentrant); do not free it
 */
const char*
thread_segment_format_string (size_t matchnum)
{
	unsigned digitnum;
	size_t largest;
	static char frmt[16];

	/* the largest sequence number we may have to print */
	largest = matchnum ? matchnum - 1 : 0;

	/* count the hex digits in 'largest', one nibble at a time */
	for (digitnum = 1; largest > 0xf; largest >>= 4)
		++digitnum;

	snprintf (frmt, sizeof(frmt), "%%0%ux", digitnum);

	return frmt;
}
|
||||
|
||||
static gboolean
|
||||
add_thread_info (Container *c, guint level, ThreadInfo *ti)
|
||||
{
|
||||
TP tp;
|
||||
unsigned i;
|
||||
|
||||
/* ignore our dummy root container */
|
||||
if (!c->_parent)
|
||||
return TRUE;
|
||||
|
||||
if (level > ti->prev_level) {
|
||||
for (i = ti->prev_level; i != level; ++i)
|
||||
g_queue_push_tail (ti->idqueue, GUINT_TO_POINTER(0));
|
||||
} else if (level <= ti->prev_level) {
|
||||
int oldseq;
|
||||
|
||||
/* level == ti->prev_level, the for-loop is void */
|
||||
for (i = level; i != ti->prev_level; ++i)
|
||||
(void)g_queue_pop_tail (ti->idqueue);
|
||||
for (i = level; i < ti->prev_level; ++i)
|
||||
g_queue_pop_tail (ti->idqueue);
|
||||
|
||||
oldseq = GPOINTER_TO_UINT(g_queue_pop_tail(ti->idqueue));
|
||||
g_queue_push_tail (ti->idqueue,GUINT_TO_POINTER(1 + oldseq));
|
||||
if (g_queue_is_empty (ti->idqueue))
|
||||
g_queue_push_tail(ti->idqueue, GUINT_TO_POINTER(0));
|
||||
else {
|
||||
oldseq = GPOINTER_TO_UINT(g_queue_pop_tail(ti->idqueue));
|
||||
g_queue_push_tail(ti->idqueue, GUINT_TO_POINTER(1 + oldseq));
|
||||
}
|
||||
}
|
||||
|
||||
threadpath = NULL;
|
||||
g_queue_foreach (ti->idqueue, (GFunc)accumulate_path, &threadpath);
|
||||
if (c->_docid) /* don't put empty (pseudo) in the hash */
|
||||
g_hash_table_insert (ti->hash, GUINT_TO_POINTER(c->_docid),
|
||||
threadpath);
|
||||
else
|
||||
g_free (threadpath);
|
||||
|
||||
ti->prev_level = level;
|
||||
|
||||
if (!c->_docid)
|
||||
return TRUE; /* nothing more to do - it's a 'virtual'
|
||||
* message */
|
||||
|
||||
tp.threadpath = NULL;
|
||||
tp.frmt = ti->format;
|
||||
|
||||
g_queue_foreach (ti->idqueue, (GFunc)accumulate_path, &tp);
|
||||
|
||||
add_to_thread_info_hash (ti->hash, c, tp.threadpath);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
GHashTable*
|
||||
create_doc_id_thread_path_hash (GSList *root_set, size_t matchnum)
|
||||
create_doc_id_thread_path_hash (Container *root, size_t matchnum)
|
||||
{
|
||||
ThreadInfo ti;
|
||||
GSList *cur;
|
||||
int i;
|
||||
|
||||
/* create hash docid => thread-path */
|
||||
ti.hash = g_hash_table_new_full (g_direct_hash, g_direct_equal,
|
||||
NULL,
|
||||
(GDestroyNotify)g_free);
|
||||
|
||||
(GDestroyNotify)thread_info_destroy);
|
||||
ti.idqueue = g_queue_new ();
|
||||
g_queue_push_tail (ti.idqueue, GUINT_TO_POINTER(0));
|
||||
|
||||
ti.prev_level = 0;
|
||||
|
||||
for (i = 0, cur = root_set; cur; cur = g_slist_next (cur))
|
||||
container_traverse ((Container*)cur->data, 0,
|
||||
(ContainerTraverseFunc)add_thread_path,
|
||||
&ti);
|
||||
ti.format = thread_segment_format_string (matchnum);
|
||||
|
||||
container_traverse (root, 0, (ContainerTraverseFunc)add_thread_info,
|
||||
&ti);
|
||||
|
||||
g_queue_free (ti.idqueue);
|
||||
return ti.hash;
|
||||
|
@ -497,12 +571,14 @@ container_new (MuMsg *msg, unsigned docid)
|
|||
{
|
||||
Container *c;
|
||||
|
||||
c = g_slice_new0 (Container);
|
||||
if (msg)
|
||||
c->_msg = mu_msg_ref (msg);
|
||||
|
||||
c->_docid = docid;
|
||||
c->_state = OKAY;
|
||||
c = g_slice_new (Container);
|
||||
|
||||
c->_msg = msg ? mu_msg_ref (msg) : NULL;
|
||||
c->_docid = docid;
|
||||
c->_dup = FALSE;
|
||||
c->_children = NULL;
|
||||
c->_parent = NULL;
|
||||
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -516,9 +592,8 @@ container_destroy (Container *c)
|
|||
if (c->_msg)
|
||||
mu_msg_unref (c->_msg);
|
||||
|
||||
/* free the list, not the children */
|
||||
c->_parent = (void*)0xdeadbeef;
|
||||
g_slist_free (c->_children);
|
||||
|
||||
g_slice_free (Container, c);
|
||||
}
|
||||
|
||||
|
@ -533,7 +608,7 @@ container_traverse (Container *c, guint level, ContainerTraverseFunc func,
|
|||
int i;
|
||||
|
||||
g_return_val_if_fail (c, FALSE);
|
||||
|
||||
|
||||
if (!func (c, level, user_data))
|
||||
return FALSE;
|
||||
|
||||
|
@ -546,39 +621,51 @@ container_traverse (Container *c, guint level, ContainerTraverseFunc func,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
static gboolean
|
||||
container_add_child (Container *parent, Container *child)
|
||||
{
|
||||
g_return_val_if_fail (parent != child, FALSE);
|
||||
g_return_val_if_fail (child, FALSE);
|
||||
|
||||
|
||||
if (already_referenced(child, parent) ||
|
||||
already_referenced(parent, child))
|
||||
already_referenced(parent, child)) {
|
||||
/* g_print ("already ref'd\n"); */
|
||||
return FALSE;
|
||||
|
||||
}
|
||||
|
||||
child->_parent = parent;
|
||||
parent->_children = g_slist_prepend (parent->_children,
|
||||
child);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
container_promote_child (Container *c, Container *child)
|
||||
container_splice_child (Container *c, Container *child)
|
||||
{
|
||||
GSList *iter;
|
||||
|
||||
g_return_if_fail (c != child);
|
||||
g_return_if_fail (child);
|
||||
g_return_if_fail (!child->_msg);
|
||||
|
||||
for (iter = child->_children; iter; iter = g_slist_next(iter))
|
||||
((Container*)iter->data)->_parent = c; /* reparent
|
||||
* grandchildren */
|
||||
/*
|
||||
* remove the old child
|
||||
*/
|
||||
c->_children = g_slist_remove (c->_children, child);
|
||||
/*
|
||||
* put child's children first, so we can use this while
|
||||
* iterating over c->_children (we are already past the merged
|
||||
* child's children)
|
||||
*/
|
||||
c->_children = g_slist_concat (child->_children, c->_children);
|
||||
|
||||
for (iter = child->_children; iter; iter = g_slist_next(iter))
|
||||
/* reparent grandchildren */
|
||||
((Container*)iter->data)->_parent = c;
|
||||
|
||||
c->_children = g_slist_concat (c->_children,
|
||||
child->_children);
|
||||
child->_children = NULL;
|
||||
child->_parent = NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -587,28 +674,48 @@ container_remove_child (Container *c, Container *child)
|
|||
{
|
||||
g_return_if_fail (c != child);
|
||||
g_return_if_fail (child);
|
||||
|
||||
g_return_if_fail (!child->_children);
|
||||
|
||||
c->_children = g_slist_remove (c->_children, child);
|
||||
}
|
||||
|
||||
|
||||
static gboolean
|
||||
container_is_root (Container *c)
|
||||
{
|
||||
return (!c->_parent || !c->_parent->_parent);
|
||||
}
|
||||
|
||||
G_GNUC_UNUSED static void
|
||||
container_dump (Container *c)
|
||||
{
|
||||
const char* state;
|
||||
switch (c->_state) {
|
||||
case NUKE: state = "NUKE"; break;
|
||||
case SPLICE: state = "SPLICE"; break;
|
||||
case OKAY: state = "OKAY"; break;
|
||||
default: state = "HUH"; break;
|
||||
};
|
||||
|
||||
g_print ("[%s] { %p parent=%p msg=%p [%s] children: %d state: %s}\n",
|
||||
{
|
||||
g_print ("[%s] { %p parent=%p msg=%p docid=%u [%s] children: %d }\n",
|
||||
c->_msg ? mu_msg_get_subject(c->_msg) : "<empty>",
|
||||
(void*)c,
|
||||
(void*)c->_parent, (void*)c->_msg,
|
||||
c->_docid,
|
||||
c->_msg ? mu_msg_get_msgid(c->_msg) : "",
|
||||
g_slist_length (c->_children), state);
|
||||
g_slist_length (c->_children));
|
||||
}
|
||||
|
||||
|
||||
static gboolean
|
||||
each_container (Container *c, guint level)
|
||||
{
|
||||
while (level--)
|
||||
fputs (" ", stdout);
|
||||
|
||||
container_dump (c);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
G_GNUC_UNUSED static void
|
||||
container_dump_tree (Container *c)
|
||||
{
|
||||
container_traverse (c, 0, (ContainerTraverseFunc)each_container, NULL);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -28,8 +28,26 @@
|
|||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
/**
|
||||
* takes an iter and the total number of matches, and from this
|
||||
* generates a hash-table with information about the thread structure
|
||||
* of these matches.
|
||||
*
|
||||
* the algorithm to find this structure is based on JWZ's
|
||||
* message-threading algorithm, as described in:
|
||||
* http://www.jwz.org/doc/threading.html
|
||||
*
|
||||
* the returned hashtable maps the Xapian docid of iter (msg) to a ptr
|
||||
* to a MuMsgIterThreadInfo structure (see mu-msg-iter.h)
|
||||
*
|
||||
* @param iter an iter; note this function will mu_msg_iter_reset this iterator
|
||||
* @param matches the number of matches in the set
|
||||
*
|
||||
* @return a hashtable; free with g_hash_table_destroy when done with it
|
||||
*/
|
||||
GHashTable *mu_msg_threader_calculate (MuMsgIter *iter, size_t matches);
|
||||
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
#endif /*__MU_MSG_THREADER_H__*/
|
||||
|
|
Loading…
Reference in New Issue