From eb9b0c6de87f2b3a8ad720cdbc7505188fae7e95 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Fri, 24 Jun 2011 00:21:54 +0300 Subject: [PATCH] * update threading implementation (WIP still) --- src/mu-cmd-find.c | 44 +++- src/mu-msg-iter.cc | 26 ++- src/mu-msg-iter.h | 20 +- src/mu-msg-threader.c | 513 +++++++++++++++++++++++++----------------- src/mu-msg-threader.h | 18 ++ 5 files changed, 399 insertions(+), 222 deletions(-) diff --git a/src/mu-cmd-find.c b/src/mu-cmd-find.c index a361853c..1f1484fc 100644 --- a/src/mu-cmd-find.c +++ b/src/mu-cmd-find.c @@ -39,8 +39,10 @@ #include "mu-bookmarks.h" #include "mu-runtime.h" + #include "mu-util.h" #include "mu-cmd.h" +#include "mu-msg-threader.h" enum _OutputFormat { FORMAT_JSON, @@ -552,24 +554,52 @@ print_summary (MuMsgIter *iter) static void -indent (MuMsgIter *iter) +thread_indent (MuMsgIter *iter, gboolean color) { + const MuMsgIterThreadInfo *ti; const char* threadpath; int i; + gboolean is_root, first_child, empty_parent, is_dup; - threadpath = mu_msg_iter_get_thread_path (iter); - if (!threadpath) + + ti = mu_msg_iter_get_thread_info (iter); + if (!ti) { + g_warning ("cannot get thread-info for %s", + mu_msg_get_subject(mu_msg_iter_get_msg(iter, NULL))); return; - + } + + threadpath = ti->threadpath; /* fputs (threadpath, stdout); */ + /* fputs (" ", stdout); */ + + is_root = ti->prop & MU_MSG_ITER_THREAD_PROP_ROOT; + first_child = ti->prop & MU_MSG_ITER_THREAD_PROP_FIRST_CHILD; + empty_parent = ti->prop & MU_MSG_ITER_THREAD_PROP_EMPTY_PARENT; + is_dup = ti->prop & MU_MSG_ITER_THREAD_PROP_DUP; /* count the colons... */ for (i = 0; *threadpath; ++threadpath) i += (*threadpath == ':') ? 1 : 0; - + /* indent */ while (i --> 0) - fputs (" ", stdout); + fputs (" ", stdout); + + if (color) + fputs (MU_COLOR_YELLOW, stdout); + + if (!is_root) { + if (is_dup) + fputs ("==>", stdout); + else if (first_child) + fputs (empty_parent ? 
"*-> " : "`-> ", stdout); + else + fputs ("|-> ", stdout); + } + + if (color) + fputs (MU_COLOR_DEFAULT, stdout); } @@ -582,7 +612,7 @@ output_plain_fields (MuMsgIter *iter, const char *fields, gboolean color, size_t len; if (threads) - indent (iter); + thread_indent (iter, color); for (myfields = fields, len = 0; *myfields; ++myfields) { diff --git a/src/mu-msg-iter.cc b/src/mu-msg-iter.cc index 7860c5b9..55564af5 100644 --- a/src/mu-msg-iter.cc +++ b/src/mu-msg-iter.cc @@ -39,11 +39,11 @@ class ThreadKeyMaker: public Xapian::KeyMaker { public: ThreadKeyMaker (GHashTable *threadinfo): _threadinfo(threadinfo) {} virtual std::string operator()(const Xapian::Document &doc) const { - const char *key; - key = (const char*)g_hash_table_lookup + MuMsgIterThreadInfo *ti; + ti = (MuMsgIterThreadInfo*)g_hash_table_lookup (_threadinfo, GUINT_TO_POINTER(doc.get_docid())); - return std::string (key ? key : ""); + return std::string (ti && ti->threadpath ? ti->threadpath : ""); } private: GHashTable *_threadinfo; @@ -58,7 +58,8 @@ struct _MuMsgIter { if (threads && !_matches.empty()) { _matches.fetch(); - _threadhash = mu_msg_threader_calculate (this, _matches.size()); + _threadhash = mu_msg_threader_calculate + (this, _matches.size()); ThreadKeyMaker keymaker(_threadhash); enq.set_sort_by_key (&keymaker, false); _matches = _enq.get_mset (0, maxnum); @@ -221,19 +222,26 @@ mu_msg_iter_get_docid (MuMsgIter *iter) } -const char* -mu_msg_iter_get_thread_path (MuMsgIter *iter) +const MuMsgIterThreadInfo* +mu_msg_iter_get_thread_info (MuMsgIter *iter) { g_return_val_if_fail (!mu_msg_iter_is_done(iter), NULL); g_return_val_if_fail (iter->_threadhash, NULL); try { + const MuMsgIterThreadInfo *ti; unsigned int docid; + docid = mu_msg_iter_get_docid (iter); + ti = (const MuMsgIterThreadInfo*)g_hash_table_lookup + (iter->_threadhash, + GUINT_TO_POINTER(docid)); + + if (!ti) + g_printerr ("no ti for %u\n", docid); - return (const char*)g_hash_table_lookup - (iter->_threadhash, 
GUINT_TO_POINTER(docid)); - + return ti; + } MU_XAPIAN_CATCH_BLOCK_RETURN (NULL); } diff --git a/src/mu-msg-iter.h b/src/mu-msg-iter.h index 6f93b7a1..90873fb5 100644 --- a/src/mu-msg-iter.h +++ b/src/mu-msg-iter.h @@ -128,14 +128,28 @@ unsigned int mu_msg_iter_get_docid (MuMsgIter *iter); gboolean mu_msg_iter_calculate_threads (MuMsgIter *iter); +enum _MuMsgIterThreadProp { + MU_MSG_ITER_THREAD_PROP_ROOT = 1 << 0, + MU_MSG_ITER_THREAD_PROP_FIRST_CHILD = 1 << 1, + MU_MSG_ITER_THREAD_PROP_EMPTY_PARENT = 1 << 2, + MU_MSG_ITER_THREAD_PROP_DUP = 1 << 3 +}; +typedef guint8 MuMsgIterThreadProp; + +struct _MuMsgIterThreadInfo { + gchar *threadpath; + MuMsgIterThreadProp prop; +}; +typedef struct _MuMsgIterThreadInfo MuMsgIterThreadInfo; + /** - * get a sortable string describing the path of a thread + * get the MuMsgIterThreadInfo struct for this message * * @param iter a valid MuMsgIter iterator * - * @return a thread path + * @return an info struct */ -const char* mu_msg_iter_get_thread_path (MuMsgIter *iter); +const MuMsgIterThreadInfo* mu_msg_iter_get_thread_info (MuMsgIter *iter); /** * get some message field diff --git a/src/mu-msg-threader.c b/src/mu-msg-threader.c index d0fa6946..88a8f3e6 100644 --- a/src/mu-msg-threader.c +++ b/src/mu-msg-threader.c @@ -17,6 +17,7 @@ ** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
** */ +#include <math.h> /* for log, ceil */ #include "mu-msg-threader.h" #include "mu-str.h" @@ -40,28 +41,24 @@ * Msg5 (child of Msg4) => 00001:00001:00000 * Msg6 => 00002 * - * the padding-0's are added to make them easy to sort using strcmp + * the padding-0's are added to make them easy to sort using strcmp; + * the numbers are hexadecimal, and the length of the 'segments' + * (the parts separated by the ':') is equal to ceil(log_16(matchnum)) * */ + /* Container data structure, as seen in the JWZ-doc; one differences * is that I use GSLists for the children, rather than 'next' * pointers * - * the _state is for pruning; when traversing the tree, i mark - * containers with NUKE or SPLICE, and then do it afterwards; that - * way, I don't have to change the very list I am iterating over... - * * */ -enum _ContainerState { NUKE, SPLICE, OKAY }; -typedef enum _ContainerState ContainerState; - struct _Container { MuMsg *_msg; unsigned int _docid; struct _Container *_parent; GSList *_children; - ContainerState _state; + gboolean _dup; }; typedef struct _Container Container; @@ -76,18 +73,17 @@ static gboolean container_traverse (Container *c, ContainerTraverseFunc func, gpointer user_data); static gboolean container_add_child (Container *c, Container *child); -static void container_promote_child (Container *c, Container *child); +static void container_splice_child (Container *c, Container *child); +static gboolean container_is_root (Container *c); +static void container_dump_tree (Container *c); static void container_remove_child (Container *c, Container *child); - - - - /* step 1 */ static GHashTable* create_containers (MuMsgIter *iter); -/* step 2 */ static GSList *find_root_set (GHashTable *ids); -static void prune_empty_containers (GSList *root_set); +/* step 2 */ static Container *find_root (GHashTable *ids); +static void prune_empty_containers (Container *root); /* static void group_root_set_by_subject (GSList *root_set); */ -GHashTable* 
create_doc_id_thread_path_hash (GSList *root_set, size_t match_num); +GHashTable* create_doc_id_thread_path_hash (Container *root, size_t match_num); +static void sort_by_date (Container *root); /* msg threading algorithm, based on JWZ's algorithm, * http://www.jwz.org/doc/threading.html */ @@ -95,38 +91,41 @@ GHashTable* mu_msg_threader_calculate (MuMsgIter *iter, size_t matchnum) { GHashTable *id_table, *thread_ids; - GSList *root_set; + Container *root; g_return_val_if_fail (iter, FALSE); /* step 1 */ id_table = create_containers (iter); - /* step 2 */ - root_set = find_root_set (id_table); - + /* step 2 -- JWZ calls this the 'root-set'; in our case, the + * root_set is the list of children for the dummy + * root-container */ + root = find_root (id_table); /* step 3: skip until the end; we still need to containers */ - + //container_dump_tree (root); + /* step 4: prune empty containers */ - prune_empty_containers (root_set); + prune_empty_containers (root); /* recalculate root set */ - g_slist_free (root_set); - root_set = find_root_set (id_table); + sort_by_date (root); + + //container_dump_tree (root); + /* step 5: group root set by subject */ -// group_root_set_by_subject (root_set); + //group_root_set_by_subject (root_set); /* sort */ - - mu_msg_iter_reset (iter); /* go all the way back */ /* finally, deliver the docid => thread-path hash */ - thread_ids = create_doc_id_thread_path_hash (root_set, matchnum); + thread_ids = create_doc_id_thread_path_hash (root, matchnum); g_hash_table_destroy (id_table); /* step 3*/ - g_slist_free (root_set); + container_destroy (root); + return thread_ids; } @@ -155,21 +154,31 @@ static Container* find_or_create (GHashTable *id_table, const char* msgid, unsigned docid) { Container *c; - - c = g_hash_table_lookup (id_table, msgid); + c = g_hash_table_lookup (id_table, msgid); if (!c) { c = container_new (NULL, docid); g_hash_table_insert (id_table, (gpointer)msgid, c); + if (docid != 0) + g_print ("*1 %s => %u\n", msgid, 
docid); + } else if (c->_docid == 0) { + c->_docid = docid; + g_print ("*2 %s => %u\n", msgid, docid); + } else if (docid != 0) { /* duplicate message-id */ + /* Container *c2; */ + /* char *fake_msgid; */ + /* g_print ("duplicate message-id %s\n", msgid); /\* FIXME: leak *\/ */ + /* c2 = container_new (NULL, docid); */ + /* c2->_parent = c; /\* make it a child of the other one...*\/ */ + /* c2->_dup = TRUE; */ + /* fake_msgid = g_strdup_printf ("%s_%u", msgid, docid); */ + /* g_hash_table_insert (id_table, fake_msgid, c2); */ + /* g_print ("*3 %s => %u\n", fake_msgid, docid); */ } - + return c; } - - - - static void /* 1B */ handle_references (GHashTable *id_table, Container *c) { @@ -183,13 +192,14 @@ handle_references (GHashTable *id_table, Container *c) for (cur = refs; cur && cur->next; cur = g_slist_next (cur)) { Container *c1, *c2; /* two consecutive refs in the list; * we register them as parent, child */ + c1 = find_or_create (id_table, (gchar*)cur->data, 0); c2 = find_or_create (id_table, (gchar*)cur->next->data, 0); container_add_child (c1, c2); } - /* now cur points to the final ref, which refers to our own + /* now cur points to the final ref, which refers to our direct * parent... 
register it */ if (cur) { Container *parent; @@ -212,14 +222,14 @@ create_containers (MuMsgIter *iter) for (mu_msg_iter_reset (iter); !mu_msg_iter_is_done (iter); mu_msg_iter_next (iter)) { - + Container *c; MuMsg *msg; unsigned docid; const char *msgid; /* 1.A */ - msg = mu_msg_iter_get_msg (iter, NULL); + msg = mu_msg_iter_get_msg (iter, NULL); msgid = mu_msg_get_msgid (msg); docid = mu_msg_iter_get_docid (iter); @@ -238,88 +248,62 @@ create_containers (MuMsgIter *iter) handle_references (id_table, c); } - return id_table; } static void -filter_root_set (const gchar *msgid, Container *c, GSList **lst) +filter_root_set (const gchar *msgid, Container *c, Container *root) { - if (!c->_parent && c->_state != NUKE) - *lst = g_slist_prepend (*lst, c); + if (!c->_parent) /* this *before* adding it to the dummy root */ + container_add_child (root, c); } +/* 2. Find the root - this is dummy container which takes the + * until-now parentless Container-objects as children. JWZ calls this + * the 'root_set' -/* 2. Find the root set. Walk over the elements of id_table, and - gather a list of the Container objects that have no parents, but do - have children */ -static GSList* -find_root_set (GHashTable *ids) + Walk over the elements of id_table, and gather a list of the + Container objects that have no parents, but do have children */ +static Container* +find_root (GHashTable *ids) { GSList *lst; - - lst = NULL; - g_hash_table_foreach (ids, (GHFunc)filter_root_set, &lst); - - return lst; -} - - - -static void -do_pruning (GSList *containers) -{ - GSList *cur; + Container *root; - /* now, do stuff to our children... 
*/ - for (cur = containers; cur; cur = g_slist_next(cur)) { + root = container_new (NULL, 0); - Container *child; - child = (Container *)cur->data; - - if (child->_state == SPLICE) { - /* g_printerr ("SPLICE %p\n", (void*)child); */ - container_promote_child (child->_parent, child); - //container_remove_child (child->_parent, child); - - } else if (child->_state == NUKE) { - /* g_printerr ("NUKE %p\n", (void*)child); */ - container_remove_child (child->_parent, child); - } - - child->_state = OKAY; - } + lst = NULL; + g_hash_table_foreach (ids, (GHFunc)filter_root_set, root); + + return root; } /* this function will mark 'containers', and the do the pruning on * their children */ static void -prune_empty_nonroot (GSList *containers) +prune_empty_containers (Container *container) { GSList *cur; - for (cur = containers; cur; cur = g_slist_next (cur)) { - - Container *container; - - container = (Container*)cur->data; - - if (container->_children) { - prune_empty_nonroot (container->_children); /* recurse! */ - do_pruning (container->_children); - } - + for (cur = container->_children; cur; cur = g_slist_next (cur)) { + + Container *c; + c = (Container*)cur->data; + + prune_empty_containers (c); /* recurse! */ + /* don't touch containers with messages */ - if (container->_msg) + if (c->_msg) continue; - + /* A. If it is an msg-less container with no children, nuke it. */ - if (!container->_children) - container->_state = NUKE; - + if (!c->_children) { + container_remove_child (c->_parent, c); + continue; + } /* B. If the Container has no Message, but does have * children, remove this container but promote its * children to this level (that is, splice them in to @@ -327,40 +311,17 @@ prune_empty_nonroot (GSList *containers) * * Do not promote the children if doing so would * promote them to the root set -- unless there is - * only one child, in which case, do. 
*/ - else if ((container->_parent || - g_slist_length(container->_children) == 1)) - container->_state = SPLICE; + * only one child, in which case, do. + */ + if (container_is_root(container) && + g_slist_length(c->_children) != 1) + continue; + + container_splice_child (container, c); } } -/* 4. Prune empty containers */ -static void -prune_empty_containers (GSList *root_set) -{ - GSList *cur; - - /* everything below the root_set will be pruned */ - prune_empty_nonroot (root_set); - - /* no, clear up the root_set itself... */ - for (cur = root_set; cur; cur = g_slist_next(cur)) { - - Container *c; - c = (Container *)cur->data; - - if (c->_state == SPLICE) { - /* make child parent-less, so the become part of the root_set */ - GSList *iter; /* there should be only 1... */ - for (iter = c->_children; iter; iter = g_slist_next(iter)) - ((Container*)iter->data)->_parent = NULL; - c->_children = NULL; - c->_state = NUKE; - } - } -} - #if 0 /* 5. group root set by subject */ @@ -410,82 +371,195 @@ group_root_set_by_subject (GSList *root_set) #endif -struct _ThreadInfo { - GHashTable *hash; - GQueue *idqueue; - unsigned prev_level; -}; -typedef struct _ThreadInfo ThreadInfo; -static void -accumulate_path (int seq, char **threadpath) +static gint +cmp_dates (Container *c1, Container *c2) { - if (*threadpath) { - char *path; - path = g_strdup_printf ("%s:%05d", *threadpath, seq); - g_free (*threadpath); - *threadpath = path; - } else - *threadpath = g_strdup_printf ("%05d", seq); + MuMsg *m1, *m2; + m1 = c1->_msg; + m2 = c2->_msg; + + if (!m1) + return m2 ? 1 : 0; + if (!m2) + return m1 ? 
0 : 1; + + return mu_msg_get_date (m1) - mu_msg_get_date (m2); } -/* let's make a GtkTreePath compatible thread path */ -static gboolean -add_thread_path (Container *c, guint level, ThreadInfo *ti) +static void +sort_by_date (Container *container) { - gchar *threadpath; + GSList *cur; + + for (cur = container->_children; cur; cur = g_slist_next(cur)) { + Container *c; + c = (Container*)cur->data; + sort_by_date (c); /* recurse */ + } + + container->_children = g_slist_sort (container->_children, + (GCompareFunc)cmp_dates); +} + +static MuMsgIterThreadInfo* +thread_info_new (gchar *threadpath, gboolean root, + gboolean first_child, gboolean empty_parent, gboolean is_dup) +{ + MuMsgIterThreadInfo *ti; + + ti = g_slice_new (MuMsgIterThreadInfo); + ti->threadpath = threadpath; + + ti->prop = 0; + ti->prop |= root ? MU_MSG_ITER_THREAD_PROP_ROOT : 0; + ti->prop |= first_child ? MU_MSG_ITER_THREAD_PROP_FIRST_CHILD : 0; + ti->prop |= empty_parent ? MU_MSG_ITER_THREAD_PROP_EMPTY_PARENT : 0; + ti->prop |= is_dup ? 
MU_MSG_ITER_THREAD_PROP_DUP : 0; + + if (is_dup) + g_print ("dup: %s\n", threadpath); + + return ti; +} + +static void +thread_info_destroy (MuMsgIterThreadInfo *ti) +{ + if (ti) { + g_free (ti->threadpath); + g_slice_free (MuMsgIterThreadInfo, ti); + } +} + + +struct _ThreadInfo { + GHashTable *hash; + GQueue *idqueue; + unsigned prev_level; + const char* format; +}; +typedef struct _ThreadInfo ThreadInfo; + + +struct _TP { + char *threadpath; + const char *frmt; +}; +typedef struct _TP TP; + + +static void +accumulate_path (int seq, TP *tp) +{ + char segm[16]; + snprintf (segm, sizeof(segm), tp->frmt, seq); + + if (tp->threadpath) { + char *path; + path = g_strdup_printf ("%s:%s", tp->threadpath, segm); + g_free (tp->threadpath); + tp->threadpath = path; + } else + tp->threadpath = g_strdup (segm); +} + +static void +add_to_thread_info_hash (GHashTable *thread_info_hash, Container *c, + char *threadpath) +{ + gboolean is_root, first_child, empty_parent; + + /* 'root' means we're a child of the dummy root-container */ + is_root = container_is_root (c); + + first_child = is_root ? FALSE : (c->_parent->_children->data == c); + empty_parent = is_root ? 
FALSE : (!c->_parent->_msg); + + g_hash_table_insert (thread_info_hash, + GUINT_TO_POINTER(c->_docid), + thread_info_new (threadpath, + container_is_root (c), + first_child, + empty_parent, + c->_dup)); +} + +/* device a format string that is the minimum size to fit up to + * matchnum matches -- returns static memory */ +const char* +thread_segment_format_string (size_t matchnum) +{ + unsigned digitnum; + static char frmt[16]; + + /* get the number of digits needed in a hex-representation of + * matchnum */ + digitnum = (unsigned) (ceil (log(matchnum)/log(16))); + snprintf (frmt, sizeof(frmt),"%%0%ux", digitnum); + + return frmt; +} + +static gboolean +add_thread_info (Container *c, guint level, ThreadInfo *ti) +{ + TP tp; unsigned i; + + /* ignore our dummy root container */ + if (!c->_parent) + return TRUE; if (level > ti->prev_level) { for (i = ti->prev_level; i != level; ++i) g_queue_push_tail (ti->idqueue, GUINT_TO_POINTER(0)); } else if (level <= ti->prev_level) { int oldseq; - - /* level == ti->prev_level, the for-loop is void */ - for (i = level; i != ti->prev_level; ++i) - (void)g_queue_pop_tail (ti->idqueue); + for (i = level; i < ti->prev_level; ++i) + g_queue_pop_tail (ti->idqueue); - oldseq = GPOINTER_TO_UINT(g_queue_pop_tail(ti->idqueue)); - g_queue_push_tail (ti->idqueue,GUINT_TO_POINTER(1 + oldseq)); + if (g_queue_is_empty (ti->idqueue)) + g_queue_push_tail(ti->idqueue, GUINT_TO_POINTER(0)); + else { + oldseq = GPOINTER_TO_UINT(g_queue_pop_tail(ti->idqueue)); + g_queue_push_tail(ti->idqueue, GUINT_TO_POINTER(1 + oldseq)); + } } - threadpath = NULL; - g_queue_foreach (ti->idqueue, (GFunc)accumulate_path, &threadpath); - if (c->_docid) /* don't put empty (pseudo) in the hash */ - g_hash_table_insert (ti->hash, GUINT_TO_POINTER(c->_docid), - threadpath); - else - g_free (threadpath); - ti->prev_level = level; + + if (!c->_docid) + return TRUE; /* nothing more to do - it's a 'virtual' + * message */ + + tp.threadpath = NULL; + tp.frmt = ti->format; + + 
g_queue_foreach (ti->idqueue, (GFunc)accumulate_path, &tp); + + add_to_thread_info_hash (ti->hash, c, tp.threadpath); return TRUE; } - GHashTable* -create_doc_id_thread_path_hash (GSList *root_set, size_t matchnum) +create_doc_id_thread_path_hash (Container *root, size_t matchnum) { ThreadInfo ti; - GSList *cur; - int i; /* create hash docid => thread-path */ ti.hash = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL, - (GDestroyNotify)g_free); - + (GDestroyNotify)thread_info_destroy); ti.idqueue = g_queue_new (); - g_queue_push_tail (ti.idqueue, GUINT_TO_POINTER(0)); ti.prev_level = 0; - - for (i = 0, cur = root_set; cur; cur = g_slist_next (cur)) - container_traverse ((Container*)cur->data, 0, - (ContainerTraverseFunc)add_thread_path, - &ti); + ti.format = thread_segment_format_string (matchnum); + + container_traverse (root, 0, (ContainerTraverseFunc)add_thread_info, + &ti); g_queue_free (ti.idqueue); return ti.hash; @@ -497,12 +571,14 @@ container_new (MuMsg *msg, unsigned docid) { Container *c; - c = g_slice_new0 (Container); - if (msg) - c->_msg = mu_msg_ref (msg); - - c->_docid = docid; - c->_state = OKAY; + c = g_slice_new (Container); + + c->_msg = msg ? 
mu_msg_ref (msg) : NULL; + c->_docid = docid; + c->_dup = FALSE; + c->_children = NULL; + c->_parent = NULL; + return c; } @@ -516,9 +592,8 @@ container_destroy (Container *c) if (c->_msg) mu_msg_unref (c->_msg); - /* free the list, not the children */ + c->_parent = (void*)0xdeadbeef; g_slist_free (c->_children); - g_slice_free (Container, c); } @@ -533,7 +608,7 @@ container_traverse (Container *c, guint level, ContainerTraverseFunc func, int i; g_return_val_if_fail (c, FALSE); - + if (!func (c, level, user_data)) return FALSE; @@ -546,39 +621,51 @@ container_traverse (Container *c, guint level, ContainerTraverseFunc func, return TRUE; } - static gboolean container_add_child (Container *parent, Container *child) { g_return_val_if_fail (parent != child, FALSE); g_return_val_if_fail (child, FALSE); - + if (already_referenced(child, parent) || - already_referenced(parent, child)) + already_referenced(parent, child)) { + /* g_print ("already ref'd\n"); */ return FALSE; - + } + child->_parent = parent; parent->_children = g_slist_prepend (parent->_children, child); + return TRUE; } static void -container_promote_child (Container *c, Container *child) +container_splice_child (Container *c, Container *child) { GSList *iter; g_return_if_fail (c != child); g_return_if_fail (child); + g_return_if_fail (!child->_msg); + + for (iter = child->_children; iter; iter = g_slist_next(iter)) + ((Container*)iter->data)->_parent = c; /* reparent + * grandchildren */ + /* + * remove the old child + */ + c->_children = g_slist_remove (c->_children, child); + /* + * put child's children first, so we can use this while + * iterating over c->_children (we are already past the merged + * child's children) + */ + c->_children = g_slist_concat (child->_children, c->_children); - for (iter = child->_children; iter; iter = g_slist_next(iter)) - /* reparent grandchildren */ - ((Container*)iter->data)->_parent = c; - - c->_children = g_slist_concat (c->_children, - child->_children); 
child->_children = NULL; + child->_parent = NULL; } @@ -587,28 +674,48 @@ container_remove_child (Container *c, Container *child) { g_return_if_fail (c != child); g_return_if_fail (child); - + g_return_if_fail (!child->_children); + c->_children = g_slist_remove (c->_children, child); } + +static gboolean +container_is_root (Container *c) +{ + return (!c->_parent || !c->_parent->_parent); +} G_GNUC_UNUSED static void container_dump (Container *c) -{ - const char* state; - switch (c->_state) { - case NUKE: state = "NUKE"; break; - case SPLICE: state = "SPLICE"; break; - case OKAY: state = "OKAY"; break; - default: state = "HUH"; break; - }; - - g_print ("[%s] { %p parent=%p msg=%p [%s] children: %d state: %s}\n", +{ + g_print ("[%s] { %p parent=%p msg=%p docid=%u [%s] children: %d }\n", c->_msg ? mu_msg_get_subject(c->_msg) : "", (void*)c, (void*)c->_parent, (void*)c->_msg, + c->_docid, c->_msg ? mu_msg_get_msgid(c->_msg) : "", - g_slist_length (c->_children), state); + g_slist_length (c->_children)); +} + + +static gboolean +each_container (Container *c, guint level) +{ + while (level--) + fputs (" ", stdout); + + container_dump (c); + + return TRUE; +} + + + +G_GNUC_UNUSED static void +container_dump_tree (Container *c) +{ + container_traverse (c, 0, (ContainerTraverseFunc)each_container, NULL); } diff --git a/src/mu-msg-threader.h b/src/mu-msg-threader.h index 6bc4446f..003c601d 100644 --- a/src/mu-msg-threader.h +++ b/src/mu-msg-threader.h @@ -28,8 +28,26 @@ G_BEGIN_DECLS +/** + * takes an iter and the total number of matches, and from this + * generates a hash-table with information about the thread structure + * of these matches. 
+ * + * the algorithm to find this structure is based on JWZ's + * message-threading algorithm, as described in: + * http://www.jwz.org/doc/threading.html + * + * the returned hashtable maps the Xapian docid of iter (msg) to a ptr + * to a MuMsgIterThreadInfo structure (see mu-msg-iter.h) + * + * @param iter an iter; note this function will mu_msg_iter_reset this iterator + * @param matches the number of matches in the set + * + * @return a hashtable; free with g_hash_table_destroy when done with it + */ GHashTable *mu_msg_threader_calculate (MuMsgIter *iter, size_t matches); + G_END_DECLS #endif /*__MU_MSG_THREADER_H__*/