* threading: small updates, cleanups

This commit is contained in:
Dirk-Jan C. Binnema 2011-07-01 20:52:09 +03:00
parent 8955703828
commit 8a4c70c669
3 changed files with 67 additions and 172 deletions

View File

@ -58,6 +58,8 @@ libmu_la_SOURCES= \
mu-config.h \
mu-contacts.c \
mu-contacts.h \
mu-container.c \
mu-container.h \
mu-index.c \
mu-index.h \
mu-log.c \
@ -93,8 +95,6 @@ libmu_la_SOURCES= \
mu-str-normalize.c \
mu-str.c \
mu-str.h \
mu-threader-utils.c \
mu-threader-utils.h \
mu-threader.c \
mu-threader.h \
mu-util-db.cc \

View File

@ -143,7 +143,8 @@ struct _MuMsgIterThreadInfo {
typedef struct _MuMsgIterThreadInfo MuMsgIterThreadInfo;
/**
* get the MuMsgThreaderInfo struct for this message
* get the MuMsgThreaderInfo struct for this message; this only
* works when you created the mu-msg-iter with threading enabled
*
* @param iter a valid MuMsgIter iterator
*

View File

@ -21,7 +21,7 @@
#include <string.h> /* for memset */
#include "mu-threader.h"
#include "mu-threader-utils.h"
#include "mu-container.h"
#include "mu-str.h"
/* msg threading implementation based on JWZ's algorithm, as described in:
@ -50,12 +50,12 @@
/* step 1 */ static GHashTable* create_containers (MuMsgIter *iter);
/* step 2 */ static Container *find_root_set (GHashTable *ids);
static Container* prune_empty_containers (Container *root);
/* step 2 */ static MuContainer *find_root_set (GHashTable *ids);
static MuContainer* prune_empty_containers (MuContainer *root);
/* static void group_root_set_by_subject (GSList *root_set); */
GHashTable* create_doc_id_thread_path_hash (Container *root, size_t match_num);
GHashTable* create_doc_id_thread_path_hash (MuContainer *root, size_t match_num);
static gint cmp_dates (Container *c1, Container *c2);
static gint cmp_dates (MuContainer *c1, MuContainer *c2);
/* msg threading algorithm, based on JWZ's algorithm,
* http://www.jwz.org/doc/threading.html */
@ -63,7 +63,7 @@ GHashTable*
mu_threader_calculate (MuMsgIter *iter, size_t matchnum)
{
GHashTable *id_table, *thread_ids;
Container *root_set;
MuContainer *root_set;
g_return_val_if_fail (iter, FALSE);
@ -79,7 +79,7 @@ mu_threader_calculate (MuMsgIter *iter, size_t matchnum)
root_set = prune_empty_containers (root_set);
/* sort root set */
root_set = container_sort (root_set, (GCompareDataFunc)cmp_dates,
root_set = mu_container_sort (root_set, (GCompareDataFunc)cmp_dates,
NULL, FALSE);
/* step 5: group root set by subject */
@ -89,8 +89,9 @@ mu_threader_calculate (MuMsgIter *iter, size_t matchnum)
mu_msg_iter_reset (iter); /* go all the way back */
/* finally, deliver the docid => thread-path hash */
thread_ids = create_doc_id_thread_path_hash (root_set,
matchnum);
thread_ids = mu_container_thread_info_hash_new (root_set,
matchnum);
g_hash_table_destroy (id_table); /* step 3*/
return thread_ids;
@ -100,11 +101,11 @@ mu_threader_calculate (MuMsgIter *iter, size_t matchnum)
#if 0
static void
check_dup (const char *msgid, Container *c, GHashTable *hash)
check_dup (const char *msgid, MuContainer *c, GHashTable *hash)
{
if (g_hash_table_lookup (hash, c)) {
g_warning ("ALREADY!!");
container_dump (c, FALSE);
mu_container_dump (c, FALSE);
g_assert (0);
} else
g_hash_table_insert (hash, c, GUINT_TO_POINTER(TRUE));
@ -129,18 +130,18 @@ assert_no_duplicates (GHashTable *ids)
/* a referred message is a message that is referred to by some other message */
static Container*
static MuContainer*
find_or_create_referred (GHashTable *id_table, const char *msgid,
gboolean *created)
{
Container *c;
MuContainer *c;
g_return_val_if_fail (msgid, NULL);
c = g_hash_table_lookup (id_table, msgid);
*created = !c;
if (!c) {
c = container_new (NULL, 0, msgid);
c = mu_container_new (NULL, 0, msgid);
g_hash_table_insert (id_table, (gpointer)msgid, c);
/* assert_no_duplicates (id_table); */
}
@ -151,10 +152,10 @@ find_or_create_referred (GHashTable *id_table, const char *msgid,
/* find a container for the given msgid; if it does not exist yet,
* create a new one, and register it */
static Container*
static MuContainer*
find_or_create (GHashTable *id_table, MuMsg *msg, guint docid)
{
Container *c;
MuContainer *c;
const char* msgid;
g_return_val_if_fail (msg, NULL);
@ -166,8 +167,8 @@ find_or_create (GHashTable *id_table, MuMsg *msg, guint docid)
c = g_hash_table_lookup (id_table, msgid);
/* If id_table contains an empty Container for this ID: * *
* Store this message in the Container's message slot. */
/* If id_table contains an empty MuContainer for this ID: * *
* Store this message in the MuContainer's message slot. */
if (c) {
if (!c->msg) {
c->msg = mu_msg_ref (msg);
@ -181,20 +182,20 @@ find_or_create (GHashTable *id_table, MuMsg *msg, guint docid)
* message-id we already saw... create this message,
* and mark it as a duplicate, and a child of the one
* we saw before; use its path as a fake message-id*/
Container *c2;
c2 = container_new (msg, docid, "<dup>");
c2->flags = CONTAINER_FLAG_DUP;
c = container_append_children (c, c2);
MuContainer *c2;
c2 = mu_container_new (msg, docid, "<dup>");
c2->flags = MU_CONTAINER_FLAG_DUP;
c = mu_container_append_children (c, c2);
g_hash_table_insert (id_table,
(gpointer)mu_msg_get_path (msg), c2);
/* assert_no_duplicates (id_table); */
return NULL; /* don't process this message further */
}
} else { /* Else: Create a new Container object holding
this message; Index the Container by
} else { /* Else: Create a new MuContainer object holding
this message; Index the MuContainer by
Message-ID in id_table. */
c = container_new (msg, docid, msgid);
c = mu_container_new (msg, docid, msgid);
g_hash_table_insert (id_table, (gpointer)msgid, c);
/* assert_no_duplicates (id_table); */
@ -203,7 +204,7 @@ find_or_create (GHashTable *id_table, MuMsg *msg, guint docid)
}
static gboolean
child_elligible (Container *parent, Container *child, gboolean created)
child_elligible (MuContainer *parent, MuContainer *child, gboolean created)
{
if (!parent || !child)
return FALSE;
@ -211,9 +212,9 @@ child_elligible (Container *parent, Container *child, gboolean created)
return FALSE;
/* if (created) */
/* return TRUE; */
if (container_reachable (parent, child))
if (mu_container_reachable (parent, child))
return FALSE;
if (container_reachable (child, parent))
if (mu_container_reachable (child, parent))
return FALSE;
return TRUE;
@ -222,10 +223,10 @@ child_elligible (Container *parent, Container *child, gboolean created)
static void /* 1B */
handle_references (GHashTable *id_table, Container *c)
handle_references (GHashTable *id_table, MuContainer *c)
{
const GSList *refs, *cur;
Container *parent;
MuContainer *parent;
gboolean created;
refs = mu_msg_get_references (c->msg);
@ -234,7 +235,7 @@ handle_references (GHashTable *id_table, Container *c)
/* For each element in the message's References field:
Find a Container object for the given Message-ID: If
Find a MuContainer object for the given Message-ID: If
there's one in id_table use that; Otherwise, make (and
index) one with a null Message. */
@ -242,11 +243,11 @@ handle_references (GHashTable *id_table, Container *c)
created = FALSE;
for (parent = NULL, cur = refs; cur; cur = g_slist_next (cur)) {
Container *child;
MuContainer *child;
child = find_or_create_referred (id_table, (gchar*)cur->data,
&created);
/*Link the References field's Containers together in
/*Link the References field's MuContainers together in
* the order implied by the References header.
If they are already linked, don't change the existing
@ -258,7 +259,7 @@ handle_references (GHashTable *id_table, Container *c)
as a child of the other, don't add the link. */
if (child_elligible (parent, child, created))
parent = container_append_children (parent, child);
parent = mu_container_append_children (parent, child);
parent = child;
}
@ -271,7 +272,7 @@ handle_references (GHashTable *id_table, Container *c)
References field, and presumed a parent based on the other
entries in that field. Now that we have the actual message,
we can be more definitive, so throw away the old parent and
use this new one. Find this Container in the parent's
use this new one. Find this MuContainer in the parent's
children list, and unlink it.
Note that this could cause this message to now have no
@ -285,7 +286,7 @@ handle_references (GHashTable *id_table, Container *c)
/* optimization: if the message was newly added, it's by
* definition not reachable yet */
if (child_elligible (parent, c, created))
parent = container_append_children (parent, c);
parent = mu_container_append_children (parent, c);
}
@ -298,12 +299,12 @@ create_containers (MuMsgIter *iter)
id_table = g_hash_table_new_full (g_str_hash,
g_str_equal,
NULL,
(GDestroyNotify)container_destroy);
(GDestroyNotify)mu_container_destroy);
for (mu_msg_iter_reset (iter); !mu_msg_iter_is_done (iter);
mu_msg_iter_next (iter)) {
Container *c;
MuContainer *c;
MuMsg *msg;
unsigned docid;
@ -324,7 +325,7 @@ create_containers (MuMsgIter *iter)
static void
filter_root_set (const gchar *msgid, Container *c, Container **root_set)
filter_root_set (const gchar *msgid, MuContainer *c, MuContainer **root_set)
{
if (c->parent)
return;
@ -333,16 +334,16 @@ filter_root_set (const gchar *msgid, Container *c, Container **root_set)
*root_set = c;
return;
} else
*root_set = container_append_siblings (*root_set, c);
*root_set = mu_container_append_siblings (*root_set, c);
}
/* 2. Walk over the elements of id_table, and gather a list of the
Container objects that have no parents, but do have children */
static Container*
MuContainer objects that have no parents, but do have children */
static MuContainer*
find_root_set (GHashTable *ids)
{
Container *root_set;
MuContainer *root_set;
root_set = NULL;
g_hash_table_foreach (ids, (GHFunc)filter_root_set, &root_set);
@ -352,15 +353,15 @@ find_root_set (GHashTable *ids)
static gboolean
prune_maybe (Container *c)
prune_maybe (MuContainer *c)
{
Container *cur;
MuContainer *cur;
for (cur = c->child; cur; cur = cur->next) {
if (cur->flags & CONTAINER_FLAG_DELETE)
c = container_remove_child (c, cur);
else if (cur->flags & CONTAINER_FLAG_SPLICE)
c = container_splice_children (c, cur);
if (cur->flags & MU_CONTAINER_FLAG_DELETE)
c = mu_container_remove_child (c, cur);
else if (cur->flags & MU_CONTAINER_FLAG_SPLICE)
c = mu_container_splice_children (c, cur);
}
/* don't touch containers with messages */
@ -370,11 +371,11 @@ prune_maybe (Container *c)
/* A. If it is an msg-less container with no children, mark it
* for deletion. */
if (!c->child) {
c->flags |= CONTAINER_FLAG_DELETE;
c->flags |= MU_CONTAINER_FLAG_DELETE;
return TRUE;
}
/* B. If the Container has no Message, but does have
/* B. If the MuContainer has no Message, but does have
* children, remove this container but promote its
* children to this level (that is, splice them in to
* the current child list.)
@ -386,31 +387,30 @@ prune_maybe (Container *c)
if (c->child->next) /* ie., > 1 child */
return TRUE;
c->flags |= CONTAINER_FLAG_SPLICE;
c->flags |= MU_CONTAINER_FLAG_SPLICE;
return TRUE;
}
static Container*
prune_empty_containers (Container *root_set)
static MuContainer*
prune_empty_containers (MuContainer *root_set)
{
Container *cur;
MuContainer *cur;
container_foreach (root_set, (ContainerForeachFunc)prune_maybe, NULL);
mu_container_foreach (root_set, (MuContainerForeachFunc)prune_maybe, NULL);
/* and prune the root_set itself... */
for (cur = root_set; cur; cur = cur->next) {
if (cur->flags & CONTAINER_FLAG_DELETE)
root_set = container_remove_sibling (root_set, cur);
if (cur->flags & MU_CONTAINER_FLAG_DELETE)
root_set = mu_container_remove_sibling (root_set, cur);
else if (cur->flags & CONTAINER_FLAG_SPLICE) {
Container *newchild;
else if (cur->flags & MU_CONTAINER_FLAG_SPLICE) {
MuContainer *newchild;
newchild = cur->child;
cur->child = NULL;
root_set = container_append_siblings (root_set, newchild);
root_set = mu_container_append_siblings (root_set, newchild);
}
}
@ -418,7 +418,7 @@ prune_empty_containers (Container *root_set)
}
G_GNUC_UNUSED static gint
cmp_dates (Container *c1, Container *c2)
cmp_dates (MuContainer *c1, MuContainer *c2)
{
MuMsg *m1, *m2;
@ -436,109 +436,3 @@ cmp_dates (Container *c1, Container *c2)
/* Allocate a MuMsgIterThreadInfo from the slice allocator and fill it in.
 *
 * @threadpath is stored as-is (not copied); thread_info_destroy()
 * g_free()s it, so the new struct takes over ownership of the string.
 * Each of the boolean arguments switches on the corresponding
 * MU_MSG_ITER_THREAD_PROP_* bit in the 'prop' field.
 *
 * Returns the newly allocated struct; release it with
 * thread_info_destroy(). */
static MuMsgIterThreadInfo*
thread_info_new (gchar *threadpath, gboolean root,
		 gboolean child, gboolean empty_parent, gboolean is_dup)
{
	MuMsgIterThreadInfo *info = g_slice_new (MuMsgIterThreadInfo);

	info->threadpath = threadpath;

	/* start with a clean slate, then raise only the requested bits */
	info->prop = 0;
	if (root)
		info->prop |= MU_MSG_ITER_THREAD_PROP_ROOT;
	if (child)
		info->prop |= MU_MSG_ITER_THREAD_PROP_FIRST_CHILD;
	if (empty_parent)
		info->prop |= MU_MSG_ITER_THREAD_PROP_EMPTY_PARENT;
	if (is_dup)
		info->prop |= MU_MSG_ITER_THREAD_PROP_DUP;

	return info;
}
/* Release a MuMsgIterThreadInfo created with thread_info_new(),
 * including the thread-path string it holds. Passing NULL is a no-op. */
static void
thread_info_destroy (MuMsgIterThreadInfo *ti)
{
	if (!ti)
		return;

	g_free (ti->threadpath);
	g_slice_free (MuMsgIterThreadInfo, ti);
}
/* Bundle of state handed (as user-data) to add_thread_info while
 * walking the container tree in create_doc_id_thread_path_hash. */
struct _ThreadInfo {
/* docid => MuMsgIterThreadInfo* table that is being filled */
GHashTable *hash;
/* printf-style format for one thread-path segment, as produced by
 * thread_segment_format_string and passed on to path_to_string */
const char* format;
};
typedef struct _ThreadInfo ThreadInfo;
/* Derive the thread properties of container @c from its position in
 * the tree and store them, together with @threadpath, in
 * @thread_info_hash under the container's docid.
 *
 * @threadpath is handed to thread_info_new() without copying. */
static void
add_to_thread_info_hash (GHashTable *thread_info_hash, Container *c,
			 char *threadpath)
{
	Container *mom;
	gboolean toplevel, leads, orphaned, dup;

	mom = c->parent;

	/* a container without a parent hangs directly off the
	 * (dummy) root container */
	toplevel = (mom == NULL);
	if (toplevel) {
		leads    = FALSE;
		orphaned = FALSE;
	} else {
		leads    = (mom->child == c); /* first among its siblings? */
		orphaned = (!mom->msg);       /* parent holds no message */
	}

	dup = c->flags & CONTAINER_FLAG_DUP;

	g_hash_table_insert (thread_info_hash,
			     GUINT_TO_POINTER(c->docid),
			     thread_info_new (threadpath, toplevel, leads,
					      orphaned, dup));
}
/* devise a printf format string ("%0<N>x") of the minimum width that
 * fits every segment index for up to matchnum matches, i.e. the
 * number of hex digits needed to write matchnum - 1.
 *
 * @param matchnum the number of matches; 0 and 1 are both handled
 * and give a one-digit format
 *
 * @return a pointer to static memory; it is overwritten by the next
 * call, so this function is not reentrant and the result must not be
 * freed
 *
 * The width is computed with integer arithmetic only: the earlier
 * ceil(log(matchnum)/log(16)) was undefined for matchnum == 0
 * (conversion of -inf to unsigned) and could be off by one near exact
 * powers of 16 because of floating-point rounding. */
const char*
thread_segment_format_string (size_t matchnum)
{
	static char frmt[16];
	unsigned digitnum;
	size_t maxidx;

	/* the largest index we have to be able to print */
	maxidx = matchnum > 0 ? matchnum - 1 : 0;

	/* count the hex digits of maxidx; always at least one */
	for (digitnum = 1; maxidx >= 16; maxidx /= 16)
		++digitnum;

	snprintf (frmt, sizeof(frmt), "%%0%ux", digitnum);

	return frmt;
}
/* Per-container callback for container_path_foreach: turn @path into
 * a string using the segment format in @ti, and record the thread
 * info for @c in the hash carried by @ti. Always returns TRUE. */
static gboolean
add_thread_info (Container *c, ThreadInfo *ti, Path *path)
{
	/* the string is passed straight on to the hash entry */
	add_to_thread_info_hash (ti->hash, c,
				 path_to_string (path, ti->format));

	return TRUE;
}
/* Build the docid => thread-info hash table for all containers
 * reachable from @root_set; @matchnum determines the width of the
 * thread-path segments.
 *
 * Returns a new GHashTable whose values are destroyed with
 * thread_info_destroy when the table is destroyed. */
GHashTable*
create_doc_id_thread_path_hash (Container *root_set, size_t matchnum)
{
	GHashTable *docid_hash;
	ThreadInfo ti;

	/* create hash docid => thread-info; keys are plain integers
	 * stuffed in pointers, so there is no key destructor */
	docid_hash = g_hash_table_new_full (g_direct_hash, g_direct_equal,
					    NULL,
					    (GDestroyNotify)thread_info_destroy);

	ti.hash   = docid_hash;
	ti.format = thread_segment_format_string (matchnum);

	/* visit every container, adding its thread info to the hash */
	container_path_foreach (root_set,
				(ContainerPathForeachFunc)add_thread_info,
				&ti);

	return docid_hash;
}