* update attachment extraction code:

- only consider 'leaf', 'message' parts in for_each
   - special handling for gmimemessage
   - some cleanups
This commit is contained in:
djcb 2012-02-13 22:35:22 +02:00
parent 7f44863566
commit 3bb93a401b
5 changed files with 231 additions and 83 deletions

View File

@ -788,7 +788,7 @@ include_attachments (MuMsg *msg)
GString *gstr;
attlist = NULL;
mu_msg_part_foreach (msg,(MuMsgPartForeachFunc)each_part,
mu_msg_part_foreach (msg, FALSE, (MuMsgPartForeachFunc)each_part,
&attlist);
gstr = g_string_sized_new (512);

View File

@ -79,7 +79,8 @@ get_attach_str (MuMsg *msg)
gchar *attach;
attach = NULL;
mu_msg_part_foreach (msg, (MuMsgPartForeachFunc)each_part, &attach);
mu_msg_part_foreach (msg, FALSE,
(MuMsgPartForeachFunc)each_part, &attach);
return attach;
}

View File

@ -38,9 +38,24 @@ struct _FindPartData {
typedef struct _FindPartData FindPartData;
/* is this either a leaf part or an embedded message? */
static gboolean
is_part_or_message_part (GMimeObject *part)
{
return GMIME_IS_PART(part) || GMIME_IS_MESSAGE_PART (part);
}
static void
find_part_cb (GMimeObject *parent, GMimeObject *part, FindPartData *fpdata)
{
/* ignore other parts */
if (!is_part_or_message_part (part))
return;
/* g_printerr ("%u Type-name: %s\n", */
/* fpdata->idx, G_OBJECT_TYPE_NAME((GObject*)part)); */
if (fpdata->part || fpdata->wanted_idx != fpdata->idx++)
return; /* not yet found */
@ -53,8 +68,8 @@ find_part (MuMsg* msg, guint partidx)
FindPartData fpdata;
fpdata.wanted_idx = partidx;
fpdata.idx = 0;
fpdata.part = NULL;
fpdata.idx = 0;
fpdata.part = NULL;
g_mime_message_foreach (msg->_file->_mime_msg,
(GMimeObjectForeachFunc)find_part_cb,
@ -68,6 +83,7 @@ struct _PartData {
MuMsgPartForeachFunc _func;
gpointer _user_data;
GMimePart *_body_part;
gboolean _recurse_rfc822;
};
typedef struct _PartData PartData;
@ -136,10 +152,17 @@ get_part_size (GMimePart *part)
static void
part_foreach_cb_part (GMimePart *part, MuMsgPart *pi)
static gboolean
init_msg_part_from_mime_part (GMimePart *part, MuMsgPart *pi)
{
const gchar *fname;
const gchar *fname, *descr;
GMimeContentType *ct;
ct = g_mime_object_get_content_type ((GMimeObject*)part);
if (GMIME_IS_CONTENT_TYPE(ct)) {
pi->type = (char*)g_mime_content_type_get_media_type (ct);
pi->subtype = (char*)g_mime_content_type_get_media_subtype (ct);
}
pi->disposition = (char*)g_mime_object_get_disposition
((GMimeObject*)part);
@ -147,45 +170,107 @@ part_foreach_cb_part (GMimePart *part, MuMsgPart *pi)
fname = g_mime_part_get_filename (part);
pi->file_name = fname ? mu_str_utf8ify (fname) : NULL;
descr = g_mime_part_get_content_description (part);
pi->description = descr ? mu_str_utf8ify (descr) : NULL;
pi->size = get_part_size (part);
pi->is_leaf = TRUE;
pi->is_msg = FALSE;
return TRUE;
}
static gchar*
get_filename_for_mime_message_part (GMimeMessage *mmsg)
{
gchar *name, *cur;
name = (char*)g_mime_message_get_subject (mmsg);
if (!name)
name = "message";
name = g_strconcat (name, ".eml", NULL);
/* remove slashes... */
for (cur = name ; *cur; ++cur) {
if (*cur == '/' || *cur == ' ' || *cur == ':')
*cur = '-';
}
return name;
}
static gboolean
init_msg_part_from_mime_message_part (GMimeMessage *mmsg, MuMsgPart *pi)
{
pi->disposition = GMIME_DISPOSITION_ATTACHMENT;
/* pseudo-file name... */
pi->file_name = get_filename_for_mime_message_part (mmsg);
pi->description = g_strdup ("message");
pi->type = "message";
pi->subtype = "rfc822";
pi->size = 0;
pi->is_leaf = TRUE;
pi->is_msg = TRUE;
return TRUE;
}
static void
msg_part_free (MuMsgPart *pi)
{
if (!pi)
return;
g_free (pi->file_name);
g_free (pi->description);
}
static void
part_foreach_cb (GMimeObject *parent, GMimeObject *mobj, PartData *pdata)
{
GMimeContentType *ct;
MuMsgPart pi;
gboolean rv;
memset (&pi, 0, sizeof pi);
pi.index = pdata->_idx++;
pi.content_id = (char*)g_mime_object_get_content_id (mobj);
pi.data = (gpointer)mobj;
/* ignore non-leaf / message parts */
if (!is_part_or_message_part (mobj))
return;
memset (&pi, 0, sizeof(pi));
pi.index = pdata->_idx++;
pi.content_id = (char*)g_mime_object_get_content_id (mobj);
pi.data = (gpointer)mobj;
/* check if this is the body part */
pi.is_body = ((void*)pdata->_body_part == (void*)mobj);
ct = g_mime_object_get_content_type (mobj);
if (GMIME_IS_CONTENT_TYPE(ct)) {
pi.type = (char*)g_mime_content_type_get_media_type (ct);
pi.subtype = (char*)g_mime_content_type_get_media_subtype (ct);
}
pi.is_body = ((void*)pdata->_body_part == (void*)mobj);
if (GMIME_IS_PART(mobj))
part_foreach_cb_part ((GMimePart*)mobj, &pi);
rv = init_msg_part_from_mime_part ((GMimePart*)mobj, &pi);
else if (GMIME_IS_MESSAGE_PART(mobj)) {
GMimeMessage *mmsg;
mmsg = g_mime_message_part_get_message ((GMimeMessagePart*)mobj);
if (mmsg)
if (!mmsg)
return;
rv = init_msg_part_from_mime_message_part (mmsg, &pi);
if (rv && pdata->_recurse_rfc822)
/* NOTE: this screws up the counting (pdata->_idx) */
g_mime_message_foreach /* recurse */
(mmsg, (GMimeObjectForeachFunc)part_foreach_cb,
pdata);
}
} else
rv = FALSE; /* ignore */
pdata->_func(pdata->_msg, &pi, pdata->_user_data);
g_free (pi.file_name);
if (rv)
pdata->_func(pdata->_msg, &pi, pdata->_user_data);
msg_part_free (&pi);
}
@ -217,8 +302,8 @@ load_msg_file_maybe (MuMsg *msg)
void
mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
gpointer user_data)
mu_msg_part_foreach (MuMsg *msg, gboolean recurse_rfc822,
MuMsgPartForeachFunc func, gpointer user_data)
{
PartData pdata;
GMimeMessage *mime_msg;
@ -230,11 +315,12 @@ mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
mime_msg = msg->_file->_mime_msg;
pdata._msg = msg;
pdata._idx = 0;
pdata._body_part = mu_msg_mime_get_body_part (mime_msg, FALSE);
pdata._func = func;
pdata._user_data = user_data;
pdata._msg = msg;
pdata._idx = 0;
pdata._body_part = mu_msg_mime_get_body_part (mime_msg, FALSE);
pdata._func = func;
pdata._user_data = user_data;
pdata._recurse_rfc822 = recurse_rfc822;
g_mime_message_foreach (msg->_file->_mime_msg,
(GMimeObjectForeachFunc)part_foreach_cb,
@ -243,7 +329,7 @@ mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
static gboolean
write_to_stream (GMimeObject *part, int fd, GError **err)
write_part_to_fd (GMimePart *part, int fd, GError **err)
{
GMimeStream *stream;
GMimeDataWrapper *wrapper;
@ -257,7 +343,7 @@ write_to_stream (GMimeObject *part, int fd, GError **err)
}
g_mime_stream_fs_set_owner (GMIME_STREAM_FS(stream), FALSE);
wrapper = g_mime_part_get_content_object (GMIME_PART(part));
wrapper = g_mime_part_get_content_object (part);
if (!GMIME_IS_DATA_WRAPPER(wrapper)) {
g_set_error (err, 0, MU_ERROR_GMIME,
"failed to create wrapper");
@ -279,9 +365,34 @@ write_to_stream (GMimeObject *part, int fd, GError **err)
}
static gboolean
save_part (GMimeObject *part, const char *fullpath,
gboolean overwrite, gboolean use_existing, GError **err)
write_object_to_fd (GMimeObject *obj, int fd, GError **err)
{
gchar *str;
str = g_mime_object_to_string (obj);
if (!str) {
g_set_error (err, 0, MU_ERROR_GMIME,
"could not get string from object");
return FALSE;
}
if (write (fd, str, strlen(str)) == -1) {
g_set_error (err, 0, MU_ERROR_GMIME,
"failed to write object: %s",
strerror(errno));
return FALSE;
}
return TRUE;
}
static gboolean
save_mime_object (GMimeObject *obj, const char *fullpath,
gboolean overwrite, gboolean use_existing, GError **err)
{
int fd;
gboolean rv;
@ -300,7 +411,11 @@ save_part (GMimeObject *part, const char *fullpath,
return FALSE;
}
rv = write_to_stream (part, fd, err);
if (GMIME_IS_PART (obj))
rv = write_part_to_fd ((GMimePart*)obj, fd, err);
else
rv = write_object_to_fd (obj, fd, err);
if (close (fd) != 0 && !err) { /* don't write on top of old err */
g_set_error (err, 0, MU_ERROR_FILE,
"could not close '%s': %s",
@ -313,28 +428,37 @@ save_part (GMimeObject *part, const char *fullpath,
gchar*
mu_msg_part_filepath (MuMsg *msg, const char* targetdir, guint partidx)
mu_msg_part_filepath (MuMsg *msg, const char* targetdir, guint partidx,
GError **err)
{
char *fname, *filepath;
GMimeObject* part;
GMimeObject* mobj;
if (!load_msg_file_maybe (msg))
return NULL;
part = find_part (msg, partidx);
if (!part) {
g_warning ("%s: cannot find part %u", __FUNCTION__, partidx);
if (!(mobj = find_part (msg, partidx))) {
g_set_error (err, 0, MU_ERROR_GMIME, "cannot find part %u", partidx);
return NULL;
}
/* the easy case: the part has a filename */
fname = (gchar*)g_mime_part_get_filename (GMIME_PART(part));
if (fname) /* security: don't include any directory components... */
fname = g_path_get_basename (fname);
else
fname = g_strdup_printf ("%x-part-%u",
if (GMIME_IS_PART (mobj)) {
/* the easy case: the part has a filename */
fname = (gchar*)g_mime_part_get_filename (GMIME_PART(mobj));
if (fname) /* security: don't include any directory components... */
fname = g_path_get_basename (fname);
else
fname = g_strdup_printf ("%x-part-%u",
g_str_hash (mu_msg_get_path (msg)),
partidx);
partidx);
} else if (GMIME_IS_MESSAGE_PART(mobj))
fname = get_filename_for_mime_message_part
(g_mime_message_part_get_message((GMimeMessagePart*)mobj));
else {
g_set_error (err, 0, MU_ERROR_GMIME, "part %u cannot be saved",
partidx);
return NULL;
}
filepath = g_build_path (G_DIR_SEPARATOR_S, targetdir ? targetdir : "",
fname, NULL);
@ -373,7 +497,7 @@ mu_msg_part_filepath_cache (MuMsg *msg, guint partid)
return NULL;
}
filepath = mu_msg_part_filepath (msg, dirname, partid);
filepath = mu_msg_part_filepath (msg, dirname, partid, NULL);
g_free (dirname);
if (!filepath)
g_warning ("%s: could not get filename", __FUNCTION__);
@ -396,16 +520,15 @@ mu_msg_part_save (MuMsg *msg, const char *fullpath, guint partidx,
return FALSE;
part = find_part (msg, partidx);
if (!GMIME_IS_PART(part)) {
if (!is_part_or_message_part (part)) {
g_set_error (err, 0, MU_ERROR_GMIME,
"cannot find part %u", partidx);
"unexpected type %s for part %u",
G_OBJECT_TYPE_NAME((GObject*)part),
partidx);
return FALSE;
}
} else
return save_mime_object (part, fullpath, overwrite, use_cached, err);
if (!save_part (part, fullpath, overwrite, use_cached, err))
return FALSE;
return TRUE;
}
@ -512,18 +635,22 @@ match_filename_rx (GMimeObject *parent, GMimeObject *part, MatchData2 *mdata)
{
const char *fname;
if (!GMIME_IS_PART(part))
goto leave;
/* ignore other parts -- we need this guard so the counting of
* parts is the same as in other functions for dealing with
* msg parts (this is needed since we expose the numbers to
* the user) */
if (!is_part_or_message_part (part))
return;
fname = g_mime_part_get_filename (GMIME_PART(part));
if (!fname)
goto leave;
if (!fname) {
++mdata->_idx;
return;
}
if (g_regex_match (mdata->_rx, fname, 0, NULL))
mdata->_lst = g_slist_prepend (mdata->_lst,
GUINT_TO_POINTER(mdata->_idx));
leave:
++mdata->_idx;
GUINT_TO_POINTER(mdata->_idx++));
}
@ -549,22 +676,17 @@ mu_msg_part_find_files (MuMsg *msg, const GRegex *pattern)
}
gboolean
mu_msg_part_looks_like_attachment (MuMsgPart *part, gboolean include_inline)
{
g_return_val_if_fail (part, FALSE);
if (!part->disposition||!part->type || !part->file_name)
if (part->is_body||!part->disposition||!part->type)
return FALSE;
if (g_ascii_strcasecmp (part->disposition,
GMIME_DISPOSITION_ATTACHMENT) == 0)
return TRUE;
if (include_inline &&
if (include_inline ||
g_ascii_strcasecmp (part->disposition,
GMIME_DISPOSITION_INLINE) == 0)
GMIME_DISPOSITION_ATTACHMENT) == 0)
return TRUE;
return FALSE;

View File

@ -43,6 +43,9 @@ struct _MuMsgPart {
/* the file name (if any) */
char *file_name;
/* description (if any) */
char *description;
/* usually, "attachment" or "inline" */
char *disposition;
@ -55,6 +58,7 @@ struct _MuMsgPart {
* message body*/
gboolean is_leaf; /* if the body is a leaf part (MIME
* Part), not eg. a multipart/ */
gboolean is_msg; /* part is a message/rfc822 */
/* if TRUE, mu_msg_part_destroy will free the member vars
* as well*/
@ -72,6 +76,16 @@ typedef struct _MuMsgPart MuMsgPart;
#define mu_msg_part_file_name(pi) ((pi)->file_name)
/**
* macro to get the description for this mime-part
*
* @param pi a MuMsgPart instance
*
* @return the description
*/
#define mu_msg_part_description(pi) ((pi)->description)
/**
* macro to get the content-id (cid) for this mime-part
*
@ -143,11 +157,12 @@ gchar* mu_msg_part_save_temp (MuMsg *msg, guint partidx, GError **err);
* @param msg a msg
* @param targetdir where to store the part
* @param partidx the part for which to determine a filename
* @param err receives error information (when function returns NULL)
*
* @return a filepath (g_free when done with it) or NULL in case of error
*/
gchar* mu_msg_part_filepath (MuMsg *msg, const char* targetdir,
guint partidx) G_GNUC_WARN_UNUSED_RESULT;
guint partidx, GError **err) G_GNUC_WARN_UNUSED_RESULT;
/**
@ -194,12 +209,15 @@ typedef void (*MuMsgPartForeachFunc) (MuMsg*, MuMsgPart*, gpointer);
* call a function for each of the mime part in a message
*
* @param msg a valid MuMsg* instance
* @param recurse_rfc822 whether to recurse into message/rfc822 parts
* generallly, this is only needed when indexing message contents
* @param func a callback function to call for each contact; when
* the callback does not return TRUE, it won't be called again
* @param user_data a user-provide pointer that will be passed to the callback
*
*/
void mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
void mu_msg_part_foreach (MuMsg *msg, gboolean recurse_rfc822,
MuMsgPartForeachFunc func,
gpointer user_data);
G_END_DECLS

View File

@ -201,20 +201,26 @@ static void
each_part (MuMsg *msg, MuMsgPart *part, gchar **parts)
{
const char *fname;
char *name;
if (!mu_msg_part_looks_like_attachment (part, TRUE))
return;
fname = mu_msg_part_file_name (part);
if (fname) {
char *esc;
esc = mu_str_escape_c_literal (fname, TRUE);
*parts = g_strdup_printf
if (!fname)
fname = mu_msg_part_description (part);
if (fname)
name = mu_str_escape_c_literal (fname, TRUE);
else
name = g_strdup_printf ("\"part-%d\"", part->index);
*parts = g_strdup_printf
("%s(:index %d :name %s :mime-type \"%s/%s\" :size %d)",
*parts ? *parts : "",
part->index,
esc,
*parts ? *parts : "", part->index, name,
part->type ? part->type : "application",
part->subtype ? part->subtype : "octet-stream",
part->size);
}
}
@ -224,7 +230,8 @@ append_sexp_attachments (GString *gstr, MuMsg *msg)
char *parts;
parts = NULL;
mu_msg_part_foreach (msg, (MuMsgPartForeachFunc)each_part, &parts);
mu_msg_part_foreach (msg, FALSE,
(MuMsgPartForeachFunc)each_part, &parts);
if (parts)
g_string_append_printf (gstr, "\t:attachments (%s)\n", parts);