* handle message-ids a bit specially, update unit tests

This commit is contained in:
djcb 2013-10-13 20:05:29 +03:00
parent edfdef44a4
commit c36030a086
5 changed files with 92 additions and 15 deletions

View File

@ -303,6 +303,25 @@ add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
doc.add_term (prio_val((MuMsgPrio)num));
}
/* for string and string-list */
/* Index the message-id of @msg in @doc: the original message-id is
 * stored as the document value, while the searchable term is built
 * from the massaged form (see mu_str_process_msgid), cut off at the
 * store's maximum term length. Does nothing when the message has no
 * message-id. */
static void
add_terms_values_msgid (Xapian::Document& doc, MuMsg *msg)
{
	const char *msgid =
		mu_msg_get_field_string (msg, MU_MSG_FIELD_ID_MSGID);

	if (!msgid)
		return; /* no message-id, so nothing to index */

	/* the value keeps the message-id exactly as found in the message */
	doc.add_value ((Xapian::valueno)MU_MSG_FIELD_ID_MSGID, msgid);

	/* the term uses the massaged message-id, limited in length */
	char *massaged = mu_str_process_msgid (msgid, FALSE);
	const std::string term =
		std::string (massaged).substr (0, _MuStore::MAX_TERM_LENGTH);
	g_free (massaged);

	doc.add_term (prefix (MU_MSG_FIELD_ID_MSGID) + term);
}
@ -543,6 +562,10 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
break;
case MU_MSG_FIELD_ID_MSGID:
add_terms_values_msgid (*msgdoc->_doc, msgdoc->_msg);
break;
case MU_MSG_FIELD_ID_THREAD_ID:
case MU_MSG_FIELD_ID_UID:
break; /* already taken care of elsewhere */

View File

@ -429,6 +429,54 @@ each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
}
}
/* Does @str look like a message-id search term, ie. does it start
 * with either the one-character shortcut or the full field name of
 * the message-id field, directly followed by a ':'?
 *
 * Returns TRUE if so, FALSE otherwise (also for NULL and for strings
 * shorter than three characters). */
static gboolean
is_msgid_field (const char *str)
{
	const char *field_name;

	if (str == NULL)
		return FALSE;
	if (strlen (str) < 3)
		return FALSE;

	/* short form: <shortcut>:<msgid> */
	if (str[0] == mu_msg_field_shortcut (MU_MSG_FIELD_ID_MSGID) &&
	    str[1] == ':')
		return TRUE;

	/* long form: <fieldname>:<msgid>; only peek at the character
	 * after the name once we know the name is really a prefix */
	field_name = mu_msg_field_name (MU_MSG_FIELD_ID_MSGID);
	if (!g_str_has_prefix (str, field_name))
		return FALSE;

	return str[strlen (field_name)] == ':' ? TRUE : FALSE;
}
/* message-ids need a bit more massaging -- alphanumeric characters
 * (as per isalnum) are lower-cased, and every other character is
 * replaced with '_'.
 *
 * When @query is TRUE, @str is assumed to be a complete msg-id query
 * term, ie. i:<msgid> or msgid:<msgid>; everything up to and
 * including the first ':' is copied unchanged and only the part after
 * it is massaged. When @query is FALSE, the whole string is massaged.
 *
 * Returns a newly allocated string (free with g_free), or NULL if
 * @str is NULL, or if @query is TRUE and @str does not contain ':'. */
char*
mu_str_process_msgid (const char *str, gboolean query)
{
	char *s, *c;

	g_return_val_if_fail (str, NULL);
	g_return_val_if_fail (!query || strchr(str, ':'), NULL);

	/* the checks above compile to nothing when GLib is built with
	 * G_DISABLE_CHECKS, so enforce the preconditions explicitly;
	 * otherwise strchr() below could return NULL and we would start
	 * walking from NULL + 1 */
	if (!str || (query && !strchr (str, ':')))
		return NULL;

	s = g_strdup (str);

	if (query)
		c = strchr (s, ':') + 1; /* skip the field prefix */
	else
		c = s;

	for (; *c; ++c) {
		/* the <ctype.h> functions require a value representable
		 * as unsigned char (or EOF); passing a plain char that
		 * holds a non-ASCII byte is undefined behaviour where
		 * char is signed */
		const unsigned char uc = (unsigned char)*c;

		*c = isalnum (uc) ? (char)tolower (uc) : '_';
	}

	return s;
}
static void
check_for_field (const char *str, gboolean *is_field,
@ -519,16 +567,17 @@ process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
if (!norm)
return NULL;
check_for_field (str, &is_field, &is_range_field);
/* msg-id needs some special care in queries */
if (query_esc && is_msgid_field (str))
return mu_str_process_msgid (str, TRUE);
check_for_field (str, &is_field, &is_range_field);
gstr = g_string_sized_new (strlen (norm));
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {
gunichar uc;
uc = g_utf8_get_char (cur);
if (xapian_esc)
if (handle_esc_maybe (gstr, &cur, uc, query_esc,
is_range_field))
@ -537,17 +586,11 @@ process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
if (g_unichar_ismark(uc))
continue;
/* maybe add some special cases, such as Spaß->spass ?
*/
uc = g_unichar_tolower (uc);
g_string_append_unichar (gstr, uc);
}
g_free (norm);
/* g_print ("-->%s\n", gstr->str); */
return g_string_free (gstr, FALSE);
}

View File

@ -158,6 +158,18 @@ char* mu_str_process_query_term (const char *qterm)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
 * Handle the message-id in a special way: return a copy of @str in
 * which alphanumeric characters are lower-cased and all other
 * characters are replaced with '_'.
 *
 * @param str the message-id str; when @query is TRUE, the complete
 * query term including its field prefix, ie. i:<msgid> or
 * msgid:<msgid>
 * @param query is this a query? If TRUE, everything up to and
 * including the first ':' in @str is copied unchanged and only the
 * part after it is massaged; if FALSE, the whole string is massaged
 *
 * @return the massaged message-id as a newly allocated string (free
 * with g_free), or NULL if @str is NULL or if @query is TRUE and @str
 * does not contain a ':'
 */
char* mu_str_process_msgid (const char *str, gboolean query);
/**
* Fixup values for some fields in the DWIM manner:
* - if term is date:YYYYMMDD, replace it with the range

View File

@ -5,13 +5,13 @@ Received: from localhost.example.com ([127.0.0.1] helo=borealis)
by borealis with esmtp (Exim 4.77)
id 1SSpnB-00038a-Ux
for djcb@localhost; Fri, 11 May 2012 16:21:58 +0300
Delivered-To: peter@example.com
Delivered-To: peter@example.com
From: Brian <brian@example.com>
To: Peter <peter@example.com>
Subject: encrypted
User-agent: mu4e 0.9.8.5-dev1; emacs 24.1.50.8
Date: Fri, 11 May 2012 16:21:42 +0300
Message-ID: <877gwi97kp.fsf@example.com>
Message-ID: <!&!AAAAAAAAAYAAAAAAAAAOH1+8mkk+lLn7Gg5fke7FbCgAAAEAAAAJ7eBDgcactKhXL6r8cEnJ8BAAAAAA==@example.com>
MIME-Version: 1.0
Content-Type: multipart/encrypted; boundary="=-=-=";
protocol="application/pgp-encrypted"
@ -54,4 +54,3 @@ IWiH5wMxkWFpzjE+GHiJ09vSbTTL4JY9eu2n5nxQmtjYMBVxQm7S7qwH
=0Paa
-----END PGP MESSAGE-----
--=-=-=--

View File

@ -208,6 +208,9 @@ test_mu_query_03 (void)
{ "ploughed", 1},
{ "i:3BE9E6535E3029448670913581E7A1A20D852173@"
"emss35m06.us.lmco.com", 1},
{ "i:!&!AAAAAAAAAYAAAAAAAAAOH1+8mkk+lLn7Gg5fke7"
"FbCgAAAEAAAAJ7eBDgcactKhXL6r8cEnJ8BAAAAAA==@"
"example.com", 1},
/* subsets of the words in the subject should match */
{ "s:gcc include search order" , 1},
@ -648,9 +651,6 @@ test_mu_query_preprocess (void)
}
int
main (int argc, char *argv[])
{