diff --git a/src/mu-str.c b/src/mu-str.c
index fd134af4..fcb9de6e 100644
--- a/src/mu-str.c
+++ b/src/mu-str.c
@@ -30,7 +30,7 @@
 
 #include "mu-str.h"
 #include "mu-msg-flags.h"
-
+#include "mu-msg-fields.h"
 
 const char*
 mu_str_date_s (const char* frm, time_t t)
@@ -226,3 +226,107 @@ mu_date_parse_hdwmy (const char* str)
 
 	return delta <= now ? now - delta : never;
 }
+
+/* state handed to each_check_prefix through mu_msg_field_foreach */
+struct _CheckPrefix {
+	const char *pfx;	/* candidate prefix; points into the query */
+	guint len;		/* number of chars before the ':' */
+	gboolean match;		/* TRUE once some field matched */
+};
+typedef struct _CheckPrefix CheckPrefix;
+
+/* mu_msg_field_foreach callback: set cpfx->match when the candidate
+ * prefix is the shortcut character or the full name of field mfid */
+static void
+each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
+{
+	const char *field_name;
+	char field_shortcut;
+
+	if (!cpfx || cpfx->match)
+		return;
+
+	field_shortcut = mu_msg_field_shortcut (mfid);
+	if (field_shortcut == cpfx->pfx[0] && cpfx->pfx[1] == ':') {
+		cpfx->match = TRUE;
+		return;
+	}
+
+	/* the whole name must match; comparing only the first
+	 * cpfx->len chars would also accept abbreviations such as
+	 * 'sub:' for 'subject' */
+	field_name = mu_msg_field_name (mfid);
+	if (field_name &&
+	    strncmp (cpfx->pfx, field_name, cpfx->len) == 0 &&
+	    field_name[cpfx->len] == '\0')
+		cpfx->match = TRUE;
+}
+
+/* colon is a position inside q pointing at a ':' character. function
+ * determines whether the prefix is a registered prefix (like
+ * 'subject' or 'from' or 's') */
+static gboolean
+is_xapian_prefix (const char *q, const char *colon)
+{
+	const char *cur;
+	CheckPrefix cpfx;
+
+	if (colon == q)
+		return FALSE; /* : at beginning, not a prefix */
+
+	/* track back from colon until a boundary or beginning of the
+	 * str; the cast keeps isalpha well-defined for bytes >= 0x80 */
+	for (cur = colon - 1; cur > q; --cur)
+		if (!isalpha ((unsigned char)cur[-1]))
+			break;
+
+	memset (&cpfx, 0, sizeof(CheckPrefix));
+	cpfx.pfx   = cur;
+	cpfx.len   = (guint)(colon - cur);
+	cpfx.match = FALSE;
+
+	mu_msg_field_foreach ((MuMsgFieldForEachFunc)each_check_prefix,
+			      &cpfx);
+
+	return cpfx.match;
+}
+
+char*
+mu_str_ascii_xapian_escape_in_place (char *query)
+{
+	gchar *cur;
+	gboolean replace_dot;
+
+	g_return_val_if_fail (query, NULL);
+
+	/* only replace the '.' if the string looks like an e-mail
+	 * address or msg-id */
+	replace_dot = (g_strstr_len(query, -1, "@") != NULL);
+
+	for (cur = query; *cur; ++cur) {
+		if (*cur == '@')
+			*cur = '_';
+
+		else if (replace_dot && *cur == '.') {
+			if (cur[1] == '.') {
+				/* don't replace '..'; stop on the last
+				 * dot of the run so that the loop's
+				 * ++cur cannot step over the final 0 */
+				while (cur[1] == '.')
+					++cur;
+			} else
+				*cur = '_';
+		} else if (*cur == ':') {
+			/* if there's a registered xapian prefix before the
+			 * ':', don't touch it. Otherwise replace ':' with
+			 * '_'... ugly...
+			 */
+			if (!is_xapian_prefix (query, cur))
+				*cur = '_';
+		} else	/* cast: keep tolower defined for bytes >= 0x80 */
+			*cur = (gchar)tolower ((unsigned char)*cur);
+	}
+
+	return query;
+}
+
diff --git a/src/mu-str.h b/src/mu-str.h
index c8e3dc42..6b1ce986 100644
--- a/src/mu-str.h
+++ b/src/mu-str.h
@@ -138,7 +138,7 @@ char* mu_str_summarize (const char* str,
  * 'Latin-1 Supplement' and 'Latin Extended-A'
  *
  * @param str a valid utf8 string or NULL
- * @param downcase if TRUE, convert the string to lowercase 
+ * @param downcase if TRUE, convert the string to lowercase
  *
  * @return the normalize string, or NULL in case of error or str was NULL
  */
@@ -153,12 +153,29 @@ char* mu_str_normalize (const char *str, gboolean downcase);
  *
  * @param str a valid utf8 string or NULL
  * @param downcase if TRUE, convert the string to lowercase
- * 
- * @return the normalize string, or NULL in case of error or str was NULL
+ *
+ * @return the normalized string, or NULL in case of error or str was
+ * NULL
  */
 char* mu_str_normalize_in_place (char *str, gboolean downcase);
 
 
+/**
+ * escape the string for use with xapian matching. in practice, if the
+ * string contains an '@', replace '@', single-'.' with '_'. Also,
+ * replace ':' with '_', if it's not following a xapian-prefix (such
+ * as 'subject:', 't:' etc, as defined in mu-msg-fields.[ch]).
+ * changing is done in-place (by changing the argument string). in
+ * any case, the string will be downcased.
+ *
+ * works for ascii strings, like e-mail addresses and message-id.
+ *
+ * @param query a query string
+ *
+ * @return the escaped string or NULL in case of error
+ */
+char* mu_str_ascii_xapian_escape_in_place (char *query);
+
 /**
  *
  * parse strings like 1h, 3w, 2m to mean '1 hour before now', '3 weeks
diff --git a/src/tests/test-mu-str.c b/src/tests/test-mu-str.c
index da782f66..36d1303a 100644
--- a/src/tests/test-mu-str.c
+++ b/src/tests/test-mu-str.c
@@ -152,6 +152,61 @@ test_mu_str_normalize_01 (void)
 }
 
 
+static void
+test_mu_str_normalize_02 (void)
+{
+	int i;
+	struct {
+		const char* word;
+		const char* norm;
+	} words [] = {
+		{ "DantèS", "DanteS"},
+		{ "foo", "foo" },
+		{ "Föö", "Foo" },
+		{ "číslO", "cislO" },
+		{ "hÆvý mëÐal ümláõt", "hAevy meDal umlaot"}
+	};
+
+
+	for (i = 0; i != G_N_ELEMENTS(words); ++i) {
+		gchar *str;
+		str = mu_str_normalize (words[i].word, FALSE);
+		g_assert_cmpstr (str, ==, words[i].norm);
+		g_free (str);
+	}
+}
+
+
+static void
+test_mu_str_ascii_xapian_escape (void)
+{
+	int i;
+	struct {
+		const char* word;
+		const char* esc;
+	} words [] = {
+		{ "aap@noot.mies", "aap_noot_mies"},
+		{ "Foo..Bar", "foo..bar" },
+		{ "subject:test@foo", "subject:test_foo" },
+		{ "xxx:test@bar", "xxx_test_bar" },
+		{ "sub:test@foo", "sub_test_foo" },
+		{ "foo@bar..", "foo_bar.." },
+	};
+
+	for (i = 0; i != G_N_ELEMENTS(words); ++i) {
+		gchar *a = g_strdup (words[i].word);
+		mu_str_ascii_xapian_escape_in_place (a);
+		g_assert_cmpstr (a, ==, words[i].esc);
+		g_free (a);
+	}
+}
+
+
+
+
+
+
+
 
 #if 0
 static void
@@ -233,6 +288,11 @@ main (int argc, char *argv[])
 	/* mu_str_normalize */
 	g_test_add_func ("/mu-str/mu-str-normalize-01",
 			 test_mu_str_normalize_01);
+	g_test_add_func ("/mu-str/mu-str-normalize-02",
+			 test_mu_str_normalize_02);
+
+	g_test_add_func ("/mu-str/mu-str-ascii-xapian-escape",
+			 test_mu_str_ascii_xapian_escape);
 
 	/* mu_str_complete_iso_date_(begin|end) */
 	/* g_test_add_func ("/mu-str/mu-str-complete-iso-date-begin", */