diff --git a/src/mu-query.cc b/src/mu-query.cc index 08a97af3..0640ff1c 100644 --- a/src/mu-query.cc +++ b/src/mu-query.cc @@ -288,97 +288,22 @@ mu_query_destroy (MuQuery *self) g_free (self); } -struct _CheckPrefix { - const char *pfx; - guint len; - gboolean match; -}; -typedef struct _CheckPrefix CheckPrefix; -static void -each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx) -{ - const char *field_name; - char field_shortcut; - - if (!cpfx || cpfx->match) - return; - - field_shortcut = mu_msg_field_shortcut (mfid); - if (field_shortcut == cpfx->pfx[0]) { - cpfx->match = TRUE; - return; - } - - field_name = mu_msg_field_name (mfid); - if (field_name && - strncmp (cpfx->pfx, field_name, cpfx->len) == 0) { - cpfx->match = TRUE; - return; - } -} - - -/* colon is a position inside q pointing at a ':' character. function - * determines whether the prefix is a registered prefix (like - * 'subject' or 'from' or 's') */ -static gboolean -is_xapian_prefix (const char *q, const char *colon) -{ - const char *cur; - - if (colon == q) - return FALSE; /* : at beginning, not a prefix */ - - /* track back from colon until a boundary or beginning of the - * str */ - for (cur = colon - 1; cur >= q; --cur) { - - if (cur == q || !isalpha (*(cur-1))) { - - CheckPrefix cpfx; - memset (&cpfx, 0, sizeof(CheckPrefix)); - - cpfx.pfx = cur; - cpfx.len = (colon - cur); - cpfx.match = FALSE; - - mu_msg_field_foreach ((MuMsgFieldForEachFunc) - each_check_prefix, - &cpfx); - - return (cpfx.match); - } - } - - return FALSE; -} - /* preprocess a query to make them a bit more permissive */ char* mu_query_preprocess (const char *query) { gchar *my_query; - gchar *cur; g_return_val_if_fail (query, NULL); + my_query = g_strdup (query); /* private copy: rewritten in place below, then returned to the caller */ - /* translate the the searchexpr to all lowercase; this - * will fixes some of the false-negatives. A full fix - * probably requires some custom query parser. 
- */ - my_query = mu_str_normalize(query, TRUE); - - for (cur = my_query; *cur; ++cur) { - if (*cur == ':') /* we found a ':' */ - /* if there's a registered xapian prefix before the - * ':', don't touch it. Otherwise replace ':' with - * a space'... ugly... - */ - if (!is_xapian_prefix (my_query, cur)) - *cur = ' '; - } - + /* remove accents and turn to lower-case */ + mu_str_normalize_in_place (my_query, TRUE); + /* escape '@', single '_' and ':' if it's not following a + * xapian-pfx with '_' (NOTE(review): wording is ambiguous -- confirm the exact rules in mu_str_ascii_xapian_escape_in_place) */ + mu_str_ascii_xapian_escape_in_place (my_query); + return my_query; } diff --git a/src/mu-store.cc b/src/mu-store.cc index 1d56975a..376022d6 100644 --- a/src/mu-store.cc +++ b/src/mu-store.cc @@ -314,36 +314,36 @@ static void add_terms_values_string (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { - const char* str; - - str = mu_msg_get_field_string (msg, mfid); - if (!str) + const char *orig; + char *val; + + orig = mu_msg_get_field_string (msg, mfid); + if (!orig) return; - - const std::string value (str); + val = g_strdup (orig); /* mutable copy: stored as the value first, then rewritten in place for the terms; released with g_free at the end */ + const std::string prefix (1, mu_msg_field_xapian_prefix(mfid)); - - if (mu_msg_field_xapian_index (mfid)) { - Xapian::TermGenerator termgen; - gchar *norm (mu_str_normalize(str, TRUE)); - termgen.set_document (doc); - termgen.index_text_without_positions (norm, 1, prefix); - g_free(norm); - } - - if (mu_msg_field_xapian_term(mfid)) { - /* add a normalized version (accents removed, - * lowercase) */ - gchar *norm = mu_str_normalize(str, TRUE); - doc.add_term (std::string (prefix + std::string(norm), 0, - MU_STORE_MAX_TERM_LENGTH)); - g_free (norm); - } /* the value is what we'll display; the unchanged original */ if (mu_msg_field_xapian_value(mfid)) - doc.add_value ((Xapian::valueno)mfid, - value); + doc.add_value ((Xapian::valueno)mfid, val); /* val still equals orig at this point */ + + /* now, let's create some search terms... 
*/ + if (mu_msg_field_normalize (mfid)) + mu_str_normalize_in_place (val, TRUE); + if (mu_msg_field_xapian_escape (mfid)) + mu_str_ascii_xapian_escape_in_place (val); + + if (mu_msg_field_xapian_index (mfid)) { + Xapian::TermGenerator termgen; + termgen.set_document (doc); + termgen.index_text_without_positions (val, 1, prefix); + } + + if (mu_msg_field_xapian_term(mfid)) + doc.add_term (prefix + std::string(val, 0, MU_STORE_MAX_TERM_LENGTH)); /* NOTE(review): only val is capped at MU_STORE_MAX_TERM_LENGTH, so the term can be one prefix character longer; the removed code capped prefix + norm as a whole -- confirm this is intended */ + + g_free (val); /* NOTE(review): not reached if one of the Xapian calls above throws -- confirm whether callers tolerate that leak */ } static void @@ -443,6 +443,9 @@ each_contact_info (MuMsgContact *contact, MsgDoc *data) /* don't normalize e-mail address, but do lowercase it */ if (contact->address && strlen (contact->address)) { char *lower = g_utf8_strdown (contact->address, -1); + + g_strdelimit (lower, "@.", '_'); /* FIXME: replaces every '@' and '.' in the lowercased address with '_' before it is added as a term */ + data->_doc->add_term (std::string (*pfxp + lower, 0, MU_STORE_MAX_TERM_LENGTH)); diff --git a/src/tests/test-mu-query.c b/src/tests/test-mu-query.c index a72e301a..9b154233 100644 --- a/src/tests/test-mu-query.c +++ b/src/tests/test-mu-query.c @@ -66,14 +66,14 @@ run_and_count_matches (const char *xpath, const char *query) mquery = mu_query_new (xpath, NULL); g_assert (query); + + /* g_printerr ("\n=>'%s'\n", query); */ iter = mu_query_run (mquery, query, MU_MSG_FIELD_ID_NONE, FALSE, 1, NULL); mu_query_destroy (mquery); g_assert (iter); - /* g_printerr ("\n=> %s\n", query); */ - for (count = 0; !mu_msg_iter_is_done(iter); mu_msg_iter_next(iter), ++count); @@ -179,10 +179,10 @@ test_mu_query_04 (void) int i; QResults queries[] = { -// { "frodo@example.com", 1}, /* does not match: see mu-find (1) */ + { "frodo@example.com", 1}, /* was disabled as not matching (see mu-find (1)); now enabled and expected to yield 1 match */ { "f:frodo@example.com", 1}, { "f:Frodo Baggins", 1}, -// { "bilbo@anotherexample.com", 1}, /* same things */ + { "bilbo@anotherexample.com", 1}, /* same as the unprefixed frodo case above */ { "t:bilbo@anotherexample.com", 1}, { "t:bilbo", 1}, { "f:bilbo", 0},