* mu-query.cc, mu-store.cc: use the escaping for some fields; update test case

This commit is contained in:
Dirk-Jan C. Binnema 2010-11-29 21:29:43 +02:00
parent bb5b1304e5
commit 3d3a4f8308
3 changed files with 39 additions and 111 deletions

View File

@ -288,97 +288,22 @@ mu_query_destroy (MuQuery *self)
g_free (self);
}
/* Search state handed to each_check_prefix while scanning the message
 * fields for one that matches a candidate query prefix. */
typedef struct _CheckPrefix {
	const char *pfx;   /* first character of the candidate prefix */
	guint       len;   /* number of characters in the candidate */
	gboolean    match; /* becomes TRUE once some field matched */
} CheckPrefix;
/* Callback for mu_msg_field_foreach: set cpfx->match to TRUE when the
 * candidate prefix (the first cpfx->len characters at cpfx->pfx) is
 * exactly the one-character shortcut or exactly the name of field mfid.
 *
 * mfid: the field to compare against
 * cpfx: the candidate and the result flag; a NULL cpfx, a NULL
 *       cpfx->pfx or an already-set match makes this a no-op
 */
static void
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
{
	const char *field_name;

	if (!cpfx || !cpfx->pfx || cpfx->match)
		return;

	/* a shortcut is a single character, so only a one-character
	 * candidate can be one; looking at the first character alone
	 * would accept any word that merely starts with that letter */
	if (cpfx->len == 1 &&
	    cpfx->pfx[0] == mu_msg_field_shortcut (mfid)) {
		cpfx->match = TRUE;
		return;
	}

	/* the candidate must be the complete field name: comparing only
	 * cpfx->len characters would also accept a leading fragment of
	 * the name, and would accept anything when cpfx->len is 0 */
	field_name = mu_msg_field_name (mfid);
	if (field_name &&
	    strlen (field_name) == cpfx->len &&
	    strncmp (cpfx->pfx, field_name, cpfx->len) == 0)
		cpfx->match = TRUE;
}
/* Decide whether the word that ends right before a ':' is a registered
 * prefix (like 'subject' or 'from' or 's').
 *
 * q:     start of the query string
 * colon: position inside q that points at a ':' character
 *
 * Returns TRUE when some message field reports a match for the word
 * (see each_check_prefix); FALSE otherwise, including when the ':' is
 * the first character of q.
 */
static gboolean
is_xapian_prefix (const char *q, const char *colon)
{
	const char *start;
	CheckPrefix cpfx;

	if (colon <= q)
		return FALSE; /* : at beginning, not a prefix */

	/* walk back from the character before the colon for as long as
	 * the preceding character is alphabetic, stopping at the start
	 * of the string. The cast matters: passing a negative char
	 * (e.g. a non-ASCII byte) to isalpha is undefined behavior */
	start = colon - 1;
	while (start > q && isalpha ((unsigned char)start[-1]))
		--start;

	cpfx.pfx   = start;
	cpfx.len   = (guint)(colon - start);
	cpfx.match = FALSE;

	mu_msg_field_foreach ((MuMsgFieldForEachFunc)each_check_prefix,
			      &cpfx);

	return cpfx.match;
}
/* Preprocess a search query to make matching a bit more permissive.
 *
 * The returned string is the one produced by mu_str_normalize, after
 * every ':' that is not preceded by a registered prefix has been
 * replaced with a space and the result has been passed through
 * mu_str_normalize_in_place and mu_str_ascii_xapian_escape_in_place.
 * Returns NULL when query is NULL. Presumably the caller releases the
 * result with g_free -- TODO confirm.
 */
char*
mu_query_preprocess (const char *query)
{
gchar *my_query;
gchar *cur;
g_return_val_if_fail (query, NULL);
/* NOTE(review): this copy is overwritten by the mu_str_normalize
 * assignment below without being used or freed. It looks as if lines
 * from two revisions of this function are interleaved here -- confirm
 * which allocation is the intended one. */
my_query = g_strdup (query);
/* Translate the search expression to all lowercase; this
 * fixes some of the false negatives. A full fix
 * probably requires some custom query parser.
 */
my_query = mu_str_normalize(query, TRUE);
for (cur = my_query; *cur; ++cur) {
if (*cur == ':') /* we found a ':' */
/* if there's a registered xapian prefix before the
 * ':', don't touch it. Otherwise replace ':' with
 * a space... ugly...
 */
if (!is_xapian_prefix (my_query, cur))
*cur = ' ';
}
/* remove accents and turn to lower-case */
mu_str_normalize_in_place (my_query, TRUE);
/* escape '@', single '_' and ':' if it's not following a
 * xapian-pfx with '_' */
mu_str_ascii_xapian_escape_in_place (my_query);
return my_query;
}

View File

@ -314,36 +314,36 @@ static void
add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
MuMsgFieldId mfid)
{
/* Store the string content of field mfid of msg in doc: as a Xapian
 * value when mu_msg_field_xapian_value(mfid) holds, as indexed text
 * when mu_msg_field_xapian_index(mfid) holds, and as a single prefixed
 * term when mu_msg_field_xapian_term(mfid) holds. Does nothing when
 * the message has no string for the field.
 *
 * NOTE(review): the statements below appear to interleave two
 * revisions of this function (one built around str/value/norm, one
 * around orig/val); confirm which lines are current before relying
 * on this block.
 */
const char* str;
str = mu_msg_get_field_string (msg, mfid);
if (!str)
const char *orig;
char *val;
orig = mu_msg_get_field_string (msg, mfid);
if (!orig)
return;
const std::string value (str);
/* work on a private copy; the in-place helpers further down modify it */
val = g_strdup (orig);
/* one-character string holding the field's xapian prefix */
const std::string prefix (1, mu_msg_field_xapian_prefix(mfid));
if (mu_msg_field_xapian_index (mfid)) {
Xapian::TermGenerator termgen;
gchar *norm (mu_str_normalize(str, TRUE));
termgen.set_document (doc);
termgen.index_text_without_positions (norm, 1, prefix);
g_free(norm);
}
if (mu_msg_field_xapian_term(mfid)) {
/* add a normalized version (accents removed,
 * lowercase) */
gchar *norm = mu_str_normalize(str, TRUE);
/* the term is cut off at MU_STORE_MAX_TERM_LENGTH characters */
doc.add_term (std::string (prefix + std::string(norm), 0,
MU_STORE_MAX_TERM_LENGTH));
g_free (norm);
}
/* the value is what we'll display; the unchanged original */
if (mu_msg_field_xapian_value(mfid))
doc.add_value ((Xapian::valueno)mfid,
value);
doc.add_value ((Xapian::valueno)mfid, val);
/* now, let's create some search terms... */
/* normalization and escaping are applied only to fields that ask
 * for them, and only after the display value has been stored */
if (mu_msg_field_normalize (mfid))
mu_str_normalize_in_place (val, TRUE);
if (mu_msg_field_xapian_escape (mfid))
mu_str_ascii_xapian_escape_in_place (val);
if (mu_msg_field_xapian_index (mfid)) {
Xapian::TermGenerator termgen;
termgen.set_document (doc);
termgen.index_text_without_positions (val, 1, prefix);
}
if (mu_msg_field_xapian_term(mfid))
/* here only val is cut off at MU_STORE_MAX_TERM_LENGTH; the prefix
 * is prepended afterwards */
doc.add_term (prefix + std::string(val, 0, MU_STORE_MAX_TERM_LENGTH));
/* release the working copy made with g_strdup above */
g_free (val);
}
static void
@ -443,6 +443,9 @@ each_contact_info (MuMsgContact *contact, MsgDoc *data)
/* don't normalize e-mail address, but do lowercase it */
if (contact->address && strlen (contact->address)) {
char *lower = g_utf8_strdown (contact->address, -1);
g_strdelimit (lower, "@.", '_'); /* FIXME */
data->_doc->add_term
(std::string (*pfxp + lower, 0,
MU_STORE_MAX_TERM_LENGTH));

View File

@ -66,14 +66,14 @@ run_and_count_matches (const char *xpath, const char *query)
mquery = mu_query_new (xpath, NULL);
g_assert (query);
/* g_printerr ("\n=>'%s'\n", query); */
iter = mu_query_run (mquery, query, MU_MSG_FIELD_ID_NONE,
FALSE, 1, NULL);
mu_query_destroy (mquery);
g_assert (iter);
/* g_printerr ("\n=> %s\n", query); */
for (count = 0; !mu_msg_iter_is_done(iter);
mu_msg_iter_next(iter), ++count);
@ -179,10 +179,10 @@ test_mu_query_04 (void)
int i;
QResults queries[] = {
// { "frodo@example.com", 1}, /* does not match: see mu-find (1) */
{ "frodo@example.com", 1}, /* does not match: see mu-find (1) */
{ "f:frodo@example.com", 1},
{ "f:Frodo Baggins", 1},
// { "bilbo@anotherexample.com", 1}, /* same things */
{ "bilbo@anotherexample.com", 1}, /* same things */
{ "t:bilbo@anotherexample.com", 1},
{ "t:bilbo", 1},
{ "f:bilbo", 0},