mirror of https://github.com/djcb/mu.git
* mu-query.cc, mu-store.cc: use the escaping for some fields; update test case
This commit is contained in:
parent
bb5b1304e5
commit
3d3a4f8308
|
@ -288,97 +288,22 @@ mu_query_destroy (MuQuery *self)
|
|||
g_free (self);
|
||||
}
|
||||
|
||||
struct _CheckPrefix {
|
||||
const char *pfx;
|
||||
guint len;
|
||||
gboolean match;
|
||||
};
|
||||
typedef struct _CheckPrefix CheckPrefix;
|
||||
|
||||
/* Callback for mu_msg_field_foreach: flag cpfx->match when the candidate
 * prefix in CPFX is exactly the shortcut character or exactly the name
 * of field MFID.
 *
 * mfid: the field being visited
 * cpfx: candidate prefix (pfx/len) and the result flag (match); once
 *       match is TRUE the remaining fields are skipped
 */
static void
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
{
	const char *field_name;

	if (!cpfx || !cpfx->pfx || cpfx->match)
		return;

	/* a shortcut is one character, so only a one-character candidate
	 * can match it; comparing just the first character would accept
	 * any word that merely starts with a shortcut letter */
	if (cpfx->len == 1 &&
	    cpfx->pfx[0] == mu_msg_field_shortcut (mfid)) {
		cpfx->match = TRUE;
		return;
	}

	/* the candidate is not NUL-terminated at len, so compare the
	 * lengths first and then exactly len characters; this rejects
	 * abbreviations such as "sub" for "subject" */
	field_name = mu_msg_field_name (mfid);
	if (field_name &&
	    strlen (field_name) == cpfx->len &&
	    strncmp (cpfx->pfx, field_name, cpfx->len) == 0)
		cpfx->match = TRUE;
}
|
||||
|
||||
|
||||
/* colon is a position inside q pointing at a ':' character. The
 * function determines whether the word directly before the colon is a
 * registered prefix (like 'subject' or 'from' or 's').
 *
 * q:     start of the query string
 * colon: position of the ':' inside q
 *
 * Returns TRUE when some message field reports a match for the word,
 * FALSE otherwise (including a ':' at the very start of q).
 */
static gboolean
is_xapian_prefix (const char *q, const char *colon)
{
	const char *start;
	CheckPrefix cpfx;

	if (!q || !colon || colon <= q)
		return FALSE; /* : at beginning, not a prefix */

	/* track back from the character before the colon until a
	 * non-alphabetic boundary or the beginning of the string; the
	 * cast is needed because passing a negative char to isalpha is
	 * undefined */
	start = colon - 1;
	while (start > q && isalpha ((unsigned char) start[-1]))
		--start;

	cpfx.pfx   = start;
	cpfx.len   = (guint) (colon - start);
	cpfx.match = FALSE;

	mu_msg_field_foreach ((MuMsgFieldForEachFunc) each_check_prefix,
			      &cpfx);

	return cpfx.match;
}
|
||||
|
||||
/* preprocess a query to make them a bit more permissive */
|
||||
char*
|
||||
mu_query_preprocess (const char *query)
|
||||
{
|
||||
gchar *my_query;
|
||||
gchar *cur;
|
||||
|
||||
g_return_val_if_fail (query, NULL);
|
||||
my_query = g_strdup (query);
|
||||
|
||||
/* translate the the searchexpr to all lowercase; this
|
||||
* will fixes some of the false-negatives. A full fix
|
||||
* probably requires some custom query parser.
|
||||
*/
|
||||
my_query = mu_str_normalize(query, TRUE);
|
||||
|
||||
for (cur = my_query; *cur; ++cur) {
|
||||
if (*cur == ':') /* we found a ':' */
|
||||
/* if there's a registered xapian prefix before the
|
||||
* ':', don't touch it. Otherwise replace ':' with
|
||||
* a space'... ugly...
|
||||
*/
|
||||
if (!is_xapian_prefix (my_query, cur))
|
||||
*cur = ' ';
|
||||
}
|
||||
|
||||
/* remove accents and turn to lower-case */
|
||||
mu_str_normalize_in_place (my_query, TRUE);
|
||||
/* escape '@', single '_' and ':' if it's not following a
|
||||
* xapian-pfx with '_' */
|
||||
mu_str_ascii_xapian_escape_in_place (my_query);
|
||||
|
||||
return my_query;
|
||||
}
|
||||
|
||||
|
|
|
@ -314,36 +314,36 @@ static void
|
|||
add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
			 MuMsgFieldId mfid)
{
	/* Store the string field MFID of MSG in DOC: the unchanged
	 * string as a value (when the field asks for one), and a
	 * normalized and/or escaped form as indexed text and/or as a
	 * term, depending on the field's flags. Does nothing when the
	 * message has no string for the field. */
	const char *orig = mu_msg_get_field_string (msg, mfid);
	if (!orig)
		return;

	const std::string prefix (1, mu_msg_field_xapian_prefix (mfid));

	/* the value is what we'll display; the unchanged original */
	if (mu_msg_field_xapian_value (mfid))
		doc.add_value ((Xapian::valueno)mfid, orig);

	/* now, let's create some search terms; the in-place helpers
	 * need a writable copy */
	char *val = g_strdup (orig);
	if (mu_msg_field_normalize (mfid))
		mu_str_normalize_in_place (val, TRUE);
	if (mu_msg_field_xapian_escape (mfid))
		mu_str_ascii_xapian_escape_in_place (val);

	/* hand the text over to a std::string and release the C buffer
	 * right away, so it is not left behind if a Xapian call below
	 * throws */
	const std::string text (val);
	g_free (val);

	if (mu_msg_field_xapian_index (mfid)) {
		Xapian::TermGenerator termgen;
		termgen.set_document (doc);
		termgen.index_text_without_positions (text, 1, prefix);
	}

	/* limit the whole term, prefix included, to the maximum term
	 * length */
	if (mu_msg_field_xapian_term (mfid))
		doc.add_term (std::string (prefix + text, 0,
					   MU_STORE_MAX_TERM_LENGTH));
}
|
||||
|
||||
static void
|
||||
|
@ -443,6 +443,9 @@ each_contact_info (MuMsgContact *contact, MsgDoc *data)
|
|||
/* don't normalize e-mail address, but do lowercase it */
|
||||
if (contact->address && strlen (contact->address)) {
|
||||
char *lower = g_utf8_strdown (contact->address, -1);
|
||||
|
||||
g_strdelimit (lower, "@.", '_'); /* FIXME */
|
||||
|
||||
data->_doc->add_term
|
||||
(std::string (*pfxp + lower, 0,
|
||||
MU_STORE_MAX_TERM_LENGTH));
|
||||
|
|
|
@ -66,14 +66,14 @@ run_and_count_matches (const char *xpath, const char *query)
|
|||
|
||||
mquery = mu_query_new (xpath, NULL);
|
||||
g_assert (query);
|
||||
|
||||
/* g_printerr ("\n=>'%s'\n", query); */
|
||||
|
||||
iter = mu_query_run (mquery, query, MU_MSG_FIELD_ID_NONE,
|
||||
FALSE, 1, NULL);
|
||||
mu_query_destroy (mquery);
|
||||
g_assert (iter);
|
||||
|
||||
/* g_printerr ("\n=> %s\n", query); */
|
||||
|
||||
for (count = 0; !mu_msg_iter_is_done(iter);
|
||||
mu_msg_iter_next(iter), ++count);
|
||||
|
||||
|
@ -179,10 +179,10 @@ test_mu_query_04 (void)
|
|||
int i;
|
||||
|
||||
QResults queries[] = {
|
||||
// { "frodo@example.com", 1}, /* does not match: see mu-find (1) */
|
||||
{ "frodo@example.com", 1}, /* does not match: see mu-find (1) */
|
||||
{ "f:frodo@example.com", 1},
|
||||
{ "f:Frodo Baggins", 1},
|
||||
// { "bilbo@anotherexample.com", 1}, /* same things */
|
||||
{ "bilbo@anotherexample.com", 1}, /* same things */
|
||||
{ "t:bilbo@anotherexample.com", 1},
|
||||
{ "t:bilbo", 1},
|
||||
{ "f:bilbo", 0},
|
||||
|
|
Loading…
Reference in New Issue