* mu-str.[ch]: add mu_str_ascii_xapian_escape_in_place, for escaping some

Xapian fields; also add some tests
This commit is contained in:
Dirk-Jan C. Binnema 2010-11-29 21:21:55 +02:00
parent c6dadad978
commit bb5b1304e5
3 changed files with 180 additions and 4 deletions

View File

@ -30,7 +30,7 @@
#include "mu-str.h"
#include "mu-msg-flags.h"
#include "mu-msg-fields.h"
const char*
mu_str_date_s (const char* frm, time_t t)
@ -226,3 +226,104 @@ mu_date_parse_hdwmy (const char* str)
return delta <= now ? now - delta : never;
}
/* state handed to each_check_prefix while iterating over all message
 * fields: the candidate prefix and whether a field matched it */
typedef struct _CheckPrefix {
	const char *pfx;   /* start of the candidate prefix (not NUL-terminated) */
	guint       len;   /* number of characters in front of the ':' */
	gboolean    match; /* set to TRUE once some field matches */
} CheckPrefix;
/* callback for mu_msg_field_foreach: set cpfx->match to TRUE when the
 * candidate prefix in cpfx is either the one-character shortcut of
 * field mfid (directly followed by ':') or its complete name.
 *
 * does nothing when cpfx is NULL or when an earlier field already
 * matched. */
static void
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
{
	const char *field_name;
	char field_shortcut;

	if (!cpfx || cpfx->match)
		return;

	field_shortcut = mu_msg_field_shortcut (mfid);
	if (field_shortcut == cpfx->pfx[0] && cpfx->pfx[1] == ':') {
		cpfx->match = TRUE;
		return;
	}

	/* the candidate must be the *whole* field name; comparing only
	 * the first cpfx->len bytes would also accept truncated names
	 * such as "sub:" for "subject". When strncmp matches, field_name
	 * is at least cpfx->len characters long, so looking at
	 * field_name[cpfx->len] is safe. */
	field_name = mu_msg_field_name (mfid);
	if (field_name &&
	    strncmp (cpfx->pfx, field_name, cpfx->len) == 0 &&
	    field_name[cpfx->len] == '\0')
		cpfx->match = TRUE;
}
/* colon is a position inside q pointing at a ':' character. function
* determines whether the prefix is a registered prefix (like
* 'subject' or 'from' or 's') */
static gboolean
is_xapian_prefix (const char *q, const char *colon)
{
const char *cur;
if (colon == q)
return FALSE; /* : at beginning, not a prefix */
/* track back from colon until a boundary or beginning of the
* str */
for (cur = colon - 1; cur >= q; --cur) {
if (cur == q || !isalpha (*(cur-1))) {
CheckPrefix cpfx;
memset (&cpfx, 0, sizeof(CheckPrefix));
cpfx.pfx = cur;
cpfx.len = (colon - cur);
cpfx.match = FALSE;
mu_msg_field_foreach ((MuMsgFieldForEachFunc)
each_check_prefix,
&cpfx);
return (cpfx.match);
}
}
return FALSE;
}
/* escape the ASCII string query in place for xapian matching:
 *  - '@' becomes '_'
 *  - if the string contains an '@', a single '.' becomes '_'; runs of
 *    two or more dots are left untouched
 *  - ':' becomes '_' unless is_xapian_prefix accepts the word in
 *    front of it
 *  - every other character is downcased
 *
 * returns query, or NULL when query is NULL. */
char*
mu_str_ascii_xapian_escape_in_place (char *query)
{
	gchar *cur;
	gboolean replace_dot;

	g_return_val_if_fail (query, NULL);

	/* only replace the '.' if the string looks like an e-mail
	 * address or msg-id */
	replace_dot = (g_strstr_len (query, -1, "@") != NULL);

	for (cur = query; *cur; ++cur) {
		if (*cur == '@')
			*cur = '_';
		else if (replace_dot && *cur == '.') {
			if (cur[1] == '.') {
				/* don't replace '..'; stop on the last
				 * dot of the run, so the loop increment
				 * lands on the character right after it
				 * and never steps over that character or
				 * the terminating NUL */
				while (cur[1] == '.')
					++cur;
			} else
				*cur = '_';
		} else if (*cur == ':') {
			/* if there's a registered xapian prefix before
			 * the ':', don't touch it. Otherwise replace ':'
			 * with '_'. The characters in front of the
			 * colon have already been downcased here.
			 */
			if (!is_xapian_prefix (query, cur))
				*cur = '_';
		} else
			/* tolower needs an unsigned char value */
			*cur = (char)tolower ((unsigned char)*cur);
	}

	return query;
}

View File

@ -138,7 +138,7 @@ char* mu_str_summarize (const char* str,
* 'Latin-1 Supplement' and 'Latin Extended-A'
*
* @param str a valid utf8 string or NULL
* @param downcase if TRUE, convert the string to lowercase
* @param downcase if TRUE, convert the string to lowercase
*
* @return the normalized string, or NULL in case of error or if str was NULL
*/
@ -153,12 +153,29 @@ char* mu_str_normalize (const char *str, gboolean downcase);
*
* @param str a valid utf8 string or NULL
* @param downcase if TRUE, convert the string to lowercase
*
* @return the normalize string, or NULL in case of error or str was NULL
*
* @return the normalized string, or NULL in case of error or str was
* NULL
*/
char* mu_str_normalize_in_place (char *str, gboolean downcase);
/**
 * escape the string for use with xapian matching. In practice, '@' is
 * replaced with '_' and, if the string contains an '@', every single
 * '.' is replaced with '_' as well; a '.' that is directly followed
 * by another '.' is not replaced. Also, ':' is replaced with '_'
 * if it's not following a xapian-prefix (such as 'subject:', 't:'
 * etc, as defined in mu-msg-fields.[ch]). Changing is done in-place
 * (by changing the argument string). In any case, the string will be
 * downcased.
 *
 * works for ascii strings, like e-mail addresses and message-id.
 *
 * @param query a query string; it is modified in place
 *
 * @return the escaped string (the same pointer as query), or NULL if
 * query was NULL
 */
char* mu_str_ascii_xapian_escape_in_place (char *query);
/**
*
* parse strings like 1h, 3w, 2m to mean '1 hour before now', '3 weeks

View File

@ -152,6 +152,59 @@ test_mu_str_normalize_01 (void)
}
static void
test_mu_str_normalize_02 (void)
{
int i;
struct {
const char* word;
const char* norm;
} words [] = {
{ "DantèS", "DanteS"},
{ "foo", "foo" },
{ "Föö", "Foo" },
{ "číslO", "cislO" },
{ "hÆvý mëÐal ümláõt", "hAevy meDal umlaot"}
};
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
gchar *str;
str = mu_str_normalize (words[i].word, FALSE);
g_assert_cmpstr (str, ==, words[i].norm);
g_free (str);
}
}
static void
test_mu_str_ascii_xapian_escape (void)
{
int i;
struct {
const char* word;
const char* esc;
} words [] = {
{ "aap@noot.mies", "aap_noot_mies"},
{ "Foo..Bar", "foo..bar" },
{ "subject:test@foo", "subject:test_foo" },
{ "xxx:test@bar", "xxx_test_bar" },
};
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
gchar *a = g_strdup (words[i].word);
mu_str_ascii_xapian_escape_in_place (a);
g_assert_cmpstr (a, ==, words[i].esc);
g_free (a);
}
}
#if 0
static void
@ -233,6 +286,11 @@ main (int argc, char *argv[])
/* mu_str_normalize */
g_test_add_func ("/mu-str/mu-str-normalize-01",
test_mu_str_normalize_01);
g_test_add_func ("/mu-str/mu-str-normalize-02",
test_mu_str_normalize_02);
g_test_add_func ("/mu-str/mu-str-ascii-xapian-escape",
test_mu_str_ascii_xapian_escape);
/* mu_str_complete_iso_date_(begin|end) */
/* g_test_add_func ("/mu-str/mu-str-complete-iso-date-begin", */