mirror of https://github.com/djcb/mu.git
* mu-str.[ch]: add mu_str_ascii_xapian_escape_in_place, for escaping some
Xapian fields; also add some tests
This commit is contained in:
parent
c6dadad978
commit
bb5b1304e5
103
src/mu-str.c
103
src/mu-str.c
|
@ -30,7 +30,7 @@
|
|||
|
||||
#include "mu-str.h"
|
||||
#include "mu-msg-flags.h"
|
||||
|
||||
#include "mu-msg-fields.h"
|
||||
|
||||
const char*
|
||||
mu_str_date_s (const char* frm, time_t t)
|
||||
|
@ -226,3 +226,104 @@ mu_date_parse_hdwmy (const char* str)
|
|||
return delta <= now ? now - delta : never;
|
||||
}
|
||||
|
||||
|
||||
struct _CheckPrefix {
|
||||
const char *pfx;
|
||||
guint len;
|
||||
gboolean match;
|
||||
};
|
||||
typedef struct _CheckPrefix CheckPrefix;
|
||||
|
||||
static void
|
||||
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
|
||||
{
|
||||
const char *field_name;
|
||||
char field_shortcut;
|
||||
|
||||
if (!cpfx || cpfx->match)
|
||||
return;
|
||||
|
||||
field_shortcut = mu_msg_field_shortcut (mfid);
|
||||
if (field_shortcut == cpfx->pfx[0] && cpfx->pfx[1] == ':') {
|
||||
cpfx->match = TRUE;
|
||||
return;
|
||||
}
|
||||
|
||||
field_name = mu_msg_field_name (mfid);
|
||||
if (field_name &&
|
||||
strncmp (cpfx->pfx, field_name, cpfx->len) == 0) {
|
||||
cpfx->match = TRUE;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* colon is a position inside q pointing at a ':' character. function
|
||||
* determines whether the prefix is a registered prefix (like
|
||||
* 'subject' or 'from' or 's') */
|
||||
static gboolean
|
||||
is_xapian_prefix (const char *q, const char *colon)
|
||||
{
|
||||
const char *cur;
|
||||
|
||||
if (colon == q)
|
||||
return FALSE; /* : at beginning, not a prefix */
|
||||
|
||||
/* track back from colon until a boundary or beginning of the
|
||||
* str */
|
||||
for (cur = colon - 1; cur >= q; --cur) {
|
||||
|
||||
if (cur == q || !isalpha (*(cur-1))) {
|
||||
|
||||
CheckPrefix cpfx;
|
||||
memset (&cpfx, 0, sizeof(CheckPrefix));
|
||||
|
||||
cpfx.pfx = cur;
|
||||
cpfx.len = (colon - cur);
|
||||
cpfx.match = FALSE;
|
||||
|
||||
mu_msg_field_foreach ((MuMsgFieldForEachFunc)
|
||||
each_check_prefix,
|
||||
&cpfx);
|
||||
|
||||
return (cpfx.match);
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
char*
|
||||
mu_str_ascii_xapian_escape_in_place (char *query)
|
||||
{
|
||||
gchar *cur;
|
||||
gboolean replace_dot;
|
||||
|
||||
g_return_val_if_fail (query, NULL);
|
||||
|
||||
/* only replace the '.' if the string looks like an e-mail
|
||||
* address or msg-id */
|
||||
replace_dot = (g_strstr_len(query, -1, "@") != NULL);
|
||||
|
||||
for (cur = query; *cur; ++cur) {
|
||||
if (*cur == '@')
|
||||
*cur = '_';
|
||||
|
||||
else if (replace_dot && *cur == '.') {
|
||||
if (cur[1] == '.') /* don't replace '..' */
|
||||
cur += 2;
|
||||
else
|
||||
*cur = '_';
|
||||
} else if (*cur == ':') {
|
||||
/* if there's a registered xapian prefix before the
|
||||
* ':', don't touch it. Otherwise replace ':' with
|
||||
* a space'... ugly...
|
||||
*/
|
||||
if (!is_xapian_prefix (query, cur))
|
||||
*cur = '_';
|
||||
} else
|
||||
*cur = tolower(*cur);
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
|
|
23
src/mu-str.h
23
src/mu-str.h
|
@ -138,7 +138,7 @@ char* mu_str_summarize (const char* str,
|
|||
* 'Latin-1 Supplement' and 'Latin Extended-A'
|
||||
*
|
||||
* @param str a valid utf8 string or NULL
|
||||
* @param downcase if TRUE, convert the string to lowercase
|
||||
* @param downcase if TRUE, convert the string to lowercase
|
||||
*
|
||||
* @return the normalize string, or NULL in case of error or str was NULL
|
||||
*/
|
||||
|
@ -153,12 +153,29 @@ char* mu_str_normalize (const char *str, gboolean downcase);
|
|||
*
|
||||
* @param str a valid utf8 string or NULL
|
||||
* @param downcase if TRUE, convert the string to lowercase
|
||||
*
|
||||
* @return the normalize string, or NULL in case of error or str was NULL
|
||||
*
|
||||
* @return the normalized string, or NULL in case of error or str was
|
||||
* NULL
|
||||
*/
|
||||
char* mu_str_normalize_in_place (char *str, gboolean downcase);
|
||||
|
||||
|
||||
/**
|
||||
* escape the string for use with xapian matching. in practice, if the
|
||||
* string contains an '@', replace '@', single-'.' with '_'. Also,
|
||||
* replace ':' with '_', if it's not following a xapian-prefix (such
|
||||
* as 'subject:', 't:' etc, as defined in mu-msg-fields.[ch]).
|
||||
* changing is done in-place (by changing the argument string). in
|
||||
 * any case, the string will be downcased.
|
||||
*
|
||||
* works for ascii strings, like e-mail addresses and message-id.
|
||||
*
|
||||
* @param query a query string
|
||||
*
|
||||
* @return the escaped string or NULL in case of error
|
||||
*/
|
||||
char* mu_str_ascii_xapian_escape_in_place (char *query);
|
||||
|
||||
/**
|
||||
*
|
||||
* parse strings like 1h, 3w, 2m to mean '1 hour before now', '3 weeks
|
||||
|
|
|
@ -152,6 +152,59 @@ test_mu_str_normalize_01 (void)
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
test_mu_str_normalize_02 (void)
|
||||
{
|
||||
int i;
|
||||
struct {
|
||||
const char* word;
|
||||
const char* norm;
|
||||
} words [] = {
|
||||
{ "DantèS", "DanteS"},
|
||||
{ "foo", "foo" },
|
||||
{ "Föö", "Foo" },
|
||||
{ "číslO", "cislO" },
|
||||
{ "hÆvý mëÐal ümláõt", "hAevy meDal umlaot"}
|
||||
};
|
||||
|
||||
|
||||
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
|
||||
gchar *str;
|
||||
str = mu_str_normalize (words[i].word, FALSE);
|
||||
g_assert_cmpstr (str, ==, words[i].norm);
|
||||
g_free (str);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
test_mu_str_ascii_xapian_escape (void)
|
||||
{
|
||||
int i;
|
||||
struct {
|
||||
const char* word;
|
||||
const char* esc;
|
||||
} words [] = {
|
||||
{ "aap@noot.mies", "aap_noot_mies"},
|
||||
{ "Foo..Bar", "foo..bar" },
|
||||
{ "subject:test@foo", "subject:test_foo" },
|
||||
{ "xxx:test@bar", "xxx_test_bar" },
|
||||
};
|
||||
|
||||
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
|
||||
gchar *a = g_strdup (words[i].word);
|
||||
mu_str_ascii_xapian_escape_in_place (a);
|
||||
g_assert_cmpstr (a, ==, words[i].esc);
|
||||
g_free (a);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
static void
|
||||
|
@ -233,6 +286,11 @@ main (int argc, char *argv[])
|
|||
/* mu_str_normalize */
|
||||
g_test_add_func ("/mu-str/mu-str-normalize-01",
|
||||
test_mu_str_normalize_01);
|
||||
g_test_add_func ("/mu-str/mu-str-normalize-02",
|
||||
test_mu_str_normalize_02);
|
||||
|
||||
g_test_add_func ("/mu-str/mu-str-ascii-xapian-escape",
|
||||
test_mu_str_ascii_xapian_escape);
|
||||
|
||||
/* mu_str_complete_iso_date_(begin|end) */
|
||||
/* g_test_add_func ("/mu-str/mu-str-complete-iso-date-begin", */
|
||||
|
|
Loading…
Reference in New Issue