mirror of https://github.com/djcb/mu.git
* mu-str.[ch]: add mu_str_ascii_xapian_escape_in_place, for escaping some
Xapian fields; also add some tests
This commit is contained in:
parent
c6dadad978
commit
bb5b1304e5
103
src/mu-str.c
103
src/mu-str.c
|
@ -30,7 +30,7 @@
|
||||||
|
|
||||||
#include "mu-str.h"
|
#include "mu-str.h"
|
||||||
#include "mu-msg-flags.h"
|
#include "mu-msg-flags.h"
|
||||||
|
#include "mu-msg-fields.h"
|
||||||
|
|
||||||
const char*
|
const char*
|
||||||
mu_str_date_s (const char* frm, time_t t)
|
mu_str_date_s (const char* frm, time_t t)
|
||||||
|
@ -226,3 +226,104 @@ mu_date_parse_hdwmy (const char* str)
|
||||||
return delta <= now ? now - delta : never;
|
return delta <= now ? now - delta : never;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct _CheckPrefix {
|
||||||
|
const char *pfx;
|
||||||
|
guint len;
|
||||||
|
gboolean match;
|
||||||
|
};
|
||||||
|
typedef struct _CheckPrefix CheckPrefix;
|
||||||
|
|
||||||
|
static void
|
||||||
|
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
|
||||||
|
{
|
||||||
|
const char *field_name;
|
||||||
|
char field_shortcut;
|
||||||
|
|
||||||
|
if (!cpfx || cpfx->match)
|
||||||
|
return;
|
||||||
|
|
||||||
|
field_shortcut = mu_msg_field_shortcut (mfid);
|
||||||
|
if (field_shortcut == cpfx->pfx[0] && cpfx->pfx[1] == ':') {
|
||||||
|
cpfx->match = TRUE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
field_name = mu_msg_field_name (mfid);
|
||||||
|
if (field_name &&
|
||||||
|
strncmp (cpfx->pfx, field_name, cpfx->len) == 0) {
|
||||||
|
cpfx->match = TRUE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* colon is a position inside q pointing at a ':' character. function
|
||||||
|
* determines whether the prefix is a registered prefix (like
|
||||||
|
* 'subject' or 'from' or 's') */
|
||||||
|
static gboolean
|
||||||
|
is_xapian_prefix (const char *q, const char *colon)
|
||||||
|
{
|
||||||
|
const char *cur;
|
||||||
|
|
||||||
|
if (colon == q)
|
||||||
|
return FALSE; /* : at beginning, not a prefix */
|
||||||
|
|
||||||
|
/* track back from colon until a boundary or beginning of the
|
||||||
|
* str */
|
||||||
|
for (cur = colon - 1; cur >= q; --cur) {
|
||||||
|
|
||||||
|
if (cur == q || !isalpha (*(cur-1))) {
|
||||||
|
|
||||||
|
CheckPrefix cpfx;
|
||||||
|
memset (&cpfx, 0, sizeof(CheckPrefix));
|
||||||
|
|
||||||
|
cpfx.pfx = cur;
|
||||||
|
cpfx.len = (colon - cur);
|
||||||
|
cpfx.match = FALSE;
|
||||||
|
|
||||||
|
mu_msg_field_foreach ((MuMsgFieldForEachFunc)
|
||||||
|
each_check_prefix,
|
||||||
|
&cpfx);
|
||||||
|
|
||||||
|
return (cpfx.match);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
char*
|
||||||
|
mu_str_ascii_xapian_escape_in_place (char *query)
|
||||||
|
{
|
||||||
|
gchar *cur;
|
||||||
|
gboolean replace_dot;
|
||||||
|
|
||||||
|
g_return_val_if_fail (query, NULL);
|
||||||
|
|
||||||
|
/* only replace the '.' if the string looks like an e-mail
|
||||||
|
* address or msg-id */
|
||||||
|
replace_dot = (g_strstr_len(query, -1, "@") != NULL);
|
||||||
|
|
||||||
|
for (cur = query; *cur; ++cur) {
|
||||||
|
if (*cur == '@')
|
||||||
|
*cur = '_';
|
||||||
|
|
||||||
|
else if (replace_dot && *cur == '.') {
|
||||||
|
if (cur[1] == '.') /* don't replace '..' */
|
||||||
|
cur += 2;
|
||||||
|
else
|
||||||
|
*cur = '_';
|
||||||
|
} else if (*cur == ':') {
|
||||||
|
/* if there's a registered xapian prefix before the
|
||||||
|
* ':', don't touch it. Otherwise replace ':' with
|
||||||
|
* a space'... ugly...
|
||||||
|
*/
|
||||||
|
if (!is_xapian_prefix (query, cur))
|
||||||
|
*cur = '_';
|
||||||
|
} else
|
||||||
|
*cur = tolower(*cur);
|
||||||
|
}
|
||||||
|
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
23
src/mu-str.h
23
src/mu-str.h
|
@ -138,7 +138,7 @@ char* mu_str_summarize (const char* str,
|
||||||
* 'Latin-1 Supplement' and 'Latin Extended-A'
|
* 'Latin-1 Supplement' and 'Latin Extended-A'
|
||||||
*
|
*
|
||||||
* @param str a valid utf8 string or NULL
|
* @param str a valid utf8 string or NULL
|
||||||
* @param downcase if TRUE, convert the string to lowercase
|
* @param downcase if TRUE, convert the string to lowercase
|
||||||
*
|
*
|
||||||
* @return the normalize string, or NULL in case of error or str was NULL
|
* @return the normalize string, or NULL in case of error or str was NULL
|
||||||
*/
|
*/
|
||||||
|
@ -153,12 +153,29 @@ char* mu_str_normalize (const char *str, gboolean downcase);
|
||||||
*
|
*
|
||||||
* @param str a valid utf8 string or NULL
|
* @param str a valid utf8 string or NULL
|
||||||
* @param downcase if TRUE, convert the string to lowercase
|
* @param downcase if TRUE, convert the string to lowercase
|
||||||
*
|
*
|
||||||
* @return the normalize string, or NULL in case of error or str was NULL
|
* @return the normalized string, or NULL in case of error or str was
|
||||||
|
* NULL
|
||||||
*/
|
*/
|
||||||
char* mu_str_normalize_in_place (char *str, gboolean downcase);
|
char* mu_str_normalize_in_place (char *str, gboolean downcase);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* escape the string for use with xapian matching. in practice, if the
|
||||||
|
* string contains an '@', replace '@', single-'.' with '_'. Also,
|
||||||
|
* replace ':' with '_', if it's not following a xapian-prefix (such
|
||||||
|
* as 'subject:', 't:' etc, as defined in mu-msg-fields.[ch]).
|
||||||
|
* changing is done in-place (by changing the argument string). in
|
||||||
|
* any case, the string will be downcased.
|
||||||
|
*
|
||||||
|
* works for ascii strings, like e-mail addresses and message-id.
|
||||||
|
*
|
||||||
|
* @param query a query string
|
||||||
|
*
|
||||||
|
* @return the escaped string or NULL in case of error
|
||||||
|
*/
|
||||||
|
char* mu_str_ascii_xapian_escape_in_place (char *query);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* parse strings like 1h, 3w, 2m to mean '1 hour before now', '3 weeks
|
* parse strings like 1h, 3w, 2m to mean '1 hour before now', '3 weeks
|
||||||
|
|
|
@ -152,6 +152,59 @@ test_mu_str_normalize_01 (void)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_mu_str_normalize_02 (void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct {
|
||||||
|
const char* word;
|
||||||
|
const char* norm;
|
||||||
|
} words [] = {
|
||||||
|
{ "DantèS", "DanteS"},
|
||||||
|
{ "foo", "foo" },
|
||||||
|
{ "Föö", "Foo" },
|
||||||
|
{ "číslO", "cislO" },
|
||||||
|
{ "hÆvý mëÐal ümláõt", "hAevy meDal umlaot"}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
|
||||||
|
gchar *str;
|
||||||
|
str = mu_str_normalize (words[i].word, FALSE);
|
||||||
|
g_assert_cmpstr (str, ==, words[i].norm);
|
||||||
|
g_free (str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_mu_str_ascii_xapian_escape (void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct {
|
||||||
|
const char* word;
|
||||||
|
const char* esc;
|
||||||
|
} words [] = {
|
||||||
|
{ "aap@noot.mies", "aap_noot_mies"},
|
||||||
|
{ "Foo..Bar", "foo..bar" },
|
||||||
|
{ "subject:test@foo", "subject:test_foo" },
|
||||||
|
{ "xxx:test@bar", "xxx_test_bar" },
|
||||||
|
};
|
||||||
|
|
||||||
|
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
|
||||||
|
gchar *a = g_strdup (words[i].word);
|
||||||
|
mu_str_ascii_xapian_escape_in_place (a);
|
||||||
|
g_assert_cmpstr (a, ==, words[i].esc);
|
||||||
|
g_free (a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -233,6 +286,11 @@ main (int argc, char *argv[])
|
||||||
/* mu_str_normalize */
|
/* mu_str_normalize */
|
||||||
g_test_add_func ("/mu-str/mu-str-normalize-01",
|
g_test_add_func ("/mu-str/mu-str-normalize-01",
|
||||||
test_mu_str_normalize_01);
|
test_mu_str_normalize_01);
|
||||||
|
g_test_add_func ("/mu-str/mu-str-normalize-02",
|
||||||
|
test_mu_str_normalize_02);
|
||||||
|
|
||||||
|
g_test_add_func ("/mu-str/mu-str-ascii-xapian-escape",
|
||||||
|
test_mu_str_ascii_xapian_escape);
|
||||||
|
|
||||||
/* mu_str_complete_iso_date_(begin|end) */
|
/* mu_str_complete_iso_date_(begin|end) */
|
||||||
/* g_test_add_func ("/mu-str/mu-str-complete-iso-date-begin", */
|
/* g_test_add_func ("/mu-str/mu-str-complete-iso-date-begin", */
|
||||||
|
|
Loading…
Reference in New Issue