* update / add testcases for string normalization

This commit is contained in:
djcb 2012-12-09 13:34:16 +02:00
parent 58599ab8f9
commit 241af50e97
7 changed files with 59 additions and 94 deletions

View File

@ -50,8 +50,8 @@ exec guile -e main -s $0 $@
(n-results-or-exit "file:custer.*" 1)
(n-results-or-exit "j:sit*" 1)
(n-results-or-exit "mime:image/jpeg" 1)
(n-results-or-exit "mime:text/plain" 12)
(n-results-or-exit "y:text*" 12)
(n-results-or-exit "mime:text/plain" 13)
(n-results-or-exit "y:text*" 13)
(n-results-or-exit "y:image*" 1)
(n-results-or-exit "mime:message/rfc822" 2))
@ -97,9 +97,8 @@ exec guile -e main -s $0 $@
;; Checks the average, standard deviation, maximum and minimum computed over
;; mu:size against fixed expected numbers.
;; NOTE(review): this listing is a rendered commit diff, so the old
;; expectations (40859/6, 13413.71...) and the new ones (82054/13, 13002.0)
;; both appear below -- presumably only one pair exists in the real file;
;; confirm before relying on either.
(define (test-stats)
"Test statistical functions."
;; average -- the expected value is written as an exact fraction
(num-equal-or-exit (mu:average mu:size) 40859/6)
;; standard deviation -- the computed value is floored before it is compared
(num-equal-or-exit (floor (mu:stddev mu:size))
(floor 13413.7101616927))
(num-equal-or-exit (mu:average mu:size) 82054/13)
(num-equal-or-exit (floor (mu:stddev mu:size)) 13002.0)
;; largest and smallest value
(num-equal-or-exit (mu:max mu:size) 46230)
(num-equal-or-exit (mu:min mu:size) 111))

View File

@ -314,7 +314,7 @@ add_terms_values_str (Xapian::Document& doc, char *val,
/* now, let's create some search terms... */
if (mu_msg_field_normalize (mfid))
val = mu_str_normalize_in_place_try (val, TRUE, strchunk);
val = mu_str_normalize_in_place (val, TRUE, strchunk);
if (mu_msg_field_xapian_index (mfid)) {
Xapian::TermGenerator termgen;

View File

@ -120,6 +120,7 @@ EXTRA_DIST= \
testdir2/bar/new/.noindex \
testdir2/Foo/cur/mail5 \
testdir2/Foo/cur/arto.eml \
testdir2/Foo/cur/fraiche.eml \
testdir2/Foo/tmp/.noindex \
testdir2/Foo/new/.noindex \
testdir2/wom_bat/cur/atomic \

View File

@ -149,6 +149,9 @@ test_mu_str_normalize_02 (void)
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
gchar *str;
if (g_test_verbose())
g_print ("[%s] <=> [%s] <=> [%s]\n", words[i].word, words[i].norm,
mu_str_normalize (words[i].word, FALSE, NULL));
str = mu_str_normalize (words[i].word, FALSE, NULL);
g_assert_cmpstr (str, ==, words[i].norm);
g_free (str);
@ -357,91 +360,13 @@ test_mu_str_to_list (void)
/* Splits "foo@bar @cuux" with mu_str_to_list, using '@' as the separator and
 * TRUE as the third argument (presumably the "strip" flag this test is named
 * after -- confirm against mu_str_to_list), and compares the result with the
 * items "foo", "bar", "cuux". Note that the input has a space after "bar"
 * which the expected item does not have.
 * NOTE(review): this listing is a rendered commit diff, so the three
 * declaration/assert lines appear twice (old nested-scope form and new flat
 * form) together with both sets of braces -- confirm against the real file. */
static void
test_mu_str_to_list_strip (void)
{
{
const char *items[]= {"foo", "bar", "cuux", NULL};
GSList *lst = mu_str_to_list ("foo@bar @cuux",'@', TRUE);
assert_cmplst (lst, items);
const char *items[]= {"foo", "bar", "cuux", NULL};
GSList *lst = mu_str_to_list ("foo@bar @cuux",'@', TRUE);
assert_cmplst (lst, items);
/* release the list returned by mu_str_to_list */
mu_str_free_list (lst);
}
}
/* static void */
/* test_mu_str_guess_first_name (void) */
/* { */
/* int i; */
/* struct { */
/* char *src, *exp; */
/* } tests[] = { */
/* { "Richard M. Stallman", "Richard M." }, */
/* { "John Rambo", "John" }, */
/* { "Ivanhoe", "Ivanhoe" }, */
/* { "", "" } */
/* }; */
/* for (i = 0; i != G_N_ELEMENTS(tests); ++i) { */
/* gchar *s; */
/* s = mu_str_guess_first_name (tests[i].src); */
/* g_assert_cmpstr (s, ==, tests[i].exp); */
/* g_free (s); */
/* } */
/* } */
/* static void */
/* test_mu_str_guess_last_name (void) */
/* { */
/* int i; */
/* struct { */
/* char *src, *exp; */
/* } tests[] = { */
/* { "Richard M. Stallman", "Stallman" }, */
/* { "John Rambo", "Rambo" }, */
/* { "Ivanhoe", "" }, */
/* { "", "" } */
/* }; */
/* for (i = 0; i != G_N_ELEMENTS(tests); ++i) { */
/* gchar *s; */
/* s = mu_str_guess_last_name (tests[i].src); */
/* g_assert_cmpstr (s, ==, tests[i].exp); */
/* g_free (s); */
/* } */
/* } */
/* static void */
/* test_mu_str_guess_nick (void) */
/* { */
/* int i; */
/* struct { */
/* char *src, *exp; */
/* } tests[] = { */
/* { "Richard M. Stallman", "RichardMS" }, */
/* { "John Rambo", "JohnR" }, */
/* { "Ivanhoe", "Ivanhoe" }, */
/* { "", "" } */
/* }; */
/* for (i = 0; i != G_N_ELEMENTS(tests); ++i) { */
/* gchar *s; */
/* s = mu_str_guess_nick (tests[i].src); */
/* g_assert_cmpstr (s, ==, tests[i].exp); */
/* g_free (s); */
/* } */
/* } */
static void
test_mu_str_subject_normalize (void)
{

View File

@ -0,0 +1,10 @@
From: Sender <test@example.com>
To: Recip <recip@example.com>
Subject: search accents
Date: 2012-12-08 00:48
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
line 1: Глокая куздра штеко будланула бокра и курдячит бокрёнка
line 2: crème fraîche

View File

@ -123,7 +123,7 @@ test_mu_index (void)
store = mu_store_new_read_only (xpath, NULL);
g_assert (store);
g_assert_cmpuint (mu_store_count (store, NULL), ==, 12);
g_assert_cmpuint (mu_store_count (store, NULL), ==, 13);
mu_store_unref (store);
g_free (xpath);
@ -133,12 +133,10 @@ test_mu_index (void)
/* Passes an empty quoted query ("") and a count to search(), which is defined
 * elsewhere -- presumably it checks that the query yields that many matches
 * (TODO confirm).
 * NOTE(review): this listing is a rendered commit diff, so the call appears
 * with both the old count (12) and the new count (13). */
static void
test_mu_find_empty_query (void)
{
search ("\"\"", 12);
search ("\"\"", 13);
}
static void
test_mu_find_01 (void)
{
@ -185,8 +183,8 @@ static void
test_mu_find_mime (void)
{
/* Each call pairs a MIME-type query with a count; search() is defined
 * elsewhere -- presumably it checks that the query yields that many
 * matches (TODO confirm). Both "mime:" and "y:" are used as the field
 * prefix; "y:" looks like a shortcut for "mime:" -- verify. */
search ("mime:image/jpeg", 1);
/* NOTE(review): the text/plain queries appear with both 12 and 13 because
 * this listing is a rendered commit diff (old and new lines interleaved). */
search ("mime:text/plain", 12);
search ("y:text*", 12);
search ("mime:text/plain", 13);
search ("y:text*", 13);
search ("y:image*", 1);
search ("mime:message/rfc822", 2);
}

View File

@ -331,7 +331,7 @@ test_mu_query_accented_chars_02 (void)
{ "t:Kröger", 1},
{ "s:MotorHeäD", 1},
{ "queensryche", 1},
{ "Queensrÿche", 1},
{ "Queensrÿche", 1}
};
for (i = 0; i != G_N_ELEMENTS(queries); ++i)
@ -341,6 +341,35 @@ test_mu_query_accented_chars_02 (void)
}
/*
 * Runs a table of queries against DB_PATH2 and asserts that each of them
 * yields exactly one match. The table holds accented, unaccented,
 * word-swapped and upper-case spellings of "crème fraîche", plus the
 * Cyrillic word "будланула" in lower and upper case.
 */
static void
test_mu_query_accented_chars_fraiche (void)
{
	QResults queries[] = {
		{ "crème fraîche", 1},
		{ "creme fraiche", 1},
		{ "fraîche crème", 1},
		{ "будланула", 1},
		{ "БУДЛАНУЛА", 1},
		{ "CRÈME FRAÎCHE", 1},
		{ "CREME FRAICHE", 1}
	};
	int i = 0;

	while (i != G_N_ELEMENTS(queries)) {
		/* in verbose mode, print the query that is about to run */
		if (g_test_verbose ()) {
			g_print ("'%s'\n", queries[i].query);
		}

		/* the operand expressions are left exactly as they were:
		 * g_assert_cmpuint turns them into the text of its failure
		 * message */
		g_assert_cmpuint (run_and_count_matches (DB_PATH2,
							 queries[i].query),
				  ==, queries[i].count);
		++i;
	}
}
static void
test_mu_query_wildcards (void)
{
@ -600,6 +629,9 @@ main (int argc, char *argv[])
test_mu_query_accented_chars_01);
g_test_add_func ("/mu-query/test-mu-query-accented-chars-2",
test_mu_query_accented_chars_02);
g_test_add_func ("/mu-query/test-mu-query-accented-chars-fraiche",
test_mu_query_accented_chars_fraiche);
g_test_add_func ("/mu-query/test-mu-query-wildcards",
test_mu_query_wildcards);
g_test_add_func ("/mu-query/test-mu-query-sizes",