diff --git a/src/mu-msg-str.c b/src/mu-msg-str.c
index b5a8bbcd..705f6c0e 100644
--- a/src/mu-msg-str.c
+++ b/src/mu-msg-str.c
@@ -19,36 +19,146 @@
 
 #include <glib.h>
 #include <string.h>
+#include <ctype.h>
 
 #include "mu-msg-str.h"
 #include "mu-msg-flags.h"
 
 
+/*
+ * Replace the accented letters of the Latin-1 Supplement block (the
+ * two-byte UTF-8 sequences 0xc3 0x80 .. 0xc3 0xbf listed in the switch
+ * below) with plain ASCII look-alikes, e.g. U+00E8 => "e", U+00C6 => "Ae",
+ * U+00DF => "ss". All other bytes are copied unchanged.
+ *
+ * If downcase is TRUE, the ASCII letters of the result are lowercased.
+ *
+ * Returns a newly allocated string, to be released with g_free(), or NULL
+ * (through g_return_val_if_fail) when str is NULL.
+ */
 char*
 mu_msg_str_normalize (const char *str, gboolean downcase)
 {
-	gchar *s;
+	const guchar *cur;
+	gchar *output;
+	gsize i;
 	
-	if (!str)
-		return NULL;
+	g_return_val_if_fail (str, NULL);
 
-	s = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
-	if (!s) {
-		g_warning ("%s: not valid utf8 '%s'", __FUNCTION__, str);
-		return NULL;
+	/* a two-byte sequence yields at most two bytes and every other byte
+	 * exactly one, so the result never outgrows the input; the extra
+	 * byte holds the terminating NUL, also when str is empty */
+	output = g_new0 (gchar, strlen (str) + 1);
+
+	for (i = 0, cur = (const guchar*)str; *cur; ++cur) {
+		/* only 0xc3 followed by a continuation byte (10xxxxxx)
+		 * encodes U+00C0..U+00FF; anything else, including a
+		 * dangling 0xc3 right before the NUL, is copied as-is */
+		if (cur[0] != 0xc3 || (cur[1] & 0xc0) != 0x80) {
+			output[i++] = *cur;
+			continue;
+		}
+		++cur;
+
+		switch (*cur) {
+
+		case 0x80:
+		case 0x81:
+		case 0x82:
+		case 0x83:
+		case 0x84:
+		case 0x85: output[i++] = 'A'; break;
+
+		case 0x86: output[i++] = 'A'; output[i++] = 'e'; break;
+		case 0x87: output[i++] = 'C'; break;
+
+		case 0x88:
+		case 0x89:
+		case 0x8a:
+		case 0x8b: output[i++] = 'E'; break;
+
+		case 0x8c:
+		case 0x8d:
+		case 0x8e:
+		case 0x8f: output[i++] = 'I'; break;
+
+		case 0x90: output[i++] = 'D'; break;
+		case 0x91: output[i++] = 'N'; break;
+
+		case 0x92:
+		case 0x93:
+		case 0x94:
+		case 0x95:
+		case 0x96: output[i++] = 'O'; break;
+
+		case 0x99:
+		case 0x9a:
+		case 0x9b:
+		case 0x9c: output[i++] = 'U'; break;
+
+		case 0x9d: output[i++] = 'Y'; break;
+		case 0x9e: output[i++] = 'T'; output[i++] = 'h'; break;
+		case 0x9f: output[i++] = 's'; output[i++] = 's'; break;
+
+		case 0xa0:
+		case 0xa1:
+		case 0xa2:
+		case 0xa3:
+		case 0xa4:
+		case 0xa5: output[i++] = 'a'; break;
+
+		case 0xa6: output[i++] = 'a'; output[i++] = 'e'; break;
+		case 0xa7: output[i++] = 'c'; break;
+
+		case 0xa8:
+		case 0xa9:
+		case 0xaa:
+		case 0xab: output[i++] = 'e'; break;
+
+		case 0xac:
+		case 0xad:
+		case 0xae:
+		case 0xaf: output[i++] = 'i'; break;
+
+		case 0xb0: output[i++] = 'd'; break;
+		case 0xb1: output[i++] = 'n'; break;
+
+		case 0xb2:
+		case 0xb3:
+		case 0xb4:
+		case 0xb5:
+		case 0xb6: output[i++] = 'o'; break;
+
+		case 0xb9:
+		case 0xba:
+		case 0xbb:
+		case 0xbc: output[i++] = 'u'; break;
+
+		case 0xbd: output[i++] = 'y'; break;
+		case 0xbe: output[i++] = 't'; output[i++] = 'h'; break;
+		case 0xbf: output[i++] = 'y'; break;
+
+		default:
+			/* U+00D7, U+00D8, U+00F7, U+00F8 have no replacement:
+			 * keep the whole sequence so the output stays valid */
+			output[i++] = (gchar)0xc3;
+			output[i++] = *cur;
+		}
 	}
-	
+
+	output[i] = '\0';
+
+	/* ASCII-only lowercasing: bytes >= 0x80 belong to multi-byte
+	 * sequences and must not be touched, whatever the locale */
 	if (downcase) {
-		gchar *tmp;
-		tmp = g_utf8_strdown (s, -1);
-		g_free (s);
-		s = tmp;
+		gchar *c;
+		for (c = output; *c; ++c)
+			*c = g_ascii_tolower (*c);
 	}
-	
-	return s;
+
+	return output;
 }
 
-
 const char*
 mu_msg_str_date_s (const char* frm, time_t t)
 {
diff --git a/src/tests/test-mu-msg-str.c b/src/tests/test-mu-msg-str.c
index acfa2016..70e7794e 100644
--- a/src/tests/test-mu-msg-str.c
+++ b/src/tests/test-mu-msg-str.c
@@ -121,9 +121,41 @@ test_mu_msg_str_prio_02 (void)
 {
 	/* this must fail */
 	g_test_log_set_fatal_handler ((GTestLogFatalFunc)ignore_error, NULL);
-	
 	g_assert_cmpstr (mu_msg_str_prio(666), ==, NULL);
 }
+
+
+
+/* accented letters must fold to ASCII; the empty string and 0xc3
+ * sequences without a replacement must pass through intact */
+static void
+test_mu_msg_str_normalize_01 (void)
+{
+	guint i;
+	struct {
+		const char*	word;
+		const char*	norm;
+	} words [] = {
+		{ "dantès", "dantes"},
+		{ "foo", "foo" },
+		{ "Föö", "foo" },
+		{ "hÆvý mëÐal ümláõt", "haevy medal umlaot"},
+		{ "", "" },
+		{ "3×4", "3×4" }
+	};
+
+
+	for (i = 0; i < G_N_ELEMENTS(words); ++i) {
+		gchar *str;
+		str = mu_msg_str_normalize (words[i].word, TRUE);
+		g_assert_cmpstr (str, ==, words[i].norm);
+		g_free (str);
+	}
+}
+
+
+
+
 
 int
 main (int argc, char *argv[])
@@ -146,6 +178,11 @@ main (int argc, char *argv[])
 
 	g_test_add_func ("/mu-msg-str/mu-msg-str-prio-02",
 			 test_mu_msg_str_prio_02);
+	/* mu_msg_str_normalize */
+	g_test_add_func ("/mu-msg-str/mu-msg-str-normalize-01",
+			 test_mu_msg_str_normalize_01);
+
+
 
 	/* FIXME: add tests for mu_msg_str_flags; but note the
 	 * function simply calls mu_msg_field_str */