mirror of https://github.com/djcb/mu.git
* handle invalid utf8; fixes #211 (hopefully)
This commit is contained in:
parent
8af771fb76
commit
a2eef4fa41
10
lib/mu-str.c
10
lib/mu-str.c
|
@@ -475,6 +475,16 @@ process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
|
|||
char *norm, *cur;
|
||||
|
||||
norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
|
||||
if (G_UNLIKELY(!norm)) { /* not valid utf8? */
|
||||
char *u8;
|
||||
u8 = mu_str_utf8ify (str);
|
||||
norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
|
||||
g_free (u8);
|
||||
}
|
||||
|
||||
if (!norm)
|
||||
return NULL;
|
||||
|
||||
gstr = g_string_sized_new (strlen (norm));
|
||||
|
||||
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {
|
||||
|
|
|
@@ -221,7 +221,9 @@ test_mu_str_process_term (void)
|
|||
{ "Masha@Аркона.ru", "masha_аркона_ru" },
|
||||
{ "foo:ελληνικά", "foo_ελληνικα" },
|
||||
{ "日本語!!", "日本語__" },
|
||||
{ "£", "_" }
|
||||
{ "£", "_" },
|
||||
/* invalid utf8 */
|
||||
{ "Hello\xC3\x2EWorld", "hello__world" }
|
||||
};
|
||||
|
||||
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
|
||||
|
|
Loading…
Reference in New Issue