* handle invalid utf8; fixes #211 (hopefully)

This commit is contained in:
djcb 2013-05-15 21:36:27 +03:00
parent 8af771fb76
commit a2eef4fa41
2 changed files with 13 additions and 1 deletions

View File

@@ -475,6 +475,16 @@ process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
char *norm, *cur;
norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
if (G_UNLIKELY(!norm)) { /* not valid utf8? */
char *u8;
u8 = mu_str_utf8ify (str);
norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
g_free (u8);
}
if (!norm)
return NULL;
gstr = g_string_sized_new (strlen (norm));
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {

View File

@@ -221,7 +221,9 @@ test_mu_str_process_term (void)
{ "Masha@Аркона.ru", "masha_аркона_ru" },
{ "foo:ελληνικά", "foo_ελληνικα" },
{ "日本語!!", "日本語__" },
{ "", "_" }
{ "", "_" },
/* invalid utf8 */
{ "Hello\xC3\x2EWorld", "hello__world" }
};
for (i = 0; i != G_N_ELEMENTS(words); ++i) {