* mu-str: simplify, cleanup string pre-processing functions

2024-06-29 07:51:04 +02:00 · 2013-05-13 00:01:49 +03:00 · 2013-05-13 00:01:49 +03:00 · d26f3c0bae
commit d26f3c0bae
parent 2f60f33dc8
3 changed files with 132 additions and 645 deletions
--- a/lib/mu-str-normalize.c
+++ b/lib/mu-str-normalize.c
@ -1,402 +0,0 @@
 /*
 ** Copyright (C) 2012-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
 **
 ** This program is free software; you can redistribute it and/or modify
 ** it under the terms of the GNU General Public License as published by
 ** the Free Software Foundation; either version 3 of the License, or
 ** (at your option) any later version.
 **
 ** This program is distributed in the hope that it will be useful,
 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ** GNU General Public License for more details.
 **
 ** You should have received a copy of the GNU General Public License
 ** along with this program; if not, write to the Free Software Foundation,
 ** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 **
 */
 #if HAVE_CONFIG_H
 #include "config.h"
 #endif /*HAVE_CONFIG_H*/
 #include <glib.h>
 #include <string.h>
 #include <ctype.h>
 #include "mu-str.h"
 /*
 * Return a normalized copy of STR; STR itself is left untouched.
 *
 * The copy is placed in STRCHUNK when one is given, and is obtained
 * from g_strdup otherwise; the copy is then handed to
 * mu_str_normalize_in_place together with DOWNCASE and STRCHUNK, and
 * whatever that returns is returned here.
 *
 * Returns NULL (through g_return_val_if_fail) when STR is NULL.
 */
 char*
 mu_str_normalize (const char *str, gboolean downcase, GStringChunk *strchunk)
 {
 	char *copy;
 	g_return_val_if_fail (str, NULL);
 	/* pick the storage for the working copy */
 	copy = strchunk ? g_string_chunk_insert (strchunk, str)
 			: g_strdup (str);
 	return mu_str_normalize_in_place (copy, downcase, strchunk);
 }
 /*
 * This implementation only handles accented characters in the Unicode
 * blocks 'Latin-1 Supplement' and 'Latin Extended-A'. A much simpler
 * alternative would be to use g_utf8_normalize to decompose each
 * character into its base character and its accent part, and then get
 * rid of the latter. That would be more *complete* than what we do
 * here, and is expected to be slower -- although it's unclear whether
 * it would be noticeably slower *in practice* => needs checking
 */
 /*
 * Normalize STR in place and return it.
 *
 * - ASCII bytes are copied, lowercased with tolower() when DOWNCASE
 *   is set.
 * - Two-byte sequences starting with 0xc3, 0xc4 or 0xc5 (Latin-1
 *   Supplement / Latin Extended-A) are replaced by one or two plain
 *   ASCII letters, chosen by the switch tables below.
 * - Anything else goes through the generic fallback at the end of
 *   the loop.
 *
 * we can normalize in-place, as the normalized string will never be
 * longer than the original.  even for replacements that are 2 chars
 * wide (e.g. German ß => ss), the replacement is 2 bytes, like the
 * original 0xc3 0x9f
 *
 * STRCHUNK is accepted for symmetry with mu_str_normalize but is not
 * used in this function. Returns NULL (through g_return_val_if_fail)
 * when STR is NULL.
 *
 * note-to-self: http://www.geertvanderploeg.com/unicode-gen/
 */
 char*
 mu_str_normalize_in_place (char *str, gboolean downcase, GStringChunk *strchunk)
 {
 	const guchar *cur;	/* read position */
 	size_t i;		/* write position; never ahead of cur */
 	g_return_val_if_fail (str, NULL);
 	if (*str == '\0')
 		return str;
 	for (i = 0, cur = (const guchar*)str; *cur; ++cur) {
 		/* special case for plain-old ascii */
 		if (*cur < 0x80) {
 			str[i++] = downcase ? tolower (*cur) : *cur;
 			continue;
 		}
 		/* a 0xc3/0xc4/0xc5 lead byte directly followed by the
 		 * terminator is a truncated sequence: stop here, so the
 		 * '++cur' below plus the loop's own '++cur' cannot step
 		 * past the end of the string */
 		if ((*cur == 0xc3 || *cur == 0xc4 || *cur == 0xc5) &&
 		    cur[1] == '\0')
 			break;
 		/* NOTE(review): in the three switches below, 'default'
 		 * emits only the second byte and drops the lead byte,
 		 * which looks like it yields a lone continuation byte
 		 * for unmapped characters (e.g. 0xc3 0x97) -- left
 		 * as-is, confirm whether that is intended */
 		if (*cur == 0xc3) { /* latin-1 supplement */
 			++cur;
 			switch (*cur) {
 			case 0x80:
 			case 0x81:
 			case 0x82:
 			case 0x83:
 			case 0x84:
 			case 0x85: str[i++] = downcase ? 'a' : 'A' ; break;
 			case 0x86:
 				str[i++] = downcase ? 'a' : 'A' ;
 				str[i++] = 'e';
 				break;
 			case 0x87: str[i++] = downcase ? 'c' : 'C'; break;
 			case 0x88:
 			case 0x89:
 			case 0x8a:
 			case 0x8b:
 				str[i++] = downcase ? 'e' : 'E';
 				break;
 			case 0x8c:
 			case 0x8d:
 			case 0x8e:
 			case 0x8f: str[i++] = downcase ? 'i': 'I'; break;
 			case 0x90: str[i++] = downcase ? 'd' : 'D'; break;
 			case 0x91: str[i++] = downcase ? 'n' : 'N'; break;
 			case 0x92:
 			case 0x93:
 			case 0x94:
 			case 0x95:
 			case 0x96: str[i++] = downcase ? 'o' : 'O'; break;
 			case 0x99:
 			case 0x9a:
 			case 0x9b:
 			case 0x9c: str[i++] = downcase ? 'u' : 'U'; break;
 			case 0x9d: str[i++] = downcase ? 'y' : 'Y'; break;
 			case 0x9e:
 				str[i++] = downcase ? 't' : 'T';
 				str[i++] = 'h';
 				break;
 			case 0x9f: str[i++] = 's'; str[i++] = 's'; break;
 			case 0xa0:
 			case 0xa1:
 			case 0xa2:
 			case 0xa3:
 			case 0xa4:
 			case 0xa5: str[i++] = 'a'; break;
 			case 0xa6: str[i++] = 'a'; str[i++] = 'e'; break;
 			case 0xa7: str[i++] = 'c'; break;
 			case 0xa8:
 			case 0xa9:
 			case 0xaa:
 			case 0xab: str[i++] = 'e'; break;
 			case 0xac:
 			case 0xad:
 			case 0xae:
 			case 0xaf: str[i++] = 'i'; break;
 			case 0xb0: str[i++] = 'd'; break;
 			case 0xb1: str[i++] = 'n'; break;
 			case 0xb2:
 			case 0xb3:
 			case 0xb4:
 			case 0xb5:
 			case 0xb6: str[i++] = 'o'; break;
 			case 0xb9:
 			case 0xba:
 			case 0xbb:
 			case 0xbc: str[i++] = 'u'; break;
 			case 0xbd: str[i++] = 'y'; break;
 			case 0xbe: str[i++] = 't'; str[i++] = 'h'; break;
 			case 0xbf: str[i++] = 'y'; break;
 			default:
 				str[i++] = *cur;
 			}
 		} else if (*cur == 0xc4) {  /* Latin Extended-A (0xc4) */
 			++cur;
 			switch (*cur) {
 			case 0x80:
 			case 0x82:
 			case 0x84: str[i++] = downcase ? 'a' : 'A'; break;
 			case 0x86:
 			case 0x88:
 			case 0x8a:
 			case 0x8c: str[i++] = downcase ? 'c' : 'C'; break;
 			case 0x8e:
 			case 0x90: str[i++] = downcase ? 'd' : 'D'; break;
 			case 0x92:
 			case 0x94:
 			case 0x96:
 			case 0x98:
 			case 0x9a: str[i++] = downcase ? 'e' : 'E'; break;
 			case 0x9c:
 			case 0x9e:
 			case 0xa0:
 			case 0xa2: str[i++] = downcase ? 'g' : 'G'; break;
 			case 0xa4:
 			case 0xa6: str[i++] = downcase ? 'h' : 'H'; break;
 			case 0xa8:
 			case 0xaa:
 			case 0xac:
 			case 0xae:
 			case 0xb0: str[i++] = downcase ? 'i' : 'I'; break;
 			case 0xb2:
 				str[i++] = downcase ? 'i' : 'I';
 				str[i++] = downcase ? 'j' : 'J';
 				break;
 			case 0xb4: str[i++] = downcase ? 'j' : 'J'; break;
 			case 0xb6: str[i++] = downcase ? 'k' : 'K'; break;
 			case 0xb9:
 			case 0xbb:
 			case 0xbd:
 			case 0xbf: str[i++] = downcase ? 'l': 'L'; break;
 			case 0x81:
 			case 0x83:
 			case 0x85: str[i++] = 'a'; break;
 			case 0x87:
 			case 0x89:
 			case 0x8b:
 			case 0x8d: str[i++] = 'c'; break;
 			case 0x8f:
 			case 0x91: str[i++] = 'd'; break;
 			case 0x93:
 			case 0x95:
 			case 0x97:
 			case 0x99:
 			case 0x9b: str[i++] = 'e'; break;
 			case 0x9d:
 			case 0x9f:
 			case 0xa1:
 			/* 0xc4 0xa3 is U+0123; the lowercase partner of
 			 * 0xa2 handled above */
 			case 0xa3: str[i++] = 'g'; break;
 			case 0xa5:
 			case 0xa7: str[i++] = 'h'; break;
 			case 0xa9:
 			case 0xab:
 			case 0xad:
 			case 0xaf:
 			case 0xb1: str[i++] = 'i'; break;
 			case 0xb3: str[i++] = 'i'; str[i++] = 'j'; break;
 			case 0xb5: str[i++] = 'j'; break;
 			case 0xb7:
 			case 0xb8: str[i++] = 'k'; break;
 			case 0xba:
 			case 0xbc:
 			case 0xbe: str[i++] = 'l'; break;
 			default:   str[i++] = *cur; break;
 			}
 		} else if (*cur == 0xc5) { /* Latin Extended-A (0xc5) */
 			++cur;
 			switch (*cur) {
 			case 0x81: str[i++] = downcase ? 'l': 'L'; break;
 			case 0x83:
 			case 0x85:
 			case 0x87: str[i++] = downcase ? 'n': 'N'; break;
 			case 0x8c:
 			case 0x8e:
 			case 0x90: str[i++] = downcase ? 'o': 'O'; break;
 			case 0x92:
 				str[i++] = downcase ? 'o':  'O';
 				str[i++] = 'e';
 				break;
 			case 0x94:
 			case 0x96:
 			case 0x98: str[i++] = downcase ? 'r': 'R'; break;
 			case 0x9a:
 			case 0x9c:
 			case 0x9e:
 			case 0xa0: str[i++] = downcase ? 's': 'S'; break;
 			case 0xa2:
 			case 0xa4:
 			case 0xa6: str[i++] = downcase ? 't': 'T'; break;
 			case 0xa8:
 			case 0xaa:
 			case 0xac:
 			case 0xae:
 			case 0xb0:
 			case 0xb2: str[i++] = downcase ? 'u': 'U'; break;
 			case 0xb4: str[i++] = downcase ? 'w': 'W'; break;
 			case 0xb6:
 			case 0xb8: str[i++] = downcase ? 'y': 'Y'; break;
 			case 0xb9:
 			case 0xbb:
 			case 0xbd: str[i++] = downcase ? 'z': 'Z'; break;
 			case 0x80:
 			case 0x82: str[i++] = 'l'; break;
 			case 0x84:
 			case 0x86:
 			case 0x88:
 			case 0x89:
 			case 0x8a:
 			case 0x8b: str[i++] = 'n'; break;
 			case 0x8d:
 			case 0x8f:
 			case 0x91: str[i++] = 'o'; break;
 			case 0x93: str[i++] = 'o'; str[i++] = 'e'; break;
 			case 0x95:
 			case 0x97:
 			case 0x99: str[i++] = 'r'; break;
 			case 0x9b:
 			case 0x9d:
 			case 0x9f:
 			case 0xa1: str[i++] = 's'; break;
 			case 0xa3:
 			case 0xa5:
 			case 0xa7: str[i++] = 't'; break;
 			case 0xa9:
 			case 0xab:
 			case 0xad:
 			case 0xaf:
 			case 0xb1:
 			case 0xb3: str[i++] = 'u'; break;
 			case 0xb5: str[i++] = 'w'; break;
 			case 0xb7: str[i++] = 'y'; break;
 			case 0xba:
 			case 0xbc:
 			case 0xbe: str[i++] = 'z'; break;
 			case 0xbf: str[i++] = 's'; break;
 			default:   str[i++] = *cur; break;
 			}
 		} else {
 			/* our fast-path for latin-utf8 does not work
 			 * -- bummer! just append the character then
 			 * */
 			gunichar uc;
 			char buf[7];
 			size_t len1, len2;
 			/* len1: encoded length of the character at cur,
 			 * len2: encoded length after optional downcasing */
 			len1 = g_utf8_next_char ((char*)cur) - (char*)cur;
 			uc = g_utf8_get_char ((char*)cur);
 			if (downcase)
 				uc = g_unichar_tolower (uc);
 			len2 = g_unichar_to_utf8 (uc, buf);
 			/* if the new char fits where the old char was,
 			 * change it. otherwise, don't bother.
 			 *
 			 * NOTE(review): when the lengths differ nothing
 			 * is written at all, so the character is
 			 * dropped; and since the loop advances cur by a
 			 * single byte, the trailing bytes of a
 			 * multi-byte character re-enter this branch on
 			 * their own -- presumably they fail the length
 			 * test and are skipped; verify */
 			if (len1 == len2) {
 				memcpy (str + i, buf, len2);
 				i += len2;
 			}
 		}
 	}
 	str[i] = '\0';
 	return str;
 }
--- a/lib/mu-str.c
+++ b/lib/mu-str.c
@ -257,78 +257,53 @@ mu_str_to_list (const char *str, char sepa, gboolean strip)
 	return lst;
 }
 /*
 * Consume one (possibly quoted / escaped) item from the front of
 * *STRLST and return it as a newly allocated string.
 *
 * Leading whitespace is removed with g_strchug (which modifies the
 * string in place). A '"' toggles quoting and is not copied; a '\'
 * must be followed by ' ', '"' or '\', and that following character
 * is copied literally; an unquoted ' ' ends the item.
 *
 * On success *STRLST is moved to just after the consumed item. On an
 * invalid escape, ERR is set, *STRLST is set to NULL and the result
 * of g_string_free (.., TRUE) is returned.
 */
 static gchar*
 eat_esc_string (char **strlst, GError **err)
 {
 	char *cur;
 	gboolean in_quotes;
 	GString *item;
 	cur	  = g_strchug (*strlst);
 	item	  = g_string_sized_new (strlen(cur));
 	in_quotes = FALSE;
 	while (*cur) {
 		const char c = *cur;
 		if (c == '"') {
 			/* quote chars only flip the state */
 			in_quotes = !in_quotes;
 			++cur;
 		} else if (c == '\\') {
 			const char escaped = cur[1];
 			if (escaped != ' ' && escaped != '"' &&
 			    escaped != '\\') {
 				/* invalid escaping */
 				g_set_error (err, MU_ERROR_DOMAIN,
 					     MU_ERROR_IN_PARAMETERS,
 					     "error parsing string '%s'",
 					     g_strchug(*strlst));
 				*strlst = NULL;
 				return g_string_free (item, TRUE);
 			}
 			g_string_append_c (item, escaped);
 			cur += 2; /* the backslash and what it escapes */
 		} else if (c == ' ' && !in_quotes) {
 			/* separator: step over it and stop */
 			++cur;
 			break;
 		} else {
 			g_string_append_c (item, c);
 			++cur;
 		}
 	}
 	*strlst = cur;
 	return g_string_free (item, FALSE);
 }
 GSList*
-mu_str_esc_to_list (const char *strings, GError **err)
+mu_str_esc_to_list (const char *strings)
 {
 	GSList *lst;
-	char *mystrings, *freeme;
+	GString *part;
-	const char* cur;
+	unsigned u;
 	gboolean quoted;
 	g_return_val_if_fail (strings, NULL);
-	for (cur = strings; *cur && (*cur == ' ' || *cur == '\t'); ++cur);
+	part = g_string_new (NULL);
 	freeme = mystrings = g_strdup (cur);
-	lst = NULL;
+	for (u = 0, lst = NULL, quoted = FALSE;
-	do {
+	     u != strlen (strings); ++u) {
-		gchar *str;
+
-		str = eat_esc_string (&mystrings, err);
+		char kar;
-		if (str)
+		kar = strings[u];
-			lst = g_slist_prepend (lst, str);
+
-		else {
+		if (quoted && kar != '"') {
-			g_free (freeme);
+			g_string_append_c (part, kar);
-			mu_str_free_list (lst);
+			continue;
 			return NULL;
 		}
-	} while (mystrings && *mystrings);
+		switch (kar) {
 		case '"':
 			quoted = !quoted;
 			g_string_append_c (part, kar);
 			continue;
 		case ' ':
 			if (part->len > 0) {
 				lst = g_slist_prepend
 					(lst, g_string_free (part, FALSE));
 				part = g_string_new (NULL);
 			}
 			continue;
 		default:
 			g_string_append_c (part, kar);
 		}
 	}
 	if (part->len)
 		lst = g_slist_prepend (lst, g_string_free (part, FALSE));
 	g_free (freeme);
 	return g_slist_reverse (lst);
 }
 void
 mu_str_free_list (GSList *lst)
 {
@ -451,147 +426,105 @@ check_for_field (const char *str, gboolean *is_field,
 static gboolean
-is_xapian_special_char (char c)
+handle_esc_maybe (GString *gstr, char **cur, gunichar uc,
 		  gboolean query_esc)
 {
-	switch (c) {
+	char kar;
-	case '@':
+	kar = *cur[0];
 	case '.':
 	case ',':
 	case '/':
 	case '[':
 	case ']':
 	case '+':
 	case '-':
 	case ' ':
 	case ':':
 	case '(':
 	case ')':
 	case '$':
 	case '"':
 	case '\\':
 	case '\'':
 	case '*':
 		return TRUE;
 	default:
 		return FALSE;
 	}
 }
-#define ESC_CHAR '_'
+	if (query_esc) {
-
+		switch (kar) {
 /*
 * Xapian treats various characters such as '@', '-', ':' and '.'
 * specially; function below is an ugly hack to make it DWIM in most
 * cases...
 *
 * function expects search terms (not complete queries)
 * */
 char*
 mu_str_xapian_escape_in_place_try (char *term, gboolean esc_space, GStringChunk *strchunk)
 {
 	unsigned char *cur;
 	char lookback;
 	gboolean is_field, is_range_field, quoted;
 	unsigned colon;
 	g_return_val_if_fail (term, NULL);
 	check_for_field (term, &is_field, &is_range_field);
 	for (colon = 0, lookback = 0, quoted=FALSE, cur = (unsigned char*)term;
 	     *cur; ++cur) {
 		if (*cur == '\\')
 			quoted = !quoted;
 		switch (*cur) {
 		case '.': /* escape '..' if it's not a range field */
 			if (cur[1] == '.') {
 				if (!is_range_field) {
 					*cur	   = ESC_CHAR;
 					*(cur + 1) = ESC_CHAR;
 				}
 				++cur;
 			} else if (isblank(lookback) || isblank(cur[1]) ||
 				   cur[1] == '\0')
 				*cur = ' ';
 			else
 				*cur = ESC_CHAR;
 			break;
 		case ':':
 			/* if there's a registered xapian prefix
 			 * before the *first* ':', don't touch
 			 * it. Otherwise replace ':' with ' '... ugh
 			 * yuck ugly...
 			 */
 			if (colon != 0 || !is_field)
 				*cur = ' ';
 			++colon;
 			break;
 		case '@':
 		case '/':
 		case '[':
 		case ']':
 		case '+':
 		case '$':
 		case '\\':
 		case '-':
 			*cur = ESC_CHAR;
 			break;
 		case ' ':
 		case '_':
 		case '(':
 		case ')':
 		case '*':
 		case '"':
-		case '\'':
+			g_string_append_c (gstr, kar);
-		case '*':   /* wildcard */
+			return TRUE;
-			break; /* leave as they are */
+		case '.':
-		default:
+			if ((*cur)[1] == '.' && (*cur)[2] != '.') {
-			/* turn other stuff into spaces */
+				g_string_append (gstr, "..");
-			if (*cur < 0x80 && !isalnum (*cur))
+				*cur = g_utf8_next_char (*cur);
-				*cur = ' ';
+				return TRUE;
 			}
 		default: break;
 		}
 		lookback = *cur;
 	}
-	/* downcase try to remove accents etc. */
+	if (g_unichar_ispunct(uc) || isblank(kar)) {
-	return mu_str_normalize_in_place (term, TRUE, strchunk);
+		g_string_append_c (gstr, '_');
 		return TRUE;
 	}
 	return FALSE;
 }
-char*
+
-mu_str_xapian_escape (const char *query, gboolean esc_space, GStringChunk *strchunk)
+static char*
 process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
 {
-	char *mystr;
+	GString *gstr;
 	char *norm, *cur;
-	g_return_val_if_fail (query, NULL);
+	norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
 	gstr = g_string_sized_new (strlen (norm));
-	if (strchunk)
+	for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {
 		mystr = g_string_chunk_insert (strchunk, query);
 	else
 		mystr = g_strdup (query);
-	return mu_str_xapian_escape_in_place_try (mystr, esc_space, strchunk);
+		gunichar uc;
 		uc = g_utf8_get_char (cur);
 		if (xapian_esc)
 			if (handle_esc_maybe (gstr, &cur, uc, query_esc))
 				continue;
 		if (g_unichar_ismark(uc))
 			continue;
 		/* maybe add some special cases, such as Spaß->spass ?
 		 */
 		uc = g_unichar_tolower (uc);
 		g_string_append_unichar (gstr, uc);
 	}
 	g_free (norm);
 	/* g_print ("-->%s\n", gstr->str); */
 	return g_string_free (gstr, FALSE);
 }
 char*
-mu_str_xapian_escape_term (const char *term, GStringChunk *strchunk)
+mu_str_process_text (const char *str)
 {
-	char *cur, *esc;
+	g_return_val_if_fail (str, NULL);
-	g_return_val_if_fail (term, NULL);
+	return process_str (str, FALSE, FALSE);
 	g_return_val_if_fail (strchunk, NULL);
-	for (cur = esc = mu_str_normalize (term, TRUE, strchunk);
+}
-	     *cur; ++cur) {
+
-		if (is_xapian_special_char (*cur))
+
-			*cur = ESC_CHAR;
+char*
-	}
+mu_str_process_term (const char *str)
 {
 	g_return_val_if_fail (str, NULL);
 	return process_str (str, TRUE, FALSE);
 }
 char*
 mu_str_process_query_term (const char *str)
 {
 	g_return_val_if_fail (str, NULL);
 	return process_str (str, TRUE, TRUE);
 	return esc;
 }
--- a/lib/mu-str.h
+++ b/lib/mu-str.h
@ -106,86 +106,42 @@ char*       mu_str_flags    (MuFlags flags)
 char* mu_str_summarize (const char* str, size_t max_lines)
    G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
 /**
 * normalize a string (ie., collapse accented characters etc.), and
 * optionally, downcase it. Works for accented chars in Unicode Blocks
 * 'Latin-1 Supplement' and 'Latin Extended-A'
 *
 * @param str a valid utf8 string or NULL
 * @param downcase if TRUE, convert the string to lowercase
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
 * @return the normalized string, or NULL in case of error or str was
 * NULL. Unless strchunk was provided, user must g_free the string when
 * no longer needed
 */
 char* mu_str_normalize (const char *str, gboolean downcase,
 			GStringChunk *strchunk)
    G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
 /**
- * normalize a string (ie., collapse accented characters etc.), and
+ * Process some text (e.g. message bodies) -- flatten (remove accents
- * optionally, downcase it. this happen by changing the string; if
+ * etc.), and remove some punctuation.
 * that is not desired, use mu_str_normalize. Works for accented chars
 * in Unicode Blocks 'Latin-1 Supplement' and 'Latin Extended-A'
 *
- * @param str a valid utf8 string or NULL
+ * @param text some text
 * @param downcase if TRUE, convert the string to lowercase
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
- * @return the normalized string, or NULL in case of error or str was
+ * @return the processed text, free with g_free
 * NULL. User only needs to free the returned string if a) return
 * value != str and b) strchunk was not provided.
 */
-char* mu_str_normalize_in_place (char *str, gboolean downcase,
+char* mu_str_process_text (const char *text)
-				 GStringChunk *strchunk);
+	 G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
 /**
- * escape the string for use with xapian matching. in practice, if the
+ * Process some term (e.g., an e-mail address, subject field):
- * string contains an '@', replace '@', single-'.' with '_'. Also,
+ * remove accents, replace some punctuation by _
 * replace ':' with '_', if it's not following a xapian-prefix (such
 * as 'subject:', 't:' etc, as defined in mu-msg-fields.[ch]).
 * changing is done in-place (by changing the argument string). in any
 * case, the string will be downcased.
 *
- * @param query a query string
+ * @param term some term
 * @param esc_space escape space characters as well
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
 * @return the escaped string or NULL in case of error. User only
 * needs to free the returned string if a) return value != query and b)
 * strchunk was not provided.
 *
 * @return the processed text, free with g_free
 */
-char* mu_str_xapian_escape_in_place_try (char *query, gboolean esc_space,
+char* mu_str_process_term (const char *term)
-					 GStringChunk *strchunk);
+	 G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
 /**
- * escape the string for use with xapian matching. in practice, if the
+ * Process some query term (e.g., an e-mail address, subject field):
- * string contains an '@', replace '@', single-'.' with '_'. Also,
+ * remove accents, replace some punctuation by _, but leave some query
- * replace ':' with '_', if it's not following a xapian-prefix (such
+ * metachars alone.
 * as 'subject:', 't:' etc, as defined in mu-msg-fields.[ch]).
 *
- * @param str a string
+ * @param qterm some query term
 * @param esc_space escape space characters as well
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
- * @return the escaped string (free with g_free) or NULL in case of error
+ * @return the processed text, free with g_free
 * Unless strchunk was provided, user must g_free the string when
 * no longer needed
 */
-char* mu_str_xapian_escape (const char *str, gboolean esc_space,
+char* mu_str_process_query_term (const char *qterm)
-			    GStringChunk *strchunk)  G_GNUC_WARN_UNUSED_RESULT;
+	 G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
 /**
 * escape the xapian term
 *
 * @param str a string
 * @param strchunk allocate strings on strchunk
 *
 * @return the escaped string, which is allocated in the strchunk
 */
 char* mu_str_xapian_escape_term (const char *term, GStringChunk *strchunk);
 /**
 * Fixup values for some fields in the DWIM manner:
@ -315,14 +271,14 @@ GSList* mu_str_to_list (const char *str, char sepa, gboolean strip);
 /**
 * convert a string (with possible escaping) to a list. list items are
 * separated by one or more spaces. list items can be quoted (using
- * '"'), and '"', ' ' and '\' use their special meaning when prefixed
+ * '"').
 * with \.
 *
 * @param str a string
 *
- * @return a list of elements or NULL in case of error
+ * @return a list of elements or NULL in case of error, free with
 * mu_str_free_list
 */
-GSList* mu_str_esc_to_list (const char *str, GError **err);
+GSList* mu_str_esc_to_list (const char *str);
 /**