* mu-str-normalize.c: add note about alternative implementation

2010-12-03 20:53:25 +02:00 · 2010-12-03 20:53:25 +02:00 · 13b1e87cc4
parent 10b0f321c8
commit 13b1e87cc4
1 changed files with 12 additions and 0 deletions
--- a/src/mu-str-normalize.c
+++ b/src/mu-str-normalize.c
@@ -28,6 +28,7 @@

 #include "mu-str.h"

+
 char*
 mu_str_normalize (const char *str, gboolean downcase)
 {
@@ -36,6 +37,17 @@ mu_str_normalize (const char *str, gboolean downcase)
 	return mu_str_normalize_in_place (g_strdup(str), downcase);
 }

+
+/*
+ * this implementation works for accented chars in the Unicode blocks
+ * 'Latin-1 Supplement' and 'Latin Extended-A'. An alternative (much
+ * simpler) implementation would be to use g_utf8_normalize to
+ * decompose characters into the accent part and the character part,
+ * and then get rid of the former. That would be slower than what we
+ * do here, but also more *complete*.  It's unclear whether it would
+ * be noticeably slower *in practice* => needs checking
+ */
+
 /* we can normalize in-place, as the normalized string will never be
 * longer than the original.  even for replacements that are 2 chars
 * wide (e.g. German ß => ss), the replacement is 2 bytes, like the