mirror of https://github.com/djcb/mu.git
* mu-str-normalize.c: add note about alternative implementation
This commit is contained in:
parent
10b0f321c8
commit
13b1e87cc4
|
@ -28,6 +28,7 @@
|
||||||
|
|
||||||
#include "mu-str.h"
|
#include "mu-str.h"
|
||||||
|
|
||||||
|
|
||||||
char*
|
char*
|
||||||
mu_str_normalize (const char *str, gboolean downcase)
|
mu_str_normalize (const char *str, gboolean downcase)
|
||||||
{
|
{
|
||||||
|
@ -36,6 +37,17 @@ mu_str_normalize (const char *str, gboolean downcase)
|
||||||
return mu_str_normalize_in_place (g_strdup(str), downcase);
|
return mu_str_normalize_in_place (g_strdup(str), downcase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this implementation works for accented chars in Unicode Blocks
|
||||||
|
* 'Latin-1 Supplement' and 'Latin Extended-A'. An alternative (slower
|
||||||
|
* but much simpler) implementation would be to use g_utf8_normalize
|
||||||
|
* to decompose characters into the accent part and the character part,
|
||||||
|
* and then get rid of the former. That would be slower than what we
|
||||||
|
* do here, but also more *complete*. It's unclear whether it would
|
||||||
|
* be slower *in practice* => needs checking
|
||||||
|
*/
|
||||||
|
|
||||||
/* we can normalize in-place, as the normalized string will never be
|
/* we can normalize in-place, as the normalized string will never be
|
||||||
* longer than the original. even for replacements that are 2 chars
|
* longer than the original. even for replacements that are 2 chars
|
||||||
* wide (e.g. German ß => ss), the replacement is 2 bytes, like the
|
* wide (e.g. German ß => ss), the replacement is 2 bytes, like the
|
||||||
|
|
Loading…
Reference in New Issue