mu/lib/utils/mu-str.c

317 lines
6.7 KiB
C

/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
/*
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#if HAVE_CONFIG_H
#include "config.h"
#endif /*HAVE_CONFIG_H*/
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE (500)
#endif /*_XOPEN_SOURCE*/
#include <glib.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include "mu-util.h" /* PATH_MAX */
#include "mu-str.h"
/**
 * Format a size as a display string, using g_format_size().
 *
 * The formatted text is copied into a function-local static buffer of 32
 * bytes and truncated if it does not fit. Because the buffer is static,
 * every call overwrites the result of the previous one, and the returned
 * pointer must not be freed by the caller.
 *
 * @param s the size to format
 *
 * @return pointer to the static, NUL-terminated result buffer
 */
const char*
mu_str_size_s (size_t s)
{
	static char result[32];
	char *formatted;

	formatted = g_format_size ((goffset)s);

	/* snprintf truncates when needed and always NUL-terminates */
	snprintf (result, sizeof (result), "%s", formatted);
	g_free (formatted);

	return result;
}
/**
 * Build a one-line summary from the first lines of a text.
 *
 * Characters are copied from @str until @max_lines newline characters have
 * been consumed or the string ends. Every run of blanks (space, tab, CR,
 * LF) is collapsed into a single space, and the summary neither starts nor
 * ends with a blank.
 *
 * @param str the text to summarize; must not be NULL
 * @param max_lines the maximum number of lines of @str to use; must be > 0
 *
 * @return a newly allocated string (free with g_free), or NULL when one of
 * the preconditions above is violated
 */
char*
mu_str_summarize (const char* str, size_t max_lines)
{
	char *summary;
	size_t nl_seen, i, j;
	gboolean last_was_blank;

	g_return_val_if_fail (str, NULL);
	g_return_val_if_fail (max_lines > 0, NULL);

	/* the summary is never longer than the original */
	summary = g_new (gchar, strlen(str) + 1);

	/* last_was_blank starts out TRUE, so leading blanks are dropped */
	for (i = j = 0, nl_seen = 0, last_was_blank = TRUE;
	     nl_seen < max_lines && str[i] != '\0'; ++i) {

		const char c = str[i];

		if (c == '\n' || c == '\r' || c == '\t' || c == ' ') {
			if (c == '\n')
				++nl_seen;
			/* only the first blank of a run is copied */
			if (!last_was_blank)
				summary[j++] = ' ';
			last_was_blank = TRUE;
		} else {
			summary[j++] = c;
			last_was_blank = FALSE;
		}
	}

	/* if the last thing we saw was a blank, a space was written for it;
	 * drop it, so the summary has no trailing blank -- neither when the
	 * input ends in blanks nor when we stopped at the line limit */
	if (last_was_blank && j > 0)
		--j;

	summary[j] = '\0';

	return summary;
}
char*
mu_str_from_list (const GSList *lst, char sepa)
{
const GSList *cur;
char *str;
g_return_val_if_fail (sepa, NULL);
for (cur = lst, str = NULL; cur; cur = g_slist_next(cur)) {
char *tmp;
/* two extra dummy '\0' so -Wstack-protector won't complain */
char sep[4] = { '\0', '\0', '\0', '\0' };
sep[0] = cur->next ? sepa : '\0';
tmp = g_strdup_printf ("%s%s%s",
str ? str : "",
(gchar*)cur->data,
sep);
g_free (str);
str = tmp;
}
return str;
}
/**
 * Split a string on a separator character into a list of strings.
 *
 * @param str the string to split; if NULL, NULL is returned
 * @param sepa the separator character; must not be '\0'
 * @param strip if TRUE, leading and trailing whitespace is removed from
 * each element (using g_strstrip)
 *
 * @return a list of newly allocated strings, in the order in which they
 * appear in @str, or NULL if there are no elements. Both the list and its
 * strings are owned by the caller.
 */
GSList*
mu_str_to_list (const char *str, char sepa, gboolean strip)
{
	/* g_strsplit takes its delimiter as a string; the two extra dummy
	 * '\0' are there so -Wstack-protector won't complain */
	char delim[4] = { '\0', '\0', '\0', '\0' };
	gchar **parts;
	GSList *items;
	size_t idx;

	g_return_val_if_fail (sepa, NULL);

	if (str == NULL)
		return NULL;

	delim[0] = sepa;
	parts   = g_strsplit (str, delim, -1);

	/* prepend now and reverse once at the end; cheaper than appending */
	items = NULL;
	for (idx = 0; parts && parts[idx]; ++idx) {
		char *item = g_strdup (parts[idx]);
		items = g_slist_prepend (items,
					 strip ? g_strstrip (item) : item);
	}
	g_strfreev (parts);

	return g_slist_reverse (items);
}
/**
 * Skip reply/forward prefixes at the start of a message subject.
 *
 * Recognized prefixes are "Re" and "Fwd" (in any letter case), optionally
 * followed by a bracketed number such as "[3]", and then a ':'. Prefixes
 * may be chained, e.g. "Re[2]: Fwd: foo". Whitespace before a prefix and
 * after its ':' is skipped as well.
 *
 * This function is critical for sorting performance; therefore, no
 * regexps, but just some good old C pointer magic.
 *
 * @param str a subject string; must not be NULL
 *
 * @return a pointer into @str (not a copy; do not free), pointing just
 * past the recognized prefixes. If @str does not start with a prefix,
 * @str itself is returned, including any leading whitespace.
 */
const gchar*
mu_str_subject_normalize (const gchar* str)
{
	const char *base, *cur;

	g_return_val_if_fail (str, NULL);

	/* 'base' is where the not-yet-examined part of the subject starts;
	 * it is what we return as soon as no further prefix is found. Note
	 * that the <ctype.h> functions require a value representable as
	 * unsigned char, hence the casts: plain char may be negative for
	 * non-ASCII bytes. */
	base = str;
	for (;;) {
		cur = base;
		while (isspace ((unsigned char)*cur))
			++cur; /* skip space */

		/* starts with Re? */
		if (tolower ((unsigned char)cur[0]) == 'r' &&
		    tolower ((unsigned char)cur[1]) == 'e')
			cur += 2;
		/* starts with Fwd? */
		else if (tolower ((unsigned char)cur[0]) == 'f' &&
			 tolower ((unsigned char)cur[1]) == 'w' &&
			 tolower ((unsigned char)cur[2]) == 'd')
			cur += 3;
		else /* nope, different string */
			return base;

		/* we're now past either 'Re' or 'Fwd'. Maybe there's a
		 * [<num>] now? i.e., the Re[3]: foo case */
		if (cur[0] == '[') {
			if (!isdigit ((unsigned char)cur[1]))
				return base; /* nope: no number after '[' */

			do { ++cur; } while (isdigit ((unsigned char)*cur));

			if (cur[0] != ']')
				return base; /* nope: no ending ']' */

			/* skip ']' and space */
			do { ++cur; } while (isspace ((unsigned char)*cur));
		}

		/* now, cur points past either 're' or 'fwd', possibly with
		 * [<num>]; it is only really a prefix if a ':' follows */
		if (cur[0] != ':')
			return base; /* nope, it was not a prefix */

		/* skip ':' and space */
		do { ++cur; } while (isspace ((unsigned char)*cur));

		/* there may still be another prefix, such as in
		 * Re[2]: Fwd: foo; go around again for the remainder */
		base = cur;
	}
}
/**
 * Turn a NUL-terminated buffer into ASCII (which is a UTF-8 subset), by
 * overwriting every byte that is not ASCII, or that is neither printable
 * nor whitespace, with a '.'. The length of the string does not change.
 *
 * @param buf a NUL-terminated buffer, modified in place; must not be NULL
 *
 * @return @buf, or NULL if @buf is NULL
 */
char*
mu_str_asciify_in_place (char *buf)
{
	char *c;

	g_return_val_if_fail (buf, NULL);

	for (c = buf; *c; ++c) {
		/* the <ctype.h> functions require a value representable as
		 * unsigned char; plain char may be negative for exactly the
		 * non-ASCII bytes we are looking for */
		const unsigned char uc = (unsigned char)*c;

		if (!isascii (uc) || (!isprint (uc) && !isspace (uc)))
			*c = '.';
	}

	return buf;
}
/**
 * Get a copy of a string that is guaranteed to be valid UTF-8.
 *
 * If @buf validates as UTF-8, the copy is identical to it; otherwise the
 * copy is passed through mu_str_asciify_in_place().
 *
 * @param buf a NUL-terminated string; must not be NULL
 *
 * @return a newly allocated string (free with g_free), or NULL if @buf is
 * NULL
 */
char*
mu_str_utf8ify (const char *buf)
{
	char *copy;

	g_return_val_if_fail (buf, NULL);

	copy = g_strdup (buf);

	if (g_utf8_validate (buf, -1, NULL))
		return copy; /* already fine as it is */

	/* asciify returns the buffer it was given */
	return mu_str_asciify_in_place (copy);
}
/**
 * Convert a string in some character set to UTF-8.
 *
 * Three attempts are made, in this order: convert from @charset; convert
 * from ISO8859-15 (in case @charset is wrong); and finally, use @buffer
 * as-is if it already validates as UTF-8. If all of them fail, a warning
 * is logged with the error of the first attempt.
 *
 * @param buffer a NUL-terminated string; must not be NULL
 * @param charset the character set @buffer is claimed to be in; must not
 * be NULL
 *
 * @return a newly allocated UTF-8 string (free with g_free), or NULL if
 * the conversion failed or a precondition was violated
 */
gchar*
mu_str_convert_to_utf8 (const char* buffer, const char *charset)
{
	GError *err;
	gchar * utf8;

	g_return_val_if_fail (buffer, NULL);
	g_return_val_if_fail (charset, NULL );

	err = NULL;
	utf8 = g_convert_with_fallback (buffer, -1, "UTF-8",
					charset, NULL,
					NULL, NULL, &err);

	/* maybe the charset lied; try 8859-15. At this point 'err' holds
	 * the failure of the first attempt, and a GError that is already
	 * set must not be passed to another call; so we do not ask for
	 * error details here, and keep the first error for the warning
	 * below (which names 'charset') */
	if (!utf8)
		utf8 = g_convert_with_fallback (buffer, -1, "UTF-8",
						"ISO8859-15", NULL,
						NULL, NULL, NULL);

	/* final attempt, maybe it was utf-8 already */
	if (!utf8 && g_utf8_validate (buffer, -1, NULL))
		utf8 = g_strdup (buffer);

	if (!utf8) {
		g_warning ("%s: conversion failed from %s: %s",
			   __func__, charset, err ? err->message : "");
	}

	/* also needed on success: the first attempt may have failed */
	g_clear_error (&err);

	return utf8;
}
/**
 * Turn a NULL-terminated array of strings into one string in which each
 * element is wrapped in double quotes and the elements are separated by a
 * single space, e.g. {"a", "b c", NULL} => "a" "b c" (including the
 * quotes). The elements are copied verbatim; double quotes inside an
 * element are not escaped.
 *
 * @param params a NULL-terminated array of strings; must not be NULL
 *
 * @return a newly allocated string (free with g_free); the empty string
 * if the array has no elements, or NULL if @params is NULL
 */
gchar*
mu_str_quoted_from_strv (const gchar **params)
{
	const gchar **param;
	GString *quoted;

	g_return_val_if_fail (params, NULL);

	if (*params == NULL)
		return g_strdup ("");

	/* 64 is only the initial capacity; the GString grows as needed */
	quoted = g_string_sized_new (64);

	for (param = params; *param != NULL; ++param) {
		/* a space before every element but the first */
		if (param != params)
			g_string_append_c (quoted, ' ');

		g_string_append_c (quoted, '"');
		g_string_append (quoted, *param);
		g_string_append_c (quoted, '"');
	}

	/* free the GString wrapper only; the caller gets the characters */
	return g_string_free (quoted, FALSE);
}
/**
 * Clean up a string in place: every whitespace character (as per
 * isspace, so including newline and tab) is replaced by a plain space,
 * and every other control character (as per iscntrl) is removed. The
 * result is never longer than the original.
 *
 * @param str a NUL-terminated string, modified in place; must not be NULL
 *
 * @return @str, or NULL if @str is NULL
 */
char*
mu_str_remove_ctrl_in_place (char *str)
{
	const char *src; /* next character to examine */
	char *dst;       /* where the next kept character goes */

	g_return_val_if_fail (str, NULL);

	/* dst never gets ahead of src, so compacting in place is safe */
	for (src = dst = str; *src; ++src) {
		/* the <ctype.h> functions require a value representable as
		 * unsigned char; plain char may be negative for non-ASCII
		 * bytes */
		const unsigned char uc = (unsigned char)*src;

		if (isspace (uc))
			*dst++ = ' '; /* squash special white space
				       * into a simple space */
		else if (!iscntrl (uc))
			*dst++ = *src;
		/* else: a control character; eat it */
	}

	*dst = '\0'; /* terminate the (possibly shortened) string */

	return str;
}