mu/lib/mu-str.h

/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/

/*
** Copyright (C) 2008-2012 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/

#ifndef __MU_STR_H__
#define __MU_STR_H__

#include <time.h>
#include <sys/types.h>

#include <mu-msg.h>
#include <mu-flags.h>

G_BEGIN_DECLS


/**
 * create a 'display contact' from an email header To/Cc/Bcc/From-type address
 * ie., turn
 *     "Foo Bar" <foo@bar.com>
 * into
 *      Foo Bar
 * Note that this is based on some simple heuristics. Max length is 255 bytes.
 *
 *   mu_str_display_contact_s returns a statically allocated
 *   buffer (ie, non-reentrant), while mu_str_display_contact
 *   returns a newly allocated string that you must free with g_free
 *   when done with it.
 *
 * @param str a 'contact str' (ie., what is in the To/Cc/Bcc/From fields), or NULL
 *
 * @return a newly allocated string with a display contact
 */
const char* mu_str_display_contact_s (const char *str) G_GNUC_CONST;
char *mu_str_display_contact (const char *str) G_GNUC_WARN_UNUSED_RESULT;


/**
 * get a display size for a given size_t; uses M for sizes >
 * 1000*1000, k for smaller sizes. Note: this function use the
 * 10-based SI units, _not_ the powers-of-2 based ones.
 *
 * mu_str_size_s returns a ptr to a static buffer,
 * while mu_str_size returns dynamically allocated
 * memory that must be freed after use.
 *
 * @param t the size as an size_t
 *
 * @return a string representation of the size; see above
 * for what to do with it
 */
const char* mu_str_size_s  (size_t s) G_GNUC_CONST;
char*       mu_str_size    (size_t s) G_GNUC_WARN_UNUSED_RESULT;

/**
 * get a display string for a given set of flags, OR'ed in
 * @param flags; one character per flag:
 * D=draft,F=flagged,N=new,P=passed,R=replied,S=seen,T=trashed
 * a=has-attachment,s=signed, x=encrypted
 *
 * mu_str_flags_s returns a ptr to a static buffer,
 * while mu_str_flags returns dynamically allocated
 * memory that must be freed after use.
 *
 * Note: mu_str_flags_s must not be declared G_GNUC_CONST; its result
 * lives in a buffer shared between calls, so the compiler must not
 * merge or elide calls.
 *
 * @param flags the message flags, OR'ed together
 *
 * @return a string representation of the flags; see above
 * for what to do with it
 */
const char* mu_str_flags_s  (MuFlags flags);
char*       mu_str_flags    (MuFlags flags)
    G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;

/**
 * get a 'summary' of a string, ie. the first @max_lines lines of the
 * string, with all newlines replaced by single spaces
 *
 * @param str the source string
 * @param max_lines the maximum number of lines to include in the summary
 *
 * @return a newly allocated string with the summary. use g_free to free it.
 */
char* mu_str_summarize (const char* str, size_t max_lines)
    G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;

/**
 * normalize a string (ie., collapse accented characters etc.), and
 * optionally, downcase it. Works for accented chars in the Unicode
 * blocks 'Latin-1 Supplement' and 'Latin Extended-A'. The input string
 * is not modified; see mu_str_normalize_in_place_try for the in-place
 * variant.
 *
 * @param str a valid utf8 string or NULL
 * @param downcase if TRUE, convert the string to lowercase
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
 * @return the normalized string, or NULL in case of error or if str
 * was NULL. Unless strchunk was provided, the caller must g_free the
 * string when it is no longer needed
 */
char* mu_str_normalize (const char *str, gboolean downcase,
			GStringChunk *strchunk)
    G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;

/**
 * normalize a string (ie., collapse accented characters etc.), and
 * optionally, downcase it. This happens by changing the string
 * itself; if that is not desired, use mu_str_normalize. Works for
 * accented chars in the Unicode blocks 'Latin-1 Supplement' and
 * 'Latin Extended-A'
 *
 * @param str a valid utf8 string or NULL
 * @param downcase if TRUE, convert the string to lowercase
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
 * @return the normalized string, or NULL in case of error or if str
 * was NULL. The caller only needs to free the returned string if
 * a) the return value != str and b) strchunk was not provided.
 */
char* mu_str_normalize_in_place_try (char *str, gboolean downcase,
				     GStringChunk *strchunk);

/**
 * escape the string for use with xapian matching. In practice, if the
 * string contains an '@', replace '@', single-'.' with '_'. Also,
 * replace ':' with '_', if it's not following a xapian-prefix (such
 * as 'subject:', 't:' etc, as defined in mu-msg-fields.[ch]).
 * The change is done in-place (by modifying the argument string). In
 * any case, the string will be downcased.
 *
 * @param query a query string
 * @param esc_space escape space characters as well
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
 * @return the escaped string or NULL in case of error. The caller only
 * needs to free the returned string if a) the return value != query
 * and b) strchunk was not provided.
 *
 */
char* mu_str_xapian_escape_in_place_try (char *query, gboolean esc_space,
					 GStringChunk *strchunk);

/**
 * escape the string for use with xapian matching. In practice, if the
 * string contains an '@', replace '@', single-'.' with '_'. Also,
 * replace ':' with '_', if it's not following a xapian-prefix (such
 * as 'subject:', 't:' etc, as defined in mu-msg-fields.[ch]).
 * Unlike mu_str_xapian_escape_in_place_try, this takes a const string.
 *
 * @param query a query string
 * @param esc_space escape space characters as well
 * @param strchunk (optional) if non-NULL, allocate strings on strchunk
 *
 * @return the escaped string, or NULL in case of error. Unless
 * strchunk was provided, the caller must g_free the string when it is
 * no longer needed
 */
char* mu_str_xapian_escape (const char *query, gboolean esc_space,
			    GStringChunk *strchunk)  G_GNUC_WARN_UNUSED_RESULT;


/**
 * parse a byte size; a size is a number, optionally followed by a
 * unit. Units recognized are b/B (bytes), k/K (1000) and m/M
 * (1000*1000). Only the first letter of the unit is checked and the
 * function is not case-sensitive, so 1000Kb, 3M will work equally
 * well. Note, for kB, MB etc., we follow the SI standards, not 2^10
 * etc. The 'b' may be omitted.
 *
 * practical sizes for email messages are in terms of Mb; even in
 * extreme cases it should be under 100 Mb. The function returns -1
 * if there is a parsing error (the return type is the signed gint64).
 *
 * @param str a string with a size, such as "100", "100Kb", "1Mb"
 *
 * @return the corresponding size in bytes, or -1 in case of error
 */
gint64 mu_str_size_parse_bkm (const char* str);

/**
 * create a full path from a path + a filename. The function is _not_
 * reentrant, as the result lives in a statically allocated buffer.
 *
 * @param path a path (!= NULL)
 * @param name a name (may be NULL)
 *
 * @return the path as a statically allocated buffer. don't free.
 */
const char* mu_str_fullpath_s (const char* path, const char* name);


/**
 * escape a string like a string literal in C; ie. replace \ with \\,
 * and " with \"
 *
 * @param str a non-NULL string
 * @param in_quotes whether the result should be enclosed in double
 * quotes ("")
 *
 * @return the escaped string, newly allocated (free with g_free)
 */
char* mu_str_escape_c_literal (const gchar* str, gboolean in_quotes)
        G_GNUC_WARN_UNUSED_RESULT;


/**
 * turn a string into plain ascii by replacing each non-ascii
 * character with a dot ('.'). The replacement is done in-place, ie.
 * @buf itself is modified.
 *
 * @param buf a buffer to asciify
 *
 * @return the buf ptr (so as to allow for function composition)
 */
char* mu_str_asciify_in_place (char *buf);


/**
 * turn the string in buf into valid utf8. If this string is not valid
 * utf8 already, the function massages the offending characters.
 *
 * @param buf a buffer to utf8ify
 *
 * @return a newly allocated utf8 string (presumably to be freed with
 * g_free, like the other allocating functions here -- TODO confirm)
 */
char* mu_str_utf8ify (const char *buf);


/**
 * convert a string in a certain charset into utf8
 *
 * @param buffer a buffer to convert
 * @param charset the source character set of @buffer
 *
 * @return a UTF8 string (which you need to g_free when done with it),
 * or NULL in case of error
 */
gchar* mu_str_convert_to_utf8 (const char* buffer, const char *charset);


/**
 * macro to check whether the string is empty, ie. if it's NULL or
 * its length is 0
 *
 * The argument is fully parenthesized in the expansion, so that
 * expressions such as mu_str_is_empty (p + 1) test the intended
 * character. Note that S is evaluated up to two times, so don't pass
 * expressions with side effects.
 *
 * @param S a string (or NULL)
 *
 * @return 1 (TRUE) if the string is empty, 0 (FALSE) otherwise
 */
#define mu_str_is_empty(S) (!(S) || !*(S))


/**
 * convert a GSList of strings to a #sepa-separated list
 *
 * @param lst a GSList of strings
 * @param sepa the separator character
 *
 * @return a newly allocated string
 */
char* mu_str_from_list (const GSList *lst, char sepa);


/**
 * convert a #sepa-separated list of strings into a GSList
 *
 * @param str a #sepa-separated list of strings
 * @param sepa the separator character
 * @param strip if TRUE, remove leading/trailing whitespace from the
 * string
 *
 * @return a newly allocated GSList (free with mu_str_free_list)
 */
GSList* mu_str_to_list (const char *str, char sepa, gboolean strip);


/**
 * convert a string (with possible escaping) to a list. list items are
 * separated by one or more spaces. list items can be quoted (using
 * '"'), and '"', ' ' and '\' lose their special meaning when prefixed
 * with \.
 *
 * @param str a string
 * @param err location for a GError; presumably set when parsing fails
 * and allowed to be NULL, following the usual GError convention --
 * TODO confirm against the implementation
 *
 * @return a list of elements or NULL in case of error
 */
GSList* mu_str_esc_to_list (const char *str, GError **err);


/**
 * free a GSList consisting of allocated strings, such as the lists
 * returned by mu_str_to_list
 *
 * @param lst a GSList
 */
void mu_str_free_list (GSList *lst);


/**
 * strip the subject of Re:, Fwd: etc.
 *
 * @param str a subject string
 *
 * @return the stripped subject -- this is a pointer to somewhere
 * inside @str; no copy is made, so don't free it
 */
const gchar* mu_str_subject_normalize (const gchar* str);


/**
 * take a list of strings, and return the concatenation of their
 * quoted forms
 *
 * @param params NULL-terminated array of strings
 *
 * @return the quoted concatenation of the strings (presumably newly
 * allocated, to be freed with g_free -- TODO confirm against the
 * implementation)
 */
gchar* mu_str_quoted_from_strv (const gchar **params);

G_END_DECLS

#endif /*__MU_STR_H__*/