utils: add regex-split

This commit is contained in:
Dirk-Jan C. Binnema 2022-06-16 22:49:46 +03:00
parent 5e63b8bed3
commit f69ad37e7a
3 changed files with 24 additions and 8 deletions

View File

@ -39,6 +39,7 @@
#include <cinttypes>
#include <charconv>
#include <limits>
#include <regex>
#include <glib.h>
#include <glib/gprintf.h>
@ -142,8 +143,6 @@ Mu::utf8_flatten(const char* str)
}
/* turn \0-terminated buf into ascii (which is a utf8 subset); convert
* any non-ascii into '.'
*/
@ -162,7 +161,6 @@ asciify_in_place (char *buf)
return buf;
}
static char*
utf8ify (const char *buf)
{
@ -245,7 +243,6 @@ Mu::split(const std::string& str, const std::string& sepa)
std::vector<std::string>
Mu::split(const std::string& str, char sepa)
{
std::vector<std::string> vec;
size_t b = 0, e = 0;
@ -266,6 +263,14 @@ Mu::split(const std::string& str, char sepa)
return vec;
}
/*
 * Split str into the parts that lie between matches of the separator
 * regex sepa_rx; the matched separators themselves are not part of the
 * result.
 *
 * As with the other split() overloads, splitting an empty string yields
 * an empty vector (not a vector with a single empty element).
 */
std::vector<std::string>
Mu::split(const std::string& str, const std::regex& sepa_rx)
{
	// With submatch -1, a token iterator over an empty range still
	// produces one (empty) token; handle that case explicitly so an
	// empty input gives an empty result.
	if (str.empty())
		return {};

	// submatch index -1 selects the text *between* the regex matches
	// rather than the matches themselves.
	std::sregex_token_iterator it(str.begin(), str.end(), sepa_rx, -1);
	std::sregex_token_iterator end;

	return std::vector<std::string>(it, end);
}
std::string
Mu::join(const std::vector<std::string>& svec, const std::string& sepa)

View File

@ -33,6 +33,7 @@
#include <type_traits>
#include <algorithm>
#include <numeric>
#include <regex>
#include "mu-utils-format.hh"
#include "mu-option.hh"
@ -85,7 +86,6 @@ std::string remove_ctrl(const std::string& str);
*/
std::vector<std::string> split(const std::string& str, const std::string& sepa);
/**
* Split a string in parts. As a special case, splitting an empty string
* yields an empty vector (not a vector with a single empty element)
@ -97,6 +97,15 @@ std::vector<std::string> split(const std::string& str, const std::string& sepa);
*/
std::vector<std::string> split(const std::string& str, char sepa);
/**
 * Split a string in parts, using a regular expression to match the
 * separators. The text matched by the separator regex is not included
 * in the parts.
 *
 * For example, splitting "axbyc" on the regex "[xy]" yields
 * {"a", "b", "c"}.
 *
 * @param str a string
 * @param sepa_rx the separator regex
 *
 * @return the parts.
 */
std::vector<std::string> split(const std::string& str, const std::regex& sepa_rx);
/**
* Join the strings in svec into a string, separated by sepa

View File

@ -208,7 +208,7 @@ test_split()
g_assert_cmpstr(sv1[i].c_str(),==,sv2[i].c_str());
};
// string sepa
assert_equal_svec(split("axbxc", "x"), {"a", "b", "c"});
assert_equal_svec(split("axbxcx", "x"), {"a", "b", "c", ""});
assert_equal_svec(split("", "boo"), {});
@ -216,10 +216,12 @@ test_split()
assert_equal_svec(split("abc", ""), {"a", "b", "c"});
assert_equal_svec(split("", "boo"), {});
// char sepa
assert_equal_svec(split("axbxc", 'x'), {"a", "b", "c"});
assert_equal_svec(split("axbxcx", 'x'), {"a", "b", "c", ""});
assert_equal_svec(split("", "boo"), {});
// regex sepa
assert_equal_svec(split("axbyc", std::regex("[xy]")), {"a", "b", "c"});
}
static void