mirror of https://github.com/djcb/mu.git
utils: add regex-split
This commit is contained in:
parent
5e63b8bed3
commit
f69ad37e7a
|
@ -39,6 +39,7 @@
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
#include <charconv>
|
#include <charconv>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
#include <glib/gprintf.h>
|
#include <glib/gprintf.h>
|
||||||
|
@ -142,8 +143,6 @@ Mu::utf8_flatten(const char* str)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* turn \0-terminated buf into ascii (which is a utf8 subset); convert
|
/* turn \0-terminated buf into ascii (which is a utf8 subset); convert
|
||||||
* any non-ascii into '.'
|
* any non-ascii into '.'
|
||||||
*/
|
*/
|
||||||
|
@ -162,7 +161,6 @@ asciify_in_place (char *buf)
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static char*
|
static char*
|
||||||
utf8ify (const char *buf)
|
utf8ify (const char *buf)
|
||||||
{
|
{
|
||||||
|
@ -245,7 +243,6 @@ Mu::split(const std::string& str, const std::string& sepa)
|
||||||
std::vector<std::string>
|
std::vector<std::string>
|
||||||
Mu::split(const std::string& str, char sepa)
|
Mu::split(const std::string& str, char sepa)
|
||||||
{
|
{
|
||||||
|
|
||||||
std::vector<std::string> vec;
|
std::vector<std::string> vec;
|
||||||
size_t b = 0, e = 0;
|
size_t b = 0, e = 0;
|
||||||
|
|
||||||
|
@ -266,6 +263,14 @@ Mu::split(const std::string& str, char sepa)
|
||||||
return vec;
|
return vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Split str into the parts that lie between matches of sepa_rx.
 *
 * As with the other split() overloads, splitting an empty string yields an
 * empty vector (not a vector with a single empty element).
 *
 * Note: tokenization is done by std::sregex_token_iterator; an empty part
 * before a leading separator is returned, but an empty part after a
 * trailing separator is not.
 *
 * str: the string to split
 * sepa_rx: regular expression matching the separators
 *
 * Returns the parts, in order of appearance.
 */
std::vector<std::string>
Mu::split(const std::string& str, const std::regex& sepa_rx)
{
	// without this guard, the token iterator would hand back a single
	// empty token for an empty input.
	if (str.empty())
		return {};

	// submatch index -1 selects the text *between* the matches rather
	// than the matches themselves.
	std::sregex_token_iterator it(str.begin(), str.end(), sepa_rx, -1);
	std::sregex_token_iterator end;

	return std::vector<std::string>(it, end);
}
|
||||||
|
|
||||||
std::string
|
std::string
|
||||||
Mu::join(const std::vector<std::string>& svec, const std::string& sepa)
|
Mu::join(const std::vector<std::string>& svec, const std::string& sepa)
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
#include "mu-utils-format.hh"
|
#include "mu-utils-format.hh"
|
||||||
#include "mu-option.hh"
|
#include "mu-option.hh"
|
||||||
|
@ -85,7 +86,6 @@ std::string remove_ctrl(const std::string& str);
|
||||||
*/
|
*/
|
||||||
std::vector<std::string> split(const std::string& str, const std::string& sepa);
|
std::vector<std::string> split(const std::string& str, const std::string& sepa);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split a string in parts. As a special case, splitting an empty string
|
* Split a string in parts. As a special case, splitting an empty string
|
||||||
* yields an empty vector (not a vector with a single empty element)
|
* yields an empty vector (not a vector with a single empty element)
|
||||||
|
@ -97,6 +97,15 @@ std::vector<std::string> split(const std::string& str, const std::string& sepa);
|
||||||
*/
|
*/
|
||||||
std::vector<std::string> split(const std::string& str, char sepa);
|
std::vector<std::string> split(const std::string& str, char sepa);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Split a string into parts, using a regular expression as the separator
|
||||||
|
*
|
||||||
|
* @param str a string
|
||||||
|
* @param sepa_rx the separator regex
|
||||||
|
*
|
||||||
|
* @return the parts.
|
||||||
|
*/
|
||||||
|
std::vector<std::string> split(const std::string& str, const std::regex& sepa_rx);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Join the strings in svec into a string, separated by sepa
|
* Join the strings in svec into a string, separated by sepa
|
||||||
|
|
|
@ -208,7 +208,7 @@ test_split()
|
||||||
g_assert_cmpstr(sv1[i].c_str(),==,sv2[i].c_str());
|
g_assert_cmpstr(sv1[i].c_str(),==,sv2[i].c_str());
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// string sepa
|
||||||
assert_equal_svec(split("axbxc", "x"), {"a", "b", "c"});
|
assert_equal_svec(split("axbxc", "x"), {"a", "b", "c"});
|
||||||
assert_equal_svec(split("axbxcx", "x"), {"a", "b", "c", ""});
|
assert_equal_svec(split("axbxcx", "x"), {"a", "b", "c", ""});
|
||||||
assert_equal_svec(split("", "boo"), {});
|
assert_equal_svec(split("", "boo"), {});
|
||||||
|
@ -216,10 +216,12 @@ test_split()
|
||||||
assert_equal_svec(split("abc", ""), {"a", "b", "c"});
|
assert_equal_svec(split("abc", ""), {"a", "b", "c"});
|
||||||
assert_equal_svec(split("", "boo"), {});
|
assert_equal_svec(split("", "boo"), {});
|
||||||
|
|
||||||
|
// char sepa
|
||||||
assert_equal_svec(split("axbxc", 'x'), {"a", "b", "c"});
|
assert_equal_svec(split("axbxc", 'x'), {"a", "b", "c"});
|
||||||
assert_equal_svec(split("axbxcx", 'x'), {"a", "b", "c", ""});
|
assert_equal_svec(split("axbxcx", 'x'), {"a", "b", "c", ""});
|
||||||
assert_equal_svec(split("", "boo"), {});
|
|
||||||
|
// regex sepa
|
||||||
|
assert_equal_svec(split("axbyc", std::regex("[xy]")), {"a", "b", "c"});
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
Loading…
Reference in New Issue