utils: update date/size parsing, factor out format

And update tests
This commit is contained in:
Dirk-Jan C. Binnema 2022-04-28 22:49:45 +03:00
parent 4b9c663ded
commit b7a30c0a36
6 changed files with 284 additions and 165 deletions

View File

@ -76,6 +76,25 @@ Err(const Error& err)
return tl::unexpected(err);
}
/**
 * Create an error-result from the Error held by an existing Result
 *
 * error() is called unconditionally, so @p res is expected to be in the
 * error state.
 *
 * @param res a Result holding an Error
 *
 * @return an unexpected<Error> wrapping a copy of that Error
 */
template<typename T>
static inline tl::unexpected<Error>
Err(const Result<T>& res)
{
	// wrap explicitly, like Err(const Error&) does; this does not rely on
	// an implicit Error -> tl::unexpected<Error> conversion.
	return tl::unexpected(res.error());
}
/**
 * Reduce a result-like object to a Result<void>
 *
 * @param t an object that converts to bool and offers error()
 *
 * @return Ok() when @p t tests true; otherwise an error-result carrying
 * t.error()
 */
template <typename T>
static inline Result<void>
Ok(const T& t)
{
	if (!t)
		return Err(t.error());

	return Ok();
}
/*
* convenience
@ -115,6 +134,9 @@ Err(Error::Code errcode, GError **err, const char* frm, ...)
return Err(errcode, std::move(str));
}
#define assert_valid_result(R) do { \
if(!R) { \
g_critical("error-result: %s", (R).error().what()); \

View File

@ -0,0 +1,58 @@
/*
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
** Free Software Foundation; either version 3, or (at your option) any
** later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#ifndef MU_UTILS_FORMAT_HH__
#define MU_UTILS_FORMAT_HH__

/* keep this header self-contained: it uses va_list and std::string */
#include <cstdarg>
#include <string>

namespace Mu {
/**
 * Quote & escape a string for " and \
 *
 * @param str a string
 *
 * @return quoted string
 */
std::string quote(const std::string& str);
/**
 * Format a string, printf style
 *
 * @param frm format string
 * @param ... parameters
 *
 * @return a formatted string
 */
std::string format(const char* frm, ...) __attribute__((format(printf, 1, 2)));
/**
 * Format a string, vprintf style
 *
 * @param frm format string
 * @param args the parameters for the format string, as a va_list
 *
 * @return a formatted string
 */
std::string vformat(const char* frm, va_list args) __attribute__((format(printf, 1, 0)));
} // namespace Mu
#endif /* MU_UTILS_FORMAT_HH__ */

View File

@ -35,11 +35,15 @@
#include <algorithm>
#include <numeric>
#include <functional>
#include <cinttypes>
#include <charconv>
#include <limits>
#include <glib.h>
#include <glib/gprintf.h>
#include "mu-utils.hh"
#include "mu-utils-format.hh"
#include "mu-util.h"
#include "mu-str.h"
#include "mu-error.hh"
@ -118,7 +122,7 @@ Mu::utf8_flatten(const char* str)
// the pure-ascii case
if (g_str_is_ascii(str)) {
auto l = g_ascii_strdown(str, -1);
auto l = g_ascii_strdown(str, -1);
std::string s{l};
g_free(l);
return s;
@ -298,27 +302,6 @@ Mu::vformat(const char* frm, va_list args)
return str;
}
constexpr const auto InternalDateFormat = "%010" G_GINT64_FORMAT;
constexpr const char InternalDateMin[] = "0000000000";
constexpr const char InternalDateMax[] = "9999999999";
static_assert(sizeof(InternalDateMin) == 10 + 1, "invalid");
static_assert(sizeof(InternalDateMax) == 10 + 1, "invalid");
static std::string
date_boundary(bool is_first)
{
return is_first ? InternalDateMin : InternalDateMax;
}
std::string
Mu::date_to_time_t_string(int64_t t)
{
char buf[sizeof(InternalDateMax)];
g_snprintf(buf, sizeof(buf), InternalDateFormat, t);
return buf;
}
std::string
Mu::time_to_string(const std::string& frm, time_t t, bool utc)
{
@ -356,13 +339,13 @@ Mu::time_to_string(const std::string& frm, time_t t, bool utc)
return datestr.value_or("");
}
static std::string
static Option<int64_t>
delta_ymwdhMs(const std::string& expr)
{
char* endptr;
auto num = strtol(expr.c_str(), &endptr, 10);
if (num <= 0 || num > 9999 || !endptr || !*endptr)
return date_boundary(true);
return Nothing;
int years, months, weeks, days, hours, minutes, seconds;
years = months = weeks = days = hours = minutes = seconds = 0;
@ -375,7 +358,8 @@ delta_ymwdhMs(const std::string& expr)
case 'w': weeks = num; break;
case 'm': months = num; break;
case 'y': years = num; break;
default: return date_boundary(true);
default:
return Nothing;
}
GDateTime *then, *now = g_date_time_new_now_local();
@ -385,21 +369,21 @@ delta_ymwdhMs(const std::string& expr)
then =
g_date_time_add_full(now, -years, -months, -days, -hours, -minutes, -seconds);
time_t t = MAX(0, (gint64)g_date_time_to_unix(then));
auto t = std::max<int64_t>(0, g_date_time_to_unix(then));
g_date_time_unref(then);
g_date_time_unref(now);
return date_to_time_t_string(t);
return t;
}
static std::string
special_date(const std::string& d, bool is_first)
static Option<int64_t>
special_date_time(const std::string& d, bool is_first)
{
if (d == "now")
return date_to_time_t_string(time(NULL));
return ::time({});
else if (d == "today") {
if (d == "today") {
GDateTime *dt, *midnight;
dt = g_date_time_new_now_local();
@ -419,10 +403,11 @@ special_date(const std::string& d, bool is_first)
time_t t = MAX(0, (gint64)g_date_time_to_unix(midnight));
g_date_time_unref(dt);
g_date_time_unref(midnight);
return date_to_time_t_string((time_t)t);
} else
return date_boundary(is_first);
return t;
}
return Nothing;
}
// if a date has a month day greater than the number of days in that month,
@ -431,8 +416,8 @@ static void
fixup_month(struct tm* tbuf)
{
decltype(tbuf->tm_mday) max_days;
const auto month = tbuf->tm_mon + 1;
const auto year = tbuf->tm_year + 1900;
const auto month = tbuf->tm_mon + 1;
const auto year = tbuf->tm_year + 1900;
switch (month) {
case 2:
@ -444,8 +429,12 @@ fixup_month(struct tm* tbuf)
case 4:
case 6:
case 9:
case 11: max_days = 30; break;
default: max_days = 31; break;
case 11:
max_days = 30;
break;
default:
max_days = 31;
break;
}
if (tbuf->tm_mday > max_days) {
@ -454,21 +443,21 @@ fixup_month(struct tm* tbuf)
tbuf->tm_min = 59;
tbuf->tm_sec = 59;
}
}
}
std::string
Mu::date_to_time_t_string(const std::string& dstr, bool is_first)
Option<int64_t>
Mu::parse_date_time(const std::string& dstr, bool is_first)
{
gint64 t;
struct tm tbuf;
GDateTime* dtime;
struct tm tbuf{};
GDateTime *dtime{};
int64_t t;
/* one-sided dates */
if (dstr.empty())
return date_boundary(is_first);
return is_first ? 0 : G_MAXINT64;
else if (dstr == "today" || dstr == "now")
return special_date(dstr, is_first);
return special_date_time(dstr, is_first);
else if (dstr.find_first_of("ymdwhMs") != std::string::npos)
return delta_ymwdhMs(dstr);
@ -478,74 +467,45 @@ Mu::date_to_time_t_string(const std::string& dstr, bool is_first)
std::string date(is_first ? UserDateMin : UserDateMax);
std::copy_if(dstr.begin(), dstr.end(), date.begin(), [](auto c) { return isdigit(c); });
memset(&tbuf, 0, sizeof tbuf);
if (!strptime(date.c_str(), "%Y%m%d%H%M%S", &tbuf) &&
!strptime(date.c_str(), "%Y%m%d%H%M", &tbuf) &&
!strptime(date.c_str(), "%Y%m%d", &tbuf) && !strptime(date.c_str(), "%Y%m", &tbuf) &&
!strptime(date.c_str(), "%Y", &tbuf))
return date_boundary(is_first);
if (!::strptime(date.c_str(), "%Y%m%d%H%M%S", &tbuf) &&
!::strptime(date.c_str(), "%Y%m%d%H%M", &tbuf) &&
!::strptime(date.c_str(), "%Y%m%d%H", &tbuf) &&
!::strptime(date.c_str(), "%Y%m%d", &tbuf) &&
!::strptime(date.c_str(), "%Y%m", &tbuf) &&
!::strptime(date.c_str(), "%Y", &tbuf))
return Nothing;
fixup_month(&tbuf);
dtime = g_date_time_new_local(tbuf.tm_year + 1900,
tbuf.tm_mon + 1,
tbuf.tm_mday,
tbuf.tm_hour,
tbuf.tm_min,
tbuf.tm_sec);
if (!dtime) {
g_warning("invalid %s date '%s'", is_first ? "lower" : "upper", date.c_str());
return date_boundary(is_first);
}
t = g_date_time_to_unix(dtime);
g_date_time_unref(dtime);
if (t < 0 || t > 9999999999)
return date_boundary(is_first);
else
return date_to_time_t_string(t);
return std::max<int64_t>(t, 0);
}
constexpr const auto SizeFormat = "%010" G_GINT64_FORMAT;
constexpr const char SizeMin[] = "0000000000";
constexpr const char SizeMax[] = "9999999999";
static_assert(sizeof(SizeMin) == 10 + 1, "invalid");
static_assert(sizeof(SizeMax) == 10 + 1, "invalid");
static std::string
size_boundary(bool is_first)
Option<int64_t>
Mu::parse_size(const std::string& val, bool is_first)
{
return is_first ? SizeMin : SizeMax;
}
std::string
Mu::size_to_string(int64_t size)
{
char buf[sizeof(SizeMax)];
g_snprintf(buf, sizeof(buf), SizeFormat, size);
return buf;
}
std::string
Mu::size_to_string(const std::string& val, bool is_first)
{
std::string str;
GRegex* rx;
GMatchInfo* minfo;
int64_t size{-1};
std::string str;
GRegex* rx;
GMatchInfo* minfo;
/* one-sided ranges */
if (val.empty())
return size_boundary(is_first);
return is_first ? 0 : std::numeric_limits<int64_t>::max();
rx = g_regex_new("(\\d+)(b|k|kb|m|mb|g|gb)?", G_REGEX_CASELESS, (GRegexMatchFlags)0, NULL);
minfo = NULL;
if (g_regex_match(rx, val.c_str(), (GRegexMatchFlags)0, &minfo)) {
gint64 size;
char* s;
char* s;
s = g_match_info_fetch(minfo, 1);
size = atoll(s);
g_free(s);
@ -559,26 +519,50 @@ Mu::size_to_string(const std::string& val, bool is_first)
}
g_free(s);
str = size_to_string(size);
} else
str = size_boundary(is_first);
}
g_regex_unref(rx);
g_match_info_unref(minfo);
return str;
if (size < 0)
return Nothing;
else
return size;
}
// Encode val as a "lexnum": one prefix character ('f' + the number of hex
// digits) followed by the value in lowercase hex, e.g. 0 -> "g0",
// 100 -> "h64".
std::string
Mu::to_lexnum(int64_t val)
{
	char buf[18]; /* 1 byte prefix + up to 16 hex digits + \0 */

	// PRIx64 is an unsigned conversion, so pass the value as uint64_t
	// rather than relying on a signed/unsigned mismatch in the varargs.
	const int ndigits = ::snprintf(buf + 1, sizeof(buf) - 1, "%" PRIx64,
				       static_cast<uint64_t>(val));
	buf[0] = static_cast<char>('f' + ndigits);

	return buf;
}
// Decode a lexnum: skip the one-character prefix and parse the remainder
// as a hexadecimal number. Returns 0 when there is nothing to parse or when
// std::from_chars fails (it leaves the value untouched in that case).
int64_t
Mu::from_lexnum(const std::string& str)
{
	int64_t val{};

	// an empty string has no prefix to skip; data() + 1 would lie beyond
	// data() + size() and hand std::from_chars an invalid range.
	if (str.empty())
		return val;

	const char* const first = str.data() + 1;
	const char* const last  = str.data() + str.size();
	std::from_chars(first, last, val, 16);

	return val;
}
std::string
Mu::canonicalize_filename(const std::string& path, const std::string& relative_to)
{
char* fname =
g_canonicalize_filename(path.c_str(), relative_to.empty() ? NULL : relative_to.c_str());
auto str{to_string_opt_gchar(
g_canonicalize_filename(
path.c_str(),
relative_to.empty() ? nullptr : relative_to.c_str())).value()};
std::string rv{fname};
g_free(fname);
// remove trailing '/'... is this needed?
if (str[str.length()-1] == G_DIR_SEPARATOR)
str.erase(str.length() - 1);
return rv;
return str;
}
@ -592,7 +576,7 @@ Mu::allow_warnings()
Mu::TempDir::TempDir()
Mu::TempDir::TempDir(bool autodelete): autodelete_{autodelete}
{
GError *err{};
gchar *tmpdir = g_dir_make_tmp("mu-tmp-XXXXXX", &err);
@ -611,6 +595,11 @@ Mu::TempDir::~TempDir()
if (::access(path_.c_str(), F_OK) != 0)
return; /* nothing to do */
if (!autodelete_) {
g_debug("_not_ deleting %s", path_.c_str());
return;
}
/* ugly */
const auto cmd{format("/bin/rm -rf '%s'", path_.c_str())};
(void)::system(cmd.c_str());

View File

@ -34,6 +34,9 @@
#include <algorithm>
#include <numeric>
#include "mu-utils-format.hh"
#include "mu-option.hh"
namespace Mu {
using StringVec = std::vector<std::string>;
@ -109,46 +112,16 @@ static inline std::string join(const std::vector<std::string>& svec, char sepa)
}
/**
* Quote & escape a string for " and \
*
* @param str a string
*
* @return quoted string
*/
std::string quote(const std::string& str);
/**
* Format a string, printf style
*
* @param frm format string
* @param ... parameters
*
* @return a formatted string
*/
std::string format(const char* frm, ...) __attribute__((format(printf, 1, 2)));
/**
* Format a string, printf style
*
* @param frm format string
* @param ... parameters
*
* @return a formatted string
*/
std::string vformat(const char* frm, va_list args) __attribute__((format(printf, 1, 0)));
/**
* Convert an date to the corresponding time expressed as a string with a
* 10-digit time_t
*
* Parse a date string to the corresponding time_t
* *
* @param date the date expressed a YYYYMMDDHHMMSS or any n... of the first
* characters.
* characters, using the local timezone.
* @param first whether to fill out incomplete dates to the start or the end;
* ie. either 1972 -> 197201010000 or 1972 -> 197212312359
*
* @return the corresponding time_t expressed as a string
* @return the corresponding time_t or Nothing if parsing failed.
*/
std::string date_to_time_t_string(const std::string& date, bool first);
Option<int64_t> parse_date_time(const std::string& date, bool first);
/**
* 64-bit incarnation of time_t expressed as a 10-digit string. Uses 64-bit for the time-value,
@ -266,9 +239,9 @@ std::string canonicalize_filename(const std::string& path, const std::string& re
* @param sizestr the size string
* @param first
*
* @return the size expressed as a string with the decimal number of bytes
* @return the size or Nothing if parsing failed
*/
std::string size_to_string(const std::string& sizestr, bool first);
Option<int64_t> parse_size(const std::string& sizestr, bool first);
/**
* Convert a size into a size in bytes string
@ -297,6 +270,37 @@ to_string(const T& val)
return sstr.str();
}
/**
 * Take over a gchar* and turn its contents into a std::string
 *
 * @param str a gchar* or NULL; it is released with g_free before returning
 *
 * @return a std::string with a copy of the characters; empty if str was NULL
 */
static inline std::string
to_string_gchar(gchar*&& str)
{
	std::string res;

	if (str)
		res = str;

	g_free(str);

	return res;
}
/*
 * Lexical numbers ("lexnums") are lexicographically sortable string
 * representations of numbers. A lexnum starts with a prefix character,
 * 'f' + the number of hex digits (so 'g' for one digit, 'h' for two, ...),
 * followed by the number itself in lowercase ASCII hex. So,
 *
 * 0 -> 'g0'
 * 1 -> 'g1'
 * 10 -> 'ga'
 * 16 -> 'h10'
 *
 * etc.
 */
std::string to_lexnum(int64_t val);
int64_t from_lexnum(const std::string& str);
/**
* Like std::find_if, but using sequence instead of a range.
*
@ -483,7 +487,7 @@ struct TempDir {
/**
* Construct a temporary directory
*/
TempDir();
TempDir(bool autodelete=true);
/**
* DTOR; removes the temporary directory
@ -503,6 +507,7 @@ struct TempDir {
const std::string& path() {return path_; }
private:
std::string path_;
const bool autodelete_;
};
} // namespace Mu

View File

@ -22,6 +22,7 @@
#include <xapian.h>
#include <glib.h>
#include "mu-result.hh"
namespace Mu {
@ -60,6 +61,25 @@ try {
return static_cast<Default>(def);
}
/**
 * Invoke func and turn any exception it throws into an error-result
 *
 * The return type is the (decayed) return type of func; every handler
 * returns Err(...), so that type must be constructible from an Err(...)
 * value (presumably a Result<T>).
 *
 * @param func a callable taking no arguments
 *
 * @return whatever func returns, or an error-result with code Xapian (for
 * Xapian::Error) or Internal (for anything else) if it threw.
 */
template <typename Func>
auto
xapian_try_result(Func&& func) noexcept -> std::decay_t<decltype(func())>
try {
	return func();
} catch (const Xapian::Error& xerr) {
	// get_error_string() only describes an associated system error and is
	// NULL when there is none; never hand NULL to "%s", fall back to the
	// exception's own message instead.
	const char* syserr = xerr.get_error_string();
	return Err(Error::Code::Xapian, "%s", syserr ? syserr : xerr.get_msg().c_str());
} catch (const std::runtime_error& re) {
	return Err(Error::Code::Internal, "runtime error: %s", re.what());
} catch (const std::exception& e) {
	return Err(Error::Code::Internal, "caught exception: %s", e.what());
} catch (...) {
	return Err(Error::Code::Internal, "caught exception");
}
} // namespace Mu
#endif /* MU_ XAPIAN_UTILS_HH__ */

View File

@ -23,6 +23,7 @@
#include <iostream>
#include <sstream>
#include <functional>
#include <array>
#include "mu-utils.hh"
@ -57,22 +58,30 @@ test_date_basic()
{
g_setenv("TZ", "Europe/Helsinki", TRUE);
CaseVec cases = {{"2015-09-18T09:10:23", true, "1442556623"},
{"1972-12-14T09:10:23", true, "0093165023"},
{"1854-11-18T17:10:23", true, "0000000000"},
constexpr std::array<std::tuple<const char*, bool, int64_t>, 9> cases = {{
{"2015-09-18T09:10:23", true, 1442556623},
{"1972-12-14T09:10:23", true, 93165023},
{"1854-11-18T17:10:23", true, 0},
{"2000-02-31T09:10:23", true, "0951861599"},
{"2000-02-29T23:59:59", true, "0951861599"},
{"2000-02-31T09:10:23", true, 951861599},
{"2000-02-29T23:59:59", true, 951861599},
{"2016", true, "1451599200"},
{"2016", false, "1483221599"},
{"2016", true, 1451599200},
{"2016", false, 1483221599},
{"fnorb", true, "0000000000"},
{"fnorb", false, "9999999999"},
{"", false, "9999999999"},
{"", true, "0000000000"}};
// {"fnorb", true, -1},
// {"fnorb", false, -1},
{"", false, G_MAXINT64},
{"", true, 0}
}};
test_cases(cases, [](auto s, auto f) { return date_to_time_t_string(s, f); });
for (auto& test: cases) {
if (g_test_verbose())
g_debug("checking %s", std::get<0>(test));
g_assert_cmpuint(parse_date_time(std::get<0>(test),
std::get<1>(test)).value_or(-1),==,
std::get<2>(test));
}
}
static void
@ -90,9 +99,8 @@ test_date_ymwdhMs(void)
{"3m", 3 * 30 * 24 * 60 * 60, 3 * 24 * 3600 + 1}};
for (auto i = 0; i != G_N_ELEMENTS(tests); ++i) {
const auto diff =
time(NULL) -
strtol(Mu::date_to_time_t_string(tests[i].expr, true).c_str(), NULL, 10);
const auto diff = ::time({}) -
parse_date_time(tests[i].expr, true).value_or(-1);
if (g_test_verbose())
std::cerr << tests[i].expr << ' ' << diff << ' ' << tests[i].diff
<< std::endl;
@ -100,19 +108,23 @@ test_date_ymwdhMs(void)
g_assert_true(tests[i].diff - diff <= tests[i].tolerance);
}
g_assert_true(strtol(Mu::date_to_time_t_string("-1y", true).c_str(), NULL, 10) == 0);
//g_assert_true(strtol(Mu::date_to_time_t_string("-1y", true).c_str(), NULL, 10) == 0);
}
static void
test_size()
test_parse_size()
{
CaseVec cases = {
{"456", true, "0000000456"},
{"", false, "9999999999"},
{"", true, "0000000000"},
};
test_cases(cases, [](auto s, auto f) { return size_to_string(s, f); });
constexpr std::array<std::tuple<const char*, bool, int64_t>, 5> cases = {{
{ "456", false, 456 },
{ "", false, G_MAXINT64 },
{ "", true, 0 },
{ "2K", false, 2048 },
{ "2M", true, 2097152 }
}};
for(auto&& test: cases) {
g_assert_cmpint(parse_size(std::get<0>(test), std::get<1>(test))
.value_or(-1), ==, std::get<2>(test));
}
}
static void
@ -222,6 +234,18 @@ test_define_bitmap()
}
}
static void
test_to_from_lexnum()
{
assert_equal(to_lexnum(0), "g0");
assert_equal(to_lexnum(100), "h64");
assert_equal(to_lexnum(12345), "j3039");
g_assert_cmpuint(from_lexnum(to_lexnum(0)), ==, 0);
g_assert_cmpuint(from_lexnum(to_lexnum(7777)), ==, 7777);
g_assert_cmpuint(from_lexnum(to_lexnum(9876543)), ==, 9876543);
}
int
main(int argc, char* argv[])
@ -230,7 +254,7 @@ main(int argc, char* argv[])
g_test_add_func("/utils/date-basic", test_date_basic);
g_test_add_func("/utils/date-ymwdhMs", test_date_ymwdhMs);
g_test_add_func("/utils/size", test_size);
g_test_add_func("/utils/parse-size", test_parse_size);
g_test_add_func("/utils/flatten", test_flatten);
g_test_add_func("/utils/remove-ctrl", test_remove_ctrl);
g_test_add_func("/utils/clean", test_clean);
@ -238,6 +262,7 @@ main(int argc, char* argv[])
g_test_add_func("/utils/split", test_split);
g_test_add_func("/utils/join", test_join);
g_test_add_func("/utils/define-bitmap", test_define_bitmap);
g_test_add_func("/utils/to-from-lexnum", test_to_from_lexnum);
return g_test_run();
}