mu/lib/parser/test-utils.cc

/*
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
**  This library is free software; you can redistribute it and/or
**  modify it under the terms of the GNU Lesser General Public License
**  as published by the Free Software Foundation; either version 2.1
**  of the License, or (at your option) any later version.
**
**  This library is distributed in the hope that it will be useful,
**  but WITHOUT ANY WARRANTY; without even the implied warranty of
**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
**  Lesser General Public License for more details.
**
**  You should have received a copy of the GNU Lesser General Public
**  License along with this library; if not, write to the Free
**  Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
**  02110-1301, USA.
*/

#include <cstdlib>
#include <ctime>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <glib.h>

#include "parser.hh"
using namespace Mux;

/**
 * One row of a table-driven test: an input expression, the flag that is
 * passed along with it, and the string the function under test must return.
 *
 * The members are deliberately not const: const data members would make
 * Case non-assignable, so a CaseVec could not be erased from, sorted or
 * reassigned. Tables that must not change are passed around as
 * `const CaseVec&` instead.
 */
struct Case {
	std::string	expr;      /* input handed to the function under test */
	bool		is_first;  /* second argument handed to the function under test */
	std::string	expected;  /* the exact string the function must return */
};

/** A table of test cases. */
using CaseVec = std::vector<Case>;

/** The function under test: maps (expression, is_first) to a string. */
using ProcFunc = std::function<std::string(std::string, bool)>;


static void
test_cases(const CaseVec& cases, ProcFunc proc)
{
	for (const auto& casus : cases ) {

		const auto res = proc(casus.expr, casus.is_first);
		if (g_test_verbose()) {
			std::cout << "\n";
			std::cout << casus.expr << ' ' << casus.is_first << std::endl;
			std::cout << "exp: '" << casus.expected << "'" << std::endl;
			std::cout << "got: '" << res << "'" << std::endl;
		}

		g_assert_true (casus.expected == res);
	}
}

static void
test_date_basic ()
{
	g_setenv ("TZ", "Europe/Helsinki", TRUE);

	CaseVec cases = {
		{ "2015-09-18T09:10:23", true,  "1442556623" },
		{ "1972-12-14T09:10:23", true,	"0093165023" },
		{ "1854-11-18T17:10:23", true,	"0000000000" },

		{ "2000-02-31T09:10:23", true,  "0951861599" },
		{ "2000-02-29T23:59:59", true,  "0951861599" },

		{ "2016",		true,	"1451599200" },
		{ "2016",		false,  "1483221599" },

		{ "fnorb",		 true,	"0000000000" },
		{ "fnorb",		 false, "9999999999" },
		{ "",			 false, "9999999999" },
		{ "",			 true,	"0000000000" }
	};

	test_cases (cases, [](auto s, auto f){ return date_to_time_t_string(s,f); });
}

/*
 * Test date_to_time_t_string() with relative expressions ("3h", "21d", ...).
 *
 * Each expression is converted (with is_first == true), parsed back into a
 * number, and subtracted from the current time. The result must be within
 * `tolerance` seconds of the expected offset, in either direction. A
 * one-sided check would accept any result that lies too far in the past,
 * e.g. an all-zero string.
 *
 * Finally, "-1y" must convert to a string that parses as 0.
 */
static void
test_date_ymwdhMs (void)
{
	struct {
		std::string	expr;
		long		diff;      /* expected offset from now, in seconds */
		int		tolerance; /* allowed deviation, in seconds */
	} tests[] = {
		{ "3h", 3 * 60 * 60, 1 },

		/* allow an extra hour: these spans can cross a DST change */
		{ "21d", 21 * 24 * 60 * 60, 3600 + 1 },
		{ "2w", 2 * 7 * 24 * 60 * 60, 3600 + 1 },

		/* the expected values use 365-day years and 30-day months;
		 * presumably the wider tolerances make up for real calendar
		 * lengths */
		{ "2y", 2 * 365 * 24 * 60 * 60, 24 * 3600 + 1 },
		{ "3m", 3 * 30 * 24 * 60 * 60, 3 * 24 * 3600 + 1 }
	};

	for (const auto& test : tests) {
		const auto stamp = std::strtol(
			Mux::date_to_time_t_string(test.expr, true).c_str(),
			nullptr, 10);
		const auto diff = std::time(nullptr) - stamp;

		if (g_test_verbose())
			std::cerr << test.expr << ' '
				  << diff << ' '
				  << test.diff << std::endl;

		/* two-sided: too far in the past is as wrong as too recent */
		const auto deviation = std::llabs(
			static_cast<long long>(test.diff) -
			static_cast<long long>(diff));
		g_assert_true (deviation <= test.tolerance);
	}

	g_assert_true (std::strtol(Mux::date_to_time_t_string("-1y", true).c_str(),
				   nullptr, 10) == 0);
}

static void
test_size ()
{
	CaseVec cases = {
		{ "456", true,  "0000000456" },
		{ "",    false, "9999999999" },
		{ "",    true,  "0000000000" },
	};

	test_cases (cases, [](auto s, auto f){ return size_to_string(s,f); });
}


static void
test_flatten ()
{
	CaseVec cases = {
		{ "Менделе́ев", true,  "менделеев" },
		{ "",    false, "" },
		{ "Ångström",    true,  "angstrom" },
	};

	test_cases (cases, [](auto s, auto f){ return utf8_flatten(s); });
}

static void
test_clean ()
{
	CaseVec cases = {
		{ "\t a\t\nb ", true,  "a  b" },
		{ "",    false, "" },
		{ "Ångström",    true,  "Ångström" },
	};

	test_cases (cases, [](auto s, auto f){ return utf8_clean(s); });
}


static void
test_format ()
{
	g_assert_true (format ("hello %s, %u", "world", 123) ==
		       "hello world, 123");
}

int
main (int argc, char *argv[])
{
	g_test_init (&argc, &argv, NULL);

	g_test_add_func ("/utils/date-basic",  test_date_basic);
	g_test_add_func ("/utils/date-ymwdhMs",  test_date_ymwdhMs);
	g_test_add_func ("/utils/size",  test_size);
	g_test_add_func ("/utils/flatten",  test_flatten);
	g_test_add_func ("/utils/clean",  test_clean);
	g_test_add_func ("/utils/format",  test_format);

	return g_test_run ();
}
lib: implement new query parser mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" reseach project, and I have now backported it to the mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From and end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful. 2017-10-24 21:55:35 +02:00			`/*`
			`** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>`
			`**`
			`** This library is free software; you can redistribute it and/or`
			`** modify it under the terms of the GNU Lesser General Public License`
			`** as published by the Free Software Foundation; either version 2.1`
			`** of the License, or (at your option) any later version.`
			`**`
			`** This library is distributed in the hope that it will be useful,`
			`** but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`** Lesser General Public License for more details.`
			`**`
			`** You should have received a copy of the GNU Lesser General Public`
			`** License along with this library; if not, write to the Free`
			`** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA`
			`** 02110-1301, USA.`
			`*/`

			`#include <vector>`
			`#include <glib.h>`

			`#include <iostream>`
			`#include <sstream>`

			`#include "parser.hh"`
			`using namespace Mux;`

			`struct Case {`
			`const std::string expr;`
			`bool is_first;`
			`const std::string expected;`
			`};`
			`using CaseVec = std::vector<Case>;`
			`using ProcFunc = std::function<std::string(std::string, bool)>;`


			`static void`
			`test_cases(const CaseVec& cases, ProcFunc proc)`
			`{`
			`for (const auto& casus : cases ) {`

			`const auto res = proc(casus.expr, casus.is_first);`
			`if (g_test_verbose()) {`
			`std::cout << "\n";`
			`std::cout << casus.expr << ' ' << casus.is_first << std::endl;`
parser: add more tests 2017-10-28 13:12:50 +02:00			`std::cout << "exp: '" << casus.expected << "'" << std::endl;`
			`std::cout << "got: '" << res << "'" << std::endl;`
lib: implement new query parser mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" reseach project, and I have now backported it to the mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From and end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful. 2017-10-24 21:55:35 +02:00			`}`

			`g_assert_true (casus.expected == res);`
			`}`
			`}`

			`static void`
integrate new query parser 2017-10-24 21:57:57 +02:00			`test_date_basic ()`
lib: implement new query parser mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" reseach project, and I have now backported it to the mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From and end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful. 2017-10-24 21:55:35 +02:00			`{`
			`g_setenv ("TZ", "Europe/Helsinki", TRUE);`

			`CaseVec cases = {`
integrate new query parser 2017-10-24 21:57:57 +02:00			`{ "2015-09-18T09:10:23", true, "1442556623" },`
			`{ "1972-12-14T09:10:23", true, "0093165023" },`
			`{ "1854-11-18T17:10:23", true, "0000000000" },`

lib/parser: fix month days In the olden days, we stored dates like e.g. 20180131121234, and do a lexicographical check. With that, we could use e.g. upper-limits 201802312359 for "all dates in Feb 2018", even if Feb doesn't have 31 days. However, nowadays we use time_t values, and g_date_time_new_local raises errors for non-existent days; easiest fix is to massage things a bit; so let's do that. Fixes issue #1197. 2018-02-17 16:44:21 +01:00			`{ "2000-02-31T09:10:23", true, "0951861599" },`
			`{ "2000-02-29T23:59:59", true, "0951861599" },`

integrate new query parser 2017-10-24 21:57:57 +02:00			`{ "2016", true, "1451599200" },`
			`{ "2016", false, "1483221599" },`

			`{ "fnorb", true, "0000000000" },`
			`{ "fnorb", false, "9999999999" },`
			`{ "", false, "9999999999" },`
			`{ "", true, "0000000000" }`
lib: implement new query parser mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" reseach project, and I have now backported it to the mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From and end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful. 2017-10-24 21:55:35 +02:00			`};`

			`test_cases (cases, [](auto s, auto f){ return date_to_time_t_string(s,f); });`
			`}`

integrate new query parser 2017-10-24 21:57:57 +02:00			`static void`
			`test_date_ymwdhMs (void)`
			`{`
			`struct {`
parser/tests: allow for DST change e.g. 21d / 2w are subject to DST; update the tolerance. 2018-03-31 17:44:17 +02:00			`std::string expr;`
			`long diff;`
			`int tolerance;`
integrate new query parser 2017-10-24 21:57:57 +02:00			`} tests[] = {`
			`{ "3h", 3 * 60 * 60, 1 },`
parser/tests: allow for DST change e.g. 21d / 2w are subject to DST; update the tolerance. 2018-03-31 17:44:17 +02:00			`{ "21d", 21 * 24 * 60 * 60, 3600 + 1 },`
			`{ "2w", 2 * 7 * 24 * 60 * 60, 3600 + 1 },`
integrate new query parser 2017-10-24 21:57:57 +02:00
			`{ "2y", 2 * 365 * 24 * 60 * 60, 24 * 3600 + 1 },`
			`{ "3m", 3 * 30 * 24 * 60 * 60, 3 * 24 * 3600 + 1 }`
			`};`

			`for (auto i = 0; i != G_N_ELEMENTS(tests); ++i) {`
			`const auto diff = time(NULL) -`
			`strtol(Mux::date_to_time_t_string(tests[i].expr, true).c_str(),`
			`NULL, 10);`
			`if (g_test_verbose())`
			`std::cerr << tests[i].expr << ' '`
			`<< diff << ' '`
			`<< tests[i].diff << std::endl;`

			`g_assert_true (tests[i].diff - diff <= tests[i].tolerance);`
			`}`

			`g_assert_true (strtol(Mux::date_to_time_t_string("-1y", true).c_str(),`
			`NULL, 10) == 0);`
			`}`

lib: implement new query parser mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" reseach project, and I have now backported it to the mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From and end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful. 2017-10-24 21:55:35 +02:00			`static void`
			`test_size ()`
			`{`
			`CaseVec cases = {`
			`{ "456", true, "0000000456" },`
			`{ "", false, "9999999999" },`
			`{ "", true, "0000000000" },`
			`};`

			`test_cases (cases, [](auto s, auto f){ return size_to_string(s,f); });`
			`}`


parser: add more tests 2017-10-28 13:12:50 +02:00			`static void`
			`test_flatten ()`
			`{`
			`CaseVec cases = {`
			`{ "Менделе́ев", true, "менделеев" },`
			`{ "", false, "" },`
			`{ "Ångström", true, "angstrom" },`
			`};`

			`test_cases (cases, [](auto s, auto f){ return utf8_flatten(s); });`
			`}`

			`static void`
			`test_clean ()`
			`{`
			`CaseVec cases = {`
			`{ "\t a\t\nb ", true, "a b" },`
			`{ "", false, "" },`
			`{ "Ångström", true, "Ångström" },`
			`};`

			`test_cases (cases, [](auto s, auto f){ return utf8_clean(s); });`
			`}`


			`static void`
			`test_format ()`
			`{`
			`g_assert_true (format ("hello %s, %u", "world", 123) ==`
			`"hello world, 123");`
			`}`

lib: implement new query parser mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" reseach project, and I have now backported it to the mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From and end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful. 2017-10-24 21:55:35 +02:00			`int`
			`main (int argc, char *argv[])`
			`{`
			`g_test_init (&argc, &argv, NULL);`

integrate new query parser 2017-10-24 21:57:57 +02:00			`g_test_add_func ("/utils/date-basic", test_date_basic);`
			`g_test_add_func ("/utils/date-ymwdhMs", test_date_ymwdhMs);`
			`g_test_add_func ("/utils/size", test_size);`
parser: add more tests 2017-10-28 13:12:50 +02:00			`g_test_add_func ("/utils/flatten", test_flatten);`
			`g_test_add_func ("/utils/clean", test_clean);`
			`g_test_add_func ("/utils/format", test_format);`
lib: implement new query parser mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" reseach project, and I have now backported it to the mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From and end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful. 2017-10-24 21:55:35 +02:00
			`return g_test_run ();`
			`}`