mirror of https://github.com/djcb/mu.git
lib: implement new query parser
mu's query parser is the piece of software that turns your queries into something the Xapian database can understand. So, if you query "maildir:/inbox and subject:bla" this must be translated into a Xapian::Query object which will retrieve the sought-after messages. Since mu's beginning, almost a decade ago, this parser was based on Xapian's default Xapian::QueryParser. It works okay, but wasn't really designed for the mu use-case, and had a bit of trouble with anything that's not A..Z (think: spaces, special characters, unicode etc.). Over the years, mu added quite a bit of pre-processing trickery to deal with that. Still, there were corner cases and bugs that were practically unfixable. The solution to all of this is to have a custom query processor that replaces Xapian's, and write it from the ground up to deal with the special characters etc. I wrote one, as part of my "future, post-1.0 mu" research project, and I have now backported it to mu 0.9.19. From a technical perspective, this is a major cleanup, and allows us to get rid of much of the fragile preprocessing both for indexing and querying. From an end-user perspective this (hopefully) means that many of the little parsing issues are gone, and it opens the way for some new features. From an end-user perspective: - better support for special characters. - regexp search! yes, you can now search for regular expressions, e.g. subject:/h.ll?o/ will find subjects with hallo, hello, halo, philosophy, ... As you can imagine, this can be a _heavy_ operation on the database, and might take quite a bit longer than a normal query; but it can be quite useful.
This commit is contained in:
parent
b53366313b
commit
b75f9f508b
|
@ -0,0 +1,87 @@
|
||||||
|
## Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
##
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 3 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License
|
||||||
|
## along with this program; if not, write to the Free Software Foundation,
|
||||||
|
## Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
|
||||||
|
include $(top_srcdir)/gtest.mk
|
||||||
|
|
||||||
|
@VALGRIND_CHECK_RULES@
|
||||||
|
|
||||||
|
noinst_PROGRAMS= \
|
||||||
|
tokenize \
|
||||||
|
parse
|
||||||
|
|
||||||
|
tokenize_SOURCES= \
|
||||||
|
tokenize.cc
|
||||||
|
|
||||||
|
tokenize_LDADD= \
|
||||||
|
$(GCOV_LDADD) \
|
||||||
|
libmuxparser.la
|
||||||
|
|
||||||
|
parse_SOURCES= \
|
||||||
|
parse.cc
|
||||||
|
|
||||||
|
parse_LDADD= \
|
||||||
|
$(GCOV_LDADD) \
|
||||||
|
libmuxparser.la
|
||||||
|
|
||||||
|
AM_CXXFLAGS= \
|
||||||
|
-I$(srcdir)/.. \
|
||||||
|
-I$(top_srcdir)/lib \
|
||||||
|
$(GLIB_CFLAGS) \
|
||||||
|
$(XAPIAN_CXXFLAGS) \
|
||||||
|
$(WARN_CXXFLAGS) \
|
||||||
|
$(GCOV_CFLAGS) \
|
||||||
|
-Wno-inline \
|
||||||
|
-Wno-switch-enum
|
||||||
|
|
||||||
|
libmuxparser_la_LIBADD= \
|
||||||
|
$(WARN_LDFLAGS) \
|
||||||
|
$(GLIB_LIBS) \
|
||||||
|
$(XAPIAN_LIBS) \
|
||||||
|
$(GCOV_LDADD)
|
||||||
|
|
||||||
|
noinst_LTLIBRARIES= \
|
||||||
|
libmuxparser.la
|
||||||
|
|
||||||
|
libmuxparser_la_SOURCES= \
|
||||||
|
data.hh \
|
||||||
|
parser.cc \
|
||||||
|
parser.hh \
|
||||||
|
proc-iface.hh \
|
||||||
|
tokenizer.cc \
|
||||||
|
tokenizer.hh \
|
||||||
|
tree.hh \
|
||||||
|
utils.cc \
|
||||||
|
utils.hh \
|
||||||
|
xapian.cc \
|
||||||
|
xapian.hh
|
||||||
|
|
||||||
|
VALGRIND_SUPPRESSIONS_FILES= ${top_srcdir}/mux.supp
|
||||||
|
|
||||||
|
noinst_PROGRAMS+=$(TEST_PROGS)
|
||||||
|
|
||||||
|
TEST_PROGS += test-tokenizer
|
||||||
|
test_tokenizer_SOURCES=test-tokenizer.cc
|
||||||
|
test_tokenizer_LDADD=libmuxparser.la
|
||||||
|
|
||||||
|
TEST_PROGS += test-parser
|
||||||
|
test_parser_SOURCES=test-parser.cc
|
||||||
|
test_parser_LDADD=libmuxparser.la
|
||||||
|
|
||||||
|
TEST_PROGS += test-utils
|
||||||
|
test_utils_SOURCES=test-utils.cc
|
||||||
|
test_utils_LDADD=libmuxparser.la
|
||||||
|
|
||||||
|
TESTS=$(TEST_PROGS)
|
|
@ -0,0 +1,151 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __DATA_HH__
|
||||||
|
#define __DATA_HH__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
|
#include <parser/utils.hh>
|
||||||
|
|
||||||
|
namespace Mux {
|
||||||
|
|
||||||
|
// Common base for the data attached to a parse-tree node: either a single
// Value or a Range. A regular expression is carried as a Value as well; there
// is no separate type for it here.
struct Data {
	enum class Type { Value, Range };

	virtual ~Data() = default;

	Type        type;   /**< kind of data: Value or Range */
	std::string field;  /**< full name of the field */
	std::string prefix; /**< Xapian prefix for the field */
	unsigned    id;     /**< Xapian value number for the field */

protected:
	// The constructor is protected, so only derived types can create a Data.
	Data (Type _type, const std::string& _field,
	      const std::string& _prefix, unsigned _id):
		type{_type},
		field{_field},
		prefix{_prefix},
		id{_id}
	{}
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* operator<<
|
||||||
|
*
|
||||||
|
* @param os an output stream
|
||||||
|
* @param t a data type
|
||||||
|
*
|
||||||
|
* @return the updated output stream
|
||||||
|
*/
|
||||||
|
inline std::ostream&
|
||||||
|
operator<< (std::ostream& os, Data::Type t)
|
||||||
|
{
|
||||||
|
switch (t) {
|
||||||
|
case Data::Type::Value: os << "value"; break;
|
||||||
|
case Data::Type::Range: os << "range"; break;
|
||||||
|
default: os << "bug"; break;
|
||||||
|
}
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Range type -- [a..b]
|
||||||
|
*/
|
||||||
|
struct Range: public Data {
|
||||||
|
/**
|
||||||
|
* Construct a range
|
||||||
|
*
|
||||||
|
* @param _field the field
|
||||||
|
* @param _prefix the xapian prefix
|
||||||
|
* @param _id xapian value number
|
||||||
|
* @param _lower lower bound
|
||||||
|
* @param _upper upper bound
|
||||||
|
*/
|
||||||
|
Range (const std::string& _field, const std::string& _prefix,
|
||||||
|
unsigned _id,
|
||||||
|
const std::string& _lower,const std::string& _upper):
|
||||||
|
|
||||||
|
Data(Data::Type::Range, _field, _prefix, _id),
|
||||||
|
lower(_lower), upper(_upper) {}
|
||||||
|
|
||||||
|
std::string lower; /**< lower bound */
|
||||||
|
std::string upper; /**< upper bound */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic value
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
struct Value: public Data {
|
||||||
|
/**
|
||||||
|
* Construct a Value
|
||||||
|
*
|
||||||
|
* @param _field the field
|
||||||
|
* @param _prefix the xapian prefix
|
||||||
|
* @param _id xapian value number
|
||||||
|
* @param _value the value
|
||||||
|
*/
|
||||||
|
Value (const std::string& _field, const std::string& _prefix,
|
||||||
|
unsigned _id, const std::string& _value):
|
||||||
|
Data(Value::Type::Value, _field, _prefix, _id),
|
||||||
|
value(_value) {}
|
||||||
|
|
||||||
|
std::string value; /**< the value */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* operator<<
|
||||||
|
*
|
||||||
|
* @param os an output stream
|
||||||
|
* @param v a data ptr
|
||||||
|
*
|
||||||
|
* @return the updated output stream
|
||||||
|
*/
|
||||||
|
inline std::ostream&
|
||||||
|
operator<< (std::ostream& os, const std::unique_ptr<Data>& v)
|
||||||
|
{
|
||||||
|
switch (v->type) {
|
||||||
|
case Data::Type::Value: {
|
||||||
|
const auto bval = dynamic_cast<Value*> (v.get());
|
||||||
|
os << ' ' << quote(v->field) << ' '
|
||||||
|
<< quote(utf8_flatten(bval->value));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Data::Type::Range: {
|
||||||
|
const auto rval = dynamic_cast<Range*> (v.get());
|
||||||
|
os << ' ' << quote(v->field) << ' '
|
||||||
|
<< quote(rval->lower) << ' '
|
||||||
|
<< quote(rval->upper);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
os << "unexpected type";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Mux
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* __DATA_HH__ */
|
|
@ -0,0 +1,30 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
#ifndef __DUMMY_PROCESSOR_HH__
#define __DUMMY_PROCESSOR_HH__

#include <string>
#include <vector>
#include <tuple>

namespace Mux {

// No declarations yet.

} // namespace Mux

#endif /* __DUMMY_PROCESSOR_HH__ */
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "parser.hh"
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char *argv[])
|
||||||
|
{
|
||||||
|
std::string s;
|
||||||
|
|
||||||
|
for (auto i = 1; i < argc; ++i)
|
||||||
|
s += " " + std::string(argv[i]);
|
||||||
|
|
||||||
|
Mux::WarningVec warnings;
|
||||||
|
|
||||||
|
const auto tree = Mux::parse (s, warnings);
|
||||||
|
for (const auto& w: warnings)
|
||||||
|
std::cerr << "1:" << w.pos << ": " << w.msg << std::endl;
|
||||||
|
|
||||||
|
std::cout << tree << std::endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,346 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
#include "parser.hh"
|
||||||
|
#include "tokenizer.hh"
|
||||||
|
#include "utils.hh"
|
||||||
|
|
||||||
|
using namespace Mux;
|
||||||
|
|
||||||
|
// 3 precedence levels: units (NOT,()) > factors (AND) > terms (OR, XOR)
|
||||||
|
|
||||||
|
// query -> <term-1> | ε
|
||||||
|
// <term-1> -> <factor-1> <term-2> | ε
|
||||||
|
// <term-2> -> OR|XOR <term-1> | ε
|
||||||
|
// <factor-1> -> <unit> <factor-2> | ε
|
||||||
|
// <factor-2> -> [AND]|AND NOT <factor-1> | ε
|
||||||
|
// <unit> -> NOT <unit> | ( <term-1> ) | <data>
|
||||||
|
// <data> -> <value> | <range> | <regex>
|
||||||
|
// <value> -> [field:]value
|
||||||
|
// <range> -> [field:][lower]..[upper]
|
||||||
|
// <regex> -> [field:]/regex/
|
||||||
|
|
||||||
|
|
||||||
|
// Build a std::runtime_error describing an internal inconsistency. The message
// is "<line>: BUG: " followed by the printf-style formatted arguments. This
// only creates the exception object; use it as `throw BUG(...)`.
// __LINE__ is an int, so it is converted to match the %u conversion.
#define BUG(...) std::runtime_error (					\
		format("%u: BUG: ", static_cast<unsigned>(__LINE__))	\
		+ format(__VA_ARGS__))
|
||||||
|
|
||||||
|
/**
 * Peek at the next token without consuming it
 *
 * Callers are expected to check that there are tokens left; asking for the
 * front of an empty container is undefined, so that case is turned into a
 * BUG exception (a std::runtime_error) instead.
 *
 * @param tokens the remaining tokens
 *
 * @return a copy of the first token
 */
static Token
look_ahead (const Mux::Tokens& tokens)
{
	if (tokens.empty())
		throw BUG("look_ahead without any tokens left");

	return tokens.front();
}
|
||||||
|
|
||||||
|
// Create a tree that consists of a single node of type Empty.
static Mux::Tree
empty()
{
	return Tree(Node{Node::Type::Empty});
}
|
||||||
|
|
||||||
|
static Mux::Tree term_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings);
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Build the tree for a plain value
 *
 * With exactly one field the result is a single Value node; with several
 * fields (a 'multi-field' such as "recip:") it is an OR node with one Value
 * child per field. pos and warnings are not used here.
 *
 * @param fields the fields to match; must not be empty (BUG otherwise)
 * @param v the value, as found in the query
 * @param pos position of the token in the query
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree
 */
static Mux::Tree
value (const ProcIface::FieldInfoVec& fields, const std::string& v,
       size_t pos, ProcPtr proc, WarningVec& warnings)
{
	auto flattened = utf8_flatten(v);

	if (fields.empty())
		throw BUG("expected one or more fields");

	// a Value leaf for one field, with the value run through the processor
	const auto leaf_for = [&](const ProcIface::FieldInfo& info) {
		return Tree({Node::Type::Value,
			     std::make_unique<Value>(
				     info.field, info.prefix, info.id,
				     proc->process_value(info.field, flattened))});
	};

	if (fields.size() == 1)
		return leaf_for (fields.front());

	Tree alternatives(Node{Node::Type::OpOr});
	for (const auto& info: fields)
		alternatives.add_child (leaf_for (info));

	return alternatives;
}
|
||||||
|
|
||||||
|
/**
 * Build the tree for a regular expression
 *
 * The first and last character of v (the enclosing slashes) are dropped and
 * the rest is compiled as a std::regex. The result is an OR node with a Value
 * child for every term the processor returns for every field. If anything in
 * that process throws, an "invalid regexp" warning is added and v is treated
 * as a plain value instead.
 *
 * @param fields the fields to match
 * @param v the regexp including its delimiters; at least 2 chars (BUG otherwise)
 * @param pos position of the token in the query
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree
 */
static Mux::Tree
regex (const ProcIface::FieldInfoVec& fields, const std::string& v,
       size_t pos, ProcPtr proc, WarningVec& warnings)
{
	if (v.length() < 2)
		throw BUG("expected regexp, got '%s'", v.c_str());

	const auto pattern = utf8_flatten(v.substr(1, v.length()-2));

	try {
		Tree matches(Node{Node::Type::OpOr});
		const std::regex compiled(pattern);

		for (const auto& info: fields)
			for (const auto& term:
				     proc->process_regex (info.field, compiled))
				matches.add_child (Tree(
					{Node::Type::Value,
					 std::make_unique<Value>(info.field, "",
								 info.id, term)}));
		return matches;

	} catch (...) {
		// fallback: warn, and use the text as an ordinary value
		warnings.push_back ({pos, "invalid regexp"});
		return value (fields, v, pos, proc, warnings);
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Build the tree for a range, lower..upper
 *
 * Only the first of the fields is considered. If the processor says it is not
 * a range field, the text "lower..upper" is treated as a plain value.
 * Otherwise the bounds are processed; when the processed lower bound compares
 * greater than the processed upper bound, they are processed once more with
 * the arguments swapped.
 *
 * @param fields the fields; must not be empty (BUG otherwise)
 * @param lower lower bound, as found in the query
 * @param upper upper bound, as found in the query
 * @param pos position of the token in the query
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree
 */
static Mux::Tree
range (const ProcIface::FieldInfoVec& fields, const std::string& lower,
       const std::string& upper, size_t pos, ProcPtr proc,
       WarningVec& warnings)
{
	if (fields.empty())
		throw BUG("expected field");

	const auto& info = fields.front();

	if (!proc->is_range_field(info.field)) {
		const auto literal = lower + ".." + upper;
		return value (fields, literal, pos, proc, warnings);
	}

	auto bounds = proc->process_range (info.field, lower, upper);
	if (bounds.lower > bounds.upper)
		bounds = proc->process_range (info.field, upper, lower);

	return Tree({{Node::Type::Range},
		     std::make_unique<Range>(info.field, info.prefix, info.id,
					     bounds.lower, bounds.upper)});
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Parse a <data> item: consume one token and turn it into a value, range or
 * regexp tree
 *
 * The token is consumed even when it is not a Data token; in that case a
 * warning is added and its text is handled as data anyway.
 *
 * @param tokens the remaining tokens; must not be empty
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree
 */
static Mux::Tree
data (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
{
	const auto token = look_ahead(tokens);
	if (token.type != Token::Type::Data)
		warnings.push_back ({token.pos, "expected: value"});

	tokens.pop_front();

	// split "field:value" at the first colon; a colon in the very first or
	// very last position does not count as a separator
	std::string field, val;
	const auto col = token.str.find (":");
	const bool has_field = col != std::string::npos && col != 0 &&
		col != token.str.length()-1;
	if (has_field) {
		field = token.str.substr(0, col);
		val   = token.str.substr(col + 1);
	} else
		val = token.str;

	auto fields = proc->process_field (field);
	if (fields.empty()) { // unknown field
		warnings.push_back ({token.pos, format ("invalid field '%s'", field.c_str())});
		fields = proc->process_field ("");
		// fallback: the whole of foo:bar becomes the value
		return value (fields, field + ":" + val, token.pos, proc, warnings);
	}

	// /.../ is a regexp; a trailing '*' is a wildcard, rewritten as a regexp
	// NOTE(review): the wildcard prefix is not regexp-escaped -- confirm
	// that this is intended
	const auto len = val.length();
	if (len >= 2) {
		if (val.front() == '/' && val.back() == '/')
			return regex (fields, val, token.pos, proc, warnings);
		if (val.back() == '*')
			return regex (fields,
				      "/" + val.substr(0, len-1) + ".*/",
				      token.pos, proc, warnings);
	}

	// without "..", it is just a value; with it, a range
	const auto dotdot = val.find("..");
	if (dotdot == std::string::npos)
		return value (fields, val, token.pos, proc, warnings);

	return range(fields, val.substr(0, dotdot), val.substr(dotdot + 2),
		     token.pos, proc, warnings);
}
|
||||||
|
|
||||||
|
/**
 * Parse a <unit>: NOT followed by a unit, a parenthesized expression, or a
 * data item
 *
 * Without any tokens left, a warning is added and an Empty tree returned. A
 * missing or unexpected closing parenthesis only results in a warning.
 *
 * @param tokens the remaining tokens
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree
 */
static Mux::Tree
unit (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
{
	if (tokens.empty()) {
		warnings.push_back ({0, "expected: unit"});
		return empty();
	}

	const auto token = look_ahead (tokens);

	switch (token.type) {
	case Token::Type::Not: {
		tokens.pop_front();
		Tree negation{{Node::Type::OpNot}};
		negation.add_child(unit (tokens, proc, warnings));
		return negation;
	}
	case Token::Type::Open: {
		tokens.pop_front();
		auto group = term_1 (tokens, proc, warnings);

		if (tokens.empty()) {
			warnings.push_back({token.pos, "expected: ')'"});
			return group;
		}

		const auto closer = look_ahead(tokens);
		if (closer.type == Token::Type::Close)
			tokens.pop_front();
		else
			warnings.push_back(
				{closer.pos,
				 std::string("expected: ')' but got ") +
				 closer.str});
		return group;
	}
	default:
		return data (tokens, proc, warnings);
	}
}
|
||||||
|
|
||||||
|
static Mux::Tree factor_1 (Mux::Tokens& tokens, ProcPtr proc,
|
||||||
|
WarningVec& warnings);
|
||||||
|
|
||||||
|
/**
 * Parse a <factor-2>: the optional right-hand side of an AND / AND NOT
 *
 * An AND is implied when the next token is an opening parenthesis or data,
 * and an AND NOT is implied when it is a NOT. For any other token, or when
 * there are no tokens left, an Empty tree is returned and op is left alone.
 *
 * @param tokens the remaining tokens
 * @param op receives the operator (OpAnd or OpAndNot) when one was found
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree for the right-hand side, or an Empty tree
 */
static Mux::Tree
factor_2 (Mux::Tokens& tokens, Node::Type& op, ProcPtr proc,
	  WarningVec& warnings)
{
	if (tokens.empty())
		return empty();

	const auto token = look_ahead(tokens);

	switch (token.type) {
	case Token::Type::And: {
		tokens.pop_front();
		// the AND may have been the last token, so only peek at the next
		// one when there is one; a dangling AND is then reported by
		// unit() as "expected: unit"
		if (!tokens.empty() &&
		    look_ahead(tokens).type == Token::Type::Not) { // AND NOT is a unit
			tokens.pop_front();
			op = Node::Type::OpAndNot;
		} else
			op = Node::Type::OpAnd;
	} break;
	case Token::Type::Open:
	case Token::Type::Data:
		op = Node::Type::OpAnd; // implicit AND
		break;
	case Token::Type::Not:
		tokens.pop_front();
		op = Node::Type::OpAndNot; // implicit AND NOT
		break;

	default:
		return empty();
	}

	return factor_1 (tokens, proc, warnings);
}
|
||||||
|
|
||||||
|
/**
 * Parse a <factor-1>: a unit, optionally combined (AND / AND NOT) with a
 * following factor
 *
 * @param tokens the remaining tokens
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the unit's tree when nothing follows it, otherwise an operator node
 * with the unit and the right-hand side as its children
 */
static Mux::Tree
factor_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
{
	Node::Type op { Node::Type::Invalid };

	auto lhs = unit (tokens, proc, warnings);
	auto rhs = factor_2 (tokens, op, proc, warnings);

	if (!rhs.empty()) {
		Tree combined {{op}};
		combined.add_child(std::move(lhs));
		combined.add_child(std::move(rhs));
		return combined;
	}

	return lhs;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Parse a <term-2>: the optional right-hand side of an OR / XOR
 *
 * When the next token is neither OR nor XOR, an Empty tree is returned and
 * nothing is consumed; unless that token is a closing parenthesis, a warning
 * is added as well.
 *
 * @param tokens the remaining tokens
 * @param op receives the operator (OpOr or OpXor) when one was found
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree for the right-hand side, or an Empty tree
 */
static Mux::Tree
term_2 (Mux::Tokens& tokens, Node::Type& op, ProcPtr proc,
	WarningVec& warnings)
{
	if (tokens.empty())
		return empty();

	const auto token = look_ahead (tokens);

	if (token.type == Token::Type::Or)
		op = Node::Type::OpOr;
	else if (token.type == Token::Type::Xor)
		op = Node::Type::OpXor;
	else {
		if (token.type != Token::Type::Close)
			warnings.push_back({token.pos, "expected OR|XOR"});
		return empty();
	}

	tokens.pop_front(); // consume the operator

	return term_1 (tokens, proc, warnings);
}
|
||||||
|
|
||||||
|
/**
 * Parse a <term-1>: a factor, optionally combined (OR / XOR) with a
 * following term
 *
 * @param tokens the remaining tokens
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the factor's tree when nothing follows it, otherwise an operator
 * node with the factor and the right-hand side as its children
 */
static Mux::Tree
term_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
{
	Node::Type op { Node::Type::Invalid };

	auto lhs = factor_1 (tokens, proc, warnings);
	auto rhs = term_2 (tokens, op, proc, warnings);

	if (!rhs.empty()) {
		Tree combined {{op}};
		combined.add_child(std::move(lhs));
		combined.add_child(std::move(rhs));
		return combined;
	}

	return lhs;
}
|
||||||
|
|
||||||
|
/**
 * Parse a whole query: an Empty tree when there are no tokens, otherwise a
 * <term-1>
 *
 * @param tokens the tokens of the query
 * @param proc the processor
 * @param warnings vec to receive warnings
 *
 * @return the tree
 */
static Mux::Tree
query (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
{
	if (!tokens.empty())
		return term_1 (tokens, proc, warnings);

	return empty ();
}
|
||||||
|
|
||||||
|
// Tokenize expr and parse the tokens into a tree. A std::runtime_error thrown
// along the way (such as a BUG) is written to stderr, and an Empty tree is
// returned in that case.
Mux::Tree
Mux::parse (const std::string& expr, WarningVec& warnings, ProcPtr proc)
{
	try {
		auto remaining = tokenize (expr);
		return query (remaining, proc, warnings);

	} catch (const std::runtime_error& err) {
		std::cerr << err.what() << std::endl;
		return empty();
	}
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __PARSER_HH__
|
||||||
|
#define __PARSER_HH__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include <parser/data.hh>
|
||||||
|
#include <parser/tree.hh>
|
||||||
|
#include <parser/proc-iface.hh>
|
||||||
|
|
||||||
|
// A simple recursive-descent parser for queries. Follows the Xapian syntax,
|
||||||
|
// but better handles non-alphanum; also implements regexp
|
||||||
|
|
||||||
|
namespace Mux {
|
||||||
|
|
||||||
|
/**
 * A parser warning: a message together with a position
 */
struct Warning {
	size_t            pos; /**< pos in string */
	const std::string msg; /**< warning message */

	/**
	 * operator==
	 *
	 * @param rhs right-hand side
	 *
	 * @return true if both the position and the message of rhs are
	 * equal to those of this; false otherwise
	 */
	bool operator==(const Warning& rhs) const {
		if (pos != rhs.pos)
			return false;
		return msg == rhs.msg;
	}
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* operator<<
|
||||||
|
*
|
||||||
|
* @param os an output stream
|
||||||
|
* @param w a warning
|
||||||
|
*
|
||||||
|
* @return the updated output stream
|
||||||
|
*/
|
||||||
|
inline std::ostream&
|
||||||
|
operator<< (std::ostream& os, const Warning& w)
|
||||||
|
{
|
||||||
|
os << w.pos << ":" << w.msg;
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a query string
|
||||||
|
*
|
||||||
|
* @param query a query string
|
||||||
|
* @param warnings vec to receive warnings
|
||||||
|
* @param proc a Processor object
|
||||||
|
*
|
||||||
|
* @return a parse-tree
|
||||||
|
*/
|
||||||
|
using WarningVec=std::vector<Warning>;
|
||||||
|
using ProcPtr = const std::unique_ptr<ProcIface>&;
|
||||||
|
Tree parse (const std::string& query, WarningVec& warnings,
|
||||||
|
ProcPtr proc = std::make_unique<DummyProc>());
|
||||||
|
|
||||||
|
} // namespace Mux
|
||||||
|
|
||||||
|
#endif /* __PARSER_HH__ */
|
|
@ -0,0 +1,131 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
#ifndef __PROC_IFACE_HH__
|
||||||
|
#define __PROC_IFACE_HH__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <tuple>
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
|
namespace Mux {
|
||||||
|
|
||||||
|
// Interface through which the parser asks for field information and has
// values, ranges and regular expressions processed.
struct ProcIface {

	/**
	 * Description of one field, as returned by process_field
	 */
	struct FieldInfo {
		const std::string field;  /**< name of the field */
		const std::string prefix; /**< prefix for the field */
		unsigned	  id;	  /**< numerical id for the field */
	};
	using FieldInfoVec = std::vector<FieldInfo>;

	/**
	 * A processed range, as returned by process_range
	 */
	struct Range {
		std::string lower; /**< lower bound */
		std::string upper; /**< upper bound */
	};

	virtual ~ProcIface() = default;

	/**
	 * Get the fields for the given fieldstr, or empty if there is none
	 *
	 * @param field a fieldstr, e.g "subject" or "s" for the subject field
	 *
	 * @return a vector with the "exploded" fields. E.g. "s" might map to a
	 * single entry for the subject field, while "recip" could map to
	 * entries for each of the to, cc and bcc fields
	 */
	virtual FieldInfoVec process_field (const std::string& field) const = 0;

	/**
	 * Process a value
	 *
	 * @param field a field name
	 * @param value a value
	 *
	 * @return the processed value
	 */
	virtual std::string process_value (
		const std::string& field, const std::string& value) const = 0;

	/**
	 * Is this a range field?
	 *
	 * @param field some field
	 *
	 * @return true if it is a range-field; false otherwise.
	 */
	virtual bool is_range_field (const std::string& field) const = 0;

	/**
	 * Process a range field
	 *
	 * @param field a field name, e.g. "date" for the date field
	 * @param lower lower bound or empty
	 * @param upper upper bound or empty
	 *
	 * @return the processed range
	 */
	virtual Range process_range (const std::string& field, const std::string& lower,
				     const std::string& upper) const = 0;

	/**
	 * Process a regular expression for a field
	 *
	 * @param field a field name
	 * @param rx a regular expression
	 *
	 * @return a vector of strings for this field and regular expression;
	 * presumably the terms that match rx -- to be confirmed against the
	 * implementations
	 */
	virtual std::vector<std::string>
	process_regex (const std::string& field, const std::regex& rx) const = 0;

}; // ProcIface
|
||||||
|
|
||||||
|
|
||||||
|
struct DummyProc: public ProcIface { // For testing
|
||||||
|
|
||||||
|
std::vector<FieldInfo>
|
||||||
|
process_field (const std::string& field) const override {
|
||||||
|
return {{ field, "x", 0 }};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
process_value (const std::string& field, const std::string& value) const override {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_range_field (const std::string& field) const override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Range process_range (const std::string& field, const std::string& lower,
|
||||||
|
const std::string& upper) const override {
|
||||||
|
return { lower, upper };
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string>
|
||||||
|
process_regex (const std::string& field, const std::regex& rx) const override {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}; //Dummy
|
||||||
|
|
||||||
|
|
||||||
|
} // Mux
|
||||||
|
|
||||||
|
#endif /* __PROC_IFACE_HH__ */
|
|
@ -0,0 +1,121 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <glib.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#include "parser.hh"
|
||||||
|
using namespace Mux;
|
||||||
|
|
||||||
|
struct Case {
|
||||||
|
const std::string expr;
|
||||||
|
const std::string expected;
|
||||||
|
WarningVec warnings;
|
||||||
|
};
|
||||||
|
|
||||||
|
using CaseVec = std::vector<Case>;
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_cases(const CaseVec& cases)
|
||||||
|
{
|
||||||
|
for (const auto& casus : cases ) {
|
||||||
|
|
||||||
|
WarningVec warnings;
|
||||||
|
const auto tree = parse (casus.expr, warnings);
|
||||||
|
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << tree;
|
||||||
|
|
||||||
|
if (g_test_verbose()) {
|
||||||
|
std::cout << "\n";
|
||||||
|
std::cout << casus.expr << std::endl;
|
||||||
|
std::cout << "exp:" << casus.expected << std::endl;
|
||||||
|
std::cout << "got:" << ss.str() << std::endl;
|
||||||
|
}
|
||||||
|
g_assert_true (casus.expected == ss.str());
|
||||||
|
|
||||||
|
// g_assert_cmpuint (casus.warnings.size(), ==, warnings.size());
|
||||||
|
// for (auto i = 0; i != (int)casus.warnings.size(); ++i) {
|
||||||
|
// std::cout << "exp:" << casus.warnings[i] << std::endl;
|
||||||
|
// std::cout << "got:" << warnings[i] << std::endl;
|
||||||
|
// g_assert_true (casus.warnings[i] == warnings[i]);
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_basic ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
//{ "", R"#((atom :value ""))#"},
|
||||||
|
{ "foo", R"#((value "" "foo"))#", },
|
||||||
|
{ "foo or bar",
|
||||||
|
R"#((or(value "" "foo")(value "" "bar")))#" },
|
||||||
|
{ "foo and bar",
|
||||||
|
R"#((and(value "" "foo")(value "" "bar")))#"},
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_complex ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
{ "foo and bar or cuux",
|
||||||
|
R"#((or(and(value "" "foo")(value "" "bar")))#" +
|
||||||
|
std::string(R"#((value "" "cuux")))#") },
|
||||||
|
{ "a and not b",
|
||||||
|
R"#((andnot(value "" "a")(value "" "b")))#"
|
||||||
|
},
|
||||||
|
{ "a and b and c",
|
||||||
|
R"#((and(value "" "a")(and(value "" "b")(value "" "c"))))#"
|
||||||
|
},
|
||||||
|
{ "(a or b) and c",
|
||||||
|
R"#((and(or(value "" "a")(value "" "b"))(value "" "c")))#"
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_flatten ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
{ " Mötørhęåđ", R"#((value "" "motorhead"))#" }
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char *argv[])
|
||||||
|
{
|
||||||
|
g_test_init (&argc, &argv, NULL);
|
||||||
|
|
||||||
|
g_test_add_func ("/parser/basic", test_basic);
|
||||||
|
g_test_add_func ("/parser/complex", test_complex);
|
||||||
|
g_test_add_func ("/parser/flatten", test_flatten);
|
||||||
|
|
||||||
|
return g_test_run ();
|
||||||
|
}
|
|
@ -0,0 +1,143 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <glib.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "tokenizer.hh"
|
||||||
|
|
||||||
|
struct Case {
|
||||||
|
const char *str;
|
||||||
|
const Mux::Tokens tokens;
|
||||||
|
};
|
||||||
|
|
||||||
|
using CaseVec = std::vector<Case>;
|
||||||
|
|
||||||
|
using namespace Mux;
|
||||||
|
using TT = Token::Type;
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_cases(const CaseVec& cases)
|
||||||
|
{
|
||||||
|
for (const auto& casus : cases ) {
|
||||||
|
const auto tokens = tokenize (casus.str);
|
||||||
|
|
||||||
|
g_assert_cmpuint ((guint)tokens.size(),==,(guint)casus.tokens.size());
|
||||||
|
for (size_t u = 0; u != tokens.size(); ++u) {
|
||||||
|
if (g_test_verbose()) {
|
||||||
|
std::cerr << "case " << u << " " << casus.str << std::endl;
|
||||||
|
std::cerr << "exp: '" << casus.tokens[u] << "'" << std::endl;
|
||||||
|
std::cerr << "got: '" << tokens[u] << "'" << std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
g_assert_true (tokens[u] == casus.tokens[u]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_basic ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
{ "", {} },
|
||||||
|
|
||||||
|
{ "foo", Tokens{Token{3, TT::Data, "foo"}}},
|
||||||
|
|
||||||
|
{ "foo bar cuux", Tokens{Token{3, TT::Data, "foo"},
|
||||||
|
Token{7, TT::Data, "bar"},
|
||||||
|
Token{12, TT::Data, "cuux"}}},
|
||||||
|
|
||||||
|
{ "\"foo bar\"", Tokens{ Token{9, TT::Data, "foo bar"}}},
|
||||||
|
|
||||||
|
// ie. ignore missing closing '"'
|
||||||
|
{ "\"foo bar", Tokens{ Token{8, TT::Data, "foo bar"}}},
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_specials ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
{ ")*(", Tokens{Token{1, TT::Close, ")"},
|
||||||
|
Token{2, TT::Data, "*"},
|
||||||
|
Token{3, TT::Open, "("}}},
|
||||||
|
{ "\")*(\"", Tokens{Token{5, TT::Data, ")*("}}},
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_ops ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
{ "foo and bar oR cuux XoR fnorb",
|
||||||
|
Tokens{Token{3, TT::Data, "foo"},
|
||||||
|
Token{7, TT::And, "and"},
|
||||||
|
Token{11, TT::Data, "bar"},
|
||||||
|
Token{14, TT::Or, "oR"},
|
||||||
|
Token{19, TT::Data, "cuux"},
|
||||||
|
Token{23, TT::Xor, "XoR"},
|
||||||
|
Token{29, TT::Data, "fnorb"}}},
|
||||||
|
{ "NOT (aap or mies)",
|
||||||
|
Tokens{Token{3, TT::Not, "NOT"},
|
||||||
|
Token{5, TT::Open, "("},
|
||||||
|
Token{8, TT::Data, "aap"},
|
||||||
|
Token{11, TT::Or, "or"},
|
||||||
|
Token{16, TT::Data, "mies"},
|
||||||
|
Token{17, TT::Close, ")"}}}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
test_cases (cases);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_escape ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
{ "foo\"bar\"", Tokens{Token{3, TT::Data, "foo"},
|
||||||
|
Token{8, TT::Data, "bar"}}},
|
||||||
|
{ "\"fnorb\"", Tokens{Token{7, TT::Data, "fnorb"}}},
|
||||||
|
{ "\\\"fnorb\\\"", Tokens{Token{9, TT::Data, "\"fnorb\""}}},
|
||||||
|
{ "foo\\\"bar\\\"", Tokens{Token{10, TT::Data, "foo\"bar\""}}}
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char *argv[])
|
||||||
|
{
|
||||||
|
g_test_init (&argc, &argv, NULL);
|
||||||
|
|
||||||
|
g_test_add_func ("/tokens/basic", test_basic);
|
||||||
|
g_test_add_func ("/tokens/specials", test_specials);
|
||||||
|
g_test_add_func ("/tokens/ops", test_ops);
|
||||||
|
g_test_add_func ("/tokens/escape", test_escape);
|
||||||
|
|
||||||
|
return g_test_run ();
|
||||||
|
}
|
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <glib.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#include "parser.hh"
|
||||||
|
using namespace Mux;
|
||||||
|
|
||||||
|
// One conversion test case: the input expression, the is_first flag to
// pass along with it, and the expected result string.
struct Case {
	const std::string	expr;
	bool			is_first;
	const std::string	expected;
};

// A list of conversion test cases.
using CaseVec  = std::vector<Case>;

// The conversion under test: (expression, is_first) -> result string.
using ProcFunc = std::function<std::string(std::string, bool)>;
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_cases(const CaseVec& cases, ProcFunc proc)
|
||||||
|
{
|
||||||
|
for (const auto& casus : cases ) {
|
||||||
|
|
||||||
|
const auto res = proc(casus.expr, casus.is_first);
|
||||||
|
if (g_test_verbose()) {
|
||||||
|
std::cout << "\n";
|
||||||
|
std::cout << casus.expr << ' ' << casus.is_first << std::endl;
|
||||||
|
std::cout << "exp:" << casus.expected << std::endl;
|
||||||
|
std::cout << "got:" << res << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_assert_true (casus.expected == res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_date ()
|
||||||
|
{
|
||||||
|
g_setenv ("TZ", "Europe/Helsinki", TRUE);
|
||||||
|
|
||||||
|
CaseVec cases = {
|
||||||
|
{ "2015-09-18T09:10:23", true, "001442556623" },
|
||||||
|
{ "1972-12-14T09:10:23", true, "000093165023" },
|
||||||
|
{ "1854-11-18T17:10:23", true, "000000000000" },
|
||||||
|
{ "fnorb", true, "000000000000" },
|
||||||
|
{ "fnorb", false, "999999999999" },
|
||||||
|
{ "", false, "999999999999" },
|
||||||
|
{ "", true, "000000000000" }
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases, [](auto s, auto f){ return date_to_time_t_string(s,f); });
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_size ()
|
||||||
|
{
|
||||||
|
CaseVec cases = {
|
||||||
|
{ "456", true, "0000000456" },
|
||||||
|
{ "", false, "9999999999" },
|
||||||
|
{ "", true, "0000000000" },
|
||||||
|
};
|
||||||
|
|
||||||
|
test_cases (cases, [](auto s, auto f){ return size_to_string(s,f); });
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char *argv[])
|
||||||
|
{
|
||||||
|
g_test_init (&argc, &argv, NULL);
|
||||||
|
|
||||||
|
g_test_add_func ("/utils/process-date", test_date);
|
||||||
|
g_test_add_func ("/utils/process-size", test_size);
|
||||||
|
|
||||||
|
return g_test_run ();
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "tokenizer.hh"
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char *argv[])
|
||||||
|
{
|
||||||
|
std::string s;
|
||||||
|
|
||||||
|
for (auto i = 1; i < argc; ++i)
|
||||||
|
s += " " + std::string(argv[i]);
|
||||||
|
|
||||||
|
const auto tvec = Mux::tokenize (s);
|
||||||
|
for (const auto& t : tvec)
|
||||||
|
std::cout << t << std::endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,128 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "tokenizer.hh"
|
||||||
|
#include <cctype>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
using namespace Mux;
|
||||||
|
|
||||||
|
/**
 * Does this character separate tokens?
 *
 * Separators are blanks plus ':', '(', ')' and '"'.
 *
 * @param c a character (possibly a byte of a multi-byte UTF-8 sequence)
 *
 * @return true if c is a separator; false otherwise
 */
static bool
is_separator (char c)
{
	switch (c) {
	case ':':
	case '(':
	case ')':
	case '"':
		return true;
	default:
		// the <cctype> functions are only defined for values
		// representable as unsigned char (or EOF); plain char may be
		// negative for non-ASCII bytes, so convert first.
		return std::isblank(static_cast<unsigned char>(c)) != 0;
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Turn a word into either an operator token or a data token.
 *
 * The word is compared case-insensitively with "and", "or", "xor" and
 * "not"; the token always carries the word in its original spelling.
 *
 * @param pos position to store in the token
 * @param val the word
 *
 * @return an And/Or/Xor/Not token for an operator word, a Data token otherwise
 */
static Mux::Token
op_or_value (size_t pos, const std::string& val)
{
	std::string lowered{val};

	// go through unsigned char: tolower is undefined for negative values,
	// which plain char yields for non-ASCII (UTF-8) bytes.
	std::transform(lowered.begin(), lowered.end(), lowered.begin(),
		       [](unsigned char c) {
			       return static_cast<char>(std::tolower(c));
		       });

	if (lowered == "and")
		return Token{pos, Token::Type::And, val};
	else if (lowered == "or")
		return Token{pos, Token::Type::Or, val};
	else if (lowered == "xor")
		return Token{pos, Token::Type::Xor, val};
	else if (lowered == "not")
		return Token{pos, Token::Type::Not, val};
	else
		return Token{pos, Token::Type::Data, val};
}
|
||||||
|
|
||||||
|
/**
 * Undo reading one character: put it back at the front of the remaining
 * input and step the position back by one.
 *
 * @param food the remaining input
 * @param kar the character to put back
 * @param pos the current position
 */
static void
unread_char (std::string& food, char kar, size_t& pos)
{
	food.insert (food.begin(), kar);
	pos -= 1;
}
|
||||||
|
|
||||||
|
/**
 * Consume characters from the front of the input until one token is
 * complete.
 *
 * - a backslash escapes the next character, which then never acts as a
 *   quote or separator;
 * - an unescaped '"' starts a quoted section in which separators are plain
 *   data; the closing '"' ends the token;
 * - outside quotes, a separator other than ':' ends a pending word; the
 *   separator is pushed back for the next call;
 * - '(' and ')' are tokens of their own; blanks are skipped; ':' is kept
 *   as part of the data.
 *
 * When the input runs out, whatever was collected is returned as data.
 *
 * @param food the remaining input; the consumed part is removed
 * @param pos the position; advanced by the number of characters consumed
 *
 * @return the token
 */
static Mux::Token
eat_token (std::string& food, size_t& pos)
{
	bool in_quotes{false};
	bool escaped{false};
	std::string text{};

	while (!food.empty()) {

		const char kar = food.front();
		food.erase (food.begin());
		++pos;

		if (kar == '\\') {
			escaped = !escaped;
			if (escaped)
				continue; // the backslash itself is dropped
		}

		// closing quote
		if (in_quotes && !escaped && kar == '"')
			return Token{pos, Token::Type::Data, text};

		const bool separates = !in_quotes && !escaped && is_separator(kar);
		if (separates) {

			// a pending word ends here (but ':' stays part of it)
			if (kar != ':' && !text.empty()) {
				unread_char (food, kar, pos);
				return op_or_value(pos, text);
			}

			if (kar == '"') { // opening quote
				in_quotes = true;
				continue;
			}

			if (isblank(kar))
				continue;

			if (kar == '(')
				return {pos, Token::Type::Open, "("};
			if (kar == ')')
				return {pos, Token::Type::Close,")"};
		}

		text += kar;
		escaped = false;
	}

	return {pos, Token::Type::Data, text};
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Split a string into tokens.
 *
 * Tokens are taken from the front of the input until only blanks (or
 * nothing) remain, so trailing blanks do not produce a spurious empty data
 * token and a blank-only string produces no tokens.
 *
 * @param s the string to tokenize
 *
 * @return the tokens, in input order
 */
Mux::Tokens
Mux::tokenize (const std::string& s)
{
	Tokens tokens{};
	std::string food{s};
	size_t pos{0};

	// true for the empty string as well
	const auto only_blanks = [](const std::string& str) {
		return std::all_of(str.begin(), str.end(), [](unsigned char c) {
			return std::isblank(c) != 0;
		});
	};

	while (!only_blanks(food))
		tokens.emplace_back(eat_token (food, pos));

	return tokens;
}
|
|
@ -0,0 +1,140 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __TOKENIZER_HH__
|
||||||
|
#define __TOKENIZER_HH__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <deque>
|
||||||
|
#include <ostream>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
// A simple tokenizer, which turns a string into a deque of tokens
|
||||||
|
//
|
||||||
|
// It recognizes '(', ')' and the (case-insensitive) operators 'and', 'or', 'xor', 'not'
|
||||||
|
//
|
||||||
|
// Note that even if we recognize those at the lexical level, they might be demoted to mere strings
|
||||||
|
// when we're creating the parse tree.
|
||||||
|
//
|
||||||
|
// Furthermore, we detect ranges ("a..b") and regexps (/../) at the parser level, since we need a
|
||||||
|
// bit more context to resolve ambiguities.
|
||||||
|
|
||||||
|
namespace Mux {
|
||||||
|
|
||||||
|
// A token: its type, its text and its position in the input
struct Token {
	enum class Type {
		Data,		/**< e.g., banana or date:..456 */

		// Brackets
		Open,		/**< ( */
		Close,		/**< ) */

		// Unops
		Not,		/**< logical not */

		// Binops
		And,		/**< logical and */
		Or,		/**< logical or */
		Xor,		/**< logical xor */

		Empty,		/**< nothing */
	};

	size_t		pos{};		/**< position in string */
	Type		type{};		/**< token type */
	const std::string str{};	/**< data for this token */

	/**
	 * operator==
	 *
	 * @param rhs right-hand side
	 *
	 * @return true if position, type and data all match; false otherwise
	 */
	bool operator==(const Token& rhs) const {
		if (pos != rhs.pos)
			return false;
		if (type != rhs.type)
			return false;
		return str == rhs.str;
	}
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* operator<<
|
||||||
|
*
|
||||||
|
* @param os an output stream
|
||||||
|
* @param t a token type
|
||||||
|
*
|
||||||
|
* @return the updated output stream
|
||||||
|
*/
|
||||||
|
inline std::ostream&
|
||||||
|
operator<< (std::ostream& os, Token::Type t)
|
||||||
|
{
|
||||||
|
switch (t) {
|
||||||
|
case Token::Type::Data: os << "<data>"; break;
|
||||||
|
|
||||||
|
case Token::Type::Open: os << "<open>"; break;
|
||||||
|
case Token::Type::Close: os << "<close>";break;
|
||||||
|
|
||||||
|
case Token::Type::Not: os << "<not>"; break;
|
||||||
|
case Token::Type::And: os << "<and>"; break;
|
||||||
|
case Token::Type::Or: os << "<or>"; break;
|
||||||
|
case Token::Type::Xor: os << "<xor>"; break;
|
||||||
|
|
||||||
|
default: // can't happen, but pacify compiler
|
||||||
|
throw std::runtime_error ("<<bug>>");
|
||||||
|
}
|
||||||
|
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* operator<<
|
||||||
|
*
|
||||||
|
* @param os an output stream
|
||||||
|
* @param t a token
|
||||||
|
*
|
||||||
|
* @return the updated output stream
|
||||||
|
*/
|
||||||
|
inline std::ostream&
|
||||||
|
operator<< (std::ostream& os, const Token& t)
|
||||||
|
{
|
||||||
|
os << t.pos << ": " << t.type;
|
||||||
|
|
||||||
|
if (!t.str.empty())
|
||||||
|
os << " [" << t.str << "]";
|
||||||
|
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tokenize a string into a deque of tokens. The tokenization always succeeds, i.e., it ignores errors
|
||||||
|
* such as a missing closing '"'.
|
||||||
|
*
|
||||||
|
* @param s a string
|
||||||
|
*
|
||||||
|
* @return a deque of tokens
|
||||||
|
*/
|
||||||
|
using Tokens = std::deque<Token>;
|
||||||
|
Tokens tokenize (const std::string& s);
|
||||||
|
|
||||||
|
} // namespace Mux
|
||||||
|
|
||||||
|
#endif /* __TOKENIZER_HH__ */
|
|
@ -0,0 +1,104 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <parser/data.hh>
|
||||||
|
|
||||||
|
namespace Mux {
|
||||||
|
|
||||||
|
// A node in the parse tree
|
||||||
|
struct Node {
|
||||||
|
enum class Type {
|
||||||
|
Empty, // only for empty trees
|
||||||
|
OpAnd,
|
||||||
|
OpOr,
|
||||||
|
OpXor,
|
||||||
|
OpAndNot,
|
||||||
|
OpNot,
|
||||||
|
Value,
|
||||||
|
Range,
|
||||||
|
Invalid
|
||||||
|
};
|
||||||
|
|
||||||
|
Node(Type _type, std::unique_ptr<Data>&& _data):
|
||||||
|
type{_type}, data{std::move(_data)} {}
|
||||||
|
Node(Type _type): type{_type} {}
|
||||||
|
Node(Node&& rhs) = default;
|
||||||
|
|
||||||
|
Type type;
|
||||||
|
std::unique_ptr<Data> data;
|
||||||
|
|
||||||
|
static constexpr const char* type_name (Type t) {
|
||||||
|
switch (t) {
|
||||||
|
case Type::Empty: return ""; break;
|
||||||
|
case Type::OpAnd: return "and"; break;
|
||||||
|
case Type::OpOr: return "or"; break;
|
||||||
|
case Type::OpXor: return "xor"; break;
|
||||||
|
case Type::OpAndNot: return "andnot"; break;
|
||||||
|
case Type::OpNot: return "not"; break;
|
||||||
|
case Type::Value: return "value"; break;
|
||||||
|
case Type::Range: return "range"; break;
|
||||||
|
case Type::Invalid: return "<invalid>"; break;
|
||||||
|
default:
|
||||||
|
throw std::runtime_error ("bug");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool is_binop(Type t) {
|
||||||
|
return t == Type::OpAnd || t == Type::OpAndNot ||
|
||||||
|
t == Type::OpOr || t == Type::OpXor;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream&
|
||||||
|
operator<< (std::ostream& os, const Node& t)
|
||||||
|
{
|
||||||
|
os << Node::type_name(t.type);
|
||||||
|
if (t.data)
|
||||||
|
os << t.data;
|
||||||
|
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Tree {
|
||||||
|
Tree(Node&& _node): node(std::move(_node)) {}
|
||||||
|
Tree(Tree&& rhs) = default;
|
||||||
|
|
||||||
|
void add_child (Tree&& child) { children.emplace_back(std::move(child)); }
|
||||||
|
bool empty() const { return node.type == Node::Type::Empty; }
|
||||||
|
|
||||||
|
Node node;
|
||||||
|
std::vector<Tree> children;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream&
|
||||||
|
operator<< (std::ostream& os, const Tree& tree)
|
||||||
|
{
|
||||||
|
os << '(' << tree.node;
|
||||||
|
for (const auto& subtree : tree.children)
|
||||||
|
os << subtree;
|
||||||
|
os << ')';
|
||||||
|
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Mux
|
|
@ -0,0 +1,349 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define GNU_SOURCE
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "utils.hh"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include <glib.h>
|
||||||
|
|
||||||
|
using namespace Mux;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
/**
 * Lower-case a unicode character, mapping a few Latin letters to a plain
 * ASCII letter.
 *
 * Non-alphabetic characters are returned unchanged; alphabetic characters
 * outside the Latin script get the regular g_unichar_tolower() treatment.
 *
 * @param uc a unicode character
 *
 * @return the converted character
 */
static gunichar
unichar_tolower (gunichar uc)
{
	if (!g_unichar_isalpha(uc))
		return uc;

	if (g_unichar_get_script (uc) != G_UNICODE_SCRIPT_LATIN)
		return g_unichar_tolower (uc);

	switch (uc)
	{
	case 0x00e6:
	case 0x00c6: return 'e'; /* æ, Æ */
	case 0x00d8:             /* Ø: must flatten like its lower-case form */
	case 0x00f8: return 'o'; /* ø */
	case 0x0110:
	case 0x0111: return 'd'; /* Đ, đ */
	/* todo: many more */
	default: return g_unichar_tolower (uc);
	}
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gx_utf8_flatten:
|
||||||
|
* @str: a UTF-8 string
|
||||||
|
* @len: the length of @str, or -1 if it is %NULL-terminated
|
||||||
|
*
|
||||||
|
* Flatten some UTF-8 string; that is, downcase it and remove any diacritics.
|
||||||
|
*
|
||||||
|
* Returns: (transfer full): a flattened string, free with g_free().
|
||||||
|
*/
|
||||||
|
static char*
|
||||||
|
gx_utf8_flatten (const gchar *str, gssize len)
|
||||||
|
{
|
||||||
|
GString *gstr;
|
||||||
|
char *norm, *cur;
|
||||||
|
|
||||||
|
g_return_val_if_fail (str, NULL);
|
||||||
|
|
||||||
|
norm = g_utf8_normalize (str, len, G_NORMALIZE_ALL);
|
||||||
|
if (!norm)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
gstr = g_string_sized_new (strlen (norm));
|
||||||
|
|
||||||
|
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur))
|
||||||
|
{
|
||||||
|
gunichar uc;
|
||||||
|
|
||||||
|
uc = g_utf8_get_char (cur);
|
||||||
|
if (g_unichar_combining_class (uc) != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
g_string_append_unichar (gstr, unichar_tolower(uc));
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free (norm);
|
||||||
|
|
||||||
|
return g_string_free (gstr, FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
std::string // gx_utf8_flatten
|
||||||
|
Mux::utf8_flatten (const std::string& str)
|
||||||
|
{
|
||||||
|
char *flat = gx_utf8_flatten (str.c_str(), str.length());
|
||||||
|
if (!flat)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
std::string s(flat);
|
||||||
|
g_free (flat);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
Mux::quote (const std::string& str)
|
||||||
|
{
|
||||||
|
char *s = g_strescape (str.c_str(), NULL);
|
||||||
|
if (!s)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
std::string res (s);
|
||||||
|
g_free (s);
|
||||||
|
|
||||||
|
return "\"" + res + "\"";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
Mux::format (const char *frm, ...)
|
||||||
|
{
|
||||||
|
va_list args;
|
||||||
|
|
||||||
|
va_start (args, frm);
|
||||||
|
|
||||||
|
char *s = {};
|
||||||
|
const auto res = vasprintf (&s, frm, args);
|
||||||
|
va_end (args);
|
||||||
|
if (res == -1) {
|
||||||
|
std::cerr << "string format failed" << std::endl;
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string str = s;
|
||||||
|
free (s);
|
||||||
|
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dates are represented internally as zero-padded, 12-digit decimal strings;
// the minimum and maximum serve as the open ends of a range.
constexpr const auto InternalDateFormat = "%012" G_GINT64_FORMAT;
constexpr const char InternalDateMin[] = "000000000000";
constexpr const char InternalDateMax[] = "999999999999";
// 12 digits plus the terminating NUL; the messages keep these valid before
// C++17, where static_assert requires one.
static_assert(sizeof(InternalDateMin) == 12 + 1, "InternalDateMin must have 12 digits");
static_assert(sizeof(InternalDateMax) == 12 + 1, "InternalDateMax must have 12 digits");
|
||||||
|
|
||||||
|
static std::string
|
||||||
|
date_boundary (bool is_first)
|
||||||
|
{
|
||||||
|
return is_first ? InternalDateMin : InternalDateMax;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
Mux::date_to_time_t_string (time_t t)
|
||||||
|
{
|
||||||
|
char buf[sizeof(InternalDateMax)];
|
||||||
|
snprintf (buf, sizeof(buf), InternalDateFormat, t);
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static std::string
|
||||||
|
delta_ymwdhMs (const std::string& expr)
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
auto num = strtol (expr.c_str(), &endptr, 10);
|
||||||
|
if (num <= 0 || num > 9999 || !endptr || !*endptr)
|
||||||
|
return date_boundary (true);
|
||||||
|
|
||||||
|
int years, months, weeks, days, hours, minutes, seconds;
|
||||||
|
years = months = weeks = days = hours = minutes = seconds = 0;
|
||||||
|
|
||||||
|
switch (endptr[0]) {
|
||||||
|
case 's': seconds = num; break;
|
||||||
|
case 'M': minutes = num; break;
|
||||||
|
case 'h': hours = num; break;
|
||||||
|
case 'd': days = num; break;
|
||||||
|
case 'w': weeks = num; break;
|
||||||
|
case 'm': months = num; break;
|
||||||
|
case 'y': years = num; break;
|
||||||
|
default:
|
||||||
|
return date_boundary (true);
|
||||||
|
}
|
||||||
|
|
||||||
|
GDateTime *then, *now = g_date_time_new_now_local ();
|
||||||
|
if (weeks != 0)
|
||||||
|
then = g_date_time_add_weeks (now, -weeks);
|
||||||
|
else
|
||||||
|
then = g_date_time_add_full (now, -years, -months,-days,
|
||||||
|
-hours, -minutes, -seconds);
|
||||||
|
|
||||||
|
time_t t = MAX (0, (gint64)g_date_time_to_unix (then));
|
||||||
|
|
||||||
|
g_date_time_unref (then);
|
||||||
|
g_date_time_unref (now);
|
||||||
|
|
||||||
|
return date_to_time_t_string (t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static std::string
|
||||||
|
special_date (const std::string& d, bool is_first)
|
||||||
|
{
|
||||||
|
if (d == "now")
|
||||||
|
return date_to_time_t_string (time(NULL));
|
||||||
|
|
||||||
|
else if (d == "today") {
|
||||||
|
|
||||||
|
GDateTime *dt, *midnight;
|
||||||
|
dt = g_date_time_new_now_local ();
|
||||||
|
|
||||||
|
if (!is_first) {
|
||||||
|
GDateTime *tmp = dt;
|
||||||
|
dt = g_date_time_add_days (dt, 1);
|
||||||
|
g_date_time_unref (tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
midnight = g_date_time_add_full (dt, 0, 0, 0,
|
||||||
|
-g_date_time_get_hour(dt),
|
||||||
|
-g_date_time_get_minute (dt),
|
||||||
|
-g_date_time_get_second (dt));
|
||||||
|
time_t t = MAX(0, (gint64)g_date_time_to_unix (midnight));
|
||||||
|
g_date_time_unref (dt);
|
||||||
|
g_date_time_unref (midnight);
|
||||||
|
return date_to_time_t_string ((time_t)t);
|
||||||
|
|
||||||
|
} else
|
||||||
|
return date_boundary (is_first);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
constexpr const char UserDateMin[] = "19700101000000";
|
||||||
|
constexpr const char UserDateMax[] = "29993112235959";
|
||||||
|
|
||||||
|
std::string
|
||||||
|
Mux::date_to_time_t_string (const std::string& dstr, bool is_first)
|
||||||
|
{
|
||||||
|
gint64 t;
|
||||||
|
struct tm tbuf;
|
||||||
|
GDateTime *dtime;
|
||||||
|
|
||||||
|
/* one-sided dates */
|
||||||
|
if (dstr.empty())
|
||||||
|
return date_boundary (is_first);
|
||||||
|
else if (is_first && dstr.find_first_of("ymdwhMs") != std::string::npos)
|
||||||
|
return delta_ymwdhMs (dstr);
|
||||||
|
|
||||||
|
std::string date (is_first ? UserDateMin : UserDateMax);
|
||||||
|
std::copy_if (dstr.begin(), dstr.end(), date.begin(),[](auto c){return isdigit(c);});
|
||||||
|
|
||||||
|
memset (&tbuf, 0, sizeof tbuf);
|
||||||
|
if (!strptime (date.c_str(), "%Y%m%d%H%M%S", &tbuf) &&
|
||||||
|
!strptime (date.c_str(), "%Y%m%d%H%M", &tbuf) &&
|
||||||
|
!strptime (date.c_str(), "%Y%m%d", &tbuf) &&
|
||||||
|
!strptime (date.c_str(), "%Y%m", &tbuf) &&
|
||||||
|
!strptime (date.c_str(), "%Y", &tbuf))
|
||||||
|
return special_date (date, is_first);
|
||||||
|
|
||||||
|
dtime = g_date_time_new_local (tbuf.tm_year + 1900,
|
||||||
|
tbuf.tm_mon + 1,
|
||||||
|
tbuf.tm_mday,
|
||||||
|
tbuf.tm_hour,
|
||||||
|
tbuf.tm_min,
|
||||||
|
tbuf.tm_sec);
|
||||||
|
if (!dtime) {
|
||||||
|
g_warning ("invalid %s date '%s'",
|
||||||
|
is_first ? "lower" : "upper", date.c_str());
|
||||||
|
return date_boundary (is_first);
|
||||||
|
}
|
||||||
|
|
||||||
|
t = (gint64)g_date_time_to_unix (dtime);
|
||||||
|
g_date_time_unref (dtime);
|
||||||
|
|
||||||
|
if (t < 0 || t > 9999999999)
|
||||||
|
return date_boundary (is_first);
|
||||||
|
else
|
||||||
|
return date_to_time_t_string (t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
constexpr const auto SizeFormat = "%010" G_GINT64_FORMAT;
|
||||||
|
|
||||||
|
/* smallest and largest values of the internal, 10-digit size representation */
constexpr char SizeMin[] = "0000000000";
constexpr char SizeMax[] = "9999999999";

/* both must be exactly 10 digits wide (plus the terminating NUL) */
static_assert(sizeof(SizeMin) == 10 + 1);
static_assert(sizeof(SizeMax) == 10 + 1);

/*
 * Get the open-ended boundary of a size range: the minimum when
 * IS_FIRST is true (lower bound), otherwise the maximum.
 */
static std::string
size_boundary (bool is_first)
{
	if (is_first)
		return SizeMin;

	return SizeMax;
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
Mux::size_to_string (int64_t size)
|
||||||
|
{
|
||||||
|
char buf[sizeof(SizeMax)];
|
||||||
|
snprintf (buf, sizeof(buf), SizeFormat, size);
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
Mux::size_to_string (const std::string& val, bool is_first)
|
||||||
|
{
|
||||||
|
std::string str;
|
||||||
|
GRegex *rx;
|
||||||
|
GMatchInfo *minfo;
|
||||||
|
|
||||||
|
/* one-sided ranges */
|
||||||
|
if (val.empty())
|
||||||
|
return size_boundary (is_first);
|
||||||
|
|
||||||
|
rx = g_regex_new ("(\\d+)(b|k|kb|m|mb|g|gb)?",
|
||||||
|
G_REGEX_CASELESS, (GRegexMatchFlags)0, NULL);
|
||||||
|
minfo = NULL;
|
||||||
|
if (g_regex_match (rx, val.c_str(), (GRegexMatchFlags)0, &minfo)) {
|
||||||
|
gint64 size;
|
||||||
|
char *s;
|
||||||
|
|
||||||
|
s = g_match_info_fetch (minfo, 1);
|
||||||
|
size = atoll (s);
|
||||||
|
g_free (s);
|
||||||
|
|
||||||
|
s = g_match_info_fetch (minfo, 2);
|
||||||
|
switch (s ? g_ascii_tolower(s[0]) : 0) {
|
||||||
|
case 'k': size *= 1024; break;
|
||||||
|
case 'm': size *= (1024 * 1024); break;
|
||||||
|
case 'g': size *= (1024 * 1024 * 1024); break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free (s);
|
||||||
|
str = size_to_string (size);
|
||||||
|
} else
|
||||||
|
str = size_boundary (is_first);
|
||||||
|
|
||||||
|
g_regex_unref (rx);
|
||||||
|
g_match_info_unref (minfo);
|
||||||
|
|
||||||
|
return str;
|
||||||
|
}
|
|
@ -0,0 +1,100 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#ifndef __UTILS_HH__
|
||||||
|
#define __UTILS_HH__
|
||||||
|
|
||||||
|
namespace Mux {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flatten a string -- downcase and fold diacritics etc.
|
||||||
|
*
|
||||||
|
* @param str a string
|
||||||
|
*
|
||||||
|
* @return a flattened string
|
||||||
|
*/
|
||||||
|
std::string utf8_flatten (const std::string& str);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Quote & escape a string
|
||||||
|
*
|
||||||
|
* @param str a string
|
||||||
|
*
|
||||||
|
* @return quoted string
|
||||||
|
*/
|
||||||
|
std::string quote (const std::string& str);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format a string, printf style
|
||||||
|
*
|
||||||
|
* @param frm format string
|
||||||
|
* @param ... parameters
|
||||||
|
*
|
||||||
|
* @return a formatted string
|
||||||
|
*/
|
||||||
|
std::string format (const char *frm, ...)
|
||||||
|
__attribute__((format(printf, 1, 2)));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an ISO date to the corresponding time expressed as a string
|
||||||
|
* with a zero-padded, 12-digit time_t
|
||||||
|
*
|
||||||
|
* @param date
|
||||||
|
* @param first whether this is the lower bound of a range (rather than the upper bound)
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
std::string date_to_time_t_string (const std::string& date, bool first);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* time_t expressed as a zero-padded, 12-digit decimal string
|
||||||
|
*
|
||||||
|
* @param t
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
std::string date_to_time_t_string (time_t t);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a size string to a size in bytes
|
||||||
|
*
|
||||||
|
* @param sizestr the size string
|
||||||
|
* @param first
|
||||||
|
*
|
||||||
|
* @return the size expressed as a string with the decimal number of bytes
|
||||||
|
*/
|
||||||
|
std::string size_to_string (const std::string& sizestr, bool first);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a size in bytes into a zero-padded, 10-digit decimal string
|
||||||
|
*
|
||||||
|
* @param size the size
|
||||||
|
* @param first
|
||||||
|
*
|
||||||
|
* @return the size expressed as a string with the decimal number of bytes
|
||||||
|
*/
|
||||||
|
std::string size_to_string (int64_t size);
|
||||||
|
|
||||||
|
} // namespace Mux
|
||||||
|
|
||||||
|
#endif /* __UTILS_HH__ */
|
|
@ -0,0 +1,75 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <xapian.h>
|
||||||
|
#include "parser/xapian.hh"
|
||||||
|
|
||||||
|
using namespace Mux;
|
||||||
|
|
||||||
|
/*
 * Turn an operator node (and, recursively, its children) into a Xapian
 * query. Throws std::runtime_error for anything that is not a valid
 * operator node.
 */
static Xapian::Query
xapian_query_op (const Mux::Tree& tree)
{
	/* there is no unary NOT in Xapian, so: OpNot x ::= <all> AND NOT x */
	if (tree.node.type == Node::Type::OpNot) {
		if (tree.children.size() != 1)
			throw std::runtime_error ("invalid # of children");
		return Xapian::Query (Xapian::Query::OP_AND_NOT,
				      Xapian::Query::MatchAll,
				      xapian_query(tree.children.front()));
	}

	/* the other operators map directly onto their Xapian counterparts */
	const auto xop = [&tree]() -> Xapian::Query::op {
		switch (tree.node.type) {
		case Node::Type::OpAnd:	   return Xapian::Query::OP_AND;
		case Node::Type::OpOr:	   return Xapian::Query::OP_OR;
		case Node::Type::OpXor:	   return Xapian::Query::OP_XOR;
		case Node::Type::OpAndNot: return Xapian::Query::OP_AND_NOT;
		default: throw std::runtime_error ("invalid op"); // bug
		}
	}();

	std::vector<Xapian::Query> subqueries;
	for (const auto& child: tree.children)
		subqueries.push_back (xapian_query(child));

	return Xapian::Query (xop, subqueries.begin(), subqueries.end());
}
|
||||||
|
|
||||||
|
/*
 * Transform a parse tree into a Xapian query object:
 *  - an empty node becomes an empty query
 *  - operator nodes are handled by xapian_query_op
 *  - a value node becomes a query for the term prefix + value
 *  - a range node becomes an OP_VALUE_RANGE query on the range's id
 *
 * Throws std::runtime_error for unknown node types, and for value / range
 * nodes that do not carry the data that goes with their type.
 */
Xapian::Query
Mux::xapian_query (const Mux::Tree& tree)
{
	switch (tree.node.type) {
	case Node::Type::Empty:
		return Xapian::Query();
	case Node::Type::OpNot:
	case Node::Type::OpAnd:
	case Node::Type::OpOr:
	case Node::Type::OpXor:
	case Node::Type::OpAndNot:
		return xapian_query_op (tree);
	case Node::Type::Value: {
		const auto v = dynamic_cast<Value*> (tree.node.data.get());
		if (!v) /* don't dereference a failed cast */
			throw std::runtime_error ("invalid value node"); // bug
		return Xapian::Query(v->prefix + v->value);
	}
	case Node::Type::Range: {
		const auto r = dynamic_cast<Range*> (tree.node.data.get());
		if (!r) /* don't dereference a failed cast */
			throw std::runtime_error ("invalid range node"); // bug
		return Xapian::Query(Xapian::Query::OP_VALUE_RANGE,
				     (Xapian::valueno)r->id, r->lower, r->upper);
	}
	default:
		throw std::runtime_error ("invalid query"); // bug
	}
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
|
**
|
||||||
|
** This library is free software; you can redistribute it and/or
|
||||||
|
** modify it under the terms of the GNU Lesser General Public License
|
||||||
|
** as published by the Free Software Foundation; either version 2.1
|
||||||
|
** of the License, or (at your option) any later version.
|
||||||
|
**
|
||||||
|
** This library is distributed in the hope that it will be useful,
|
||||||
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
** Lesser General Public License for more details.
|
||||||
|
**
|
||||||
|
** You should have received a copy of the GNU Lesser General Public
|
||||||
|
** License along with this library; if not, write to the Free
|
||||||
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
** 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __XAPIAN_HH__
|
||||||
|
#define __XAPIAN_HH__
|
||||||
|
|
||||||
|
#include <xapian.h>
|
||||||
|
#include <parser/parser.hh>
|
||||||
|
|
||||||
|
namespace Mux {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transform a parse-tree into a Xapian query object
|
||||||
|
*
|
||||||
|
* @param tree a parse tree
|
||||||
|
*
|
||||||
|
* @return a Xapian query object
|
||||||
|
*/
|
||||||
|
Xapian::Query xapian_query (const Mux::Tree& tree);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __XAPIAN_HH__ */
|
Loading…
Reference in New Issue