2020-11-03 08:58:59 +01:00
|
|
|
/*
|
|
|
|
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
|
|
|
**
|
|
|
|
** This library is free software; you can redistribute it and/or
|
|
|
|
** modify it under the terms of the GNU Lesser General Public License
|
|
|
|
** as published by the Free Software Foundation; either version 2.1
|
|
|
|
** of the License, or (at your option) any later version.
|
|
|
|
**
|
|
|
|
** This library is distributed in the hope that it will be useful,
|
|
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
** Lesser General Public License for more details.
|
|
|
|
**
|
|
|
|
** You should have received a copy of the GNU Lesser General Public
|
|
|
|
** License along with this library; if not, write to the Free
|
|
|
|
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
** 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
#include "mu-parser.hh"
|
2022-03-03 23:06:31 +01:00
|
|
|
|
|
|
|
#include <algorithm>
|
2022-06-12 18:44:00 +02:00
|
|
|
#include <regex>
|
2022-04-28 21:56:37 +02:00
|
|
|
#include <limits>
|
2022-03-03 23:06:31 +01:00
|
|
|
|
2020-11-03 08:58:59 +01:00
|
|
|
#include "mu-tokenizer.hh"
|
|
|
|
#include "utils/mu-utils.hh"
|
|
|
|
#include "utils/mu-error.hh"
|
2022-03-26 16:16:46 +01:00
|
|
|
#include "message/mu-message.hh"
|
2020-11-03 08:58:59 +01:00
|
|
|
|
|
|
|
using namespace Mu;
|
|
|
|
|
|
|
|
// 3 precedence levels: units (NOT,()) > factors (AND) > terms (OR, XOR)
|
|
|
|
|
|
|
|
// query -> <term-1> | ε
|
|
|
|
// <term-1> -> <factor-1> <term-2> | ε
|
|
|
|
// <term-2> -> OR|XOR <term-1> | ε
|
|
|
|
// <factor-1> -> <unit> <factor-2> | ε
|
|
|
|
// <factor-2> -> [AND]|AND NOT <factor-1> | ε
|
|
|
|
// <unit> -> NOT <unit> | ( <term-1> ) | <data>
|
|
|
|
// <data> -> <value> | <range> | <regex>
|
|
|
|
// <value> -> [field:]value
|
|
|
|
// <range> -> [field:][lower]..[upper]
|
|
|
|
// <regex> -> [field:]/regex/
|
|
|
|
|
2021-10-20 11:18:15 +02:00
|
|
|
/*
 * Build (but do not throw) a Mu::Error with code Error::Code::Internal; the
 * message is "<line>: BUG: " followed by the printf-style formatted arguments.
 * Typical use: throw BUG("expected %s", what);
 */
#define BUG(...)                                   \
	Mu::Error(Error::Code::Internal,           \
		  format("%u: BUG: ", __LINE__) +  \
		  format(__VA_ARGS__))
|
2020-11-03 08:58:59 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Get the "shortcut"/internal fields for the given fieldstr, or empty if there is none
|
|
|
|
*
|
|
|
|
* @param fieldstr a fieldstr, e.g "subject" or "s" for the subject field
|
|
|
|
*
|
|
|
|
* @return a vector with "exploded" values, with a code and a fullname. E.g. "s" might map
|
|
|
|
* to [<"S","subject">], while "recip" could map to [<"to", "T">, <"cc", "C">, <"bcc", "B">]
|
|
|
|
*/
|
|
|
|
struct FieldInfo {
|
2021-10-20 11:18:15 +02:00
|
|
|
const std::string field;
|
|
|
|
const std::string prefix;
|
|
|
|
bool supports_phrase;
|
2022-03-03 23:06:31 +01:00
|
|
|
Field::Id id;
|
2020-11-03 08:58:59 +01:00
|
|
|
};
|
|
|
|
using FieldInfoVec = std::vector<FieldInfo>;
|
|
|
|
/**
 * Private implementation of Parser: remembers the store and the flags the
 * parser was created with, and has one member function for each of the
 * grammar productions described at the top of this file.
 */
struct Parser::Private {
	Private(const Store& store, Parser::Flags flags) : store_{store}, flags_{flags} {}

	/** Get the terms the store has for @p field that match @p rx. */
	std::vector<std::string> process_regex(const std::string& field,
					       const std::regex&  rx) const;

	/*
	 * The grammar productions; each consumes tokens from the front of
	 * @p tokens and appends any problems it finds to @p warnings.
	 */
	Mu::Tree term_1(Mu::Tokens& tokens, WarningVec& warnings) const;
	Mu::Tree term_2(Mu::Tokens& tokens, Node::Type& op, WarningVec& warnings) const;
	Mu::Tree factor_1(Mu::Tokens& tokens, WarningVec& warnings) const;
	Mu::Tree factor_2(Mu::Tokens& tokens, Node::Type& op, WarningVec& warnings) const;
	Mu::Tree unit(Mu::Tokens& tokens, WarningVec& warnings) const;
	Mu::Tree data(Mu::Tokens& tokens, WarningVec& warnings) const;

	/*
	 * Builders for the three kinds of <data>; @p pos is the position of
	 * the token, used for warnings.
	 */
	Mu::Tree range(const FieldInfoVec& fields,
		       const std::string&  lower,
		       const std::string&  upper,
		       size_t              pos,
		       WarningVec&         warnings) const;
	Mu::Tree regex(const FieldInfoVec& fields,
		       const std::string&  v,
		       size_t              pos,
		       WarningVec&         warnings) const;
	Mu::Tree value(const FieldInfoVec& fields,
		       const std::string&  v,
		       size_t              pos,
		       WarningVec&         warnings) const;

private:
	const Store&        store_; /**< used by process_regex to enumerate terms */
	const Parser::Flags flags_; /**< passed on to process_field */
};
|
|
|
|
|
|
|
|
static std::string
|
2021-10-20 11:18:15 +02:00
|
|
|
process_value(const std::string& field, const std::string& value)
|
2020-11-03 08:58:59 +01:00
|
|
|
{
|
2022-03-20 13:12:41 +01:00
|
|
|
const auto id_opt{field_from_name(field)};
|
2022-03-03 23:06:31 +01:00
|
|
|
if (id_opt) {
|
2022-06-18 13:35:01 +02:00
|
|
|
#pragma GCC diagnostic push
|
|
|
|
#pragma GCC diagnostic ignored "-Wswitch-enum"
|
2022-03-20 13:12:41 +01:00
|
|
|
switch (id_opt->id) {
|
2022-03-03 23:06:31 +01:00
|
|
|
case Field::Id::Priority: {
|
|
|
|
if (!value.empty())
|
|
|
|
return std::string(1, value[0]);
|
|
|
|
} break;
|
|
|
|
case Field::Id::Flags:
|
2022-03-20 13:12:41 +01:00
|
|
|
if (const auto info{flag_info(value)}; info)
|
2022-03-03 23:06:31 +01:00
|
|
|
return std::string(1, info->shortcut_lower());
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2022-06-18 13:35:01 +02:00
|
|
|
#pragma GCC diagnostic pop
|
2021-10-20 11:18:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return value; // XXX prio/flags, etc. alias
|
2020-11-03 08:58:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2022-03-03 23:06:31 +01:00
|
|
|
add_field(std::vector<FieldInfo>& fields, Field::Id field_id)
|
2020-11-03 08:58:59 +01:00
|
|
|
{
|
2022-03-20 13:12:41 +01:00
|
|
|
const auto field{field_from_id(field_id)};
|
2022-03-03 23:06:31 +01:00
|
|
|
if (!field.shortcut)
|
2021-10-20 11:18:15 +02:00
|
|
|
return; // can't be searched
|
2020-11-03 08:58:59 +01:00
|
|
|
|
2022-03-03 23:06:31 +01:00
|
|
|
fields.emplace_back(FieldInfo{std::string{field.name}, field.xapian_term(),
|
2022-03-19 17:41:05 +01:00
|
|
|
field.is_indexable_term(), field_id});
|
2020-11-03 08:58:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static std::vector<FieldInfo>
|
2022-03-03 23:06:31 +01:00
|
|
|
process_field(const std::string& field_str, Parser::Flags flags)
|
2020-11-03 08:58:59 +01:00
|
|
|
{
|
2021-10-20 11:18:15 +02:00
|
|
|
std::vector<FieldInfo> fields;
|
2022-03-03 23:06:31 +01:00
|
|
|
if (any_of(flags & Parser::Flags::UnitTest)) {
|
|
|
|
add_field(fields, Field::Id::MessageId);
|
2021-10-20 11:18:15 +02:00
|
|
|
return fields;
|
|
|
|
}
|
|
|
|
|
2022-03-03 23:06:31 +01:00
|
|
|
if (field_str == "contact" || field_str == "recip") { // multi fields
|
|
|
|
add_field(fields, Field::Id::To);
|
|
|
|
add_field(fields, Field::Id::Cc);
|
|
|
|
add_field(fields, Field::Id::Bcc);
|
|
|
|
if (field_str == "contact")
|
|
|
|
add_field(fields, Field::Id::From);
|
|
|
|
} else if (field_str.empty()) {
|
|
|
|
add_field(fields, Field::Id::To);
|
|
|
|
add_field(fields, Field::Id::Cc);
|
|
|
|
add_field(fields, Field::Id::Bcc);
|
|
|
|
add_field(fields, Field::Id::From);
|
|
|
|
add_field(fields, Field::Id::Subject);
|
|
|
|
add_field(fields, Field::Id::BodyText);
|
2022-03-20 13:12:41 +01:00
|
|
|
} else if (const auto field_opt{field_from_name(field_str)}; field_opt)
|
|
|
|
add_field(fields, field_opt->id);
|
2021-10-20 11:18:15 +02:00
|
|
|
|
|
|
|
return fields;
|
2020-11-03 08:58:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
2022-03-03 23:06:31 +01:00
|
|
|
is_range_field(const std::string& field_str)
|
2020-11-03 08:58:59 +01:00
|
|
|
{
|
2022-03-20 13:12:41 +01:00
|
|
|
if (const auto field_opt{field_from_name(field_str)}; !field_opt)
|
2021-10-20 11:18:15 +02:00
|
|
|
return false;
|
|
|
|
else
|
2022-03-20 13:12:41 +01:00
|
|
|
return field_opt->is_range();
|
2020-11-03 08:58:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * The two bounds of a range, as strings; this is what process_range() returns.
 */
struct MyRange {
	std::string lower; /**< lower bound */
	std::string upper; /**< upper bound */
};
|
|
|
|
|
|
|
|
/**
 * Turn the textual bounds of a range into the form used for the range query.
 *
 * For the date and changed fields, the bounds are parsed with
 * parse_date_time(); for the size field with parse_size(). In both cases the
 * result is converted with to_lexnum(); a lower bound that cannot be parsed
 * becomes 0, an upper bound that cannot be parsed becomes the maximum
 * int64_t. For any other (or unknown) field the bounds are returned unchanged.
 *
 * @param field_str name of the field
 * @param lower textual lower bound
 * @param upper textual upper bound
 *
 * @return the processed bounds
 */
static MyRange
process_range(const std::string& field_str,
	      const std::string& lower, const std::string& upper)
{
	const auto field_opt{field_from_name(field_str)};
	if (!field_opt)
		return {lower, upper};

	constexpr auto upper_limit = std::numeric_limits<int64_t>::max();
	const auto     id{field_opt->id};

	if (id == Field::Id::Date || id == Field::Id::Changed)
		return {to_lexnum(parse_date_time(lower, true).value_or(0)),
			to_lexnum(parse_date_time(upper, false).value_or(upper_limit))};

	if (id == Field::Id::Size)
		return {to_lexnum(parse_size(lower, true).value_or(0)),
			to_lexnum(parse_size(upper, false).value_or(upper_limit))};

	return {lower, upper}; // not a field with special range handling
}
|
|
|
|
|
|
|
|
std::vector<std::string>
|
2022-03-03 23:06:31 +01:00
|
|
|
Parser::Private::process_regex(const std::string& field_str,
|
|
|
|
const std::regex& rx) const
|
2020-11-03 08:58:59 +01:00
|
|
|
{
|
2022-03-20 13:12:41 +01:00
|
|
|
const auto field_opt{field_from_name(field_str)};
|
|
|
|
if (!field_opt)
|
2021-10-20 11:18:15 +02:00
|
|
|
return {};
|
2020-11-03 08:58:59 +01:00
|
|
|
|
2022-03-20 13:12:41 +01:00
|
|
|
const auto prefix{field_opt->xapian_term()};
|
2021-10-20 11:18:15 +02:00
|
|
|
std::vector<std::string> terms;
|
2022-03-20 13:12:41 +01:00
|
|
|
store_.for_each_term(field_opt->id, [&](auto&& str) {
|
2022-06-12 18:44:00 +02:00
|
|
|
auto val{str.c_str() + 1}; // strip off the Xapian prefix.
|
|
|
|
if (std::regex_search(val, rx))
|
|
|
|
terms.emplace_back(std::move(val));
|
2021-10-20 11:18:15 +02:00
|
|
|
return true;
|
|
|
|
});
|
2020-11-03 08:58:59 +01:00
|
|
|
|
2021-10-20 11:18:15 +02:00
|
|
|
return terms;
|
2020-11-03 08:58:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Get a copy of the next token, without consuming it.
 *
 * @param tokens the tokens; must not be empty (callers check this)
 *
 * @return the first token
 */
static Token
look_ahead(const Mu::Tokens& tokens)
{
	const Token& next = tokens.front();

	return next;
}
|
|
|
|
|
|
|
|
/**
 * Create a tree consisting of a single node of type Node::Type::Empty.
 *
 * @return the tree
 */
static Mu::Tree
empty()
{
	return Mu::Tree({Node::Type::Empty});
}
|
|
|
|
|
|
|
|
/**
 * Create the tree for a plain value.
 *
 * The value is flattened (utf8_flatten) and then normalized for each field
 * with process_value(). With exactly one field, the result is a single Value
 * node; with more than one (a "multi-field" such as "recip:"), the result is
 * an OR node with one Value child per field.
 *
 * @param fields the field(s) to search in; must not be empty
 * @param v the value
 * @param pos position of the token (currently unused)
 * @param warnings receives warnings (currently unused)
 *
 * @return the tree
 * @throws Mu::Error (through BUG) if @p fields is empty
 */
Mu::Tree
Parser::Private::value(const FieldInfoVec& fields,
		       const std::string&  v,
		       size_t              pos,
		       WarningVec&         warnings) const
{
	const auto val = utf8_flatten(v);

	if (fields.empty())
		throw BUG("expected one or more fields");

	if (fields.size() == 1) {
		// by reference: no need to copy the FieldInfo and its strings
		const auto& item = fields.front();
		return Tree({Node::Type::Value,
			     FieldValue{item.id, process_value(item.field, val)}});
	}

	// a 'multi-field' such as "recip:"
	Tree tree(Node{Node::Type::OpOr});
	for (const auto& item : fields)
		tree.add_child(Tree({Node::Type::Value,
				     FieldValue{item.id,
						process_value(item.field, val)}}));
	return tree;
}
|
|
|
|
|
|
|
|
/**
 * Create the tree for a regular-expression search.
 *
 * The regular expression is @p v without its first and last character (the
 * slashes), flattened with utf8_flatten(). For every field, the matching terms
 * are looked up with process_regex(), and each of them becomes a child of one
 * big OR node.
 *
 * If anything in that process throws (e.g. the regular expression cannot be
 * compiled), an "invalid regexp" warning is added and @p v is treated as a
 * plain value instead.
 *
 * @param fields the field(s) to search in
 * @param v the regular expression, including the surrounding slashes
 * @param pos position of the token, for warnings
 * @param warnings receives warnings
 *
 * @return the tree; an Empty tree if no term matched
 * @throws Mu::Error (through BUG) if @p v is shorter than two characters
 */
Mu::Tree
Parser::Private::regex(const FieldInfoVec& fields,
		       const std::string&  v,
		       size_t              pos,
		       WarningVec&         warnings) const
{
	if (v.length() < 2)
		throw BUG("expected regexp, got '%s'", v.c_str());

	const auto rxstr = utf8_flatten(v.substr(1, v.length() - 2));

	try {
		Tree       matches(Node{Node::Type::OpOr});
		const auto rx = std::regex(rxstr);

		for (const auto& field : fields) {
			// The matched terms are added as ValueAtomic, i.e.,
			// whole terms rather than word-split phrases;
			// splitting them here again may differ from the way
			// Xapian split them when indexing, which caused
			// false negatives.
			for (const auto& term : process_regex(field.field, rx))
				matches.add_child(Tree({Node::Type::ValueAtomic,
							FieldValue{field.id, term}}));
		}

		if (matches.children.empty())
			return empty();

		return matches;

	} catch (...) {
		// fallback
		warnings.push_back({pos, "invalid regexp"});
		return value(fields, v, pos, warnings);
	}
}
|
|
|
|
|
|
|
|
/**
 * Create the tree for a range, [field:]lower..upper.
 *
 * Only the first of @p fields is considered. If that is not a range field,
 * the whole of "lower..upper" is treated as a plain value instead. Otherwise,
 * the bounds are processed with process_range(); if the processed lower bound
 * turns out to be greater than the upper bound, the two are swapped.
 *
 * @param fields the field(s); must not be empty
 * @param lower textual lower bound
 * @param upper textual upper bound
 * @param pos position of the token, for warnings
 * @param warnings receives warnings
 *
 * @return the tree
 * @throws Mu::Error (through BUG) if @p fields is empty
 */
Mu::Tree
Parser::Private::range(const FieldInfoVec& fields,
		       const std::string&  lower,
		       const std::string&  upper,
		       size_t              pos,
		       WarningVec&         warnings) const
{
	if (fields.empty())
		throw BUG("expected field");

	const auto& first = fields.front();
	if (!is_range_field(first.field))
		return value(fields, lower + ".." + upper, pos, warnings);

	auto bounds = process_range(first.field, lower, upper);
	if (bounds.upper < bounds.lower) // wrong way around? swap.
		bounds = process_range(first.field, upper, lower);

	return Tree({Node::Type::Range,
		     FieldValue{first.id, bounds.lower, bounds.upper}});
}
|
|
|
|
|
|
|
|
/**
 * Parse <data>: consume one token and turn it into a value, range or regex
 * tree.
 *
 * The token is split into field and value at the first ':' -- unless that
 * colon is the first or the last character, in which case the whole token is
 * the value and the field is empty. If the field is not valid, a warning is
 * added and the whole token is searched for as a value in the default fields.
 *
 * After that, a value of the form /.../ is handled as a regex; a value
 * containing ".." as a range; any value for a range field as the range
 * val..val; and anything else as a plain value.
 *
 * @param tokens the tokens; must not be empty. The first one is consumed,
 * even if it is not of type Data (that adds a warning)
 * @param warnings receives warnings
 *
 * @return the tree
 */
Mu::Tree
Parser::Private::data(Mu::Tokens& tokens, WarningVec& warnings) const
{
	const auto token = look_ahead(tokens);
	if (token.type != Token::Type::Data)
		warnings.push_back({token.pos, "expected: value"});

	tokens.pop_front();

	// split into field and value
	const auto& text  = token.str;
	const auto  colon = text.find(':');
	const bool  has_field =
	    colon != std::string::npos && colon != 0 && colon != text.length() - 1;

	const std::string field = has_field ? text.substr(0, colon) : std::string{};
	const std::string val   = has_field ? text.substr(colon + 1) : text;

	auto fields = process_field(field, flags_);
	if (fields.empty()) { // not valid field...
		warnings.push_back({token.pos, format("invalid field '%s'", field.c_str())});
		fields = process_field("", flags_);
		// fallback, treat the whole of foo:bar as a value
		return value(fields, field + ":" + val, token.pos, warnings);
	}

	// does it look like a regexp?
	const bool in_slashes = val.length() >= 2 && val.front() == '/' && val.back() == '/';
	if (in_slashes)
		return regex(fields, val, token.pos, warnings);

	// does it look like a range?
	if (const auto dotdot = val.find(".."); dotdot != std::string::npos)
		return range(fields,
			     val.substr(0, dotdot),
			     val.substr(dotdot + 2),
			     token.pos,
			     warnings);

	// range field without a range - treat as field:val..val
	if (is_range_field(fields.front().field))
		return range(fields, val, val, token.pos, warnings);

	// if nothing else, it's a value.
	return value(fields, val, token.pos, warnings);
}
|
|
|
|
|
|
|
|
/**
 * Parse <unit>: a negated unit (NOT ...), a parenthesized term, or data.
 *
 * For a parenthesized term, a missing or unexpected token where the ')'
 * should be only results in a warning; the tree for the term is returned
 * regardless.
 *
 * @param tokens the tokens; consumed from the front
 * @param warnings receives warnings
 *
 * @return the tree; an Empty tree (plus a warning) if there are no tokens
 */
Mu::Tree
Parser::Private::unit(Mu::Tokens& tokens, WarningVec& warnings) const
{
	if (tokens.empty()) {
		warnings.push_back({0, "expected: unit"});
		return empty();
	}

	const auto token = look_ahead(tokens);

	if (token.type == Token::Type::Not) { // NOT <unit>
		tokens.pop_front();
		Tree negated{{Node::Type::OpNot}};
		negated.add_child(unit(tokens, warnings));
		return negated;
	}

	if (token.type != Token::Type::Open) // neither NOT nor '(', so: data
		return data(tokens, warnings);

	// ( <term-1> )
	tokens.pop_front();
	auto subtree = term_1(tokens, warnings);

	if (tokens.empty()) {
		warnings.push_back({token.pos, "expected: ')'"});
		return subtree;
	}

	const auto closer = look_ahead(tokens);
	if (closer.type == Token::Type::Close)
		tokens.pop_front();
	else
		warnings.push_back(
		    {closer.pos, std::string("expected: ')' but got ") + closer.str});

	return subtree;
}
|
|
|
|
|
|
|
|
/**
 * Parse <factor-2>, the optional right-hand side of an AND.
 *
 * An explicit AND token is consumed; a '(', data or NOT token is left in
 * place and implies an AND. In both cases @p op is set to Node::Type::OpAnd
 * and parsing continues with factor_1(). With any other token (or none at
 * all), nothing is consumed and @p op is left untouched.
 *
 * @param tokens the tokens; consumed from the front
 * @param op receives the operator
 * @param warnings receives warnings
 *
 * @return the tree for the right-hand side, or an Empty tree if there is none
 */
Mu::Tree
Parser::Private::factor_2(Mu::Tokens& tokens, Node::Type& op, WarningVec& warnings) const
{
	if (tokens.empty())
		return empty();

	const auto kind = look_ahead(tokens).type;

	const bool explicit_and = kind == Token::Type::And;
	const bool implicit_and = kind == Token::Type::Open ||
				  kind == Token::Type::Data ||
				  kind == Token::Type::Not;

	if (!explicit_and && !implicit_and)
		return empty();

	if (explicit_and)
		tokens.pop_front(); // only an explicit AND is consumed

	op = Node::Type::OpAnd;

	return factor_1(tokens, warnings);
}
|
|
|
|
|
|
|
|
/**
 * Parse <factor-1>: a unit, optionally followed by the right-hand side of an
 * (explicit or implicit) AND.
 *
 * @param tokens the tokens; consumed from the front
 * @param warnings receives warnings
 *
 * @return the tree for the unit alone if there is no right-hand side;
 * otherwise an operator node with the unit and the right-hand side as its
 * children
 */
Mu::Tree
Parser::Private::factor_1(Mu::Tokens& tokens, WarningVec& warnings) const
{
	Node::Type op{Node::Type::Invalid};

	// order matters: both consume tokens.
	auto lhs = unit(tokens, warnings);
	auto rhs = factor_2(tokens, op, warnings);

	if (!rhs.empty()) {
		Tree combined{{op}};
		combined.add_child(std::move(lhs));
		combined.add_child(std::move(rhs));
		return combined;
	}

	return lhs;
}
|
|
|
|
|
|
|
|
/**
 * Parse <term-2>, the optional right-hand side of an OR or XOR.
 *
 * An OR or XOR token is consumed, @p op is set accordingly and parsing
 * continues with term_1(). With any other token, nothing is consumed and
 * @p op is left untouched; unless that token is a ')', a warning is added.
 *
 * @param tokens the tokens; consumed from the front
 * @param op receives the operator
 * @param warnings receives warnings
 *
 * @return the tree for the right-hand side, or an Empty tree if there is none
 */
Mu::Tree
Parser::Private::term_2(Mu::Tokens& tokens, Node::Type& op, WarningVec& warnings) const
{
	if (tokens.empty())
		return empty();

	const auto token = look_ahead(tokens);

	if (token.type == Token::Type::Or)
		op = Node::Type::OpOr;
	else if (token.type == Token::Type::Xor)
		op = Node::Type::OpXor;
	else {
		if (token.type != Token::Type::Close)
			warnings.push_back({token.pos, "expected OR|XOR"});
		return empty();
	}

	tokens.pop_front();

	return term_1(tokens, warnings);
}
|
|
|
|
|
|
|
|
/**
 * Parse <term-1>: a factor, optionally followed by the right-hand side of an
 * OR or XOR.
 *
 * @param tokens the tokens; consumed from the front
 * @param warnings receives warnings
 *
 * @return the tree for the factor alone if there is no right-hand side;
 * otherwise an operator node with the factor and the right-hand side as its
 * children
 */
Mu::Tree
Parser::Private::term_1(Mu::Tokens& tokens, WarningVec& warnings) const
{
	Node::Type op{Node::Type::Invalid};

	// order matters: both consume tokens.
	auto lhs = factor_1(tokens, warnings);
	auto rhs = term_2(tokens, op, warnings);

	if (!rhs.empty()) {
		Tree combined{{op}};
		combined.add_child(std::move(lhs));
		combined.add_child(std::move(rhs));
		return combined;
	}

	return lhs;
}
|
|
|
|
|
2022-03-03 23:06:31 +01:00
|
|
|
Mu::Parser::Parser(const Store& store, Parser::Flags flags) :
|
|
|
|
priv_{std::make_unique<Private>(store, flags)}
|
2021-10-20 11:18:15 +02:00
|
|
|
{
|
|
|
|
}
|
2020-11-03 08:58:59 +01:00
|
|
|
|
|
|
|
Mu::Parser::~Parser() = default;
|
|
|
|
|
|
|
|
/**
 * Parse a query expression into a tree.
 *
 * @param expr the expression
 * @param warnings receives the warnings found while parsing
 *
 * @return the tree; an Empty tree if the expression yields no tokens, or if
 * a std::runtime_error was thrown while parsing (its message is then written
 * to stderr). Exceptions of other types are not handled here.
 */
Mu::Tree
Mu::Parser::parse(const std::string& expr, WarningVec& warnings) const
{
	try {
		auto tokens = tokenize(expr);

		return tokens.empty() ? empty() : priv_->term_1(tokens, warnings);

	} catch (const std::runtime_error& ex) {
		std::cerr << ex.what() << std::endl;
		return empty();
	}
}
|