mirror of https://github.com/djcb/mu.git
query-parser: tidy up
Remove "Data", and use Field directly.
This commit is contained in:
parent
1a84a57e05
commit
48695a1981
|
@ -63,7 +63,6 @@ libmu_la_SOURCES= \
|
|||
mu-bookmarks.hh \
|
||||
mu-contacts-cache.cc \
|
||||
mu-contacts-cache.hh \
|
||||
mu-data.hh \
|
||||
mu-parser.cc \
|
||||
mu-parser.hh \
|
||||
mu-query.cc \
|
||||
|
|
158
lib/mu-data.hh
158
lib/mu-data.hh
|
@ -1,158 +0,0 @@
|
|||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
** as published by the Free Software Foundation; either version 2.1
|
||||
** of the License, or (at your option) any later version.
|
||||
**
|
||||
** This library is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
** Lesser General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU Lesser General Public
|
||||
** License along with this library; if not, write to the Free
|
||||
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __DATA_HH__
|
||||
#define __DATA_HH__
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
|
||||
#include <message/mu-message.hh>
|
||||
#include <utils/mu-utils.hh>
|
||||
|
||||
namespace Mu {
|
||||
|
||||
// class representing some data item; either a Value or a Range a Value can still be a Regex (but
|
||||
// that's not a separate type here)
|
||||
struct Data {
|
||||
enum class Type { Value, Range };
|
||||
virtual ~Data() = default;
|
||||
|
||||
Type type; /**< type of data */
|
||||
std::string field; /**< full name of the field */
|
||||
std::string prefix; /**< Xapian prefix for thef field */
|
||||
Field::Id id; /**< The field */
|
||||
|
||||
protected:
|
||||
Data(Type _type, const std::string& _field, const std::string& _prefix,
|
||||
Field::Id _id)
|
||||
: type(_type), field(_field), prefix(_prefix), id(_id)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* operator<<
|
||||
*
|
||||
* @param os an output stream
|
||||
* @param t a data type
|
||||
*
|
||||
* @return the updated output stream
|
||||
*/
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& os, Data::Type t)
|
||||
{
|
||||
switch (t) {
|
||||
case Data::Type::Value: os << "value"; break;
|
||||
case Data::Type::Range: os << "range"; break;
|
||||
default: os << "bug"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
/**
|
||||
* Range type -- [a..b]
|
||||
*/
|
||||
struct Range : public Data {
|
||||
/**
|
||||
* Construct a range
|
||||
*
|
||||
* @param _field the field
|
||||
* @param _prefix the xapian prefix
|
||||
* @param _id xapian value number
|
||||
* @param _lower lower bound
|
||||
* @param _upper upper bound
|
||||
*/
|
||||
Range(const std::string& _field,
|
||||
const std::string& _prefix,
|
||||
Field::Id _id,
|
||||
const std::string& _lower,
|
||||
const std::string& _upper)
|
||||
:
|
||||
|
||||
Data(Data::Type::Range, _field, _prefix, _id), lower(_lower), upper(_upper)
|
||||
{
|
||||
}
|
||||
|
||||
std::string lower; /**< lower bound */
|
||||
std::string upper; /**< upper bound */
|
||||
};
|
||||
|
||||
/**
|
||||
* Basic value
|
||||
*
|
||||
*/
|
||||
struct Value : public Data {
|
||||
/**
|
||||
* Construct a Value
|
||||
*
|
||||
* @param _field the field
|
||||
* @param _prefix the xapian prefix
|
||||
* @param _id field id
|
||||
* @param _value the value
|
||||
*/
|
||||
Value(const std::string& _field,
|
||||
const std::string& _prefix,
|
||||
Field::Id _id,
|
||||
const std::string& _value,
|
||||
bool _phrase = false)
|
||||
: Data(Value::Type::Value, _field, _prefix, _id), value(_value), phrase(_phrase)
|
||||
{
|
||||
}
|
||||
|
||||
std::string value; /**< the value */
|
||||
bool phrase;
|
||||
};
|
||||
|
||||
/**
|
||||
* operator<<
|
||||
*
|
||||
* @param os an output stream
|
||||
* @param v a data ptr
|
||||
*
|
||||
* @return the updated output stream
|
||||
*/
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& os, const std::unique_ptr<Data>& v)
|
||||
{
|
||||
switch (v->type) {
|
||||
case Data::Type::Value: {
|
||||
const auto bval = dynamic_cast<Value*>(v.get());
|
||||
os << ' ' << quote(v->field) << ' ' << quote(utf8_flatten(bval->value));
|
||||
if (bval->phrase)
|
||||
os << " (ph)";
|
||||
|
||||
break;
|
||||
}
|
||||
case Data::Type::Range: {
|
||||
const auto rval = dynamic_cast<Range*>(v.get());
|
||||
os << ' ' << quote(v->field) << ' ' << quote(rval->lower) << ' '
|
||||
<< quote(rval->upper);
|
||||
break;
|
||||
}
|
||||
default: os << "unexpected type"; break;
|
||||
}
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace Mu
|
||||
|
||||
#endif /* __DATA_HH__ */
|
|
@ -19,6 +19,7 @@
|
|||
#include "mu-parser.hh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <regex>
|
||||
#include <limits>
|
||||
|
||||
#include "mu-tokenizer.hh"
|
||||
|
@ -199,8 +200,9 @@ Parser::Private::process_regex(const std::string& field_str,
|
|||
const auto prefix{field_opt->xapian_term()};
|
||||
std::vector<std::string> terms;
|
||||
store_.for_each_term(field_opt->id, [&](auto&& str) {
|
||||
if (std::regex_search(str.c_str() + 1, rx)) // avoid copy
|
||||
terms.emplace_back(str);
|
||||
auto val{str.c_str() + 1}; // strip off the Xapian prefix.
|
||||
if (std::regex_search(val, rx))
|
||||
terms.emplace_back(std::move(val));
|
||||
return true;
|
||||
});
|
||||
|
||||
|
@ -233,22 +235,15 @@ Parser::Private::value(const FieldInfoVec& fields,
|
|||
if (fields.size() == 1) {
|
||||
const auto item = fields.front();
|
||||
return Tree({Node::Type::Value,
|
||||
std::make_unique<Value>(item.field,
|
||||
item.prefix,
|
||||
item.id,
|
||||
process_value(item.field, val),
|
||||
item.supports_phrase)});
|
||||
FieldValue{item.id, process_value(item.field, val)}});
|
||||
}
|
||||
|
||||
// a 'multi-field' such as "recip:"
|
||||
Tree tree(Node{Node::Type::OpOr});
|
||||
for (const auto& item : fields)
|
||||
tree.add_child(Tree({Node::Type::Value,
|
||||
std::make_unique<Value>(item.field,
|
||||
item.prefix,
|
||||
item.id,
|
||||
process_value(item.field, val),
|
||||
item.supports_phrase)}));
|
||||
FieldValue{item.id,
|
||||
process_value(item.field, val)}}));
|
||||
return tree;
|
||||
}
|
||||
|
||||
|
@ -264,14 +259,13 @@ Parser::Private::regex(const FieldInfoVec& fields,
|
|||
const auto rxstr = utf8_flatten(v.substr(1, v.length() - 2));
|
||||
|
||||
try {
|
||||
Tree tree(Node{Node::Type::OpOr});
|
||||
Tree tree(Node{Node::Type::OpOr});
|
||||
const auto rx = std::regex(rxstr);
|
||||
for (const auto& field : fields) {
|
||||
const auto terms = process_regex(field.field, rx);
|
||||
for (const auto& term : terms) {
|
||||
tree.add_child(Tree(
|
||||
{Node::Type::Value,
|
||||
std::make_unique<Value>(field.field, "", field.id, term)}));
|
||||
tree.add_child(Tree({Node::Type::Value,
|
||||
FieldValue{field.id, term}}));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -306,11 +300,7 @@ Parser::Private::range(const FieldInfoVec& fields,
|
|||
prange = process_range(field.field, upper, lower);
|
||||
|
||||
return Tree({Node::Type::Range,
|
||||
std::make_unique<Range>(field.field,
|
||||
field.prefix,
|
||||
field.id,
|
||||
prange.lower,
|
||||
prange.upper)});
|
||||
FieldValue{field.id, prange.lower, prange.upper}});
|
||||
}
|
||||
|
||||
Mu::Tree
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include <mu-data.hh>
|
||||
#include <mu-tree.hh>
|
||||
#include <mu-store.hh>
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -22,13 +22,54 @@
|
|||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <iostream>
|
||||
#include <message/mu-fields.hh>
|
||||
|
||||
#include <mu-data.hh>
|
||||
#include <utils/mu-option.hh>
|
||||
#include <utils/mu-error.hh>
|
||||
|
||||
namespace Mu {
|
||||
|
||||
struct FieldValue {
|
||||
FieldValue(Field::Id idarg, const std::string valarg):
|
||||
field_id{idarg}, val1{valarg} {}
|
||||
FieldValue(Field::Id idarg, const std::string valarg1, const std::string valarg2):
|
||||
field_id{idarg}, val1{valarg1}, val2{valarg2} {}
|
||||
|
||||
const Field& field() const { return field_from_id(field_id); }
|
||||
const std::string& value() const { return val1; }
|
||||
const std::pair<std::string, std::string> range() const { return { val1, val2 }; }
|
||||
|
||||
const Field::Id field_id;
|
||||
const std::string val1;
|
||||
const std::string val2;
|
||||
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* operator<<
|
||||
*
|
||||
* @param os an output stream
|
||||
* @param fval a field value.
|
||||
*
|
||||
* @return the updated output stream
|
||||
*/
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& os, const FieldValue& fval)
|
||||
{
|
||||
os << ' ' << quote(std::string{fval.field().name});
|
||||
|
||||
if (fval.field().is_range())
|
||||
os << ' ' << quote(fval.range().first)
|
||||
<< ' ' << quote(fval.range().second);
|
||||
else
|
||||
os << ' ' << quote(fval.value());
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
// A node in the parse tree
|
||||
struct Node {
|
||||
enum class Type {
|
||||
|
@ -43,31 +84,39 @@ struct Node {
|
|||
Invalid
|
||||
};
|
||||
|
||||
Node(Type _type, std::unique_ptr<Data>&& _data) : type{_type}, data{std::move(_data)} {}
|
||||
Node(Type _type, FieldValue&& fval) : type{_type}, field_val{std::move(fval)} {}
|
||||
Node(Type _type) : type{_type} {}
|
||||
Node(Node&& rhs) = default;
|
||||
|
||||
Type type;
|
||||
std::unique_ptr<Data> data;
|
||||
Option<FieldValue> field_val;
|
||||
|
||||
static const char* type_name(Type t)
|
||||
{
|
||||
static constexpr std::string_view type_name(Type t) {
|
||||
switch (t) {
|
||||
case Type::Empty: return ""; break;
|
||||
case Type::OpAnd: return "and"; break;
|
||||
case Type::OpOr: return "or"; break;
|
||||
case Type::OpXor: return "xor"; break;
|
||||
case Type::OpAndNot: return "andnot"; break;
|
||||
case Type::OpNot: return "not"; break;
|
||||
case Type::Value: return "value"; break;
|
||||
case Type::Range: return "range"; break;
|
||||
case Type::Invalid: return "<invalid>"; break;
|
||||
default: throw Mu::Error(Error::Code::Internal, "unexpected type");
|
||||
case Type::Empty:
|
||||
return "";
|
||||
case Type::OpAnd:
|
||||
return "and";
|
||||
case Type::OpOr:
|
||||
return "or";
|
||||
case Type::OpXor:
|
||||
return "xor";
|
||||
case Type::OpAndNot:
|
||||
return "andnot";
|
||||
case Type::OpNot:
|
||||
return "not";
|
||||
case Type::Value:
|
||||
return "value";
|
||||
case Type::Range:
|
||||
return "range";
|
||||
case Type::Invalid:
|
||||
return "<invalid>";
|
||||
default:
|
||||
return "<error>";
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr bool is_binop(Type t)
|
||||
{
|
||||
static constexpr bool is_binop(Type t) {
|
||||
return t == Type::OpAnd || t == Type::OpAndNot || t == Type::OpOr ||
|
||||
t == Type::OpXor;
|
||||
}
|
||||
|
@ -77,8 +126,8 @@ inline std::ostream&
|
|||
operator<<(std::ostream& os, const Node& t)
|
||||
{
|
||||
os << Node::type_name(t.type);
|
||||
if (t.data)
|
||||
os << t.data;
|
||||
if (t.field_val)
|
||||
os << t.field_val.value();
|
||||
|
||||
return os;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
** Copyright (C) 2017-2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -17,9 +17,7 @@
|
|||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /*HAVE_CONFIG_H*/
|
||||
|
||||
#include <xapian.h>
|
||||
#include "mu-xapian.hh"
|
||||
|
@ -30,24 +28,33 @@ using namespace Mu;
|
|||
static Xapian::Query
|
||||
xapian_query_op(const Mu::Tree& tree)
|
||||
{
|
||||
Xapian::Query::op op;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wswitch-enum"
|
||||
switch (tree.node.type) {
|
||||
case Node::Type::OpNot: // OpNot x ::= <all> AND NOT x
|
||||
if (tree.node.type == Node::Type::OpNot) { // OpNot x ::= <all> AND NOT x
|
||||
if (tree.children.size() != 1)
|
||||
throw std::runtime_error("invalid # of children");
|
||||
return Xapian::Query(Xapian::Query::OP_AND_NOT,
|
||||
Xapian::Query::MatchAll,
|
||||
xapian_query(tree.children.front()));
|
||||
case Node::Type::OpAnd: op = Xapian::Query::OP_AND; break;
|
||||
case Node::Type::OpOr: op = Xapian::Query::OP_OR; break;
|
||||
case Node::Type::OpXor: op = Xapian::Query::OP_XOR; break;
|
||||
case Node::Type::OpAndNot: op = Xapian::Query::OP_AND_NOT; break;
|
||||
default: throw Mu::Error(Error::Code::Internal, "invalid op"); // bug
|
||||
Xapian::Query::MatchAll,
|
||||
xapian_query(tree.children.front()));
|
||||
}
|
||||
|
||||
const auto op = std::invoke([](Node::Type ntype) {
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wswitch-enum"
|
||||
switch (ntype) {
|
||||
case Node::Type::OpAnd:
|
||||
return Xapian::Query::OP_AND;
|
||||
case Node::Type::OpOr:
|
||||
return Xapian::Query::OP_OR;
|
||||
case Node::Type::OpXor:
|
||||
return Xapian::Query::OP_XOR;
|
||||
case Node::Type::OpAndNot:
|
||||
return Xapian::Query::OP_AND_NOT;
|
||||
case Node::Type::OpNot:
|
||||
default:
|
||||
throw Mu::Error(Error::Code::Internal, "invalid op"); // bug
|
||||
}
|
||||
#pragma GCC diagnostic pop
|
||||
}, tree.node.type);
|
||||
|
||||
std::vector<Xapian::Query> childvec;
|
||||
for (const auto& subtree : tree.children)
|
||||
childvec.emplace_back(xapian_query(subtree));
|
||||
|
@ -56,33 +63,37 @@ xapian_query_op(const Mu::Tree& tree)
|
|||
}
|
||||
|
||||
static Xapian::Query
|
||||
make_query(const Value* val, const std::string& str, bool maybe_wildcard)
|
||||
make_query(const FieldValue& fval, bool maybe_wildcard)
|
||||
{
|
||||
const auto vlen{str.length()};
|
||||
if (!maybe_wildcard || vlen <= 1 || str[vlen - 1] != '*')
|
||||
return Xapian::Query(val->prefix + str);
|
||||
const auto vlen{fval.value().length()};
|
||||
if (!maybe_wildcard || vlen <= 1 || fval.value()[vlen - 1] != '*')
|
||||
return Xapian::Query(fval.field().xapian_term(fval.value()));
|
||||
else
|
||||
return Xapian::Query(Xapian::Query::OP_WILDCARD,
|
||||
val->prefix + str.substr(0, vlen - 1));
|
||||
fval.field().xapian_term(fval.value().substr(0, vlen - 1)));
|
||||
}
|
||||
|
||||
static Xapian::Query
|
||||
xapian_query_value(const Mu::Tree& tree)
|
||||
{
|
||||
const auto v = dynamic_cast<Value*>(tree.node.data.get());
|
||||
if (!v->phrase)
|
||||
return make_query(v, v->value, true /*maybe-wildcard*/);
|
||||
// an indexable field implies it can be used with a phrase search.
|
||||
const auto& field_val{tree.node.field_val.value()};
|
||||
if (!field_val.field().is_indexable_term()) { //
|
||||
/* not an indexable field; no extra magic needed*/
|
||||
return make_query(field_val, true /*maybe-wildcard*/);
|
||||
}
|
||||
|
||||
const auto parts = split(v->value, " ");
|
||||
const auto parts{split(field_val.value(), " ")};
|
||||
if (parts.empty())
|
||||
return Xapian::Query::MatchNothing; // shouldn't happen
|
||||
|
||||
if (parts.size() == 1)
|
||||
return make_query(v, parts.front(), true /*maybe-wildcard*/);
|
||||
else if (parts.size() == 1)
|
||||
return make_query(field_val, true /*maybe-wildcard*/);
|
||||
|
||||
std::vector<Xapian::Query> phvec;
|
||||
for (const auto& p : parts)
|
||||
phvec.emplace_back(make_query(v, p, false /*no wildcards*/));
|
||||
for (const auto& p : parts) {
|
||||
FieldValue fv{field_val.field_id, p};
|
||||
phvec.emplace_back(make_query(fv, false /*no wildcards*/));
|
||||
}
|
||||
|
||||
return Xapian::Query(Xapian::Query::OP_PHRASE, phvec.begin(), phvec.end());
|
||||
}
|
||||
|
@ -90,12 +101,12 @@ xapian_query_value(const Mu::Tree& tree)
|
|||
static Xapian::Query
|
||||
xapian_query_range(const Mu::Tree& tree)
|
||||
{
|
||||
const auto r{dynamic_cast<Range*>(tree.node.data.get())};
|
||||
const auto& field_val{tree.node.field_val.value()};
|
||||
|
||||
return Xapian::Query(Xapian::Query::OP_VALUE_RANGE,
|
||||
(Xapian::valueno)r->id,
|
||||
r->lower,
|
||||
r->upper);
|
||||
field_val.field().value_no(),
|
||||
field_val.range().first,
|
||||
field_val.range().second);
|
||||
}
|
||||
|
||||
Xapian::Query
|
||||
|
@ -104,15 +115,20 @@ Mu::xapian_query(const Mu::Tree& tree)
|
|||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wswitch-enum"
|
||||
switch (tree.node.type) {
|
||||
case Node::Type::Empty: return Xapian::Query();
|
||||
case Node::Type::Empty:
|
||||
return Xapian::Query();
|
||||
case Node::Type::OpNot:
|
||||
case Node::Type::OpAnd:
|
||||
case Node::Type::OpOr:
|
||||
case Node::Type::OpXor:
|
||||
case Node::Type::OpAndNot: return xapian_query_op(tree);
|
||||
case Node::Type::Value: return xapian_query_value(tree);
|
||||
case Node::Type::Range: return xapian_query_range(tree);
|
||||
default: throw Mu::Error(Error::Code::Internal, "invalid query"); // bug
|
||||
case Node::Type::OpAndNot:
|
||||
return xapian_query_op(tree);
|
||||
case Node::Type::Value:
|
||||
return xapian_query_value(tree);
|
||||
case Node::Type::Range:
|
||||
return xapian_query_range(tree);
|
||||
default:
|
||||
throw Mu::Error(Error::Code::Internal, "invalid query"); // bug
|
||||
}
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -17,8 +17,8 @@
|
|||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __XAPIAN_HH__
|
||||
#define __XAPIAN_HH__
|
||||
#ifndef MU_XAPIAN_HH__
|
||||
#define MU_XAPIAN_HH__
|
||||
|
||||
#include <xapian.h>
|
||||
#include <mu-parser.hh>
|
||||
|
@ -36,4 +36,4 @@ Xapian::Query xapian_query(const Mu::Tree& tree);
|
|||
|
||||
} // namespace Mu
|
||||
|
||||
#endif /* __XAPIAN_H__ */
|
||||
#endif /* MU_XAPIAN_HH__ */
|
||||
|
|
Loading…
Reference in New Issue