query-parser: tidy up

Remove "Data", and use Field directly.
This commit is contained in:
Dirk-Jan C. Binnema 2022-06-12 19:44:00 +03:00
parent 1a84a57e05
commit 48695a1981
7 changed files with 139 additions and 244 deletions

View File

@ -63,7 +63,6 @@ libmu_la_SOURCES= \
mu-bookmarks.hh \
mu-contacts-cache.cc \
mu-contacts-cache.hh \
mu-data.hh \
mu-parser.cc \
mu-parser.hh \
mu-query.cc \

View File

@ -1,158 +0,0 @@
/*
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public License
** as published by the Free Software Foundation; either version 2.1
** of the License, or (at your option) any later version.
**
** This library is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
** Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public
** License along with this library; if not, write to the Free
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
** 02110-1301, USA.
*/
#ifndef __DATA_HH__
#define __DATA_HH__
#include <string>
#include <iostream>
#include <regex>
#include <message/mu-message.hh>
#include <utils/mu-utils.hh>
namespace Mu {
// Base class representing some data item: either a Value or a Range. A Value can still be a
// Regex, but that is not a separate type here.
struct Data {
enum class Type { Value, Range }; /**< the kinds of data item */
virtual ~Data() = default;
Type type; /**< type of data */
std::string field; /**< full name of the field */
std::string prefix; /**< Xapian prefix for the field */
Field::Id id; /**< id of the field */
protected:
/**
 * Construct a Data item. The constructor is protected, so it is meant to be
 * called from derived types (see Range and Value).
 *
 * @param _type the type of data
 * @param _field full name of the field
 * @param _prefix the Xapian prefix for the field
 * @param _id id of the field
 */
Data(Type _type, const std::string& _field, const std::string& _prefix,
Field::Id _id)
: type(_type), field(_field), prefix(_prefix), id(_id)
{
}
};
/**
 * Write a human-readable name for a data type to an output stream.
 *
 * @param os an output stream
 * @param t a data type
 *
 * @return the updated output stream
 */
inline std::ostream&
operator<<(std::ostream& os, Data::Type t)
{
	if (t == Data::Type::Value)
		return os << "value";

	if (t == Data::Type::Range)
		return os << "range";

	// any other (out-of-range) enumerator value
	return os << "bug";
}
/**
* Range type -- [a..b]
*/
struct Range : public Data {
/**
* Construct a range
*
* @param _field the field
* @param _prefix the xapian prefix
* @param _id xapian value number
* @param _lower lower bound
* @param _upper upper bound
*/
Range(const std::string& _field,
const std::string& _prefix,
Field::Id _id,
const std::string& _lower,
const std::string& _upper)
:
Data(Data::Type::Range, _field, _prefix, _id), lower(_lower), upper(_upper)
{
}
std::string lower; /**< lower bound */
std::string upper; /**< upper bound */
};
/**
* Basic value
*
*/
struct Value : public Data {
/**
* Construct a Value
*
* @param _field the field
* @param _prefix the xapian prefix
* @param _id field id
* @param _value the value
*/
Value(const std::string& _field,
const std::string& _prefix,
Field::Id _id,
const std::string& _value,
bool _phrase = false)
: Data(Value::Type::Value, _field, _prefix, _id), value(_value), phrase(_phrase)
{
}
std::string value; /**< the value */
bool phrase;
};
/**
* operator<<
*
* @param os an output stream
* @param v a data ptr
*
* @return the updated output stream
*/
inline std::ostream&
operator<<(std::ostream& os, const std::unique_ptr<Data>& v)
{
switch (v->type) {
case Data::Type::Value: {
const auto bval = dynamic_cast<Value*>(v.get());
os << ' ' << quote(v->field) << ' ' << quote(utf8_flatten(bval->value));
if (bval->phrase)
os << " (ph)";
break;
}
case Data::Type::Range: {
const auto rval = dynamic_cast<Range*>(v.get());
os << ' ' << quote(v->field) << ' ' << quote(rval->lower) << ' '
<< quote(rval->upper);
break;
}
default: os << "unexpected type"; break;
}
return os;
}
} // namespace Mu
#endif /* __DATA_HH__ */

View File

@ -19,6 +19,7 @@
#include "mu-parser.hh"
#include <algorithm>
#include <regex>
#include <limits>
#include "mu-tokenizer.hh"
@ -199,8 +200,9 @@ Parser::Private::process_regex(const std::string& field_str,
const auto prefix{field_opt->xapian_term()};
std::vector<std::string> terms;
store_.for_each_term(field_opt->id, [&](auto&& str) {
if (std::regex_search(str.c_str() + 1, rx)) // avoid copy
terms.emplace_back(str);
auto val{str.c_str() + 1}; // strip off the Xapian prefix.
if (std::regex_search(val, rx))
terms.emplace_back(std::move(val));
return true;
});
@ -233,22 +235,15 @@ Parser::Private::value(const FieldInfoVec& fields,
if (fields.size() == 1) {
const auto item = fields.front();
return Tree({Node::Type::Value,
std::make_unique<Value>(item.field,
item.prefix,
item.id,
process_value(item.field, val),
item.supports_phrase)});
FieldValue{item.id, process_value(item.field, val)}});
}
// a 'multi-field' such as "recip:"
Tree tree(Node{Node::Type::OpOr});
for (const auto& item : fields)
tree.add_child(Tree({Node::Type::Value,
std::make_unique<Value>(item.field,
item.prefix,
item.id,
process_value(item.field, val),
item.supports_phrase)}));
FieldValue{item.id,
process_value(item.field, val)}}));
return tree;
}
@ -264,14 +259,13 @@ Parser::Private::regex(const FieldInfoVec& fields,
const auto rxstr = utf8_flatten(v.substr(1, v.length() - 2));
try {
Tree tree(Node{Node::Type::OpOr});
Tree tree(Node{Node::Type::OpOr});
const auto rx = std::regex(rxstr);
for (const auto& field : fields) {
const auto terms = process_regex(field.field, rx);
for (const auto& term : terms) {
tree.add_child(Tree(
{Node::Type::Value,
std::make_unique<Value>(field.field, "", field.id, term)}));
tree.add_child(Tree({Node::Type::Value,
FieldValue{field.id, term}}));
}
}
@ -306,11 +300,7 @@ Parser::Private::range(const FieldInfoVec& fields,
prange = process_range(field.field, upper, lower);
return Tree({Node::Type::Range,
std::make_unique<Range>(field.field,
field.prefix,
field.id,
prange.lower,
prange.upper)});
FieldValue{field.id, prange.lower, prange.upper}});
}
Mu::Tree

View File

@ -25,7 +25,6 @@
#include <vector>
#include <memory>
#include <mu-data.hh>
#include <mu-tree.hh>
#include <mu-store.hh>

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public License
@ -22,13 +22,54 @@
#include <vector>
#include <string>
#include <string_view>
#include <iostream>
#include <message/mu-fields.hh>
#include <mu-data.hh>
#include <utils/mu-option.hh>
#include <utils/mu-error.hh>
namespace Mu {
/**
 * A field id together with either a single value or the two bounds of a range.
 *
 * All members are const, so a FieldValue does not change after construction.
 */
struct FieldValue {
	/**
	 * Construct a FieldValue holding a single value; val2 is left empty.
	 *
	 * @param idarg id of the field
	 * @param valarg the value
	 */
	FieldValue(Field::Id idarg, std::string valarg):
		field_id{idarg}, val1{std::move(valarg)} {}

	/**
	 * Construct a FieldValue holding a range.
	 *
	 * @param idarg id of the field
	 * @param valarg1 first value; returned as range().first
	 * @param valarg2 second value; returned as range().second
	 */
	FieldValue(Field::Id idarg, std::string valarg1, std::string valarg2):
		field_id{idarg}, val1{std::move(valarg1)}, val2{std::move(valarg2)} {}

	/** @return the Field for field_id, as looked up with field_from_id() */
	const Field& field() const { return field_from_id(field_id); }

	/** @return the (first) value */
	const std::string& value() const { return val1; }

	/** @return a copy of both values, as a pair { val1, val2 } */
	const std::pair<std::string, std::string> range() const { return { val1, val2 }; }

	const Field::Id   field_id; /**< id of the field */
	const std::string val1;     /**< the value, or first value of the range */
	const std::string val2;     /**< second value of the range */
};
/**
 * Write a textual representation of a field value to an output stream: the
 * quoted field name, followed by either the two quoted range values (when the
 * field is a range field) or the single quoted value.
 *
 * @param os an output stream
 * @param fval a field value.
 *
 * @return the updated output stream
 */
inline std::ostream&
operator<<(std::ostream& os, const FieldValue& fval)
{
	const auto& fld = fval.field();

	os << ' ' << quote(std::string{fld.name});

	if (!fld.is_range())
		return os << ' ' << quote(fval.value());

	const auto bounds = fval.range();
	return os << ' ' << quote(bounds.first) << ' ' << quote(bounds.second);
}
// A node in the parse tree
struct Node {
enum class Type {
@ -43,31 +84,39 @@ struct Node {
Invalid
};
Node(Type _type, std::unique_ptr<Data>&& _data) : type{_type}, data{std::move(_data)} {}
Node(Type _type, FieldValue&& fval) : type{_type}, field_val{std::move(fval)} {}
Node(Type _type) : type{_type} {}
Node(Node&& rhs) = default;
Type type;
std::unique_ptr<Data> data;
Option<FieldValue> field_val;
static const char* type_name(Type t)
{
static constexpr std::string_view type_name(Type t) {
switch (t) {
case Type::Empty: return ""; break;
case Type::OpAnd: return "and"; break;
case Type::OpOr: return "or"; break;
case Type::OpXor: return "xor"; break;
case Type::OpAndNot: return "andnot"; break;
case Type::OpNot: return "not"; break;
case Type::Value: return "value"; break;
case Type::Range: return "range"; break;
case Type::Invalid: return "<invalid>"; break;
default: throw Mu::Error(Error::Code::Internal, "unexpected type");
case Type::Empty:
return "";
case Type::OpAnd:
return "and";
case Type::OpOr:
return "or";
case Type::OpXor:
return "xor";
case Type::OpAndNot:
return "andnot";
case Type::OpNot:
return "not";
case Type::Value:
return "value";
case Type::Range:
return "range";
case Type::Invalid:
return "<invalid>";
default:
return "<error>";
}
}
static constexpr bool is_binop(Type t)
{
static constexpr bool is_binop(Type t) {
return t == Type::OpAnd || t == Type::OpAndNot || t == Type::OpOr ||
t == Type::OpXor;
}
@ -77,8 +126,8 @@ inline std::ostream&
operator<<(std::ostream& os, const Node& t)
{
os << Node::type_name(t.type);
if (t.data)
os << t.data;
if (t.field_val)
os << t.field_val.value();
return os;
}

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2017-2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public License
@ -17,9 +17,7 @@
** 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <xapian.h>
#include "mu-xapian.hh"
@ -30,24 +28,33 @@ using namespace Mu;
static Xapian::Query
xapian_query_op(const Mu::Tree& tree)
{
Xapian::Query::op op;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch-enum"
switch (tree.node.type) {
case Node::Type::OpNot: // OpNot x ::= <all> AND NOT x
if (tree.node.type == Node::Type::OpNot) { // OpNot x ::= <all> AND NOT x
if (tree.children.size() != 1)
throw std::runtime_error("invalid # of children");
return Xapian::Query(Xapian::Query::OP_AND_NOT,
Xapian::Query::MatchAll,
xapian_query(tree.children.front()));
case Node::Type::OpAnd: op = Xapian::Query::OP_AND; break;
case Node::Type::OpOr: op = Xapian::Query::OP_OR; break;
case Node::Type::OpXor: op = Xapian::Query::OP_XOR; break;
case Node::Type::OpAndNot: op = Xapian::Query::OP_AND_NOT; break;
default: throw Mu::Error(Error::Code::Internal, "invalid op"); // bug
Xapian::Query::MatchAll,
xapian_query(tree.children.front()));
}
const auto op = std::invoke([](Node::Type ntype) {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch-enum"
switch (ntype) {
case Node::Type::OpAnd:
return Xapian::Query::OP_AND;
case Node::Type::OpOr:
return Xapian::Query::OP_OR;
case Node::Type::OpXor:
return Xapian::Query::OP_XOR;
case Node::Type::OpAndNot:
return Xapian::Query::OP_AND_NOT;
case Node::Type::OpNot:
default:
throw Mu::Error(Error::Code::Internal, "invalid op"); // bug
}
#pragma GCC diagnostic pop
}, tree.node.type);
std::vector<Xapian::Query> childvec;
for (const auto& subtree : tree.children)
childvec.emplace_back(xapian_query(subtree));
@ -56,33 +63,37 @@ xapian_query_op(const Mu::Tree& tree)
}
static Xapian::Query
make_query(const Value* val, const std::string& str, bool maybe_wildcard)
make_query(const FieldValue& fval, bool maybe_wildcard)
{
const auto vlen{str.length()};
if (!maybe_wildcard || vlen <= 1 || str[vlen - 1] != '*')
return Xapian::Query(val->prefix + str);
const auto vlen{fval.value().length()};
if (!maybe_wildcard || vlen <= 1 || fval.value()[vlen - 1] != '*')
return Xapian::Query(fval.field().xapian_term(fval.value()));
else
return Xapian::Query(Xapian::Query::OP_WILDCARD,
val->prefix + str.substr(0, vlen - 1));
fval.field().xapian_term(fval.value().substr(0, vlen - 1)));
}
static Xapian::Query
xapian_query_value(const Mu::Tree& tree)
{
const auto v = dynamic_cast<Value*>(tree.node.data.get());
if (!v->phrase)
return make_query(v, v->value, true /*maybe-wildcard*/);
// an indexable field implies it can be used with a phrase search.
const auto& field_val{tree.node.field_val.value()};
if (!field_val.field().is_indexable_term()) { //
/* not an indexable field; no extra magic needed*/
return make_query(field_val, true /*maybe-wildcard*/);
}
const auto parts = split(v->value, " ");
const auto parts{split(field_val.value(), " ")};
if (parts.empty())
return Xapian::Query::MatchNothing; // shouldn't happen
if (parts.size() == 1)
return make_query(v, parts.front(), true /*maybe-wildcard*/);
else if (parts.size() == 1)
return make_query(field_val, true /*maybe-wildcard*/);
std::vector<Xapian::Query> phvec;
for (const auto& p : parts)
phvec.emplace_back(make_query(v, p, false /*no wildcards*/));
for (const auto& p : parts) {
FieldValue fv{field_val.field_id, p};
phvec.emplace_back(make_query(fv, false /*no wildcards*/));
}
return Xapian::Query(Xapian::Query::OP_PHRASE, phvec.begin(), phvec.end());
}
@ -90,12 +101,12 @@ xapian_query_value(const Mu::Tree& tree)
static Xapian::Query
xapian_query_range(const Mu::Tree& tree)
{
const auto r{dynamic_cast<Range*>(tree.node.data.get())};
const auto& field_val{tree.node.field_val.value()};
return Xapian::Query(Xapian::Query::OP_VALUE_RANGE,
(Xapian::valueno)r->id,
r->lower,
r->upper);
field_val.field().value_no(),
field_val.range().first,
field_val.range().second);
}
Xapian::Query
@ -104,15 +115,20 @@ Mu::xapian_query(const Mu::Tree& tree)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch-enum"
switch (tree.node.type) {
case Node::Type::Empty: return Xapian::Query();
case Node::Type::Empty:
return Xapian::Query();
case Node::Type::OpNot:
case Node::Type::OpAnd:
case Node::Type::OpOr:
case Node::Type::OpXor:
case Node::Type::OpAndNot: return xapian_query_op(tree);
case Node::Type::Value: return xapian_query_value(tree);
case Node::Type::Range: return xapian_query_range(tree);
default: throw Mu::Error(Error::Code::Internal, "invalid query"); // bug
case Node::Type::OpAndNot:
return xapian_query_op(tree);
case Node::Type::Value:
return xapian_query_value(tree);
case Node::Type::Range:
return xapian_query_range(tree);
default:
throw Mu::Error(Error::Code::Internal, "invalid query"); // bug
}
#pragma GCC diagnostic pop
}

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public License
@ -17,8 +17,8 @@
** 02110-1301, USA.
*/
#ifndef __XAPIAN_HH__
#define __XAPIAN_HH__
#ifndef MU_XAPIAN_HH__
#define MU_XAPIAN_HH__
#include <xapian.h>
#include <mu-parser.hh>
@ -36,4 +36,4 @@ Xapian::Query xapian_query(const Mu::Tree& tree);
} // namespace Mu
#endif /* __XAPIAN_H__ */
#endif /* MU_XAPIAN_HH__ */