1
0
mirror of https://github.com/djcb/mu.git synced 2024-06-20 06:46:50 +02:00

query-parser: support phrase queries

This commit is contained in:
djcb 2017-10-26 21:31:22 +03:00
parent e4b3174ed8
commit 7cd7d118e2
5 changed files with 73 additions and 22 deletions

View File

@ -118,11 +118,10 @@ static void
test_escape ()
{
CaseVec cases = {
{ "foo\"bar\"", Tokens{Token{3, TT::Data, "foo"},
Token{8, TT::Data, "bar"}}},
{ "foo\"bar\"", Tokens{Token{8, TT::Data, "foobar"}}},
{ "\"fnorb\"", Tokens{Token{7, TT::Data, "fnorb"}}},
{ "\\\"fnorb\\\"", Tokens{Token{9, TT::Data, "\"fnorb\""}}},
{ "foo\\\"bar\\\"", Tokens{Token{10, TT::Data, "foo\"bar\""}}}
{ "\\\"fnorb\\\"", Tokens{Token{9, TT::Data, "fnorb"}}},
{ "foo\\\"bar\\\"", Tokens{Token{10, TT::Data, "foobar"}}}
};
test_cases (cases);

View File

@ -27,12 +27,11 @@ using namespace Mux;
/**
 * Check whether a character separates tokens.
 *
 * Blanks (as classified by isblank) and parentheses are separators.
 * Double quotes are not in the set; eat_token deals with them itself.
 *
 * @param c the character to check
 *
 * @return true if c is a separator, false otherwise
 */
static bool
is_separator (char c)
{
	// the <cctype> classifiers are undefined for negative arguments, which
	// is what a non-ASCII byte becomes where plain char is signed.
	if (isblank (static_cast<unsigned char>(c)))
		return true;

	return c == '(' || c == ')';
}
@ -80,8 +79,14 @@ eat_token (std::string& food, size_t& pos)
continue;
}
if (kar == '"' && !escaped && quoted)
return Token{pos, Token::Type::Data, value};
if (kar == '"') {
if (!escaped && quoted)
return Token{pos, Token::Type::Data, value};
else {
quoted = true;
continue;
}
}
if (!quoted && !escaped && is_separator(kar)) {
@ -90,9 +95,6 @@ eat_token (std::string& food, size_t& pos)
return op_or_value(pos, value);
}
if (kar == '"')
quoted = true;
if (quoted || isblank(kar))
continue;

View File

@ -110,6 +110,20 @@ Mux::utf8_flatten (const std::string& str)
}
/**
 * Split a string at every occurrence of a separator.
 *
 * The separator itself is not part of any of the returned parts. Adjacent
 * separators, or a separator at the very start or end of str, give empty
 * parts. An empty str gives no parts at all, and so does an empty sepa,
 * since it can never match.
 *
 * Implemented on std::string only, so nothing is leaked if growing the
 * result vector throws, and embedded NUL bytes do not cut the input short.
 *
 * @param str the string to split
 * @param sepa the separator string
 *
 * @return the parts, in the order they occur in str
 */
std::vector<std::string>
Mux::split (const std::string& str, const std::string& sepa)
{
	std::vector<std::string> parts;

	if (str.empty() || sepa.empty())
		return parts;

	std::string::size_type begin = 0;
	for (auto hit = str.find (sepa); hit != std::string::npos;
	     hit = str.find (sepa, begin)) {
		parts.emplace_back (str, begin, hit - begin);
		begin = hit + sepa.size(); // continue just past the separator
	}

	// whatever follows the last separator (possibly nothing) is a part too
	parts.emplace_back (str, begin);

	return parts;
}
std::string
Mux::quote (const std::string& str)
{

View File

@ -18,6 +18,7 @@
*/
#include <string>
#include <vector>
#ifndef __UTILS_HH__
#define __UTILS_HH__
@ -33,6 +34,17 @@ namespace Mux {
*/
std::string utf8_flatten (const std::string& str);
/**
* Split a string into parts at each occurrence of a separator
*
* @param str the string to split
* @param sepa the separator string
*
* @return the parts, in the order they occur in str
*/
std::vector<std::string> split (const std::string& str,
const std::string& sepa);
/**
* Quote & escape a string
*

View File

@ -48,6 +48,35 @@ xapian_query_op (const Mux::Tree& tree)
return Xapian::Query(op, childvec.begin(), childvec.end());
}
/**
 * Turn a Value node into a Xapian query.
 *
 * The node's value is split on single spaces and each non-empty word is
 * combined with the node's prefix into a term. One term becomes a plain term
 * query; several terms become an OP_PHRASE query over the terms in order.
 *
 * @param tree a tree whose node data is expected to be a Value
 *
 * @return the query; Xapian::Query::MatchNothing if the value has no words
 *
 * @throws std::runtime_error if the node data is not a Value
 */
static Xapian::Query
xapian_query_value (const Mux::Tree& tree)
{
	const auto v = dynamic_cast<Value*> (tree.node.data.get());
	if (!v)
		throw std::runtime_error ("invalid query"); // bug

	std::vector<Xapian::Query> phvec;
	for (const auto& word: split (v->value, " ")) {
		// repeated, leading or trailing spaces give empty words; those
		// would turn into prefix-only terms inside the phrase.
		if (!word.empty())
			phvec.push_back (Xapian::Query (v->prefix + word));
	}

	if (phvec.empty())
		return Xapian::Query::MatchNothing; // shouldn't happen

	if (phvec.size() == 1)
		return phvec.front();

	return Xapian::Query (Xapian::Query::OP_PHRASE,
			      phvec.begin(), phvec.end());
}
/**
 * Turn a Range node into a Xapian value-range query.
 *
 * @param tree a tree whose node data is expected to be a Range
 *
 * @return an OP_VALUE_RANGE query on the range's id, from its lower to its
 * upper bound
 *
 * @throws std::runtime_error if the node data is not a Range
 */
static Xapian::Query
xapian_query_range (const Mux::Tree& tree)
{
	const auto r = dynamic_cast<Range*> (tree.node.data.get());
	if (!r)
		throw std::runtime_error ("invalid query"); // bug

	return Xapian::Query (Xapian::Query::OP_VALUE_RANGE,
			      static_cast<Xapian::valueno>(r->id),
			      r->lower, r->upper);
}
Xapian::Query
Mux::xapian_query (const Mux::Tree& tree)
{
@ -60,15 +89,10 @@ Mux::xapian_query (const Mux::Tree& tree)
case Node::Type::OpXor:
case Node::Type::OpAndNot:
return xapian_query_op (tree);
case Node::Type::Value: {
const auto v = dynamic_cast<Value*> (tree.node.data.get());
return Xapian::Query(v->prefix + v->value);
}
case Node::Type::Range: {
const auto r = dynamic_cast<Range*> (tree.node.data.get());
return Xapian::Query(Xapian::Query::OP_VALUE_RANGE,
(Xapian::valueno)r->id, r->lower, r->upper);
}
case Node::Type::Value:
return xapian_query_value (tree);
case Node::Type::Range:
return xapian_query_range (tree);
default:
throw std::runtime_error ("invalid query"); // bug
}