fields: 'phrasable' instead of 'indexable'

'Phrasable' is probably a bit clearer description.
This commit is contained in:
Dirk-Jan C. Binnema 2023-09-17 09:59:38 +03:00
parent 9e1c7ddfaa
commit 94c90bd0c5
4 changed files with 36 additions and 38 deletions

View File

@ -81,19 +81,16 @@ static void
add_search_term(Xapian::Document& doc, const Field& field, const std::string& val,
Document::Options opts)
{
if (field.is_normal_term()) {
doc.add_term(field.xapian_term(val));
if (field.is_normal_term() || field.is_phrasable_term()) {
const auto flat{utf8_flatten(val)};
if (field.is_normal_term())
doc.add_term(field.xapian_term(flat));
if (field.is_phrasable_term()) {
auto termgen{make_term_generator(doc, opts)};
termgen.index_text(flat, 1, field.xapian_term());
}
} else if (field.is_boolean_term()) {
doc.add_boolean_term(field.xapian_term(val));
} else if (field.is_indexable_term()) {
auto&& termgen{make_term_generator(doc, opts)};
termgen.index_text(utf8_flatten(val), 1, field.xapian_term());
/* also add as 'normal' term, so some queries where the indexer
* eats special chars also match */
if (field.id != Field::Id::BodyText &&
field.id != Field::Id::EmbeddedText) {
doc.add_term(field.xapian_term(val));
}
} else
throw std::logic_error("not a search term");
}

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2022-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -92,16 +92,13 @@ constexpr /*static*/ bool
validate_field_flags()
{
for (auto&& field: Fields) {
/* - A field has at most one of Indexable, HasTerms, IsXapianBoolean and
IsContact. */
/* - A field has at most one of Phrasable, Boolean */
size_t flagnum{};
if (field.is_indexable_term())
if (field.is_phrasable_term())
++flagnum;
if (field.is_boolean_term())
++flagnum;
if (field.is_normal_term())
++flagnum;
if (flagnum > 1) {
//mu_warning("invalid field {}", field.name);

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2022-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -110,24 +110,24 @@ struct Field {
* (msg), ie. one value containing the list of To: addresses - there
* can be multiple terms, each containing e.g. one of the To:
* addresses - searching uses terms, but to display some field, it
* must be in the value (at least when using MuMsgIter)
* must be in the value
*
* Rules (build-time enforced):
* - A field has at most one of Indexable, HasTerms, IsXapianBoolean and IsContact.
* - A field has at most one of PhrasableTerm, BooleanTerm, ContactTerm.
*/
enum struct Flag {
/*
* Different kind of terms; at most one is true,
* and cannot be combined with IsContact. Compile-time enforced.
* Different kind of terms; at most one is true, and cannot be combined with
* Contact. Compile-time enforced.
*/
NormalTerm = 1 << 0,
/**< Field is a searchable term */
BooleanTerm = 1 << 1,
/**< Field is a boolean search-term (i.e. at most one per message);
* wildcards do not work */
IndexableTerm = 1 << 2,
/**< Field has indexable text as term */
PhrasableTerm = 1 << 2,
/**< Field has phrasable/indexable text as term */
/*
* Contact flag cannot be combined with any of the term flags.
* This is compile-time enforced.
@ -150,10 +150,10 @@ struct Field {
return (static_cast<int>(some_flag) & static_cast<int>(flags)) != 0;
}
constexpr bool is_indexable_term() const { return any_of(Flag::IndexableTerm); }
constexpr bool is_phrasable_term() const { return any_of(Flag::PhrasableTerm); }
constexpr bool is_boolean_term() const { return any_of(Flag::BooleanTerm); }
constexpr bool is_normal_term() const { return any_of(Flag::NormalTerm); }
constexpr bool is_searchable() const { return is_indexable_term() ||
constexpr bool is_searchable() const { return is_phrasable_term() ||
is_boolean_term() ||
is_normal_term(); }
constexpr bool is_sortable() const { return is_value(); }
@ -230,8 +230,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact |
Field::Flag::Value |
Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm,
Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
},
{
Field::Id::BodyText,
@ -240,7 +240,7 @@ static constexpr std::array<Field, Field::id_size()>
"Message plain-text body",
"body:capybara",
'b',
Field::Flag::IndexableTerm,
Field::Flag::PhrasableTerm,
},
{
Field::Id::Cc,
@ -252,7 +252,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact |
Field::Flag::Value |
Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm,
Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
},
{
Field::Id::Changed,
@ -283,7 +284,7 @@ static constexpr std::array<Field, Field::id_size()>
"Embedded text",
"embed:war OR embed:peace",
'e',
Field::Flag::IndexableTerm
Field::Flag::PhrasableTerm
},
{
Field::Id::File,
@ -315,7 +316,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact |
Field::Flag::Value |
Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm,
Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
},
{
Field::Id::Language,
@ -421,8 +423,9 @@ static constexpr std::array<Field, Field::id_size()>
"subject:wombat",
's',
Field::Flag::Value |
Field::Flag::IndexableTerm |
Field::Flag::IncludeInSexp
Field::Flag::IncludeInSexp |
Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm
},
{
Field::Id::Tags,
@ -455,7 +458,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact |
Field::Flag::Value |
Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm,
Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
},
}};

View File

@ -105,9 +105,9 @@ topic_fields(const Options& opts)
auto searchable=[&](const Field& field)->std::string {
if (field.is_boolean_term())
return "boolean";
if (field.is_indexable_term())
return "index";
if (field.is_normal_term())
if (field.is_phrasable_term())
return "phrase";
if (field.is_value())
return "yes";
if (field.is_contact())
return "contact";