fields: 'phrasable' instead of 'indexable'

'Phrasable' is probably a slightly clearer description than 'indexable'.
This commit is contained in:
Dirk-Jan C. Binnema 2023-09-17 09:59:38 +03:00
parent 9e1c7ddfaa
commit 94c90bd0c5
4 changed files with 36 additions and 38 deletions

View File

@ -81,19 +81,16 @@ static void
add_search_term(Xapian::Document& doc, const Field& field, const std::string& val, add_search_term(Xapian::Document& doc, const Field& field, const std::string& val,
Document::Options opts) Document::Options opts)
{ {
if (field.is_normal_term()) { if (field.is_normal_term() || field.is_phrasable_term()) {
doc.add_term(field.xapian_term(val)); const auto flat{utf8_flatten(val)};
if (field.is_normal_term())
doc.add_term(field.xapian_term(flat));
if (field.is_phrasable_term()) {
auto termgen{make_term_generator(doc, opts)};
termgen.index_text(flat, 1, field.xapian_term());
}
} else if (field.is_boolean_term()) { } else if (field.is_boolean_term()) {
doc.add_boolean_term(field.xapian_term(val)); doc.add_boolean_term(field.xapian_term(val));
} else if (field.is_indexable_term()) {
auto&& termgen{make_term_generator(doc, opts)};
termgen.index_text(utf8_flatten(val), 1, field.xapian_term());
/* also add as 'normal' term, so some queries where the indexer
* eats special chars also match */
if (field.id != Field::Id::BodyText &&
field.id != Field::Id::EmbeddedText) {
doc.add_term(field.xapian_term(val));
}
} else } else
throw std::logic_error("not a search term"); throw std::logic_error("not a search term");
} }

View File

@ -1,5 +1,5 @@
/* /*
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> ** Copyright (C) 2022-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** **
** This program is free software; you can redistribute it and/or modify it ** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the ** under the terms of the GNU General Public License as published by the
@ -92,16 +92,13 @@ constexpr /*static*/ bool
validate_field_flags() validate_field_flags()
{ {
for (auto&& field: Fields) { for (auto&& field: Fields) {
/* - A field has at most one of Indexable, HasTerms, IsXapianBoolean and /* - A field has at most one of Phrasable, Boolean */
IsContact. */
size_t flagnum{}; size_t flagnum{};
if (field.is_indexable_term()) if (field.is_phrasable_term())
++flagnum; ++flagnum;
if (field.is_boolean_term()) if (field.is_boolean_term())
++flagnum; ++flagnum;
if (field.is_normal_term())
++flagnum;
if (flagnum > 1) { if (flagnum > 1) {
//mu_warning("invalid field {}", field.name); //mu_warning("invalid field {}", field.name);

View File

@ -1,5 +1,5 @@
/* /*
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> ** Copyright (C) 2022-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** **
** This program is free software; you can redistribute it and/or modify it ** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the ** under the terms of the GNU General Public License as published by the
@ -110,24 +110,24 @@ struct Field {
* (msg), ie. one value containing the list of To: addresses - there * (msg), ie. one value containing the list of To: addresses - there
* can be multiple terms, each containing e.g. one of the To: * can be multiple terms, each containing e.g. one of the To:
* addresses - searching uses terms, but to display some field, it * addresses - searching uses terms, but to display some field, it
* must be in the value (at least when using MuMsgIter) * must be in the value
* *
* Rules (build-time enforced): * Rules (build-time enforced):
* - A field has at most one of Indexable, HasTerms, IsXapianBoolean and IsContact. * - A field has at most one of PhrasableTerm, BooleanTerm, ContactTerm.
*/ */
enum struct Flag { enum struct Flag {
/* /*
* Different kind of terms; at most one is true, * Different kind of terms; at most one is true, and cannot be combined with
* and cannot be combined with IsContact. Compile-time enforced. * Contact. Compile-time enforced.
*/ */
NormalTerm = 1 << 0, NormalTerm = 1 << 0,
/**< Field is a searchable term */ /**< Field is a searchable term */
BooleanTerm = 1 << 1, BooleanTerm = 1 << 1,
/**< Field is a boolean search-term (i.e. at most one per message); /**< Field is a boolean search-term (i.e. at most one per message);
* wildcards do not work */ * wildcards do not work */
IndexableTerm = 1 << 2, PhrasableTerm = 1 << 2,
/**< Field has indexable text as term */ /**< Field has phrasable/indexable text as term */
/* /*
* Contact flag cannot be combined with any of the term flags. * Contact flag cannot be combined with any of the term flags.
* This is compile-time enforced. * This is compile-time enforced.
@ -150,10 +150,10 @@ struct Field {
return (static_cast<int>(some_flag) & static_cast<int>(flags)) != 0; return (static_cast<int>(some_flag) & static_cast<int>(flags)) != 0;
} }
constexpr bool is_indexable_term() const { return any_of(Flag::IndexableTerm); } constexpr bool is_phrasable_term() const { return any_of(Flag::PhrasableTerm); }
constexpr bool is_boolean_term() const { return any_of(Flag::BooleanTerm); } constexpr bool is_boolean_term() const { return any_of(Flag::BooleanTerm); }
constexpr bool is_normal_term() const { return any_of(Flag::NormalTerm); } constexpr bool is_normal_term() const { return any_of(Flag::NormalTerm); }
constexpr bool is_searchable() const { return is_indexable_term() || constexpr bool is_searchable() const { return is_phrasable_term() ||
is_boolean_term() || is_boolean_term() ||
is_normal_term(); } is_normal_term(); }
constexpr bool is_sortable() const { return is_value(); } constexpr bool is_sortable() const { return is_value(); }
@ -230,8 +230,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact | Field::Flag::Contact |
Field::Flag::Value | Field::Flag::Value |
Field::Flag::IncludeInSexp | Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm, Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
}, },
{ {
Field::Id::BodyText, Field::Id::BodyText,
@ -240,7 +240,7 @@ static constexpr std::array<Field, Field::id_size()>
"Message plain-text body", "Message plain-text body",
"body:capybara", "body:capybara",
'b', 'b',
Field::Flag::IndexableTerm, Field::Flag::PhrasableTerm,
}, },
{ {
Field::Id::Cc, Field::Id::Cc,
@ -252,7 +252,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact | Field::Flag::Contact |
Field::Flag::Value | Field::Flag::Value |
Field::Flag::IncludeInSexp | Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm, Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
}, },
{ {
Field::Id::Changed, Field::Id::Changed,
@ -283,7 +284,7 @@ static constexpr std::array<Field, Field::id_size()>
"Embedded text", "Embedded text",
"embed:war OR embed:peace", "embed:war OR embed:peace",
'e', 'e',
Field::Flag::IndexableTerm Field::Flag::PhrasableTerm
}, },
{ {
Field::Id::File, Field::Id::File,
@ -315,7 +316,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact | Field::Flag::Contact |
Field::Flag::Value | Field::Flag::Value |
Field::Flag::IncludeInSexp | Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm, Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
}, },
{ {
Field::Id::Language, Field::Id::Language,
@ -421,8 +423,9 @@ static constexpr std::array<Field, Field::id_size()>
"subject:wombat", "subject:wombat",
's', 's',
Field::Flag::Value | Field::Flag::Value |
Field::Flag::IndexableTerm | Field::Flag::IncludeInSexp |
Field::Flag::IncludeInSexp Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm
}, },
{ {
Field::Id::Tags, Field::Id::Tags,
@ -455,7 +458,8 @@ static constexpr std::array<Field, Field::id_size()>
Field::Flag::Contact | Field::Flag::Contact |
Field::Flag::Value | Field::Flag::Value |
Field::Flag::IncludeInSexp | Field::Flag::IncludeInSexp |
Field::Flag::IndexableTerm, Field::Flag::NormalTerm |
Field::Flag::PhrasableTerm,
}, },
}}; }};

View File

@ -105,9 +105,9 @@ topic_fields(const Options& opts)
auto searchable=[&](const Field& field)->std::string { auto searchable=[&](const Field& field)->std::string {
if (field.is_boolean_term()) if (field.is_boolean_term())
return "boolean"; return "boolean";
if (field.is_indexable_term()) if (field.is_phrasable_term())
return "index"; return "phrase";
if (field.is_normal_term()) if (field.is_value())
return "yes"; return "yes";
if (field.is_contact()) if (field.is_contact())
return "contact"; return "contact";