query: filter out docs without query-matches

This avoid including unwanted messages in threaded results.
Also some cleanups.
This commit is contained in:
Dirk-Jan C. Binnema 2021-02-15 19:11:22 +02:00
parent 950883ad39
commit 579f841bdd
4 changed files with 44 additions and 49 deletions

View File

@ -102,7 +102,7 @@ private:
} MU_XAPIAN_CATCH_BLOCK_RETURN (Nothing);
};
struct MatchDeciderLeader: public MatchDecider {
struct MatchDeciderLeader final: public MatchDecider {
MatchDeciderLeader (QueryFlags qflags, DeciderInfo& info):
MatchDecider(qflags, info)
{}
@ -155,7 +155,7 @@ Mu::make_leader_decider (QueryFlags qflags, DeciderInfo& info)
return std::make_unique<MatchDeciderLeader>(qflags, info);
}
struct MatchDeciderRelated: public MatchDecider {
struct MatchDeciderRelated final: public MatchDecider {
MatchDeciderRelated(QueryFlags qflags, DeciderInfo& info):
MatchDecider(qflags, info) {}
/**
@ -200,8 +200,8 @@ Mu::make_related_decider (QueryFlags qflags, DeciderInfo& info)
return std::make_unique<MatchDeciderRelated>(qflags, info);
}
struct MatchDeciderFinal: public MatchDecider {
MatchDeciderFinal(QueryFlags qflags, DeciderInfo& info):
struct MatchDeciderThread final: public MatchDecider {
MatchDeciderThread(QueryFlags qflags, DeciderInfo& info):
MatchDecider{qflags, info} {}
/**
* operator()
@ -219,16 +219,13 @@ struct MatchDeciderFinal: public MatchDecider {
// we may have seen this match in the "Leader" query,
// or in the second (unbuounded) related query;
const auto it{decider_info_.matches.find(doc.get_docid())};
if (it == decider_info_.matches.end())
return false;
else
return should_include(it->second);
return it != decider_info_.matches.end() && !it->second.thread_path.empty();
}
};
std::unique_ptr<Xapian::MatchDecider>
Mu::make_final_decider (QueryFlags qflags, DeciderInfo& info)
Mu::make_thread_decider (QueryFlags qflags, DeciderInfo& info)
{
return std::make_unique<MatchDeciderFinal>(qflags, info);
return std::make_unique<MatchDeciderThread>(qflags, info);
}

View File

@ -68,16 +68,16 @@ std::unique_ptr<Xapian::MatchDecider> make_related_decider(QueryFlags qflags,
/**
* Make a "final" decider, that is, a MatchDecider that removes all but
* the document excepts for the ones included earlier.
* Make a "thread" decider, that is, a MatchDecider that removes all but the
* document excepts for the ones found during initial/related searches.
*
* @param qflags query flags
* @param match_info receives information about the matches.
*
* @return a unique_ptr to a match decider.
*/
std::unique_ptr<Xapian::MatchDecider> make_final_decider (QueryFlags qflags,
DeciderInfo& info);
std::unique_ptr<Xapian::MatchDecider> make_thread_decider (QueryFlags qflags,
DeciderInfo& info);
} // namepace Mu

View File

@ -81,7 +81,7 @@ struct Container {
// That the sub-root-levels of thtreas are always sorted
// by date, in ascending order.
std::string thread_date_key;
Option<QueryMatch&> query_match;
bool is_nuked{};
@ -103,7 +103,7 @@ using ContainerVec = Container::ContainerVec;
static std::ostream&
operator<<(std::ostream& os, const Container& container)
{
os << "container: " << std::right << std::setw(10) << &container
os << "container: " << std::right << std::setw(10) << &container
<< ": parent: " << std::right << std::setw(10) << container.parent
<< " [" << container.thread_date_key << "]"
<< "\n children: ";
@ -122,7 +122,6 @@ operator<<(std::ostream& os, const Container& container)
using IdTable = std::unordered_map<std::string, Container>;
using DupTable = std::multimap<std::string, Container>;
//template <typename QueryResultsType> using DupsVec = std::vector<decltype(QueryResultsType::value_type)>;
static void
handle_duplicates (IdTable& id_table, DupTable& dup_table)
@ -165,8 +164,8 @@ determine_id_table (QueryResultsType& qres)
auto c_it = id_table.find(msgid);
auto& container = [&]()->Container& {
if (c_it != id_table.end()) {
assert(!c_it->second.query_match);
c_it->second.query_match = mi.query_match();
if (!c_it->second.query_match) // hmm, dup?
c_it->second.query_match = mi.query_match();
return c_it->second;
} else {
// Else:
@ -218,7 +217,7 @@ determine_id_table (QueryResultsType& qres)
parent_ref_container = ref_container;
}
// Add the query_match to the chain.
if (parent_ref_container && !container.parent) {
if (!parent_ref_container->is_reachable(&container))
@ -276,13 +275,13 @@ static void
prune (Container* child)
{
Container *container{child->parent};
for (auto& grandchild: child->children) {
grandchild->parent = container;
if (container)
container->children.emplace_back(grandchild);
}
child->children.clear();
child->is_nuked = true;
@ -295,7 +294,7 @@ static bool
prune_empty_containers (Container& container)
{
Containers to_prune;
container.for_each_child([&](auto& child){
if (prune_empty_containers(*child))
to_prune.emplace_back(child);
@ -303,7 +302,7 @@ prune_empty_containers (Container& container)
for (auto& child: to_prune)
prune (child);
// Never nuke these.
if (container.query_match)
return false;
@ -328,7 +327,7 @@ static void
prune_empty_containers (IdTable& id_table)
{
for (auto&& item: id_table) {
auto& child(item.second);
if (child.parent)
continue; // not a root child.
@ -384,7 +383,7 @@ update_container (Container& container, bool descending,
first->query_match->flags |= QueryMatch::Flags::First;
Container* last = container.children.back();
if (last->query_match)
last->query_match->flags |= QueryMatch::Flags::Last;
last->query_match->flags |= QueryMatch::Flags::Last;
}
// calculate the "thread-subject", which is for UI
@ -408,7 +407,7 @@ update_container (Container& container, bool descending,
return true;
}
static void
update_containers (Containers& children, bool descending, ThreadPath& tpath,
@ -450,7 +449,7 @@ static void
sort_container (Container& container)
{
// 1. childless container.
if (container.children.empty()) {
container.thread_date_key = container.query_match->date_key;
return;
@ -493,12 +492,12 @@ sort_siblings (IdTable& id_table, bool descending)
if (c->query_match) // for debugging, remember
c->query_match->thread_date = c->thread_date_key;
}
// and then sort the root set.
//
// The difference with the sub-root containers is that at the top-level,
// we can sort either in ascending or descending order, while on the
// subroot level it's always in ascending order.
// subroot level it's always in ascending order.
//
// Note that unless we're testing, _xapian_ will handle
// the ascending/descending of the top level.
@ -524,7 +523,7 @@ operator<<(std::ostream& os, const IdTable& id_table)
os << item.first << " => " << item.second << "\n";
}
os << "------------------------------------------------\n";
std::set<std::string> ids;
for (auto&& item: id_table) {
if (item.second.query_match)
@ -551,7 +550,7 @@ calculate_threads_real (Results& qres, bool descending)
if (g_test_verbose())
std::cout << "*** id-table(1):\n" << id_table << "\n";
// // Step 2: get the root set
// // Step 3: discard id_table
@ -786,11 +785,11 @@ static void
test_prune_empty_with_children()
{
// m6 should be nuked
auto results = MockQueryResults {
auto results = MockQueryResults {
MockQueryResult{ "m1", "1", {"m7", "m6"} },
MockQueryResult{ "m2", "2", {"m7", "m6"} },
};
calculate_threads(results, false);
assert_thread_paths (results, {
@ -843,9 +842,9 @@ test_thread_info_descending()
assert_thread_paths (results, {
{ "m1", "0:z"}, // 5
{ "m2", "1:z" }, // 2
{ "m2", "1:z" }, // 2
{ "m4", "1:f:z" }, // 2
{ "m3", "1:e:z"}, // 3
{ "m3", "1:e:z"}, // 3
});
g_assert_true (results[0].query_match().has_flag(
@ -856,7 +855,7 @@ test_thread_info_descending()
QueryMatch::Flags::Last));
g_assert_true (results[3].query_match().has_flag(
QueryMatch::Flags::First));
}

View File

@ -49,7 +49,7 @@ struct Query::Private {
const StringSet& thread_ids,
MuMsgFieldId sortfieldid, QueryFlags qflags) const;
Option<QueryResults> run_threaded (QueryResults &qres, Xapian::Enquire& enq,
Option<QueryResults> run_threaded (QueryResults&& qres, Xapian::Enquire& enq,
QueryFlags qflags) const;
Option<QueryResults> run_singular (const std::string& expr, MuMsgFieldId sortfieldid,
QueryFlags qflags, size_t maxnum) const;
@ -118,32 +118,31 @@ Query::Private::make_related_enquire (const Xapian::Query& first_q,
}
struct ThreadKeyMaker: public Xapian::KeyMaker {
ThreadKeyMaker (const QueryMatches& matches, bool descending):
match_info_(matches),
not_found_key_{descending ? "#" : "z"}
{}
ThreadKeyMaker (const QueryMatches& matches): match_info_(matches) {}
std::string operator()(const Xapian::Document& doc) const override {
const auto it{match_info_.find(doc.get_docid())};
return (it == match_info_.end()) ? not_found_key_ : it->second.thread_path;
if (it == match_info_.end())
g_warning ("not found! %u", doc.get_docid());
return (it == match_info_.end()) ? "" : it->second.thread_path;
}
const QueryMatches& match_info_;
const std::string not_found_key_;
};
Option<QueryResults>
Query::Private::run_threaded (QueryResults &qres, Xapian::Enquire& enq,
Query::Private::run_threaded (QueryResults&& qres, Xapian::Enquire& enq,
QueryFlags qflags) const
{
const auto descending{any_of(qflags & QueryFlags::Descending)};
calculate_threads(qres, descending);
ThreadKeyMaker key_maker{qres.query_matches(), descending};
ThreadKeyMaker key_maker{qres.query_matches()};
enq.set_sort_by_key(&key_maker, descending);
DeciderInfo minfo;
minfo.matches = qres.query_matches();
auto mset{enq.get_mset(0, qres.size(), {}, make_final_decider(qflags, minfo).get())};
auto mset{enq.get_mset(0, qres.size(), {},
make_thread_decider(qflags, minfo).get())};
mset.fetch();
return QueryResults{mset, std::move(qres.query_matches())};
@ -174,7 +173,7 @@ Query::Private::run_singular (const std::string& expr, MuMsgFieldId sortfieldid,
auto qres{QueryResults{mset, std::move(minfo.matches)}};
return threading ? run_threaded(qres, enq, qflags) : qres;
return threading ? run_threaded(std::move(qres), enq, qflags) : qres;
}
@ -209,7 +208,7 @@ Query::Private::run_related (const std::string& expr, MuMsgFieldId sortfieldid,
{}, make_related_decider(qflags, minfo).get())};
auto qres{QueryResults{r_mset, std::move(minfo.matches)}};
return threading ? run_threaded(qres, r_enq, qflags) : qres;
return threading ? run_threaded(std::move(qres), r_enq, qflags) : qres;
}
Option<QueryResults>