mirror of https://github.com/djcb/mu.git
store: add 'add_document' optimization, use it
*Usually* we need Xapian's replace_document() API, but when we know a document (message) is completely new, we can use the faster add_document(). That is the case with the initial (re)indexing, when we start with an empty database. Also a few smaller cleanups.
This commit is contained in:
parent
4d8ba5f579
commit
4c0b7db3d8
|
@ -79,14 +79,18 @@ private:
|
|||
struct Indexer::Private {
|
||||
Private(Mu::Store& store)
|
||||
: store_{store}, scanner_{store_.root_maildir(),
|
||||
[this](auto&& path,
|
||||
auto&& statbuf, auto&& info) {
|
||||
return handler(path, statbuf, info);
|
||||
}},
|
||||
max_message_size_{store_.config().get<Mu::Config::Id::MaxMessageSize>()} {
|
||||
mu_message("created indexer for {} -> {} (batch-size: {})",
|
||||
store.root_maildir(), store.path(),
|
||||
store.config().get<Mu::Config::Id::BatchSize>());
|
||||
[this](auto&& path,
|
||||
auto&& statbuf, auto&& info) {
|
||||
return handler(path, statbuf, info);
|
||||
}},
|
||||
max_message_size_{store_.config().get<Mu::Config::Id::MaxMessageSize>()},
|
||||
was_empty_{store.empty()} {
|
||||
|
||||
mu_message("created indexer for {} -> "
|
||||
"{} (batch-size: {}; was-empty: {})",
|
||||
store.root_maildir(), store.path(),
|
||||
store.config().get<Mu::Config::Id::BatchSize>(),
|
||||
was_empty_);
|
||||
}
|
||||
|
||||
~Private() {
|
||||
|
@ -127,11 +131,11 @@ struct Indexer::Private {
|
|||
|
||||
AsyncQueue<WorkItem> todos_;
|
||||
|
||||
Progress progress_;
|
||||
IndexState state_;
|
||||
Progress progress_{};
|
||||
IndexState state_{};
|
||||
std::mutex lock_, w_lock_;
|
||||
|
||||
std::atomic<time_t> completed_;
|
||||
std::atomic<time_t> completed_{};
|
||||
bool was_empty_{};
|
||||
};
|
||||
|
||||
bool
|
||||
|
@ -240,7 +244,12 @@ Indexer::Private::add_message(const std::string& path)
|
|||
mu_warning("failed to create message from {}: {}", path, msg.error().what());
|
||||
return false;
|
||||
}
|
||||
auto res = store_.add_message(msg.value(), true /*use-transaction*/);
|
||||
// if the store was empty, we know that the message is completely new
|
||||
// and can use the fast path (Xapian's 'add_document' rather than
|
||||
// 'replace_document')
|
||||
auto res = store_.add_message(msg.value(),
|
||||
true /*use-transaction*/,
|
||||
was_empty_);
|
||||
if (!res) {
|
||||
mu_warning("failed to add message @ {}: {}", path, res.error().what());
|
||||
return false;
|
||||
|
|
|
@ -133,6 +133,7 @@ struct Store::Private {
|
|||
}
|
||||
|
||||
Option<Message> find_message_unlocked(Store::Id docid) const;
|
||||
Result<Store::Id> add_message_unlocked(Message& msg);
|
||||
Result<Store::Id> update_message_unlocked(Message& msg, Store::Id docid);
|
||||
Result<Store::Id> update_message_unlocked(Message& msg, const std::string& old_path);
|
||||
Result<Message> move_message_unlocked(Message&& msg,
|
||||
|
@ -150,10 +151,22 @@ struct Store::Private {
|
|||
std::mutex lock_;
|
||||
};
|
||||
|
||||
Result<Store::Id>Store::Private::update_message_unlocked(Message& msg, Store::Id docid)
|
||||
|
||||
Result<Store::Id>
|
||||
Store::Private::add_message_unlocked(Message& msg)
|
||||
{
|
||||
auto docid{xapian_db_.add_document(msg.document().xapian_document())};
|
||||
mu_debug("added message @ {}; docid = {}", msg.path(), docid);
|
||||
|
||||
return Ok(std::move(docid));
|
||||
}
|
||||
|
||||
|
||||
Result<Store::Id>
|
||||
Store::Private::update_message_unlocked(Message& msg, Store::Id docid)
|
||||
{
|
||||
xapian_db_.replace_document(docid, msg.document().xapian_document());
|
||||
g_debug("updated message @ %s; docid = %u", msg.path().c_str(), docid);
|
||||
mu_debug("updated message @ {}; docid = {}", msg.path(), docid);
|
||||
|
||||
return Ok(std::move(docid));
|
||||
}
|
||||
|
@ -288,26 +301,15 @@ Store::indexer()
|
|||
}
|
||||
|
||||
Result<Store::Id>
|
||||
Store::add_message(const std::string& path, bool use_transaction)
|
||||
Store::add_message(Message& msg, bool use_transaction, bool is_new)
|
||||
{
|
||||
if (auto msg{Message::make_from_path(path)}; !msg)
|
||||
return Err(msg.error());
|
||||
else
|
||||
return add_message(msg.value(), use_transaction);
|
||||
}
|
||||
|
||||
Result<Store::Id>
|
||||
Store::add_message(Message& msg, bool use_transaction)
|
||||
{
|
||||
std::lock_guard guard{priv_->lock_};
|
||||
|
||||
const auto mdir{maildir_from_path(msg.path(),
|
||||
root_maildir())};
|
||||
if (!mdir)
|
||||
return Err(mdir.error());
|
||||
|
||||
if (auto&& res = msg.set_maildir(mdir.value()); !res)
|
||||
return Err(res.error());
|
||||
|
||||
/* add contacts from this message to cache; this cache
|
||||
* also determines whether those contacts are _personal_, i.e. match
|
||||
* our personal addresses.
|
||||
|
@ -320,37 +322,34 @@ Store::add_message(Message& msg, bool use_transaction)
|
|||
if (is_personal)
|
||||
msg.set_flags(msg.flags() | Flags::Personal);
|
||||
|
||||
std::lock_guard guard{priv_->lock_};
|
||||
if (use_transaction)
|
||||
priv_->transaction_inc();
|
||||
|
||||
auto res = priv_->update_message_unlocked(msg, msg.path());
|
||||
auto&& res = is_new ?
|
||||
priv_->add_message_unlocked(msg) :
|
||||
priv_->update_message_unlocked(msg, msg.path());
|
||||
if (!res)
|
||||
return Err(res.error());
|
||||
|
||||
if (use_transaction) /* commit if batch is full */
|
||||
priv_->transaction_maybe_commit();
|
||||
|
||||
g_debug("added %smessage @ %s; docid = %u",
|
||||
is_personal ? "personal " : "", msg.path().c_str(), *res);
|
||||
mu_debug("added {}message @ {}; docid = {}",
|
||||
is_personal ? "personal " : "", msg.path(), *res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Result<Store::Id>
|
||||
Store::update_message(Message& msg, Store::Id docid)
|
||||
{
|
||||
std::lock_guard guard{priv_->lock_};
|
||||
|
||||
return priv_->update_message_unlocked(msg, docid);
|
||||
}
|
||||
|
||||
bool
|
||||
Store::remove_message(const std::string& path)
|
||||
{
|
||||
std::lock_guard guard{priv_->lock_};
|
||||
const auto term{field_from_id(Field::Id::Path).xapian_term(path)};
|
||||
|
||||
std::lock_guard guard{priv_->lock_};
|
||||
|
||||
xapian_db().delete_document(term);
|
||||
g_debug("deleted message @ %s from store", path.c_str());
|
||||
mu_debug("deleted message @ {} from store", path);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -433,14 +432,11 @@ messages_with_msgid(const Store& store, const std::string& msgid, size_t max=100
|
|||
} else if (msgid.empty())
|
||||
return {};
|
||||
|
||||
const auto xprefix{field_from_id(Field::Id::MessageId).shortcut};
|
||||
/*XXX this is a bit dodgy */
|
||||
auto tmp{g_ascii_strdown(msgid.c_str(), -1)};
|
||||
auto expr{g_strdup_printf("%c:%s", xprefix, tmp)};
|
||||
g_free(tmp);
|
||||
constexpr auto xprefix{field_from_id(Field::Id::MessageId).shortcut};
|
||||
auto expr{mu_format("{}:{}", xprefix,
|
||||
to_string_gchar(g_ascii_strdown(msgid.c_str(), -1)))};
|
||||
|
||||
const auto res{store.run_query(expr, {}, QueryFlags::None, max)};
|
||||
g_free(expr);
|
||||
if (!res) {
|
||||
mu_warning("failed to run message-id-query: {}", res.error().what());
|
||||
return {};
|
||||
|
|
|
@ -186,30 +186,32 @@ public:
|
|||
std::string parse_query(const std::string& expr, bool xapian) const;
|
||||
|
||||
/**
|
||||
* Add a message to the store. When planning to write many messages,
|
||||
* it's much faster to do so in a transaction. If so, set
|
||||
* Add or update a message to the store. When planning to write many
|
||||
* messages, it's much faster to do so in a transaction. If so, set
|
||||
* @p use_transaction to true. When done with adding messages, call
|
||||
* commit().
|
||||
*
|
||||
* @param path the message path.
|
||||
* @param whether to bundle up to batch_size changes in a transaction
|
||||
*
|
||||
* @return the doc id of the added message or an error.
|
||||
*/
|
||||
Result<Id> add_message(const std::string& path, bool use_transaction = false);
|
||||
|
||||
/**
|
||||
* Add a message to the store. When planning to write many messages,
|
||||
* it's much faster to do so in a transaction. If so, set
|
||||
* @in_transaction to true. When done with adding messages, call
|
||||
* commit().
|
||||
* Optimization: If you are sure the message (i.e., a message with the
|
||||
* given file-system path) does not yet exist in the database, i.e., when
|
||||
* doing the initial indexing, set @p is_new to true since we then don't
|
||||
* have to check for the existing message.
|
||||
*
|
||||
* @param msg a message
|
||||
* @param whether to bundle up to batch_size changes in a transaction
|
||||
* @param use_transaction whether to bundle up to batch_size
|
||||
* changes in a transaction
|
||||
* @param is_new whether this is a completely new message
|
||||
*
|
||||
* @return the doc id of the added message or an error.
|
||||
*/
|
||||
Result<Id> add_message(Message& msg, bool use_transaction = false);
|
||||
Result<Id> add_message(Message& msg, bool use_transaction = false,
|
||||
bool is_new = false);
|
||||
Result<Id> add_message(const std::string& path, bool use_transaction = false,
|
||||
bool is_new = false) {
|
||||
if (auto msg{Message::make_from_path(path)}; !msg)
|
||||
return Err(msg.error());
|
||||
else
|
||||
return add_message(msg.value(), use_transaction, is_new);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a message in the store.
|
||||
|
@ -219,7 +221,6 @@ public:
|
|||
*
|
||||
* @return Ok() or an error.
|
||||
*/
|
||||
Result<Store::Id> update_message(Message& msg, Id id);
|
||||
|
||||
/**
|
||||
* Remove a message from the store. It will _not_ remove the message
|
||||
|
@ -414,7 +415,7 @@ public:
|
|||
*
|
||||
* @return true or false
|
||||
*/
|
||||
size_t empty() const { return xapian_db().empty(); }
|
||||
bool empty() const { return xapian_db().empty(); }
|
||||
|
||||
/*
|
||||
* _almost_ private
|
||||
|
|
|
@ -281,6 +281,22 @@ public:
|
|||
DB_LOCKED; return db().term_exists(term);}, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new document to the database
|
||||
*
|
||||
* @param doc a document (message)
|
||||
*
|
||||
* @return new docid or 0
|
||||
*/
|
||||
Xapian::docid add_document(const Xapian::Document& doc) {
|
||||
return xapian_try([&]{
|
||||
DB_LOCKED;
|
||||
auto&& id= wdb().add_document(doc);
|
||||
set_timestamp(MetadataIface::last_change_key);
|
||||
return id;
|
||||
}, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace document in database
|
||||
*
|
||||
|
@ -288,7 +304,7 @@ public:
|
|||
* @param id docid
|
||||
* @param doc replacement document
|
||||
*
|
||||
* @return new docid or nothing.
|
||||
* @return new docid or 0
|
||||
*/
|
||||
Xapian::docid replace_document(const std::string& term, const Xapian::Document& doc) {
|
||||
return xapian_try([&]{
|
||||
|
|
|
@ -202,7 +202,7 @@ goto * instructions[pOp->opcode];
|
|||
g_assert_cmpuint(store->size(),==, 1);
|
||||
|
||||
/* ensure 'update' dtrt, i.e., nothing. */
|
||||
const auto docid2 = store->update_message(*message, *docid);
|
||||
const auto docid2 = store->add_message(*message, *docid);
|
||||
assert_valid_result(docid2);
|
||||
g_assert_cmpuint(store->size(),==, 1);
|
||||
g_assert_cmpuint(*docid,==,*docid2);
|
||||
|
|
Loading…
Reference in New Issue