diff --git a/lib/index/mu-indexer.cc b/lib/index/mu-indexer.cc index 07cf047d..1f90bc72 100644 --- a/lib/index/mu-indexer.cc +++ b/lib/index/mu-indexer.cc @@ -69,7 +69,7 @@ struct IndexState { void change_to(State new_state) { g_debug("changing indexer state %s->%s", name((State)state_), - name((State)new_state)); + name((State)new_state)); state_ = new_state; } @@ -80,14 +80,14 @@ private: struct Indexer::Private { Private(Mu::Store& store) : store_{store}, scanner_{store_.properties().root_maildir, - [this](auto&& path, auto&& statbuf, auto&& info) { + [this](auto&& path, auto&& statbuf, auto&& info) { return handler(path, statbuf, info); }}, max_message_size_{store_.properties().max_message_size} { g_message("created indexer for %s -> %s (batch-size: %zu)", - store.properties().root_maildir.c_str(), - store.properties().database_path.c_str(), store.properties().batch_size); + store.properties().root_maildir.c_str(), + store.properties().database_path.c_str(), store.properties().batch_size); } ~Private() @@ -135,7 +135,7 @@ struct Indexer::Private { bool Indexer::Private::handler(const std::string& fullpath, struct stat* statbuf, - Scanner::HandleType htype) + Scanner::HandleType htype) { switch (htype) { case Scanner::HandleType::EnterDir: @@ -148,8 +148,8 @@ Indexer::Private::handler(const std::string& fullpath, struct stat* statbuf, if (conf_.lazy_check && dirstamp_ >= statbuf->st_mtime && htype == Scanner::HandleType::EnterNewCur) { g_debug("skip %s (seems up-to-date: %s >= %s)", fullpath.c_str(), - time_to_string("%FT%T", dirstamp_).c_str(), - time_to_string("%FT%T", statbuf->st_mtime).c_str()); + time_to_string("%FT%T", dirstamp_).c_str(), + time_to_string("%FT%T", statbuf->st_mtime).c_str()); return false; } @@ -183,7 +183,7 @@ Indexer::Private::handler(const std::string& fullpath, struct stat* statbuf, if ((size_t)statbuf->st_size > max_message_size_) { g_debug("skip %s (too big: %" G_GINT64_FORMAT " bytes)", fullpath.c_str(), - (gint64)statbuf->st_size); + (gint64)statbuf->st_size); return false; } @@ -216,6 +216,26 @@ Indexer::Private::maybe_start_worker() } } +static bool +add_message(Store& store, const std::string& path) +{ + auto msg{Message::make_from_path(path)}; + if (!msg) { + g_warning("failed to create message from %s: %s", + path.c_str(), msg.error().what()); + return false; + } + + auto res = store.add_message(msg.value(), true /*use-transaction*/); + if (!res) { + g_warning("failed to add message @ %s: %s", + path.c_str(), res.error().what()); + return false; + } + + return true; +} + void Indexer::Private::item_worker() { @@ -230,9 +250,8 @@ Indexer::Private::item_worker() std::lock_guard lock{w_lock_}; switch (item.type) { case WorkItem::Type::File: - store_.add_message(item.full_path, - true /*use-transaction*/); - ++progress_.updated; + if (add_message(store_, item.full_path)) + ++progress_.updated; break; case WorkItem::Type::Dir: store_.set_dirstamp(item.full_path, ::time(NULL)); @@ -243,7 +262,7 @@ Indexer::Private::item_worker() } } catch (const Mu::Error& er) { g_warning("error adding message @ %s: %s", - item.full_path.c_str(), er.what()); + item.full_path.c_str(), er.what()); } maybe_start_worker(); @@ -262,7 +281,7 @@ Indexer::Private::cleanup() ++n; if (::access(path.c_str(), R_OK) != 0) { g_debug("cannot read %s (id=%u); queueing for removal from store", - path.c_str(), id); + path.c_str(), id); orphans.emplace_back(id); } @@ -304,7 +323,7 @@ Indexer::Private::scan_worker() return workers_.size(); }); g_debug("process %zu remaining message(s) with %zu worker(s)", - todos_.size(), workers_size); + todos_.size(), workers_size); while (!todos_.empty()) std::this_thread::sleep_for(100ms); } @@ -328,16 +347,15 @@ Indexer::Private::start(const Indexer::Config& conf) conf_ = conf; if (conf_.max_threads == 0) { - /* we're blocked mostly by a) filesystem and b) database; - * so it's not very useful to start many threads */ - max_workers_ = std::max( - 4U, std::thread::hardware_concurrency()); + /* note, most time is spent in the (single) db thread + * but creating messages in parallel still helps a bit */ + max_workers_ = std::thread::hardware_concurrency()/2; } else max_workers_ = conf.max_threads; g_debug("starting indexer with <= %zu worker thread(s)", max_workers_); g_debug("indexing: %s; clean-up: %s", conf_.scan ? "yes" : "no", - conf_.cleanup ? "yes" : "no"); + conf_.cleanup ? "yes" : "no"); state_.change_to(IndexState::Scanning); /* kick off the first worker, which will spawn more if needed. */ diff --git a/lib/mu-store.cc b/lib/mu-store.cc index bddeda7b..38b06717 100644 --- a/lib/mu-store.cc +++ b/lib/mu-store.cc @@ -69,8 +69,9 @@ struct Store::Private { : read_only_{readonly}, db_{make_xapian_db(path, read_only_ ? XapianOpts::ReadOnly : XapianOpts::Open)}, - properties_{make_properties(path)}, contacts_cache_{db().get_metadata(ContactsKey), - properties_.personal_addresses} + properties_{make_properties(path)}, + contacts_cache_{db().get_metadata(ContactsKey), + properties_.personal_addresses} {} Private(const std::string& path, @@ -204,10 +205,10 @@ struct Store::Private { } Store::Properties init_metadata(const Store::Config& conf, - const std::string& path, - const std::string& root_maildir, - const StringVec& personal_addresses) - { + const std::string& path, + const std::string& root_maildir, + const StringVec& personal_addresses) + { writable_db().set_metadata(SchemaVersionKey, ExpectedSchemaVersion); writable_db().set_metadata(CreatedKey, Mu::format("%" PRId64, (int64_t)::time({}))); @@ -218,7 +219,7 @@ struct Store::Private { : DefaultMaxMessageSize; writable_db().set_metadata(MaxMessageSizeKey, Mu::format("%zu", max_msg_size)); - writable_db().set_metadata(RootMaildirKey, root_maildir); + writable_db().set_metadata(RootMaildirKey, canonicalize_filename(root_maildir, {})); std::string addrs; for (const auto& addr : personal_addresses) { // _very_ minimal check. @@ -318,60 +319,54 @@ Store::empty() const return size() == 0; } -static std::string -maildir_from_path(const std::string& root, const std::string& path) -{ - if (G_UNLIKELY(root.empty()) || root.length() >= path.length() || path.find(root) != 0) - throw Mu::Error{Error::Code::InvalidArgument, - "root '%s' is not a proper suffix of path '%s'", - root.c_str(), - path.c_str()}; - - auto mdir{path.substr(root.length())}; - auto slash{mdir.rfind('/')}; - - if (G_UNLIKELY(slash == std::string::npos) || slash < 4) - throw Mu::Error{Error::Code::InvalidArgument, "invalid path: %s", path.c_str()}; - mdir.erase(slash); - auto subdir = mdir.data() + slash - 4; - if (G_UNLIKELY(strncmp(subdir, "/cur", 4) != 0 && strncmp(subdir, "/new", 4))) - throw Mu::Error{Error::Code::InvalidArgument, - "cannot find '/new' or '/cur' - invalid path: %s", - path.c_str()}; - if (mdir.length() == 4) - return "/"; - - mdir.erase(mdir.length() - 4); - return mdir; -} - Result Store::add_message(const std::string& path, bool use_transaction) { - const auto maildir{maildir_from_path(properties().root_maildir, path)}; - auto msg{Message::make_from_path(Message::Options::None, path, maildir)}; - if (G_UNLIKELY(!msg)) + if (auto msg{Message::make_from_path(path)}; !msg) return Err(msg.error()); + else + return add_message(msg.value(), use_transaction); +} + +Result +Store::add_message(Message& msg, bool use_transaction) +{ + const auto mdir{mu_maildir_from_path(msg.path(), + properties().root_maildir)}; + if (!mdir) + return Err(mdir.error()); + + if (auto&& res = msg.set_maildir(mdir.value()); !res) + return Err(res.error()); std::lock_guard guard{priv_->lock_}; - if (use_transaction) - priv_->transaction_inc(); + priv_->contacts_cache_.add(msg.all_contacts()); - const auto docid = priv_->writable_db().add_document( - msg->document().xapian_document()); + const auto docid = xapian_try([&]{ - if (use_transaction) /* commit if batch is full */ - priv_->transaction_maybe_commit(); + if (use_transaction) + priv_->transaction_inc(); + + const auto docid = priv_->writable_db().add_document( + msg.document().xapian_document()); + + if (use_transaction) /* commit if batch is full */ + priv_->transaction_maybe_commit(); + + return docid; + }, InvalidId); if (G_UNLIKELY(docid == InvalidId)) return Err(Error::Code::Message, "failed to add message"); - g_debug("added message @ %s; docid = %u", path.c_str(), docid); + g_debug("added message @ %s; docid = %u", msg.path().c_str(), docid); + g_debug("%s", msg.document().xapian_document().get_description().c_str()); return Ok(static_cast(docid)); } + bool Store::update_message(const Message& msg, unsigned docid) { @@ -379,8 +374,8 @@ Store::update_message(const Message& msg, unsigned docid) [&]{ priv_->writable_db().replace_document( docid, msg.document().xapian_document()); - - g_debug("updated message %u @ %s", docid, msg.path().c_str()); + g_debug("updated message @ %s; docid = %u", + msg.path().c_str(), docid); return true; }, false); } @@ -462,9 +457,6 @@ Store::move_message(Store::Id id, return Ok(std::move(msg.value())); } - - - std::string Store::metadata(const std::string& key) const { @@ -598,14 +590,12 @@ Store::lock() const return priv_->lock_; } -Option +Result Store::run_query(const std::string& expr, - Option sortfield_id, + Field::Id sortfield_id, QueryFlags flags, size_t maxnum) const { - return xapian_try([&] { - Query q{*this}; - return q.run(expr, sortfield_id, flags, maxnum);}, Nothing); + return Query{*this}.run(expr, sortfield_id, flags, maxnum); } size_t diff --git a/lib/mu-store.hh b/lib/mu-store.hh index 8140ee48..d6da9df3 100644 --- a/lib/mu-store.hh +++ b/lib/mu-store.hh @@ -133,7 +133,7 @@ public: Indexer& indexer(); /** - * Run a query; see the `mu-query` man page for the syntax. + * Run a query; see the `mu-query` man page for the syntax. * * Multi-threaded callers must aquire the lock and keep it * at least as long as the return value. @@ -143,11 +143,11 @@ public: * @param flags query flags * @param maxnum maximum number of results to return. 0 for 'no limit' * - * @return the query-results, or Nothing in case of error. + * @return the query-results or an error. */ std::mutex& lock() const; - Option run_query(const std::string& expr = "", - Option sortfield_id = {}, + Result run_query(const std::string& expr, + Field::Id sortfield_id = Field::Id::Date, QueryFlags flags = QueryFlags::None, size_t maxnum = 0) const; @@ -186,6 +186,19 @@ public: */ Result add_message(const std::string& path, bool use_transaction = false); + /** + * Add a message to the store. When planning to write many messages, + * it's much faster to do so in a transaction. If so, set + * @in_transaction to true. When done with adding messages, call + * commit(). + * + * @param msg a message + * @param whether to bundle up to batch_size changes in a transaction + * + * @return the doc id of the added message or an error. + */ + Result add_message(Message& msg, bool use_transaction = false); + /** * Update a message in the store. * diff --git a/lib/tests/test-mu-store.cc b/lib/tests/test-mu-store.cc index 6f45a079..c8a3e37a 100644 --- a/lib/tests/test-mu-store.cc +++ b/lib/tests/test-mu-store.cc @@ -52,31 +52,178 @@ test_store_ctor_dtor() static void test_store_add_count_remove() { - TempDir tempdir; - Mu::Store store{tempdir.path(), MuTestMaildir, {}, {}}; + TempDir tempdir{false}; - const auto id1 = store.add_message(MuTestMaildir + "/cur/1283599333.1840_11.cthulhu!2,"); + Mu::Store store{tempdir.path() + "/xapian", MuTestMaildir, {}, {}}; + + const auto msgpath{MuTestMaildir + "/cur/1283599333.1840_11.cthulhu!2,"}; + const auto id1 = store.add_message(msgpath); assert_valid_result(id1); + store.commit(); g_assert_cmpuint(store.size(), ==, 1); - g_assert_true(store.contains_message(MuTestMaildir + "/cur/1283599333.1840_11.cthulhu!2,")); + g_assert_true(store.contains_message(msgpath)); + + g_assert_true(store.contains_message(msgpath)); const auto id2 = store.add_message(MuTestMaildir2 + "/bar/cur/mail3"); - assert_valid_result(id2); + g_assert_false(!!id2); // wrong maildir. + store.commit(); + + const auto msg3path{MuTestMaildir + "/cur/1252168370_3.14675.cthulhu!2,S"}; + const auto id3 = store.add_message(msg3path); + assert_valid_result(id3); g_assert_cmpuint(store.size(), ==, 2); - g_assert_true(store.contains_message(MuTestMaildir2 + "/bar/cur/mail3")); + g_assert_true(store.contains_message(msg3path)); store.remove_message(id1.value()); g_assert_cmpuint(store.size(), ==, 1); g_assert_false( store.contains_message(MuTestMaildir + "/cur/1283599333.1840_11.cthulhu!2,")); - store.remove_message(MuTestMaildir2 + "/bar/cur/mail3"); + store.remove_message(msg3path); g_assert_true(store.empty()); - g_assert_false(store.contains_message(MuTestMaildir2 + "/bar/cur/mail3")); + g_assert_false(store.contains_message(msg3path)); } + +static void +test_message_mailing_list() +{ + constexpr const char *test_message_1 = +R"(Return-Path: +X-Original-To: xxxx@localhost +Delivered-To: xxxx@localhost +Received: from mindcrime (localhost [127.0.0.1]) + by mail.xxxxsoftware.nl (Postfix) with ESMTP id 32F276963F + for ; Mon, 4 Aug 2008 21:49:34 +0300 (EEST) +Message-Id: <83B5AF40-DBFA-4578-A043-04C80276E195@sqlabs.net> +From: anon@example.com +To: sqlite-dev@sqlite.org +Mime-Version: 1.0 (Apple Message framework v926) +Date: Mon, 4 Aug 2008 11:40:49 +0200 +X-Mailer: Apple Mail (2.926) +Subject: Capybaras United +Precedence: list +Reply-To: sqlite-dev@sqlite.org +List-Id: +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +Sender: sqlite-dev-bounces@sqlite.org +Content-Length: 639 + +Inside sqlite3VdbeExec there is a very big switch statement. +In order to increase performance with few modifications to the +original code, why not use this technique ? +http://docs.freebsd.org/info/gcc/gcc.info.Labels_as_Values.html + +With a properly defined "instructions" array, instead of the switch +statement you can use something like: +goto * instructions[pOp->opcode]; +)"; + TempDir tempdir; + Mu::Store store{tempdir.path(), "/home/test/Maildir", {}, {}}; + + const auto msgpath{"/home/test/Maildir/inbox/cur/1649279256.107710_1.evergrey:2,S"}; + auto message{Message::make_from_text(test_message_1, msgpath)}; + assert_valid_result(message); + + const auto docid = store.add_message(*message); + assert_valid_result(docid); + g_assert_cmpuint(store.size(),==, 1); + + auto msg2{store.find_message(*docid)}; + g_assert_true(!!msg2); + assert_equal(message->path(), msg2->path()); + + g_assert_true(store.contains_message(message->path())); + + const auto qr = store.run_query("to:sqlite-dev@sqlite.org"); + g_assert_true(!!qr); + g_assert_cmpuint(qr->size(), ==, 1); +} + + +static void +test_message_attachments(void) +{ + constexpr const char* msg_text = +R"(Return-Path: +Received: from pop.gmail.com [256.85.129.309] + by evergrey with POP3 (fetchmail-6.4.29) + for (single-drop); Thu, 24 Mar 2022 20:12:40 +0200 (EET) +Sender: "Foo, Example" +User-agent: mu4e 1.7.11; emacs 29.0.50 +From: "Foo Example" +To: bar@example.com +Subject: =?utf-8?B?w6R0dMOkY2htZcOxdHM=?= +Date: Thu, 24 Mar 2022 20:04:39 +0200 +Organization: ACME Inc. +Message-Id: <3144HPOJ0VC77.3H1XTAG2AMTLH@"@WILSONB.COM> +MIME-Version: 1.0 +X-label: @NextActions operation:mindcrime Queensrÿche +Content-Type: multipart/mixed; boundary="=-=-=" + +--=-=-= +Content-Type: text/plain + +Hello, +--=-=-= +Content-Type: image/jpeg +Content-Disposition: attachment; filename=file-01.bin +Content-Transfer-Encoding: base64 + +AAECAw== +--=-=-= +Content-Type: audio/ogg +Content-Disposition: inline; filename=/tmp/file-02.bin +Content-Transfer-Encoding: base64 + +BAUGBw== +--=-=-= +Content-Type: message/rfc822 +Content-Disposition: attachment; + filename="message.eml" + +From: "Fnorb" +To: Bob +Subject: news for you +Date: Mon, 28 Mar 2022 22:53:26 +0300 + +Attached message! + +--=-=-= +Content-Type: text/plain + +World! +--=-=-=-- +)"; + + TempDir tempdir; + Mu::Store store{tempdir.path(), "/home/test/Maildir", {}, {}}; + + auto message{Message::make_from_text( + msg_text, + "/home/test/Maildir/inbox/cur/1649279256.abcde_1.evergrey:2,S")}; + assert_valid_result(message); + + const auto docid = store.add_message(*message); + assert_valid_result(docid); + store.commit(); + + auto msg2{store.find_message(*docid)}; + g_assert_true(!!msg2); + assert_equal(message->path(), msg2->path()); + + g_assert_true(store.contains_message(message->path())); + + // for (auto&& term = msg2->document().xapian_document().termlist_begin(); + // term != msg2->document().xapian_document().termlist_end(); ++term) + // g_message(">>> %s", (*term).c_str()); +} + + int main(int argc, char* argv[]) { @@ -85,11 +232,10 @@ main(int argc, char* argv[]) /* mu_runtime_init/uninit */ g_test_add_func("/store/ctor-dtor", test_store_ctor_dtor); g_test_add_func("/store/add-count-remove", test_store_add_count_remove); - - // if (!g_test_verbose()) - // g_log_set_handler (NULL, - // G_LOG_LEVEL_MASK | G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION, - // (GLogFunc)black_hole, NULL); + g_test_add_func("/store/message/mailing-list", + test_message_mailing_list); + g_test_add_func("/store/message/attachments", + test_message_attachments); return g_test_run(); }