mirror of https://github.com/djcb/mu.git
lib/index: Implement new indexer
Implement a new message indexer consisting of a single-threaded scanner and a multi-threaded indexer. This allows for a number of optimizations as well as background indexing, though this initial version should behave similarly to the old indexer.
This commit is contained in:
parent
0e50bfc02c
commit
4e6bd7dfdf
|
@ -264,6 +264,7 @@ lib/Makefile
|
|||
lib/doxyfile
|
||||
lib/utils/Makefile
|
||||
lib/query/Makefile
|
||||
lib/index/Makefile
|
||||
mu4e/Makefile
|
||||
mu4e/mu4e-meta.el
|
||||
guile/Makefile
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
# before descending into tests/
|
||||
include $(top_srcdir)/gtest.mk
|
||||
|
||||
SUBDIRS= utils query
|
||||
SUBDIRS= utils query index
|
||||
|
||||
if HAVE_JSON_GLIB
|
||||
json_srcs= \
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
## Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
##
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 3 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License
|
||||
## along with this program; if not, write to the Free Software Foundation,
|
||||
## Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
include $(top_srcdir)/gtest.mk
|
||||
|
||||
AM_CPPFLAGS= \
|
||||
$(CODE_COVERAGE_CPPFLAGS)
|
||||
|
||||
AM_CXXFLAGS= \
|
||||
$(WARN_CXXFLAGS) \
|
||||
$(GLIB_CFLAGS) \
|
||||
$(ASAN_CXXFLAGS) \
|
||||
$(CODE_COVERAGE_CFLAGS) \
|
||||
-I${top_srcdir}/lib
|
||||
|
||||
AM_LDFLAGS= \
|
||||
$(ASAN_LDFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES= \
|
||||
libmu-index.la
|
||||
|
||||
libmu_index_la_SOURCES= \
|
||||
mu-indexer.cc \
|
||||
mu-indexer.hh \
|
||||
mu-scanner.cc \
|
||||
mu-scanner.hh
|
||||
|
||||
libmu_index_la_LIBADD= \
|
||||
$(GLIB_LIBS) \
|
||||
$(CODE_COVERAGE_LIBS)
|
||||
|
||||
include $(top_srcdir)/aminclude_static.am
|
|
@ -0,0 +1,350 @@
|
|||
/*
|
||||
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify it
|
||||
** under the terms of the GNU General Public License as published by the
|
||||
** Free Software Foundation; either version 3, or (at your option) any
|
||||
** later version.
|
||||
**
|
||||
** This program is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
** GNU General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU General Public License
|
||||
** along with this program; if not, write to the Free Software Foundation,
|
||||
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
**
|
||||
*/
|
||||
|
||||
#include "mu-indexer.hh"
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
#include <condition_variable>
|
||||
#include <iostream>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
#include <xapian.h>
|
||||
|
||||
#include "mu-scanner.hh"
|
||||
#include "utils/mu-async-queue.hh"
|
||||
#include "utils/mu-error.hh"
|
||||
#include "../mu-store.hh"
|
||||
|
||||
using namespace Mu;
|
||||
|
||||
/// Private implementation of Mu::Indexer: owns the scanner, the worker
/// threads and the queue of message paths that still have to be stored.
struct Indexer::Private {
	/// Bind to @p store and set up a scanner for the store's root
	/// maildir; every entry the scanner reports is passed to handler().
	Private (Mu::Store& store):
		store_{store},
		scanner_{store_.metadata().root_maildir,
			 [this](auto&& fullpath, auto&& sbuf, auto&& htype) {
				 return handler(fullpath, sbuf, htype);
			 }},
		max_message_size_{store_.metadata().max_message_size}
	{
		g_message ("created indexer for %s -> %s",
			   store.metadata().root_maildir.c_str(),
			   store.metadata().database_path.c_str());
	}

	/// Stop (and join) all threads before the members are destroyed.
	~Private() { stop(); }

	// scanner callbacks
	bool dir_predicate (const std::string& path, const struct dirent* dirent) const;
	bool handler (const std::string& fullpath, struct stat *statbuf,
		      Scanner::HandleType htype);

	// worker-thread management
	void maybe_start_worker();
	void worker();

	bool cleanup();

	bool start(const Indexer::Config& conf);
	bool stop();

	// NOTE: members are initialized in declaration order; store_ has to
	// come before scanner_ and max_message_size_, which read from it.
	Indexer::Config conf_;
	Store&          store_;
	Scanner         scanner_;
	const size_t    max_message_size_;

	time_t            dirstamp_{};        // stamp of the dir being scanned
	std::atomic<bool> scan_done_{true}, clean_done_{true};

	std::size_t              max_workers_; // assigned in start()
	std::vector<std::thread> workers_;
	std::thread              scanner_worker_;

	AsyncQueue<std::string> fq_;          // paths waiting for a worker

	/// Counters for the current / most recent indexing run.
	struct Progress {
		void reset() {
			removed   = 0;
			updated   = 0;
			processed = 0;
		}
		std::atomic<size_t> processed{}; /**< Number of messages processed */
		std::atomic<size_t> updated{};   /**< Number of messages added/updated to store */
		std::atomic<size_t> removed{};   /**< Number of message removed from store */
	};
	Progress progress_;

	std::mutex lock_, wlock_;             // lock_: start/stop; wlock_: workers_
};
|
||||
|
||||
|
||||
bool
|
||||
Indexer::Private::handler (const std::string& fullpath, struct stat *statbuf,
|
||||
Scanner::HandleType htype)
|
||||
{
|
||||
switch (htype) {
|
||||
case Scanner::HandleType::EnterDir: {
|
||||
|
||||
// in lazy-mode, we ignore this dir if its dirstamp suggest it
|
||||
// is up-to-date (this is _not_ always true; hence we call it
|
||||
// lazy-mode)
|
||||
dirstamp_ = store_.dirstamp(fullpath);
|
||||
if (conf_.lazy_check && dirstamp_ == statbuf->st_mtime) {
|
||||
g_debug("skip %s (seems up-to-date)", fullpath.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// don't index dirs with '.noindex'
|
||||
auto noindex = ::access((fullpath + "/.noindex").c_str(), F_OK) == 0;
|
||||
if (noindex) {
|
||||
g_debug ("skip %s (has .noindex)", fullpath.c_str());
|
||||
return false; // don't descend into this dir.
|
||||
}
|
||||
|
||||
// don't index dirs with '.noupdate', unless we do a full
|
||||
// (re)index.
|
||||
if (!conf_.ignore_noupdate) {
|
||||
auto noupdate = ::access((fullpath + "/.noupdate").c_str(), F_OK) == 0;
|
||||
if (noupdate) {
|
||||
g_debug ("skip %s (has .noupdate)", fullpath.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
g_debug ("process %s", fullpath.c_str());
|
||||
return true;
|
||||
|
||||
}
|
||||
case Scanner::HandleType::LeaveDir: {
|
||||
store_.set_dirstamp(fullpath, ::time({}));
|
||||
return true;
|
||||
}
|
||||
|
||||
case Scanner::HandleType::File: {
|
||||
|
||||
if ((size_t)statbuf->st_size > max_message_size_) {
|
||||
g_debug ("skip %s (too big: %zu bytes)",
|
||||
fullpath.c_str(), statbuf->st_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
// if the message is not in the db yet, or not up-to-date, queue
|
||||
// it for updating/inserting.
|
||||
if (statbuf->st_mtime <= dirstamp_ &&
|
||||
store_.contains_message (fullpath)) {
|
||||
//g_debug ("skip %s: already up-to-date");
|
||||
return false;
|
||||
}
|
||||
|
||||
fq_.push(std::string{fullpath});
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
g_return_val_if_reached (false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Indexer::Private::maybe_start_worker()
|
||||
{
|
||||
std::lock_guard<std::mutex> wlock{wlock_};
|
||||
|
||||
if (fq_.size() > workers_.size() && workers_.size() < max_workers_)
|
||||
workers_.emplace_back(std::thread([this]{worker();}));
|
||||
}
|
||||
|
||||
void
|
||||
Indexer::Private::worker()
|
||||
{
|
||||
std::string item;
|
||||
|
||||
g_debug ("started worker");
|
||||
|
||||
while (!scan_done_ || !fq_.empty()) {
|
||||
|
||||
if (!fq_.pop (item, 250ms))
|
||||
continue;
|
||||
|
||||
//g_debug ("popped (n=%zu) path %s", fq_.size(), item.c_str());
|
||||
++progress_.processed;
|
||||
|
||||
try {
|
||||
store_.add_message(item);
|
||||
++progress_.updated;
|
||||
|
||||
} catch (const Mu::Error& er) {
|
||||
g_warning ("error adding message @ %s: %s",
|
||||
item.c_str(), er.what());
|
||||
}
|
||||
|
||||
maybe_start_worker();
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Indexer::Private::cleanup()
|
||||
{
|
||||
g_debug ("starting cleanup");
|
||||
|
||||
std::vector<Store::Id> orphans_; // store messages without files.
|
||||
store_.for_each([&](Store::Id id, const std::string &path) {
|
||||
|
||||
if (clean_done_)
|
||||
return false;
|
||||
|
||||
if (::access(path.c_str(), F_OK) != 0) {
|
||||
g_debug ("%s not found; queing id=%u for removal",
|
||||
path.c_str(), id);
|
||||
orphans_.emplace_back(id);
|
||||
}
|
||||
|
||||
return !clean_done_;
|
||||
});
|
||||
|
||||
if (orphans_.empty()) {
|
||||
g_debug("nothing to clean up");
|
||||
return true;
|
||||
}
|
||||
|
||||
store_.remove_messages (orphans_);
|
||||
g_debug ("removed %zu orphan messages from store", orphans_.size());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
Indexer::Private::start(const Indexer::Config& conf)
|
||||
{
|
||||
stop();
|
||||
|
||||
conf_ = conf;
|
||||
if (conf_.max_threads == 0)
|
||||
max_workers_ = std::thread::hardware_concurrency();
|
||||
else
|
||||
max_workers_ = conf.max_threads;
|
||||
|
||||
g_debug ("starting indexer with up to %zu threads", max_workers_);
|
||||
|
||||
scan_done_ = false;
|
||||
workers_.emplace_back(std::thread([this]{worker();}));
|
||||
|
||||
scan_done_ = clean_done_ = false;
|
||||
scanner_worker_ = std::thread([this]{
|
||||
|
||||
progress_.reset();
|
||||
|
||||
if (conf_.scan) {
|
||||
g_debug("starting scanner");
|
||||
|
||||
if (!scanner_.start()) {
|
||||
g_warning ("failed to start scanner");
|
||||
return;
|
||||
}
|
||||
|
||||
scan_done_ = true;
|
||||
g_debug ("scanner finished");
|
||||
}
|
||||
|
||||
if (conf_.cleanup) {
|
||||
g_debug ("starting cleanup");
|
||||
cleanup();
|
||||
clean_done_ = true;
|
||||
g_debug ("cleanup finished");
|
||||
}
|
||||
|
||||
store_.commit();
|
||||
});
|
||||
|
||||
g_debug ("started indexer");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Indexer::Private::stop()
|
||||
{
|
||||
scanner_.stop();
|
||||
scan_done_ = clean_done_ = true;
|
||||
|
||||
const auto w_n = workers_.size();
|
||||
|
||||
fq_.clear();
|
||||
if (scanner_worker_.joinable())
|
||||
scanner_worker_.join();
|
||||
|
||||
for (auto&& w: workers_)
|
||||
if (w.joinable())
|
||||
w.join();
|
||||
workers_.clear();
|
||||
|
||||
if (w_n > 0)
|
||||
g_debug ("stopped indexer (joined %zu worker(s))", w_n);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create the private implementation, bound to the given store.
Indexer::Indexer (Store& store)
	: priv_(std::make_unique<Private>(store))
{
}

// Defined here, where Private is a complete type; Private's destructor
// stops the indexer.
Indexer::~Indexer() = default;
|
||||
|
||||
bool
|
||||
Indexer::start(const Indexer::Config& conf)
|
||||
{
|
||||
std::lock_guard<std::mutex> l(priv_->lock_);
|
||||
if (is_running())
|
||||
return true;
|
||||
|
||||
return priv_->start(conf);
|
||||
}
|
||||
|
||||
bool
|
||||
Indexer::stop()
|
||||
{
|
||||
std::lock_guard<std::mutex> l(priv_->lock_);
|
||||
|
||||
if (!is_running())
|
||||
return true;
|
||||
|
||||
g_debug ("stopping indexer");
|
||||
return priv_->stop();
|
||||
}
|
||||
|
||||
bool
|
||||
Indexer::is_running() const
|
||||
{
|
||||
return !priv_->scan_done_ || !priv_->clean_done_ ||
|
||||
!priv_->fq_.empty();
|
||||
}
|
||||
|
||||
// Take a snapshot of the (atomic) progress counters.
Indexer::Progress
Indexer::progress() const
{
	Progress snapshot;

	snapshot.running   = is_running();
	snapshot.processed = priv_->progress_.processed;
	snapshot.updated   = priv_->progress_.updated;
	snapshot.removed   = priv_->progress_.removed;

	return snapshot;
}
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify it
|
||||
** under the terms of the GNU General Public License as published by the
|
||||
** Free Software Foundation; either version 3, or (at your option) any
|
||||
** later version.
|
||||
**
|
||||
** This program is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
** GNU General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU General Public License
|
||||
** along with this program; if not, write to the Free Software Foundation,
|
||||
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
**
|
||||
*/
|
||||
|
||||
#ifndef MU_INDEXER_HH__
|
||||
#define MU_INDEXER_HH__
|
||||
|
||||
#include <memory>
|
||||
#include <chrono>
|
||||
|
||||
namespace Mu {
|
||||
|
||||
struct Store;
|
||||
|
||||
/// An object abstracting the index process.
|
||||
class Indexer {
|
||||
public:
|
||||
/**
|
||||
* Construct an indexer object
|
||||
*
|
||||
* @param store the message store to use
|
||||
*/
|
||||
Indexer (Store& store);
|
||||
|
||||
/**
|
||||
* DTOR
|
||||
*/
|
||||
~Indexer();
|
||||
|
||||
/// A configuration object for the indexer
|
||||
struct Config {
|
||||
bool scan{true};
|
||||
/**< scan for new messages */
|
||||
bool cleanup{true};
|
||||
/**< clean messages no longer in the file system */
|
||||
size_t max_threads{};
|
||||
/**< maximum # of threads to use */
|
||||
bool ignore_noupdate{};
|
||||
/**< ignore .noupdate files */
|
||||
bool lazy_check{};
|
||||
/**< whether to skip directories that don't have a changed
|
||||
* mtime */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Start indexing. If already underway, do nothing.
|
||||
*
|
||||
* @param conf a configuration object
|
||||
*
|
||||
* @return true if starting worked or an indexing process was already
|
||||
* underway; false otherwise.
|
||||
*
|
||||
*/
|
||||
bool start(const Config& conf);
|
||||
|
||||
/**
|
||||
* Stop indexing. If not indexing, do nothing.
|
||||
*
|
||||
*
|
||||
* @return true if we stopped indexing, or indexing was not underway.
|
||||
* False otherwise.
|
||||
*/
|
||||
bool stop();
|
||||
|
||||
/**
|
||||
* Is an indexing process running?
|
||||
*
|
||||
* @return true or false.
|
||||
*/
|
||||
bool is_running() const;
|
||||
|
||||
|
||||
// Object describing current progress
|
||||
struct Progress {
|
||||
bool running{}; /**< Is an index operation in progress? */
|
||||
size_t processed{}; /**< Number of messages processed */
|
||||
size_t updated{}; /**< Number of messages added/updated to store */
|
||||
size_t removed{}; /**< Number of message removed from store */
|
||||
};
|
||||
|
||||
/**
|
||||
* Get an object describing the current progress. The progress object
|
||||
* describes the most recent indexing job, and is reset up a fresh
|
||||
* start().
|
||||
*
|
||||
* @return a progress object.
|
||||
*/
|
||||
Progress progress() const;
|
||||
|
||||
private:
|
||||
struct Private;
|
||||
std::unique_ptr<Private> priv_;
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace Mu
|
||||
#endif /* MU_INDEXER_HH__ */
|
|
@ -0,0 +1,242 @@
|
|||
/*
|
||||
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify it
|
||||
** under the terms of the GNU General Public License as published by the
|
||||
** Free Software Foundation; either version 3, or (at your option) any
|
||||
** later version.
|
||||
**
|
||||
** This program is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
** GNU General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU General Public License
|
||||
** along with this program; if not, write to the Free Software Foundation,
|
||||
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
**
|
||||
*/
|
||||
#include "mu-scanner.hh"
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
#include "utils/mu-utils.hh"
|
||||
#include "utils/mu-error.hh"
|
||||
|
||||
using namespace Mu;
|
||||
|
||||
/// Private implementation of Mu::Scanner.
struct Scanner::Private {
	/// Remember the root directory and handler.
	///
	/// @throws Mu::Error when @p handler is empty.
	Private (const std::string& root_dir, Scanner::Handler handler)
		: root_dir_{root_dir}, handler_{handler}
	{
		if (handler_)
			return;

		throw Mu::Error{Error::Code::Internal, "missing handler"};
	}

	~Private() { stop(); }

	bool start();
	bool stop();

	// directory traversal
	bool process_dentry (const std::string& path, struct dirent *dentry, bool is_maildir);
	bool process_dir (const std::string& path, bool is_maildir);

	const std::string      root_dir_;  // where the scan starts
	const Scanner::Handler handler_;   // called for dirs & files
	std::atomic<bool>      running_{}; // cleared to stop the scan
	std::mutex             lock_;
};
|
||||
|
||||
|
||||
/**
 * Is this directory entry one we should always ignore, i.e., an entry
 * with an empty name, "." or ".."?
 *
 * Characters are only inspected up to (and including) the terminating
 * NUL, so one-character names do not cause a read past the end of the
 * string.
 *
 * @param dentry a directory entry
 *
 * @return true if the entry is special, false otherwise.
 */
static bool
is_special_dir (const struct dirent *dentry)
{
	const char *name = dentry->d_name;

	if (name[0] == '\0')
		return true;  // empty name
	if (name[0] != '.')
		return false;
	if (name[1] == '\0')
		return true;  // "."

	return name[1] == '.' && name[2] == '\0'; // ".."
}
|
||||
|
||||
/**
 * Is this the name of a maildir leaf directory that holds messages, i.e.,
 * exactly "cur" or "new"?
 *
 * @param dirname a directory name (without path)
 *
 * @return true if it is, false otherwise.
 */
static bool
is_new_cur (const char *dirname)
{
	static const char *const leaves[] = { "cur", "new" };

	for (const char *leaf : leaves) {
		int i = 0;
		// walk both strings while they agree; stops at the first
		// difference, or at the end of leaf.
		while (leaf[i] != '\0' && dirname[i] == leaf[i])
			++i;
		if (leaf[i] == '\0' && dirname[i] == '\0')
			return true;
	}

	return false;
}
|
||||
|
||||
bool
|
||||
Scanner::Private::process_dentry (const std::string& path, struct dirent *dentry,
|
||||
bool is_maildir)
|
||||
{
|
||||
if (is_special_dir (dentry))
|
||||
return true; // ignore.
|
||||
|
||||
const auto fullpath{path + "/" + dentry->d_name};
|
||||
struct stat statbuf;
|
||||
if (::stat(fullpath.c_str(), &statbuf) != 0) {
|
||||
g_warning ("failed to stat %s: %s", fullpath.c_str(), ::strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
|
||||
const auto res = handler_(fullpath, &statbuf, Scanner::HandleType::EnterDir);
|
||||
if (!res) {
|
||||
//g_debug ("skipping dir %s", fullpath.c_str());
|
||||
return true; // skip
|
||||
}
|
||||
|
||||
process_dir (fullpath, is_new_cur(dentry->d_name));
|
||||
|
||||
return handler_(fullpath, &statbuf, Scanner::HandleType::LeaveDir);
|
||||
|
||||
} else if (S_ISREG(statbuf.st_mode) && is_maildir)
|
||||
return handler_(fullpath, &statbuf, Scanner::HandleType::File);
|
||||
|
||||
g_debug ("skip %s (neither maildir-file nor directory)", fullpath.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
Scanner::Private::process_dir (const std::string& path, bool is_maildir)
|
||||
{
|
||||
const auto dir = opendir (path.c_str());
|
||||
if (G_UNLIKELY(!dir)) {
|
||||
g_warning("failed to scan dir %s: %s", path.c_str(), strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: sort dentries by inode order, which makes things faster for extfs.
|
||||
// see mu-maildir.c
|
||||
|
||||
while (running_) {
|
||||
errno = 0;
|
||||
const auto dentry{readdir(dir)};
|
||||
|
||||
if (G_LIKELY(dentry)) {
|
||||
process_dentry (path, dentry, is_maildir);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (errno != 0) {
|
||||
g_warning("failed to read %s: %s", path.c_str(), strerror(errno));
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
closedir (dir);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Scanner::Private::start()
|
||||
{
|
||||
const auto& path{root_dir_};
|
||||
if (G_UNLIKELY(path.length() > PATH_MAX)) {
|
||||
g_warning("path too long");
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto mode{F_OK | R_OK};
|
||||
if (G_UNLIKELY(access (path.c_str(), mode) != 0)) {
|
||||
g_warning("'%s' is not readable: %s", path.c_str(), strerror (errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
struct stat statbuf{};
|
||||
if (G_UNLIKELY(stat (path.c_str(), &statbuf) != 0)) {
|
||||
g_warning("'%s' is not stat'able: %s", path.c_str(), strerror (errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (G_UNLIKELY(!S_ISDIR (statbuf.st_mode))) {
|
||||
g_warning("'%s' is not a directory", path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
running_ = true;
|
||||
g_debug ("starting scan @ %s", root_dir_.c_str());
|
||||
|
||||
auto basename{g_path_get_basename(root_dir_.c_str())};
|
||||
const auto is_maildir = (g_strcmp0(basename, "cur") == 0 ||
|
||||
g_strcmp0(basename,"new") == 0);
|
||||
g_free(basename);
|
||||
|
||||
const auto start{std::chrono::steady_clock::now()};
|
||||
process_dir(root_dir_, is_maildir);
|
||||
const auto elapsed = std::chrono::steady_clock::now() - start;
|
||||
g_debug ("finished scan of %s in %" G_GINT64_FORMAT " ms", root_dir_.c_str(),
|
||||
to_ms(elapsed));
|
||||
running_ = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Scanner::Private::stop()
|
||||
{
|
||||
if (!running_)
|
||||
return true; // nothing to do
|
||||
|
||||
g_debug ("stopping scan");
|
||||
running_ = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create the private implementation for the given root dir & handler.
Scanner::Scanner (const std::string& root_dir, Scanner::Handler handler)
	: priv_(std::make_unique<Private>(root_dir, handler))
{
}

// Defined here, where Private is a complete type.
Scanner::~Scanner() = default;
|
||||
|
||||
bool
|
||||
Scanner::start()
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> l(priv_->lock_);
|
||||
if (priv_->running_)
|
||||
return true; //nothing to do
|
||||
|
||||
priv_->running_ = true;
|
||||
}
|
||||
|
||||
const auto res = priv_->start();
|
||||
priv_->running_ = false;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
bool
|
||||
Scanner::stop()
|
||||
{
|
||||
std::lock_guard<std::mutex> l(priv_->lock_);
|
||||
|
||||
return priv_->stop();
|
||||
}
|
||||
|
||||
bool
|
||||
Scanner::is_running() const
|
||||
{
|
||||
return priv_->running_;
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify it
|
||||
** under the terms of the GNU General Public License as published by the
|
||||
** Free Software Foundation; either version 3, or (at your option) any
|
||||
** later version.
|
||||
**
|
||||
** This program is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
** GNU General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU General Public License
|
||||
** along with this program; if not, write to the Free Software Foundation,
|
||||
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
**
|
||||
*/
|
||||
|
||||
#ifndef MU_SCANNER_HH__
|
||||
#define MU_SCANNER_HH__
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include <dirent.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace Mu {
|
||||
|
||||
/// @brief Maildir scanner
|
||||
///
|
||||
/// Scans maildir (trees) recursively, and calls the Handler callback for
|
||||
/// directories & files.
|
||||
///
|
||||
/// It filters out (i.e., does not call the handler for):
|
||||
/// - files starting with '.'
|
||||
/// - files that do not live in a cur / new leaf maildir
|
||||
/// - directories '.' and '..'
|
||||
///
|
||||
class Scanner {
public:
	/// What the handler is being called for.
	enum struct HandleType { File, EnterDir, LeaveDir };

	/// Prototype for a handler function
	using Handler = std::function<bool(const std::string& fullpath,
					   struct stat* statbuf,
					   HandleType htype)>;

	/// Construct a scanner object for scanning a directory, recursively.
	///
	/// The handler is called when entering a directory, when leaving
	/// it, and for files; when it returns false for
	/// HandleType::EnterDir, the directory is skipped.
	///
	/// @param root_dir root dir to start scanning
	/// @param handler handler function for some direntry
	Scanner (const std::string& root_dir, Handler handler);

	/// Destructor.
	~Scanner();

	/// Start the scan; this is a blocking call that runs until
	/// finished or (from another thread) stop() is called.
	///
	/// @return true if starting worked; false otherwise
	bool start();

	/// Stop the scan
	///
	/// @return true if stopping worked; false otherwise
	bool stop();

	/// Is a scan currently running?
	///
	/// @return true or false
	bool is_running() const;

private:
	struct Private;
	std::unique_ptr<Private> priv_;
};
|
||||
|
||||
} // namespace Mu
|
||||
|
||||
#endif /* MU_SCANNER_HH__ */
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
** as published by the Free Software Foundation; either version 2.1
|
||||
** of the License, or (at your option) any later version.
|
||||
**
|
||||
** This library is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
** Lesser General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU Lesser General Public
|
||||
** License along with this library; if not, write to the Free
|
||||
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <glib.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "mu-scanner.hh"
|
||||
#include "mu-utils.hh"
|
||||
|
||||
using namespace Mu;
|
||||
|
||||
|
||||
static void
|
||||
test_scan_maildir ()
|
||||
{
|
||||
allow_warnings();
|
||||
|
||||
Scanner scanner{"/home/djcb/Maildir",
|
||||
[](const dirent* dentry)->bool {
|
||||
g_print ("%02x %s\n", dentry->d_type, dentry->d_name);
|
||||
return true;
|
||||
},
|
||||
[](const std::string& fullpath, const struct stat* statbuf,
|
||||
auto&& info)->bool {
|
||||
g_print ("%s %zu\n", fullpath.c_str(), statbuf->st_size);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
g_assert_true (scanner.start());
|
||||
|
||||
while (scanner.is_running()) {
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[]) try
|
||||
{
|
||||
g_test_init (&argc, &argv, NULL);
|
||||
|
||||
g_test_add_func ("/utils/scanner/scan-maildir", test_scan_maildir);
|
||||
|
||||
return g_test_run ();
|
||||
|
||||
|
||||
} catch (const std::runtime_error& re) {
|
||||
std::cerr << re.what() << "\n";
|
||||
return 1;
|
||||
}
|
476
lib/mu-index.c
476
lib/mu-index.c
|
@ -1,476 +0,0 @@
|
|||
/*
|
||||
** Copyright (C) 2008-2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify
|
||||
** it under the terms of the GNU General Public License as published by
|
||||
** the Free Software Foundation; either version 3 of the License, or
|
||||
** (at your option) any later version.
|
||||
**
|
||||
** This program is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
** GNU General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU General Public License
|
||||
** along with this program; if not, write to the Free Software Foundation,
|
||||
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
**
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "mu-index.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <glib.h>
|
||||
#include <glib/gstdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "mu-maildir.h"
|
||||
|
||||
#define MU_LAST_USED_MAILDIR_KEY "last_used_maildir"
|
||||
#define MU_INDEX_MAX_FILE_SIZE (500*1000*1000) /* 500 Mb */
|
||||
/* apparently, people are getting really big mails, so let us index those (by
|
||||
* default)*/
|
||||
|
||||
struct _MuIndex {
|
||||
MuStore *_store;
|
||||
gboolean _needs_reindex;
|
||||
guint _max_filesize;
|
||||
};
|
||||
|
||||
MuIndex*
|
||||
mu_index_new (MuStore *store, GError **err)
|
||||
{
|
||||
MuIndex *index;
|
||||
unsigned count;
|
||||
|
||||
g_return_val_if_fail (store, NULL);
|
||||
g_return_val_if_fail (!mu_store_is_read_only(store), NULL);
|
||||
|
||||
index = g_new0 (MuIndex, 1);
|
||||
|
||||
index->_store = mu_store_ref (store);
|
||||
|
||||
/* set the default max file size */
|
||||
index->_max_filesize = MU_INDEX_MAX_FILE_SIZE;
|
||||
|
||||
count = mu_store_count (store, err);
|
||||
if (count == (unsigned)-1)
|
||||
return NULL;
|
||||
else if (count == 0)
|
||||
index->_needs_reindex = TRUE;
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
void
|
||||
mu_index_destroy (MuIndex *index)
|
||||
{
|
||||
if (!index)
|
||||
return;
|
||||
|
||||
mu_store_unref (index->_store);
|
||||
g_free (index);
|
||||
}
|
||||
|
||||
|
||||
struct _MuIndexCallbackData {
|
||||
MuIndexMsgCallback _idx_msg_cb;
|
||||
MuIndexDirCallback _idx_dir_cb;
|
||||
MuStore* _store;
|
||||
void* _user_data;
|
||||
MuIndexStats* _stats;
|
||||
gboolean _reindex;
|
||||
gboolean _lazy_check;
|
||||
time_t _dirstamp;
|
||||
guint _max_filesize;
|
||||
};
|
||||
typedef struct _MuIndexCallbackData MuIndexCallbackData;
|
||||
|
||||
|
||||
/* checks to determine if we need to (re)index this message note:
|
||||
* simply checking timestamps is not good enough because message may
|
||||
* be moved from other dirs (e.g. from 'new' to 'cur') and the time
|
||||
* stamps won't change. */
|
||||
static inline gboolean
|
||||
needs_index (MuIndexCallbackData *data, const char *fullpath,
|
||||
time_t filestamp)
|
||||
{
|
||||
/* unconditionally reindex */
|
||||
if (data->_reindex)
|
||||
return TRUE;
|
||||
|
||||
/* it's not in the database yet */
|
||||
if (!mu_store_contains_message (data->_store, fullpath))
|
||||
return TRUE;
|
||||
|
||||
/* it's there, but it's not up to date */
|
||||
if ((unsigned)filestamp >= (unsigned)data->_dirstamp)
|
||||
return TRUE;
|
||||
|
||||
return FALSE; /* index not needed */
|
||||
}
|
||||
|
||||
|
||||
static MuError
|
||||
insert_or_update_maybe (const char *fullpath, const char *mdir,
|
||||
time_t filestamp, MuIndexCallbackData *data,
|
||||
gboolean *updated)
|
||||
{
|
||||
MuMsg *msg;
|
||||
GError *err;
|
||||
gboolean rv;
|
||||
|
||||
*updated = FALSE;
|
||||
if (!needs_index (data, fullpath, filestamp))
|
||||
return MU_OK; /* nothing to do for this one */
|
||||
|
||||
err = NULL;
|
||||
msg = mu_msg_new_from_file (fullpath, mdir, &err);
|
||||
if (!msg) {
|
||||
if (!err)
|
||||
g_warning ("error creating message object: %s",
|
||||
fullpath);
|
||||
else {
|
||||
g_warning ("%s", err->message);
|
||||
g_clear_error (&err);
|
||||
}
|
||||
/* warn, then simply continue */
|
||||
return MU_OK;
|
||||
}
|
||||
|
||||
/* we got a valid id; scan the message contents as well */
|
||||
rv = mu_store_add_msg (data->_store, msg, &err);
|
||||
mu_msg_unref (msg);
|
||||
|
||||
if (!rv) {
|
||||
g_warning ("error storing message object: %s",
|
||||
err ? err->message : "cause unknown");
|
||||
g_clear_error (&err);
|
||||
return MU_ERROR;
|
||||
}
|
||||
|
||||
*updated = TRUE;
|
||||
return MU_OK;
|
||||
}
|
||||
|
||||
|
||||
static MuError
|
||||
run_msg_callback_maybe (MuIndexCallbackData *data)
|
||||
{
|
||||
MuError result;
|
||||
|
||||
if (!data || !data->_idx_msg_cb)
|
||||
return MU_OK;
|
||||
|
||||
result = data->_idx_msg_cb (data->_stats, data->_user_data);
|
||||
if (G_UNLIKELY(result != MU_OK && result != MU_STOP))
|
||||
g_warning ("error in callback");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static MuError
|
||||
on_run_maildir_msg (const char *fullpath, const char *mdir,
|
||||
struct stat *statbuf, MuIndexCallbackData *data)
|
||||
{
|
||||
MuError result;
|
||||
gboolean updated;
|
||||
|
||||
/* protect against too big messages */
|
||||
if (G_UNLIKELY(statbuf->st_size > data->_max_filesize)) {
|
||||
g_warning ("ignoring because bigger than %u bytes: %s",
|
||||
data->_max_filesize, fullpath);
|
||||
return MU_OK; /* not an error */
|
||||
}
|
||||
|
||||
result = run_msg_callback_maybe (data);
|
||||
if (result != MU_OK)
|
||||
return result;
|
||||
|
||||
/* see if we need to update/insert anything...
|
||||
* use the ctime, so any status change will be visible (perms,
|
||||
* filename etc.)*/
|
||||
result = insert_or_update_maybe (fullpath, mdir, statbuf->st_ctime,
|
||||
data, &updated);
|
||||
|
||||
if (result == MU_OK && data && data->_stats) { /* update statistics */
|
||||
++data->_stats->_processed;
|
||||
updated ? ++data->_stats->_updated : ++data->_stats->_uptodate;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Return the ctime of @path as reported by stat(2), or 0 (after logging a
 * warning) when the stat call fails. */
static time_t
get_dir_timestamp (const char *path)
{
	struct stat st;

	if (stat (path, &st) == 0)
		return st.st_ctime;

	g_warning ("failed to stat %s: %s",
		   path, strerror(errno));

	return 0;
}
|
||||
|
||||
static MuError
|
||||
on_run_maildir_dir (const char* fullpath, gboolean enter,
|
||||
MuIndexCallbackData *data)
|
||||
{
|
||||
GError *err;
|
||||
|
||||
err = NULL;
|
||||
|
||||
/* xapian stores a per-dir timestamp; we use this timestamp to determine
|
||||
* whether a message is up-to-date
|
||||
*/
|
||||
if (enter) {
|
||||
data->_dirstamp =
|
||||
mu_store_get_dirstamp (data->_store, fullpath, &err);
|
||||
/* in 'lazy' mode, we only check the dir timestamp, and if it's
|
||||
* up to date, we don't bother with this dir. This fails to
|
||||
* account for messages below this dir that have merely
|
||||
* _changed_ though */
|
||||
if (data->_lazy_check && mu_maildir_is_leaf_dir(fullpath)) {
|
||||
time_t dirstamp;
|
||||
dirstamp = get_dir_timestamp (fullpath);
|
||||
if (dirstamp <= data->_dirstamp) {
|
||||
g_debug ("ignore %s (up-to-date)", fullpath);
|
||||
return MU_IGNORE;
|
||||
}
|
||||
}
|
||||
g_debug ("entering %s", fullpath);
|
||||
} else {
|
||||
mu_store_set_dirstamp (data->_store, fullpath,
|
||||
time(NULL), &err);
|
||||
g_debug ("leaving %s", fullpath);
|
||||
}
|
||||
|
||||
if (data->_idx_dir_cb)
|
||||
return data->_idx_dir_cb (fullpath, enter,
|
||||
data->_user_data);
|
||||
|
||||
if (err) {
|
||||
g_warning("%s: error handling %s: %s", __func__,
|
||||
fullpath, err->message);
|
||||
g_clear_error(&err);
|
||||
}
|
||||
|
||||
return MU_OK;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
check_path (const char *path)
|
||||
{
|
||||
g_return_val_if_fail (path, FALSE);
|
||||
|
||||
if (!g_path_is_absolute (path)) {
|
||||
g_warning ("%s: not an absolute path: '%s'", __func__, path);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (access (path, R_OK) != 0) {
|
||||
g_warning ("%s: cannot open '%s': %s",
|
||||
__func__, path, strerror (errno));
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian,
|
||||
gboolean reindex, gboolean lazycheck,
|
||||
guint max_filesize, MuIndexStats *stats,
|
||||
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
|
||||
void *user_data)
|
||||
{
|
||||
cb_data->_idx_msg_cb = msg_cb;
|
||||
cb_data->_idx_dir_cb = dir_cb;
|
||||
|
||||
cb_data->_user_data = user_data;
|
||||
cb_data->_store = xapian;
|
||||
|
||||
cb_data->_reindex = reindex;
|
||||
cb_data->_lazy_check = lazycheck;
|
||||
cb_data->_dirstamp = 0;
|
||||
cb_data->_max_filesize = max_filesize;
|
||||
|
||||
cb_data->_stats = stats;
|
||||
if (cb_data->_stats)
|
||||
memset (cb_data->_stats, 0, sizeof(MuIndexStats));
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
mu_index_set_max_msg_size (MuIndex *index, guint max_size)
|
||||
{
|
||||
g_return_if_fail (index);
|
||||
|
||||
if (max_size == 0)
|
||||
index->_max_filesize = MU_INDEX_MAX_FILE_SIZE;
|
||||
else
|
||||
index->_max_filesize = max_size;
|
||||
}
|
||||
|
||||
|
||||
MuError
|
||||
mu_index_run (MuIndex *index, gboolean reindex, gboolean lazycheck,
|
||||
MuIndexStats *stats,
|
||||
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
|
||||
void *user_data)
|
||||
{
|
||||
MuIndexCallbackData cb_data;
|
||||
MuError rv;
|
||||
const char *path;
|
||||
|
||||
g_return_val_if_fail (index && index->_store, MU_ERROR);
|
||||
g_return_val_if_fail (msg_cb, MU_ERROR);
|
||||
|
||||
path = mu_store_root_maildir (index->_store);
|
||||
if (!check_path (path))
|
||||
return MU_ERROR;
|
||||
|
||||
if (index->_needs_reindex)
|
||||
reindex = TRUE;
|
||||
|
||||
init_cb_data (&cb_data, index->_store, reindex, lazycheck,
|
||||
index->_max_filesize, stats,
|
||||
msg_cb, dir_cb, user_data);
|
||||
|
||||
rv = mu_maildir_walk (path,
|
||||
(MuMaildirWalkMsgCallback)on_run_maildir_msg,
|
||||
(MuMaildirWalkDirCallback)on_run_maildir_dir,
|
||||
reindex, /* re-index, ie. do a full update */
|
||||
&cb_data);
|
||||
|
||||
mu_store_flush (index->_store);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static MuError
|
||||
on_stats_maildir_file (const char *fullpath, const char *mdir,
|
||||
struct stat *statbuf,
|
||||
MuIndexCallbackData *cb_data)
|
||||
{
|
||||
MuError result;
|
||||
|
||||
if (cb_data && cb_data->_idx_msg_cb)
|
||||
result = cb_data->_idx_msg_cb (cb_data->_stats,
|
||||
cb_data->_user_data);
|
||||
else
|
||||
result = MU_OK;
|
||||
|
||||
if (result == MU_OK) {
|
||||
if (cb_data->_stats)
|
||||
++cb_data->_stats->_processed;
|
||||
return MU_OK;
|
||||
}
|
||||
|
||||
return result; /* MU_STOP or MU_OK */
|
||||
}
|
||||
|
||||
|
||||
MuError
|
||||
mu_index_stats (MuIndex *index,
|
||||
MuIndexStats *stats, MuIndexMsgCallback cb_msg,
|
||||
MuIndexDirCallback cb_dir, void *user_data)
|
||||
{
|
||||
const char *path;
|
||||
MuIndexCallbackData cb_data;
|
||||
|
||||
g_return_val_if_fail (index, MU_ERROR);
|
||||
g_return_val_if_fail (cb_msg, MU_ERROR);
|
||||
|
||||
path = mu_store_root_maildir (index->_store);
|
||||
if (!check_path (path))
|
||||
return MU_ERROR;
|
||||
|
||||
if (stats)
|
||||
memset (stats, 0, sizeof(MuIndexStats));
|
||||
|
||||
cb_data._idx_msg_cb = cb_msg;
|
||||
cb_data._idx_dir_cb = cb_dir;
|
||||
|
||||
cb_data._stats = stats;
|
||||
cb_data._user_data = user_data;
|
||||
|
||||
cb_data._dirstamp = 0;
|
||||
|
||||
return mu_maildir_walk (path,
|
||||
(MuMaildirWalkMsgCallback)on_stats_maildir_file,
|
||||
NULL, FALSE, &cb_data);
|
||||
}
|
||||
|
||||
struct _CleanupData {
|
||||
MuStore *_store;
|
||||
MuIndexStats *_stats;
|
||||
MuIndexCleanupDeleteCallback _cb;
|
||||
void *_user_data;
|
||||
|
||||
};
|
||||
typedef struct _CleanupData CleanupData;
|
||||
|
||||
|
||||
static MuError
|
||||
foreach_doc_cb (const char* path, CleanupData *cudata)
|
||||
{
|
||||
if (access (path, R_OK) != 0) {
|
||||
if (errno != EACCES)
|
||||
g_debug ("cannot access %s: %s", path, strerror(errno));
|
||||
if (!mu_store_remove_path (cudata->_store, path))
|
||||
return MU_ERROR; /* something went wrong... bail out */
|
||||
if (cudata->_stats)
|
||||
++cudata->_stats->_cleaned_up;
|
||||
}
|
||||
|
||||
if (cudata->_stats)
|
||||
++cudata->_stats->_processed;
|
||||
|
||||
if (!cudata->_cb)
|
||||
return MU_OK;
|
||||
|
||||
return cudata->_cb (cudata->_stats, cudata->_user_data);
|
||||
}
|
||||
|
||||
|
||||
MuError
|
||||
mu_index_cleanup (MuIndex *index, MuIndexStats *stats,
|
||||
MuIndexCleanupDeleteCallback cb,
|
||||
void *user_data, GError **err)
|
||||
{
|
||||
MuError rv;
|
||||
CleanupData cudata;
|
||||
|
||||
g_return_val_if_fail (index, MU_ERROR);
|
||||
|
||||
cudata._store = index->_store;
|
||||
cudata._stats = stats;
|
||||
cudata._cb = cb;
|
||||
cudata._user_data = user_data;
|
||||
|
||||
rv = mu_store_foreach (index->_store,
|
||||
(MuStoreForeachFunc)foreach_doc_cb,
|
||||
&cudata, err);
|
||||
|
||||
mu_store_flush (index->_store);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
gboolean
|
||||
mu_index_stats_clear (MuIndexStats *stats)
|
||||
{
|
||||
if (!stats)
|
||||
return FALSE;
|
||||
|
||||
memset (stats, 0, sizeof(MuIndexStats));
|
||||
return TRUE;
|
||||
}
|
193
lib/mu-index.h
193
lib/mu-index.h
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
** Copyright (C) 2008-2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify
|
||||
** it under the terms of the GNU General Public License as published by
|
||||
** the Free Software Foundation; either version 3 of the License, or
|
||||
** (at your option) any later version.
|
||||
**
|
||||
** This program is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
** GNU General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU General Public License
|
||||
** along with this program; if not, write to the Free Software Foundation,
|
||||
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
**
|
||||
*/
|
||||
|
||||
#ifndef __MU_INDEX_H__
|
||||
#define __MU_INDEX_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <glib.h>
|
||||
#include <utils/mu-util.h>
|
||||
#include <mu-store.hh>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
/* opaque structure */
|
||||
struct _MuIndex;
|
||||
typedef struct _MuIndex MuIndex;
|
||||
|
||||
/* counters filled in while running, counting or cleaning up */
typedef struct _MuIndexStats {
	unsigned _processed;	/* number of msgs processed or counted */
	unsigned _updated;	/* number of msgs new or updated */
	unsigned _cleaned_up;	/* number of msgs cleaned up */
	unsigned _uptodate;	/* number of msgs already up-to-date */
} MuIndexStats;
|
||||
|
||||
/**
|
||||
* create a new MuIndex instance. NOTE: the database does not have
|
||||
* to exist yet, but the directory must already exist; NOTE(2): before
|
||||
* doing anything with the returned Index object, make sure you have
|
||||
* called mu_msg_init somewhere in your code.
|
||||
*
|
||||
* @param store a writable MuStore object
|
||||
* @param err to receive error or NULL; there are only errors when this
|
||||
* function returns NULL. Possible errors: see mu-error.h
|
||||
*
|
||||
* @return a new MuIndex instance, or NULL in case of error
|
||||
*/
|
||||
MuIndex* mu_index_new (MuStore *store, GError **err)
|
||||
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
|
||||
|
||||
|
||||
/**
|
||||
* destroy the index instance
|
||||
*
|
||||
* @param index a MuIndex instance, or NULL
|
||||
*/
|
||||
void mu_index_destroy (MuIndex *index);
|
||||
|
||||
|
||||
/**
|
||||
* change the maximum file size that mu-index considers from its
|
||||
* default (MU_INDEX_MAX_FILE_SIZE). Note that the maximum size is a
|
||||
* protection against mu (or the libraries it uses) allocating too
|
||||
* much memory, which can lead to problems
|
||||
*
|
||||
* @param index a mu index object
|
||||
* @param max_size the maximum msg size, or 0 to reset to the default
|
||||
*/
|
||||
void mu_index_set_max_msg_size (MuIndex *index, guint max_size);
|
||||
|
||||
|
||||
/**
|
||||
* callback function for mu_index_(run|stats|cleanup), for each message
|
||||
*
|
||||
* @param stats pointer to structure to receive statistics data
|
||||
* @param user_data pointer to user data
|
||||
*
|
||||
* @return MU_OK to continue, MU_STOP to stop, or MU_ERROR in
|
||||
* case of some error.
|
||||
*/
|
||||
typedef MuError (*MuIndexMsgCallback) (MuIndexStats* stats, void *user_data);
|
||||
|
||||
|
||||
/**
|
||||
* callback function for mu_index_(run|stats|cleanup), for each dir enter/leave
|
||||
*
|
||||
* @param path dirpath we just entered / left
|
||||
* @param enter did we enter (TRUE) or leave(FALSE) the dir?
|
||||
* @param user_data pointer to user data
|
||||
*
|
||||
* @return MU_OK to continue, MU_STOP to stop, or MU_ERROR in
|
||||
* case of some error.
|
||||
*/
|
||||
typedef MuError (*MuIndexDirCallback) (const char* path, gboolean enter,
|
||||
void *user_data);
|
||||
|
||||
/**
|
||||
* start the indexing process
|
||||
*
|
||||
* @param index a valid MuIndex instance
|
||||
* @param force if != 0, force re-indexing already indexed messages; this is
|
||||
* obviously a lot slower than only indexing new/changed messages
|
||||
* @param lazycheck whether to ignore subdirectories that have up-to-date
|
||||
* timestamps.
|
||||
* @param stats a structure with some statistics about the results;
|
||||
* note that this function does *not* reset the struct values to allow
|
||||
* for cumulative stats from multiple calls. If needed, you can use
|
||||
* @mu_index_stats_clear before calling this function
|
||||
* @param cb_msg a callback function called for every msg indexed;
|
||||
* @param cb_dir a callback function called for every dir entered/left or NULL
|
||||
* @param user_data a user pointer that will be passed to the callback function
|
||||
*
|
||||
* @return MU_OK if the indexing was completed successfully,
|
||||
* MU_STOP if the user stopped or MU_ERROR in
|
||||
* case of some error.
|
||||
*/
|
||||
MuError mu_index_run (MuIndex *index, gboolean force,
|
||||
gboolean lazycheck, MuIndexStats *stats,
|
||||
MuIndexMsgCallback msg_cb,
|
||||
MuIndexDirCallback dir_cb, void *user_data);
|
||||
|
||||
/**
|
||||
* gather some statistics about the Maildir; this is usually much faster than
|
||||
* mu_index_run, and can thus be used to provide some information to the user
|
||||
* note though that the statistics may be different from the reality that
|
||||
* mu_index_run sees, when there are updates in the Maildir
|
||||
*
|
||||
* @param index a valid MuIndex instance
|
||||
* @param stats a structure with some statistics about the results;
|
||||
* note that this function does *not* reset the struct values to allow
|
||||
* for cumulative stats from multiple calls. If needed, you can use
|
||||
* @mu_index_stats_clear before calling this function
|
||||
* @param msg_cb a callback function which will be called for every msg;
|
||||
* @param dir_cb a callback function which will be called for every dir or NULL
|
||||
* @param user_data a user pointer that will be passed to the callback function
|
||||
*
|
||||
* @return MU_OK if the stats gathering was completed successfully,
|
||||
* MU_STOP if the user stopped or MU_ERROR in
|
||||
* case of some error.
|
||||
*/
|
||||
MuError mu_index_stats (MuIndex *index, MuIndexStats *stats,
|
||||
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
|
||||
void *user_data);
|
||||
|
||||
/**
|
||||
* callback function called for each message
|
||||
*
|
||||
* @param MuIndexCleanupCallback
|
||||
*
|
||||
* @return a MuResult
|
||||
*/
|
||||
typedef MuError (*MuIndexCleanupDeleteCallback) (MuIndexStats *stats,
|
||||
void *user_data);
|
||||
|
||||
/**
|
||||
* cleanup the database; ie. remove entries for which no longer a corresponding
|
||||
* file exists in the maildir
|
||||
*
|
||||
* @param index a valid MuIndex instance
|
||||
* @param stats a structure with some statistics about the results;
|
||||
* note that this function does *not* reset the struct values to allow
|
||||
* for cumulative stats from multiple calls. If needed, you can use
|
||||
* @mu_index_stats_clear before calling this function
|
||||
* @param cb a callback function which will be called for every msg;
|
||||
* @param user_data a user pointer that will be passed to the callback function
|
||||
* @param err to receive error info or NULL. err->code is MuError value
|
||||
*
|
||||
* @return MU_OK if the cleanup was completed successfully,
|
||||
* MU_STOP if the user stopped or MU_ERROR in
|
||||
* case of some error.
|
||||
*/
|
||||
MuError mu_index_cleanup (MuIndex *index, MuIndexStats *stats,
|
||||
MuIndexCleanupDeleteCallback cb,
|
||||
void *user_data, GError **err);
|
||||
|
||||
/**
|
||||
* clear the stats structure
|
||||
*
|
||||
* @param stats a MuIndexStats object
|
||||
*
|
||||
* @return TRUE if stats != NULL, FALSE otherwise
|
||||
*/
|
||||
gboolean mu_index_stats_clear (MuIndexStats *stats);
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
#endif /*__MU_INDEX_H__*/
|
|
@ -28,7 +28,6 @@
|
|||
|
||||
#include "mu-msg.h"
|
||||
#include "mu-maildir.h"
|
||||
#include "mu-index.h"
|
||||
#include "mu-query.h"
|
||||
#include "mu-msg-iter.h"
|
||||
#include "mu-bookmarks.h"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
** Copyright (C) 2010-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
** Copyright (C) 2010-2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify it
|
||||
** under the terms of the GNU General Public License as published by the
|
||||
|
@ -17,9 +17,7 @@
|
|||
**
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif /*HAVE_CONFIG*/
|
||||
|
||||
#include <gtk/gtk.h>
|
||||
#include <gdk/gdkkeysyms.h>
|
||||
|
@ -28,7 +26,6 @@
|
|||
#include <utils/mu-util.h>
|
||||
#include <mu-store.hh>
|
||||
#include <mu-runtime.h>
|
||||
#include <mu-index.h>
|
||||
|
||||
#include "mug-msg-list-view.h"
|
||||
#include "mug-query-bar.h"
|
||||
|
|
Loading…
Reference in New Issue