mirror of https://github.com/djcb/mu.git
scanner: add maildir-scan mode; improve portability
Use d_ino (struct dirent) only when available. Implement a mode for scanning just maildirs (i.e., the dirs with cur / new in them). Use d_type (if available) to optimize that.
This commit is contained in:
parent
8caf504381
commit
f5beea2eb2
|
@ -37,8 +37,19 @@ lib_mu_index_dep = declare_dependency(
|
||||||
link_with: lib_mu_index
|
link_with: lib_mu_index
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# tests
|
# test tool
|
||||||
|
#
|
||||||
|
executable('list-maildirs', 'mu-scanner.cc',
|
||||||
|
install: false,
|
||||||
|
cpp_args: ['-DBUILD_LIST_MAILDIRS'],
|
||||||
|
dependencies: [glib_dep, config_h_dep,
|
||||||
|
lib_mu_utils_dep])
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# unit tests
|
||||||
#
|
#
|
||||||
|
|
||||||
test('test-scanner',
|
test('test-scanner',
|
||||||
|
|
|
@ -38,9 +38,35 @@
|
||||||
|
|
||||||
using namespace Mu;
|
using namespace Mu;
|
||||||
|
|
||||||
|
using Mode = Scanner::Mode;
|
||||||
|
|
||||||
|
/*
 * Owning copy of the parts of a struct dirent the scanner is interested in.
 *
 * dirent::d_ino and dirent::d_type may not be available; the corresponding
 * members only exist when HAVE_DIRENT_D_INO / HAVE_DIRENT_D_TYPE evaluate to
 * true.
 */
struct dentry_t {
#if HAVE_DIRENT_D_INO
	ino_t		d_ino;	/* inode number, copied from the dirent */
#endif /*HAVE_DIRENT_D_INO*/

#if HAVE_DIRENT_D_TYPE
	unsigned char	d_type;	/* file type (DT_...), copied from the dirent */
#endif /*HAVE_DIRENT_D_TYPE*/

	std::string	d_name;	/* entry name, copied from the dirent */

	/* Copy the available fields out of dentry (must not be null). */
	dentry_t(const struct dirent *dentry):
#if HAVE_DIRENT_D_INO
		d_ino{dentry->d_ino},
#endif /*HAVE_DIRENT_D_INO*/
#if HAVE_DIRENT_D_TYPE
		d_type(dentry->d_type),
#endif /*HAVE_DIRENT_D_TYPE*/
		d_name(dentry->d_name)
	{}
};
|
||||||
|
|
||||||
struct Scanner::Private {
|
struct Scanner::Private {
|
||||||
Private(const std::string& root_dir, Scanner::Handler handler):
|
Private(const std::string& root_dir, Scanner::Handler handler, Mode mode):
|
||||||
root_dir_{root_dir}, handler_{handler} {
|
root_dir_{root_dir}, handler_{handler}, mode_{mode} {
|
||||||
if (root_dir_.length() > PATH_MAX)
|
if (root_dir_.length() > PATH_MAX)
|
||||||
throw Mu::Error{Error::Code::InvalidArgument,
|
throw Mu::Error{Error::Code::InvalidArgument,
|
||||||
"path is too long"};
|
"path is too long"};
|
||||||
|
@ -53,38 +79,35 @@ struct Scanner::Private {
|
||||||
Result<void> start();
|
Result<void> start();
|
||||||
void stop();
|
void stop();
|
||||||
|
|
||||||
struct dentry_t {
|
|
||||||
dentry_t(const struct dirent *dentry):
|
|
||||||
d_ino{dentry->d_ino},
|
|
||||||
d_name{static_cast<const char*>(dentry->d_name)} {}
|
|
||||||
ino_t d_ino;
|
|
||||||
std::string d_name;
|
|
||||||
};
|
|
||||||
|
|
||||||
bool process_dentry(const std::string& path, const dentry_t& dentry,
|
bool process_dentry(const std::string& path, const dentry_t& dentry,
|
||||||
bool is_maildir);
|
bool is_maildir);
|
||||||
bool process_dir(const std::string& path, bool is_maildir);
|
bool process_dir(const std::string& path, bool is_maildir);
|
||||||
|
|
||||||
const std::string root_dir_;
|
int lazy_stat(const char *fullpath, struct stat *stat_buf,
|
||||||
const Scanner::Handler handler_;
|
const dentry_t& dentry);
|
||||||
std::atomic<bool> running_{};
|
|
||||||
std::mutex lock_;
|
bool maildirs_only_mode() const { return mode_ == Mode::MaildirsOnly; }
|
||||||
|
|
||||||
|
const std::string root_dir_;
|
||||||
|
const Scanner::Handler handler_;
|
||||||
|
Mode mode_;
|
||||||
|
std::atomic<bool> running_{};
|
||||||
|
std::mutex lock_;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_dotdir(const char *d_name)
|
ignore_dentry(const dentry_t& dentry)
|
||||||
{
|
{
|
||||||
|
const auto d_name{dentry.d_name.c_str()};
|
||||||
|
|
||||||
/* dotdir? */
|
/* dotdir? */
|
||||||
if (d_name[0] == '\0' || (d_name[1] == '\0' && d_name[0] == '.') ||
|
if (d_name[0] == '\0' || (d_name[1] == '\0' && d_name[0] == '.') ||
|
||||||
(d_name[2] == '\0' && d_name[0] == '.' && d_name[1] == '.'))
|
(d_name[2] == '\0' && d_name[0] == '.' && d_name[1] == '.'))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
if (g_strcmp0(d_name, "tmp") == 0)
|
||||||
}
|
return true;
|
||||||
|
|
||||||
static bool
|
|
||||||
do_ignore(const char *d_name)
|
|
||||||
{
|
|
||||||
if (d_name[0] == '.') {
|
if (d_name[0] == '.') {
|
||||||
if (d_name[1] == '#') /* emacs? */
|
if (d_name[1] == '#') /* emacs? */
|
||||||
return true;
|
return true;
|
||||||
|
@ -97,45 +120,78 @@ do_ignore(const char *d_name)
|
||||||
if (g_strcmp0(d_name, "hcache.db") == 0) /* mutt cache? */
|
if (g_strcmp0(d_name, "hcache.db") == 0) /* mutt cache? */
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false; /* don't ignore */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* stat() if necessary (we'd like to avoid it), which we can if we only need the
|
||||||
|
* file-type and we already have that from the dentry.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
Scanner::Private::lazy_stat(const char *path, struct stat *stat_buf, const dentry_t& dentry)
|
||||||
|
{
|
||||||
|
#if HAVE_DIRENT_D_TYPE
|
||||||
|
if (maildirs_only_mode()) {
|
||||||
|
switch (dentry.d_type) {
|
||||||
|
case DT_REG:
|
||||||
|
stat_buf->st_mode = S_IFREG;
|
||||||
|
return 0;
|
||||||
|
case DT_DIR:
|
||||||
|
stat_buf->st_mode = S_IFDIR;
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
/* LNK is inconclusive; we need a stat. */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /*HAVE_DIRENT_D_TYPE*/
|
||||||
|
|
||||||
|
int res = ::stat(path, stat_buf);
|
||||||
|
if (res != 0)
|
||||||
|
mu_warning("failed to stat {}: {}", path, g_strerror(errno));
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
Scanner::Private::process_dentry(const std::string& path, const dentry_t& dentry,
|
Scanner::Private::process_dentry(const std::string& path, const dentry_t& dentry,
|
||||||
bool is_maildir)
|
bool is_maildir)
|
||||||
{
|
{
|
||||||
const auto d_name{dentry.d_name.c_str()};
|
if (ignore_dentry(dentry))
|
||||||
|
return true;
|
||||||
|
|
||||||
if (is_dotdir(d_name) || std::strcmp(d_name, "tmp") == 0)
|
auto call_handler=[&](auto&& path, auto&& statbuf, auto&& htype)->bool {
|
||||||
return true; // ignore.
|
return maildirs_only_mode() ? true : handler_(path, statbuf, htype);
|
||||||
if (do_ignore(d_name)) {
|
};
|
||||||
mu_debug("skip {}/{} (ignore)", path, d_name);
|
|
||||||
return true; // ignore
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto fullpath{join_paths(path, d_name)};
|
const auto fullpath{join_paths(path, dentry.d_name)};
|
||||||
struct stat statbuf {};
|
struct stat statbuf{};
|
||||||
if (::stat(fullpath.c_str(), &statbuf) != 0) {
|
if (lazy_stat(fullpath.c_str(), &statbuf, dentry) != 0)
|
||||||
mu_warning("failed to stat {}: {}", fullpath, g_strerror(errno));
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (maildirs_only_mode() && S_ISDIR(statbuf.st_mode) && dentry.d_name == "cur") {
|
||||||
|
handler_(path/*without cur*/, {}, Scanner::HandleType::Maildir);
|
||||||
|
return true; // found maildir; no need to recurse further.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (S_ISDIR(statbuf.st_mode)) {
|
if (S_ISDIR(statbuf.st_mode)) {
|
||||||
const auto new_cur =
|
const auto new_cur = dentry.d_name == "cur" || dentry.d_name == "new";
|
||||||
std::strcmp(d_name, "cur") == 0 || std::strcmp(d_name, "new") == 0;
|
|
||||||
const auto htype =
|
const auto htype =
|
||||||
new_cur ?
|
new_cur ?
|
||||||
Scanner::HandleType::EnterNewCur :
|
Scanner::HandleType::EnterNewCur :
|
||||||
Scanner::HandleType::EnterDir;
|
Scanner::HandleType::EnterDir;
|
||||||
const auto res = handler_(fullpath, &statbuf, htype);
|
|
||||||
|
const auto res = call_handler(fullpath, &statbuf, htype);
|
||||||
if (!res)
|
if (!res)
|
||||||
return true; // skip
|
return true; // skip
|
||||||
|
|
||||||
process_dir(fullpath, new_cur);
|
process_dir(fullpath, new_cur);
|
||||||
return handler_(fullpath, &statbuf, Scanner::HandleType::LeaveDir);
|
return call_handler(fullpath, &statbuf, Scanner::HandleType::LeaveDir);
|
||||||
|
|
||||||
} else if (S_ISREG(statbuf.st_mode) && is_maildir)
|
} else if (S_ISREG(statbuf.st_mode) && is_maildir)
|
||||||
return handler_(fullpath, &statbuf, Scanner::HandleType::File);
|
return call_handler(fullpath, &statbuf, Scanner::HandleType::File);
|
||||||
|
|
||||||
mu_debug("skip {} (neither maildir-file nor directory)", fullpath);
|
mu_debug("skip {} (neither maildir-file nor directory)", fullpath);
|
||||||
|
|
||||||
|
@ -165,6 +221,11 @@ Scanner::Private::process_dir(const std::string& path, bool is_maildir)
|
||||||
while (running_) {
|
while (running_) {
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if (const auto& dentry{::readdir(dir)}; dentry) {
|
if (const auto& dentry{::readdir(dir)}; dentry) {
|
||||||
|
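#if HAVE_DIRENT_D_TYPE /* optimization: filter out non-dirs early. NOTE(review): this also drops DT_UNKNOWN entries, so nothing is scanned on file systems that do not report d_type -- confirm whether those should be kept and left to lazy_stat */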
|
||||||
|
if (maildirs_only_mode() &&
|
||||||
|
dentry->d_type != DT_DIR && dentry->d_type != DT_LNK)
|
||||||
|
continue;
|
||||||
|
#endif /*HAVE_DIRENT_D_TYPE*/
|
||||||
dir_entries.emplace_back(dentry);
|
dir_entries.emplace_back(dentry);
|
||||||
continue;
|
continue;
|
||||||
} else if (errno != 0) {
|
} else if (errno != 0) {
|
||||||
|
@ -176,10 +237,12 @@ Scanner::Private::process_dir(const std::string& path, bool is_maildir)
|
||||||
}
|
}
|
||||||
::closedir(dir);
|
::closedir(dir);
|
||||||
|
|
||||||
|
#if HAVE_DIRENT_D_INO
|
||||||
// sort by i-node; much faster on rotational (HDDs) devices and on SSDs
|
// sort by i-node; much faster on rotational (HDDs) devices and on SSDs
|
||||||
// sort is quick enough to not matter much
|
// sort is quick enough to not matter much
|
||||||
std::sort(dir_entries.begin(), dir_entries.end(),
|
std::sort(dir_entries.begin(), dir_entries.end(),
|
||||||
[](auto&& d1, auto&& d2){ return d1.d_ino < d2.d_ino; });
|
[](auto&& d1, auto&& d2){ return d1.d_ino < d2.d_ino; });
|
||||||
|
#endif /*HAVE_DIRENT_D_INO*/
|
||||||
|
|
||||||
// now process...
|
// now process...
|
||||||
for (auto&& dentry: dir_entries)
|
for (auto&& dentry: dir_entries)
|
||||||
|
@ -231,8 +294,8 @@ Scanner::Private::stop()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Scanner::Scanner(const std::string& root_dir, Scanner::Handler handler)
|
Scanner::Scanner(const std::string& root_dir, Scanner::Handler handler, Mode flavor)
|
||||||
: priv_{std::make_unique<Private>(root_dir, handler)}
|
: priv_{std::make_unique<Private>(root_dir, handler, flavor)}
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -264,12 +327,9 @@ Scanner::is_running() const
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if BUILD_TESTS
|
#if BUILD_TESTS
|
||||||
#include "mu-test-utils.hh"
|
#include "mu-test-utils.hh"
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
test_scan_maildir()
|
test_scan_maildir()
|
||||||
{
|
{
|
||||||
|
@ -307,6 +367,29 @@ try {
|
||||||
mu_printerrln("caught exception");
|
mu_printerrln("caught exception");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /*BUILD_TESTS*/
|
#endif /*BUILD_TESTS*/
|
||||||
|
|
||||||
|
#if BUILD_LIST_MAILDIRS
|
||||||
|
|
||||||
|
static bool
|
||||||
|
on_path(const std::string& path, struct stat* statbuf, Scanner::HandleType htype)
|
||||||
|
{
|
||||||
|
mu_println("{}", path);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char *argv[])
|
||||||
|
{
|
||||||
|
if (argc < 2) {
|
||||||
|
mu_printerrln("expected: path to maildir");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Scanner scanner{argv[1], on_path, Mode::MaildirsOnly};
|
||||||
|
|
||||||
|
scanner.start();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif /*BUILD_LIST_MAILDIRS*/
|
||||||
|
|
|
@ -31,28 +31,51 @@
|
||||||
|
|
||||||
namespace Mu {
|
namespace Mu {
|
||||||
|
|
||||||
/// @brief Maildir scanner
|
/**
|
||||||
///
|
* @brief Maildir scanner
|
||||||
/// Scans maildir (trees) recursively, and calls the Handler callback for
|
*
|
||||||
/// directories & files.
|
* Scans maildir (trees) recursively, and calls the Handler callback for
|
||||||
///
|
* directories & files.
|
||||||
/// It filters out (i.e., does *not* call the handler for):
|
*
|
||||||
/// - files starting with '.'
|
* It filters out (i.e., does *not* call the handler for):
|
||||||
/// - files that do not live in a cur / new leaf maildir
|
* - files starting with '.'
|
||||||
/// - directories '.' and '..' and 'tmp'
|
* - files that do not live in a cur / new leaf maildir
|
||||||
///
|
* - directories '.' and '..' and 'tmp'
|
||||||
|
*/
|
||||||
class Scanner {
|
class Scanner {
|
||||||
public:
|
public:
|
||||||
enum struct HandleType {
|
enum struct HandleType {
|
||||||
|
/*
|
||||||
|
* Mode: All
|
||||||
|
*/
|
||||||
File,
|
File,
|
||||||
EnterNewCur, /* cur/ or new/ */
|
EnterNewCur, /* cur/ or new/ */
|
||||||
EnterDir, /* some other directory */
|
EnterDir, /* some other directory */
|
||||||
LeaveDir
|
LeaveDir,
|
||||||
|
/*
|
||||||
|
* Mode: MaildirsOnly
|
||||||
|
*/
|
||||||
|
Maildir,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Prototype for a handler function
|
/**
|
||||||
|
* Callback handler function
|
||||||
|
*
|
||||||
|
* path: full file-system path
|
||||||
|
* statbuf: stat result or nullptr (for Mode::MaildirsOnly)
|
||||||
|
* htype: HandleType. For Mode::MaildirsOnly only Maildir
|
||||||
|
*/
|
||||||
using Handler = std::function<
|
using Handler = std::function<
|
||||||
bool(const std::string& fullpath, struct stat* statbuf, HandleType htype)>;
|
bool(const std::string& path, struct stat* statbuf, HandleType htype)>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Running mode for this Scanner
|
||||||
|
*/
|
||||||
|
enum struct Mode {
|
||||||
|
All, /**< Vanilla */
|
||||||
|
MaildirsOnly /**< Only return maildir to handler */
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a scanner object for scanning a directory, recursively.
|
* Construct a scanner object for scanning a directory, recursively.
|
||||||
*
|
*
|
||||||
|
@ -60,15 +83,16 @@ class Scanner {
|
||||||
*
|
*
|
||||||
* @param root_dir root dir to start scanning
|
* @param root_dir root dir to start scanning
|
||||||
* @param handler handler function for some direntry
|
* @param handler handler function for some direntry
|
||||||
|
* @param mode running mode for the scanner (defaults to Mode::All)
|
||||||
*/
|
*/
|
||||||
Scanner(const std::string& root_dir, Handler handler);
|
Scanner(const std::string& root_dir, Handler handler, Mode mode = Mode::All);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DTOR
|
* DTOR
|
||||||
*/
|
*/
|
||||||
~Scanner();
|
~Scanner();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start the scan; this is a blocking call that runs until
|
* Start the scan; this is a blocking call that runs until
|
||||||
* finished or (from another thread) stop() is called.
|
* finished or (from another thread) stop() is called.
|
||||||
*
|
*
|
||||||
|
|
23
meson.build
23
meson.build
|
@ -47,20 +47,21 @@ endif
|
||||||
# compilers / flags
|
# compilers / flags
|
||||||
#
|
#
|
||||||
extra_flags = [
|
extra_flags = [
|
||||||
'-Wc11-extensions', # for clang
|
|
||||||
'-Wno-unused-parameter',
|
'-Wno-unused-parameter',
|
||||||
'-Wno-cast-function-type',
|
'-Wno-cast-function-type',
|
||||||
'-Wformat-security',
|
'-Wformat-security',
|
||||||
'-Wformat=2',
|
'-Wformat=2',
|
||||||
'-Wstack-protector',
|
'-Wstack-protector',
|
||||||
'-Wno-switch-enum',
|
'-Wno-switch-enum',
|
||||||
'-Wno-keyword-macro',
|
|
||||||
'-Wno-volatile',
|
'-Wno-volatile',
|
||||||
'-Wno-deprecated-volatile',
|
|
||||||
'-Wno-#warnings',
|
|
||||||
# assuming these are false alarm... (in fmt, with gcc13):
|
# assuming these are false alarm... (in fmt, with gcc13):
|
||||||
'-Wno-array-bounds',
|
'-Wno-array-bounds',
|
||||||
'-Wno-stringop-overflow',
|
'-Wno-stringop-overflow',
|
||||||
|
# clang
|
||||||
|
'-Wc11-extensions', # for clang
|
||||||
|
'-Wno-keyword-macro',
|
||||||
|
'-Wno-deprecated-volatile',
|
||||||
|
'-Wno-#warnings',
|
||||||
]
|
]
|
||||||
|
|
||||||
if get_option('buildtype') == 'debug'
|
if get_option('buildtype') == 'debug'
|
||||||
|
@ -104,6 +105,20 @@ add_project_arguments(['-DHAVE_CONFIG_H'], language: 'cpp')
|
||||||
config_h_dep=declare_dependency(
|
config_h_dep=declare_dependency(
|
||||||
include_directories: include_directories(['.']))
|
include_directories: include_directories(['.']))
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# d_type, d_ino are not available universally, so let's check
|
||||||
|
# (we use them for optimizations in mu-scanner)
|
||||||
|
#
|
||||||
|
if cxx.has_member('struct dirent', 'd_ino', prefix : '#include<dirent.h>')
|
||||||
|
config_h_data.set('HAVE_DIRENT_D_INO', 1)
|
||||||
|
endif
|
||||||
|
|
||||||
|
if cxx.has_member('struct dirent', 'd_type', prefix : '#include<dirent.h>')
|
||||||
|
config_h_data.set('HAVE_DIRENT_D_TYPE', 1)
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
functions=[
|
functions=[
|
||||||
'setsid'
|
'setsid'
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue