scanner: add maildir-scan mode; improve portability

Use d_ino (struct dirent) only when available.

Implement a mode for scanning just maildirs (ie. the dirs with cur / new
in them). Use d_type (if available) to optimize that.
This commit is contained in:
Dirk-Jan C. Binnema 2023-08-12 15:59:32 +03:00
parent 8caf504381
commit f5beea2eb2
4 changed files with 197 additions and 64 deletions

View File

@ -37,8 +37,19 @@ lib_mu_index_dep = declare_dependency(
link_with: lib_mu_index
)
#
# tests
# test tool
#
executable('list-maildirs', 'mu-scanner.cc',
install: false,
cpp_args: ['-DBUILD_LIST_MAILDIRS'],
dependencies: [glib_dep, config_h_dep,
lib_mu_utils_dep])
#
# unit tests
#
test('test-scanner',

View File

@ -38,9 +38,35 @@
using namespace Mu;
using Mode = Scanner::Mode;
/*
* dentry->d_ino, dentry->d_type may not be available
*/
struct dentry_t {
dentry_t(const struct dirent *dentry):
#if HAVE_DIRENT_D_INO
d_ino{dentry->d_ino},
#endif /*HAVE_DIRENT_D_INO*/
#if HAVE_DIRENT_D_TYPE
d_type(dentry->d_type),
#endif /*HAVE_DIRENT_D_TYPE*/
d_name{static_cast<const char*>(dentry->d_name)} {}
#if HAVE_DIRENT_D_INO
ino_t d_ino;
#endif /*HAVE_DIRENT_D_INO*/
#if HAVE_DIRENT_D_TYPE
unsigned char d_type;
#endif /*HAVE_DIRENT_D_TYPE*/
std::string d_name;
};
struct Scanner::Private {
Private(const std::string& root_dir, Scanner::Handler handler):
root_dir_{root_dir}, handler_{handler} {
Private(const std::string& root_dir, Scanner::Handler handler, Mode mode):
root_dir_{root_dir}, handler_{handler}, mode_{mode} {
if (root_dir_.length() > PATH_MAX)
throw Mu::Error{Error::Code::InvalidArgument,
"path is too long"};
@ -53,38 +79,35 @@ struct Scanner::Private {
Result<void> start();
void stop();
struct dentry_t {
dentry_t(const struct dirent *dentry):
d_ino{dentry->d_ino},
d_name{static_cast<const char*>(dentry->d_name)} {}
ino_t d_ino;
std::string d_name;
};
bool process_dentry(const std::string& path, const dentry_t& dentry,
bool is_maildir);
bool process_dir(const std::string& path, bool is_maildir);
const std::string root_dir_;
const Scanner::Handler handler_;
std::atomic<bool> running_{};
std::mutex lock_;
int lazy_stat(const char *fullpath, struct stat *stat_buf,
const dentry_t& dentry);
bool maildirs_only_mode() const { return mode_ == Mode::MaildirsOnly; }
const std::string root_dir_;
const Scanner::Handler handler_;
Mode mode_;
std::atomic<bool> running_{};
std::mutex lock_;
};
static bool
is_dotdir(const char *d_name)
ignore_dentry(const dentry_t& dentry)
{
const auto d_name{dentry.d_name.c_str()};
/* dotdir? */
if (d_name[0] == '\0' || (d_name[1] == '\0' && d_name[0] == '.') ||
(d_name[2] == '\0' && d_name[0] == '.' && d_name[1] == '.'))
return true;
return false;
}
if (g_strcmp0(d_name, "tmp") == 0)
return true;
static bool
do_ignore(const char *d_name)
{
if (d_name[0] == '.') {
if (d_name[1] == '#') /* emacs? */
return true;
@ -97,45 +120,78 @@ do_ignore(const char *d_name)
if (g_strcmp0(d_name, "hcache.db") == 0) /* mutt cache? */
return true;
return false;
return false; /* don't ignore */
}
/*
* stat() if necessary (we'd like to avoid it), which we can if we only need the
* file-type and we already have that from the dentry.
*/
int
Scanner::Private::lazy_stat(const char *path, struct stat *stat_buf, const dentry_t& dentry)
{
#if HAVE_DIRENT_D_TYPE
if (maildirs_only_mode()) {
switch (dentry.d_type) {
case DT_REG:
stat_buf->st_mode = S_IFREG;
return 0;
case DT_DIR:
stat_buf->st_mode = S_IFDIR;
return 0;
default:
/* LNK is inconclusive; we need a stat. */
break;
}
}
#endif /*HAVE_DIRENT_D_TYPE*/
int res = ::stat(path, stat_buf);
if (res != 0)
mu_warning("failed to stat {}: {}", path, g_strerror(errno));
return res;
}
bool
Scanner::Private::process_dentry(const std::string& path, const dentry_t& dentry,
bool is_maildir)
{
const auto d_name{dentry.d_name.c_str()};
if (ignore_dentry(dentry))
return true;
if (is_dotdir(d_name) || std::strcmp(d_name, "tmp") == 0)
return true; // ignore.
if (do_ignore(d_name)) {
mu_debug("skip {}/{} (ignore)", path, d_name);
return true; // ignore
}
auto call_handler=[&](auto&& path, auto&& statbuf, auto&& htype)->bool {
return maildirs_only_mode() ? true : handler_(path, statbuf, htype);
};
const auto fullpath{join_paths(path, d_name)};
struct stat statbuf {};
if (::stat(fullpath.c_str(), &statbuf) != 0) {
mu_warning("failed to stat {}: {}", fullpath, g_strerror(errno));
const auto fullpath{join_paths(path, dentry.d_name)};
struct stat statbuf{};
if (lazy_stat(fullpath.c_str(), &statbuf, dentry) != 0)
return false;
if (maildirs_only_mode() && S_ISDIR(statbuf.st_mode) && dentry.d_name == "cur") {
handler_(path/*without cur*/, {}, Scanner::HandleType::Maildir);
return true; // found maildir; no need to recurse further.
}
if (S_ISDIR(statbuf.st_mode)) {
const auto new_cur =
std::strcmp(d_name, "cur") == 0 || std::strcmp(d_name, "new") == 0;
const auto new_cur = dentry.d_name == "cur" || dentry.d_name == "new";
const auto htype =
new_cur ?
Scanner::HandleType::EnterNewCur :
Scanner::HandleType::EnterDir;
const auto res = handler_(fullpath, &statbuf, htype);
const auto res = call_handler(fullpath, &statbuf, htype);
if (!res)
return true; // skip
process_dir(fullpath, new_cur);
return handler_(fullpath, &statbuf, Scanner::HandleType::LeaveDir);
return call_handler(fullpath, &statbuf, Scanner::HandleType::LeaveDir);
} else if (S_ISREG(statbuf.st_mode) && is_maildir)
return handler_(fullpath, &statbuf, Scanner::HandleType::File);
return call_handler(fullpath, &statbuf, Scanner::HandleType::File);
mu_debug("skip {} (neither maildir-file nor directory)", fullpath);
@ -165,6 +221,11 @@ Scanner::Private::process_dir(const std::string& path, bool is_maildir)
while (running_) {
errno = 0;
if (const auto& dentry{::readdir(dir)}; dentry) {
#if HAVE_DIRENT_D_TYPE /* opttimization: filter out non-dirs early */
if (maildirs_only_mode() &&
dentry->d_type != DT_DIR && dentry->d_type != DT_LNK)
continue;
#endif /*HAVE_DIRENT_D_TYPE*/
dir_entries.emplace_back(dentry);
continue;
} else if (errno != 0) {
@ -176,10 +237,12 @@ Scanner::Private::process_dir(const std::string& path, bool is_maildir)
}
::closedir(dir);
#if HAVE_DIRENT_D_INO
// sort by i-node; much faster on rotational (HDDs) devices and on SSDs
// sort is quick enough to not matter much
std::sort(dir_entries.begin(), dir_entries.end(),
[](auto&& d1, auto&& d2){ return d1.d_ino < d2.d_ino; });
#endif /*HAVEN_DIRENT_D_INO*/
// now process...
for (auto&& dentry: dir_entries)
@ -231,8 +294,8 @@ Scanner::Private::stop()
}
}
Scanner::Scanner(const std::string& root_dir, Scanner::Handler handler)
: priv_{std::make_unique<Private>(root_dir, handler)}
Scanner::Scanner(const std::string& root_dir, Scanner::Handler handler, Mode flavor)
: priv_{std::make_unique<Private>(root_dir, handler, flavor)}
{
}
@ -264,12 +327,9 @@ Scanner::is_running() const
}
#if BUILD_TESTS
#include "mu-test-utils.hh"
static void
test_scan_maildir()
{
@ -307,6 +367,29 @@ try {
mu_printerrln("caught exception");
return 1;
}
#endif /*BUILD_TESTS*/
#if BUILD_LIST_MAILDIRS
static bool
on_path(const std::string& path, struct stat* statbuf, Scanner::HandleType htype)
{
mu_println("{}", path);
return true;
}
int
main (int argc, char *argv[])
{
if (argc < 2) {
mu_printerrln("expected: path to maildir");
return 1;
}
Scanner scanner{argv[1], on_path, Mode::MaildirsOnly};
scanner.start();
return 0;
}
#endif /*BUILD_LIST_MAILDIRS*/

View File

@ -31,28 +31,51 @@
namespace Mu {
/// @brief Maildir scanner
///
/// Scans maildir (trees) recursively, and calls the Handler callback for
/// directories & files.
///
/// It filters out (i.e., does *not* call the handler for):
/// - files starting with '.'
/// - files that do not live in a cur / new leaf maildir
/// - directories '.' and '..' and 'tmp'
///
/**
* @brief Maildir scanner
*
* Scans maildir (trees) recursively, and calls the Handler callback for
* directories & files.
*
* It filters out (i.e., does *not* call the handler for):
* - files starting with '.'
* - files that do not live in a cur / new leaf maildir
* - directories '.' and '..' and 'tmp'
*/
class Scanner {
public:
enum struct HandleType {
/*
* Mode: All
*/
File,
EnterNewCur, /* cur/ or new/ */
EnterDir, /* some other directory */
LeaveDir
LeaveDir,
/*
* Mode: Maildir
*/
Maildir,
};
/// Prototype for a handler function
/**
* Callback handler function
*
* path: full file-system path
* statbuf: stat result or nullptr (for Mode::MaildirsOnly)
* htype: HandleType. For Mode::MaildirsOnly only Maildir
*/
using Handler = std::function<
bool(const std::string& fullpath, struct stat* statbuf, HandleType htype)>;
bool(const std::string& path, struct stat* statbuf, HandleType htype)>;
/**
* Running mode for this Scanner
*/
enum struct Mode {
All, /**< Vanilla */
MaildirsOnly /**< Only return maildir to handler */
};
/**
* Construct a scanner object for scanning a directory, recursively.
*
@ -60,15 +83,16 @@ class Scanner {
*
* @param root_dir root dir to start scanning
* @param handler handler function for some direntry
* @param options options to influence behavior
*/
Scanner(const std::string& root_dir, Handler handler);
Scanner(const std::string& root_dir, Handler handler, Mode mode = Mode::All);
/**
* DTOR
*/
~Scanner();
/**
/**#
* Start the scan; this is a blocking call than runs until
* finished or (from another thread) stop() is called.
*

View File

@ -47,20 +47,21 @@ endif
# compilers / flags
#
extra_flags = [
'-Wc11-extensions', # for clang
'-Wno-unused-parameter',
'-Wno-cast-function-type',
'-Wformat-security',
'-Wformat=2',
'-Wstack-protector',
'-Wno-switch-enum',
'-Wno-keyword-macro',
'-Wno-volatile',
'-Wno-deprecated-volatile',
'-Wno-#warnings',
# assuming these are false alarm... (in fmt, with gcc13):
'-Wno-array-bounds',
'-Wno-stringop-overflow',
# clang
'-Wc11-extensions', # for clang
'-Wno-keyword-macro',
'-Wno-deprecated-volatile',
'-Wno-#warnings',
]
if get_option('buildtype') == 'debug'
@ -104,6 +105,20 @@ add_project_arguments(['-DHAVE_CONFIG_H'], language: 'cpp')
config_h_dep=declare_dependency(
include_directories: include_directories(['.']))
#
# d_type, d_ino are not available universally, so let's check
# (we use them for optimizations in mu-scanner
#
if cxx.has_member('struct dirent', 'd_ino', prefix : '#include<dirent.h>')
config_h_data.set('HAVE_DIRENT_D_INO', 1)
endif
if cxx.has_member('struct dirent', 'd_type', prefix : '#include<dirent.h>')
config_h_data.set('HAVE_DIRENT_D_TYPE', 1)
endif
functions=[
'setsid'
]