From c5b3059442d2cf8703b27c951b5e3b13e4a0dab9 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Thu, 6 Jan 2011 16:21:09 +0200 Subject: [PATCH] * add searching for message size ranges --- man/mu-find.1 | 10 +++++++ src/mu-query.cc | 70 ++++++++++++++++++++++++++++++++++++++++++++++--- src/mu-str.c | 27 +++++++++++++++++++ src/mu-str.h | 19 ++++++++++++++ 4 files changed, 122 insertions(+), 4 deletions(-) diff --git a/man/mu-find.1 b/man/mu-find.1 index 8a531f4d..7abcf4d4 100644 --- a/man/mu-find.1 +++ b/man/mu-find.1 @@ -106,6 +106,7 @@ search fields and their abbreviations: prio,p Message priority ('low', 'normal' or 'high') flag,g Message Flags date,d Date-Range + size,z Message size .fi For clarity, this man-page uses the longer versions. @@ -223,6 +224,15 @@ sent or received today, you could use: $ mu find date:today..now .fi +The \fBsize\fR or \fBz\fR allows you to match \fIsize ranges\fR -- that is, +match messages that have a byte-size within a certain range. Units (K for +1000 and M for 1000 * 1000) are supported. 
For example to get all messages +between 10Kb and 2Mb (assuming SI units), you could use: + +.nf + $ mu find size:10K..2M +.fi + .SH OPTIONS Note, some of the important options are described in the \fBmu(1)\fR man-page diff --git a/src/mu-query.cc b/src/mu-query.cc index 2cf36f30..ac46d326 100644 --- a/src/mu-query.cc +++ b/src/mu-query.cc @@ -130,12 +130,65 @@ private: }; +/* range processor for size queries such as 'size:10K..2M'; both bounds */ +/* are parsed with mu_str_size_parse_kmg, so they may carry a K or M unit */ +class MuSizeRangeProcessor : public Xapian::NumberValueRangeProcessor { +public: + explicit MuSizeRangeProcessor(Xapian::valueno v) + : Xapian::NumberValueRangeProcessor(v) { + } + + /* if begin starts with the size prefix and both bounds are valid sizes, */ + /* serialise them in place and return the size slot; else BAD_VALUENO */ + Xapian::valueno operator()(std::string &begin, std::string &end) { + if (!clear_prefix (begin)) + return Xapian::BAD_VALUENO; + + if (!substitute_size (begin) || !substitute_size (end)) + return Xapian::BAD_VALUENO; + + return static_cast<Xapian::valueno>(MU_MSG_FIELD_ID_SIZE); + } +private: + /* strip the leading "<name>:" or "<shortcut>:" of the size field from */ + /* begin; return false, leaving begin as it is, when it has neither */ + bool clear_prefix (std::string& begin) { + const std::string colon (":"); + const std::string name (mu_msg_field_name + (MU_MSG_FIELD_ID_SIZE) + colon); + const std::string shortcut ( + std::string(1, mu_msg_field_shortcut + (MU_MSG_FIELD_ID_SIZE)) + colon); + + if (begin.find (name) == 0) { + begin.erase (0, name.length()); + return true; + } else if (begin.find (shortcut) == 0) { + begin.erase (0, shortcut.length()); + return true; + } else + return false; + } + + /* replace a size such as "10K" by the sortable_serialise'd number of */ + /* bytes; no text buffer or atol() in between, which could truncate it */ + bool substitute_size (std::string& size) { + const guint64 num = mu_str_size_parse_kmg (size.c_str()); + if (num == G_MAXUINT64) + return false; + size = Xapian::sortable_serialise (static_cast<double>(num)); + return true; + } +}; + + static void add_prefix (MuMsgFieldId field, Xapian::QueryParser* qparser); struct _MuQuery { Xapian::Database* _db; Xapian::QueryParser* _qparser; - Xapian::ValueRangeProcessor* _range_processor; + Xapian::ValueRangeProcessor* _date_range_processor; + Xapian::ValueRangeProcessor* _size_range_processor; }; gboolean @@ 
-151,9 +204,16 @@ init_mu_query (MuQuery *mqx, const char* dbpath) mqx->_qparser->set_database (*mqx->_db); mqx->_qparser->set_default_op (Xapian::Query::OP_AND); - mqx->_range_processor = new MuDateRangeProcessor (); + /* check for dates */ + mqx->_date_range_processor = new MuDateRangeProcessor (); mqx->_qparser->add_valuerangeprocessor - (mqx->_range_processor); + (mqx->_date_range_processor); + + /* check for sizes */ + mqx->_size_range_processor = new MuSizeRangeProcessor + (MU_MSG_FIELD_ID_SIZE); + mqx->_qparser->add_valuerangeprocessor + (mqx->_size_range_processor); mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix, (gpointer)mqx->_qparser); @@ -178,7 +238,9 @@ uninit_mu_query (MuQuery *mqx) try { delete mqx->_db; delete mqx->_qparser; - delete mqx->_range_processor; + + delete mqx->_date_range_processor; + delete mqx->_size_range_processor; } MU_XAPIAN_CATCH_BLOCK; } diff --git a/src/mu-str.c b/src/mu-str.c index 4f643f01..90ffb729 100644 --- a/src/mu-str.c +++ b/src/mu-str.c @@ -309,6 +309,33 @@ mu_str_date_parse_hdwmy (const char* str) return delta <= now ? now - delta : never; } +guint64 +mu_str_size_parse_kmg (const char* str) +{ + guint64 num, mult; + char *end; + + g_return_val_if_fail (str, G_MAXUINT64); + + /* digits only: the strto* parsers also take blanks and a sign */ + if (!g_ascii_isdigit (str[0])) + return G_MAXUINT64; + + num = g_ascii_strtoull (str, &end, 10); + if (num == 0) /* as before, a size of 0 is an error */ + return G_MAXUINT64; + + /* only the first unit letter counts; no unit means bytes */ + switch (g_ascii_tolower (end[0])) { + case '\0': mult = 1; break; + case 'k': mult = 1000; break; /* kilobyte */ + case 'm': mult = 1000 * 1000; break; /* megabyte */ + default: return G_MAXUINT64; + } + + /* overflow (also from strtoull) maps to the error value */ + return num > G_MAXUINT64 / mult ? G_MAXUINT64 : num * mult; +} char* diff --git a/src/mu-str.h b/src/mu-str.h index be48ae70..2b68abfa 100644 --- a/src/mu-str.h +++ b/src/mu-str.h @@ -209,6 +209,25 @@ char* mu_str_ascii_xapian_escape (const char *query); time_t mu_str_date_parse_hdwmy (const char* str); + +/** + * parse a byte size; a size is a number, with optionally a + * unit. Units recognized are K (1000) and M (1000*1000). 
Only the + * first letter is checked and the function is not case-sensitive, so + * 1000Kb, 3M will work equally well. Note, for kB, MB etc., we then + * follow the SI standards, not 2^10 etc. + * + * practical sizes for email messages are in terms of Mb; even in + * extreme cases it should be under 100 Mb. The function returns + * G_MAXUINT64 if there is a parsing error + * + * @param str a string with a size, such as "100", "100Kb", "1Mb" + * + * @return the size in bytes, or G_MAXUINT64 if str could not be parsed + */ +guint64 mu_str_size_parse_kmg (const char* str); + + /** * create a full path from a path + a filename. function is _not_ * reentrant.