diff --git a/configure.ac b/configure.ac index b3b8d869..d2298b39 100644 --- a/configure.ac +++ b/configure.ac @@ -52,7 +52,10 @@ AC_PROG_CC_C99 AC_PROG_INSTALL AC_HEADER_STDC +AX_CXX_COMPILE_STDCXX_14 + AX_COMPILER_FLAGS(,,[yes]) +AX_VALGRIND_CHECK # for now, use AM_PROG_LIBTOOL, as we don't want to require # a too new setup for autotools/libtool @@ -83,7 +86,7 @@ AS_IF([test "x$enable_mu4e" != "xno"], [ lispdir="${lispdir}/mu4e/" ]) AS_CASE([$emacs_version],[*23*|*24*|*25*|*26*],[build_mu4e=yes], - [AC_WARN([emacs is too old to build mu4e (need emacs >= 23.x)])]) + [AC_WARN([emacs is too old to build mu4e (need emacs >= 23.x)])]) ]) AM_CONDITIONAL(BUILD_MU4E, test "x$build_mu4e" = "xyes") @@ -96,7 +99,7 @@ AC_CHECK_PROGS([PERL], [perl], [no]) AS_IF([test x"$enable_perl" = "xyes" -a x"$PERL" != "xno"], [ AM_PERL_MODULE([Data::SExpression],[build_perl=yes]) if test x"$build_perl" = "xyes"; then - perl_version=`$PERL -Iperl/lib -Mmup -e 'print "$mup::VERSION\n";'` + perl_version=`$PERL -Iperl/lib -Mmup -e 'print "$mup::VERSION\n";'` fi ]) AM_CONDITIONAL(BUILD_PERL, test "x$build_perl" = "xyes") @@ -183,7 +186,7 @@ AC_SUBST(XAPIAN_LIBS) # note that MU_STORE_SCHEMA_VERSION does not necessarily follow MU # versioning, as we hopefully don't have updates for each version; # also, this has nothing to do with Xapian's software version -AC_DEFINE(MU_STORE_SCHEMA_VERSION,["9.9"], ['Schema' version of the database]) +AC_DEFINE(MU_STORE_SCHEMA_VERSION,["0.99"], ['Schema' version of the database]) ############################################################################### @@ -230,15 +233,15 @@ AS_IF([test "x$enable_guile" != "xno"], [ AS_IF([test "x$build_guile" = "xyes"],[ AC_PATH_PROG(GUILE_BINARY, [guile], [], [$GUILE_EXECDIR]) AS_IF([test "x$GUILE_BINARY" != "x"], - [AC_DEFINE_UNQUOTED([GUILE_BINARY],["$GUILE_BINARY"],[Path to the guile binary])], - [AC_MSG_WARN([cannot find guile-snarf])])]) + [AC_DEFINE_UNQUOTED([GUILE_BINARY],["$GUILE_BINARY"],[Path to the guile 
binary])], + [AC_MSG_WARN([cannot find guile-snarf])])]) AS_IF([test "x$build_guile" = "xyes"],[ AC_PATH_PROG(GUILE_SNARF, [guile-snarf], [], [$GUILE_EXECDIR]) AS_IF([test "x$GUILE_SNARF" != "x"], - [AC_DEFINE_UNQUOTED([GUILE_SNARF],["$GUILE_SNARF"],[Path to guile-snarf]) - GUILE_SITEDIR="`${PKG_CONFIG} guile-2.0 --variable=sitedir`"], - [AC_MSG_WARN([cannot find guile-snarf])])]) + [AC_DEFINE_UNQUOTED([GUILE_SNARF],["$GUILE_SNARF"],[Path to guile-snarf]) + GUILE_SITEDIR="`${PKG_CONFIG} guile-2.0 --variable=sitedir`"], + [AC_MSG_WARN([cannot find guile-snarf])])]) AC_SUBST(GUILE_SITEDIR)]) AS_IF([test "x$build_guile" = "xyes" -a "x$GUILE_SNARF" != "x"], @@ -281,6 +284,7 @@ mu/Makefile mu/tests/Makefile lib/Makefile lib/doxyfile +lib/parser/Makefile lib/tests/Makefile mu4e/Makefile mu4e/mu4e-meta.el diff --git a/lib/Makefile.am b/lib/Makefile.am index d1d126be..baa76656 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,4 +1,4 @@ -## Copyright (C) 2010-2013 Dirk-Jan C. Binnema +## Copyright (C) 2010-2017 Dirk-Jan C. Binnema ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -18,13 +18,14 @@ include $(top_srcdir)/gtest.mk # enforce compiling guile (optionally) first,then this dir first # before decending into tests/ -SUBDIRS= . tests +SUBDIRS= parser . 
tests AM_CFLAGS= \ $(WARN_CFLAGS) \ -Wno-format-nonliteral \ -Wno-switch-enum \ -Wno-suggest-attribute=format \ + -Wno-deprecated-declarations \ $(GMIME_CFLAGS) \ $(GLIB_CFLAGS) \ $(GUILE_CFLAGS) @@ -103,7 +104,8 @@ libmu_la_LIBADD= \ $(XAPIAN_LIBS) \ $(GMIME_LIBS) \ $(GLIB_LIBS) \ - $(GUILE_LIBS) + $(GUILE_LIBS) \ + ${builddir}/parser/libmuxparser.la EXTRA_DIST= \ mu-msg-crypto.c \ diff --git a/lib/mu-flags.c b/lib/mu-flags.c index d5c33677..330fa316 100644 --- a/lib/mu-flags.c +++ b/lib/mu-flags.c @@ -77,6 +77,20 @@ mu_flag_char (MuFlags flag) } +MuFlags +mu_flag_char_from_name (const char *str) +{ + unsigned u; + + g_return_val_if_fail (str, MU_FLAG_INVALID); + + for (u = 0; u != G_N_ELEMENTS (FLAG_INFO); ++u) + if (g_strcmp0(FLAG_INFO[u].name, str) == 0) + return FLAG_INFO[u].kar; + + return 0; +} + static MuFlags mu_flag_from_char (char kar) diff --git a/lib/mu-flags.h b/lib/mu-flags.h index 00752214..9d892f39 100644 --- a/lib/mu-flags.h +++ b/lib/mu-flags.h @@ -128,6 +128,18 @@ MuFlags mu_flags_from_str (const char *str, MuFlagType types, gboolean ignore_invalid); + + +/** + * Get the MuFlag char for some flag name + * + * @param str a flag name + * + * @return a flag character, or 0 + */ +MuFlags mu_flag_char_from_name (const char *str); + + /** * return the concatenation of all non-standard file flags in str * (ie., characters other than DFPRST) as a newly allocated string. 
diff --git a/lib/mu-msg-doc.cc b/lib/mu-msg-doc.cc index 249986b3..ad57b80b 100644 --- a/lib/mu-msg-doc.cc +++ b/lib/mu-msg-doc.cc @@ -106,17 +106,13 @@ mu_msg_doc_get_num_field (MuMsgDoc *self, MuMsgFieldId mfid) g_return_val_if_fail (mu_msg_field_id_is_valid(mfid), -1); g_return_val_if_fail (mu_msg_field_is_numeric(mfid), -1); - /* date is a special case, because we store dates as - * strings */ try { const std::string s (self->doc().get_value(mfid)); if (s.empty()) return 0; - else if (mfid == MU_MSG_FIELD_ID_DATE) { - time_t t; - t = mu_date_str_to_time_t (s.c_str(), FALSE/*utc*/); - return static_cast(t); - } else { + else if (mfid == MU_MSG_FIELD_ID_DATE) + return strtol (s.c_str(), NULL, 10); + else { return static_cast (Xapian::sortable_unserialise(s)); } diff --git a/lib/mu-msg-fields.c b/lib/mu-msg-fields.c index da5275df..374719ad 100644 --- a/lib/mu-msg-fields.c +++ b/lib/mu-msg-fields.c @@ -48,11 +48,9 @@ enum _FieldFlags { * for Xapian queries; * wildcards do NOT WORK * for such fields */ - FLAG_PREPROCESS = 1 << 6, /* field needs flattening for - * case/accents */ - FLAG_DONT_CACHE = 1 << 7, /* don't cache this field in + FLAG_DONT_CACHE = 1 << 6, /* don't cache this field in * the MuMsg cache */ - FLAG_RANGE_FIELD = 1 << 8 /* whether this is a range field */ + FLAG_RANGE_FIELD = 1 << 7 /* whether this is a range field */ }; typedef enum _FieldFlags FieldFlags; @@ -84,14 +82,14 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_TYPE_STRING, "bcc" , 'h', 'H', /* 'hidden */ FLAG_GMIME | FLAG_XAPIAN_CONTACT | - FLAG_XAPIAN_VALUE | FLAG_PREPROCESS + FLAG_XAPIAN_VALUE }, { MU_MSG_FIELD_ID_BODY_TEXT, MU_MSG_FIELD_TYPE_STRING, "body", 'b', 'B', - FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_PREPROCESS | + FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_DONT_CACHE }, @@ -106,8 +104,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_CC, MU_MSG_FIELD_TYPE_STRING, "cc", 'c', 'C', - FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE | - FLAG_PREPROCESS + 
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE }, { @@ -122,16 +119,14 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_EMBEDDED_TEXT, MU_MSG_FIELD_TYPE_STRING, "embed", 'e', 'E', - FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_PREPROCESS | - FLAG_DONT_CACHE + FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_DONT_CACHE }, { MU_MSG_FIELD_ID_FILE, MU_MSG_FIELD_TYPE_STRING, "file" , 'j', 'J', - FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_PREPROCESS | - FLAG_DONT_CACHE + FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_DONT_CACHE }, @@ -146,8 +141,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_FROM, MU_MSG_FIELD_TYPE_STRING, "from", 'f', 'F', - FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE | - FLAG_PREPROCESS + FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE }, @@ -155,8 +149,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_MAILDIR, MU_MSG_FIELD_TYPE_STRING, "maildir", 'm', 'M', - FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE | - FLAG_PREPROCESS + FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE }, @@ -164,8 +157,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_MAILING_LIST, MU_MSG_FIELD_TYPE_STRING, "list", 'v', 'V', - FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE | - FLAG_PREPROCESS + FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE }, @@ -173,7 +165,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_MIME, MU_MSG_FIELD_TYPE_STRING, "mime" , 'y', 'Y', - FLAG_XAPIAN_TERM | FLAG_PREPROCESS + FLAG_XAPIAN_TERM }, @@ -181,8 +173,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_MSGID, MU_MSG_FIELD_TYPE_STRING, "msgid", 'i', 'I', /* 'i' for Id */ - FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE | - FLAG_PREPROCESS + FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE }, @@ -191,7 +182,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_TYPE_STRING, "path", 'l', 'L', /* 'l' for location */ FLAG_GMIME | FLAG_XAPIAN_VALUE | - FLAG_XAPIAN_BOOLEAN | FLAG_PREPROCESS + FLAG_XAPIAN_BOOLEAN }, @@ -224,15 
+215,14 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_TYPE_STRING, "subject", 's', 'S', FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE | - FLAG_XAPIAN_TERM | FLAG_PREPROCESS + FLAG_XAPIAN_TERM }, { MU_MSG_FIELD_ID_TAGS, MU_MSG_FIELD_TYPE_STRING_LIST, "tag", 'x', 'X', - FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_PREPROCESS | - FLAG_XAPIAN_VALUE + FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE }, @@ -247,8 +237,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_TO, MU_MSG_FIELD_TYPE_STRING, "to", 't', 'T', - FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE | - FLAG_PREPROCESS + FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE }, { /* special, internal field, to get a unique key */ @@ -389,13 +378,6 @@ mu_msg_field_xapian_contact (MuMsgFieldId id) } -gboolean -mu_msg_field_preprocess (MuMsgFieldId id) -{ - g_return_val_if_fail (mu_msg_field_id_is_valid(id),FALSE); - return mu_msg_field(id)->_flags & FLAG_PREPROCESS ? TRUE: FALSE; -} - gboolean mu_msg_field_is_numeric (MuMsgFieldId mfid) diff --git a/lib/mu-msg-fields.h b/lib/mu-msg-fields.h index b9d0b17d..96bd2769 100644 --- a/lib/mu-msg-fields.h +++ b/lib/mu-msg-fields.h @@ -1,5 +1,5 @@ /* -** Copyright (C) 2008-2013 Dirk-Jan C. Binnema +** Copyright (C) 2008-2017 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by @@ -59,7 +59,6 @@ enum _MuMsgFieldId { MU_MSG_FIELD_ID_MAILING_LIST, /* mailing list */ MU_MSG_FIELD_ID_THREAD_ID, - MU_MSG_FIELD_ID_NUM }; typedef guint8 MuMsgFieldId; @@ -232,19 +231,6 @@ gboolean mu_msg_field_xapian_value (MuMsgFieldId id) G_GNUC_PURE; */ gboolean mu_msg_field_uses_boolean_prefix (MuMsgFieldId id) G_GNUC_PURE; - -/** - * should this field be escaped for xapian? in practice, should - * word-breaking chars be replaced with '_'? Also, flatten accents, - * downcase? 
- * - * @param field a MuMsgField - * - * @return TRUE if the field is to be preprocessed, FALSE otherwise - */ -gboolean mu_msg_field_preprocess (MuMsgFieldId id) G_GNUC_PURE; - - /** * is this a range-field? ie. date, or size * diff --git a/lib/mu-msg-prio.c b/lib/mu-msg-prio.c index bf86f666..96a959ac 100644 --- a/lib/mu-msg-prio.c +++ b/lib/mu-msg-prio.c @@ -1,5 +1,5 @@ /* -** Copyright (C) 2012-2013 Dirk-Jan C. Binnema +** Copyright (C) 2012-2017 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify it ** under the terms of the GNU General Public License as published by the @@ -20,7 +20,7 @@ #include "mu-msg-prio.h" -const char* +const char* mu_msg_prio_name (MuMsgPrio prio) { switch (prio) { @@ -45,8 +45,8 @@ mu_msg_prio_char (MuMsgPrio prio) if (!(prio == 'l' || prio == 'n' || prio == 'h')) { g_warning ("prio: %c", (char)prio); } - - + + g_return_val_if_fail (prio == 'l' || prio == 'n' || prio == 'h', 0); diff --git a/lib/mu-msg-prio.h b/lib/mu-msg-prio.h index dc49c35c..6000f8eb 100644 --- a/lib/mu-msg-prio.h +++ b/lib/mu-msg-prio.h @@ -1,20 +1,20 @@ -/* -** Copyright (C) 2008-2013 Dirk-Jan C. Binnema +/* +** Copyright (C) 2008-2017 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 3 of the License, or ** (at your option) any later version. -** +** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. -** +** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software Foundation, -** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-** +** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +** */ #ifndef __MU_MSG_PRIO_H__ @@ -37,9 +37,9 @@ static const MuMsgPrio MU_MSG_PRIO_NONE = (MuMsgPrio)0; /** * get a printable name for the message priority * (ie., MU_MSG_PRIO_LOW=>"low" etc.) - * + * * @param prio a message priority - * + * * @return a printable name for this priority */ const char* mu_msg_prio_name (MuMsgPrio prio) G_GNUC_CONST; @@ -49,9 +49,9 @@ const char* mu_msg_prio_name (MuMsgPrio prio) G_GNUC_CONST; * get the MuMsgPriority corresponding to a one-character shortcut * ('l'=>MU_MSG_PRIO_, 'n'=>MU_MSG_PRIO_NORMAL or * 'h'=>MU_MSG_PRIO_HIGH) - * - * @param k a character - * + * + * @param k a character + * * @return a message priority */ MuMsgPrio mu_msg_prio_from_char (char k) G_GNUC_CONST; @@ -61,9 +61,9 @@ MuMsgPrio mu_msg_prio_from_char (char k) G_GNUC_CONST; * get the one-character shortcut corresponding to a message priority * ('l'=>MU_MSG_PRIO_, 'n'=>MU_MSG_PRIO_NORMAL or * 'h'=>MU_MSG_PRIO_HIGH) - * + * * @param prio a mesage priority - * + * * @return a shortcut character or 0 in case of error */ char mu_msg_prio_char (MuMsgPrio prio) G_GNUC_CONST; @@ -71,7 +71,7 @@ char mu_msg_prio_char (MuMsgPrio prio) G_GNUC_CONST; typedef void (*MuMsgPrioForeachFunc) (MuMsgPrio prio, gpointer user_data); /** * call a function for each message priority - * + * * @param func a callback function * @param user_data a user pointer to pass to the callback */ diff --git a/lib/mu-query.cc b/lib/mu-query.cc index f7bc1e9e..0f9bfd64 100644 --- a/lib/mu-query.cc +++ b/lib/mu-query.cc @@ -1,5 +1,5 @@ /* -** Copyright (C) 2008-2016 Dirk-Jan C. Binnema +** Copyright (C) 2008-2017 Dirk-Jan C. 
Binnema ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by @@ -21,8 +21,9 @@ #include #include #include -#include +#include +#include #include #include @@ -35,252 +36,178 @@ #include "mu-str.h" #include "mu-date.h" -/* - * custom parser for date ranges - */ -class MuDateRangeProcessor : public Xapian::StringValueRangeProcessor { -public: - MuDateRangeProcessor(): - Xapian::StringValueRangeProcessor( - (Xapian::valueno)MU_MSG_FIELD_ID_DATE) {} +#include +#include +#include - Xapian::valueno operator()(std::string &begin, std::string &end) { +struct MuProc: public Mux::ProcIface { - if (!clear_prefix (begin)) - return Xapian::BAD_VALUENO; + MuProc (const Xapian::Database& db): db_{db} {} - begin = to_sortable (begin, true); - end = to_sortable (end, false); + static MuMsgFieldId field_id (const std::string& field) { - if (begin > end) - throw Xapian::QueryParserError - ("end time is before begin"); + if (field.empty()) + return MU_MSG_FIELD_ID_NONE; - return (Xapian::valueno)MU_MSG_FIELD_ID_DATE; + MuMsgFieldId id = mu_msg_field_id_from_name (field.c_str(), FALSE); + if (id != MU_MSG_FIELD_ID_NONE) + return id; + else + return mu_msg_field_id_from_shortcut (field[0], FALSE); } -private: - std::string to_sortable (std::string& s, bool is_begin) { - const char* tmp; - time_t t; + std::string + process_value (const std::string& field, + const std::string& value) const override { + const auto id = field_id (field); + if (id == MU_MSG_FIELD_ID_NONE) + return value; + switch(id) { + case MU_MSG_FIELD_ID_PRIO: { + if (!value.empty()) + return std::string(1, value[0]); + } break; - // note: if s is empty and not is_begin, xapian seems - // to repeat it. - if (s.empty() || g_str_has_suffix (s.c_str(), "..")) { - tmp = mu_date_complete_s ("", is_begin); - } else { - tmp = mu_date_interpret_s (s.c_str(), - is_begin ? TRUE: FALSE); - tmp = mu_date_complete_s (tmp, is_begin ? 
TRUE: FALSE); - t = mu_date_str_to_time_t (tmp, TRUE /*local*/); - tmp = mu_date_time_t_to_str_s (t, FALSE /*UTC*/); + case MU_MSG_FIELD_ID_FLAGS: { + const auto flag = mu_flag_char_from_name (value.c_str()); + if (flag) + return std::string(1, tolower(flag)); + } break; + + default: + break; } - return s = std::string(tmp); + return value; // XXX prio/flags, etc. alias } + void add_field (std::vector& fields, MuMsgFieldId id) const { - bool clear_prefix (std::string& begin) { + const auto shortcut = mu_msg_field_shortcut(id); + if (!shortcut) + return; // can't be searched - const std::string colon (":"); - const std::string name (mu_msg_field_name - (MU_MSG_FIELD_ID_DATE) + colon); - const std::string shortcut ( - std::string(1, mu_msg_field_shortcut - (MU_MSG_FIELD_ID_DATE)) + colon); + const auto name = mu_msg_field_name (id); + const auto pfx = mu_msg_field_xapian_prefix (id); - if (begin.find (name) == 0) { - begin.erase (0, name.length()); - return true; - } else if (begin.find (shortcut) == 0) { - begin.erase (0, shortcut.length()); - return true; - } else + if (!name || !pfx) + return; + + fields.push_back ({{name}, {pfx}, id}); + } + + std::vector + process_field (const std::string& field) const override { + + std::vector fields; + + if (field == "contact" || field == "recip") { // multi fields + add_field (fields, MU_MSG_FIELD_ID_TO); + add_field (fields, MU_MSG_FIELD_ID_CC); + add_field (fields, MU_MSG_FIELD_ID_BCC); + if (field == "contact") + add_field (fields, MU_MSG_FIELD_ID_FROM); + } else if (field == "") { + add_field (fields, MU_MSG_FIELD_ID_TO); + add_field (fields, MU_MSG_FIELD_ID_CC); + add_field (fields, MU_MSG_FIELD_ID_BCC); + add_field (fields, MU_MSG_FIELD_ID_FROM); + add_field (fields, MU_MSG_FIELD_ID_SUBJECT); + add_field (fields, MU_MSG_FIELD_ID_BODY_TEXT); + } else { + const auto id = field_id (field.c_str()); + if (id != MU_MSG_FIELD_ID_NONE) + add_field (fields, id); + } + + return fields; + } + + bool is_range_field (const 
std::string& field) const override { + const auto id = field_id (field.c_str()); + if (id == MU_MSG_FIELD_ID_NONE) return false; + else + return mu_msg_field_is_range_field (id); } + + Range process_range (const std::string& field, const std::string& lower, + const std::string& upper) const override { + + const auto id = field_id (field.c_str()); + if (id == MU_MSG_FIELD_ID_NONE) + return { lower, upper }; + + std::string l2 = lower; + std::string u2 = upper; + + if (id == MU_MSG_FIELD_ID_DATE) { + l2 = Mux::date_to_time_t_string (lower, true); + u2 = Mux::date_to_time_t_string (upper, false); + } else if (id == MU_MSG_FIELD_ID_SIZE) { + l2 = Mux::size_to_string (lower, true); + u2 = Mux::size_to_string (upper, false); + } + + return { l2, u2 }; + } + + std::vector + process_regex (const std::string& field, const std::regex& rx) const override { + + const auto id = field_id (field.c_str()); + if (id == MU_MSG_FIELD_ID_NONE) + return {}; + + char pfx[] = { mu_msg_field_xapian_prefix(id), '\0' }; + + std::vector terms; + for (auto it = db_.allterms_begin(pfx); it != db_.allterms_end(pfx); ++it) { + if (std::regex_search((*it).c_str() + 1, rx)) // avoid copy + terms.push_back(*it); + } + + return terms; + } + + const Xapian::Database& db_; }; - -class MuSizeRangeProcessor : public Xapian::NumberValueRangeProcessor { +class _MuQuery { public: - MuSizeRangeProcessor(): - Xapian::NumberValueRangeProcessor(MU_MSG_FIELD_ID_SIZE) { - } - - Xapian::valueno operator()(std::string &begin, std::string &end) { - - if (!clear_prefix (begin)) - return Xapian::BAD_VALUENO; - - if (!substitute_size (begin) || !substitute_size (end)) - return Xapian::BAD_VALUENO; - - begin = Xapian::sortable_serialise (atol(begin.c_str())); - end = Xapian::sortable_serialise (atol(end.c_str())); - - /* swap if b > e */ - if (begin > end) - std::swap (begin, end); - - return (Xapian::valueno)MU_MSG_FIELD_ID_SIZE; - } -private: - bool clear_prefix (std::string& begin) { - - const std::string colon 
(":"); - const std::string name (mu_msg_field_name - (MU_MSG_FIELD_ID_SIZE) + colon); - const std::string shortcut ( - std::string(1, mu_msg_field_shortcut - (MU_MSG_FIELD_ID_SIZE)) + colon); - - if (begin.find (name) == 0) { - begin.erase (0, name.length()); - return true; - } else if (begin.find (shortcut) == 0) { - begin.erase (0, shortcut.length()); - return true; - } else - return false; - } - - bool substitute_size (std::string& size) { - gchar buf[16]; - gint64 num = mu_str_size_parse_bkm(size.c_str()); - if (num < 0) - throw Xapian::QueryParserError ("invalid size"); - snprintf (buf, sizeof(buf), "%" G_GUINT64_FORMAT, num); - size = buf; - return true; - } -}; - - - -static void add_prefix (MuMsgFieldId field, Xapian::QueryParser* qparser); - -struct _MuQuery { -public: - _MuQuery (MuStore *store): _store(mu_store_ref(store)) { - - _qparser.set_database (db()); - _qparser.set_default_op (Xapian::Query::OP_AND); - - _qparser.add_valuerangeprocessor (&_date_range_processor); - _qparser.add_valuerangeprocessor (&_size_range_processor); - - mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_prefix, - &_qparser); - - /* add some convenient special prefixes */ - add_special_prefixes (); - } - + _MuQuery (MuStore *store): _store(mu_store_ref(store)) {} ~_MuQuery () { mu_store_unref (_store); } Xapian::Database& db() const { - Xapian::Database* db; - db = reinterpret_cast + const auto db = reinterpret_cast (mu_store_get_read_only_database (_store)); if (!db) throw std::runtime_error ("no database"); return *db; } - Xapian::QueryParser& query_parser () { return _qparser; } - private: - void add_special_prefixes () { - char pfx[] = { '\0', '\0' }; - - /* add 'contact' as a shortcut for From/Cc/Bcc/To: */ - pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_FROM); - _qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx); - pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_TO); - _qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx); - pfx[0] = 
mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_CC); - _qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx); - pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_BCC); - _qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx); - - /* add 'recip' as a shortcut for Cc/Bcc/To: */ - pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_TO); - _qparser.add_prefix (MU_MSG_FIELD_PSEUDO_RECIP, pfx); - pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_CC); - _qparser.add_prefix (MU_MSG_FIELD_PSEUDO_RECIP, pfx); - pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_BCC); - _qparser.add_prefix (MU_MSG_FIELD_PSEUDO_RECIP, pfx); - } - - Xapian::QueryParser _qparser; - MuDateRangeProcessor _date_range_processor; - MuSizeRangeProcessor _size_range_processor; - MuStore *_store; }; static const Xapian::Query get_query (MuQuery *mqx, const char* searchexpr, GError **err) { - Xapian::Query query; - char *preprocessed; - - preprocessed = mu_query_preprocess (searchexpr, err); - if (!preprocessed) - throw std::runtime_error - ("parse error while preprocessing query"); - try { - query = mqx->query_parser().parse_query - (preprocessed, - Xapian::QueryParser::FLAG_BOOLEAN | - Xapian::QueryParser::FLAG_PURE_NOT | - Xapian::QueryParser::FLAG_AUTO_SYNONYMS | - Xapian::QueryParser::FLAG_WILDCARD | - Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE - ); - g_free (preprocessed); - return query; + Mux::WarningVec warns; + const auto tree = Mux::parse (searchexpr, warns, + std::make_unique(mqx->db())); + for (const auto w: warns) + std::cerr << w << std::endl; + + return Mux::xapian_query (tree); } catch (...) 
{ mu_util_g_set_error (err,MU_ERROR_XAPIAN_QUERY, "parse error in query"); - g_free (preprocessed); throw; } } - - -static void -add_prefix (MuMsgFieldId mfid, Xapian::QueryParser* qparser) -{ - if (!mu_msg_field_xapian_index(mfid) && - !mu_msg_field_xapian_term(mfid) && - !mu_msg_field_xapian_contact(mfid)) - return; - try { - const std::string pfx - (1, mu_msg_field_xapian_prefix (mfid)); - const std::string shortcut - (1, mu_msg_field_shortcut (mfid)); - - if (mu_msg_field_uses_boolean_prefix (mfid)) { - qparser->add_boolean_prefix - (mu_msg_field_name(mfid), pfx); - qparser->add_boolean_prefix (shortcut, pfx); - } else { - qparser->add_prefix - (mu_msg_field_name(mfid), pfx); - qparser->add_prefix (shortcut, pfx); - } - - // all fields are also matched implicitly, without - // any prefix - qparser->add_prefix ("", pfx); - - } MU_XAPIAN_CATCH_BLOCK; -} - MuQuery* mu_query_new (MuStore *store, GError **err) { @@ -299,7 +226,6 @@ mu_query_new (MuStore *store, GError **err) return 0; } - void mu_query_destroy (MuQuery *self) { @@ -307,39 +233,6 @@ mu_query_destroy (MuQuery *self) } -/* preprocess a query to make them a bit more promiscuous */ -char* -mu_query_preprocess (const char *query, GError **err) -{ - GSList *parts, *cur; - gchar *myquery; - - g_return_val_if_fail (query, NULL); - - /* convert the query to a list of query terms, and escape them - * separately */ - parts = mu_str_esc_to_list (query); - if (!parts) - return NULL; - - for (cur = parts; cur; cur = g_slist_next(cur)) { - char *data; - data = (gchar*)cur->data; - cur->data = mu_str_process_query_term (data); - g_free (data); - /* run term fixups */ - data = (gchar*)cur->data; - cur->data = mu_str_xapian_fixup_terms (data); - g_free (data); - } - - myquery = mu_str_from_list (parts, ' '); - mu_str_free_list (parts); - - return myquery ? myquery : g_strdup (""); -} - - /* this function is for handling the case where a DatabaseModified * exception is raised. 
We try to reopen the database, and run the * query again. */ @@ -533,7 +426,11 @@ mu_query_run (MuQuery *self, const char *searchexpr, MuMsgFieldId sortfieldid, * effort to calculate threads already in the first * query since we can do it in the second one */ - first_flags = inc_related ? (flags & ~MU_QUERY_FLAG_THREADS) : flags; + if (inc_related) + first_flags = (MuQueryFlags)(flags & ~MU_QUERY_FLAG_THREADS); + else + first_flags = flags; + iter = mu_msg_iter_new ( reinterpret_cast(&enq), maxnum, @@ -563,7 +460,7 @@ mu_query_run (MuQuery *self, const char *searchexpr, MuMsgFieldId sortfieldid, char* -mu_query_as_string (MuQuery *self, const char *searchexpr, GError **err) +mu_query_internal_xapian (MuQuery *self, const char *searchexpr, GError **err) { g_return_val_if_fail (self, NULL); g_return_val_if_fail (searchexpr, NULL); @@ -574,3 +471,28 @@ mu_query_as_string (MuQuery *self, const char *searchexpr, GError **err) } MU_XAPIAN_CATCH_BLOCK_RETURN(NULL); } + + +char* +mu_query_internal (MuQuery *self, const char *searchexpr, + gboolean warn, GError **err) +{ + g_return_val_if_fail (self, NULL); + g_return_val_if_fail (searchexpr, NULL); + + try { + Mux::WarningVec warns; + const auto tree = Mux::parse (searchexpr, warns, + std::make_unique(self->db())); + std::stringstream ss; + ss << tree; + + if (warn) { + for (const auto w: warns) + std::cerr << w << std::endl; + } + + return g_strdup(ss.str().c_str()); + + } MU_XAPIAN_CATCH_BLOCK_RETURN(NULL); +} diff --git a/lib/mu-query.h b/lib/mu-query.h index 9fb4685d..b8b446b8 100644 --- a/lib/mu-query.h +++ b/lib/mu-query.h @@ -1,5 +1,5 @@ /* -** Copyright (C) 2008-2013 Dirk-Jan C. Binnema +** Copyright (C) 2008-2017 Dirk-Jan C. 
Binnema ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by @@ -43,7 +43,7 @@ typedef struct _MuQuery MuQuery; * when the instance is no longer needed, use mu_query_destroy * to free it */ -MuQuery *mu_query_new (MuStore *store, GError **err) +MuQuery* mu_query_new (MuStore *store, GError **err) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; /** @@ -53,7 +53,6 @@ MuQuery *mu_query_new (MuStore *store, GError **err) */ void mu_query_destroy (MuQuery *self); - /** * get a version string for the database * @@ -65,16 +64,14 @@ char* mu_query_version (MuQuery *store) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; -enum _MuQueryFlags { - MU_QUERY_FLAG_NONE = 0, - +typedef enum { + MU_QUERY_FLAG_NONE = 0 << 0, /**< no flags */ MU_QUERY_FLAG_DESCENDING = 1 << 0, /**< sort z->a */ MU_QUERY_FLAG_SKIP_UNREADABLE = 1 << 1, /**< skip unreadable msgs */ MU_QUERY_FLAG_SKIP_DUPS = 1 << 2, /**< skip duplicate msgs */ MU_QUERY_FLAG_INCLUDE_RELATED = 1 << 3, /**< include related msgs */ MU_QUERY_FLAG_THREADS = 1 << 4 /**< calculate threading info */ -}; -typedef int MuQueryFlags; +} MuQueryFlags; /** * run a Xapian query; for the syntax, please refer to the mu-find @@ -94,15 +91,30 @@ typedef int MuQueryFlags; * @return a MuMsgIter instance you can iterate over, or NULL in * case of error */ -MuMsgIter* mu_query_run (MuQuery *self, const char* expr, MuMsgFieldId sortfieldid, int maxnum, +MuMsgIter* mu_query_run (MuQuery *self, const char* expr, + MuMsgFieldId sortfieldid, int maxnum, MuQueryFlags flags, GError **err) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; - +/** + * get mu's internal string representation of the parsed query + * + * @param self a MuQuery instance + * @param searchexpr a xapian search expression + * @param warn print warnings to stderr + * @param err receives error information (if there is any); if + * function returns non-NULL, err will _not_ be set.
err can be NULL + * + * @return the string representation of the parsed query, or NULL in case of + * error; free the returned value with g_free + */ +char* mu_query_internal (MuQuery *self, const char *searchexpr, + gboolean warn, GError **err) + G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; /** - * get a string representation of the Xapian search query + * get Xapian's internal string representation of the query * * @param self a MuQuery instance * @param searchexpr a xapian search expression @@ -112,18 +124,10 @@ MuMsgIter* mu_query_run (MuQuery *self, const char* expr, MuMsgFieldId sortfield * @return the string representation of the xapian query, or NULL in case of * error; free the returned value with g_free */ -char* mu_query_as_string (MuQuery *self, const char* searchexpr, GError **err) +char* mu_query_internal_xapian (MuQuery *self, const char* searchexpr, + GError **err) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; -/** - * pre-process the query; this function is useful mainly for debugging mu - * - * @param query a query string - * - * @return a pre-processed query, free it with g_free - */ -char* mu_query_preprocess (const char *query, GError **err) - G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; G_END_DECLS diff --git a/lib/mu-store-write.cc b/lib/mu-store-write.cc index c5d79cf7..36af013a 100644 --- a/lib/mu-store-write.cc +++ b/lib/mu-store-write.cc @@ -1,6 +1,6 @@ /* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */ /* -** Copyright (C) 2008-2016 Dirk-Jan C. Binnema +** Copyright (C) 2008-2017 Dirk-Jan C.
Binnema ** ** This program is free software; you can redistribute it and/or modify it ** under the terms of the GNU General Public License as published by the @@ -26,6 +26,9 @@ #include #include #include +#include + +#include #include "mu-store.h" #include "mu-store-priv.hh" /* _MuStore */ @@ -202,19 +205,25 @@ mu_store_flush (MuStore *store) mu_contacts_serialize (store->contacts()); } - static void add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { - time_t t; - const char *datestr; + const auto dstr = Mux::date_to_time_t_string ( + (time_t)mu_msg_get_field_numeric (msg, mfid)); - t = (time_t)mu_msg_get_field_numeric (msg, mfid); - - datestr = mu_date_time_t_to_str_s (t, FALSE /*UTC*/); - doc.add_value ((Xapian::valueno)mfid, datestr); + doc.add_value ((Xapian::valueno)mfid, dstr); } +static void +add_terms_values_size (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) +{ + const auto szstr = + Mux::size_to_string (mu_msg_get_field_numeric (msg, mfid)); + doc.add_value ((Xapian::valueno)mfid, szstr); +} + + + G_GNUC_CONST static const std::string& flag_val (char flagchar) @@ -258,9 +267,6 @@ flag_val (char flagchar) } } - - - /* pre-calculate; optimization */ G_GNUC_CONST static const std::string& prio_val (MuMsgPrio prio) @@ -283,7 +289,6 @@ prio_val (MuMsgPrio prio) } - static void add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { @@ -305,55 +310,26 @@ add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) doc.add_term (prio_val((MuMsgPrio)num)); } -static void -add_terms_values_msgid (Xapian::Document& doc, MuMsg *msg) -{ - char *str; - const char *orig; - - if (!(orig = mu_msg_get_field_string ( - msg, MU_MSG_FIELD_ID_MSGID))) - return; /* nothing to do */ - - str = mu_str_process_msgid (orig, FALSE); - - doc.add_value ((Xapian::valueno)MU_MSG_FIELD_ID_MSGID, orig); - doc.add_term (prefix(MU_MSG_FIELD_ID_MSGID) + - std::string(str, 0, _MuStore::MAX_TERM_LENGTH)); - - g_free 
(str); -} - - /* for string and string-list */ static void add_terms_values_str (Xapian::Document& doc, const char *val, MuMsgFieldId mfid) { - char *str; - - if (mu_msg_field_preprocess (mfid)) - str = mu_str_process_term (val); - else - str = g_strdup (val); + const auto flat = Mux::utf8_flatten (val); if (mu_msg_field_xapian_index (mfid)) { Xapian::TermGenerator termgen; termgen.set_document (doc); - termgen.index_text_without_positions (str, 1, prefix(mfid)); - if (g_strcmp0 (val, str) != 0) - termgen.index_text_without_positions ( - val, 1, prefix(mfid)); + termgen.index_text (flat, 1, prefix(mfid)); } - if (mu_msg_field_xapian_term(mfid)) - doc.add_term (prefix(mfid) + - std::string(str, 0, _MuStore::MAX_TERM_LENGTH)); - - g_free (str); + if (mu_msg_field_xapian_term(mfid)) { + //std::cerr << ":" << prefix(mfid) + flat << std::endl; + doc.add_term((prefix(mfid) + flat) + .substr(0, MuStore::MAX_TERM_LENGTH)); + } } - static void add_terms_values_string (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { @@ -370,8 +346,6 @@ add_terms_values_string (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) add_terms_values_str (doc, orig, mfid); } - - static void add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) @@ -409,7 +383,7 @@ struct PartData { static void maybe_index_text_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) { - char *txt, *str; + char *txt; Xapian::TermGenerator termgen; /* only deal with attachments/messages; inlines are indexed as @@ -423,14 +397,10 @@ maybe_index_text_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) return; termgen.set_document(pdata->_doc); - - str = mu_str_process_text (txt); - - termgen.index_text_without_positions - (str, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT)); - + const auto str = Mux::utf8_flatten (txt); g_free (txt); - g_free (str); + + termgen.index_text (str, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT)); } @@ -444,25 +414,17 @@ each_part (MuMsg *msg, MuMsgPart *part, 
PartData *pdata) /* save the mime type of any part */ if (part->type) { - /* note, we use '_' instead of '/' to separate - * type/subtype -- Xapian doesn't treat '/' as - * desired, so we use '_' and pre-process queries; see - * mu_query_preprocess */ char ctype[MuStore::MAX_TERM_LENGTH + 1]; - snprintf (ctype, sizeof(ctype), "%s_%s", - part->type, part->subtype); - + snprintf (ctype, sizeof(ctype), "%s/%s", part->type, part->subtype); pdata->_doc.add_term (mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH)); } if ((fname = mu_msg_part_get_filename (part, FALSE))) { - char *str; - str = mu_str_process_term (fname); + const auto flat = Mux::utf8_flatten (fname); g_free (fname); pdata->_doc.add_term - (file + std::string(str, 0, MuStore::MAX_TERM_LENGTH)); - g_free (str); + (file + std::string(flat, 0, MuStore::MAX_TERM_LENGTH)); } maybe_index_text_part (msg, part, pdata); @@ -483,13 +445,10 @@ static void add_terms_values_body (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { - const char *str; - char *flat; - if (mu_msg_get_flags(msg) & MU_FLAG_ENCRYPTED) return; /* ignore encrypted bodies */ - str = mu_msg_get_body_text (msg, MU_MSG_OPTION_NONE); + auto str = mu_msg_get_body_text (msg, MU_MSG_OPTION_NONE); if (!str) /* FIXME: html->txt fallback needed */ str = mu_msg_get_body_html (msg, MU_MSG_OPTION_NONE); if (!str) @@ -498,11 +457,8 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg, Xapian::TermGenerator termgen; termgen.set_document(doc); - flat = mu_str_process_text (str); - - // g_print ("\n--\n%s\n--\n", flat); - termgen.index_text_without_positions (flat, 1, prefix(mfid)); - g_free (flat); + const auto flat = Mux::utf8_flatten(str); + termgen.index_text (flat, 1, prefix(mfid)); } struct _MsgDoc { @@ -544,13 +500,13 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc) !mu_msg_field_xapian_value(mfid)) return; - // if (mu_msg_field_xapian_contact (mfid)) - // return; /* handled in new_doc_from_message */ - switch (mfid) { case 
MU_MSG_FIELD_ID_DATE: add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid); break; + case MU_MSG_FIELD_ID_SIZE: + add_terms_values_size (*msgdoc->_doc, msgdoc->_msg, mfid); + break; case MU_MSG_FIELD_ID_BODY_TEXT: add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid); break; @@ -562,11 +518,6 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc) case MU_MSG_FIELD_ID_MIME: case MU_MSG_FIELD_ID_EMBEDDED_TEXT: break; - - case MU_MSG_FIELD_ID_MSGID: - add_terms_values_msgid (*msgdoc->_doc, msgdoc->_msg); - break; - case MU_MSG_FIELD_ID_THREAD_ID: case MU_MSG_FIELD_ID_UID: break; /* already taken care of elsewhere */ @@ -604,7 +555,7 @@ add_address_subfields (Xapian::Document& doc, const char *addr, const std::string& pfx) { const char *at, *domain_part; - char *name_part, *f1, *f2; + char *name_part; /* add "foo" and "bar.com" as terms as well for * "foo@bar.com" */ @@ -614,16 +565,10 @@ add_address_subfields (Xapian::Document& doc, const char *addr, name_part = g_strndup(addr, at - addr); // foo domain_part = at + 1; - f1 = mu_str_process_term (name_part); - f2 = mu_str_process_term (domain_part); + doc.add_term (pfx + std::string(name_part, 0, _MuStore::MAX_TERM_LENGTH)); + doc.add_term (pfx + std::string(domain_part, 0, _MuStore::MAX_TERM_LENGTH)); - g_free (name_part); - - doc.add_term (pfx + std::string(f1, 0, _MuStore::MAX_TERM_LENGTH)); - doc.add_term (pfx + std::string(f2, 0, _MuStore::MAX_TERM_LENGTH)); - - g_free (f1); - g_free (f2); + g_free (name_part); } static gboolean @@ -640,19 +585,15 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) if (!mu_str_is_empty(contact->name)) { Xapian::TermGenerator termgen; termgen.set_document (*msgdoc->_doc); - char *flat = mu_str_process_text (contact->name); - termgen.index_text_without_positions (flat, 1, pfx); - g_free (flat); + const auto flat = Mux::utf8_flatten(contact->name); + termgen.index_text (flat, 1, pfx); } if (!mu_str_is_empty(contact->address)) { - char *flat; - flat = 
mu_str_process_term (contact->address); + const auto flat = Mux::utf8_flatten(contact->address); msgdoc->_doc->add_term (std::string (pfx + flat, 0, MuStore::MAX_TERM_LENGTH)); - g_free (flat); add_address_subfields (*msgdoc->_doc, contact->address, pfx); - /* store it also in our contacts cache */ if (msgdoc->_store->contacts()) mu_contacts_add (msgdoc->_store->contacts(), diff --git a/lib/mu-store.h b/lib/mu-store.h index e6a40a5f..c4073765 100644 --- a/lib/mu-store.h +++ b/lib/mu-store.h @@ -1,5 +1,5 @@ /* -** Copyright (C) 2008-2013 Dirk-Jan C. Binnema +** Copyright (C) 2008-2017 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify it ** under the terms of the GNU General Public License as published by the @@ -44,7 +44,7 @@ typedef struct _MuStore MuStore; MuStore* mu_store_new_writable (const char *xpath, const char *ccachepath, gboolean rebuild, GError **err) - G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; + G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; /** @@ -57,7 +57,7 @@ MuStore* mu_store_new_writable (const char *xpath, * of error; free with mu_store_unref */ MuStore* mu_store_new_read_only (const char* xpath, GError **err) - G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; + G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; diff --git a/lib/mu-str.c b/lib/mu-str.c index e2aad07a..fdb58178 100644 --- a/lib/mu-str.c +++ b/lib/mu-str.c @@ -175,30 +175,6 @@ mu_str_display_contact (const char *str) } -gint64 -mu_str_size_parse_bkm (const char* str) -{ - gint64 num; - - g_return_val_if_fail (str, -1); - - if (!isdigit(str[0])) - return -1; - - num = atoi(str); - for (++str; isdigit(*str); ++str); - - switch (tolower(*str)) { - case '\0': - case 'b' : return num; /* bytes */ - case 'k': return num * 1000; /* kilobyte */ - case 'm': return num * 1000 * 1000; /* megabyte */ - default: - return -1; - } -} - - char* mu_str_replace (const char *str, const char *substr, const char *repl) { @@ -224,9 +200,6 @@ mu_str_replace (const char *str, const 
char *substr, const char *repl) - - - char* mu_str_from_list (const GSList *lst, char sepa) { @@ -396,397 +369,6 @@ mu_str_subject_normalize (const gchar* str) } -struct _CheckPrefix { - const char *str; - gboolean match; - gboolean range_field; -}; -typedef struct _CheckPrefix CheckPrefix; - - - -static void -each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx) -{ - const char *pfx; - char pfx_short[3] = { 'X', ':', '\0'}; - char k; - - if (!cpfx || cpfx->match) - return; - - k = pfx_short[0] = mu_msg_field_shortcut (mfid); - if (k && g_str_has_prefix (cpfx->str, pfx_short)) { - cpfx->match = TRUE; - cpfx->range_field = mu_msg_field_is_range_field (mfid); - } - - pfx = mu_msg_field_name (mfid); - if (pfx && g_str_has_prefix (cpfx->str, pfx) && - cpfx->str[strlen(pfx)] == ':') { - cpfx->match = TRUE; - cpfx->range_field = mu_msg_field_is_range_field (mfid); - } -} - -/* check if it looks like either i: or msgid: */ -static gboolean -is_msgid_field (const char *str) -{ - const char *name; - - if (!str || strlen(str) < 3) - return FALSE; - - if (str[0] == mu_msg_field_shortcut (MU_MSG_FIELD_ID_MSGID) && - str[1] == ':') - return TRUE; - - name = mu_msg_field_name (MU_MSG_FIELD_ID_MSGID); - if (g_str_has_prefix (str, name) && str[strlen(name)] == ':') - return TRUE; - - return FALSE; -} - -/* message-ids need a bit more massaging -- we replace all - * non-alphanum with '_'. Note, this function assumes we're looking at - * a msg-id field, ie. i: or msgid: */ -char* -mu_str_process_msgid (const char *str, gboolean query) -{ - char *s, *c; - - g_return_val_if_fail (str, NULL); - g_return_val_if_fail (!query || strchr(str, ':'), NULL); - - if (!str) - return NULL; - - s = g_strdup (str); - - if (query) - c = strchr (s, ':') + 1; - else - c = s; - - for (; *c; ++c) - *c = isalnum (*c) ? 
tolower (*c) : '_'; - - return s; -} - - - -static void -check_for_field (const char *str, gboolean *is_field, - gboolean *is_range_field) -{ - CheckPrefix pfx; - - pfx.str = str; - - /* skip any non-alphanum starts in cpfx->str; this is to - * handle the case where we have e.g. "(maildir:/abc)" - */ - while (pfx.str && *pfx.str && !isalnum(*pfx.str)) - ++pfx.str; - - pfx.match = pfx.range_field = FALSE; - - mu_msg_field_foreach ((MuMsgFieldForeachFunc)each_check_prefix, - &pfx); - /* also check special prefixes... */ - if (!pfx.match) - pfx.match = - g_str_has_prefix - (str, MU_MSG_FIELD_PSEUDO_CONTACT ":") || - g_str_has_prefix - (str, MU_MSG_FIELD_PSEUDO_RECIP ":"); - - *is_field = pfx.match; - *is_range_field = pfx.range_field; -} - - -static gboolean -handle_esc_maybe (GString *gstr, char **cur, gunichar uc, - gboolean query_esc, gboolean range_field) -{ - char kar; - - kar = *cur[0]; - - if (query_esc) { - switch (kar) { - case ':': - case '(': - case ')': - case '*': - case '&': - case '"': - g_string_append_c (gstr, kar); - return TRUE; - case '.': - if (!range_field) - break; - - if ((*cur)[1] == '.' && (*cur)[2] != '.') { - g_string_append (gstr, ".."); - *cur = g_utf8_next_char (*cur); - return TRUE; - } - default: break; - } - } - - if (g_unichar_ispunct(uc) || isblank(kar)) { - g_string_append_c (gstr, '_'); - return TRUE; - } - - return FALSE; -} - - -static char* -process_str (const char *str, gboolean xapian_esc, gboolean query_esc) -{ - GString *gstr; - char *norm, *cur; - gboolean is_field, is_range_field; - - norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL); - if (G_UNLIKELY(!norm)) { /* not valid utf8? 
*/ - char *u8; - u8 = mu_str_utf8ify (str); - norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL); - g_free (u8); - } - - if (!norm) - return NULL; - - /* msg-id needs some special care in queries */ - if (query_esc && is_msgid_field (str)) - return mu_str_process_msgid (str, TRUE); - - check_for_field (str, &is_field, &is_range_field); - gstr = g_string_sized_new (strlen (norm)); - - for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) { - - gunichar uc; - uc = g_utf8_get_char (cur); - if (xapian_esc) - if (handle_esc_maybe (gstr, &cur, uc, query_esc, - is_range_field)) - continue; - - if (g_unichar_ismark(uc)) - continue; - - if (!is_range_field) - uc = g_unichar_tolower (uc); - - g_string_append_unichar (gstr, uc); - } - - g_free (norm); - return g_string_free (gstr, FALSE); -} - - -char* -mu_str_process_text (const char *str) -{ - g_return_val_if_fail (str, NULL); - - return process_str (str, FALSE, FALSE); - -} - - -char* -mu_str_process_term (const char *str) -{ - g_return_val_if_fail (str, NULL); - - return process_str (str, TRUE, FALSE); - -} - - -char* -mu_str_process_query_term (const char *str) -{ - g_return_val_if_fail (str, NULL); - - return process_str (str, TRUE, TRUE); - -} - - - -/* - * Split simple search term into prefix, expression and suffix. - * Meant to handle cases like "(maildir:/abc)", prefix and - * suffix are the non-alphanumeric stuff at the beginning - * and the end of string. - * - * Values of *pfx, *cond and *sfx will be allocated from heap - * and must be g_free()d. - * - * Returns TRUE if all went fine and FALSE if some error was - * occured. 
- */ -static gboolean -split_term (const gchar *term, - const gchar **pfx, const gchar **cond, const gchar **sfx) -{ - size_t l; - const gchar *start, *tail; - const gchar *p, *c, *s; - - g_return_val_if_fail (term, FALSE); - g_return_val_if_fail (pfx, FALSE); - g_return_val_if_fail (cond, FALSE); - g_return_val_if_fail (sfx, FALSE); - - l = strlen (term); - if (l == 0) { - p = g_strdup (""); - c = g_strdup (""); - s = g_strdup (""); - goto _done; - } - - /* - * Invariants: - * - start will point to the first symbol after leading - * non-alphanumerics (can be alphanumeric or '\0'); - * - tail will point to the beginning of trailing - * non-alphanumerics or '\0'. - * So: - * - len (prefix) = start - term; - * - len (cond) = tail - start; - * - len (suffix) = term + len (term) - tail. - */ - for (start = term; *start && !isalnum (*start); start++); - for (tail = term + l; tail > start && !isalnum (*(tail-1)); tail--); - - p = g_strndup (term, start - term); - c = g_strndup (start, tail - start); - s = g_strndup (tail, term + l - tail); - -_done: - if (!p || !c || !s) { - g_free ((gchar *)p); - g_free ((gchar *)c); - g_free ((gchar *)s); - return FALSE; - } else { - *pfx = p; - *cond = c; - *sfx = s; - return TRUE; - } - /* NOTREACHED */ -} - - -/* - * Fixup handlers. - * - * Every fixup handler will take three string arguments, - * prefix, condition and suffix (as split by split_term). - * - * It will either return NULL that means "no fixup was done" - * or the pointer to the newly-allocated string with the - * new contents. - */ -typedef gchar * - (*fixup_handler_t)(const gchar *pfx, const gchar *cond, const gchar *sfx); - -static gchar* -fixup_date(const gchar *pfx, const gchar *cond, const gchar *sfx) -{ - const gchar *p; - - p = cond + sizeof ("date:") - 1; - - if (strstr (p, "..")) - return NULL; - return g_strdup_printf ("%s%s..%s%s", pfx, cond, p, sfx); -} - - -/* - * Looks up fixup handler for the given condition. 
- * - * Returns fixup handler if we can and NULL if there is - * no fixup for this condition. - */ -static fixup_handler_t -find_fixup (const gchar *cond) -{ - size_t n; - /* NULL-terminated list of term names for fixups. */ - static struct { - const char *name; - size_t len; - fixup_handler_t handler; - } fixups[] = { - {"date:", sizeof("date:") - 1, fixup_date}, - {NULL, 0, NULL} - }; - - g_return_val_if_fail (cond, NULL); - - for (n = 0; fixups[n].name; n++) { - if (!strncasecmp (cond, fixups[n].name, fixups[n].len)) - break; - } - - return fixups[n].handler; -} - - -gchar* -mu_str_xapian_fixup_terms (const gchar *term) -{ - gboolean is_field, is_range_field; - const gchar *cond, *pfx, *sfx; - gchar *retval; - fixup_handler_t fixup; - - g_return_val_if_fail (term, NULL); - - if (strlen (term) == 0) - return g_strdup (term); - - check_for_field (term, &is_field, &is_range_field); - if (!is_field || !is_range_field) - return g_strdup (term); - - if (!split_term (term, &pfx, &cond, &sfx)) - return g_strdup (term); - - retval = NULL; - fixup = find_fixup (cond); - if (fixup) - retval = fixup (pfx, cond, sfx); - if (!retval) - retval = g_strdup (term); - - /* At this point retval should contain the result */ - g_free ((gchar *)pfx); - g_free ((gchar *)sfx); - g_free ((gchar *)cond); - - return retval; -} - /* note: this function is *not* re-entrant, it returns a static buffer */ const char* mu_str_fullpath_s (const char* path, const char* name) diff --git a/lib/mu-str.h b/lib/mu-str.h index 79b582a6..a9cddeb7 100644 --- a/lib/mu-str.h +++ b/lib/mu-str.h @@ -119,83 +119,6 @@ char* mu_str_flags (MuFlags flags) char* mu_str_summarize (const char* str, size_t max_lines) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; - -/** - * Process some text (e.g. message bodies) -- flatten (remove accents - * etc.), and remove some punctuation. 
- * - * @param text some text - * - * @return the processed text, free with g_free - */ -char* mu_str_process_text (const char *text) - G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; - -/** - * Process some term (e.g., an e-mail address, subject field): - * remove accents, replace some punctuation by _ - * - * @param term some term - * - * @return the processed text, free with g_free - */ -char* mu_str_process_term (const char *term) - G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; - -/** - * Process some query term (e.g., an e-mail address, subject field): - * remove accents, replace some punctuation by _, but leave some query - * metachars alone. - * - * @param qterm some query term - * - * @return the processed text, free with g_free - */ -char* mu_str_process_query_term (const char *qterm) - G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; - - -/** - * Handle the message-id in a special way - * - * @param str the message-id str - * @param query is this a query? - * - * @return the massaged message-id - */ -char* mu_str_process_msgid (const char *str, gboolean query); - - -/** - * Fixup values for some fields in the DWIM manner: - * - if term is date:YYYYMMDD, replace it with the range - * date:YYYYMMDD..YYYYMMDD. - * - * @param query a query string - * - * @return the fixup'd string that must be g_free()d - * after use or NULL in case of error. - */ -gchar* mu_str_xapian_fixup_terms (const gchar *term); - -/** - * parse a byte size; a size is a number, with optionally a - * unit. Units recognized are b/B (bytes) k/K (1000) and m/M - * (1000*1000). Only the first letter is checked and the function is - * not case-sensitive, so 1000Kb, 3M will work equally well. Note, - * for kB, MB etc., we then follow the SI standards, not 2^10 etc. The - * 'b' may be omitted. - * - * practical sizes for email messages are in terms of Mb; even in - * extreme cases it should be under 100 Mb. 
Function return - * GUINT64_MAX if there a parsing error - * - * @param str a string with a size, such a "100", "100Kb", "1Mb" - * - * @return the corresponding size in bytes, or -1 in case of error - */ -gint64 mu_str_size_parse_bkm (const char* str); - /** * create a full path from a path + a filename. function is _not_ * reentrant. @@ -207,7 +130,6 @@ gint64 mu_str_size_parse_bkm (const char* str); */ const char* mu_str_fullpath_s (const char* path, const char* name); - /** * escape a string like a string literal in C; ie. replace \ with \\, * and " with \" @@ -220,7 +142,6 @@ const char* mu_str_fullpath_s (const char* path, const char* name); char* mu_str_escape_c_literal (const gchar* str, gboolean in_quotes) G_GNUC_WARN_UNUSED_RESULT; - /** * turn a string into plain ascii by replacing each non-ascii * character with a dot ('.'). Replacement is done in-place. @@ -231,7 +152,6 @@ char* mu_str_escape_c_literal (const gchar* str, gboolean in_quotes) */ char* mu_str_asciify_in_place (char *buf); - /** * turn string in buf into valid utf8. If this string is not valid * utf8 already, the function massages the offending characters. @@ -242,7 +162,6 @@ char* mu_str_asciify_in_place (char *buf); */ char* mu_str_utf8ify (const char *buf); - /** * convert a string in a certain charset into utf8 * @@ -255,7 +174,6 @@ char* mu_str_utf8ify (const char *buf); gchar* mu_str_convert_to_utf8 (const char* buffer, const char *charset); - /** * macro to check whether the string is empty, ie. 
if it's NULL or * it's length is 0 @@ -266,7 +184,6 @@ gchar* mu_str_convert_to_utf8 (const char* buffer, const char *charset); */ #define mu_str_is_empty(S) ((!(S)||!(*S))?TRUE:FALSE) - /** * convert a GSList of strings to a #sepa-separated list * @@ -277,7 +194,6 @@ gchar* mu_str_convert_to_utf8 (const char* buffer, const char *charset); */ char* mu_str_from_list (const GSList *lst, char sepa); - /** * convert a #sepa-separated list of strings in to a GSList * @@ -289,7 +205,6 @@ char* mu_str_from_list (const GSList *lst, char sepa); */ GSList* mu_str_to_list (const char *str, char sepa, gboolean strip); - /** * convert a string (with possible escaping) to a list. list items are * separated by one or more spaces. list items can be quoted (using @@ -302,8 +217,6 @@ GSList* mu_str_to_list (const char *str, char sepa, gboolean strip); */ GSList* mu_str_esc_to_list (const char *str); - - /** * Parse a list of : arguments, where supports * quoting and escaping. @@ -317,7 +230,6 @@ GSList* mu_str_esc_to_list (const char *str); GHashTable* mu_str_parse_arglist (const char *args, GError **err) G_GNUC_WARN_UNUSED_RESULT; - /** * free a GSList consisting of allocated strings * @@ -325,7 +237,6 @@ G_GNUC_WARN_UNUSED_RESULT; */ void mu_str_free_list (GSList *lst); - /** * strip the subject of Re:, Fwd: etc. 
* diff --git a/lib/parser/test-utils.cc b/lib/parser/test-utils.cc index 7772b193..efcf3dd4 100644 --- a/lib/parser/test-utils.cc +++ b/lib/parser/test-utils.cc @@ -53,23 +53,59 @@ test_cases(const CaseVec& cases, ProcFunc proc) } static void -test_date () +test_date_basic () { g_setenv ("TZ", "Europe/Helsinki", TRUE); CaseVec cases = { - { "2015-09-18T09:10:23", true, "001442556623" }, - { "1972-12-14T09:10:23", true, "000093165023" }, - { "1854-11-18T17:10:23", true, "000000000000" }, - { "fnorb", true, "000000000000" }, - { "fnorb", false, "999999999999" }, - { "", false, "999999999999" }, - { "", true, "000000000000" } + { "2015-09-18T09:10:23", true, "1442556623" }, + { "1972-12-14T09:10:23", true, "0093165023" }, + { "1854-11-18T17:10:23", true, "0000000000" }, + + { "2016", true, "1451599200" }, + { "2016", false, "1483221599" }, + + { "fnorb", true, "0000000000" }, + { "fnorb", false, "9999999999" }, + { "", false, "9999999999" }, + { "", true, "0000000000" } }; test_cases (cases, [](auto s, auto f){ return date_to_time_t_string(s,f); }); } +static void +test_date_ymwdhMs (void) +{ + struct { + std::string expr; + long diff; + int tolerance; + } tests[] = { + { "3h", 3 * 60 * 60, 1 }, + { "21d", 21 * 24 * 60 * 60, 1 }, + { "2w", 2 * 7 * 24 * 60 * 60, 1 }, + + { "2y", 2 * 365 * 24 * 60 * 60, 24 * 3600 + 1 }, + { "3m", 3 * 30 * 24 * 60 * 60, 3 * 24 * 3600 + 1 } + }; + + for (auto i = 0; i != G_N_ELEMENTS(tests); ++i) { + const auto diff = time(NULL) - + strtol(Mux::date_to_time_t_string(tests[i].expr, true).c_str(), + NULL, 10); + if (g_test_verbose()) + std::cerr << tests[i].expr << ' ' + << diff << ' ' + << tests[i].diff << std::endl; + + g_assert_true (tests[i].diff - diff <= tests[i].tolerance); + } + + g_assert_true (strtol(Mux::date_to_time_t_string("-1y", true).c_str(), + NULL, 10) == 0); +} + static void test_size () { @@ -88,8 +124,9 @@ main (int argc, char *argv[]) { g_test_init (&argc, &argv, NULL); - g_test_add_func ("/utils/process-date", 
test_date); - g_test_add_func ("/utils/process-size", test_size); + g_test_add_func ("/utils/date-basic", test_date_basic); + g_test_add_func ("/utils/date-ymwdhMs", test_date_ymwdhMs); + g_test_add_func ("/utils/size", test_size); return g_test_run (); } diff --git a/lib/parser/utils.cc b/lib/parser/utils.cc index 26fbdaae..6b27991d 100644 --- a/lib/parser/utils.cc +++ b/lib/parser/utils.cc @@ -144,11 +144,11 @@ Mux::quote (const std::string& str) return str; } -constexpr const auto InternalDateFormat = "%012" G_GINT64_FORMAT; -constexpr const char InternalDateMin[] = "000000000000"; -constexpr const char InternalDateMax[] = "999999999999"; -static_assert(sizeof(InternalDateMin) == 12 + 1); -static_assert(sizeof(InternalDateMax) == 12 + 1); +constexpr const auto InternalDateFormat = "%010" G_GINT64_FORMAT; +constexpr const char InternalDateMin[] = "0000000000"; +constexpr const char InternalDateMax[] = "9999999999"; +static_assert(sizeof(InternalDateMin) == 10 + 1); +static_assert(sizeof(InternalDateMax) == 10 + 1); static std::string date_boundary (bool is_first) @@ -204,7 +204,6 @@ delta_ymwdhMs (const std::string& expr) return date_to_time_t_string (t); } - static std::string special_date (const std::string& d, bool is_first) { @@ -235,9 +234,8 @@ special_date (const std::string& d, bool is_first) return date_boundary (is_first); } - constexpr const char UserDateMin[] = "19700101000000"; -constexpr const char UserDateMax[] = "29993112235959"; +constexpr const char UserDateMax[] = "29991231235959"; std::string Mux::date_to_time_t_string (const std::string& dstr, bool is_first) @@ -249,7 +247,10 @@ Mux::date_to_time_t_string (const std::string& dstr, bool is_first) /* one-sided dates */ if (dstr.empty()) return date_boundary (is_first); - else if (is_first && dstr.find_first_of("ymdwhMs") != std::string::npos) + else if (dstr == "today" || dstr == "now") + return special_date (dstr, is_first); + + else if (dstr.find_first_of("ymdwhMs") != std::string::npos) 
return delta_ymwdhMs (dstr); std::string date (is_first ? UserDateMin : UserDateMax); @@ -261,7 +262,7 @@ Mux::date_to_time_t_string (const std::string& dstr, bool is_first) !strptime (date.c_str(), "%Y%m%d", &tbuf) && !strptime (date.c_str(), "%Y%m", &tbuf) && !strptime (date.c_str(), "%Y", &tbuf)) - return special_date (date, is_first); + return date_boundary (is_first); dtime = g_date_time_new_local (tbuf.tm_year + 1900, tbuf.tm_mon + 1,