integrate new query parser

This commit is contained in:
djcb 2017-10-24 22:57:57 +03:00
parent b75f9f508b
commit 5e9cafea59
17 changed files with 373 additions and 979 deletions

View File

@ -52,7 +52,10 @@ AC_PROG_CC_C99
AC_PROG_INSTALL
AC_HEADER_STDC
AX_CXX_COMPILE_STDCXX_14
AX_COMPILER_FLAGS(,,[yes])
AX_VALGRIND_CHECK
# for now, use AM_PROG_LIBTOOL, as we don't want to require
# a too new setup for autotools/libtool
@ -83,7 +86,7 @@ AS_IF([test "x$enable_mu4e" != "xno"], [
lispdir="${lispdir}/mu4e/"
])
AS_CASE([$emacs_version],[*23*|*24*|*25*|*26*],[build_mu4e=yes],
[AC_WARN([emacs is too old to build mu4e (need emacs >= 23.x)])])
[AC_WARN([emacs is too old to build mu4e (need emacs >= 23.x)])])
])
AM_CONDITIONAL(BUILD_MU4E, test "x$build_mu4e" = "xyes")
@ -96,7 +99,7 @@ AC_CHECK_PROGS([PERL], [perl], [no])
AS_IF([test x"$enable_perl" = "xyes" -a x"$PERL" != "xno"], [
AM_PERL_MODULE([Data::SExpression],[build_perl=yes])
if test x"$build_perl" = "xyes"; then
perl_version=`$PERL -Iperl/lib -Mmup -e 'print "$mup::VERSION\n";'`
perl_version=`$PERL -Iperl/lib -Mmup -e 'print "$mup::VERSION\n";'`
fi
])
AM_CONDITIONAL(BUILD_PERL, test "x$build_perl" = "xyes")
@ -183,7 +186,7 @@ AC_SUBST(XAPIAN_LIBS)
# note that MU_STORE_SCHEMA_VERSION does not necessarily follow MU
# versioning, as we hopefully don't have updates for each version;
# also, this has nothing to do with Xapian's software version
AC_DEFINE(MU_STORE_SCHEMA_VERSION,["9.9"], ['Schema' version of the database])
AC_DEFINE(MU_STORE_SCHEMA_VERSION,["0.99"], ['Schema' version of the database])
###############################################################################
@ -230,15 +233,15 @@ AS_IF([test "x$enable_guile" != "xno"], [
AS_IF([test "x$build_guile" = "xyes"],[
AC_PATH_PROG(GUILE_BINARY, [guile], [], [$GUILE_EXECDIR])
AS_IF([test "x$GUILE_BINARY" != "x"],
[AC_DEFINE_UNQUOTED([GUILE_BINARY],["$GUILE_BINARY"],[Path to the guile binary])],
[AC_MSG_WARN([cannot find guile-snarf])])])
[AC_DEFINE_UNQUOTED([GUILE_BINARY],["$GUILE_BINARY"],[Path to the guile binary])],
[AC_MSG_WARN([cannot find guile-snarf])])])
AS_IF([test "x$build_guile" = "xyes"],[
AC_PATH_PROG(GUILE_SNARF, [guile-snarf], [], [$GUILE_EXECDIR])
AS_IF([test "x$GUILE_SNARF" != "x"],
[AC_DEFINE_UNQUOTED([GUILE_SNARF],["$GUILE_SNARF"],[Path to guile-snarf])
GUILE_SITEDIR="`${PKG_CONFIG} guile-2.0 --variable=sitedir`"],
[AC_MSG_WARN([cannot find guile-snarf])])])
[AC_DEFINE_UNQUOTED([GUILE_SNARF],["$GUILE_SNARF"],[Path to guile-snarf])
GUILE_SITEDIR="`${PKG_CONFIG} guile-2.0 --variable=sitedir`"],
[AC_MSG_WARN([cannot find guile-snarf])])])
AC_SUBST(GUILE_SITEDIR)])
AS_IF([test "x$build_guile" = "xyes" -a "x$GUILE_SNARF" != "x"],
@ -281,6 +284,7 @@ mu/Makefile
mu/tests/Makefile
lib/Makefile
lib/doxyfile
lib/parser/Makefile
lib/tests/Makefile
mu4e/Makefile
mu4e/mu4e-meta.el

View File

@ -1,4 +1,4 @@
## Copyright (C) 2010-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
## Copyright (C) 2010-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@ -18,13 +18,14 @@ include $(top_srcdir)/gtest.mk
# enforce compiling guile (optionally) first,then this dir first
# before descending into tests/
SUBDIRS= . tests
SUBDIRS= parser . tests
AM_CFLAGS= \
$(WARN_CFLAGS) \
-Wno-format-nonliteral \
-Wno-switch-enum \
-Wno-suggest-attribute=format \
-Wno-deprecated-declarations \
$(GMIME_CFLAGS) \
$(GLIB_CFLAGS) \
$(GUILE_CFLAGS)
@ -103,7 +104,8 @@ libmu_la_LIBADD= \
$(XAPIAN_LIBS) \
$(GMIME_LIBS) \
$(GLIB_LIBS) \
$(GUILE_LIBS)
$(GUILE_LIBS) \
${builddir}/parser/libmuxparser.la
EXTRA_DIST= \
mu-msg-crypto.c \

View File

@ -77,6 +77,20 @@ mu_flag_char (MuFlags flag)
}
/*
 * Look up a flag by its name in FLAG_INFO and return that entry's
 * shortcut character (its 'kar' member), carried in a MuFlags value.
 *
 * Returns 0 when no entry has that name. A NULL name also yields 0
 * (after the usual g_return_val_if_fail warning), so that callers
 * which only test the result for non-zero never mistake an error
 * value for a flag character.
 */
MuFlags
mu_flag_char_from_name (const char *str)
{
	unsigned u;

	/* 0 is the documented "no such flag" result; use it for a
	 * missing name as well rather than MU_FLAG_INVALID */
	g_return_val_if_fail (str, 0);

	for (u = 0; u != G_N_ELEMENTS (FLAG_INFO); ++u)
		if (g_strcmp0 (FLAG_INFO[u].name, str) == 0)
			return FLAG_INFO[u].kar;

	return 0;
}
static MuFlags
mu_flag_from_char (char kar)

View File

@ -128,6 +128,18 @@ MuFlags mu_flags_from_str (const char *str, MuFlagType types,
gboolean ignore_invalid);
/**
* Look up a flag by its name and get its one-character shortcut; the
* character is returned in a MuFlags value.
*
* @param str a flag name (must not be NULL)
*
* @return the flag's shortcut character, or 0 if no flag has that name
*/
MuFlags mu_flag_char_from_name (const char *str);
/**
* return the concatenation of all non-standard file flags in str
* (ie., characters other than DFPRST) as a newly allocated string.

View File

@ -106,17 +106,13 @@ mu_msg_doc_get_num_field (MuMsgDoc *self, MuMsgFieldId mfid)
g_return_val_if_fail (mu_msg_field_id_is_valid(mfid), -1);
g_return_val_if_fail (mu_msg_field_is_numeric(mfid), -1);
/* date is a special case, because we store dates as
* strings */
try {
const std::string s (self->doc().get_value(mfid));
if (s.empty())
return 0;
else if (mfid == MU_MSG_FIELD_ID_DATE) {
time_t t;
t = mu_date_str_to_time_t (s.c_str(), FALSE/*utc*/);
return static_cast<gint64>(t);
} else {
else if (mfid == MU_MSG_FIELD_ID_DATE)
return strtol (s.c_str(), NULL, 10);
else {
return static_cast<gint64>
(Xapian::sortable_unserialise(s));
}

View File

@ -48,11 +48,9 @@ enum _FieldFlags {
* for Xapian queries;
* wildcards do NOT WORK
* for such fields */
FLAG_PREPROCESS = 1 << 6, /* field needs flattening for
* case/accents */
FLAG_DONT_CACHE = 1 << 7, /* don't cache this field in
FLAG_DONT_CACHE = 1 << 6, /* don't cache this field in
* the MuMsg cache */
FLAG_RANGE_FIELD = 1 << 8 /* whether this is a range field */
FLAG_RANGE_FIELD = 1 << 7 /* whether this is a range field */
};
typedef enum _FieldFlags FieldFlags;
@ -84,14 +82,14 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_TYPE_STRING,
"bcc" , 'h', 'H', /* 'hidden */
FLAG_GMIME | FLAG_XAPIAN_CONTACT |
FLAG_XAPIAN_VALUE | FLAG_PREPROCESS
FLAG_XAPIAN_VALUE
},
{
MU_MSG_FIELD_ID_BODY_TEXT,
MU_MSG_FIELD_TYPE_STRING,
"body", 'b', 'B',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_PREPROCESS |
FLAG_GMIME | FLAG_XAPIAN_INDEX |
FLAG_DONT_CACHE
},
@ -106,8 +104,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_CC,
MU_MSG_FIELD_TYPE_STRING,
"cc", 'c', 'C',
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE |
FLAG_PREPROCESS
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
},
{
@ -122,16 +119,14 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_EMBEDDED_TEXT,
MU_MSG_FIELD_TYPE_STRING,
"embed", 'e', 'E',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_PREPROCESS |
FLAG_DONT_CACHE
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_DONT_CACHE
},
{
MU_MSG_FIELD_ID_FILE,
MU_MSG_FIELD_TYPE_STRING,
"file" , 'j', 'J',
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_PREPROCESS |
FLAG_DONT_CACHE
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_DONT_CACHE
},
@ -146,8 +141,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_FROM,
MU_MSG_FIELD_TYPE_STRING,
"from", 'f', 'F',
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE |
FLAG_PREPROCESS
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
},
@ -155,8 +149,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_MAILDIR,
MU_MSG_FIELD_TYPE_STRING,
"maildir", 'm', 'M',
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_PREPROCESS
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE
},
@ -164,8 +157,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_MAILING_LIST,
MU_MSG_FIELD_TYPE_STRING,
"list", 'v', 'V',
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_PREPROCESS
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE
},
@ -173,7 +165,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_MIME,
MU_MSG_FIELD_TYPE_STRING,
"mime" , 'y', 'Y',
FLAG_XAPIAN_TERM | FLAG_PREPROCESS
FLAG_XAPIAN_TERM
},
@ -181,8 +173,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_MSGID,
MU_MSG_FIELD_TYPE_STRING,
"msgid", 'i', 'I', /* 'i' for Id */
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_PREPROCESS
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE
},
@ -191,7 +182,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_TYPE_STRING,
"path", 'l', 'L', /* 'l' for location */
FLAG_GMIME | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_BOOLEAN | FLAG_PREPROCESS
FLAG_XAPIAN_BOOLEAN
},
@ -224,15 +215,14 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_TYPE_STRING,
"subject", 's', 'S',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_TERM | FLAG_PREPROCESS
FLAG_XAPIAN_TERM
},
{
MU_MSG_FIELD_ID_TAGS,
MU_MSG_FIELD_TYPE_STRING_LIST,
"tag", 'x', 'X',
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_PREPROCESS |
FLAG_XAPIAN_VALUE
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE
},
@ -247,8 +237,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_TO,
MU_MSG_FIELD_TYPE_STRING,
"to", 't', 'T',
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE |
FLAG_PREPROCESS
FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
},
{ /* special, internal field, to get a unique key */
@ -389,13 +378,6 @@ mu_msg_field_xapian_contact (MuMsgFieldId id)
}
gboolean
mu_msg_field_preprocess (MuMsgFieldId id)
{
g_return_val_if_fail (mu_msg_field_id_is_valid(id),FALSE);
return mu_msg_field(id)->_flags & FLAG_PREPROCESS ? TRUE: FALSE;
}
gboolean
mu_msg_field_is_numeric (MuMsgFieldId mfid)

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2008-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
@ -59,7 +59,6 @@ enum _MuMsgFieldId {
MU_MSG_FIELD_ID_MAILING_LIST, /* mailing list */
MU_MSG_FIELD_ID_THREAD_ID,
MU_MSG_FIELD_ID_NUM
};
typedef guint8 MuMsgFieldId;
@ -232,19 +231,6 @@ gboolean mu_msg_field_xapian_value (MuMsgFieldId id) G_GNUC_PURE;
*/
gboolean mu_msg_field_uses_boolean_prefix (MuMsgFieldId id) G_GNUC_PURE;
/**
* should this field be escaped for xapian? in practice, should
* word-breaking chars be replaced with '_'? Also, flatten accents,
* downcase?
*
* @param field a MuMsgField
*
* @return TRUE if the field is to be preprocessed, FALSE otherwise
*/
gboolean mu_msg_field_preprocess (MuMsgFieldId id) G_GNUC_PURE;
/**
* is this a range-field? ie. date, or size
*

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2012-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2012-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -20,7 +20,7 @@
#include "mu-msg-prio.h"
const char*
const char*
mu_msg_prio_name (MuMsgPrio prio)
{
switch (prio) {
@ -45,8 +45,8 @@ mu_msg_prio_char (MuMsgPrio prio)
if (!(prio == 'l' || prio == 'n' || prio == 'h')) {
g_warning ("prio: %c", (char)prio);
}
g_return_val_if_fail (prio == 'l' || prio == 'n' || prio == 'h',
0);

View File

@ -1,20 +1,20 @@
/*
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
/*
** Copyright (C) 2008-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 3 of the License, or
** (at your option) any later version.
**
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#ifndef __MU_MSG_PRIO_H__
@ -37,9 +37,9 @@ static const MuMsgPrio MU_MSG_PRIO_NONE = (MuMsgPrio)0;
/**
* get a printable name for the message priority
* (ie., MU_MSG_PRIO_LOW=>"low" etc.)
*
*
* @param prio a message priority
*
*
* @return a printable name for this priority
*/
const char* mu_msg_prio_name (MuMsgPrio prio) G_GNUC_CONST;
@ -49,9 +49,9 @@ const char* mu_msg_prio_name (MuMsgPrio prio) G_GNUC_CONST;
* get the MuMsgPriority corresponding to a one-character shortcut
* ('l'=>MU_MSG_PRIO_LOW, 'n'=>MU_MSG_PRIO_NORMAL or
* 'h'=>MU_MSG_PRIO_HIGH)
*
* @param k a character
*
*
* @param k a character
*
* @return a message priority
*/
MuMsgPrio mu_msg_prio_from_char (char k) G_GNUC_CONST;
@ -61,9 +61,9 @@ MuMsgPrio mu_msg_prio_from_char (char k) G_GNUC_CONST;
* get the one-character shortcut corresponding to a message priority
* ('l'=>MU_MSG_PRIO_LOW, 'n'=>MU_MSG_PRIO_NORMAL or
* 'h'=>MU_MSG_PRIO_HIGH)
*
*
* @param prio a message priority
*
*
* @return a shortcut character or 0 in case of error
*/
char mu_msg_prio_char (MuMsgPrio prio) G_GNUC_CONST;
@ -71,7 +71,7 @@ char mu_msg_prio_char (MuMsgPrio prio) G_GNUC_CONST;
typedef void (*MuMsgPrioForeachFunc) (MuMsgPrio prio, gpointer user_data);
/**
* call a function for each message priority
*
*
* @param func a callback function
* @param user_data a user pointer to pass to the callback
*/

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2008-2016 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2008-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
@ -21,8 +21,9 @@
#include <string>
#include <cctype>
#include <cstring>
#include <stdlib.h>
#include <sstream>
#include <stdlib.h>
#include <xapian.h>
#include <glib/gstdio.h>
@ -35,252 +36,178 @@
#include "mu-str.h"
#include "mu-date.h"
/*
* custom parser for date ranges
*/
class MuDateRangeProcessor : public Xapian::StringValueRangeProcessor {
public:
MuDateRangeProcessor():
Xapian::StringValueRangeProcessor(
(Xapian::valueno)MU_MSG_FIELD_ID_DATE) {}
#include <parser/proc-iface.hh>
#include <parser/utils.hh>
#include <parser/xapian.hh>
Xapian::valueno operator()(std::string &begin, std::string &end) {
struct MuProc: public Mux::ProcIface {
if (!clear_prefix (begin))
return Xapian::BAD_VALUENO;
MuProc (const Xapian::Database& db): db_{db} {}
begin = to_sortable (begin, true);
end = to_sortable (end, false);
static MuMsgFieldId field_id (const std::string& field) {
if (begin > end)
throw Xapian::QueryParserError
("end time is before begin");
if (field.empty())
return MU_MSG_FIELD_ID_NONE;
return (Xapian::valueno)MU_MSG_FIELD_ID_DATE;
MuMsgFieldId id = mu_msg_field_id_from_name (field.c_str(), FALSE);
if (id != MU_MSG_FIELD_ID_NONE)
return id;
else
return mu_msg_field_id_from_shortcut (field[0], FALSE);
}
private:
std::string to_sortable (std::string& s, bool is_begin) {
const char* tmp;
time_t t;
std::string
process_value (const std::string& field,
const std::string& value) const override {
const auto id = field_id (field);
if (id == MU_MSG_FIELD_ID_NONE)
return value;
switch(id) {
case MU_MSG_FIELD_ID_PRIO: {
if (!value.empty())
return std::string(1, value[0]);
} break;
// note: if s is empty and not is_begin, xapian seems
// to repeat it.
if (s.empty() || g_str_has_suffix (s.c_str(), "..")) {
tmp = mu_date_complete_s ("", is_begin);
} else {
tmp = mu_date_interpret_s (s.c_str(),
is_begin ? TRUE: FALSE);
tmp = mu_date_complete_s (tmp, is_begin ? TRUE: FALSE);
t = mu_date_str_to_time_t (tmp, TRUE /*local*/);
tmp = mu_date_time_t_to_str_s (t, FALSE /*UTC*/);
case MU_MSG_FIELD_ID_FLAGS: {
const auto flag = mu_flag_char_from_name (value.c_str());
if (flag)
return std::string(1, tolower(flag));
} break;
default:
break;
}
return s = std::string(tmp);
return value; // XXX prio/flags, etc. alias
}
void add_field (std::vector<FieldInfo>& fields, MuMsgFieldId id) const {
bool clear_prefix (std::string& begin) {
const auto shortcut = mu_msg_field_shortcut(id);
if (!shortcut)
return; // can't be searched
const std::string colon (":");
const std::string name (mu_msg_field_name
(MU_MSG_FIELD_ID_DATE) + colon);
const std::string shortcut (
std::string(1, mu_msg_field_shortcut
(MU_MSG_FIELD_ID_DATE)) + colon);
const auto name = mu_msg_field_name (id);
const auto pfx = mu_msg_field_xapian_prefix (id);
if (begin.find (name) == 0) {
begin.erase (0, name.length());
return true;
} else if (begin.find (shortcut) == 0) {
begin.erase (0, shortcut.length());
return true;
} else
if (!name || !pfx)
return;
fields.push_back ({{name}, {pfx}, id});
}
std::vector<FieldInfo>
process_field (const std::string& field) const override {
std::vector<FieldInfo> fields;
if (field == "contact" || field == "recip") { // multi fields
add_field (fields, MU_MSG_FIELD_ID_TO);
add_field (fields, MU_MSG_FIELD_ID_CC);
add_field (fields, MU_MSG_FIELD_ID_BCC);
if (field == "contact")
add_field (fields, MU_MSG_FIELD_ID_FROM);
} else if (field == "") {
add_field (fields, MU_MSG_FIELD_ID_TO);
add_field (fields, MU_MSG_FIELD_ID_CC);
add_field (fields, MU_MSG_FIELD_ID_BCC);
add_field (fields, MU_MSG_FIELD_ID_FROM);
add_field (fields, MU_MSG_FIELD_ID_SUBJECT);
add_field (fields, MU_MSG_FIELD_ID_BODY_TEXT);
} else {
const auto id = field_id (field.c_str());
if (id != MU_MSG_FIELD_ID_NONE)
add_field (fields, id);
}
return fields;
}
bool is_range_field (const std::string& field) const override {
const auto id = field_id (field.c_str());
if (id == MU_MSG_FIELD_ID_NONE)
return false;
else
return mu_msg_field_is_range_field (id);
}
Range process_range (const std::string& field, const std::string& lower,
const std::string& upper) const override {
const auto id = field_id (field.c_str());
if (id == MU_MSG_FIELD_ID_NONE)
return { lower, upper };
std::string l2 = lower;
std::string u2 = upper;
if (id == MU_MSG_FIELD_ID_DATE) {
l2 = Mux::date_to_time_t_string (lower, true);
u2 = Mux::date_to_time_t_string (upper, false);
} else if (id == MU_MSG_FIELD_ID_SIZE) {
l2 = Mux::size_to_string (lower, true);
u2 = Mux::size_to_string (upper, false);
}
return { l2, u2 };
}
std::vector<std::string>
process_regex (const std::string& field, const std::regex& rx) const override {
const auto id = field_id (field.c_str());
if (id == MU_MSG_FIELD_ID_NONE)
return {};
char pfx[] = { mu_msg_field_xapian_prefix(id), '\0' };
std::vector<std::string> terms;
for (auto it = db_.allterms_begin(pfx); it != db_.allterms_end(pfx); ++it) {
if (std::regex_search((*it).c_str() + 1, rx)) // avoid copy
terms.push_back(*it);
}
return terms;
}
const Xapian::Database& db_;
};
class MuSizeRangeProcessor : public Xapian::NumberValueRangeProcessor {
class _MuQuery {
public:
MuSizeRangeProcessor():
Xapian::NumberValueRangeProcessor(MU_MSG_FIELD_ID_SIZE) {
}
Xapian::valueno operator()(std::string &begin, std::string &end) {
if (!clear_prefix (begin))
return Xapian::BAD_VALUENO;
if (!substitute_size (begin) || !substitute_size (end))
return Xapian::BAD_VALUENO;
begin = Xapian::sortable_serialise (atol(begin.c_str()));
end = Xapian::sortable_serialise (atol(end.c_str()));
/* swap if b > e */
if (begin > end)
std::swap (begin, end);
return (Xapian::valueno)MU_MSG_FIELD_ID_SIZE;
}
private:
bool clear_prefix (std::string& begin) {
const std::string colon (":");
const std::string name (mu_msg_field_name
(MU_MSG_FIELD_ID_SIZE) + colon);
const std::string shortcut (
std::string(1, mu_msg_field_shortcut
(MU_MSG_FIELD_ID_SIZE)) + colon);
if (begin.find (name) == 0) {
begin.erase (0, name.length());
return true;
} else if (begin.find (shortcut) == 0) {
begin.erase (0, shortcut.length());
return true;
} else
return false;
}
bool substitute_size (std::string& size) {
gchar buf[16];
gint64 num = mu_str_size_parse_bkm(size.c_str());
if (num < 0)
throw Xapian::QueryParserError ("invalid size");
snprintf (buf, sizeof(buf), "%" G_GUINT64_FORMAT, num);
size = buf;
return true;
}
};
static void add_prefix (MuMsgFieldId field, Xapian::QueryParser* qparser);
struct _MuQuery {
public:
_MuQuery (MuStore *store): _store(mu_store_ref(store)) {
_qparser.set_database (db());
_qparser.set_default_op (Xapian::Query::OP_AND);
_qparser.add_valuerangeprocessor (&_date_range_processor);
_qparser.add_valuerangeprocessor (&_size_range_processor);
mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_prefix,
&_qparser);
/* add some convenient special prefixes */
add_special_prefixes ();
}
_MuQuery (MuStore *store): _store(mu_store_ref(store)) {}
~_MuQuery () { mu_store_unref (_store); }
Xapian::Database& db() const {
Xapian::Database* db;
db = reinterpret_cast<Xapian::Database*>
const auto db = reinterpret_cast<Xapian::Database*>
(mu_store_get_read_only_database (_store));
if (!db)
throw std::runtime_error ("no database");
return *db;
}
Xapian::QueryParser& query_parser () { return _qparser; }
private:
void add_special_prefixes () {
char pfx[] = { '\0', '\0' };
/* add 'contact' as a shortcut for From/Cc/Bcc/To: */
pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_FROM);
_qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx);
pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_TO);
_qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx);
pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_CC);
_qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx);
pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_BCC);
_qparser.add_prefix (MU_MSG_FIELD_PSEUDO_CONTACT, pfx);
/* add 'recip' as a shortcut for Cc/Bcc/To: */
pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_TO);
_qparser.add_prefix (MU_MSG_FIELD_PSEUDO_RECIP, pfx);
pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_CC);
_qparser.add_prefix (MU_MSG_FIELD_PSEUDO_RECIP, pfx);
pfx[0] = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_BCC);
_qparser.add_prefix (MU_MSG_FIELD_PSEUDO_RECIP, pfx);
}
Xapian::QueryParser _qparser;
MuDateRangeProcessor _date_range_processor;
MuSizeRangeProcessor _size_range_processor;
MuStore *_store;
};
static const Xapian::Query
get_query (MuQuery *mqx, const char* searchexpr, GError **err)
{
Xapian::Query query;
char *preprocessed;
preprocessed = mu_query_preprocess (searchexpr, err);
if (!preprocessed)
throw std::runtime_error
("parse error while preprocessing query");
try {
query = mqx->query_parser().parse_query
(preprocessed,
Xapian::QueryParser::FLAG_BOOLEAN |
Xapian::QueryParser::FLAG_PURE_NOT |
Xapian::QueryParser::FLAG_AUTO_SYNONYMS |
Xapian::QueryParser::FLAG_WILDCARD |
Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
);
g_free (preprocessed);
return query;
Mux::WarningVec warns;
const auto tree = Mux::parse (searchexpr, warns,
std::make_unique<MuProc>(mqx->db()));
for (const auto w: warns)
std::cerr << w << std::endl;
return Mux::xapian_query (tree);
} catch (...) {
mu_util_g_set_error (err,MU_ERROR_XAPIAN_QUERY,
"parse error in query");
g_free (preprocessed);
throw;
}
}
static void
add_prefix (MuMsgFieldId mfid, Xapian::QueryParser* qparser)
{
if (!mu_msg_field_xapian_index(mfid) &&
!mu_msg_field_xapian_term(mfid) &&
!mu_msg_field_xapian_contact(mfid))
return;
try {
const std::string pfx
(1, mu_msg_field_xapian_prefix (mfid));
const std::string shortcut
(1, mu_msg_field_shortcut (mfid));
if (mu_msg_field_uses_boolean_prefix (mfid)) {
qparser->add_boolean_prefix
(mu_msg_field_name(mfid), pfx);
qparser->add_boolean_prefix (shortcut, pfx);
} else {
qparser->add_prefix
(mu_msg_field_name(mfid), pfx);
qparser->add_prefix (shortcut, pfx);
}
// all fields are also matched implicitly, without
// any prefix
qparser->add_prefix ("", pfx);
} MU_XAPIAN_CATCH_BLOCK;
}
MuQuery*
mu_query_new (MuStore *store, GError **err)
{
@ -299,7 +226,6 @@ mu_query_new (MuStore *store, GError **err)
return 0;
}
void
mu_query_destroy (MuQuery *self)
{
@ -307,39 +233,6 @@ mu_query_destroy (MuQuery *self)
}
/* preprocess a query to make them a bit more promiscuous */
char*
mu_query_preprocess (const char *query, GError **err)
{
GSList *parts, *cur;
gchar *myquery;
g_return_val_if_fail (query, NULL);
/* convert the query to a list of query terms, and escape them
* separately */
parts = mu_str_esc_to_list (query);
if (!parts)
return NULL;
for (cur = parts; cur; cur = g_slist_next(cur)) {
char *data;
data = (gchar*)cur->data;
cur->data = mu_str_process_query_term (data);
g_free (data);
/* run term fixups */
data = (gchar*)cur->data;
cur->data = mu_str_xapian_fixup_terms (data);
g_free (data);
}
myquery = mu_str_from_list (parts, ' ');
mu_str_free_list (parts);
return myquery ? myquery : g_strdup ("");
}
/* this function is for handling the case where a DatabaseModified
* exception is raised. We try to reopen the database, and run the
* query again. */
@ -533,7 +426,11 @@ mu_query_run (MuQuery *self, const char *searchexpr, MuMsgFieldId sortfieldid,
* effort to calculate threads already in the first
* query since we can do it in the second one
*/
first_flags = inc_related ? (flags & ~MU_QUERY_FLAG_THREADS) : flags;
if (inc_related)
first_flags = (MuQueryFlags)(flags & ~MU_QUERY_FLAG_THREADS);
else
first_flags = flags;
iter = mu_msg_iter_new (
reinterpret_cast<XapianEnquire*>(&enq),
maxnum,
@ -563,7 +460,7 @@ mu_query_run (MuQuery *self, const char *searchexpr, MuMsgFieldId sortfieldid,
char*
mu_query_as_string (MuQuery *self, const char *searchexpr, GError **err)
mu_query_internal_xapian (MuQuery *self, const char *searchexpr, GError **err)
{
g_return_val_if_fail (self, NULL);
g_return_val_if_fail (searchexpr, NULL);
@ -574,3 +471,28 @@ mu_query_as_string (MuQuery *self, const char *searchexpr, GError **err)
} MU_XAPIAN_CATCH_BLOCK_RETURN(NULL);
}
/*
 * Parse searchexpr with the Mux query parser (using a MuProc bound to
 * this query's database) and return the textual form of the resulting
 * parse tree as a newly allocated string; free it with g_free.
 *
 * If warn is true, every warning collected by the parser is written to
 * stderr, one per line.
 *
 * Returns NULL when self or searchexpr is NULL. Exceptions thrown while
 * parsing are left to MU_XAPIAN_CATCH_BLOCK_RETURN(NULL) -- presumably
 * that makes the function return NULL as well; err is not written by
 * the code visible here.
 */
char*
mu_query_internal (MuQuery *self, const char *searchexpr,
		   gboolean warn, GError **err)
{
	g_return_val_if_fail (self, NULL);
	g_return_val_if_fail (searchexpr, NULL);

	try {
		Mux::WarningVec warns;
		const auto tree = Mux::parse (searchexpr, warns,
					      std::make_unique<MuProc>(self->db()));
		std::stringstream ss;
		ss << tree;

		if (warn) {
			/* bind by reference: no need to copy each warning
			 * just to print it */
			for (const auto& w: warns)
				std::cerr << w << std::endl;
		}

		return g_strdup(ss.str().c_str());

	} MU_XAPIAN_CATCH_BLOCK_RETURN(NULL);
}

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2008-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
@ -43,7 +43,7 @@ typedef struct _MuQuery MuQuery;
* when the instance is no longer needed, use mu_query_destroy
* to free it
*/
MuQuery *mu_query_new (MuStore *store, GError **err)
MuQuery* mu_query_new (MuStore *store, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
@ -53,7 +53,6 @@ MuQuery *mu_query_new (MuStore *store, GError **err)
*/
void mu_query_destroy (MuQuery *self);
/**
* get a version string for the database
*
@ -65,16 +64,14 @@ char* mu_query_version (MuQuery *store)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
enum _MuQueryFlags {
MU_QUERY_FLAG_NONE = 0,
typedef enum {
MU_QUERY_FLAG_NONE = 0 << 0, /**< no flags */
MU_QUERY_FLAG_DESCENDING = 1 << 0, /**< sort z->a */
MU_QUERY_FLAG_SKIP_UNREADABLE = 1 << 1, /**< skip unreadable msgs */
MU_QUERY_FLAG_SKIP_DUPS = 1 << 2, /**< skip duplicate msgs */
MU_QUERY_FLAG_INCLUDE_RELATED = 1 << 3, /**< include related msgs */
MU_QUERY_FLAG_THREADS = 1 << 4 /**< calculate threading info */
};
typedef int MuQueryFlags;
} MuQueryFlags;
/**
* run a Xapian query; for the syntax, please refer to the mu-find
@ -94,15 +91,30 @@ typedef int MuQueryFlags;
* @return a MuMsgIter instance you can iterate over, or NULL in
* case of error
*/
MuMsgIter* mu_query_run (MuQuery *self, const char* expr, MuMsgFieldId sortfieldid, int maxnum,
MuMsgIter* mu_query_run (MuQuery *self, const char* expr,
MuMsgFieldId sortfieldid, int maxnum,
MuQueryFlags flags, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* get mu's internal string representation of the query, i.e. the
* textual form of the parse tree produced by mu's own query parser
* (for Xapian's representation, see mu_query_internal_xapian)
*
* @param self a MuQuery instance
* @param searchexpr a search expression
* @param warn if TRUE, print the parser's warnings to stderr
* @param err receives error information (if there is any); if
* function returns non-NULL, err will _not_ be set. err can be NULL
*
* @return the string representation of the parsed query, or NULL in case of
* error; free the returned value with g_free
*/
char* mu_query_internal (MuQuery *self, const char *searchexpr,
gboolean warn, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* get a string representation of the Xapian search query
* get Xapian's internal string representation of the query
*
* @param self a MuQuery instance
* @param searchexpr a xapian search expression
@ -112,18 +124,10 @@ MuMsgIter* mu_query_run (MuQuery *self, const char* expr, MuMsgFieldId sortfield
* @return the string representation of the xapian query, or NULL in case of
* error; free the returned value with g_free
*/
char* mu_query_as_string (MuQuery *self, const char* searchexpr, GError **err)
char* mu_query_internal_xapian (MuQuery *self, const char* searchexpr,
GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* pre-process the query; this function is useful mainly for debugging mu
*
* @param query a query string
*
* @return a pre-processed query, free it with g_free
*/
char* mu_query_preprocess (const char *query, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
G_END_DECLS

View File

@ -1,6 +1,6 @@
/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */
/*
** Copyright (C) 2008-2016 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2008-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -26,6 +26,9 @@
#include <xapian.h>
#include <cstring>
#include <stdexcept>
#include <iostream>
#include <parser/utils.hh>
#include "mu-store.h"
#include "mu-store-priv.hh" /* _MuStore */
@ -202,19 +205,25 @@ mu_store_flush (MuStore *store)
mu_contacts_serialize (store->contacts());
}
static void
add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
{
time_t t;
const char *datestr;
const auto dstr = Mux::date_to_time_t_string (
(time_t)mu_msg_get_field_numeric (msg, mfid));
t = (time_t)mu_msg_get_field_numeric (msg, mfid);
datestr = mu_date_time_t_to_str_s (t, FALSE /*UTC*/);
doc.add_value ((Xapian::valueno)mfid, datestr);
doc.add_value ((Xapian::valueno)mfid, dstr);
}
/*
 * Read the numeric value of field mfid from msg, convert it with
 * Mux::size_to_string, and store the result in doc under the value
 * slot numbered mfid.
 */
static void
add_terms_values_size (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
{
	const auto slot  = static_cast<Xapian::valueno>(mfid);
	const auto bytes = mu_msg_get_field_numeric (msg, mfid);

	doc.add_value (slot, Mux::size_to_string (bytes));
}
G_GNUC_CONST
static const std::string&
flag_val (char flagchar)
@ -258,9 +267,6 @@ flag_val (char flagchar)
}
}
/* pre-calculate; optimization */
G_GNUC_CONST static const std::string&
prio_val (MuMsgPrio prio)
@ -283,7 +289,6 @@ prio_val (MuMsgPrio prio)
}
static void
add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
{
@ -305,55 +310,26 @@ add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
doc.add_term (prio_val((MuMsgPrio)num));
}
static void
add_terms_values_msgid (Xapian::Document& doc, MuMsg *msg)
{
char *str;
const char *orig;
if (!(orig = mu_msg_get_field_string (
msg, MU_MSG_FIELD_ID_MSGID)))
return; /* nothing to do */
str = mu_str_process_msgid (orig, FALSE);
doc.add_value ((Xapian::valueno)MU_MSG_FIELD_ID_MSGID, orig);
doc.add_term (prefix(MU_MSG_FIELD_ID_MSGID) +
std::string(str, 0, _MuStore::MAX_TERM_LENGTH));
g_free (str);
}
/* for string and string-list */
static void
add_terms_values_str (Xapian::Document& doc, const char *val, MuMsgFieldId mfid)
{
char *str;
if (mu_msg_field_preprocess (mfid))
str = mu_str_process_term (val);
else
str = g_strdup (val);
const auto flat = Mux::utf8_flatten (val);
if (mu_msg_field_xapian_index (mfid)) {
Xapian::TermGenerator termgen;
termgen.set_document (doc);
termgen.index_text_without_positions (str, 1, prefix(mfid));
if (g_strcmp0 (val, str) != 0)
termgen.index_text_without_positions (
val, 1, prefix(mfid));
termgen.index_text (flat, 1, prefix(mfid));
}
if (mu_msg_field_xapian_term(mfid))
doc.add_term (prefix(mfid) +
std::string(str, 0, _MuStore::MAX_TERM_LENGTH));
g_free (str);
if (mu_msg_field_xapian_term(mfid)) {
//std::cerr << ":" << prefix(mfid) + flat << std::endl;
doc.add_term((prefix(mfid) + flat)
.substr(0, MuStore::MAX_TERM_LENGTH));
}
}
static void
add_terms_values_string (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
{
@ -370,8 +346,6 @@ add_terms_values_string (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
add_terms_values_str (doc, orig, mfid);
}
static void
add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg,
MuMsgFieldId mfid)
@ -409,7 +383,7 @@ struct PartData {
static void
maybe_index_text_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
{
char *txt, *str;
char *txt;
Xapian::TermGenerator termgen;
/* only deal with attachments/messages; inlines are indexed as
@ -423,14 +397,10 @@ maybe_index_text_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
return;
termgen.set_document(pdata->_doc);
str = mu_str_process_text (txt);
termgen.index_text_without_positions
(str, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT));
const auto str = Mux::utf8_flatten (txt);
g_free (txt);
g_free (str);
termgen.index_text (str, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT));
}
@ -444,25 +414,17 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
/* save the mime type of any part */
if (part->type) {
/* note, we use '_' instead of '/' to separate
* type/subtype -- Xapian doesn't treat '/' as
* desired, so we use '_' and pre-process queries; see
* mu_query_preprocess */
char ctype[MuStore::MAX_TERM_LENGTH + 1];
snprintf (ctype, sizeof(ctype), "%s_%s",
part->type, part->subtype);
snprintf (ctype, sizeof(ctype), "%s/%s", part->type, part->subtype);
pdata->_doc.add_term
(mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH));
}
if ((fname = mu_msg_part_get_filename (part, FALSE))) {
char *str;
str = mu_str_process_term (fname);
const auto flat = Mux::utf8_flatten (fname);
g_free (fname);
pdata->_doc.add_term
(file + std::string(str, 0, MuStore::MAX_TERM_LENGTH));
g_free (str);
(file + std::string(flat, 0, MuStore::MAX_TERM_LENGTH));
}
maybe_index_text_part (msg, part, pdata);
@ -483,13 +445,10 @@ static void
add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
MuMsgFieldId mfid)
{
const char *str;
char *flat;
if (mu_msg_get_flags(msg) & MU_FLAG_ENCRYPTED)
return; /* ignore encrypted bodies */
str = mu_msg_get_body_text (msg, MU_MSG_OPTION_NONE);
auto str = mu_msg_get_body_text (msg, MU_MSG_OPTION_NONE);
if (!str) /* FIXME: html->txt fallback needed */
str = mu_msg_get_body_html (msg, MU_MSG_OPTION_NONE);
if (!str)
@ -498,11 +457,8 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
Xapian::TermGenerator termgen;
termgen.set_document(doc);
flat = mu_str_process_text (str);
// g_print ("\n--\n%s\n--\n", flat);
termgen.index_text_without_positions (flat, 1, prefix(mfid));
g_free (flat);
const auto flat = Mux::utf8_flatten(str);
termgen.index_text (flat, 1, prefix(mfid));
}
struct _MsgDoc {
@ -544,13 +500,13 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
!mu_msg_field_xapian_value(mfid))
return;
// if (mu_msg_field_xapian_contact (mfid))
// return; /* handled in new_doc_from_message */
switch (mfid) {
case MU_MSG_FIELD_ID_DATE:
add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid);
break;
case MU_MSG_FIELD_ID_SIZE:
add_terms_values_size (*msgdoc->_doc, msgdoc->_msg, mfid);
break;
case MU_MSG_FIELD_ID_BODY_TEXT:
add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid);
break;
@ -562,11 +518,6 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
case MU_MSG_FIELD_ID_MIME:
case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
break;
case MU_MSG_FIELD_ID_MSGID:
add_terms_values_msgid (*msgdoc->_doc, msgdoc->_msg);
break;
case MU_MSG_FIELD_ID_THREAD_ID:
case MU_MSG_FIELD_ID_UID:
break; /* already taken care of elsewhere */
@ -604,7 +555,7 @@ add_address_subfields (Xapian::Document& doc, const char *addr,
const std::string& pfx)
{
const char *at, *domain_part;
char *name_part, *f1, *f2;
char *name_part;
/* add "foo" and "bar.com" as terms as well for
* "foo@bar.com" */
@ -614,16 +565,10 @@ add_address_subfields (Xapian::Document& doc, const char *addr,
name_part = g_strndup(addr, at - addr); // foo
domain_part = at + 1;
f1 = mu_str_process_term (name_part);
f2 = mu_str_process_term (domain_part);
doc.add_term (pfx + std::string(name_part, 0, _MuStore::MAX_TERM_LENGTH));
doc.add_term (pfx + std::string(domain_part, 0, _MuStore::MAX_TERM_LENGTH));
g_free (name_part);
doc.add_term (pfx + std::string(f1, 0, _MuStore::MAX_TERM_LENGTH));
doc.add_term (pfx + std::string(f2, 0, _MuStore::MAX_TERM_LENGTH));
g_free (f1);
g_free (f2);
g_free (name_part);
}
static gboolean
@ -640,19 +585,15 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
if (!mu_str_is_empty(contact->name)) {
Xapian::TermGenerator termgen;
termgen.set_document (*msgdoc->_doc);
char *flat = mu_str_process_text (contact->name);
termgen.index_text_without_positions (flat, 1, pfx);
g_free (flat);
const auto flat = Mux::utf8_flatten(contact->name);
termgen.index_text (flat, 1, pfx);
}
if (!mu_str_is_empty(contact->address)) {
char *flat;
flat = mu_str_process_term (contact->address);
const auto flat = Mux::utf8_flatten(contact->address);
msgdoc->_doc->add_term
(std::string (pfx + flat, 0, MuStore::MAX_TERM_LENGTH));
g_free (flat);
add_address_subfields (*msgdoc->_doc, contact->address, pfx);
/* store it also in our contacts cache */
if (msgdoc->_store->contacts())
mu_contacts_add (msgdoc->_store->contacts(),

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2008-2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -44,7 +44,7 @@ typedef struct _MuStore MuStore;
MuStore* mu_store_new_writable (const char *xpath,
const char *ccachepath,
gboolean rebuild, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
@ -57,7 +57,7 @@ MuStore* mu_store_new_writable (const char *xpath,
* of error; free with mu_store_unref
*/
MuStore* mu_store_new_read_only (const char* xpath, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;

View File

@ -175,30 +175,6 @@ mu_str_display_contact (const char *str)
}
/*
 * Parse a size: a decimal number optionally followed by a unit. Only
 * the first character after the digits is looked at, case-insensitively:
 * nothing or 'b' means bytes, 'k' means * 1000, 'm' means * 1000 * 1000.
 *
 * Returns the size in bytes, or -1 when STR is NULL, does not start
 * with a digit, has an unknown unit, or the result does not fit in a
 * gint64.
 */
gint64
mu_str_size_parse_bkm (const char* str)
{
	char   *end;
	gint64  num, factor;

	g_return_val_if_fail (str, -1);

	/* the ctype functions require a value representable as unsigned
	 * char; a negative plain char is undefined behavior */
	if (!isdigit ((unsigned char)str[0]))
		return -1;

	/* strtoll instead of atoi: the full 64-bit range is available, and
	 * 'end' is left on the first character after the digits */
	num = strtoll (str, &end, 10);

	switch (tolower ((unsigned char)*end)) {
	case '\0':
	case 'b': factor = 1;           break; /* bytes */
	case 'k': factor = 1000;        break; /* kilobyte */
	case 'm': factor = 1000 * 1000; break; /* megabyte */
	default:
		return -1;
	}

	/* refuse anything the multiplication below would overflow */
	if (num > G_MAXINT64 / factor)
		return -1;

	return num * factor;
}
char*
mu_str_replace (const char *str, const char *substr, const char *repl)
{
@ -224,9 +200,6 @@ mu_str_replace (const char *str, const char *substr, const char *repl)
char*
mu_str_from_list (const GSList *lst, char sepa)
{
@ -396,397 +369,6 @@ mu_str_subject_normalize (const gchar* str)
}
/* Search state handed to each_check_prefix for every message field. */
struct _CheckPrefix {
/* the string being inspected */
const char *str;
/* set to TRUE once str starts with a field's shortcut or name,
 * followed by ':' */
gboolean match;
/* for the matching field, what mu_msg_field_is_range_field returned */
gboolean range_field;
};
typedef struct _CheckPrefix CheckPrefix;
/* Field-iteration callback: check whether cpfx->str starts with either
 * the one-character shortcut or the full name of field MFID, followed
 * by ':'. On a hit, flag the match and record whether the field is a
 * range field. Does nothing once a match has been recorded. */
static void
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
{
	char        shortcut_pfx[] = "X:"; /* 'X' is replaced below */
	const char *name;

	if (!cpfx || cpfx->match)
		return;

	/* short form, e.g. "<shortcut>:"; a zero shortcut never matches */
	shortcut_pfx[0] = mu_msg_field_shortcut (mfid);
	if (shortcut_pfx[0] != '\0' &&
	    g_str_has_prefix (cpfx->str, shortcut_pfx)) {
		cpfx->match       = TRUE;
		cpfx->range_field = mu_msg_field_is_range_field (mfid);
	}

	/* long form, "<name>:" */
	name = mu_msg_field_name (mfid);
	if (name != NULL &&
	    g_str_has_prefix (cpfx->str, name) &&
	    cpfx->str[strlen (name)] == ':') {
		cpfx->match       = TRUE;
		cpfx->range_field = mu_msg_field_is_range_field (mfid);
	}
}
/* Does STR look like a message-id term, i.e. the message-id field's
 * shortcut or its full name, followed by ':'? Strings shorter than
 * three characters never qualify. */
static gboolean
is_msgid_field (const char *str)
{
	const char *long_name;

	if (!str || strlen (str) < 3)
		return FALSE;

	/* short form: <shortcut>:<msgid> */
	if (str[0] == mu_msg_field_shortcut (MU_MSG_FIELD_ID_MSGID) &&
	    str[1] == ':')
		return TRUE;

	/* long form: <name>:<msgid> */
	long_name = mu_msg_field_name (MU_MSG_FIELD_ID_MSGID);

	return (g_str_has_prefix (str, long_name) &&
		str[strlen (long_name)] == ':') ? TRUE : FALSE;
}
/* Message-ids need a bit more massaging: every alphanumeric character
 * is lower-cased and everything else is replaced with '_'.
 *
 * If QUERY is true, STR is expected to be a complete query term such
 * as i:<msgid> or msgid:<msgid>; everything up to and including the
 * first ':' is then left untouched.
 *
 * Returns a newly allocated string (free with g_free), or NULL if STR
 * is NULL or if QUERY is true and STR contains no ':'. */
char*
mu_str_process_msgid (const char *str, gboolean query)
{
	char *copy, *cur;

	g_return_val_if_fail (str, NULL);
	g_return_val_if_fail (!query || strchr(str, ':'), NULL);

	/* still needed when the g_return_* checks are compiled out */
	if (!str)
		return NULL;

	copy = g_strdup (str);
	cur  = copy;

	if (query) {
		char *colon = strchr (copy, ':');
		if (!colon) { /* don't compute NULL + 1 */
			g_free (copy);
			return NULL;
		}
		cur = colon + 1;
	}

	for (; *cur; ++cur) {
		/* the ctype functions need an unsigned char value; bytes
		 * >= 0x80 in a plain (signed) char would be undefined
		 * behavior */
		const unsigned char uc = (unsigned char)*cur;
		*cur = isalnum (uc) ? (char)tolower (uc) : '_';
	}

	return copy;
}
/* Determine whether STR starts with a field prefix (a field's shortcut
 * or name, or one of the pseudo contact/recipient fields, followed by
 * ':'). *is_field receives the answer; *is_range_field tells whether
 * the matching field is a range field (always FALSE for the pseudo
 * fields). */
static void
check_for_field (const char *str, gboolean *is_field,
		 gboolean *is_range_field)
{
	CheckPrefix pfx;

	pfx.str = str;

	/* skip any non-alphanum starts in pfx.str; this is to handle
	 * the case where we have e.g. "(maildir:/abc)"; the cast keeps
	 * isalnum defined for bytes >= 0x80 */
	while (pfx.str && *pfx.str && !isalnum ((unsigned char)*pfx.str))
		++pfx.str;

	pfx.match = pfx.range_field = FALSE;
	mu_msg_field_foreach ((MuMsgFieldForeachFunc)each_check_prefix,
			      &pfx);

	/* also check the special prefixes; use the skipped-ahead string
	 * here as well, so that "(contact:foo" is treated the same way
	 * as "(maildir:/abc" */
	if (!pfx.match)
		pfx.match =
			g_str_has_prefix
			(pfx.str, MU_MSG_FIELD_PSEUDO_CONTACT ":") ||
			g_str_has_prefix
			(pfx.str, MU_MSG_FIELD_PSEUDO_RECIP ":");

	*is_field       = pfx.match;
	*is_range_field = pfx.range_field;
}
/* Decide whether the character at *cur needs special treatment and, if
 * so, append its replacement to gstr.
 *
 * gstr:        output buffer that is appended to
 * cur:         in/out cursor into the string being processed; advanced
 *              by one character when a ".." range operator is consumed
 * uc:          the unicode character at *cur
 * query_esc:   when true, a set of query meta-characters is kept as-is
 * range_field: when true (and query_esc is true), ".." is kept as-is
 *
 * Returns TRUE if something was appended to gstr (the caller should
 * then skip its own handling of this character), FALSE otherwise. */
static gboolean
handle_esc_maybe (GString *gstr, char **cur, gunichar uc,
gboolean query_esc, gboolean range_field)
{
char kar;
/* the first byte of the string that *cur points to */
kar = *cur[0];
if (query_esc) {
switch (kar) {
/* query meta-characters: copied through unchanged */
case ':':
case '(':
case ')':
case '*':
case '&':
case '"':
g_string_append_c (gstr, kar);
return TRUE;
case '.':
if (!range_field)
break;
/* exactly two dots (not three): keep them, and step over the
 * first dot here -- the caller steps over the second */
if ((*cur)[1] == '.' && (*cur)[2] != '.') {
g_string_append (gstr, "..");
*cur = g_utf8_next_char (*cur);
return TRUE;
}
/* otherwise fall through; the dot is then subject to the generic
 * check after the switch */
default: break;
}
}
/* punctuation and blanks are replaced with an underscore */
if (g_unichar_ispunct(uc) || isblank(kar)) {
g_string_append_c (gstr, '_');
return TRUE;
}
return FALSE;
}
/* Common implementation of the mu_str_process_* functions.
 *
 * STR is normalized (G_NORMALIZE_ALL; strings that fail to normalize
 * are first run through mu_str_utf8ify), mark characters are dropped
 * and, unless STR starts with a range-field prefix, everything is
 * lower-cased.
 *
 * xapian_esc: let handle_esc_maybe replace or keep special characters
 * query_esc:  STR is a query term; message-id terms are delegated to
 *             mu_str_process_msgid, and handle_esc_maybe is told to
 *             keep query meta-characters
 *
 * Returns a newly allocated string (free with g_free), or NULL if STR
 * could not be normalized. */
static char*
process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
{
	GString *gstr;
	char *norm, *cur;
	gboolean is_field, is_range_field;

	norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
	if (G_UNLIKELY(!norm)) {  /* not valid utf8? */
		char *u8;
		u8   = mu_str_utf8ify (str);
		norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
		g_free (u8);
	}

	if (!norm)
		return NULL;

	/* msg-id needs some special care in queries; it works on the
	 * original string, so release the normalized copy first instead
	 * of leaking it */
	if (query_esc && is_msgid_field (str)) {
		g_free (norm);
		return mu_str_process_msgid (str, TRUE);
	}

	check_for_field (str, &is_field, &is_range_field);
	gstr = g_string_sized_new (strlen (norm));

	for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {

		gunichar uc;
		uc = g_utf8_get_char (cur);

		/* handle_esc_maybe may advance cur itself */
		if (xapian_esc)
			if (handle_esc_maybe (gstr, &cur, uc, query_esc,
					      is_range_field))
				continue;

		if (g_unichar_ismark(uc))
			continue;

		/* range-field values keep their case */
		if (!is_range_field)
			uc = g_unichar_tolower (uc);

		g_string_append_unichar (gstr, uc);
	}

	g_free (norm);
	return g_string_free (gstr, FALSE);
}
/* Process free text: runs process_str with both xapian_esc and
 * query_esc switched off. Returns NULL if str is NULL. */
char*
mu_str_process_text (const char *str)
{
g_return_val_if_fail (str, NULL);
return process_str (str, FALSE, FALSE);
}
/* Process a term: runs process_str with xapian_esc switched on and
 * query_esc switched off. Returns NULL if str is NULL. */
char*
mu_str_process_term (const char *str)
{
g_return_val_if_fail (str, NULL);
return process_str (str, TRUE, FALSE);
}
/* Process a query term: runs process_str with both xapian_esc and
 * query_esc switched on. Returns NULL if str is NULL. */
char*
mu_str_process_query_term (const char *str)
{
g_return_val_if_fail (str, NULL);
return process_str (str, TRUE, TRUE);
}
/*
 * Split a simple search term into prefix, expression and suffix.
 * Meant to handle cases like "(maildir:/abc)"; prefix and suffix are
 * the non-alphanumeric characters at the beginning and at the end of
 * the string. An empty term yields three empty strings.
 *
 * The values stored in *pfx, *cond and *sfx are allocated from the
 * heap and must be g_free()d.
 *
 * Returns TRUE if all went fine and FALSE if an error occurred; in the
 * latter case *pfx, *cond and *sfx are not touched.
 */
static gboolean
split_term (const gchar *term,
	    const gchar **pfx, const gchar **cond, const gchar **sfx)
{
	const gchar *start, *tail, *end;
	gchar *p, *c, *s;

	g_return_val_if_fail (term, FALSE);
	g_return_val_if_fail (pfx, FALSE);
	g_return_val_if_fail (cond, FALSE);
	g_return_val_if_fail (sfx, FALSE);

	end = term + strlen (term);

	/*
	 * Invariants:
	 *   - start will point to the first symbol after leading
	 *     non-alphanumerics (can be alphanumeric or '\0');
	 *   - tail will point to the beginning of trailing
	 *     non-alphanumerics or '\0'.
	 * So:
	 *   - len (prefix) = start - term;
	 *   - len (cond)   = tail - start;
	 *   - len (suffix) = end - tail.
	 *
	 * The casts keep isalnum defined for bytes >= 0x80.
	 */
	for (start = term;
	     *start && !isalnum ((unsigned char)*start); ++start);
	for (tail = end;
	     tail > start && !isalnum ((unsigned char)tail[-1]); --tail);

	p = g_strndup (term, start - term);
	c = g_strndup (start, tail - start);
	s = g_strndup (tail, end - tail);

	if (!p || !c || !s) {
		g_free (p);
		g_free (c);
		g_free (s);
		return FALSE;
	}

	*pfx  = p;
	*cond = c;
	*sfx  = s;

	return TRUE;
}
/*
* Fixup handlers.
*
* Every fixup handler will take three string arguments,
* prefix, condition and suffix (as split by split_term).
*
* It will either return NULL that means "no fixup was done"
* or the pointer to the newly-allocated string with the
* new contents.
*/
typedef gchar *
(*fixup_handler_t)(const gchar *pfx, const gchar *cond, const gchar *sfx);
/* Fixup handler for "date:" conditions: turn a single date into a
 * range by repeating the value after "..". Returns NULL (no fixup
 * done) when the value already contains "..". */
static gchar*
fixup_date(const gchar *pfx, const gchar *cond, const gchar *sfx)
{
	/* the value starts right after the "date:" field name */
	const gchar *value = cond + (sizeof ("date:") - 1);

	return strstr (value, "..") != NULL
		? NULL
		: g_strdup_printf ("%s%s..%s%s", pfx, cond, value, sfx);
}
/*
 * Find the fixup handler for a condition, comparing the start of COND
 * case-insensitively against the known field names.
 *
 * Returns the handler, or NULL if there is no fixup for this
 * condition.
 */
static fixup_handler_t
find_fixup (const gchar *cond)
{
	/* table of fixups; the entry with a NULL name ends the list */
	static struct {
		const char      *name;
		size_t           len;
		fixup_handler_t  handler;
	} fixups[] = {
		{"date:", sizeof("date:") - 1, fixup_date},
		{NULL, 0, NULL}
	};
	size_t i;

	g_return_val_if_fail (cond, NULL);

	for (i = 0; fixups[i].name != NULL; ++i)
		if (strncasecmp (cond, fixups[i].name, fixups[i].len) == 0)
			return fixups[i].handler;

	return NULL;
}
/* Apply the fixup that belongs to TERM, if any. Only terms that start
 * with the prefix of a range field are candidates; anything else, and
 * any term for which no fixup applies, comes back as a plain copy.
 *
 * Returns a newly allocated string (free with g_free), or NULL if TERM
 * is NULL. */
gchar*
mu_str_xapian_fixup_terms (const gchar *term)
{
	gboolean        is_field, is_range_field;
	const gchar    *pfx, *cond, *sfx;
	fixup_handler_t handler;
	gchar          *fixed;

	g_return_val_if_fail (term, NULL);

	if (*term == '\0')
		return g_strdup (term);

	check_for_field (term, &is_field, &is_range_field);
	if (!(is_field && is_range_field))
		return g_strdup (term);

	if (!split_term (term, &pfx, &cond, &sfx))
		return g_strdup (term);

	/* a handler may decline by returning NULL */
	handler = find_fixup (cond);
	fixed   = handler ? handler (pfx, cond, sfx) : NULL;

	g_free ((gchar *)pfx);
	g_free ((gchar *)cond);
	g_free ((gchar *)sfx);

	return fixed ? fixed : g_strdup (term);
}
/* note: this function is *not* re-entrant, it returns a static buffer */
const char*
mu_str_fullpath_s (const char* path, const char* name)

View File

@ -119,83 +119,6 @@ char* mu_str_flags (MuFlags flags)
char* mu_str_summarize (const char* str, size_t max_lines)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* Process some text (e.g. message bodies) -- flatten (remove accents
* etc.), and remove some punctuation.
*
* @param text some text
*
* @return the processed text, free with g_free
*/
char* mu_str_process_text (const char *text)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* Process some term (e.g., an e-mail address, subject field):
* remove accents, replace some punctuation by _
*
* @param term some term
*
* @return the processed text, free with g_free
*/
char* mu_str_process_term (const char *term)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* Process some query term (e.g., an e-mail address, subject field):
* remove accents, replace some punctuation by _, but leave some query
* metachars alone.
*
* @param qterm some query term
*
* @return the processed text, free with g_free
*/
char* mu_str_process_query_term (const char *qterm)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* Handle the message-id in a special way
*
* @param str the message-id str
* @param query is this a query?
*
* @return the massaged message-id
*/
char* mu_str_process_msgid (const char *str, gboolean query);
/**
* Fixup values for some fields in the DWIM manner:
* - if term is date:YYYYMMDD, replace it with the range
* date:YYYYMMDD..YYYYMMDD.
*
* @param query a query string
*
* @return the fixup'd string that must be g_free()d
* after use or NULL in case of error.
*/
gchar* mu_str_xapian_fixup_terms (const gchar *term);
/**
* parse a byte size; a size is a number, with optionally a
* unit. Units recognized are b/B (bytes) k/K (1000) and m/M
* (1000*1000). Only the first letter is checked and the function is
* not case-sensitive, so 1000Kb, 3M will work equally well. Note,
* for kB, MB etc., we then follow the SI standards, not 2^10 etc. The
* 'b' may be omitted.
*
* practical sizes for email messages are in terms of Mb; even in
* extreme cases they should be under 100 Mb. The function returns
* -1 if there is a parsing error.
*
* @param str a string with a size, such a "100", "100Kb", "1Mb"
*
* @return the corresponding size in bytes, or -1 in case of error
*/
gint64 mu_str_size_parse_bkm (const char* str);
/**
* create a full path from a path + a filename. function is _not_
* reentrant.
@ -207,7 +130,6 @@ gint64 mu_str_size_parse_bkm (const char* str);
*/
const char* mu_str_fullpath_s (const char* path, const char* name);
/**
* escape a string like a string literal in C; ie. replace \ with \\,
* and " with \"
@ -220,7 +142,6 @@ const char* mu_str_fullpath_s (const char* path, const char* name);
char* mu_str_escape_c_literal (const gchar* str, gboolean in_quotes)
G_GNUC_WARN_UNUSED_RESULT;
/**
* turn a string into plain ascii by replacing each non-ascii
* character with a dot ('.'). Replacement is done in-place.
@ -231,7 +152,6 @@ char* mu_str_escape_c_literal (const gchar* str, gboolean in_quotes)
*/
char* mu_str_asciify_in_place (char *buf);
/**
* turn string in buf into valid utf8. If this string is not valid
* utf8 already, the function massages the offending characters.
@ -242,7 +162,6 @@ char* mu_str_asciify_in_place (char *buf);
*/
char* mu_str_utf8ify (const char *buf);
/**
* convert a string in a certain charset into utf8
*
@ -255,7 +174,6 @@ char* mu_str_utf8ify (const char *buf);
gchar* mu_str_convert_to_utf8 (const char* buffer, const char *charset);
/**
* macro to check whether the string is empty, i.e. if it is NULL or
* its length is 0
@ -266,7 +184,6 @@ gchar* mu_str_convert_to_utf8 (const char* buffer, const char *charset);
*/
#define mu_str_is_empty(S) ((!(S)||!(*S))?TRUE:FALSE)
/**
* convert a GSList of strings to a #sepa-separated list
*
@ -277,7 +194,6 @@ gchar* mu_str_convert_to_utf8 (const char* buffer, const char *charset);
*/
char* mu_str_from_list (const GSList *lst, char sepa);
/**
* convert a #sepa-separated list of strings into a GSList
*
@ -289,7 +205,6 @@ char* mu_str_from_list (const GSList *lst, char sepa);
*/
GSList* mu_str_to_list (const char *str, char sepa, gboolean strip);
/**
* convert a string (with possible escaping) to a list. list items are
* separated by one or more spaces. list items can be quoted (using
@ -302,8 +217,6 @@ GSList* mu_str_to_list (const char *str, char sepa, gboolean strip);
*/
GSList* mu_str_esc_to_list (const char *str);
/**
* Parse a list of <key>:<value> arguments, where <value> supports
* quoting and escaping.
@ -317,7 +230,6 @@ GSList* mu_str_esc_to_list (const char *str);
GHashTable* mu_str_parse_arglist (const char *args, GError **err)
G_GNUC_WARN_UNUSED_RESULT;
/**
* free a GSList consisting of allocated strings
*
@ -325,7 +237,6 @@ G_GNUC_WARN_UNUSED_RESULT;
*/
void mu_str_free_list (GSList *lst);
/**
* strip the subject of Re:, Fwd: etc.
*

View File

@ -53,23 +53,59 @@ test_cases(const CaseVec& cases, ProcFunc proc)
}
static void
test_date ()
test_date_basic ()
{
g_setenv ("TZ", "Europe/Helsinki", TRUE);
CaseVec cases = {
{ "2015-09-18T09:10:23", true, "001442556623" },
{ "1972-12-14T09:10:23", true, "000093165023" },
{ "1854-11-18T17:10:23", true, "000000000000" },
{ "fnorb", true, "000000000000" },
{ "fnorb", false, "999999999999" },
{ "", false, "999999999999" },
{ "", true, "000000000000" }
{ "2015-09-18T09:10:23", true, "1442556623" },
{ "1972-12-14T09:10:23", true, "0093165023" },
{ "1854-11-18T17:10:23", true, "0000000000" },
{ "2016", true, "1451599200" },
{ "2016", false, "1483221599" },
{ "fnorb", true, "0000000000" },
{ "fnorb", false, "9999999999" },
{ "", false, "9999999999" },
{ "", true, "0000000000" }
};
test_cases (cases, [](auto s, auto f){ return date_to_time_t_string(s,f); });
}
/* Check relative date expressions ("3h", "21d", ...): the timestamp
 * produced for each expression must lie the expected number of seconds
 * before now, give or take the per-case tolerance. A negative delta
 * ("-1y") must come out as 0. */
static void
test_date_ymwdhMs (void)
{
	struct DeltaCase {
		std::string	expr;      /* expression to parse */
		long		diff;      /* expected distance from now, in seconds */
		int		tolerance; /* allowed deviation, in seconds */
	};

	const DeltaCase tests[] = {
		{ "3h",  3 * 60 * 60, 1 },
		{ "21d", 21 * 24 * 60 * 60, 1 },
		{ "2w",  2 * 7 * 24 * 60 * 60, 1 },

		{ "2y",  2 * 365 * 24 * 60 * 60, 24 * 3600 + 1 },
		{ "3m",  3 * 30 * 24 * 60 * 60, 3 * 24 * 3600 + 1 }
	};

	for (const auto& tcase : tests) {
		const auto diff = time(NULL) -
			strtol(Mux::date_to_time_t_string(tcase.expr, true).c_str(),
			       NULL, 10);

		if (g_test_verbose())
			std::cerr << tcase.expr << ' '
				  << diff << ' '
				  << tcase.diff << std::endl;

		g_assert_true (tcase.diff - diff <= tcase.tolerance);
	}

	const auto negative =
		strtol(Mux::date_to_time_t_string("-1y", true).c_str(), NULL, 10);
	g_assert_true (negative == 0);
}
static void
test_size ()
{
@ -88,8 +124,9 @@ main (int argc, char *argv[])
{
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/utils/process-date", test_date);
g_test_add_func ("/utils/process-size", test_size);
g_test_add_func ("/utils/date-basic", test_date_basic);
g_test_add_func ("/utils/date-ymwdhMs", test_date_ymwdhMs);
g_test_add_func ("/utils/size", test_size);
return g_test_run ();
}

View File

@ -144,11 +144,11 @@ Mux::quote (const std::string& str)
return str;
}
constexpr const auto InternalDateFormat = "%012" G_GINT64_FORMAT;
constexpr const char InternalDateMin[] = "000000000000";
constexpr const char InternalDateMax[] = "999999999999";
static_assert(sizeof(InternalDateMin) == 12 + 1);
static_assert(sizeof(InternalDateMax) == 12 + 1);
constexpr const auto InternalDateFormat = "%010" G_GINT64_FORMAT;
constexpr const char InternalDateMin[] = "0000000000";
constexpr const char InternalDateMax[] = "9999999999";
static_assert(sizeof(InternalDateMin) == 10 + 1);
static_assert(sizeof(InternalDateMax) == 10 + 1);
static std::string
date_boundary (bool is_first)
@ -204,7 +204,6 @@ delta_ymwdhMs (const std::string& expr)
return date_to_time_t_string (t);
}
static std::string
special_date (const std::string& d, bool is_first)
{
@ -235,9 +234,8 @@ special_date (const std::string& d, bool is_first)
return date_boundary (is_first);
}
constexpr const char UserDateMin[] = "19700101000000";
constexpr const char UserDateMax[] = "29993112235959";
constexpr const char UserDateMax[] = "29991231235959";
std::string
Mux::date_to_time_t_string (const std::string& dstr, bool is_first)
@ -249,7 +247,10 @@ Mux::date_to_time_t_string (const std::string& dstr, bool is_first)
/* one-sided dates */
if (dstr.empty())
return date_boundary (is_first);
else if (is_first && dstr.find_first_of("ymdwhMs") != std::string::npos)
else if (dstr == "today" || dstr == "now")
return special_date (dstr, is_first);
else if (dstr.find_first_of("ymdwhMs") != std::string::npos)
return delta_ymwdhMs (dstr);
std::string date (is_first ? UserDateMin : UserDateMax);
@ -261,7 +262,7 @@ Mux::date_to_time_t_string (const std::string& dstr, bool is_first)
!strptime (date.c_str(), "%Y%m%d", &tbuf) &&
!strptime (date.c_str(), "%Y%m", &tbuf) &&
!strptime (date.c_str(), "%Y", &tbuf))
return special_date (date, is_first);
return date_boundary (is_first);
dtime = g_date_time_new_local (tbuf.tm_year + 1900,
tbuf.tm_mon + 1,