2009-11-25 21:55:06 +01:00
|
|
|
/*
|
2010-08-25 20:46:16 +02:00
|
|
|
** Copyright (C) 2008-2010 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
2009-11-25 21:55:06 +01:00
|
|
|
**
|
|
|
|
** This program is free software; you can redistribute it and/or modify
|
|
|
|
** it under the terms of the GNU General Public License as published by
|
|
|
|
** the Free Software Foundation; either version 3 of the License, or
|
|
|
|
** (at your option) any later version.
|
|
|
|
**
|
|
|
|
** This program is distributed in the hope that it will be useful,
|
|
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
** GNU General Public License for more details.
|
|
|
|
**
|
|
|
|
** You should have received a copy of the GNU General Public License
|
|
|
|
** along with this program; if not, write to the Free Software Foundation,
|
|
|
|
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
**
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <xapian.h>
|
|
|
|
#include <glib/gstdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <string>
|
|
|
|
|
2010-08-25 20:46:16 +02:00
|
|
|
#include "mu-query.h"
|
2009-11-25 21:55:06 +01:00
|
|
|
|
2010-08-25 20:29:53 +02:00
|
|
|
#include "mu-msg-iter.h"
|
|
|
|
#include "mu-msg-iter-priv.hh"
|
2009-11-25 21:55:06 +01:00
|
|
|
|
2010-01-05 07:32:23 +01:00
|
|
|
#include "mu-util.h"
|
2010-08-25 20:40:07 +02:00
|
|
|
#include "mu-util-db.h"
|
2010-11-11 21:06:19 +01:00
|
|
|
#include "mu-msg-str.h"
|
2010-01-05 07:32:23 +01:00
|
|
|
|
2010-11-20 13:43:04 +01:00
|
|
|
/* forward declaration: registers the query-parser prefixes for one message
 * field; passed as the callback to mu_msg_field_foreach in init_mu_query */
static void add_prefix (MuMsgFieldId field, Xapian::QueryParser* qparser);
|
2009-12-05 19:10:58 +01:00
|
|
|
|
2010-08-25 20:46:16 +02:00
|
|
|
struct _MuQuery {
|
2009-12-08 23:01:49 +01:00
|
|
|
Xapian::Database* _db;
|
|
|
|
Xapian::QueryParser* _qparser;
|
|
|
|
Xapian::Sorter* _sorters[MU_MSG_FIELD_TYPE_NUM];
|
|
|
|
};
|
2009-12-05 19:10:58 +01:00
|
|
|
|
2009-12-08 23:01:49 +01:00
|
|
|
/*
 * Initialize the fields of an already allocated MuQuery: open the
 * Xapian database at dbpath, create a query parser bound to it (with
 * AND as the default operator) and register the prefixes of all
 * message fields with that parser.
 *
 * Returns TRUE on success. If the try block is left through
 * MU_XAPIAN_CATCH_BLOCK (defined elsewhere), whatever was created is
 * deleted again, the pointers are reset to NULL and FALSE is returned.
 */
gboolean
init_mu_query (MuQuery *mqx, const char* dbpath)
{
	/* put the whole struct in a defined state before anything can
	 * throw, so a failed init never leaves garbage behind */
	mqx->_db      = NULL;
	mqx->_qparser = NULL;
	memset (mqx->_sorters, 0, sizeof(mqx->_sorters));

	try {
		mqx->_db      = new Xapian::Database(dbpath);
		mqx->_qparser = new Xapian::QueryParser;

		mqx->_qparser->set_database (*mqx->_db);
		mqx->_qparser->set_default_op (Xapian::Query::OP_AND);
		//mqx->_qparser->set_stemming_strategy (Xapian::QueryParser::STEM_NONE);

		mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix,
				      (gpointer)mqx->_qparser);

		// // ////// FIXME
		// g_print ("\nsynonyms:\n");
		// for (Xapian::TermIterator iter = mqx->_db->synonym_keys_begin();
		//      iter != mqx->_db->synonym_keys_end(); ++iter) {
		// 	for (Xapian::TermIterator jter = mqx->_db->synonyms_begin(*iter);
		// 	     jter != mqx->_db->synonyms_end(*iter); ++jter) {
		// 		g_print ("%s => %s\n", (*iter).c_str(), (*jter).c_str());
		// 	}
		// }

		return TRUE;

	} MU_XAPIAN_CATCH_BLOCK;

	/* initialization failed: release what we got and do not leave
	 * dangling pointers in the struct */
	try {
		delete mqx->_db;
		mqx->_db = NULL;

		delete mqx->_qparser;
		mqx->_qparser = NULL;

	} MU_XAPIAN_CATCH_BLOCK;

	return FALSE;
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2010-08-25 20:46:16 +02:00
|
|
|
uninit_mu_query (MuQuery *mqx)
|
2009-12-08 23:01:49 +01:00
|
|
|
{
|
|
|
|
try {
|
|
|
|
delete mqx->_db;
|
|
|
|
delete mqx->_qparser;
|
2009-12-05 19:10:58 +01:00
|
|
|
|
2009-12-08 23:01:49 +01:00
|
|
|
for (int i = 0; i != MU_MSG_FIELD_TYPE_NUM; ++i)
|
|
|
|
delete mqx->_sorters[i];
|
|
|
|
|
2010-01-06 00:30:45 +01:00
|
|
|
} MU_XAPIAN_CATCH_BLOCK;
|
2009-12-08 23:01:49 +01:00
|
|
|
}
|
2009-12-23 23:39:49 +01:00
|
|
|
|
2010-09-12 15:30:29 +02:00
|
|
|
|
2009-12-08 23:01:49 +01:00
|
|
|
/*
 * Parse searchexpr with the query parser of mqx and return the
 * resulting Xapian query.
 *
 * If the parse does not return normally (the try block is left through
 * MU_XAPIAN_CATCH_BLOCK), an empty Xapian::Query is returned and, when
 * err is non-NULL, *err is set to 1. *err is not touched on success,
 * so callers must initialize it themselves.
 */
static Xapian::Query
get_query (MuQuery * mqx, const char* searchexpr, int *err = 0) {

	try {
		/* boolean operators (in any case), pure NOT queries and
		 * automatic synonym expansion */
		const unsigned parse_flags =
			Xapian::QueryParser::FLAG_BOOLEAN          |
			Xapian::QueryParser::FLAG_PURE_NOT         |
			Xapian::QueryParser::FLAG_AUTO_SYNONYMS    |
			Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE;

		return mqx->_qparser->parse_query (searchexpr, parse_flags);

	} MU_XAPIAN_CATCH_BLOCK;

	/* only reached when parsing failed */
	if (err)
		*err = 1;

	return Xapian::Query();
}
|
2009-11-25 21:55:06 +01:00
|
|
|
|
|
|
|
static void
|
2010-11-20 13:43:04 +01:00
|
|
|
add_prefix (MuMsgFieldId mfid, Xapian::QueryParser* qparser)
|
2009-11-25 21:55:06 +01:00
|
|
|
{
|
2010-11-20 13:43:04 +01:00
|
|
|
if (!mu_msg_field_xapian_index(mfid) &&
|
|
|
|
!mu_msg_field_xapian_term(mfid) &&
|
|
|
|
!mu_msg_field_xapian_contact(mfid))
|
2009-11-25 21:55:06 +01:00
|
|
|
return;
|
2009-12-08 23:01:49 +01:00
|
|
|
|
2010-11-20 13:43:04 +01:00
|
|
|
try {
|
2010-11-20 15:37:23 +01:00
|
|
|
const std::string pfx
|
|
|
|
(1, mu_msg_field_xapian_prefix (mfid));
|
|
|
|
const std::string shortcut
|
|
|
|
(1, mu_msg_field_shortcut (mfid));
|
2010-11-20 13:43:04 +01:00
|
|
|
|
2010-11-21 14:42:53 +01:00
|
|
|
if (mfid == MU_MSG_FIELD_ID_FLAGS || mfid == MU_MSG_FIELD_ID_PRIO) {
|
2010-11-20 15:37:23 +01:00
|
|
|
qparser->add_prefix
|
|
|
|
(mu_msg_field_name(mfid), pfx);
|
|
|
|
qparser->add_prefix (shortcut, pfx);
|
|
|
|
} else if (mfid == MU_MSG_FIELD_ID_MAILDIR ||
|
|
|
|
mfid == MU_MSG_FIELD_ID_MSGID) {
|
|
|
|
qparser->add_boolean_prefix
|
|
|
|
(mu_msg_field_name(mfid), pfx);
|
|
|
|
qparser->add_boolean_prefix (shortcut, pfx);
|
2010-11-20 13:43:04 +01:00
|
|
|
} else {
|
|
|
|
qparser->add_boolean_prefix
|
|
|
|
(mu_msg_field_name(mfid), pfx);
|
2010-11-20 15:37:23 +01:00
|
|
|
qparser->add_boolean_prefix (shortcut, pfx);
|
2010-11-20 13:43:04 +01:00
|
|
|
qparser->add_prefix ("", pfx);
|
|
|
|
}
|
|
|
|
} MU_XAPIAN_CATCH_BLOCK;
|
2009-11-25 21:55:06 +01:00
|
|
|
}
|
|
|
|
|
2010-08-25 20:46:16 +02:00
|
|
|
/*
 * Create a new MuQuery for the Xapian database in directory xpath.
 *
 * Returns NULL (after logging a warning) when xpath fails the
 * mu_util_check_dir check, when the database is empty or when its
 * version is not up-to-date; returns NULL (after logging a critical
 * message) when the query object cannot be initialized. Otherwise a
 * newly allocated MuQuery is returned, to be released with
 * mu_query_destroy.
 */
MuQuery*
mu_query_new (const char* xpath)
{
	g_return_val_if_fail (xpath, NULL);

	if (!mu_util_check_dir (xpath, TRUE, FALSE)) {
		g_warning ("'%s' is not a readable xapian dir", xpath);
		return NULL;
	}

	if (mu_util_db_is_empty (xpath)) {
		g_warning ("database %s is empty; nothing to do", xpath);
		return NULL;
	}

	if (!mu_util_db_version_up_to_date (xpath)) {
		g_warning ("%s is not up-to-date, needs a full update",
			   xpath);
		return NULL;
	}

	MuQuery *query = g_new (MuQuery, 1);

	if (init_mu_query (query, xpath))
		return query;

	/* init_mu_query already cleaned up its own members; only the
	 * struct itself is left to free */
	g_critical ("failed to initialize the Xapian query object");
	g_free (query);

	return NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
2010-08-25 20:46:16 +02:00
|
|
|
mu_query_destroy (MuQuery *self)
|
2009-11-25 21:55:06 +01:00
|
|
|
{
|
2010-11-20 13:43:04 +01:00
|
|
|
if (!self)
|
|
|
|
return;
|
|
|
|
|
|
|
|
uninit_mu_query (self);
|
|
|
|
g_free (self);
|
2010-09-12 15:30:29 +02:00
|
|
|
}
|
2009-12-11 18:44:05 +01:00
|
|
|
|
2010-09-12 15:30:29 +02:00
|
|
|
struct _CheckPrefix {
|
|
|
|
const char *pfx;
|
|
|
|
guint len;
|
|
|
|
gboolean match;
|
|
|
|
};
|
|
|
|
typedef struct _CheckPrefix CheckPrefix;
|
2010-01-24 00:37:26 +01:00
|
|
|
|
2010-09-12 15:30:29 +02:00
|
|
|
static void
|
2010-11-20 13:43:04 +01:00
|
|
|
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
|
2010-09-12 15:30:29 +02:00
|
|
|
{
|
2010-11-20 13:43:04 +01:00
|
|
|
const char *field_name;
|
|
|
|
char field_shortcut;
|
2010-09-12 15:30:29 +02:00
|
|
|
|
2010-11-20 13:43:04 +01:00
|
|
|
if (!cpfx || cpfx->match)
|
2010-09-12 15:30:29 +02:00
|
|
|
return;
|
|
|
|
|
2010-11-20 13:43:04 +01:00
|
|
|
field_shortcut = mu_msg_field_shortcut (mfid);
|
|
|
|
if (field_shortcut == cpfx->pfx[0]) {
|
2010-09-12 15:30:29 +02:00
|
|
|
cpfx->match = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2010-11-20 13:43:04 +01:00
|
|
|
field_name = mu_msg_field_name (mfid);
|
2010-09-12 15:30:29 +02:00
|
|
|
if (field_name &&
|
|
|
|
strncmp (cpfx->pfx, field_name, cpfx->len) == 0) {
|
|
|
|
cpfx->match = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
2009-11-25 21:55:06 +01:00
|
|
|
}
|
|
|
|
|
2010-09-12 15:30:29 +02:00
|
|
|
|
|
|
|
/* colon is a position inside q pointing at a ':' character. function
|
|
|
|
* determines whether the prefix is a registered prefix (like
|
|
|
|
* 'subject' or 'from' or 's') */
|
|
|
|
static gboolean
|
|
|
|
is_xapian_prefix (const char *q, const char *colon)
|
|
|
|
{
|
|
|
|
const char *cur;
|
|
|
|
|
|
|
|
if (colon == q)
|
|
|
|
return FALSE; /* : at beginning, not a prefix */
|
|
|
|
|
|
|
|
/* track back from colon until a boundary or beginning of the
|
|
|
|
* str */
|
|
|
|
for (cur = colon - 1; cur >= q; --cur) {
|
|
|
|
|
|
|
|
if (cur == q || !isalpha (*(cur-1))) {
|
|
|
|
|
|
|
|
CheckPrefix cpfx;
|
|
|
|
memset (&cpfx, 0, sizeof(CheckPrefix));
|
|
|
|
|
|
|
|
cpfx.pfx = cur;
|
|
|
|
cpfx.len = (colon - cur);
|
|
|
|
cpfx.match = FALSE;
|
|
|
|
|
2010-11-14 15:39:21 +01:00
|
|
|
mu_msg_field_foreach ((MuMsgFieldForEachFunc)
|
|
|
|
each_check_prefix,
|
2010-09-12 15:30:29 +02:00
|
|
|
&cpfx);
|
|
|
|
|
|
|
|
return (cpfx.match);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2010-09-09 07:21:01 +02:00
|
|
|
/* preprocess a query to make them a bit more permissive */
|
2010-09-12 15:30:29 +02:00
|
|
|
char*
|
|
|
|
mu_query_preprocess (const char *query)
|
2010-09-09 07:21:01 +02:00
|
|
|
{
|
|
|
|
gchar *my_query;
|
2010-09-12 15:30:29 +02:00
|
|
|
gchar *cur;
|
|
|
|
|
|
|
|
g_return_val_if_fail (query, NULL);
|
2010-09-09 07:21:01 +02:00
|
|
|
|
|
|
|
/* translate the the searchexpr to all lowercase; this
|
2010-11-01 23:05:49 +01:00
|
|
|
* will fixes some of the false-negatives. A full fix
|
|
|
|
* probably requires some custom query parser.
|
2010-09-09 07:21:01 +02:00
|
|
|
*/
|
2010-11-11 21:06:19 +01:00
|
|
|
my_query = mu_msg_str_normalize(query, TRUE);
|
2010-09-12 15:30:29 +02:00
|
|
|
|
|
|
|
for (cur = my_query; *cur; ++cur) {
|
|
|
|
if (*cur == ':') /* we found a ':' */
|
|
|
|
/* if there's a registered xapian prefix before the
|
|
|
|
* ':', don't touch it. Otherwise replace ':' with
|
2010-11-01 23:05:49 +01:00
|
|
|
* a space'... ugly...
|
2010-09-12 15:30:29 +02:00
|
|
|
*/
|
2010-11-11 21:06:19 +01:00
|
|
|
if (!is_xapian_prefix (my_query, cur))
|
|
|
|
*cur = ' ';
|
2010-09-12 15:30:29 +02:00
|
|
|
}
|
|
|
|
|
2010-09-09 07:21:01 +02:00
|
|
|
return my_query;
|
|
|
|
}
|
|
|
|
|
2009-11-25 21:55:06 +01:00
|
|
|
|
2010-08-25 20:29:53 +02:00
|
|
|
/*
 * Run the search expression searchexpr against the database of self
 * and return an iterator over the results.
 *
 * sortfieldid selects the field to sort on (MU_MSG_FIELD_ID_NONE for no
 * sorting), ascending is passed on to Xapian's set_sort_by_value as
 * its second argument. A batchsize of 0 means: use the number of
 * documents in the database.
 *
 * Returns NULL when an argument is invalid, when the expression cannot
 * be parsed (a warning is logged) or when the try block is left through
 * MU_XAPIAN_CATCH_BLOCK_RETURN.
 */
MuMsgIter*
mu_query_run (MuQuery *self, const char* searchexpr,
	      MuMsgFieldId sortfieldid, gboolean ascending,
	      size_t batchsize)
{
	g_return_val_if_fail (self, NULL);
	g_return_val_if_fail (searchexpr, NULL);
	g_return_val_if_fail (mu_msg_field_id_is_valid (sortfieldid) ||
			      sortfieldid == MU_MSG_FIELD_ID_NONE, NULL);

	try {
		int parse_failed = 0;
		char *expr = mu_query_preprocess (searchexpr);

		Xapian::Query query (get_query (self, expr, &parse_failed));

		/* the preprocessed string is only needed for parsing and
		 * for the warning; release it on both paths */
		if (parse_failed)
			g_warning ("Error in query '%s'", expr);
		g_free (expr);

		if (parse_failed)
			return NULL;

		Xapian::Enquire enquire (*self->_db);

		if (!batchsize)
			batchsize = self->_db->get_doccount();

		if (sortfieldid != MU_MSG_FIELD_ID_NONE) {
			const Xapian::valueno sortslot =
				(Xapian::valueno)sortfieldid;
			enquire.set_sort_by_value (sortslot, ascending);
		}

		enquire.set_query (query);
		enquire.set_cutoff (0,0);

		return mu_msg_iter_new (enquire, batchsize);

	} MU_XAPIAN_CATCH_BLOCK_RETURN(NULL);
}
|
|
|
|
|
|
|
|
char*
|
2010-11-14 15:39:21 +01:00
|
|
|
mu_query_as_string (MuQuery *self, const char *searchexpr)
|
2009-11-25 21:55:06 +01:00
|
|
|
{
|
|
|
|
g_return_val_if_fail (self, NULL);
|
|
|
|
g_return_val_if_fail (searchexpr, NULL);
|
|
|
|
|
|
|
|
try {
|
2010-11-14 15:39:21 +01:00
|
|
|
char *preprocessed;
|
2010-02-03 20:06:31 +01:00
|
|
|
int err (0);
|
2010-11-20 13:43:04 +01:00
|
|
|
|
2010-11-14 15:39:21 +01:00
|
|
|
preprocessed = mu_query_preprocess (searchexpr);
|
2009-11-25 21:55:06 +01:00
|
|
|
|
2010-11-14 15:39:21 +01:00
|
|
|
Xapian::Query q(get_query(self, preprocessed, &err));
|
|
|
|
if (err)
|
|
|
|
g_warning ("Error in query '%s'", preprocessed);
|
|
|
|
|
|
|
|
g_free (preprocessed);
|
2010-01-13 21:35:16 +01:00
|
|
|
|
2010-11-14 15:39:21 +01:00
|
|
|
return err ? NULL : g_strdup(q.get_description().c_str());
|
2009-11-25 21:55:06 +01:00
|
|
|
|
2010-02-03 20:06:31 +01:00
|
|
|
} MU_XAPIAN_CATCH_BLOCK_RETURN(NULL);
|
2009-12-05 19:10:58 +01:00
|
|
|
}
|
2010-01-03 22:53:49 +01:00
|
|
|
|
|
|
|
|