simplified and dispatched code in the searchdata monster

2012-11-18 13:25:54 +01:00 · 2012-11-18 13:25:54 +01:00 · 1539fe7e4d
commit 1539fe7e4d
parent e4aa111d55
8 changed files with 490 additions and 378 deletions
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@ -6,8 +6,8 @@ LIBS = librcl.a

 all: $(LIBS)

-OBJS =  rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
-DEPS =  rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
+OBJS =  rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rclquery.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
+DEPS =  rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp

 librcl.a : $(DEPS) $(OBJS)
 	ar ru librcl.a $(OBJS)
@ -87,6 +87,8 @@ wasastringtoquery.o : ../query/wasastringtoquery.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/wasastringtoquery.cpp
 wasatorcl.o : ../query/wasatorcl.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/wasatorcl.cpp
+daterange.o : ../rcldb/daterange.cpp $(depth)/mk/localdefs
+	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/daterange.cpp
 expansiondbs.o : ../rcldb/expansiondbs.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/expansiondbs.cpp
 rclabstract.o : ../rcldb/rclabstract.cpp $(depth)/mk/localdefs
@ -99,6 +101,8 @@ rclquery.o : ../rcldb/rclquery.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rclquery.cpp
 searchdata.o : ../rcldb/searchdata.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/searchdata.cpp
+searchdataxml.o : ../rcldb/searchdataxml.cpp $(depth)/mk/localdefs
+	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/searchdataxml.cpp
 stemdb.o : ../rcldb/stemdb.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/stemdb.cpp
 stoplist.o : ../rcldb/stoplist.cpp $(depth)/mk/localdefs
@ -282,6 +286,9 @@ wasastringtoquery.dep.stamp : ../query/wasastringtoquery.cpp $(depth)/mk/localde
 wasatorcl.dep.stamp : ../query/wasatorcl.cpp $(depth)/mk/localdefs
 	$(CXX) -M $(ALL_CXXFLAGS) ../query/wasatorcl.cpp > wasatorcl.dep
 	touch wasatorcl.dep.stamp
+daterange.dep.stamp : ../rcldb/daterange.cpp $(depth)/mk/localdefs
+	$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/daterange.cpp > daterange.dep
+	touch daterange.dep.stamp
 expansiondbs.dep.stamp : ../rcldb/expansiondbs.cpp $(depth)/mk/localdefs
 	$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/expansiondbs.cpp > expansiondbs.dep
 	touch expansiondbs.dep.stamp
@ -300,6 +307,9 @@ rclquery.dep.stamp : ../rcldb/rclquery.cpp $(depth)/mk/localdefs
 searchdata.dep.stamp : ../rcldb/searchdata.cpp $(depth)/mk/localdefs
 	$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/searchdata.cpp > searchdata.dep
 	touch searchdata.dep.stamp
+searchdataxml.dep.stamp : ../rcldb/searchdataxml.cpp $(depth)/mk/localdefs
+	$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/searchdataxml.cpp > searchdataxml.dep
+	touch searchdataxml.dep.stamp
 stemdb.dep.stamp : ../rcldb/stemdb.cpp $(depth)/mk/localdefs
 	$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/stemdb.cpp > stemdb.dep
 	touch stemdb.dep.stamp
@ -415,12 +425,14 @@ include reslistpager.dep
 include sortseq.dep
 include wasastringtoquery.dep
 include wasatorcl.dep
+include daterange.dep
 include expansiondbs.dep
 include rclabstract.dep
 include rcldb.dep
 include rcldoc.dep
 include rclquery.dep
 include searchdata.dep
+include searchdataxml.dep
 include stemdb.dep
 include stoplist.dep
 include synfamily.dep
--- a/src/lib/mkMake
+++ b/src/lib/mkMake
@ -41,12 +41,14 @@ ${depth}/query/reslistpager.cpp \
 ${depth}/query/sortseq.cpp \
 ${depth}/query/wasastringtoquery.cpp \
 ${depth}/query/wasatorcl.cpp \
+${depth}/rcldb/daterange.cpp \
 ${depth}/rcldb/expansiondbs.cpp \
 ${depth}/rcldb/rclabstract.cpp \
 ${depth}/rcldb/rcldb.cpp \
 ${depth}/rcldb/rcldoc.cpp \
 ${depth}/rcldb/rclquery.cpp \
 ${depth}/rcldb/searchdata.cpp \
+${depth}/rcldb/searchdataxml.cpp \
 ${depth}/rcldb/stemdb.cpp \
 ${depth}/rcldb/stoplist.cpp \
 ${depth}/rcldb/synfamily.cpp \
--- a/src/rcldb/daterange.cpp
+++ b/src/rcldb/daterange.cpp
@ -0,0 +1,128 @@
+/* The dates-to-query routine is lifted quasi-verbatim but
+ *  modified from xapian-omega:date.cc. Copyright info:
+ *
+ * Copyright 1999,2000,2001 BrightStation PLC
+ * Copyright 2001 James Aylett
+ * Copyright 2001,2002 Ananova Ltd
+ * Copyright 2002 Intercede 1749 Ltd
+ * Copyright 2002,2003,2006 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+#include "autoconfig.h"
+
+#include <stdio.h>
+
+#include <vector>
+using namespace std;
+
+#include "xapian.h"
+
+#include "debuglog.h"
+#include "rclconfig.h"
+
+namespace Rcl {
+
+#ifdef RCL_INDEX_STRIPCHARS /* compile-time choice: the term prefix is always one bare letter */
+#define bufprefix(BUF, L) {(BUF)[0] = L;}
+#define bpoffs() 1
+#else /* prefix form is chosen at run time from o_index_stripchars */
+static inline void bufprefix(char *buf, char c) // write the term prefix for letter c at the start of buf
+{
+    if (o_index_stripchars) {
+	buf[0] = c; // bare one-letter prefix
+    } else {
+	buf[0] = ':'; // wrapped prefix: ':' + letter + ':'
+	buf[1] = c; 
+	buf[2] = ':';
+    }
+}
+static inline int bpoffs() // number of chars bufprefix() writes, i.e. offset of the first char after the prefix
+{
+    return o_index_stripchars ? 1 : 3;
+}
+#endif /* RCL_INDEX_STRIPCHARS */
+
+// Build a query selecting the documents dated from y1/m1/d1 to y2/m2/d2,
+// both ends included, as an OR of date terms: one D (day) term per day of
+// a partially covered first or last month, one M (month) term per fully
+// covered month, and one Y (year) term per year strictly between y1 and y2.
+//
+// Term layout inside buf: the prefix written by bufprefix() (bpoffs()
+// chars), then yyyy at offset bpoffs(), mm at 4 + bpoffs() and dd at
+// 6 + bpoffs(). Each sprintf() also writes the terminating NUL, so the
+// last position written decides where the current term ends.
+//
+// NOTE(review): nothing here checks that the range is ordered or that the
+// month/day values are valid -- presumably the caller guarantees it.
+Xapian::Query date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
+{
+    // Xapian uses a smallbuf and snprintf. Can't be bothered, we're
+    // only doing %d's !
+    char buf[200];
+    bufprefix(buf, 'D');
+    sprintf(buf+bpoffs(), "%04d%02d", y1, m1);
+    vector<Xapian::Query> v;
+
+    int d_last = monthdays(m1, y1);
+    int d_end = d_last;
+    if (y1 == y2 && m1 == m2 && d2 < d_last) {
+	d_end = d2;
+    }
+    // Deal with any initial partial month
+    if (d1 > 1 || d_end < d_last) {
+	for ( ; d1 <= d_end ; d1++) {
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
+	    v.push_back(Xapian::Query(buf));
+	}
+    } else {
+	// The whole first month is covered: a single month term does it
+	bufprefix(buf, 'M');
+	v.push_back(Xapian::Query(buf));
+    }
+
+    // Range contained in a single month: we are done
+    if (y1 == y2 && m1 == m2) {
+	return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
+    }
+
+    // Full months following the first one, up to the end of the first
+    // year or up to the month preceding the last one
+    int m_last = (y1 < y2) ? 12 : m2 - 1;
+    while (++m1 <= m_last) {
+	sprintf(buf + 4 + bpoffs(), "%02d", m1);
+	bufprefix(buf, 'M');
+	v.push_back(Xapian::Query(buf));
+    }
+
+    if (y1 < y2) {
+	// Full years strictly between the first and the last one
+	while (++y1 < y2) {
+	    sprintf(buf + bpoffs(), "%04d", y1);
+	    bufprefix(buf, 'Y');
+	    v.push_back(Xapian::Query(buf));
+	}
+	// Full months of the last year preceding the last month
+	sprintf(buf + bpoffs(), "%04d", y2);
+	bufprefix(buf, 'M');
+	for (m1 = 1; m1 < m2; m1++) {
+	    sprintf(buf + 4 + bpoffs(), "%02d", m1);
+	    v.push_back(Xapian::Query(buf));
+	}
+    }
+
+    // Set the last month. It goes right after the 4 year digits, like
+    // every other month written above. Writing it 2 chars earlier would
+    // clobber the end of the year and cut the term short, so that the
+    // days appended below would never be part of it.
+    sprintf(buf + 4 + bpoffs(), "%02d", m2);
+
+    // Deal with any final partial month
+    if (d2 < monthdays(m2, y2)) {
+	bufprefix(buf, 'D');
+	for (d1 = 1 ; d1 <= d2; d1++) {
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
+	    v.push_back(Xapian::Query(buf));
+	}
+    } else {
+	bufprefix(buf, 'M');
+	v.push_back(Xapian::Query(buf));
+    }
+
+    return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
+}
+
+
+}
+
--- a/src/rcldb/daterange.h
+++ b/src/rcldb/daterange.h
@ -0,0 +1,10 @@
+#ifndef _DATERANGE_H_INCLUDED_
+#define _DATERANGE_H_INCLUDED_
+
+#include <xapian.h>
+
+namespace Rcl {
+extern Xapian::Query date_range_filter(int y1, int m1, int d1, 
+				       int y2, int m2, int d2); // OR of day/month/year terms for y1/m1/d1 through y2/m2/d2
+}
+#endif /* _DATERANGE_H_INCLUDED_ */
--- a/src/rcldb/rclquery.cpp
+++ b/src/rcldb/rclquery.cpp
@ -195,13 +195,8 @@ bool Query::setQuery(RefCntr<SearchData> sdata)
    m_nq->clear();
    m_sd = sdata;
    
-    int maxexp = 10000;
-    m_db->getConf()->getConfParam("maxTermExpand", &maxexp);
-    int maxcl = 100000;
-    m_db->getConf()->getConfParam("maxXapianClauses", &maxcl);
-
    Xapian::Query xq;
-    if (!sdata->toNativeQuery(*m_db, &xq, maxexp, maxcl)) {
+    if (!sdata->toNativeQuery(*m_db, &xq)) {
 	m_reason += sdata->getReason();
 	return false;
    }
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -46,6 +46,7 @@ using namespace std;
 #include "stemdb.h"
 #include "expansiondbs.h"
 #include "base64.h"
+#include "daterange.h"

 namespace Rcl {

@ -54,117 +55,17 @@ typedef  vector<SearchDataClause *>::const_iterator qlist_cit_t;

 static const int original_term_wqf_booster = 10;

-/* The dates-to-query routine is is lifted quasi-verbatim but
- *  modified from xapian-omega:date.cc. Copyright info:
- *
- * Copyright 1999,2000,2001 BrightStation PLC
- * Copyright 2001 James Aylett
- * Copyright 2001,2002 Ananova Ltd
- * Copyright 2002 Intercede 1749 Ltd
- * Copyright 2002,2003,2006 Olly Betts
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
- * USA
- */
-
-#ifdef RCL_INDEX_STRIPCHARS
-#define bufprefix(BUF, L) {(BUF)[0] = L;}
-#define bpoffs() 1
-#else
-static inline void bufprefix(char *buf, char c)
+void SearchData::commoninit()
 {
-    if (o_index_stripchars) {
-	buf[0] = c;
-    } else {
-	buf[0] = ':'; 
-	buf[1] = c; 
-	buf[2] = ':';
-    }
-}
-static inline int bpoffs() 
-{
-    return o_index_stripchars ? 1 : 3;
-}
-#endif
-
-static Xapian::Query
-date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
-{
-    // Xapian uses a smallbuf and snprintf. Can't be bothered, we're
-    // only doing %d's !
-    char buf[200];
-    bufprefix(buf, 'D');
-    sprintf(buf+bpoffs(), "%04d%02d", y1, m1);
-    vector<Xapian::Query> v;
-
-    int d_last = monthdays(m1, y1);
-    int d_end = d_last;
-    if (y1 == y2 && m1 == m2 && d2 < d_last) {
-	d_end = d2;
-    }
-    // Deal with any initial partial month
-    if (d1 > 1 || d_end < d_last) {
-    	for ( ; d1 <= d_end ; d1++) {
-	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
-	    v.push_back(Xapian::Query(buf));
-	}
-    } else {
-	bufprefix(buf, 'M');
-	v.push_back(Xapian::Query(buf));
-    }
-    
-    if (y1 == y2 && m1 == m2) {
-	return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
-    }
-
-    int m_last = (y1 < y2) ? 12 : m2 - 1;
-    while (++m1 <= m_last) {
-	sprintf(buf + 4 + bpoffs(), "%02d", m1);
-	bufprefix(buf, 'M');
-	v.push_back(Xapian::Query(buf));
-    }
-	
-    if (y1 < y2) {
-	while (++y1 < y2) {
-	    sprintf(buf + bpoffs(), "%04d", y1);
-	    bufprefix(buf, 'Y');
-	    v.push_back(Xapian::Query(buf));
-	}
-	sprintf(buf + bpoffs(), "%04d", y2);
-	bufprefix(buf, 'M');
-	for (m1 = 1; m1 < m2; m1++) {
-	    sprintf(buf + 4 + bpoffs(), "%02d", m1);
-	    v.push_back(Xapian::Query(buf));
-	}
-    }
-	
-    sprintf(buf + 2 + bpoffs(), "%02d", m2);
-
-    // Deal with any final partial month
-    if (d2 < monthdays(m2, y2)) {
-	bufprefix(buf, 'D');
-    	for (d1 = 1 ; d1 <= d2; d1++) {
-	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
-	    v.push_back(Xapian::Query(buf));
-	}
-    } else {
-	bufprefix(buf, 'M');
-	v.push_back(Xapian::Query(buf));
-    }
-
-    return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
+    m_haveDates = false;
+    m_maxSize = size_t(-1);
+    m_minSize = size_t(-1);
+    m_haveWildCards = false;
+    m_softmaxexpand = -1;
+    m_autodiacsens = false;
+    m_autocasesens = true;
+    m_maxexp = 10000;
+    m_maxcl = 100000;
 }

 // Expand categories and mime type wild card exps
@ -188,13 +89,17 @@ bool SearchData::expandFileTypes(RclConfig *cfg, vector<string>& tps)
 	    cfg->getMimeCatTypes(*it, tps);
 	    exptps.insert(exptps.end(), tps.begin(), tps.end());
 	} else {
+	    bool matched = false;
 	    for (vector<string>::const_iterator ait = alltypes.begin();
 		 ait != alltypes.end(); ait++) {
 		if (fnmatch(it->c_str(), ait->c_str(), FNM_CASEFOLD) 
 		    != FNM_NOMATCH) {
 		    exptps.push_back(*ait);
+		    matched = true;
 		}
 	    }
+	    if (!matched)
+		exptps.push_back(it->c_str());
 	}
    }
    tps = exptps;
@ -203,13 +108,12 @@ bool SearchData::expandFileTypes(RclConfig *cfg, vector<string>& tps)

 bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, 
 				vector<SearchDataClause*>& query, 
-				string& reason, void *d, 
-				int maxexp, int maxcl)
+				string& reason, void *d)
 {
    Xapian::Query xq;
    for (qlist_it_t it = query.begin(); it != query.end(); it++) {
 	Xapian::Query nq;
-	if (!(*it)->toNativeQuery(db, &nq, maxexp, maxcl)) {
+	if (!(*it)->toNativeQuery(db, &nq)) {
 	    LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",
 		    (*it)->getReason().c_str()));
 	    reason += (*it)->getReason() + " ";
@ -240,7 +144,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
        } else {
            xq = Xapian::Query(op, xq, nq);
        }
-	if (int(xq.get_length()) >= maxcl) {
+	if (int(xq.get_length()) >= getMaxCl()) {
 	    LOGERR(("Maximum Xapian query size exceeded."
 		    " Maybe increase maxXapianClauses."));
 	    m_reason += "Maximum Xapian query size exceeded."
@ -255,113 +159,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
    return true;
 }

-static string tpToString(SClType tp)
-{
-    switch (tp) {
-    case SCLT_AND: return "AND";
-    case SCLT_OR: return "OR";
-    case SCLT_EXCL: return "EX";
-    case SCLT_FILENAME: return "FN";
-    case SCLT_PHRASE: return "PH";
-    case SCLT_NEAR: return "NE";
-    case SCLT_SUB: return "SU"; // Unsupported actually
-    default: return "UN";
-    }
-}
-
-string SearchData::asXML()
-{
-    LOGDEB(("SearchData::asXML\n"));
-    ostringstream os;
-
-    // Searchdata
-    os << "<SD>" << endl;
-
-    // Clause list
-    os << "<CL>" << endl;
-    if (m_tp != SCLT_AND)
-	os << "<CLT>" << tpToString(m_tp) << "</CLT>" << endl;
-    for (unsigned int i = 0; i <  m_query.size(); i++) {
-	SearchDataClause *c = m_query[i];
-	if (c->getTp() == SCLT_SUB) {
-	    LOGERR(("SearchData::asXML: can't do subclauses !\n"));
-	    continue;
-	}
-	SearchDataClauseSimple *cl = 
-	    dynamic_cast<SearchDataClauseSimple*>(c);
-	os << "<C>" << endl;
-	if (cl->getTp() != SCLT_AND) {
-	    os << "<CT>" << tpToString(cl->getTp()) << "</CT>" << endl;
-	}
-	if (cl->getTp() != SCLT_FILENAME && !cl->getfield().empty()) {
-	    os << "<F>" << base64_encode(cl->getfield()) << "</F>" << endl;
-	}
-	os << "<T>" << base64_encode(cl->gettext()) << "</T>" << endl;
-	if (cl->getTp() == SCLT_NEAR || cl->getTp() == SCLT_PHRASE) {
-	    SearchDataClauseDist *cld = 
-	    dynamic_cast<SearchDataClauseDist*>(cl);
-	    os << "<S>" << cld->getslack() << "</S>" << endl;
-	}
-	os << "</C>" << endl;
-    }
-    os << "</CL>" << endl;
-
-    if (m_haveDates) {
-	if (m_dates.y1 > 0) {
-	    os << "<DMI>" << 
-		"<D>" << m_dates.d1 << "</D>" <<
-		"<M>" << m_dates.m1 << "</M>" << 
-		"<Y>" << m_dates.y1 << "</Y>" 
-	       << "</DMI>" << endl;
-	}
-	if (m_dates.y2 > 0) {
-	    os << "<DMA>" << 
-		"<D>" << m_dates.d2 << "</D>" <<
-		"<M>" << m_dates.m2 << "</M>" << 
-		"<Y>" << m_dates.y2 << "</Y>" 
-	       << "</DMA>" << endl;
-	}
-    }
-
-
-    if (m_minSize != size_t(-1)) {
-	os << "<MIS>" << m_minSize << "</MIS>" << endl;
-    }
-    if (m_maxSize != size_t(-1)) {
-	os << "<MAS>" << m_maxSize << "</MAS>" << endl;
-    }
-
-    if (!m_filetypes.empty()) {
-	os << "<ST>";
-	for (vector<string>::iterator it = m_filetypes.begin(); 
-	     it != m_filetypes.end(); it++) {
-	    os << *it << " ";
-	}
-	os << "</ST>" << endl;
-    }
-
-    if (!m_nfiletypes.empty()) {
-	os << "<IT>";
-	for (vector<string>::iterator it = m_nfiletypes.begin(); 
-	     it != m_nfiletypes.end(); it++) {
-	    os << *it << " ";
-	}
-	os << "</IT>" << endl;
-    }
-
-    for (vector<DirSpec>::const_iterator dit = m_dirspecs.begin();
-	 dit != m_dirspecs.end(); dit++) {
-	if (dit->exclude) {
-	    os << "<ND>" << base64_encode(dit->dir) << "</ND>" << endl;
-	} else {
-	    os << "<YD>" << base64_encode(dit->dir) << "</YD>" << endl;
-	}
-    }
-    os << "</SD>";
-    return os.str();
-}
-
-bool SearchData::toNativeQuery(Rcl::Db &db, void *d, int maxexp, int maxcl)
+bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
 {
    LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
    m_reason.erase();
@ -369,7 +167,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d, int maxexp, int maxcl)
    // Walk the clause list translating each in turn and building the 
    // Xapian query tree
    Xapian::Query xq;
-    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq, maxexp, maxcl)) {
+    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
 	LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n", 
 		m_reason.c_str()));
 	return false;
@ -632,7 +430,8 @@ bool SearchData::addClause(SearchDataClause* cl)
 }

 // Make me all new
-void SearchData::erase() {
+void SearchData::erase() 
+{
    LOGDEB0(("SearchData::erase\n"));
    m_tp = SCLT_AND;
    for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
@ -729,54 +528,6 @@ private:
    map<int, bool> m_nste;
 };

-// A class used to translate a user compound string (*not* a query
-// language string) as may be entered in any_terms/all_terms search
-// entry fields, ex: [term1 "a phrase" term3] into a xapian query
-// tree.
-// The object keeps track of the query terms and term groups while
-// translating.
-class StringToXapianQ {
-public:
-    StringToXapianQ(Db& db, HighlightData& hld, const string& field, 
-		    const string &stmlng, bool boostUser, int maxexp, int maxcl)
-	: m_db(db), m_field(field), m_stemlang(stmlng),
-	  m_doBoostUserTerms(boostUser), m_hld(hld), m_autodiacsens(false),
-	  m_autocasesens(true), m_maxexp(maxexp), m_maxcl(maxcl), m_curcl(0)
-    { 
-	m_db.getConf()->getConfParam("autodiacsens", &m_autodiacsens);
-	m_db.getConf()->getConfParam("autocasesens", &m_autocasesens);
-    }
-
-    bool processUserString(const string &iq,
-			   int mods, 
-			   string &ermsg,
-			   vector<Xapian::Query> &pqueries, 
-			   int slack = 0, bool useNear = false);
-private:
-    bool expandTerm(string& ermsg, int mods, 
-		    const string& term, vector<string>& exp, 
-                    string& sterm, const string& prefix);
-    // After splitting entry on whitespace: process non-phrase element
-    void processSimpleSpan(string& ermsg, const string& span, 
-			   int mods,
-			   vector<Xapian::Query> &pqueries);
-    // Process phrase/near element
-    void processPhraseOrNear(string& ermsg, TextSplitQ *splitData, 
-			     int mods,
-			     vector<Xapian::Query> &pqueries,
-			     bool useNear, int slack);
-
-    Db&           m_db;
-    const string& m_field;
-    const string& m_stemlang;
-    const bool    m_doBoostUserTerms;
-    HighlightData& m_hld;
-    bool m_autodiacsens;
-    bool m_autocasesens;
-    int  m_maxexp;
-    int  m_maxcl;
-    int  m_curcl;
-};

 #if 1
 static void listVector(const string& what, const vector<string>&l)
@ -800,13 +551,14 @@ static void listVector(const string& what, const vector<string>&l)
 *  has it already. Used in the simple case where there is nothing to expand, 
 *  and we just return the prefixed term (else Db::termMatch deals with it).
 */
-bool StringToXapianQ::expandTerm(string& ermsg, int mods, 
+bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, 
+					string& ermsg, int mods, 
 					const string& term, 
 					vector<string>& oexp, string &sterm,
 					const string& prefix)
 {
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
-	     mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
+	     mods, m_field.c_str(), term.c_str(), getStemLang().c_str()));
    sterm.clear();
    oexp.clear();
    if (term.empty())
@ -816,12 +568,12 @@ bool StringToXapianQ::expandTerm(string& ermsg, int mods,

    // If there are no wildcards, add term to the list of user-entered terms
    if (!haswild)
-	m_hld.uterms.insert(term);
+	m_hldata.uterms.insert(term);

    bool nostemexp = (mods & SearchDataClause::SDCM_NOSTEMMING) != 0;

    // No stem expansion if there are wildcards or if prevented by caller
-    if (haswild || m_stemlang.empty()) {
+    if (haswild || getStemLang().empty()) {
 	LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
 	nostemexp = true;
    }
@ -842,7 +594,7 @@ bool StringToXapianQ::expandTerm(string& ermsg, int mods,
 	// diacritic-sensitive. Note that the way that the test is
 	// performed (conversion+comparison) will automatically ignore
 	// accented characters which are actually a separate letter
-	if (m_autodiacsens && unachasaccents(term)) {
+	if (getAutoDiac() && unachasaccents(term)) {
 	    LOGDEB0(("expandTerm: term has accents -> diac-sensitive\n"));
 	    diac_sensitive = true;
 	}
@ -853,7 +605,7 @@ bool StringToXapianQ::expandTerm(string& ermsg, int mods,
 	// modifier to search for Floor in a case-sensitive way.
 	Utf8Iter it(term);
 	it++;
-	if (m_autocasesens && unachasuppercase(term.substr(it.getBpos()))) {
+	if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
 	    LOGDEB0(("expandTerm: term has uppercase -> case-sensitive\n"));
 	    case_sensitive = true;
 	}
@ -872,14 +624,14 @@ bool StringToXapianQ::expandTerm(string& ermsg, int mods,
    if (noexpansion) {
 	sterm = term;
 	oexp.push_back(prefix + term);
-	m_hld.terms[term] = m_hld.uterms.size() - 1;
+	m_hldata.terms[term] = m_hldata.uterms.size() - 1;
 	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
 	return true;
    } 

    // Make objects before the goto jungle to avoid compiler complaints
    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
-    XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all", 
+    XapComputableSynFamMember synac(db.m_ndb->xrdb, synFamDiCa, "all", 
 				    &unacfoldtrans);
    // This will hold the result of case and diacritics expansion as input
    // to stem expansion.
@ -891,7 +643,8 @@ bool StringToXapianQ::expandTerm(string& ermsg, int mods,
 	// expansion, which means that we are casediac-sensitive. There
 	// would be nothing to prevent us to expand from the casediac
 	// synonyms first. To be done later
-	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang,term,res,m_maxexp,m_field);
+	db.termMatch(Rcl::Db::ET_WILD, getStemLang(), term, res, 
+		     getMaxExp(), m_field);
 	goto termmatchtoresult;
    }

@ -899,14 +652,16 @@ bool StringToXapianQ::expandTerm(string& ermsg, int mods,

 #ifdef RCL_INDEX_STRIPCHARS

-    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, m_maxexp, m_field);
+    db.termMatch(Rcl::Db::ET_STEM, getStemLang(), term, res, 
+		 getMaxExp(), m_field);

 #else

    if (o_index_stripchars) {
 	// If the index is raw, we can only come here if nostemexp is unset
 	// and we just need stem expansion.
-	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang,term,res,m_maxexp,m_field);
+	db.termMatch(Rcl::Db::ET_STEM, getStemLang(), term, res, 
+		     getMaxExp(), m_field);
 	goto termmatchtoresult;
    } 

@ -950,11 +705,11 @@ bool StringToXapianQ::expandTerm(string& ermsg, int mods,
    {
 	vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
 	lexp.resize(uit - lexp.begin());
-	StemDb db(m_db.m_ndb->xrdb);
+	StemDb sdb(db.m_ndb->xrdb);
 	vector<string> exp1;
 	for (vector<string>::const_iterator it = lexp.begin(); 
 	     it != lexp.end(); it++) {
-	    db.stemExpand(m_stemlang, *it, exp1);
+	    sdb.stemExpand(getStemLang(), *it, exp1);
 	}
 	LOGDEB(("ExpTerm: stem exp-> %s\n", stringsToString(exp1).c_str()));

@ -975,13 +730,14 @@ exptotermatch:
    LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
    for (vector<string>::const_iterator it = lexp.begin();
 	 it != lexp.end(); it++) {
-	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res,m_maxexp,m_field);
+	db.termMatch(Rcl::Db::ET_WILD, getStemLang(), *it, res,
+		     getMaxExp(), m_field);
    }
 #endif

    // Term match entries to vector of terms
 termmatchtoresult:
-    if (int(res.entries.size()) >= m_maxexp) {
+    if (int(res.entries.size()) >= getMaxExp()) {
 	ermsg = "Maximum term expansion size exceeded."
 	    " Maybe increase maxTermExpand.";
 	return false;
@ -999,7 +755,7 @@ termmatchtoresult:
    // Remember the uterm-to-expansion links
    for (vector<string>::const_iterator it = oexp.begin(); 
 	 it != oexp.end(); it++) {
-	m_hld.terms[strip_prefix(*it)] = term;
+	m_hldata.terms[strip_prefix(*it)] = term;
    }
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
    return true;
@ -1039,10 +795,11 @@ void multiply_groups(vector<vector<string> >::const_iterator vvit,
    }
 }

-void StringToXapianQ::processSimpleSpan(string& ermsg, const string& span, 
-					int mods,
-					vector<Xapian::Query> &pqueries)
+void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
+					       const string& span, 
+					       int mods, void * pq)
 {
+    vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
    LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
 	    span.c_str(), (unsigned int)mods));
    vector<string> exp;  
@ -1050,19 +807,19 @@ void StringToXapianQ::processSimpleSpan(string& ermsg, const string& span,

    string prefix;
    const FieldTraits *ftp;
-    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
+    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) {
 	prefix = wrap_prefix(ftp->pfx);
    }

-    if (!expandTerm(ermsg, mods, span, exp, sterm, prefix))
+    if (!expandTerm(db, ermsg, mods, span, exp, sterm, prefix))
 	return;
    
    // Set up the highlight data. No prefix should go in there
    for (vector<string>::const_iterator it = exp.begin(); 
 	 it != exp.end(); it++) {
-	m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
-	m_hld.slacks.push_back(0);
-	m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
+	m_hldata.groups.push_back(vector<string>(1, it->substr(prefix.size())));
+	m_hldata.slacks.push_back(0);
+	m_hldata.grpsugidx.push_back(m_hldata.ugroups.size() - 1);
    }

    // Push either term or OR of stem-expanded set
@ -1074,7 +831,12 @@ void StringToXapianQ::processSimpleSpan(string& ermsg, const string& span,
    // the non-expanded terms in a term list would end-up with even
    // less wqf). This does not happen if there are wildcards anywhere
    // in the search.
-    if (m_doBoostUserTerms && !sterm.empty()) {
+    // We normally boost the original term in the stem expansion list. Don't
+    // do it if there are wildcards anywhere, this would skew the results.
+    bool doBoostUserTerm = 
+	(m_parentSearch && !m_parentSearch->haveWildCards()) || 
+	(m_parentSearch == 0 && !m_haveWildCards);
+    if (doBoostUserTerm && !sterm.empty()) {
        xq = Xapian::Query(Xapian::Query::OP_OR, xq, 
 			   Xapian::Query(prefix+sterm, 
 					 original_term_wqf_booster));
@ -1086,11 +848,12 @@ void StringToXapianQ::processSimpleSpan(string& ermsg, const string& span,
 // NEAR xapian query, the elements of which can themselves be OR
 // queries if the terms get expanded by stemming or wildcards (we
 // don't do stemming for PHRASE though)
-void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData, 
-					  int mods,
-					  vector<Xapian::Query> &pqueries,
+void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg, 
+						 TextSplitQ *splitData, 
+						 int mods, void *pq,
 						 bool useNear, int slack)
 {
+    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
    Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
 	Xapian::Query::OP_PHRASE;
    vector<Xapian::Query> orqueries;
@ -1101,7 +864,7 @@ void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData,

    string prefix;
    const FieldTraits *ftp;
-    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
+    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) {
 	prefix = wrap_prefix(ftp->pfx);
    }

@ -1128,7 +891,7 @@ void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData,
 	    lmods |= SearchDataClause::SDCM_NOSTEMMING;
 	string sterm;
 	vector<string> exp;
-	if (!expandTerm(ermsg, lmods, *it, exp, sterm, prefix))
+	if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
 	    return;
 	LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
 	listVector("", exp);
@ -1142,7 +905,7 @@ void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData,
 	orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
 					  exp.begin(), exp.end()));
 	m_curcl += exp.size();
-	if (m_curcl >= m_maxcl)
+	if (m_curcl >= getMaxCl())
 	    return;
 #ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
 	if (exp.size() > 1) 
@ -1173,10 +936,11 @@ void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData,
    
    // Insert the search groups and slacks in the highlight data, with
    // a reference to the user entry that generated them:
-    m_hld.groups.insert(m_hld.groups.end(), allcombs.begin(), allcombs.end());
-    m_hld.slacks.insert(m_hld.slacks.end(), allcombs.size(), slack);
-    m_hld.grpsugidx.insert(m_hld.grpsugidx.end(), allcombs.size(), 
-			   m_hld.ugroups.size() - 1);
+    m_hldata.groups.insert(m_hldata.groups.end(), 
+			   allcombs.begin(), allcombs.end());
+    m_hldata.slacks.insert(m_hldata.slacks.end(), allcombs.size(), slack);
+    m_hldata.grpsugidx.insert(m_hldata.grpsugidx.end(), allcombs.size(), 
+			      m_hldata.ugroups.size() - 1);
 }

 // Trim string beginning with ^ or ending with $ and convert to flags
@ -1220,20 +984,18 @@ static int stringToMods(string& s)
 * @return the subquery count (either or'd stem-expanded terms or phrase word
 *   count)
 */
-bool StringToXapianQ::processUserString(const string &iq,
-					int mods, 
-					string &ermsg,
-					vector<Xapian::Query> &pqueries,
-					int slack, 
-					bool useNear
-					)
+bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
+					int mods, string &ermsg,
+					void *pq, int slack, bool useNear)
 {
+    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
+
    LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
 	    "slack %d near %d\n", 
 	    iq.c_str(), m_field.c_str(), mods, slack, useNear));
    ermsg.erase();
    m_curcl = 0;
-    const StopList stops = m_db.getStopList();
+    const StopList stops = db.getStopList();

    // Simple whitespace-split input into user-level words and
    // double-quoted phrases: word1 word2 "this is a phrase". 
@ -1297,16 +1059,17 @@ bool StringToXapianQ::processUserString(const string &iq,
 		int lmods = mods;
 		if (splitter.nostemexps.front())
 		    lmods |= SearchDataClause::SDCM_NOSTEMMING;
-		m_hld.ugroups.push_back(vector<string>(1, *it));
-		processSimpleSpan(ermsg,splitter.terms.front(),lmods, pqueries);
+		m_hldata.ugroups.push_back(vector<string>(1, *it));
+		processSimpleSpan(db, ermsg, splitter.terms.front(),
+				  lmods, &pqueries);
 	    }
 		break;
 	    default:
-		m_hld.ugroups.push_back(vector<string>(1, *it));
-		processPhraseOrNear(ermsg, &splitter, mods, pqueries,
+		m_hldata.ugroups.push_back(vector<string>(1, *it));
+		processPhraseOrNear(db, ermsg, &splitter, mods, &pqueries,
 				    useNear, slack);
 	    }
-	    if (m_curcl >= m_maxcl) {
+	    if (m_curcl >= getMaxCl()) {
 		ermsg = "Maximum Xapian query size exceeded."
 		    " Maybe increase maxXapianClauses.";
 		break;
@ -1329,8 +1092,7 @@ bool StringToXapianQ::processUserString(const string &iq,
 }

 // Translate a simple OR, AND, or EXCL search clause. 
-bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, 
-					   int maxexp, int maxcl)
+bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
 {
    LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
 	     getStemLang().c_str()));
@ -1348,17 +1110,9 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
 	LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
 	return false;
    }
+
    vector<Xapian::Query> pqueries;
-
-    // We normally boost the original term in the stem expansion list. Don't
-    // do it if there are wildcards anywhere, this would skew the results.
-    bool doBoostUserTerm = 
-	(m_parentSearch && !m_parentSearch->haveWildCards()) || 
-	(m_parentSearch == 0 && !m_haveWildCards);
-
-    StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
-		       maxexp, maxcl);
-    if (!tr.processUserString(m_text, getModifiers(), m_reason, pqueries))
+    if (!processUserString(db, m_text, getModifiers(), m_reason, &pqueries))
 	return false;
    if (pqueries.empty()) {
 	LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
@ -1381,13 +1135,14 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
 // about expanding multiple fragments in the past). We just take the
 // value blanks and all and expand this against the indexed unsplit
 // file names
-bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p, 
-					     int maxexp, int)
+bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
 {
    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    vector<string> names;
+    int maxexp = 10000;
+    db.getConf()->getConfParam("maxTermExpand", &maxexp);
    db.filenameWildExp(m_text, names, maxexp);
    *qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());

@ -1398,8 +1153,7 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
 }

 // Translate NEAR or PHRASE clause. 
-bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, 
-					 int maxexp, int maxcl)
+bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
 {
    LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));

@ -1409,12 +1163,6 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
    vector<Xapian::Query> pqueries;
    Xapian::Query nq;

-    // We normally boost the original term in the stem expansion list. Don't
-    // do it if there are wildcards anywhere, this would skew the results.
-    bool doBoostUserTerm = 
-	(m_parentSearch && !m_parentSearch->haveWildCards()) || 
-	(m_parentSearch == 0 && !m_haveWildCards);
-
    // We produce a single phrase out of the user entry then use
    // stringToXapianQueries() to lowercase and simplify the phrase
    // terms etc. This will result into a single (complex)
@ -1424,9 +1172,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
    }
    string s = cstr_dquote + m_text + cstr_dquote;
    bool useNear = (m_tp == SCLT_NEAR);
-    StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
-		       maxexp, maxcl);
-    if (!tr.processUserString(s, getModifiers(), m_reason, pqueries, 
+    if (!processUserString(db, s, getModifiers(), m_reason, &pqueries, 
 			   m_slack, useNear))
 	return false;
    if (pqueries.empty()) {
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -48,13 +48,15 @@ enum SClType {
 class SearchDataClause;

 /** 
-    Data structure representing a Recoll user query, for translation
+    A SearchData object represents a Recoll user query, for translation
    into a Xapian query tree. This could probably be better called a 'question'.

-    This is a list of search clauses combined through either OR or AND.
+    This is a list of SearchDataClause objects combined through either
+    OR or AND.

    Clauses either reflect user entry in a query field: some text, a
-    clause type (AND/OR/NEAR etc.), possibly a distance, or points to
+    clause type (AND/OR/NEAR etc.), possibly a distance, or are the
+    result of parsing query language input. A clause can also point to
    another SearchData representing a subquery.

    The content of each clause when added may not be fully parsed yet
@ -63,28 +65,34 @@ class SearchDataClause;
    several terms and phrases as would result from 
    ["this is a phrase"  term1 term2] . 

-    This is why the clauses also have an AND/OR/... type. 
+    This is why the clauses also have an AND/OR/... type. They are an 
+    intermediate form between the primary user input and 
+    the final Xapian::Query tree.

-    A phrase clause could be added either explicitly or using double quotes:
-    {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
+    For example, a phrase clause could be added either explicitly or
+    using double quotes: {SCLT_PHRASE, [this is a phrase]} or as
+    {SCLT_XXX, ["this is a phrase"]}

 */
 class SearchData {
 public:
    SearchData(SClType tp, const string& stemlang) 
-    : m_tp(tp), m_haveDates(false), m_maxSize(size_t(-1)),
-      m_minSize(size_t(-1)), m_haveWildCards(false), m_stemlang(stemlang)
+	: m_tp(tp), m_stemlang(stemlang)
    {
 	if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
 	    m_tp = SCLT_OR;
+	commoninit();
    }
    SearchData() 
-	: m_tp(SCLT_AND), m_haveDates(false), m_maxSize(size_t(-1)),
-	  m_minSize(size_t(-1)), m_haveWildCards(false), m_stemlang("english")
+	: m_tp(SCLT_AND), m_stemlang("english")
    {
+	commoninit();
    }
    
-    ~SearchData() {erase();}
+    ~SearchData() 
+    {
+	erase();
+    }

    /** Make pristine */
    void erase();
@ -96,7 +104,7 @@ public:
    bool haveWildCards() {return m_haveWildCards;}

    /** Translate to Xapian query. rcldb knows about the void*  */
-    bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl);
+    bool toNativeQuery(Rcl::Db &db, void *);

    /** We become the owner of cl and will delete it */
    bool addClause(SearchDataClause *cl);
@ -143,12 +151,26 @@ public:
    std::string getDescription() {return m_description;}
    void setDescription(const std::string& d) {m_description = d;}

+    /** Return an XML version of the contents, for storage in search history
+	by the GUI */
    string asXML();
+
    void setTp(SClType tp) 
    {
 	m_tp = tp;
    }
+
+    /** Set the soft maximum for term expansion (stored in
+	m_softmaxexpand) */
+    void setMaxExpand(int max)
+    {
+	m_softmaxexpand = max;
+    }
+    /** Read accessors for the translation parameters held by this
+	object (m_autodiacsens, m_autocasesens, m_maxexp, m_maxcl) */
+    bool getAutoDiac() 
+    {
+	return m_autodiacsens;
+    }
+    bool getAutoCase() 
+    {
+	return m_autocasesens;
+    }
+    int getMaxExp() 
+    {
+	return m_maxexp;
+    }
+    int getMaxCl() 
+    {
+	return m_maxcl;
+    }
+
    friend class ::AdvSearch;
+
 private:
    // Combine type. Only SCLT_AND or SCLT_OR here
    SClType                   m_tp; 
@ -184,10 +206,26 @@ private:
    bool   m_haveWildCards;
    std::string m_stemlang;

+    // Parameters set at the start of ToNativeQuery because they need
+    // an rclconfig. Actually this does not make sense and it would be
+    // simpler to just pass an rclconfig to the constructor;
+    bool m_autodiacsens;
+    bool m_autocasesens;
+    int m_maxexp;
+    int m_maxcl;
+
+    // Parameters which are not part of the main query data but may influence
+    // translation in special cases.
+    // Maximum TermMatch (e.g. wildcard) expansion. This is normally set
+    // from the configuration with a high default, but may be set to a lower
+    // value during "find-as-you-type" operations from the GUI
+    int m_softmaxexpand;
+
    bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
    bool clausesToQuery(Rcl::Db &db, SClType tp,     
 			std::vector<SearchDataClause*>& query,
-			string& reason, void *d, int, int);
+			string& reason, void *d);
+    void commoninit();

    /* Copyconst and assignment private and forbidden */
    SearchData(const SearchData &) {}
@ -204,7 +242,7 @@ public:
      m_modifiers(SDCM_NONE), m_weight(1.0)
    {}
    virtual ~SearchDataClause() {}
-    virtual bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl) = 0;
+    virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
    virtual std::string getReason() const {return m_reason;}
    virtual void getTerms(HighlightData & hldata) const = 0;
@ -222,6 +260,22 @@ public:
 	return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ? 
 	    cstr_null : m_parentSearch->getStemLang();
    }
+    // Forwarded from the owning SearchData; false when this clause has
+    // no parent search.
+    bool getAutoDiac()
+    {
+	if (m_parentSearch == 0)
+	    return false;
+	return m_parentSearch->getAutoDiac();
+    }
+    // Forwarded from the owning SearchData; true when this clause has
+    // no parent search.
+    bool getAutoCase()
+    {
+	if (m_parentSearch == 0)
+	    return true;
+	return m_parentSearch->getAutoCase();
+    }
+    // Forwarded from the owning SearchData; 10000 when this clause has
+    // no parent search.
+    int getMaxExp() 
+    {
+	if (m_parentSearch == 0)
+	    return 10000;
+	return m_parentSearch->getMaxExp();
+    }
+    // Forwarded from the owning SearchData; 100000 when this clause has
+    // no parent search.
+    int getMaxCl() 
+    {
+	if (m_parentSearch == 0)
+	    return 100000;
+	return m_parentSearch->getMaxCl();
+    }
    virtual void setModifiers(Modifier mod) 
    {
 	m_modifiers = mod;
@ -263,6 +317,7 @@ private:
 * "Simple" data clause with user-entered query text. This can include 
 * multiple phrases and words, but no specified distance.
 */
+class TextSplitQ;
 class SearchDataClauseSimple : public SearchDataClause {
 public:
    SearchDataClauseSimple(SClType tp, const std::string& txt, 
@ -278,7 +333,7 @@ public:
    }

    /** Translate to Xapian query */
-    virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
+    virtual bool toNativeQuery(Rcl::Db &, void *);

    virtual void getTerms(HighlightData& hldata) const
    {
@ -296,6 +351,21 @@ protected:
    std::string  m_text;  // Raw user entry text.
    std::string  m_field; // Field specification if any
    HighlightData m_hldata;
+    int  m_curcl;
+
+    bool processUserString(Rcl::Db &db, const string &iq, int mods,  
+			   std::string &ermsg,
+			   void* pq, int slack = 0, bool useNear = false);
+    bool expandTerm(Rcl::Db &db, std::string& ermsg, int mods, 
+		    const std::string& term, 
+		    std::vector<std::string>& exp, 
+                    std::string& sterm, const std::string& prefix);
+    // After splitting entry on whitespace: process non-phrase element
+    void processSimpleSpan(Rcl::Db &db, string& ermsg, const string& span, 
+			   int mods, void *pq);
+    // Process phrase/near element
+    void processPhraseOrNear(Rcl::Db &db, string& ermsg, TextSplitQ *splitData, 
+			     int mods, void *pq, bool useNear, int slack);
 };

 /** 
@ -306,10 +376,10 @@ protected:
 * field, especially for file names, because this makes searches for
 * "*xx" much faster (no need to scan the whole main index).
 */
-class SearchDataClauseFilename : public SearchDataClauseSimple {
+class SearchDataClauseFilename : public SearchDataClause {
 public:
    SearchDataClauseFilename(const std::string& txt)
-	: SearchDataClauseSimple(SCLT_FILENAME, txt) 
+	: SearchDataClause(SCLT_FILENAME), m_text(txt) 
    {
 	// File name searches don't count when looking for wild cards.
 	m_haveWildCards = false;
@ -319,7 +389,14 @@ public:
    {
    }

-    virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
+    // Empty on purpose: this clause type adds nothing to the highlight
+    // data.
+    virtual void getTerms(HighlightData&) const
+    {
+    }
+
+    virtual bool toNativeQuery(Rcl::Db &, void *);
+
+protected:
+    std::string m_text;
 };

 /** 
@ -338,7 +415,7 @@ public:
    {
    }

-    virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
+    virtual bool toNativeQuery(Rcl::Db &, void *);
    virtual int getslack() const
    {
 	return m_slack;
@ -354,9 +431,9 @@ public:
 	: SearchDataClause(tp), m_sub(sub) 
    {
    }
-    virtual bool toNativeQuery(Rcl::Db &db, void *p, int maxexp, int maxcl)
+    virtual bool toNativeQuery(Rcl::Db &db, void *p)
    {
-	bool ret = m_sub->toNativeQuery(db, p, maxexp, maxcl);
+	bool ret = m_sub->toNativeQuery(db, p);
 	if (!ret) 
 	    m_reason = m_sub->getReason();
 	return ret;
--- a/src/rcldb/searchdataxml.cpp
+++ b/src/rcldb/searchdataxml.cpp
@ -0,0 +1,142 @@
+/* Copyright (C) 2006 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+// Handle translation from rcl's SearchData structures to XML (the form
+// stored in the GUI search history)
+
+#include "autoconfig.h"
+
+#include <stdio.h>
+
+#include <string>
+#include <vector>
+#include <sstream>
+using namespace std;
+
+#include "searchdata.h"
+#include "debuglog.h"
+#include "base64.h"
+
+namespace Rcl {
+
+// Map a clause type to the short tag written for it in the XML output.
+// Any type without a dedicated tag yields "UN".
+static string tpToString(SClType tp)
+{
+    const char *tag = "UN";
+    switch (tp) {
+    case SCLT_AND:      tag = "AND"; break;
+    case SCLT_OR:       tag = "OR";  break;
+    case SCLT_EXCL:     tag = "EX";  break;
+    case SCLT_FILENAME: tag = "FN";  break;
+    case SCLT_PHRASE:   tag = "PH";  break;
+    case SCLT_NEAR:     tag = "NE";  break;
+    case SCLT_SUB:      tag = "SU";  break; // Unsupported actually
+    default:            break;
+    }
+    return tag;
+}
+
+// Serialize this query to XML and return it as a string.
+//
+// Layout: an <SD> element holding a clause list <CL> (one <C> per
+// clause; field and text values are base64-encoded), followed by the
+// optional date bounds (<DMI>/<DMA>), size bounds (<MIS>/<MAS>), file
+// type lists (<ST>/<IT>) and directory filters (<YD>/<ND>).
+//
+// Clauses which cannot be serialized (sub-queries, and any clause which
+// is not a SearchDataClauseSimple) are logged and left out of the
+// output instead of being dereferenced through a null pointer.
+string SearchData::asXML()
+{
+    LOGDEB(("SearchData::asXML\n"));
+    ostringstream os;
+
+    // Searchdata
+    os << "<SD>" << endl;
+
+    // Clause list. The combination type is only written when it is not
+    // AND.
+    os << "<CL>" << endl;
+    if (m_tp != SCLT_AND)
+	os << "<CLT>" << tpToString(m_tp) << "</CLT>" << endl;
+    for (unsigned int i = 0; i <  m_query.size(); i++) {
+	SearchDataClause *c = m_query[i];
+	if (c->getTp() == SCLT_SUB) {
+	    LOGERR(("SearchData::asXML: can't do subclauses !\n"));
+	    continue;
+	}
+	// The cast yields a null pointer for any clause class which does
+	// not derive from SearchDataClauseSimple (e.g. a file name clause
+	// deriving directly from SearchDataClause): check before use.
+	SearchDataClauseSimple *cl = 
+	    dynamic_cast<SearchDataClauseSimple*>(c);
+	if (cl == 0) {
+	    LOGERR(("SearchData::asXML: clause %u (type %s) is not a "
+		    "SearchDataClauseSimple, skipped\n", 
+		    i, tpToString(c->getTp()).c_str()));
+	    continue;
+	}
+	os << "<C>" << endl;
+	if (cl->getTp() != SCLT_AND) {
+	    os << "<CT>" << tpToString(cl->getTp()) << "</CT>" << endl;
+	}
+	if (cl->getTp() != SCLT_FILENAME && !cl->getfield().empty()) {
+	    os << "<F>" << base64_encode(cl->getfield()) << "</F>" << endl;
+	}
+	os << "<T>" << base64_encode(cl->gettext()) << "</T>" << endl;
+	if (cl->getTp() == SCLT_NEAR || cl->getTp() == SCLT_PHRASE) {
+	    // Only distance clauses carry a slack value. Same null check
+	    // as above in case the type tag and the class disagree.
+	    SearchDataClauseDist *cld = 
+		dynamic_cast<SearchDataClauseDist*>(cl);
+	    if (cld != 0) {
+		os << "<S>" << cld->getslack() << "</S>" << endl;
+	    }
+	}
+	os << "</C>" << endl;
+    }
+    os << "</CL>" << endl;
+
+    // Date bounds: each one is only written if its year is set
+    if (m_haveDates) {
+	if (m_dates.y1 > 0) {
+	    os << "<DMI>" << 
+		"<D>" << m_dates.d1 << "</D>" <<
+		"<M>" << m_dates.m1 << "</M>" << 
+		"<Y>" << m_dates.y1 << "</Y>" 
+	       << "</DMI>" << endl;
+	}
+	if (m_dates.y2 > 0) {
+	    os << "<DMA>" << 
+		"<D>" << m_dates.d2 << "</D>" <<
+		"<M>" << m_dates.m2 << "</M>" << 
+		"<Y>" << m_dates.y2 << "</Y>" 
+	       << "</DMA>" << endl;
+	}
+    }
+
+
+    // Size bounds: size_t(-1) means that the bound is not set
+    if (m_minSize != size_t(-1)) {
+	os << "<MIS>" << m_minSize << "</MIS>" << endl;
+    }
+    if (m_maxSize != size_t(-1)) {
+	os << "<MAS>" << m_maxSize << "</MAS>" << endl;
+    }
+
+    // File type lists, written as space-separated values
+    if (!m_filetypes.empty()) {
+	os << "<ST>";
+	for (vector<string>::iterator it = m_filetypes.begin(); 
+	     it != m_filetypes.end(); it++) {
+	    os << *it << " ";
+	}
+	os << "</ST>" << endl;
+    }
+
+    if (!m_nfiletypes.empty()) {
+	os << "<IT>";
+	for (vector<string>::iterator it = m_nfiletypes.begin(); 
+	     it != m_nfiletypes.end(); it++) {
+	    os << *it << " ";
+	}
+	os << "</IT>" << endl;
+    }
+
+    // Directory filters: <ND> for excluded directories, <YD> otherwise
+    for (vector<DirSpec>::const_iterator dit = m_dirspecs.begin();
+	 dit != m_dirspecs.end(); dit++) {
+	if (dit->exclude) {
+	    os << "<ND>" << base64_encode(dit->dir) << "</ND>" << endl;
+	} else {
+	    os << "<YD>" << base64_encode(dit->dir) << "</YD>" << endl;
+	}
+    }
+    os << "</SD>";
+    return os.str();
+}
+
+
+}