Ensure that recoll configured with indexStripChars=1 behaves as if it had been compiled with -DRCL_INDEX_STRIPCHARS

2012-09-15 15:16:20 +02:00 · 2012-09-15 15:16:20 +02:00 · e22b347767
commit e22b347767
parent 48e9a4f901
17 changed files with 425 additions and 260 deletions
--- a/src/aspell/rclaspell.cpp
+++ b/src/aspell/rclaspell.cpp
@ -23,9 +23,9 @@
 #include <unistd.h>
 #include <dlfcn.h>
 #include <iostream>
 #include <stdlib.h>
-#include <vector>
+
 using namespace std;
 #include ASPELL_INCLUDE
@ -33,7 +33,7 @@
 #include "execmd.h"
 #include "rclaspell.h"
 #include "debuglog.h"
-
+#include "unacpp.h"
 #include "ptmutex.h"
 // Just a place where we keep the Aspell library entry points together
@ -260,6 +260,14 @@ public:
 	while (m_db.termWalkNext(m_tit, *m_input)) {
 	    if (!Rcl::Db::isSpellingCandidate(*m_input))
 		continue;
 #ifndef RCL_INDEX_STRIPCHARS
 	    if (!o_index_stripchars) {
 		string lower;
 		if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
 		    continue;
 		m_input->swap(lower);
 	    }
 #endif
 	    // Got a non-empty sort-of appropriate term, let's send it to
 	    // aspell
 	    m_input->append("\n");
@ -335,17 +343,29 @@ bool Aspell::make_speller(string& reason)
    return true;
 }
-bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
+bool Aspell::check(const string &iterm, string& reason)
 {
-    LOGDEB2(("Aspell::check [%s]\n", term.c_str()));
+    LOGDEB2(("Aspell::check [%s]\n", iterm.c_str()));
    string mterm(iterm);
    if (!ok() || !make_speller(reason))
 	return false;
-    if (term.empty())
+    if (iterm.empty())
        return true; //??
 #ifndef RCL_INDEX_STRIPCHARS
    if (!o_index_stripchars) {
 	string lower;
 	if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
 	    LOGERR(("Aspell::check : cant lowercase input\n"));
 	    return false;
 	}
 	mterm.swap(lower);
    }
 #endif
    int ret = aapi.aspell_speller_check(m_data->m_speller, 
-                                        term.c_str(), term.length());
+                                        mterm.c_str(), mterm.length());
    reason.clear();
    switch (ret) {
    case 0: return false;
@ -358,19 +378,31 @@ bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
    }
 }
-bool Aspell::suggest(Rcl::Db &db, const string &term, 
+bool Aspell::suggest(Rcl::Db &db, const string &_term, 
                     list<string>& suggestions, string& reason)
 {
    if (!ok() || !make_speller(reason))
 	return false;
-    if (term.empty())
+    string mterm(_term);
    if (mterm.empty())
        return true; //??
 #ifndef RCL_INDEX_STRIPCHARS
    if (!o_index_stripchars) {
 	string lower;
 	if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
 	    LOGERR(("Aspell::check : cant lowercase input\n"));
 	    return false;
 	}
 	mterm.swap(lower);
    }
 #endif
    AspellCanHaveError *ret;
    const AspellWordList *wl = 
 	aapi.aspell_speller_suggest(m_data->m_speller, 
-                                    term.c_str(), term.length());
+                                    mterm.c_str(), mterm.length());
    if (wl == 0) {
 	reason = aapi.aspell_speller_error_message(m_data->m_speller);
 	return false;
@ -385,7 +417,7 @@ bool Aspell::suggest(Rcl::Db &db, const string &term,
        // ******** This should depend if
 	// stemming is turned on or not for querying  *******
 	string sw(word);
-	if (db.termExists(sw) && db.stemDiffers("english", sw, term))
+	if (db.termExists(sw) && db.stemDiffers("english", sw, mterm))
 	    suggestions.push_back(word);
    }
    aapi.delete_aspell_string_enumeration(els);
@ -418,7 +450,6 @@ using namespace std;
 static char *thisprog;
 RclConfig *rclconfig;
 Rcl::Db rcldb;
 static char usage [] =
 " -b : build dictionary\n"
@ -477,7 +508,9 @@ int main(int argc, char **argv)
 	exit(1);
    }
-    if (!rcldb.open(dbdir, Rcl::Db::DbRO, 0)) {
+    Rcl::Db rcldb(rclconfig);
    if (!rcldb.open(Rcl::Db::DbRO, 0)) {
 	fprintf(stderr, "Could not open database in %s\n", dbdir.c_str());
 	exit(1);
    }
--- a/src/aspell/rclaspell.h
+++ b/src/aspell/rclaspell.h
@ -37,11 +37,6 @@
 #include "rclconfig.h"
 #include "rcldb.h"
 #ifndef NO_NAMESPACES
 using std::string;
 using std::list;
 #endif // NO_NAMESPACES
 class AspellData;
 class Aspell {
@ -53,26 +48,26 @@ class Aspell {
    bool ok() const;
    /** Find the aspell command and shared library, init function pointers */
-    bool init(string &reason); 
+    bool init(std::string &reason); 
    /**  Build dictionary out of index term list. This is done at the end
     * of an indexing pass. */
-    bool buildDict(Rcl::Db &db, string &reason);
+    bool buildDict(Rcl::Db &db, std::string &reason);
    /** Check that word is in dictionary. ret==false && !reason.empty() => err*/
-    bool check(Rcl::Db &db, const string& term, string& reason);
+    bool check(const std::string& term, std::string& reason);
    /** Return a list of possible expansions for a given word */
-    bool suggest(Rcl::Db &db, const string& term, list<string> &suggestions, 
+    bool suggest(Rcl::Db &db, const std::string& term, 
-		 string &reason);
+		 std::list<std::string> &suggestions, std::string &reason);
 private:
-    string dicPath();
+    std::string dicPath();
    RclConfig  *m_config;
-    string      m_lang;
+    std::string      m_lang;
    AspellData *m_data;
-    bool make_speller(string& reason);
+    bool make_speller(std::string& reason);
 };
 #endif /* RCL_USE_ASPELL */
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -15,6 +15,8 @@
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 #ifndef TEST_RCLCONFIG
 #include "autoconfig.h"
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdio.h>
@ -34,6 +36,7 @@
 #include <iostream>
 #include <cstdlib>
 #include <cstring>
 using namespace std;
 #include "cstr.h"
 #include "pathut.h"
@ -45,15 +48,8 @@
 #include "readfile.h"
 #include "fstreewalk.h"
-#ifndef NO_NAMESPACES
+#ifndef RCL_INDEX_STRIPCHARS
-using namespace std;
+bool o_index_stripchars;
 #endif /* NO_NAMESPACES */
 #ifndef MIN
 #define MIN(A,B) (((A)<(B)) ? (A) : (B))
 #endif
 #ifndef MAX
 #define MAX(A,B) (((A)>(B)) ? (A) : (B))
 #endif
 bool ParamStale::needrecompute()
@ -77,6 +73,7 @@ bool ParamStale::needrecompute()
    }
    return false;
 }
 void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm)
 {
    parent = rconf;
@ -239,6 +236,14 @@ bool RclConfig::updateMainConfig()
 	FsTreeWalker::setNoFnmPathname();
    }
 #ifndef RCL_INDEX_STRIPCHARS
    static int m_index_stripchars_init = 0;
    if (!m_index_stripchars_init) {
 	getConfParam("indexStripChars", &o_index_stripchars);
 	m_index_stripchars_init = 1;
    }
 #endif
    return true;
 }
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@ -303,5 +303,13 @@ class RclConfig {
    bool readFieldsConfig(const string& errloc);
 };
-
+// This global variable defines if we are running with an index
 // stripped of accents and case or a raw one. Ideally, it should be
 // constant, but it needs to be initialized from the configuration, so
 // there is no way to do this. It never changes after initialization
 // of course. When set, it is supposed to make all of recoll behave as if
 // it was compiled with RCL_INDEX_STRIPCHARS
 #ifndef  RCL_INDEX_STRIPCHARS
 extern bool o_index_stripchars;
 #endif
 #endif /* _RCLCONFIG_H_INCLUDED_ */
--- a/src/qtgui/reslist.cpp
+++ b/src/qtgui/reslist.cpp
@ -197,10 +197,14 @@ void QtGuiResListPager::suggest(const vector<string>uterms,
 	// If the term is in the index, we don't suggest alternatives. 
 	// Actually, we may want to check the frequencies and propose something
 	// anyway if a possible variation is much more common (as google does)
-        if (aspell->check(*rcldb, *uit, reason))
+#warning need to take case and diacs sensibility into account somehow	
 	// Maybe use the xapian index instead ? How to retrieve the sensitivity flags ?
 	if (0) {
        if (aspell->check(*uit, reason))
            continue;
        else if (!reason.empty())
            return;
 	}
        if (!aspell->suggest(*rcldb, *uit, asuggs, reason)) {
            LOGERR(("QtGuiResListPager::suggest: aspell failed: %s\n", 
                    reason.c_str()));
@ -336,6 +340,7 @@ ResList::~ResList()
 	QT_TR_NOOP("Open"),
 	QT_TR_NOOP("(show query)"),
        QT_TR_NOOP("<p><i>Alternate spellings (accents suppressed): </i>"),
        QT_TR_NOOP("<p><i>Alternate spellings: </i>"),
    };
 }
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@ -79,22 +79,30 @@ class TextSplitPTR : public TextSplit {
 	for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
 	     vit != hdata.groups.end(); vit++) {
 	    if (vit->size() == 1) {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
-		m_terms[vit->front()] = vit - hdata.groups.begin();
+		if (o_index_stripchars) {
-#else
+#endif
-		string dumb = vit->front();
+		    m_terms[vit->front()] = vit - hdata.groups.begin();
-		unacmaybefold(vit->front(), dumb, "UTF-8", UNACOP_UNACFOLD);
+#ifndef RCL_INDEX_STRIPCHARS
-		m_terms[dumb] = vit - hdata.groups.begin();
+		} else {
 		    string dumb = vit->front();
 		    unacmaybefold(vit->front(), dumb, "UTF-8", UNACOP_UNACFOLD);
 		    m_terms[dumb] = vit - hdata.groups.begin();
 		}
 #endif
 	    } else if (vit->size() > 1) {
 		for (vector<string>::const_iterator it = vit->begin(); 
 		     it != vit->end(); it++) {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
 		if (o_index_stripchars) {
 #endif
 		    m_gterms.insert(*it);
-#else
+#ifndef RCL_INDEX_STRIPCHARS
 		} else {
 		    string dumb = *it;
 		    unacmaybefold(*it, dumb, "UTF-8", UNACOP_UNACFOLD);
 		    m_gterms.insert(dumb);
 		}
 #endif
 		}
 	    }
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@ -320,9 +320,16 @@ void ResListPager::displayPage(RclConfig *config)
            map<string, vector<string> > spellings;
            suggest(uterms, spellings);
            if (!spellings.empty()) {
-                chunk << 
+		if (o_index_stripchars) {
-                 trans("<p><i>Alternate spellings (accents suppressed): </i>")
+		    chunk << 
-		      << "<br /><blockquote>";
+		trans("<p><i>Alternate spellings (accents suppressed): </i>")
 			  << "<br /><blockquote>";
 		} else {
 		    chunk << 
 			trans("<p><i>Alternate spellings: </i>")
 			  << "<br /><blockquote>";
 		}
 		for (map<string, vector<string> >::const_iterator it0 =
 			 spellings.begin(); it0 != spellings.end(); it0++) {
--- a/src/query/xadump.cpp
+++ b/src/query/xadump.cpp
@ -116,12 +116,20 @@ static void sigcleanup(int sig)
    exit(1);
 }
 #ifndef RCL_INDEX_STRIPCHARS
 bool o_index_stripchars;
 #endif
 inline bool has_prefix(const string& trm)
 {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
-    return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
+    if (o_index_stripchars) {
-#else
+#endif
-    return trm.size() > 0 && trm[0] == ':';
+	return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
 #ifndef RCL_INDEX_STRIPCHARS
    } else {
 	return trm.size() > 0 && trm[0] == ':';
    }
 #endif
 }
@ -201,10 +209,22 @@ int main(int argc, char **argv)
    try {
 	db = new Xapian::Database(dbdir);
 	cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
 	    db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
-	    
+
 #ifndef RCL_INDEX_STRIPCHARS
 	// If we have terms with a leading ':' it's a new style,
 	// unstripped index
 	{
 	    Xapian::TermIterator term = db->allterms_begin(":");
 	    if (term == db->allterms_end())
 		o_index_stripchars = true;
 	    else
 		o_index_stripchars = false;
 	    cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
 	}
 #endif
 	if (op_flags & OPT_T) {
 	    Xapian::TermIterator term;
 	    string printable;
--- a/src/rcldb/expansiondbs.cpp
+++ b/src/rcldb/expansiondbs.cpp
@ -63,17 +63,19 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
    // Unaccented stem dbs
    vector<XapWritableComputableSynFamMember> unacstemdbs;
    // We can reuse the same stemmer pointers, the objects are stateless.
-    for (unsigned int i = 0; i < langs.size(); i++) {
+    if (!o_index_stripchars) {
-	unacstemdbs.push_back(
+	for (unsigned int i = 0; i < langs.size(); i++) {
-	    XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
+	    unacstemdbs.push_back(
-					      stemmers.back().getptr()));
+		XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
-	unacstemdbs.back().recreate();
+						  stemmers.back().getptr()));
 	    unacstemdbs.back().recreate();
 	}
    }
    SynTermTransUnac transunac(UNACOP_UNACFOLD);
    XapWritableComputableSynFamMember 
 	diacasedb(wdb, synFamDiCa, "all", &transunac);
-    diacasedb.recreate();
+    if (!o_index_stripchars)
 	diacasedb.recreate();
 #endif
    // Walk the list of all terms, and stem/unac each.
@ -109,8 +111,10 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // is the input to the stem db, and add a synonym from the
 	    // stripped term to the cased and accented one, for accent
 	    // and case expansion at query time
-	    unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
+	    if (!o_index_stripchars) {
-	    diacasedb.addSynonym(*it);
+		unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
 		diacasedb.addSynonym(*it);
 	    }
 #endif
 	    // Create stemming synonym for every language. The input is the 
@ -124,12 +128,15 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // the unaccented term. While this may be incorrect, it is
 	    // also necessary for searching in a diacritic-insensitive
 	    // way on a raw index
-	    string unac;
+	    if (!o_index_stripchars) {
-	    unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
+		string unac;
-	    if (unac != lower)
+		unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
-		for (unsigned int i = 0; i < langs.size(); i++) {
+		if (unac != lower) {
-		    unacstemdbs[i].addSynonym(unac);
+		    for (unsigned int i = 0; i < langs.size(); i++) {
 			unacstemdbs[i].addSynonym(unac);
 		    }
 		}
 	    }
 #endif
        }
    } XCATCHERROR(ermsg);
--- a/src/rcldb/expansiondbs.h
+++ b/src/rcldb/expansiondbs.h
@ -24,10 +24,13 @@
 namespace Rcl {
-/* A Capitals/Diacritics removal functor for using with
+/** A Capitals/Diacritics removal functor for using with
-   XapComputableSynFamMember */
+ *  XapComputableSynFamMember */
 class SynTermTransUnac : public SynTermTrans {
 public:
    /** Constructor
     * @param op defines if we remove diacritics, case or both 
     */
    SynTermTransUnac(UnacOp op)
    : m_op(op)
    {
@ -43,7 +46,9 @@ public:
    UnacOp m_op;
 };
-/** Walk the Xapian term list and create all the expansion dbs in one go */
+/** Walk the Xapian term list and create all the expansion dbs in one go.
 * 
 */
 extern bool createExpansionDbs(Xapian::WritableDatabase& wdb, 
 			       const std::vector<std::string>& langs);
 }
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -92,10 +92,11 @@ const string start_of_field_term = "XXST";
 const string end_of_field_term = "XXND";
 static const string page_break_term = "XXPG";
 #else
-const string start_of_field_term = "XXST/";
+string start_of_field_term;
-const string end_of_field_term = "XXND/";
+string end_of_field_term;
-static const string page_break_term = "XXPG/";
+const string page_break_term = "XXPG/";
 #endif
 // Field name for the unsplit file name. Has to exist in the field file 
 // because of usage in termmatch()
 static const string unsplitFilenameFieldName = "rclUnsplitFN";
@ -683,6 +684,18 @@ Db::Db(RclConfig *cfp)
      m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
      m_maxFsOccupPc(0), m_mode(Db::DbRO)
 {
 #ifndef RCL_INDEX_STRIPCHARS
    if (start_of_field_term.empty()) {
 	if (o_index_stripchars) {
 	    start_of_field_term = "XXST";
 	    end_of_field_term = "XXND";
 	} else {
 	    start_of_field_term = "XXST/";
 	    end_of_field_term = "XXND/";
 	}
    }
 #endif
    m_ndb = new Native(this);
    if (m_config) {
 	m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
@ -886,12 +899,13 @@ int Db::termDocCnt(const string& _term)
        return -1;
    string term = _term;
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
-    if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
+    if (o_index_stripchars)
 	LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
 	return 0;
    }
 #endif
 	if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
 	    LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
 	    return 0;
 	}
    if (m_stops.isStop(term)) {
 	LOGDEB1(("Db::termDocCnt [%s] in stop list\n", term.c_str()));
@ -1151,13 +1165,17 @@ string Db::getSpellingSuggestion(const string& word)
 {
    if (m_ndb == 0)
 	return string();
    string term = word;
-#ifdef RCL_INDEX_STRIPCHARS
+
 #ifndef RCL_INDEX_STRIPCHARS
    if (o_index_stripchars)
 #endif
    if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
 	LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
 	return string();
    }
-#endif
+
    if (!isSpellingCandidate(term))
 	return string();
    return m_ndb->xrdb.get_spelling_suggestion(term);
@ -1266,9 +1284,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
    TermProc *nxt = &tpidx;
    TermProcStop tpstop(nxt, m_stops);nxt = &tpstop;
    //TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
-#ifdef RCL_INDEX_STRIPCHARS
+
-    TermProcPrep tpprep(nxt); nxt = &tpprep;
+    TermProcPrep tpprep(nxt);
 #ifndef RCL_INDEX_STRIPCHARS
    if (o_index_stripchars)
 #endif
 	nxt = &tpprep;
    TextSplitDb splitter(newdocument, nxt);
    tpidx.setTSD(&splitter);
@ -1951,12 +1972,15 @@ bool Db::termMatch(MatchType typ, const string &lang,
    // Get rid of capitals and accents
    string droot = root;
-#ifdef RCL_INDEX_STRIPCHARS
+
-    if (!unacmaybefold(root, droot, "UTF-8", UNACOP_UNACFOLD)) {
+#ifndef RCL_INDEX_STRIPCHARS
-	LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
+    if (o_index_stripchars)
 	return false;
    }
 #endif
 	if (!unacmaybefold(root, droot, "UTF-8", UNACOP_UNACFOLD)) {
 	    LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
 	    return false;
 	}
    string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
    string prefix;
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -129,18 +129,27 @@ extern  void *DbUpdWorker(void*);
 inline bool has_prefix(const string& trm)
 {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
-    return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
+    if (o_index_stripchars) {
-#else
+#endif
-    return !trm.empty() && trm[0] == ':';
+	return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
 #ifndef RCL_INDEX_STRIPCHARS
    } else {
 	return !trm.empty() && trm[0] == ':';
    }
 #endif
 }
 inline string wrap_prefix(const string& pfx) 
 {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
-    return pfx;
+    if (o_index_stripchars) {
-#else
+#endif
-    return cstr_colon + pfx + cstr_colon;
+	return pfx;
 #ifndef RCL_INDEX_STRIPCHARS
    } else {
 	return cstr_colon + pfx + cstr_colon;
    }
 #endif
 }
@ -384,9 +393,13 @@ private:
 string version_string();
 extern const string pathelt_prefix;
 #ifdef RCL_INDEX_STRIPCHARS
 extern const string start_of_field_term;
 extern const string end_of_field_term;
-
+#else
 extern string start_of_field_term;
 extern string end_of_field_term;
 #endif
 }
 #endif /* _DB_H_INCLUDED_ */
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -79,10 +79,22 @@ static const int original_term_wqf_booster = 10;
 #ifdef RCL_INDEX_STRIPCHARS
 #define bufprefix(BUF, L) {(BUF)[0] = L;}
-#define bpoffs 1
+#define bpoffs() 1
 #else
-#define bufprefix(BUF, L) {(BUF)[0] = ':'; (BUF)[1] = L; (BUF)[2] = ':';}
+static inline void bufprefix(char *buf, char c)
-#define bpoffs 3
+{
    if (o_index_stripchars) {
 	buf[0] = c;
    } else {
 	buf[0] = ':'; 
 	buf[1] = c; 
 	buf[2] = ':';
    }
 }
 static inline int bpoffs() 
 {
    return o_index_stripchars ? 1 : 3;
 }
 #endif
 static Xapian::Query
@ -92,7 +104,7 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
    // only doing %d's !
    char buf[200];
    bufprefix(buf, 'D');
-    sprintf(buf+bpoffs, "%04d%02d", y1, m1);
+    sprintf(buf+bpoffs(), "%04d%02d", y1, m1);
    vector<Xapian::Query> v;
    int d_last = monthdays(m1, y1);
@ -103,7 +115,7 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
    // Deal with any initial partial month
    if (d1 > 1 || d_end < d_last) {
    	for ( ; d1 <= d_end ; d1++) {
-	    sprintf(buf + 6 + bpoffs, "%02d", d1);
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
 	    v.push_back(Xapian::Query(buf));
 	}
    } else {
@ -117,32 +129,32 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
    int m_last = (y1 < y2) ? 12 : m2 - 1;
    while (++m1 <= m_last) {
-	sprintf(buf + 4 + bpoffs, "%02d", m1);
+	sprintf(buf + 4 + bpoffs(), "%02d", m1);
 	bufprefix(buf, 'M');
 	v.push_back(Xapian::Query(buf));
    }
    if (y1 < y2) {
 	while (++y1 < y2) {
-	    sprintf(buf + bpoffs, "%04d", y1);
+	    sprintf(buf + bpoffs(), "%04d", y1);
 	    bufprefix(buf, 'Y');
 	    v.push_back(Xapian::Query(buf));
 	}
-	sprintf(buf + bpoffs, "%04d", y2);
+	sprintf(buf + bpoffs(), "%04d", y2);
 	bufprefix(buf, 'M');
 	for (m1 = 1; m1 < m2; m1++) {
-	    sprintf(buf + 4 + bpoffs, "%02d", m1);
+	    sprintf(buf + 4 + bpoffs(), "%02d", m1);
 	    v.push_back(Xapian::Query(buf));
 	}
    }
-    sprintf(buf + 2 + bpoffs, "%02d", m2);
+    sprintf(buf + 2 + bpoffs(), "%02d", m2);
    // Deal with any final partial month
    if (d2 < monthdays(m2, y2)) {
 	bufprefix(buf, 'D');
    	for (d1 = 1 ; d1 <= d2; d1++) {
-	    sprintf(buf + 6 + bpoffs, "%02d", d1);
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
 	    v.push_back(Xapian::Query(buf));
 	}
    } else {
@ -663,13 +675,13 @@ static void listVector(const string& what, const vector<string>&l)
 */
 void StringToXapianQ::expandTerm(int mods, 
 				 const string& term, 
-                                 vector<string>& exp, string &sterm,
+                                 vector<string>& oexp, string &sterm,
 				 const string& prefix)
 {
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
 	     mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
    sterm.clear();
-    exp.clear();
+    oexp.clear();
    if (term.empty())
 	return;
@ -693,145 +705,161 @@ void StringToXapianQ::expandTerm(int mods,
    bool diac_sensitive = (mods & SearchDataClause::SDCM_DIACSENS) != 0;
    bool case_sensitive = (mods & SearchDataClause::SDCM_CASESENS) != 0;
-    // If we are working with a raw index, apply the rules for case and 
+    if (o_index_stripchars) {
-    // diacritics sensitivity.
+	diac_sensitive = case_sensitive = false;
    } else {
 	// If we are working with a raw index, apply the rules for case and 
 	// diacritics sensitivity.
-    // If any character has a diacritic, we become
+	// If any character has a diacritic, we become
-    // diacritic-sensitive. Note that the way that the test is
+	// diacritic-sensitive. Note that the way that the test is
-    // performed (conversion+comparison) will automatically ignore
+	// performed (conversion+comparison) will automatically ignore
-    // accented characters which are actually a separate letter
+	// accented characters which are actually a separate letter
-    if (unachasaccents(term))
+	if (unachasaccents(term))
-	diac_sensitive = true;
+	    diac_sensitive = true;
-    // If any character apart the first is uppercase, we become case-sensitive. 
+	// If any character apart the first is uppercase, we become
-    // The first character is reserved for turning off stemming. You need to
+	// case-sensitive.  The first character is reserved for
-    // use a query language modifier to search for Floor in a case-sensitive
+	// turning off stemming. You need to use a query language
-    // way.
+	// modifier to search for Floor in a case-sensitive way.
-    Utf8Iter it(term);
+	Utf8Iter it(term);
-    it++;
+	it++;
-    if (unachasuppercase(term.substr(it.getBpos())))
+	if (unachasuppercase(term.substr(it.getBpos())))
-	case_sensitive = true;
+	    case_sensitive = true;
-    // If we are sensitive to case or diacritics turn stemming off
+	// If we are sensitive to case or diacritics turn stemming off
-    if (diac_sensitive || case_sensitive)
+	if (diac_sensitive || case_sensitive)
-	nostemexp = true;
+	    nostemexp = true;
-    if (!case_sensitive || !diac_sensitive)
+	if (!case_sensitive || !diac_sensitive)
-	noexpansion = false;
+	    noexpansion = false;
    }
 #endif
    if (noexpansion) {
 	sterm = term;
-	exp.push_back(prefix + term);
+	oexp.push_back(prefix + term);
-    } else {
+	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
-	TermMatchResult res;
+	return;
-	if (haswild) {
+    } 
 	    // Note that if there are wildcards, we do a direct from-index
 	    // expansion, which means that we are casediac-sensitive. There
 	    // would be nothing to prevent us to expand from the casediac
 	    // synonyms first. To be done later
 	    m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
                           m_field);
 	} else {
 	    sterm = term;
 #ifdef RCL_INDEX_STRIPCHARS
 	    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, 
                           m_field);
 #else
 	    // No stem expansion when diacritic or case sensitivity is
 	    // set, it makes no sense (it would mess with the
 	    // diacritics anyway if they are not in the stem part).
 	    // In these 3 cases, perform appropriate expansion from
 	    // the charstripping db, and do a bogus wildcard expansion
 	    // (there is no wild card) to generate the result:
 	    if (diac_sensitive && case_sensitive) {
 		// No expansion whatsoever
 		m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
 			       m_field);
 	    } else {
 		// Access case and diacritics expansion:
 		vector<string> exp;
 		SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
 		XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa,
 						"all", &unacfoldtrans);
-		if (diac_sensitive) {
+    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
-		    // Expand for accents and case, filtering for same accents,
+    XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all", 
-		    // then bogus wildcard expansion for generating result
+				    &unacfoldtrans);
-		    SynTermTransUnac foldtrans(UNACOP_FOLD);
+    vector<string> lexp;
 		    synac.synExpand(term, exp, &foldtrans);
 		    for (vector<string>::const_iterator it = exp.begin();
 			 it != exp.end(); it++) {
 			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
 				       -1, m_field);
 		    }
 		} else if (case_sensitive) {
 		    // Expand for accents and case, filtering for same case,
 		    // then bogus wildcard expansion for generating result
 		    SynTermTransUnac unactrans(UNACOP_UNAC);
 		    synac.synExpand(term, exp, &unactrans);
 		    for (vector<string>::const_iterator it = exp.begin();
 			 it != exp.end(); it++) {
 			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
 				       -1, m_field);
 		    }
 		} else {
 		    // Expand for accents and case, then lowercase
 		    // result for input to stemdb.
 		    synac.synExpand(term, exp);
 		    for (unsigned int i = 0; i < exp.size(); i++) {
 			string lower;
 			unacmaybefold(exp[i], lower, "UTF-8", UNACOP_FOLD);
 			exp[i] = lower;
 		    }
 		    sort(exp.begin(), exp.end());
 		    vector<string>::iterator uit = 
 			unique(exp.begin(), exp.end());
 		    exp.resize(uit - exp.begin());
 		    LOGDEB(("ExpandTerm: after casediac: %s\n", 
 			    stringsToString(exp).c_str()));
-		    StemDb db(m_db.m_ndb->xrdb);
+    TermMatchResult res;
-		    vector<string> exp1;
+    if (haswild) {
-		    for (vector<string>::const_iterator it = exp.begin();
+	// Note that if there are wildcards, we do a direct from-index
-			 it != exp.end(); it++) {
+	// expansion, which means that we are casediac-sensitive. There
-			db.stemExpand(m_stemlang, *it, exp1);
+	// would be nothing to prevent us to expand from the casediac
-		    }
+	// synonyms first. To be done later
-		    LOGDEB(("ExpandTerm: after stem: %s\n", 
+	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
-			    stringsToString(exp1).c_str()));
+		       m_field);
-
+	goto termmatchtoresult;
 		    // Expand the resulting list for case (all stemdb content
 		    // is lowercase)
 		    exp.clear();
 		    for (vector<string>::const_iterator it = exp1.begin();
 			 it != exp1.end(); it++) {
 			synac.synExpand(*it, exp);
 		    }
 		    sort(exp.begin(), exp.end());
 		    uit = unique(exp.begin(), exp.end());
 		    exp.resize(uit - exp.begin());
 		    LOGDEB(("ExpandTerm: after case exp of stem: %s\n", 
 			    stringsToString(exp).c_str()));
                    // Bogus wildcard expand to generate the result
 		    for (vector<string>::const_iterator it = exp.begin();
 			 it != exp.end(); it++) {
 			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
 				       -1, m_field);
 		    }
 		}
 	    }
 #endif
 	}
 	for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
 	     it != res.entries.end(); it++) {
 	    exp.push_back(it->term);
 	}
 	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(exp).c_str()));
    }
    sterm = term;
 #ifdef RCL_INDEX_STRIPCHARS
    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
 #else
    if (o_index_stripchars) {
 	// If the index is stripped, we can only come here if nostemexp is unset
 	// and we just need stem expansion.
 	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
 	goto termmatchtoresult;
    } 
    // No stem expansion when diacritic or case sensitivity is set, it
    // makes no sense (it would mess with the diacritics anyway if
    // they are not in the stem part).  In these 3 cases, perform
    // appropriate expansion from the charstripping db, and do a bogus
    // wildcard expansion (there is no wild card) to generate the
    // result:
    if (diac_sensitive && case_sensitive) {
 	// No expansion whatsoever
 	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
 	goto termmatchtoresult;
    }
    if (diac_sensitive) {
 	// Expand for accents and case, filtering for same accents,
 	// then bogus wildcard expansion for generating result
 	SynTermTransUnac foldtrans(UNACOP_FOLD);
 	synac.synExpand(term, lexp, &foldtrans);
 	goto exptotermatch;
    } 
    if (case_sensitive) {
 	// Expand for accents and case, filtering for same case, then
 	// bogus wildcard expansion for generating result
 	SynTermTransUnac unactrans(UNACOP_UNAC);
 	synac.synExpand(term, lexp, &unactrans);
 	goto exptotermatch;
    }
    // We are neither accent- nor case- sensitive and may need stem
    // expansion or not.
    // Expand for accents and case
    synac.synExpand(term, lexp);
    LOGDEB(("ExpTerm: casediac: %s\n", stringsToString(lexp).c_str()));
    if (nostemexp)
 	goto exptotermatch;
    // Need stem expansion. Lowercase the result of accent and case
    // expansion for input to stemdb.
    for (unsigned int i = 0; i < lexp.size(); i++) {
 	string lower;
 	unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
 	lexp[i] = lower;
    }
    sort(lexp.begin(), lexp.end());
    {
 	vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
 	lexp.resize(uit - lexp.begin());
 	StemDb db(m_db.m_ndb->xrdb);
 	vector<string> exp1;
 	for (vector<string>::const_iterator it = lexp.begin(); 
 	     it != lexp.end(); it++) {
 	    db.stemExpand(m_stemlang, *it, exp1);
 	}
 	LOGDEB(("ExpTerm: stem: %s\n", stringsToString(exp1).c_str()));
 	// Expand the resulting list for case (all stemdb content
 	// is lowercase)
 	lexp.clear();
 	for (vector<string>::const_iterator it = exp1.begin(); 
 	     it != exp1.end(); it++) {
 	    synac.synExpand(*it, lexp);
 	}
 	sort(lexp.begin(), lexp.end());
 	uit = unique(lexp.begin(), lexp.end());
 	lexp.resize(uit - lexp.begin());
    }
    LOGDEB(("ExpTerm: case exp of stem: %s\n", stringsToString(lexp).c_str()));
    // Bogus wildcard expand to generate the result
 exptotermatch:
    for (vector<string>::const_iterator it = lexp.begin();
 	 it != lexp.end(); it++) {
 	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, 
 		       res, -1, m_field);
    }
 #endif
    // Term match entries to vector of terms
 termmatchtoresult:
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
 	 it != res.entries.end(); it++) {
 	oexp.push_back(it->term);
    }
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
 }
 // Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@ -1097,9 +1125,11 @@ bool StringToXapianQ::processUserString(const string &iq,
            TermProcStop tpstop(nxt, stops); nxt = &tpstop;
            //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
            //tpcommon.onlygrams(true);
-#ifdef RCL_INDEX_STRIPCHARS
+	    TermProcPrep tpprep(nxt);
-	    TermProcPrep tpprep(nxt); nxt = &tpprep;
+#ifndef RCL_INDEX_STRIPCHARS
 	    if (o_index_stripchars)
 #endif
 		nxt = &tpprep;
 	    TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | 
 						 TextSplit::TXTS_KEEPWILD), 
--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@ -26,6 +26,8 @@
 #include <algorithm>
 #include <map>
 #include <iostream>
 using namespace std;
 #include <xapian.h>
@ -34,18 +36,14 @@
 #include "smallut.h"
 #include "synfamily.h"
 #include "unacpp.h"
-
+#include "rclconfig.h"
 #include <iostream>
 using namespace std;
 namespace Rcl {
 /**
 * Expand for one or several languages
 */
-bool StemDb::stemExpand(const std::string& langs,
+bool StemDb::stemExpand(const std::string& langs, const std::string& term,
 			const std::string& term,
 			vector<string>& result)
 {
    vector<string> llangs;
@ -59,14 +57,17 @@ bool StemDb::stemExpand(const std::string& langs,
    }
 #ifndef RCL_INDEX_STRIPCHARS
-    for (vector<string>::const_iterator it = llangs.begin();
+    // Expand the unaccented stem
-	 it != llangs.end(); it++) {
+    if (!o_index_stripchars) {
-	SynTermTransStem stemmer(*it);
+	for (vector<string>::const_iterator it = llangs.begin();
-	XapComputableSynFamMember expander(getdb(), synFamStemUnac, 
+	     it != llangs.end(); it++) {
-					   *it, &stemmer);
+	    SynTermTransStem stemmer(*it);
-	string unac;
+	    XapComputableSynFamMember expander(getdb(), synFamStemUnac, 
-	unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
+					       *it, &stemmer);
-	(void)expander.synExpand(unac, result);
+	    string unac;
 	    unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
 	    (void)expander.synExpand(unac, result);
 	}
    }
 #endif 
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@ -33,17 +33,12 @@
 #include <string>
 #include <iostream>
 #include <list>
 using namespace std;
 #include "smallut.h"
 #include "utf8iter.h"
 #include "hldata.h"
 #ifndef NO_NAMESPACES
 using namespace std;
 #endif /* NO_NAMESPACES */
 #define MIN(A,B) ((A)<(B)?(A):(B))
 int stringicmp(const string & s1, const string& s2) 
 {
    string::const_iterator it1 = s1.begin();
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@ -224,4 +224,11 @@ public:
    }
 };
 #ifndef MIN
 #define MIN(A,B) (((A)<(B)) ? (A) : (B))
 #endif
 #ifndef MAX
 #define MAX(A,B) (((A)>(B)) ? (A) : (B))
 #endif
 #endif /* _SMALLUT_H_INCLUDED_ */
--- a/tests/config/recoll.conf
+++ b/tests/config/recoll.conf
@ -4,6 +4,8 @@ logfilename = /tmp/logrcltst
 daemloglevel = 6
 daemlogfilename = /tmp/rclmontrace
 indexStripChars = 1
 topdirs = /home/dockes/projets/fulltext/testrecoll/
 skippedPaths = \