ensure that recoll configured with indexStripChars=1 runs as compiled with -DRCL_INDEX_STRIPCHARS

2012-09-15 15:16:20 +02:00 · 2012-09-15 15:16:20 +02:00 · e22b347767
commit e22b347767
parent 48e9a4f901
17 changed files with 425 additions and 260 deletions
--- a/src/aspell/rclaspell.cpp
+++ b/src/aspell/rclaspell.cpp
@ -23,9 +23,9 @@

 #include <unistd.h>
 #include <dlfcn.h>
-#include <iostream>
 #include <stdlib.h>
-#include <vector>
+
+using namespace std;

 #include ASPELL_INCLUDE

@ -33,7 +33,7 @@
 #include "execmd.h"
 #include "rclaspell.h"
 #include "debuglog.h"
-
+#include "unacpp.h"
 #include "ptmutex.h"

 // Just a place where we keep the Aspell library entry points together
@ -260,6 +260,14 @@ public:
 	while (m_db.termWalkNext(m_tit, *m_input)) {
 	    if (!Rcl::Db::isSpellingCandidate(*m_input))
 		continue;
+#ifndef RCL_INDEX_STRIPCHARS
+	    if (!o_index_stripchars) {
+		string lower;
+		if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
+		    continue;
+		m_input->swap(lower);
+	    }
+#endif
 	    // Got a non-empty sort-of appropriate term, let's send it to
 	    // aspell
 	    m_input->append("\n");
@ -335,17 +343,29 @@ bool Aspell::make_speller(string& reason)
    return true;
 }

-bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
+bool Aspell::check(const string &iterm, string& reason)
 {
-    LOGDEB2(("Aspell::check [%s]\n", term.c_str()));
+    LOGDEB2(("Aspell::check [%s]\n", iterm.c_str()));
+    string mterm(iterm);

    if (!ok() || !make_speller(reason))
 	return false;
-    if (term.empty())
+    if (iterm.empty())
        return true; //??

+#ifndef RCL_INDEX_STRIPCHARS
+    if (!o_index_stripchars) {
+	string lower;
+	if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
+	    LOGERR(("Aspell::check : cant lowercase input\n"));
+	    return false;
+	}
+	mterm.swap(lower);
+    }
+#endif
+
    int ret = aapi.aspell_speller_check(m_data->m_speller, 
-                                        term.c_str(), term.length());
+                                        mterm.c_str(), mterm.length());
    reason.clear();
    switch (ret) {
    case 0: return false;
@ -358,19 +378,31 @@ bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
    }
 }

-bool Aspell::suggest(Rcl::Db &db, const string &term, 
+bool Aspell::suggest(Rcl::Db &db, const string &_term, 
                     list<string>& suggestions, string& reason)
 {
    if (!ok() || !make_speller(reason))
 	return false;
-    if (term.empty())
+    string mterm(_term);
+    if (mterm.empty())
        return true; //??

+#ifndef RCL_INDEX_STRIPCHARS
+    if (!o_index_stripchars) {
+	string lower;
+	if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
+	    LOGERR(("Aspell::check : cant lowercase input\n"));
+	    return false;
+	}
+	mterm.swap(lower);
+    }
+#endif
+
    AspellCanHaveError *ret;

    const AspellWordList *wl = 
 	aapi.aspell_speller_suggest(m_data->m_speller, 
-                                    term.c_str(), term.length());
+                                    mterm.c_str(), mterm.length());
    if (wl == 0) {
 	reason = aapi.aspell_speller_error_message(m_data->m_speller);
 	return false;
@ -385,7 +417,7 @@ bool Aspell::suggest(Rcl::Db &db, const string &term,
        // ******** This should depend if
 	// stemming is turned on or not for querying  *******
 	string sw(word);
-	if (db.termExists(sw) && db.stemDiffers("english", sw, term))
+	if (db.termExists(sw) && db.stemDiffers("english", sw, mterm))
 	    suggestions.push_back(word);
    }
    aapi.delete_aspell_string_enumeration(els);
@ -418,7 +450,6 @@ using namespace std;

 static char *thisprog;
 RclConfig *rclconfig;
-Rcl::Db rcldb;

 static char usage [] =
 " -b : build dictionary\n"
@ -477,7 +508,9 @@ int main(int argc, char **argv)
 	exit(1);
    }

-    if (!rcldb.open(dbdir, Rcl::Db::DbRO, 0)) {
+    Rcl::Db rcldb(rclconfig);
+
+    if (!rcldb.open(Rcl::Db::DbRO, 0)) {
 	fprintf(stderr, "Could not open database in %s\n", dbdir.c_str());
 	exit(1);
    }
--- a/src/aspell/rclaspell.h
+++ b/src/aspell/rclaspell.h
@ -37,11 +37,6 @@
 #include "rclconfig.h"
 #include "rcldb.h"

-#ifndef NO_NAMESPACES
-using std::string;
-using std::list;
-#endif // NO_NAMESPACES
-
 class AspellData;

 class Aspell {
@ -53,26 +48,26 @@ class Aspell {
    bool ok() const;

    /** Find the aspell command and shared library, init function pointers */
-    bool init(string &reason); 
+    bool init(std::string &reason); 

    /**  Build dictionary out of index term list. This is done at the end
     * of an indexing pass. */
-    bool buildDict(Rcl::Db &db, string &reason);
+    bool buildDict(Rcl::Db &db, std::string &reason);

    /** Check that word is in dictionary. ret==false && !reason.empty() => err*/
-    bool check(Rcl::Db &db, const string& term, string& reason);
+    bool check(const std::string& term, std::string& reason);

    /** Return a list of possible expansions for a given word */
-    bool suggest(Rcl::Db &db, const string& term, list<string> &suggestions, 
-		 string &reason);
+    bool suggest(Rcl::Db &db, const std::string& term, 
+		 std::list<std::string> &suggestions, std::string &reason);

 private:
-    string dicPath();
+    std::string dicPath();
    RclConfig  *m_config;
-    string      m_lang;
+    std::string      m_lang;
    AspellData *m_data;

-    bool make_speller(string& reason);
+    bool make_speller(std::string& reason);
 };

 #endif /* RCL_USE_ASPELL */
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -15,6 +15,8 @@
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 #ifndef TEST_RCLCONFIG
+#include "autoconfig.h"
+
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdio.h>
@ -34,6 +36,7 @@
 #include <iostream>
 #include <cstdlib>
 #include <cstring>
+using namespace std;

 #include "cstr.h"
 #include "pathut.h"
@ -45,15 +48,8 @@
 #include "readfile.h"
 #include "fstreewalk.h"

-#ifndef NO_NAMESPACES
-using namespace std;
-#endif /* NO_NAMESPACES */
-
-#ifndef MIN
-#define MIN(A,B) (((A)<(B)) ? (A) : (B))
-#endif
-#ifndef MAX
-#define MAX(A,B) (((A)>(B)) ? (A) : (B))
+#ifndef RCL_INDEX_STRIPCHARS
+bool o_index_stripchars;
 #endif

 bool ParamStale::needrecompute()
@ -77,6 +73,7 @@ bool ParamStale::needrecompute()
    }
    return false;
 }
+
 void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm)
 {
    parent = rconf;
@ -239,6 +236,14 @@ bool RclConfig::updateMainConfig()
 	FsTreeWalker::setNoFnmPathname();
    }

+#ifndef RCL_INDEX_STRIPCHARS
+    static int m_index_stripchars_init = 0;
+    if (!m_index_stripchars_init) {
+	getConfParam("indexStripChars", &o_index_stripchars);
+	m_index_stripchars_init = 1;
+    }
+#endif
+
    return true;
 }

--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@ -303,5 +303,13 @@ class RclConfig {
    bool readFieldsConfig(const string& errloc);
 };

-
+// This global variable defines if we are running with an index
+// stripped of accents and case or a raw one. Ideally, it should be
+// constant, but it needs to be initialized from the configuration, so
+// there is no way to do this. It never changes after initialization
+// of course. When set, it is supposed to get all of recoll to behave as if
+// it was compiled with RCL_INDEX_STRIPCHARS
+#ifndef  RCL_INDEX_STRIPCHARS
+extern bool o_index_stripchars;
+#endif
 #endif /* _RCLCONFIG_H_INCLUDED_ */
--- a/src/qtgui/reslist.cpp
+++ b/src/qtgui/reslist.cpp
@ -197,10 +197,14 @@ void QtGuiResListPager::suggest(const vector<string>uterms,
 	// If the term is in the index, we don't suggest alternatives. 
 	// Actually, we may want to check the frequencies and propose something
 	// anyway if a possible variation is much more common (as google does)
-        if (aspell->check(*rcldb, *uit, reason))
+#warning need to take case and diacs sensibility into account somehow	
+	// Maybe use the xapian index instead ? How to retrieve the sensitivity flags ?
+	if (0) {
+        if (aspell->check(*uit, reason))
            continue;
        else if (!reason.empty())
            return;
+	}
        if (!aspell->suggest(*rcldb, *uit, asuggs, reason)) {
            LOGERR(("QtGuiResListPager::suggest: aspell failed: %s\n", 
                    reason.c_str()));
@ -336,6 +340,7 @@ ResList::~ResList()
 	QT_TR_NOOP("Open"),
 	QT_TR_NOOP("(show query)"),
        QT_TR_NOOP("<p><i>Alternate spellings (accents suppressed): </i>"),
+        QT_TR_NOOP("<p><i>Alternate spellings: </i>"),
    };
 }

--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@ -79,22 +79,30 @@ class TextSplitPTR : public TextSplit {
 	for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
 	     vit != hdata.groups.end(); vit++) {
 	    if (vit->size() == 1) {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
+		if (o_index_stripchars) {
+#endif
 		    m_terms[vit->front()] = vit - hdata.groups.begin();
-#else
+#ifndef RCL_INDEX_STRIPCHARS
+		} else {
 		    string dumb = vit->front();
 		    unacmaybefold(vit->front(), dumb, "UTF-8", UNACOP_UNACFOLD);
 		    m_terms[dumb] = vit - hdata.groups.begin();
+		}
 #endif
 	    } else if (vit->size() > 1) {
 		for (vector<string>::const_iterator it = vit->begin(); 
 		     it != vit->end(); it++) {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
+		if (o_index_stripchars) {
+#endif
 		    m_gterms.insert(*it);
-#else
+#ifndef RCL_INDEX_STRIPCHARS
+		} else {
 		    string dumb = *it;
 		    unacmaybefold(*it, dumb, "UTF-8", UNACOP_UNACFOLD);
 		    m_gterms.insert(dumb);
+		}
 #endif
 		}
 	    }
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@ -320,9 +320,16 @@ void ResListPager::displayPage(RclConfig *config)
            map<string, vector<string> > spellings;
            suggest(uterms, spellings);
            if (!spellings.empty()) {
+		if (o_index_stripchars) {
 		    chunk << 
 		trans("<p><i>Alternate spellings (accents suppressed): </i>")
 			  << "<br /><blockquote>";
+		} else {
+		    chunk << 
+			trans("<p><i>Alternate spellings: </i>")
+			  << "<br /><blockquote>";
+		    
+		}

 		for (map<string, vector<string> >::const_iterator it0 =
 			 spellings.begin(); it0 != spellings.end(); it0++) {
--- a/src/query/xadump.cpp
+++ b/src/query/xadump.cpp
@ -116,12 +116,20 @@ static void sigcleanup(int sig)
    exit(1);
 }

+#ifndef RCL_INDEX_STRIPCHARS
+bool o_index_stripchars;
+#endif
+
 inline bool has_prefix(const string& trm)
 {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars) {
+#endif
 	return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
-#else
+#ifndef RCL_INDEX_STRIPCHARS
+    } else {
 	return trm.size() > 0 && trm[0] == ':';
+    }
 #endif
 }

@ -201,10 +209,22 @@ int main(int argc, char **argv)

    try {
 	db = new Xapian::Database(dbdir);
-
 	cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
 	    db->get_lastdocid() << " avglength " << db->get_avlength() << endl;

+#ifndef RCL_INDEX_STRIPCHARS
+	// If we have terms with a leading ':' it's a new style,
+	// unstripped index
+	{
+	    Xapian::TermIterator term = db->allterms_begin(":");
+	    if (term == db->allterms_end())
+		o_index_stripchars = true;
+	    else
+		o_index_stripchars = false;
+	    cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
+	}
+#endif
+    
 	if (op_flags & OPT_T) {
 	    Xapian::TermIterator term;
 	    string printable;
--- a/src/rcldb/expansiondbs.cpp
+++ b/src/rcldb/expansiondbs.cpp
@ -63,16 +63,18 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
    // Unaccented stem dbs
    vector<XapWritableComputableSynFamMember> unacstemdbs;
    // We can reuse the same stemmer pointers, the objects are stateless.
+    if (!o_index_stripchars) {
 	for (unsigned int i = 0; i < langs.size(); i++) {
 	    unacstemdbs.push_back(
 		XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
 						  stemmers.back().getptr()));
 	    unacstemdbs.back().recreate();
 	}
-
+    }
    SynTermTransUnac transunac(UNACOP_UNACFOLD);
    XapWritableComputableSynFamMember 
 	diacasedb(wdb, synFamDiCa, "all", &transunac);
+    if (!o_index_stripchars)
 	diacasedb.recreate();
 #endif

@ -109,8 +111,10 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // is the input to the stem db, and add a synonym from the
 	    // stripped term to the cased and accented one, for accent
 	    // and case expansion at query time
+	    if (!o_index_stripchars) {
 		unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
 		diacasedb.addSynonym(*it);
+	    }
 #endif

 	    // Create stemming synonym for every language. The input is the 
@ -124,12 +128,15 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // the unaccented term. While this may be incorrect, it is
 	    // also necessary for searching in a diacritic-unsensitive
 	    // way on a raw index
+	    if (!o_index_stripchars) {
 		string unac;
 		unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
-	    if (unac != lower)
+		if (unac != lower) {
 		    for (unsigned int i = 0; i < langs.size(); i++) {
 			unacstemdbs[i].addSynonym(unac);
 		    }
+		}
+	    }
 #endif
        }
    } XCATCHERROR(ermsg);
--- a/src/rcldb/expansiondbs.h
+++ b/src/rcldb/expansiondbs.h
@ -24,10 +24,13 @@

 namespace Rcl {

-/* A Capitals/Diacritics removal functor for using with
-   XapComputableSynFamMember */
+/** A Capitals/Diacritics removal functor for using with
+ *  XapComputableSynFamMember */
 class SynTermTransUnac : public SynTermTrans {
 public:
+    /** Constructor
+     * @param op defines if we remove diacritics, case or both 
+     */
    SynTermTransUnac(UnacOp op)
    : m_op(op)
    {
@ -43,7 +46,9 @@ public:
    UnacOp m_op;
 };

-/** Walk the Xapian term list and create all the expansion dbs in one go */
+/** Walk the Xapian term list and create all the expansion dbs in one go.
+ * 
+ */
 extern bool createExpansionDbs(Xapian::WritableDatabase& wdb, 
 			       const std::vector<std::string>& langs);
 }
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -92,10 +92,11 @@ const string start_of_field_term = "XXST";
 const string end_of_field_term = "XXND";
 static const string page_break_term = "XXPG";
 #else
-const string start_of_field_term = "XXST/";
-const string end_of_field_term = "XXND/";
-static const string page_break_term = "XXPG/";
+string start_of_field_term;
+string end_of_field_term;
+const string page_break_term = "XXPG/";
 #endif
+
 // Field name for the unsplit file name. Has to exist in the field file 
 // because of usage in termmatch()
 static const string unsplitFilenameFieldName = "rclUnsplitFN";
@ -683,6 +684,18 @@ Db::Db(RclConfig *cfp)
      m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
      m_maxFsOccupPc(0), m_mode(Db::DbRO)
 {
+#ifndef RCL_INDEX_STRIPCHARS
+    if (start_of_field_term.empty()) {
+	if (o_index_stripchars) {
+	    start_of_field_term = "XXST";
+	    end_of_field_term = "XXND";
+	} else {
+	    start_of_field_term = "XXST/";
+	    end_of_field_term = "XXND/";
+	}
+    }
+#endif
+
    m_ndb = new Native(this);
    if (m_config) {
 	m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
@ -886,12 +899,13 @@ int Db::termDocCnt(const string& _term)
        return -1;

    string term = _term;
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
+#endif
 	if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
 	    LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
 	    return 0;
 	}
-#endif

    if (m_stops.isStop(term)) {
 	LOGDEB1(("Db::termDocCnt [%s] in stop list\n", term.c_str()));
@ -1151,13 +1165,17 @@ string Db::getSpellingSuggestion(const string& word)
 {
    if (m_ndb == 0)
 	return string();
+
    string term = word;
-#ifdef RCL_INDEX_STRIPCHARS
+
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
+#endif
    if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
 	LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
 	return string();
    }
-#endif
+
    if (!isSpellingCandidate(term))
 	return string();
    return m_ndb->xrdb.get_spelling_suggestion(term);
@ -1266,9 +1284,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
    TermProc *nxt = &tpidx;
    TermProcStop tpstop(nxt, m_stops);nxt = &tpstop;
    //TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
-#ifdef RCL_INDEX_STRIPCHARS
-    TermProcPrep tpprep(nxt); nxt = &tpprep;
+
+    TermProcPrep tpprep(nxt);
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
 #endif
+	nxt = &tpprep;

    TextSplitDb splitter(newdocument, nxt);
    tpidx.setTSD(&splitter);
@ -1951,12 +1972,15 @@ bool Db::termMatch(MatchType typ, const string &lang,
    // Get rid of capitals and accents

    string droot = root;
-#ifdef RCL_INDEX_STRIPCHARS
+
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
+#endif
 	if (!unacmaybefold(root, droot, "UTF-8", UNACOP_UNACFOLD)) {
 	    LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
 	    return false;
 	}
-#endif
+
    string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;

    string prefix;
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -129,18 +129,27 @@ extern  void *DbUpdWorker(void*);

 inline bool has_prefix(const string& trm)
 {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars) {
+#endif
 	return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
-#else
+#ifndef RCL_INDEX_STRIPCHARS
+    } else {
 	return !trm.empty() && trm[0] == ':';
+    }
 #endif
 }
+
 inline string wrap_prefix(const string& pfx) 
 {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars) {
+#endif
 	return pfx;
-#else
+#ifndef RCL_INDEX_STRIPCHARS
+    } else {
 	return cstr_colon + pfx + cstr_colon;
+    }
 #endif
 }

@ -384,9 +393,13 @@ private:
 string version_string();

 extern const string pathelt_prefix;
+#ifdef RCL_INDEX_STRIPCHARS
 extern const string start_of_field_term;
 extern const string end_of_field_term;
-
+#else
+extern string start_of_field_term;
+extern string end_of_field_term;
+#endif
 }

 #endif /* _DB_H_INCLUDED_ */
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -79,10 +79,22 @@ static const int original_term_wqf_booster = 10;

 #ifdef RCL_INDEX_STRIPCHARS
 #define bufprefix(BUF, L) {(BUF)[0] = L;}
-#define bpoffs 1
+#define bpoffs() 1
 #else
-#define bufprefix(BUF, L) {(BUF)[0] = ':'; (BUF)[1] = L; (BUF)[2] = ':';}
-#define bpoffs 3
+static inline void bufprefix(char *buf, char c)
+{
+    if (o_index_stripchars) {
+	buf[0] = c;
+    } else {
+	buf[0] = ':'; 
+	buf[1] = c; 
+	buf[2] = ':';
+    }
+}
+static inline int bpoffs() 
+{
+    return o_index_stripchars ? 1 : 3;
+}
 #endif

 static Xapian::Query
@ -92,7 +104,7 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
    // only doing %d's !
    char buf[200];
    bufprefix(buf, 'D');
-    sprintf(buf+bpoffs, "%04d%02d", y1, m1);
+    sprintf(buf+bpoffs(), "%04d%02d", y1, m1);
    vector<Xapian::Query> v;

    int d_last = monthdays(m1, y1);
@ -103,7 +115,7 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
    // Deal with any initial partial month
    if (d1 > 1 || d_end < d_last) {
    	for ( ; d1 <= d_end ; d1++) {
-	    sprintf(buf + 6 + bpoffs, "%02d", d1);
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
 	    v.push_back(Xapian::Query(buf));
 	}
    } else {
@ -117,32 +129,32 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)

    int m_last = (y1 < y2) ? 12 : m2 - 1;
    while (++m1 <= m_last) {
-	sprintf(buf + 4 + bpoffs, "%02d", m1);
+	sprintf(buf + 4 + bpoffs(), "%02d", m1);
 	bufprefix(buf, 'M');
 	v.push_back(Xapian::Query(buf));
    }
 	
    if (y1 < y2) {
 	while (++y1 < y2) {
-	    sprintf(buf + bpoffs, "%04d", y1);
+	    sprintf(buf + bpoffs(), "%04d", y1);
 	    bufprefix(buf, 'Y');
 	    v.push_back(Xapian::Query(buf));
 	}
-	sprintf(buf + bpoffs, "%04d", y2);
+	sprintf(buf + bpoffs(), "%04d", y2);
 	bufprefix(buf, 'M');
 	for (m1 = 1; m1 < m2; m1++) {
-	    sprintf(buf + 4 + bpoffs, "%02d", m1);
+	    sprintf(buf + 4 + bpoffs(), "%02d", m1);
 	    v.push_back(Xapian::Query(buf));
 	}
    }
 	
-    sprintf(buf + 2 + bpoffs, "%02d", m2);
+    sprintf(buf + 2 + bpoffs(), "%02d", m2);

    // Deal with any final partial month
    if (d2 < monthdays(m2, y2)) {
 	bufprefix(buf, 'D');
    	for (d1 = 1 ; d1 <= d2; d1++) {
-	    sprintf(buf + 6 + bpoffs, "%02d", d1);
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
 	    v.push_back(Xapian::Query(buf));
 	}
    } else {
@ -663,13 +675,13 @@ static void listVector(const string& what, const vector<string>&l)
 */
 void StringToXapianQ::expandTerm(int mods, 
 				 const string& term, 
-                                 vector<string>& exp, string &sterm,
+                                 vector<string>& oexp, string &sterm,
 				 const string& prefix)
 {
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
 	     mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
    sterm.clear();
-    exp.clear();
+    oexp.clear();
    if (term.empty())
 	return;

@ -693,6 +705,9 @@ void StringToXapianQ::expandTerm(int mods,
    bool diac_sensitive = (mods & SearchDataClause::SDCM_DIACSENS) != 0;
    bool case_sensitive = (mods & SearchDataClause::SDCM_CASESENS) != 0;

+    if (o_index_stripchars) {
+	diac_sensitive = case_sensitive = false;
+    } else {
 	// If we are working with a raw index, apply the rules for case and 
 	// diacritics sensitivity.

@ -703,10 +718,10 @@ void StringToXapianQ::expandTerm(int mods,
 	if (unachasaccents(term))
 	    diac_sensitive = true;

-    // If any character apart the first is uppercase, we become case-sensitive. 
-    // The first character is reserved for turning off stemming. You need to
-    // use a query language modifier to search for Floor in a case-sensitive
-    // way.
+	// If any character apart from the first is uppercase, we become
+	// case-sensitive.  The first character is reserved for
+	// turning off stemming. You need to use a query language
+	// modifier to search for Floor in a case-sensitive way.
 	Utf8Iter it(term);
 	it++;
 	if (unachasuppercase(term.substr(it.getBpos())))
@ -718,12 +733,21 @@ void StringToXapianQ::expandTerm(int mods,

 	if (!case_sensitive || !diac_sensitive)
 	    noexpansion = false;
+    }
 #endif

    if (noexpansion) {
 	sterm = term;
-	exp.push_back(prefix + term);
-    } else {
+	oexp.push_back(prefix + term);
+	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
+	return;
+    } 
+
+    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
+    XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all", 
+				    &unacfoldtrans);
+    vector<string> lexp;
+
    TermMatchResult res;
    if (haswild) {
 	// Note that if there are wildcards, we do a direct from-index
@ -732,106 +756,110 @@ void StringToXapianQ::expandTerm(int mods,
 	// synonyms first. To be done later
 	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
 		       m_field);
-	} else {
+	goto termmatchtoresult;
+    }
+
    sterm = term;
+
 #ifdef RCL_INDEX_STRIPCHARS
-	    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, 
-                           m_field);
+
+    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
+
 #else
-	    // No stem expansion when diacritic or case sensitivity is
-	    // set, it makes no sense (it would mess with the
-	    // diacritics anyway if they are not in the stem part).
-	    // In these 3 cases, perform appropriate expansion from
-	    // the charstripping db, and do a bogus wildcard expansion
-	    // (there is no wild card) to generate the result:
+
+    if (o_index_stripchars) {
+	// If the index is stripped, we can only come here if nostemexp is unset
+	// and we just need stem expansion.
+	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
+	goto termmatchtoresult;
+    } 
+
+    // No stem expansion when diacritic or case sensitivity is set, it
+    // makes no sense (it would mess with the diacritics anyway if
+    // they are not in the stem part).  In these 3 cases, perform
+    // appropriate expansion from the charstripping db, and do a bogus
+    // wildcard expansion (there is no wild card) to generate the
+    // result:
+
    if (diac_sensitive && case_sensitive) {
 	// No expansion whatsoever
-		m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
-			       m_field);
-	    } else {
-		// Access case and diacritics expansion:
-		vector<string> exp;
-		SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
-		XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa,
-						"all", &unacfoldtrans);
+	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
+	goto termmatchtoresult;
+    }

    if (diac_sensitive) {
 	// Expand for accents and case, filtering for same accents,
 	// then bogus wildcard expansion for generating result
 	SynTermTransUnac foldtrans(UNACOP_FOLD);
-		    synac.synExpand(term, exp, &foldtrans);
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
-			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
-				       -1, m_field);
+	synac.synExpand(term, lexp, &foldtrans);
+	goto exptotermatch;
    } 
-		} else if (case_sensitive) {
-		    // Expand for accents and case, filtering for same case,
-		    // then bogus wildcard expansion for generating result
-		    SynTermTransUnac unactrans(UNACOP_UNAC);
-		    synac.synExpand(term, exp, &unactrans);
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
-			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
-				       -1, m_field);
-		    }
-		} else {
-		    // Expand for accents and case, then lowercase
-		    // result for input to stemdb.
-		    synac.synExpand(term, exp);
-		    for (unsigned int i = 0; i < exp.size(); i++) {
-			string lower;
-			unacmaybefold(exp[i], lower, "UTF-8", UNACOP_FOLD);
-			exp[i] = lower;
-		    }
-		    sort(exp.begin(), exp.end());
-		    vector<string>::iterator uit = 
-			unique(exp.begin(), exp.end());
-		    exp.resize(uit - exp.begin());
-		    LOGDEB(("ExpandTerm: after casediac: %s\n", 
-			    stringsToString(exp).c_str()));

+    if (case_sensitive) {
+	// Expand for accents and case, filtering for same case, then
+	// bogus wildcard expansion for generating result
+	SynTermTransUnac unactrans(UNACOP_UNAC);
+	synac.synExpand(term, lexp, &unactrans);
+	goto exptotermatch;
+    }
+
+    // We are neither accent- nor case- sensitive and may need stem
+    // expansion or not.
+
+    // Expand for accents and case
+    synac.synExpand(term, lexp);
+    LOGDEB(("ExpTerm: casediac: %s\n", stringsToString(lexp).c_str()));
+    if (nostemexp)
+	goto exptotermatch;
+
+    // Need stem expansion. Lowercase the result of accent and case
+    // expansion for input to stemdb.
+    for (unsigned int i = 0; i < lexp.size(); i++) {
+	string lower;
+	unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
+	lexp[i] = lower;
+    }
+    sort(lexp.begin(), lexp.end());
+    {
+	vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
+	lexp.resize(uit - lexp.begin());
 	StemDb db(m_db.m_ndb->xrdb);
 	vector<string> exp1;
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
+	for (vector<string>::const_iterator it = lexp.begin(); 
+	     it != lexp.end(); it++) {
 	    db.stemExpand(m_stemlang, *it, exp1);
 	}
-		    LOGDEB(("ExpandTerm: after stem: %s\n", 
-			    stringsToString(exp1).c_str()));
+	LOGDEB(("ExpTerm: stem: %s\n", stringsToString(exp1).c_str()));

 	// Expand the resulting list for case (all stemdb content
 	// is lowercase)
-		    exp.clear();
+	lexp.clear();
 	for (vector<string>::const_iterator it = exp1.begin(); 
 	     it != exp1.end(); it++) {
-			synac.synExpand(*it, exp);
+	    synac.synExpand(*it, lexp);
 	}
-		    sort(exp.begin(), exp.end());
-		    uit = unique(exp.begin(), exp.end());
-		    exp.resize(uit - exp.begin());
-
-		    LOGDEB(("ExpandTerm: after case exp of stem: %s\n", 
-			    stringsToString(exp).c_str()));
+	sort(lexp.begin(), lexp.end());
+	uit = unique(lexp.begin(), lexp.end());
+	lexp.resize(uit - lexp.begin());
+    }
+    LOGDEB(("ExpTerm: case exp of stem: %s\n", stringsToString(lexp).c_str()));

    // Bogus wildcard expand to generate the result
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
-			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
-				       -1, m_field);
-		    }
-
-		}
+exptotermatch:
+    for (vector<string>::const_iterator it = lexp.begin();
+	 it != lexp.end(); it++) {
+	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, 
+		       res, -1, m_field);
    }
 #endif
-	}

+    // Term match entries to vector of terms
+termmatchtoresult:
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
 	 it != res.entries.end(); it++) {
-	    exp.push_back(it->term);
-	}
-	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(exp).c_str()));
+	oexp.push_back(it->term);
    }
+    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
 }

 // Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@ -1097,9 +1125,11 @@ bool StringToXapianQ::processUserString(const string &iq,
            TermProcStop tpstop(nxt, stops); nxt = &tpstop;
            //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
            //tpcommon.onlygrams(true);
-#ifdef RCL_INDEX_STRIPCHARS
-	    TermProcPrep tpprep(nxt); nxt = &tpprep;
+	    TermProcPrep tpprep(nxt);
+#ifndef RCL_INDEX_STRIPCHARS
+	    if (o_index_stripchars)
 #endif
+		nxt = &tpprep;

 	    TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | 
 						 TextSplit::TXTS_KEEPWILD), 
--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@ -26,6 +26,8 @@

 #include <algorithm>
 #include <map>
+#include <iostream>
+using namespace std;

 #include <xapian.h>

@ -34,18 +36,14 @@
 #include "smallut.h"
 #include "synfamily.h"
 #include "unacpp.h"
-
-#include <iostream>
-
-using namespace std;
+#include "rclconfig.h"

 namespace Rcl {

 /**
 * Expand for one or several languages
 */
-bool StemDb::stemExpand(const std::string& langs,
-			const std::string& term,
+bool StemDb::stemExpand(const std::string& langs, const std::string& term,
 			vector<string>& result)
 {
    vector<string> llangs;
@ -59,6 +57,8 @@ bool StemDb::stemExpand(const std::string& langs,
    }

 #ifndef RCL_INDEX_STRIPCHARS
+    // Expand the unaccented stem
+    if (!o_index_stripchars) {
 	for (vector<string>::const_iterator it = llangs.begin();
 	     it != llangs.end(); it++) {
 	    SynTermTransStem stemmer(*it);
@ -68,6 +68,7 @@ bool StemDb::stemExpand(const std::string& langs,
 	    unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
 	    (void)expander.synExpand(unac, result);
 	}
+    }
 #endif 

    if (result.empty())
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@ -33,17 +33,12 @@
 #include <string>
 #include <iostream>
 #include <list>
+using namespace std;

 #include "smallut.h"
 #include "utf8iter.h"
 #include "hldata.h"

-#ifndef NO_NAMESPACES
-using namespace std;
-#endif /* NO_NAMESPACES */
-
-#define MIN(A,B) ((A)<(B)?(A):(B))
-
 int stringicmp(const string & s1, const string& s2) 
 {
    string::const_iterator it1 = s1.begin();
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@ -224,4 +224,11 @@ public:
    }
 };

+#ifndef MIN
+#define MIN(A,B) (((A)<(B)) ? (A) : (B))
+#endif
+#ifndef MAX
+#define MAX(A,B) (((A)>(B)) ? (A) : (B))
+#endif
+
 #endif /* _SMALLUT_H_INCLUDED_ */
--- a/tests/config/recoll.conf
+++ b/tests/config/recoll.conf
@ -4,6 +4,8 @@ logfilename = /tmp/logrcltst
 daemloglevel = 6
 daemlogfilename = /tmp/rclmontrace

+indexStripChars = 1
+
 topdirs = /home/dockes/projets/fulltext/testrecoll/

 skippedPaths = \