Ensure that recoll configured with indexStripChars=1 behaves the same as when compiled with -DRCL_INDEX_STRIPCHARS

2012-09-15 15:16:20 +02:00 · 2012-09-15 15:16:20 +02:00 · e22b347767
commit e22b347767
parent 48e9a4f901
17 changed files with 425 additions and 260 deletions
--- a/src/rcldb/expansiondbs.cpp
+++ b/src/rcldb/expansiondbs.cpp
@@ -63,17 +63,19 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
    // Unaccented stem dbs
    vector<XapWritableComputableSynFamMember> unacstemdbs;
    // We can reuse the same stemmer pointers, the objects are stateless.
-    for (unsigned int i = 0; i < langs.size(); i++) {
-	unacstemdbs.push_back(
-	    XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
-					      stemmers.back().getptr()));
-	unacstemdbs.back().recreate();
+    if (!o_index_stripchars) {
+	for (unsigned int i = 0; i < langs.size(); i++) {
+	    unacstemdbs.push_back(
+		XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
+						  stemmers.back().getptr()));
+	    unacstemdbs.back().recreate();
+	}
    }
-
    SynTermTransUnac transunac(UNACOP_UNACFOLD);
    XapWritableComputableSynFamMember 
 	diacasedb(wdb, synFamDiCa, "all", &transunac);
-    diacasedb.recreate();
+    if (!o_index_stripchars)
+	diacasedb.recreate();
 #endif

    // Walk the list of all terms, and stem/unac each.
@@ -109,8 +111,10 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // is the input to the stem db, and add a synonym from the
 	    // stripped term to the cased and accented one, for accent
 	    // and case expansion at query time
-	    unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
-	    diacasedb.addSynonym(*it);
+	    if (!o_index_stripchars) {
+		unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
+		diacasedb.addSynonym(*it);
+	    }
 #endif

 	    // Create stemming synonym for every language. The input is the 
@@ -124,12 +128,15 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // the unaccented term. While this may be incorrect, it is
 	    // also necessary for searching in a diacritic-unsensitive
 	    // way on a raw index
-	    string unac;
-	    unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
-	    if (unac != lower)
-		for (unsigned int i = 0; i < langs.size(); i++) {
-		    unacstemdbs[i].addSynonym(unac);
+	    if (!o_index_stripchars) {
+		string unac;
+		unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
+		if (unac != lower) {
+		    for (unsigned int i = 0; i < langs.size(); i++) {
+			unacstemdbs[i].addSynonym(unac);
+		    }
 		}
+	    }
 #endif
        }
    } XCATCHERROR(ermsg);