Ensure that recoll configured with indexStripChars=1 behaves the same as when compiled with -DRCL_INDEX_STRIPCHARS

This commit is contained in:
Jean-Francois Dockes 2012-09-15 15:16:20 +02:00
parent 48e9a4f901
commit e22b347767
17 changed files with 425 additions and 260 deletions

View file

@ -63,17 +63,19 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
// Unaccented stem dbs
vector<XapWritableComputableSynFamMember> unacstemdbs;
// We can reuse the same stemmer pointers, the objects are stateless.
for (unsigned int i = 0; i < langs.size(); i++) {
unacstemdbs.push_back(
XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i],
stemmers.back().getptr()));
unacstemdbs.back().recreate();
if (!o_index_stripchars) {
for (unsigned int i = 0; i < langs.size(); i++) {
unacstemdbs.push_back(
XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i],
stemmers.back().getptr()));
unacstemdbs.back().recreate();
}
}
SynTermTransUnac transunac(UNACOP_UNACFOLD);
XapWritableComputableSynFamMember
diacasedb(wdb, synFamDiCa, "all", &transunac);
diacasedb.recreate();
if (!o_index_stripchars)
diacasedb.recreate();
#endif
// Walk the list of all terms, and stem/unac each.
@ -109,8 +111,10 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
// is the input to the stem db, and add a synonym from the
// stripped term to the cased and accented one, for accent
// and case expansion at query time
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
diacasedb.addSynonym(*it);
if (!o_index_stripchars) {
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
diacasedb.addSynonym(*it);
}
#endif
// Create stemming synonym for every language. The input is the
@ -124,12 +128,15 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
// the unaccented term. While this may be incorrect, it is
// also necessary for searching in a diacritic-insensitive
// way on a raw index
string unac;
unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
if (unac != lower)
for (unsigned int i = 0; i < langs.size(); i++) {
unacstemdbs[i].addSynonym(unac);
if (!o_index_stripchars) {
string unac;
unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
if (unac != lower) {
for (unsigned int i = 0; i < langs.size(); i++) {
unacstemdbs[i].addSynonym(unac);
}
}
}
#endif
}
} XCATCHERROR(ermsg);