filenames used for "filename search" need to be lowercased and stripped

2012-10-15 08:06:04 +02:00 · 2012-10-15 08:06:04 +02:00 · b57f483a30
commit b57f483a30
parent 7c76d34b05
2 changed files with 12 additions and 1 deletions
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -536,6 +536,8 @@ bool Db::rmQueryDb(const string &dir)
 // http://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
 size_t Db::whatDbIdx(const Doc& doc)
 {
+    LOGDEB(("Db::whatDbIdx: xdocid %lu, %u extraDbs\n", 
+	    (unsigned long)doc.xdocid, m_extraDbs.size()));
    if (doc.xdocid == 0) 
 	return (size_t)-1;
    if (m_extraDbs.size() == 0)
@ -1448,6 +1450,15 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)

    LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));

+    // We unconditionally lowercase and strip the pattern, as is done
+    // during indexing. This seems to be the only sane possible
+    // approach with file names and wild cards. termMatch does
+    // stripping conditionally on indexstripchars.
+    string pat1;
+    if (unacmaybefold(pattern, pat1, "UTF-8", UNACOP_UNACFOLD)) {
+	pattern.swap(pat1);
+    }
+
    TermMatchResult result;
    if (!termMatch(ET_WILD, string(), pattern, result, max,
 		   unsplitFilenameFieldName))
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -1270,7 +1270,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
 // the generic field-processing code.
 //
 // We do not split the entry any more (used to do some crazy thing
-// about expanding multiple fragments in the past. We just take the
+// about expanding multiple fragments in the past). We just take the
 // value blanks and all and expand this against the indexed unsplit
 // file names
 bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,