removed list size truncation on filename expansion
parent 8de0e2e8c9
commit 86515ce52a

5 changed files with 37 additions and 30 deletions
@@ -38,6 +38,7 @@ src/doc/user/rcl.program.api.html
 src/doc/user/rcl.program.fields.html
 src/doc/user/rcl.program.html
 src/doc/user/rcl.search.anchorwild.html
+src/doc/user/rcl.search.casediac.html
 src/doc/user/rcl.search.commandline.html
 src/doc/user/rcl.search.complex.html
 src/doc/user/rcl.search.custom.html
@@ -1449,7 +1449,7 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names)
     LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));

     TermMatchResult result;
-    if (!termMatch(ET_WILD, string(), pattern, result, 1000,
+    if (!termMatch(ET_WILD, string(), pattern, result, -1,
                    unsplitFilenameFieldName))
         return false;
     for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
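The -1 now passed as the max argument relies on termMatch treating a non-positive limit as "no truncation" (visible in the "if (max > 0)" guards further down), so the filename wildcard expansion keeps every matching term instead of the first 1000. A minimal sketch of that convention, using a hypothetical helper name (maybeTruncate), not the actual Recoll API:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Hypothetical helper illustrating the limit convention assumed here:
    // a positive max caps the list, while max <= 0 (e.g. -1) keeps everything.
    static void maybeTruncate(std::vector<std::string>& entries, int max)
    {
        if (max > 0 && entries.size() > static_cast<std::size_t>(max))
            entries.resize(max);
    }

    // maybeTruncate(names, 1000);  // old behaviour: at most 1000 filenames
    // maybeTruncate(names, -1);    // new behaviour: the full expansion is kept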
@@ -1506,7 +1506,7 @@ public:
 };

 bool Db::stemExpand(const string &langs, const string &term,
-                    TermMatchResult& result, int max)
+                    TermMatchResult& result)
 {
     if (m_ndb == 0 || m_ndb->m_isopen == false)
         return false;
@@ -1518,7 +1518,9 @@ bool Db::stemExpand(const string &langs, const string &term,
         return true;
     }

-/** Add prefix to all strings in list */
+/** Add prefix to all strings in list.
+ * @param prefix already wrapped prefix
+ */
 static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
 {
     if (prefix.empty())
@@ -1579,7 +1581,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
     res.prefix = prefix;

     if (typ == ET_STEM) {
-        if (!stemExpand(lang, root, res, max))
+        if (!stemExpand(lang, root, res))
             return false;
         for (vector<TermMatchEntry>::iterator it = res.entries.begin();
              it != res.entries.end(); it++) {
@@ -1623,7 +1625,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
         Xapian::TermIterator it = xdb.allterms_begin();
         if (!is.empty())
             it.skip_to(is.c_str());
-        for (int n = 0; it != xdb.allterms_end(); it++) {
+        for (int rcnt = 0; it != xdb.allterms_end(); it++) {
             // If we're beyond the terms matching the initial
             // string, end
             if (!is.empty() && (*it).find(is) != 0)
@@ -1645,7 +1647,14 @@ bool Db::termMatch(MatchType typ, const string &lang,
                 res.entries.push_back(TermMatchEntry(*it,
                                                      xdb.get_collection_freq(*it),
                                                      it.get_termfreq()));
-                ++n;
+
+                // The problem with truncating here is that this is done
+                // alphabetically and we may not keep the most frequent
+                // terms. OTOH, not doing it may stall the program if
+                // we are walking the whole term list. We compromise
+                // by cutting at 2*max
+                if (max > 0 && ++rcnt >= 2*max)
+                    break;
         }
         m_reason.erase();
         break;
@@ -1676,6 +1685,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
         TermMatchCmpByWcf wcmp;
         sort(res.entries.begin(), res.entries.end(), wcmp);
         if (max > 0) {
+            // Would need a small max and big stem expansion...
             res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
         }
         return true;
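The previous two hunks implement the trade-off spelled out in the new comment: the Xapian term walk is alphabetical, so cutting it at exactly max entries could drop frequent terms that sort late, while never cutting could stall the program on a very large term list. The code therefore gathers up to 2*max candidates, sorts them by frequency (TermMatchCmpByWcf), and only then resizes to max. A standalone sketch of that strategy, with hypothetical names (Entry, wildExpand), not the actual Recoll code:

    #include <algorithm>
    #include <cstddef>
    #include <string>
    #include <vector>

    struct Entry {
        std::string term;
        int wcf;  // collection frequency, used for ranking
    };

    // Walk candidate terms in alphabetical order, but stop after 2*max so a
    // huge expansion cannot stall the caller; max <= 0 means no limit at all.
    // The final cut to max happens only after sorting by frequency, so the
    // most frequent terms survive even if they sort late alphabetically.
    static std::vector<Entry> wildExpand(const std::vector<Entry>& allTerms, int max)
    {
        std::vector<Entry> res;
        int rcnt = 0;
        for (const Entry& e : allTerms) {          // alphabetical walk
            res.push_back(e);
            if (max > 0 && ++rcnt >= 2 * max)      // compromise cut at 2*max
                break;
        }
        std::sort(res.begin(), res.end(),
                  [](const Entry& a, const Entry& b) { return a.wcf > b.wcf; });
        if (max > 0 && res.size() > static_cast<std::size_t>(max))
            res.resize(max);                       // final truncation to max
        return res;
    }

With max = -1, as filenameWildExp now passes, both guards are skipped and the full expansion is returned.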
@@ -406,7 +406,7 @@ private:
     // Reinitialize when adding/removing additional dbs
     bool adjustdbs();
     bool stemExpand(const string &lang, const string &s,
-                    TermMatchResult& result, int max = -1);
+                    TermMatchResult& result);

     // Flush when idxflushmb is reached
     bool maybeflush(off_t moretext);
@@ -19,11 +19,6 @@
 #include <string>
 #include <vector>

-#ifndef NO_NAMESPACES
-using std::string;
-using std::vector;
-#endif
-
 #include "refcntr.h"
 #include "searchdata.h"

@@ -43,18 +38,18 @@ enum abstract_result {
 // Snippet entry for makeDocAbstract
 class Snippet {
 public:
-    Snippet(int page, const string& snip)
+    Snippet(int page, const std::string& snip)
         : page(page), snippet(snip)
     {
     }
-    Snippet& setTerm(const string& trm)
+    Snippet& setTerm(const std::string& trm)
     {
         term = trm;
         return *this;
     }
     int page;
-    string term;
-    string snippet;
+    std::string term;
+    std::string snippet;
 };


@@ -71,11 +66,11 @@ class Query {
     ~Query();

     /** Get explanation about last error */
-    string getReason() const;
+    std::string getReason() const;

     /** Choose sort order. Must be called before setQuery */
-    void setSortBy(const string& fld, bool ascending = true);
-    const string& getSortBy() const {return m_sortField;}
+    void setSortBy(const std::string& fld, bool ascending = true);
+    const std::string& getSortBy() const {return m_sortField;}
     bool getSortAscending() const {return m_sortAscending;}

     /** Return or filter results with identical content checksum */
@@ -94,26 +89,26 @@ class Query {
     bool getDoc(int i, Doc &doc);

     /** Get possibly expanded list of query terms */
-    bool getQueryTerms(vector<string>& terms);
+    bool getQueryTerms(std::vector<std::string>& terms);

     /** Return a list of terms which matched for a specific result document */
-    bool getMatchTerms(const Doc& doc, vector<string>& terms);
-    bool getMatchTerms(unsigned long xdocid, vector<string>& terms);
+    bool getMatchTerms(const Doc& doc, std::vector<std::string>& terms);
+    bool getMatchTerms(unsigned long xdocid, std::vector<std::string>& terms);

     /** Build synthetic abstract for document, extracting chunks relevant for
      * the input query. This uses index data only (no access to the file) */
     // Abstract return as one string
-    bool makeDocAbstract(Doc &doc, string& abstract);
+    bool makeDocAbstract(Doc &doc, std::string& abstract);
     // Returned as a snippets vector
-    bool makeDocAbstract(Doc &doc, vector<string>& abstract);
+    bool makeDocAbstract(Doc &doc, std::vector<std::string>& abstract);
     // Returned as a vector of pair<page,snippet> page is 0 if unknown
-    abstract_result makeDocAbstract(Doc &doc, vector<Snippet>& abst,
+    abstract_result makeDocAbstract(Doc &doc, std::vector<Snippet>& abst,
                                     int maxoccs = -1, int ctxwords = -1);
     /** Retrieve detected page breaks positions */
     int getFirstMatchPage(Doc &doc, std::string& term);

     /** Expand query to look for documents like the one passed in */
-    vector<string> expand(const Doc &doc);
+    std::vector<std::string> expand(const Doc &doc);

     /** Return the Db we're set for */
     Db *whatDb();
@@ -123,10 +118,10 @@ class Query {
     Native *m_nq;

 private:
-    string m_reason; // Error explanation
+    std::string m_reason; // Error explanation
     Db *m_db;
     void *m_sorter;
-    string m_sortField;
+    std::string m_sortField;
     bool m_sortAscending;
     bool m_collapseDuplicates;
     int m_resCnt;
@@ -1095,8 +1095,9 @@ bool StringToXapianQ::processUserString(const string &iq,
                                         bool useNear
     )
 {
-    LOGDEB(("StringToXapianQ:: qstr [%s] mods 0x%x slack %d near %d\n",
-            iq.c_str(), mods, slack, useNear));
+    LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
+            "slack %d near %d\n",
+            iq.c_str(), m_field.c_str(), mods, slack, useNear));
     ermsg.erase();

     const StopList stops = m_db.getStopList();