/* Copyright (C) 2006 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include using std::string; using std::list; #include "rclconfig.h" #include "wasastringtoquery.h" #include "rcldb.h" #include "searchdata.h" #include "wasatorcl.h" #include "debuglog.h" #include "smallut.h" #include "rclconfig.h" #include "refcntr.h" #include "textsplit.h" static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, const string& stemlang, WasaQuery *wasa, const string& autosuffs, string& reason) { if (wasa == 0) { reason = "NULL query"; return 0; } if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) { reason = "Top query neither AND nor OR ?"; LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n")); return 0; } Rcl::SearchData *sdata = new Rcl::SearchData(wasa->m_op == WasaQuery::OP_AND ? Rcl::SCLT_AND : Rcl::SCLT_OR, stemlang); LOGDEB2(("wasaQueryToRcl: %s chain\n", wasa->m_op == WasaQuery::OP_AND ? "AND" : "OR")); WasaQuery::subqlist_t::iterator it; Rcl::SearchDataClause *nclause; // Walk the list of clauses. Some pseudo-field types need special // processing, which results in setting data in the top struct // instead of adding a clause. We check for these first for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) { if (!stringicmp("mime", (*it)->m_fieldspec) || !stringicmp("format", (*it)->m_fieldspec)) { if ((*it)->m_op == WasaQuery::OP_LEAF) { sdata->addFiletype((*it)->m_value); } else if ((*it)->m_op == WasaQuery::OP_EXCL) { sdata->remFiletype((*it)->m_value); } else { reason = "internal error: mime clause neither leaf not excl??"; return 0; } continue; } // Xesam uses "type", we also support "rclcat", for broad // categories like "audio", "presentation", etc. if (!stringicmp("rclcat", (*it)->m_fieldspec) || !stringicmp("type", (*it)->m_fieldspec)) { if ((*it)->m_op != WasaQuery::OP_LEAF && (*it)->m_op != WasaQuery::OP_EXCL) { reason = "internal error: rclcat/type clause neither leaf" "nor excl??"; return 0; } vector mtypes; if (config && config->getMimeCatTypes((*it)->m_value, mtypes) && !mtypes.empty()) { for (vector::iterator mit = mtypes.begin(); mit != mtypes.end(); mit++) { if ((*it)->m_op == WasaQuery::OP_LEAF) sdata->addFiletype(*mit); else sdata->remFiletype(*mit); } } else { reason = "Unknown rclcat/type value: no mime types found"; return 0; } continue; } // Filtering on location if (!stringicmp("dir", (*it)->m_fieldspec)) { string dir = path_tildexpand((*it)->m_value); sdata->addDirSpec(dir, (*it)->m_op == WasaQuery::OP_EXCL, (*it)->m_weight); continue; } // Handle "date" spec if (!stringicmp("date", (*it)->m_fieldspec)) { if ((*it)->m_op != WasaQuery::OP_LEAF) { reason = "Negative date filtering not supported"; return 0; } DateInterval di; if (!parsedateinterval((*it)->m_value, &di)) { LOGERR(("wasaQueryToRcl: bad date interval format\n")); reason = "Bad date interval format"; return 0; } LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n", di.y1,di.m1,di.d1, di.y2,di.m2,di.d2)); sdata->setDateSpan(&di); continue; } // Handle "size" spec if (!stringicmp("size", (*it)->m_fieldspec)) { if ((*it)->m_op != WasaQuery::OP_LEAF) { reason = "Negative size filtering not supported"; return 0; } char *cp; size_t size = strtoll((*it)->m_value.c_str(), &cp, 10); if (*cp != 0) { switch (*cp) { case 'k': case 'K': size *= 1E3;break; case 'm': case 'M': size *= 1E6;break; case 'g': case 'G': size *= 1E9;break; case 't': case 'T': size *= 1E12;break; default: reason = string("Bad multiplier suffix: ") + *cp; return 0; } } switch ((*it)->m_rel) { case WasaQuery::REL_EQUALS: sdata->setMaxSize(size); sdata->setMinSize(size); break; case WasaQuery::REL_LT: case WasaQuery::REL_LTE: sdata->setMaxSize(size); break; case WasaQuery::REL_GT: case WasaQuery::REL_GTE: sdata->setMinSize(size); break; default: reason = "Bad relation operator with size query. Use > < or ="; return 0; } continue; } // "Regular" processing follows: unsigned int mods = (unsigned int)(*it)->m_modifiers; nclause = 0; switch ((*it)->m_op) { case WasaQuery::OP_NULL: case WasaQuery::OP_AND: default: reason = "Found bad NULL or AND query type in list"; LOGERR(("wasaQueryToRcl: found bad NULL or AND q type in list\n")); continue; case WasaQuery::OP_LEAF: { LOGDEB(("wasaQueryToRcl: leaf clause [%s]:[%s] slack %d\n", (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str(), (*it)->m_slack)); // Change terms found in the "autosuffs" list into "ext" // field queries if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) { vector asfv; if (stringToStrings(autosuffs, asfv)) { if (find_if(asfv.begin(), asfv.end(), StringIcmpPred((*it)->m_value)) != asfv.end()) { (*it)->m_fieldspec = "ext"; (*it)->m_modifiers |= WasaQuery::WQM_NOSTEM; } } } // I'm not sure I understand the phrase/near detection // thereafter anymore, maybe it would be better to have an // explicit flag. Mods can only be set after a double // quote. if (TextSplit::hasVisibleWhite((*it)->m_value) || mods) { Rcl::SClType tp = Rcl::SCLT_PHRASE; if (mods & WasaQuery::WQM_PROX) { tp = Rcl::SCLT_NEAR; } nclause = new Rcl::SearchDataClauseDist(tp, (*it)->m_value, (*it)->m_slack, (*it)->m_fieldspec); } else { nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, (*it)->m_value, (*it)->m_fieldspec); } if (nclause == 0) { reason = "Out of memory"; LOGERR(("wasaQueryToRcl: out of memory\n")); return 0; } } break; case WasaQuery::OP_EXCL: LOGDEB2(("wasaQueryToRcl: excl clause [%s]:[%s]\n", (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str())); if (wasa->m_op != WasaQuery::OP_AND) { LOGERR(("wasaQueryToRcl: negative clause inside OR list!\n")); continue; } // Note: have to add dquotes which will be translated to // phrase if there are several words in there. Not pretty // but should work. If there is actually a single // word, it will not be taken as a phrase, and // stem-expansion will work normally // Have to do this because searchdata has nothing like and_not nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL, string("\"") + (*it)->m_value + "\"", (*it)->m_fieldspec); if (nclause == 0) { reason = "Out of memory"; LOGERR(("wasaQueryToRcl: out of memory\n")); return 0; } break; case WasaQuery::OP_OR: LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n", (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str())); // Create a subquery. Rcl::SearchData *sub = wasaQueryToRcl(config, stemlang, *it, autosuffs, reason); if (sub == 0) { continue; } nclause = new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB, RefCntr(sub)); if (nclause == 0) { LOGERR(("wasaQueryToRcl: out of memory\n")); reason = "Out of memory"; return 0; } } if (mods & WasaQuery::WQM_NOSTEM) nclause->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING); if (mods & WasaQuery::WQM_DIACSENS) nclause->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS); if (mods & WasaQuery::WQM_CASESENS) nclause->addModifier(Rcl::SearchDataClause::SDCM_CASESENS); if ((*it)->m_weight != 1.0) nclause->setWeight((*it)->m_weight); sdata->addClause(nclause); } return sdata; } Rcl::SearchData *wasaStringToRcl(RclConfig *config, const string& stemlang, const string &qs, string &reason, const string& autosuffs) { StringToWasaQuery parser; WasaQuery *wq = parser.stringToQuery(qs, reason); if (wq == 0) return 0; return wasaQueryToRcl(config, stemlang, wq, autosuffs, reason); }