Add db stats option to term explorer

This commit is contained in:
Jean-Francois Dockes 2013-01-10 15:12:43 +01:00
parent 5aad9d50e1
commit 8c5258ce19
7 changed files with 200 additions and 53 deletions

View file

@ -443,11 +443,7 @@ bool RclConfig::addLocalFields(map<string, string> *tgt) const
// filtering don't work well together.
vector<string> RclConfig::getAllMimeTypes() const
{
    // Names listed in the "index" section of mimeconf. When no mimeconf
    // object is available (null pointer), an empty list is returned.
    return mimeconf ? mimeconf->getNames("index") : vector<string>();
}
// Things for suffix comparison. We define a string class and string

View file

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>450</height>
<width>520</width>
<height>465</height>
</rect>
</property>
<property name="sizePolicy">

View file

@ -16,12 +16,16 @@
*/
#include "autoconfig.h"
#include <algorithm>
#include <stdio.h>
#include <unistd.h>
#include <algorithm>
#include <list>
#include <stdio.h>
#include <map>
#include <string>
using std::list;
using std::multimap;
using std::string;
#include <qmessagebox.h>
#include <qpushbutton.h>
@ -40,7 +44,11 @@
#include "spell_w.h"
#include "guiutils.h"
#include "rcldb.h"
#include "searchdata.h"
#include "rclquery.h"
#include "rclhelp.h"
#include "wasatorcl.h"
#include "execmd.h"
#ifdef RCL_USE_ASPELL
#include "rclaspell.h"
@ -48,21 +56,32 @@
void SpellW::init()
{
// Don't change the order, or fix the rest of the code...
/*0*/expTypeCMB->addItem(tr("Wildcards"));
/*1*/expTypeCMB->addItem(tr("Regexp"));
/*2*/expTypeCMB->addItem(tr("Stem expansion"));
m_c2t.clear();
expTypeCMB->addItem(tr("Wildcards"));
m_c2t.push_back(TYPECMB_WILD);
expTypeCMB->addItem(tr("Regexp"));
m_c2t.push_back(TYPECMB_REG);
expTypeCMB->addItem(tr("Stem expansion"));
m_c2t.push_back(TYPECMB_STEM);
#ifdef RCL_USE_ASPELL
bool noaspell = false;
theconfig->getConfParam("noaspell", &noaspell);
if (!noaspell)
/*3*/expTypeCMB->addItem(tr("Spelling/Phonetic"));
if (!noaspell) {
expTypeCMB->addItem(tr("Spelling/Phonetic"));
m_c2t.push_back(TYPECMB_ASPELL);
}
#endif
expTypeCMB->addItem(tr("Show index statistics"));
m_c2t.push_back(TYPECMB_STATS);
int typ = prefs.termMatchType;
if (typ < 0 || typ > expTypeCMB->count())
typ = 0;
expTypeCMB->setCurrentIndex(typ);
vector<comboboxchoice>::const_iterator it =
std::find(m_c2t.begin(), m_c2t.end(), typ);
if (it == m_c2t.end())
it = m_c2t.begin();
int cmbidx = it - m_c2t.begin();
expTypeCMB->setCurrentIndex(cmbidx);
// Stemming language combobox
stemLangCMB->clear();
@ -76,7 +95,6 @@ void SpellW::init()
stemLangCMB->
addItem(QString::fromAscii(it->c_str(), it->length()));
}
stemLangCMB->setEnabled(expTypeCMB->currentIndex()==2);
(void)new HelpClient(this);
HelpClient::installMap((const char *)this->objectName().toUtf8(),
@ -90,9 +108,6 @@ void SpellW::init()
connect(dismissPB, SIGNAL(clicked()), this, SLOT(close()));
connect(expTypeCMB, SIGNAL(activated(int)), this, SLOT(modeSet(int)));
QStringList labels(tr("Term"));
labels.push_back(tr("Doc. / Tot."));
resTW->setHorizontalHeaderLabels(labels);
resTW->setShowGrid(0);
resTW->horizontalHeader()->setResizeMode(0, QHeaderView::Stretch);
resTW->verticalHeader()->setDefaultSectionSize(20);
@ -103,6 +118,8 @@ void SpellW::init()
resTW->setColumnWidth(0, 200);
resTW->setColumnWidth(1, 150);
resTW->installEventFilter(this);
modeSet(cmbidx);
}
static const int maxexpand = 10000;
@ -110,9 +127,14 @@ static const int maxexpand = 10000;
/* Expand term according to current mode */
void SpellW::doExpand()
{
int idx = expTypeCMB->currentIndex();
if (idx < 0 || idx >= int(m_c2t.size()))
idx = 0;
comboboxchoice mode = m_c2t[idx];
// Can't clear qt4 table widget: resets column headers too
resTW->setRowCount(0);
if (baseWordLE->text().isEmpty())
if (baseWordLE->text().isEmpty() && mode != TYPECMB_STATS)
return;
string reason;
@ -122,25 +144,24 @@ void SpellW::doExpand()
return;
}
string expr = string((const char *)baseWordLE->text().toUtf8());
list<string> suggs;
prefs.termMatchType = expTypeCMB->currentIndex();
Rcl::Db::MatchType mt = Rcl::Db::ET_WILD;
switch(expTypeCMB->currentIndex()) {
case 0: mt = Rcl::Db::ET_WILD; break;
case 1:mt = Rcl::Db::ET_REGEXP; break;
case 2:mt = Rcl::Db::ET_STEM; break;
Rcl::Db::MatchType mt;
switch(mode) {
case TYPECMB_WILD: mt = Rcl::Db::ET_WILD; break;
case TYPECMB_REG: mt = Rcl::Db::ET_REGEXP; break;
case TYPECMB_STEM: mt = Rcl::Db::ET_STEM; break;
default: mt = Rcl::Db::ET_WILD;
}
Rcl::TermMatchResult res;
switch (expTypeCMB->currentIndex()) {
case 0:
case 1:
case 2:
string expr = string((const char *)baseWordLE->text().toUtf8());
switch (mode) {
case TYPECMB_WILD:
default:
case TYPECMB_REG:
case TYPECMB_STEM:
{
string l_stemlang = (const char*)stemLangCMB->currentText().toAscii();
string l_stemlang = qs2utf8s(stemLangCMB->currentText());
if (!rcldb->termMatch(mt, l_stemlang, expr, res, maxexpand)) {
LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
@ -155,7 +176,8 @@ void SpellW::doExpand()
break;
#ifdef RCL_USE_ASPELL
case 3: {
case TYPECMB_ASPELL:
{
LOGDEB(("SpellW::doExpand: aspelling\n"));
if (!aspell) {
QMessageBox::warning(0, "Recoll",
@ -182,7 +204,15 @@ void SpellW::doExpand()
#endif // TESTING_XAPIAN_SPELL
statsLBL->setText(tr("%1 results").arg(res.entries.size()));
}
#endif
break;
#endif // RCL_USE_ASPELL
case TYPECMB_STATS:
{
showStats();
return;
}
break;
}
@ -224,6 +254,93 @@ void SpellW::doExpand()
}
}
void SpellW::showStats()
{
statsLBL->setText("");
int row = 0;
Rcl::TermMatchResult res;
if (!rcldb->termMatch(Rcl::Db::ET_WILD, "", "azbogusaz", res, 1)) {
LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
return;
}
resTW->setRowCount(row+1);
resTW->setItem(row, 0,
new QTableWidgetItem(tr("Number of documents")));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::number(res.dbdoccount)));
resTW->setRowCount(row+1);
resTW->setItem(row, 0,
new QTableWidgetItem(tr("Average terms per document")));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::number(res.dbavgdoclen)));
resTW->setRowCount(row+1);
resTW->setItem(row, 0,
new QTableWidgetItem(tr("Smallest document length")));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::number(res.mindoclen)));
resTW->setRowCount(row+1);
resTW->setItem(row, 0,
new QTableWidgetItem(tr("Longest document length")));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::number(res.maxdoclen)));
if (!thestableconfig)
return;
ExecCmd cmd;
vector<string> args;
int status;
args.push_back("-sk");
args.push_back(thestableconfig->getDbDir());
string output;
status = cmd.doexec("du", args, 0, &output);
int dbkbytes = 0;
if (!status) {
dbkbytes = atoi(output.c_str());
}
resTW->setRowCount(row+1);
resTW->setItem(row, 0,
new QTableWidgetItem(tr("Database directory size")));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::fromUtf8(
displayableBytes(dbkbytes*1024).c_str())));
vector<string> allmimetypes = thestableconfig->getAllMimeTypes();
multimap<int, string> mtbycnt;
for (vector<string>::const_iterator it = allmimetypes.begin();
it != allmimetypes.end(); it++) {
string reason;
string q = string("mime:") + *it;
Rcl::SearchData *sd =
wasaStringToRcl(thestableconfig, "", q, reason);
RefCntr<Rcl::SearchData> rq(sd);
Rcl::Query query(rcldb);
if (!query.setQuery(rq)) {
LOGERR(("Query setup failed: %s",query.getReason().c_str()));
return;
}
int cnt = query.getResCnt();
mtbycnt.insert(pair<int,string>(cnt,*it));
}
resTW->setRowCount(row+1);
resTW->setItem(row, 0, new QTableWidgetItem(tr("MIME types:")));
resTW->setItem(row++, 1, new QTableWidgetItem(""));
for (multimap<int, string>::const_reverse_iterator it = mtbycnt.rbegin();
it != mtbycnt.rend(); it++) {
resTW->setRowCount(row+1);
resTW->setItem(row, 0, new QTableWidgetItem(
QString::fromUtf8(it->second.c_str())));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::number(it->first)));
}
}
void SpellW::wordChanged(const QString &text)
{
if (text.isEmpty()) {
@ -242,12 +359,34 @@ void SpellW::textDoubleClicked(int row, int)
emit(wordSelect(item->text()));
}
void SpellW::modeSet(int mode)
void SpellW::modeSet(int idx)
{
if (mode == 2)
if (idx < 0 || idx > int(m_c2t.size()))
return;
comboboxchoice mode = m_c2t[idx];
resTW->setRowCount(0);
if (mode == TYPECMB_STEM)
stemLangCMB->setEnabled(true);
else
stemLangCMB->setEnabled(false);
if (mode == TYPECMB_STATS)
baseWordLE->setEnabled(false);
else
baseWordLE->setEnabled(true);
if (mode == TYPECMB_STATS) {
QStringList labels(tr("Item"));
labels.push_back(tr("Value"));
resTW->setHorizontalHeaderLabels(labels);
doExpand();
} else {
QStringList labels(tr("Term"));
labels.push_back(tr("Doc. / Tot."));
resTW->setHorizontalHeaderLabels(labels);
prefs.termMatchType = mode;
}
}
void SpellW::copy()

View file

@ -17,11 +17,12 @@
#ifndef _ASPELL_W_H_INCLUDED_
#define _ASPELL_W_H_INCLUDED_
#include <vector>
#include <qvariant.h>
#include <qwidget.h>
#include "ui_spell.h"
class SpellW : public QWidget, public Ui::SpellBase
{
Q_OBJECT
@ -33,8 +34,6 @@ public:
init();
}
~SpellW(){}
virtual bool eventFilter(QObject *target, QEvent *event );
public slots:
virtual void doExpand();
@ -47,8 +46,13 @@ signals:
void wordSelect(QString);
private:
enum comboboxchoice {TYPECMB_WILD, TYPECMB_REG, TYPECMB_STEM,
TYPECMB_ASPELL, TYPECMB_STATS};
// combobox index to expansion type
std::vector<comboboxchoice> m_c2t;
void init();
void copy();
void showStats();
};
#endif /* _ASPELL_W_H_INCLUDED_ */

View file

@ -1707,7 +1707,10 @@ bool Db::termMatch(MatchType typ, const string &lang,
Xapian::Database xdb = m_ndb->xrdb;
XAPTRY(res.dbdoccount = xdb.get_doccount();
res.dbavgdoclen = xdb.get_avlength(), xdb, m_reason);
res.dbavgdoclen = xdb.get_avlength();
res.mindoclen = xdb.get_doclength_lower_bound();
res.maxdoclen = xdb.get_doclength_upper_bound();
, xdb, m_reason);
if (!m_reason.empty())
return false;

View file

@ -121,6 +121,8 @@ public:
// Index-wide stats
unsigned int dbdoccount;
double dbavgdoclen;
size_t mindoclen;
size_t maxdoclen;
};
inline bool has_prefix(const string& trm)

View file

@ -29,6 +29,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include <math.h>
#include <string>
#include <iostream>
@ -603,19 +604,21 @@ string displayableBytes(off_t size)
char sizebuf[50];
const char *unit;
double roundable = 0;
if (size < 1000) {
unit = " B ";
roundable = double(size);
} else if (size < 1E6) {
unit = " KB ";
size /= 1000;
roundable = double(size) / 1E3;
} else if (size < 1E9) {
unit = " MB ";
size /= (1E6);
roundable = double(size) / 1E6;
} else {
unit = " GB ";
size /= (1E9);
roundable = double(size) / 1E9;
}
size = round(roundable);
sprintf(sizebuf, OFFTPC "%s", size, unit);
return string(sizebuf);
}