From 9627f84e3c79a824c70a90f9b0668ab2d69f2c81 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Tue, 26 Mar 2013 10:42:46 +0100 Subject: [PATCH] added basic facility for result path translation --- src/common/rclconfig.cpp | 45 ++++++++++++++++- src/common/rclconfig.h | 3 ++ src/rcldb/rcldb.cpp | 22 +++++++-- website/download.html | 101 +++++++++++++++++++++++---------------- website/features.html | 7 ++- 5 files changed, 132 insertions(+), 46 deletions(-) diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index f8ffd521..b968f2a7 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -95,6 +95,7 @@ void RclConfig::zeroMe() { mimeconf = 0; mimeview = 0; m_fields = 0; + m_ptrans = 0; m_stopsuffixes = 0; m_maxsufflen = 0; m_stpsuffstate.init(this, 0, "recoll_noindex"); @@ -217,6 +218,8 @@ RclConfig::RclConfig(const string *argcnf) if (!readFieldsConfig(cnferrloc)) return; + m_ptrans = new ConfSimple(path_cat(m_confdir, "ptrans").c_str(), 1); + m_ok = true; setKeyDir(cstr_null); @@ -744,7 +747,7 @@ bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp) _fld.c_str(), pit->second.pfx.c_str())); return true; } else { - LOGDEB1(("RclConfig::readFieldsConfig: no prefix for field [%s]\n", + LOGDEB1(("RclConfig::getFieldTraits: no prefix for field [%s]\n", fld.c_str())); *ftpp = 0; return false; @@ -936,6 +939,41 @@ string RclConfig::getDbDir() const return path_canon(dbdir); } +void RclConfig::urlrewrite(const string& dbdir, string& url) const +{ + LOGDEB2(("RclConfig::urlrewrite: dbdir [%s] url [%s]\n", + dbdir.c_str(), url.c_str())); + + // Do path translations exist for this index ? 
+ if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) { + LOGDEB2(("RclConfig::urlrewrite: no paths translations (m_ptrans %p)\n", + m_ptrans)); + return; + } + + string path = fileurltolocalpath(url); + if (path.empty()) { + LOGDEB2(("RclConfig::urlrewrite: not file url\n")); + return; + } + + // For each translation check if the prefix matches the input path, + // replace and return the result if it does. + vector opaths = m_ptrans->getNames(dbdir); + for (vector::const_iterator it = opaths.begin(); + it != opaths.end(); it++) { + if (it->size() <= path.size() && !path.compare(0, it->size(), *it)) { + string npath; + // This call always succeeds because the key comes from getNames() + if (m_ptrans->get(*it, npath, dbdir)) { + path = path.replace(0, it->size(), npath); + url = "file://" + path; + } + break; + } + } +} + bool RclConfig::sourceChanged() const { if (m_conf && m_conf->sourceChanged()) @@ -948,6 +986,8 @@ bool RclConfig::sourceChanged() const return true; if (m_fields && m_fields->sourceChanged()) return true; + if (m_ptrans && m_ptrans->sourceChanged()) + return true; return false; } @@ -1179,6 +1219,7 @@ void RclConfig::freeAll() delete mimeconf; delete mimeview; delete m_fields; + delete m_ptrans; delete STOPSUFFIXES; // just in case zeroMe(); @@ -1204,6 +1245,8 @@ void RclConfig::initFrom(const RclConfig& r) mimeview = new ConfStack(*(r.mimeview)); if (r.m_fields) m_fields = new ConfStack(*(r.m_fields)); + if (r.m_ptrans) + m_ptrans = new ConfSimple(*(r.m_ptrans)); m_fldtotraits = r.m_fldtotraits; m_aliastocanon = r.m_aliastocanon; m_storedFields = r.m_storedFields; diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index e7e546b4..f53d1a92 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -150,6 +150,8 @@ class RclConfig { string getPidfile() const; /** Get indexing status file name */ string getIdxStatusFile() const; + /** Do path translation according to the ptrans table */ + void urlrewrite(const string& dbdir, string& 
url) const; /** Get Web Queue directory name */ string getWebQueueDir() const; @@ -279,6 +281,7 @@ class RclConfig { ConfStack *mimeconf; // but their content may depend on it. ConfStack *mimeview; // ConfStack *m_fields; + ConfSimple *m_ptrans; // Paths translations map m_fldtotraits; // Field to field params map m_aliastocanon; set m_storedFields; diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 1dc92c67..65ac5251 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -233,8 +233,21 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, if (!parms.ok()) return false; - // Special cases: + // Compute what index this comes from, and check for path translations + string dbdir = m_rcldb->m_basedir; + if (!m_rcldb->m_extraDbs.empty()) { + // As per trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID + unsigned int idxi = (docid-1) % (m_rcldb->m_extraDbs.size()+1); + // idxi is in [0, extraDbs.size()]. 0 is the base index, 1-n index + // into the additional dbs array + if (idxi) { + dbdir = m_rcldb->m_extraDbs[idxi - 1]; + } + } parms.get(Doc::keyurl, doc.url); + m_rcldb->m_config->urlrewrite(dbdir, doc.url); + + // Special cases: parms.get(Doc::keytp, doc.mimetype); parms.get(Doc::keyfmt, doc.fmtime); parms.get(Doc::keydmt, doc.dmtime); @@ -264,6 +277,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, if (doc.meta.find(*it) == doc.meta.end()) parms.get(*it, doc.meta[*it]); } + doc.meta[Doc::keyurl] = doc.url; doc.meta[Doc::keymt] = doc.dmtime.empty() ? 
doc.fmtime : doc.dmtime; return true; } @@ -579,14 +593,16 @@ int Db::termDocCnt(const string& _term) return res; } -bool Db::addQueryDb(const string &dir) +bool Db::addQueryDb(const string &_dir) { - LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb, + string dir = _dir; + LOGDEB0(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb, (m_ndb)?m_ndb->m_iswritable:0, dir.c_str())); if (!m_ndb) return false; if (m_ndb->m_iswritable) return false; + dir = path_canon(dir); if (find(m_extraDbs.begin(), m_extraDbs.end(), dir) == m_extraDbs.end()) { m_extraDbs.push_back(dir); } diff --git a/website/download.html b/website/download.html index f45a950b..2872fc42 100644 --- a/website/download.html +++ b/website/download.html @@ -107,7 +107,7 @@

recoll-1.18.1.tar.gz.

- + --> -

Prerequisites for building from source:

-
    +

    Prerequisites for building from source:

    +
    • C++ compiler. Its absence sometimes manifests itself by strange messages about iconv_open (fixed after 1.13.04).
    • @@ -144,22 +144,22 @@ -
    • X11 development files.
    • +
    • X11 development files.
    • -
    • zlib development files.

      +
    • zlib development files.
    • -
    • Qt development files: Qt 4.4 or newer. The Recoll GUI +

    • Qt development files: Qt 4.4 or newer. The Recoll GUI will not build with Qt releases older than 4.4.

    • -
    • Qt webkit development: Qt WebKit is quite often - distributed apart from the main Qt lib. It is possible to +

    • Qt webkit development: Qt WebKit is quite often + distributed apart from the main Qt lib. It is possible to configure Recoll not to use Qt WebKit (see configure --help).

    • -
    • Python development package: you can avoid needing this by - configuring with --disable-python-module.
    • +
    • Python development package: you can avoid needing this by + configuring with --disable-python-module.
    • -
    +

Source repository:

The Recoll source repository is @@ -211,7 +211,7 @@

Ubuntu

-

There are Personal Package Archives on launchpad.net for +

There are Personal Package Archives on launchpad.net for Recoll, kio-recoll and recoll-lens. These were built from the latest versions, for a set of Ubuntu series. starting at @@ -221,38 +221,60 @@ sudo apt-get update sudo apt-get install recoll -

Source code for the lens (also included in the main - recoll tar file):
- For 1.18 installations: - recoll-lens-1.18.1.2997.tar.gz
- For 1.17: - recoll-lens-1.17.2.2697.tar.gz

-

The packages in the standard repository and on the PPA - are a bit different in the sense that the Python extension - is included in the PPA package, but it's a separate - package in the standard repository. This can give rise to - errors about overwriting the Python module when switching - between versions. Typically, the error message would be - like the following:

-

+      

Source code for the lens (also included in the main + recoll tar file):
+ For 1.18 installations: + recoll-lens-1.18.1.2997.tar.gz
+ For 1.17: + recoll-lens-1.17.2.2697.tar.gz

+ +

The packages in the standard repository and on the PPA + are a bit different in the sense that the Python extension + is included in the PPA package, but it's a separate + package in the standard repository. This can give rise to + errors about overwriting the Python module when switching + between versions. Typically, the error message would be + like the following:

+ +

     ErrorMessage: trying to overwrite '/usr/lib/python2.7/dist-packages/recoll.so', which is also in package recoll 1.18.1-1~ppa1~quantal1
-

If this happens, you just need to delete - the previous package(s) before installing the other one(s) - instead of performing an upgrade.

+

If this happens, you just need to delete + the previous package(s) before installing the other one(s) + instead of performing an upgrade.

+ +
+

Notes for Ubuntu Lucid

-

Linux Mint

-

The Ubuntu PPA works perfectly for Mint 13 (and probably - other releases too). Just follow the instructions for Ubuntu.

+
    + +
  • Under lucid you will need to add the + + Xapian backports PPA to provide the libxapian22 + package
  • + +
  • The rclepub filter apparently needs Python + 2.7. You will need to install it and modify the first + line of the filter script to execute python2.7 + instead of python.
  • + +
+
+ +

Linux Mint

+ +

The Ubuntu PPA works perfectly for Mint 13 (and probably + other releases too). Just follow the instructions for Ubuntu.

RPMS

-

You'll need to install the Xapian, Qt, Qt-Webkit and zlib - development packages if you want use the source rpms.

+

You'll need to install the Xapian, Qt, Qt-Webkit and zlib + development packages if you want to use the source rpms.

Fedora

-

Recoll is present in the standard Fedora package - repositories starting from F-12. The new versions are tracked quite + +

Recoll is present in the standard Fedora package repositories + starting from F-12. The new versions are tracked quite closely, so I don't build the rpms any more (email me if you need one).

@@ -260,9 +282,8 @@

Recoll is in the KDE:Extra repository - + You just need to add the repository to your software sources (Yast2->software->Software repositories).
diff --git a/website/features.html b/website/features.html index 73c32079..51037fdc 100644 --- a/website/features.html +++ b/website/features.html @@ -356,8 +356,7 @@ application/x-tar = execm rcltar features that help to specify an efficient search and to manage the results. However it maybe sometimes preferable to use a simpler tool with a better integration with your desktop - interfaces. Several solutions exist, at the moment mostly for - the KDE desktop:

+ interfaces. Several solutions exist:

Recoll also has