This commit is contained in:
Jean-Francois Dockes 2012-12-20 08:05:12 +01:00
parent 8cd40c7cd9
commit 05a39a163f

View file

@ -26,11 +26,10 @@ using std::vector;
using std::map; using std::map;
using std::set; using std::set;
#include "pathut.h"
#include "Filter.h" #include "Filter.h"
// Beware: the class changes according to RCL_USE_XATTR, so any file // The class changes according to RCL_USE_XATTR
// including this needs autoconfig.h
#include "autoconfig.h" #include "autoconfig.h"
#include "pathut.h"
class RclConfig; class RclConfig;
namespace Rcl { namespace Rcl {
@ -64,9 +63,12 @@ public:
}; };
/** /**
* A class to convert data from a datastore (file-system, firefox * Convert data from file-serialized form (either an actual File
* history, etc.) into possibly one or several documents in internal * System file or a memory image) into one or several documents in
* representation, either for indexing or viewing at query time (gui preview). * internal representation (Rcl::Doc). This can be used for indexing,
* or viewing at query time (GUI preview), or extracting an internal
* document out of a compound file into a simple one.
*
* Things work a little differently when indexing or previewing: * Things work a little differently when indexing or previewing:
* - When indexing, all data has to come from the datastore, and it is * - When indexing, all data has to come from the datastore, and it is
* normally desired that all found subdocuments be returned (ie: * normally desired that all found subdocuments be returned (ie:
@ -76,27 +78,20 @@ public:
* so that the full doc identifier is passed in: high level url * so that the full doc identifier is passed in: high level url
* (ie: file path) and internal identifier: ipath, ie: message and * (ie: file path) and internal identifier: ipath, ie: message and
* attachment number. * attachment number.
*
* Internfile is the part of the code which knows about ipath structure.
*
* The class has a number of static helper methods which could just as well not
* be members and are in there just for namespace reasons.
*
*/ */
class FileInterner { class FileInterner {
public: public:
/// Operation modifier flags /** Operation modifier flags */
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype}; enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
/// Return values for internfile() /** Return values for internfile() */
enum Status {FIError, FIDone, FIAgain}; enum Status {FIError, FIDone, FIAgain};
/**
* Get immediate parent for document.
*
* This is not in general the same as the "parent" document used
* with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file,
* this would be for example the email containing the attachment.
*/
static bool getEnclosing(const string &url, const string &ipath,
string &eurl, string &eipath, string& udi);
/** Return last element in ipath, like basename */
static std::string getLastIpathElt(const std::string& ipath);
/** Constructors take the initial step to preprocess the data object and /** Constructors take the initial step to preprocess the data object and
* create the top filter */ * create the top filter */
@ -106,13 +101,17 @@ class FileInterner {
* created for previewing a file). * created for previewing a file).
* - Filter output may be different for previewing and indexing. * - Filter output may be different for previewing and indexing.
* *
* @param fn file name * This constructor is now only used for indexing, the form with
* an Rcl::Doc parameter to identify the data is always used
* at query time.
*
* @param fn file name.
* @param stp pointer to updated stat struct. * @param stp pointer to updated stat struct.
* @param cnf Recoll configuration * @param cnf Recoll configuration.
* @param td temporary directory to use as working space if * @param td temporary directory to use as working space if
* decompression needed. Must be private and will be wiped clean. * decompression needed. Must be private and will be wiped clean.
* @param mtype mime type if known. For a compressed file this is the * @param mtype mime type if known. For a compressed file this is the
* mime type for the uncompressed version. * mime type for the uncompressed version.
*/ */
FileInterner(const string &fn, const struct stat *stp, FileInterner(const string &fn, const struct stat *stp,
RclConfig *cnf, TempDir &td, int flags, RclConfig *cnf, TempDir &td, int flags,
@ -121,25 +120,24 @@ class FileInterner {
/** /**
* Alternate constructor for the case where the data is in memory. * Alternate constructor for the case where the data is in memory.
* This is mainly for data extracted from the web cache. The mime type * This is mainly for data extracted from the web cache. The mime type
* must be set, input must be uncompressed. * must be set, input must be already uncompressed.
*/ */
FileInterner(const string &data, RclConfig *cnf, TempDir &td, FileInterner(const string &data, RclConfig *cnf, TempDir &td,
int flags, const string& mtype); int flags, const string& mtype);
/** /**
* Alternate constructor for the case where it is not known where * Alternate constructor used at query time. We don't know where
* the data will come from. We'll use the doc fields and try our * the data was stored, this is determined from the Rcl::Doc data
* best. This is only used at query time, the idoc was built from index *
* data. * @param idoc Rcl::Doc object built from index data. The back-end
* storage identifier (rclbes field) is used to build the
* appropriate fetcher which uses the rest of the Doc fields (url,
* ipath...) to retrieve the file or a file reference, which we
* then process normally.
*/ */
FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, TempDir &td, FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, TempDir &td,
int flags); int flags);
/**
* Build sig for doc coming from rcldb. This is here because we know how
* to query the right backend */
static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
~FileInterner(); ~FileInterner();
void setMissingStore(FIMissingStore *st) void setMissingStore(FIMissingStore *st)
@ -150,8 +148,9 @@ class FileInterner {
/** /**
* Turn file or file part into Recoll document. * Turn file or file part into Recoll document.
* *
* For multidocument files (ie: mail folder), this must be called multiple * For multidocument files (ie: mail folder), this must be called
* times to retrieve the subdocuments * multiple times to retrieve the subdocuments.
*
* @param doc output document * @param doc output document
* @param ipath internal path. If set by caller, the specified subdoc will * @param ipath internal path. If set by caller, the specified subdoc will
* be returned. Else the next document according to current state will * be returned. Else the next document according to current state will
@ -169,7 +168,7 @@ class FileInterner {
/** We normally always return text/plain data. A caller can request /** We normally always return text/plain data. A caller can request
* that we stop conversion at the native document type (ie: extracting * that we stop conversion at the native document type (ie: extracting
* an email attachment and starting an external viewer) * an email attachment in its native form for an external viewer)
*/ */
void setTargetMType(const string& tp) {m_targetMType = tp;} void setTargetMType(const string& tp) {m_targetMType = tp;}
@ -182,16 +181,44 @@ class FileInterner {
we keep it around to save work for our caller, which can get it here */ we keep it around to save work for our caller, which can get it here */
TempFile get_imgtmp() {return m_imgtmp;} TempFile get_imgtmp() {return m_imgtmp;}
/** Read-only access to the m_reason member; does not modify the object. */
const string& getReason() const { return m_reason; }
/** Report the current value of the m_ok member; does not modify the object. */
bool ok() const { return m_ok; }
/**
* Get immediate parent for document.
*
* This is not in general the same as the "parent" document used
* with Rcl::Db::addOrUpdate(). The latter is the enclosing file,
* this would be for example the email containing the attachment.
*/
static bool getEnclosing(const string &url, const string &ipath,
string &eurl, string &eipath, string& udi);
/** Return last element in ipath, like basename */
static std::string getLastIpathElt(const std::string& ipath);
/**
* Build sig for doc coming from rcldb. This is here because we know how
* to query the right backend. Used to check up-to-dateness at query time */
static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
/** Extract internal document into temporary file. /** Extract internal document into temporary file.
* This is used mainly for starting an external viewer for a * This is used mainly for starting an external viewer for a
* subdocument (ie: mail attachment). * subdocument (ie: mail attachment). This really would not need to be
* a member. It creates a FileInterner object to do the actual work.
* @return true for success. * @return true for success.
* @param temp output reference-counted temp file object (goes * @param temp output reference-counted temp file object (goes
* away magically). Only used if tofile.empty() * away magically). Only used if tofile.empty()
* @param tofile output file if not null * @param tofile output file if not empty.
* @param cnf The recoll config * @param cnf The recoll config
* @param doc Doc data taken from the index. We use it to access the * @param doc Doc data taken from the index. We use it to construct a
* actual document (ie: use mtype, fn, ipath...). * FileInterner object.
*/ */
static bool idocToFile(TempFile& temp, const string& tofile, static bool idocToFile(TempFile& temp, const string& tofile,
RclConfig *cnf, const Rcl::Doc& doc); RclConfig *cnf, const Rcl::Doc& doc);
@ -209,12 +236,10 @@ class FileInterner {
static bool maybeUncompressToTemp(TempFile& temp, const string& fn, static bool maybeUncompressToTemp(TempFile& temp, const string& fn,
RclConfig *cnf, const Rcl::Doc& doc); RclConfig *cnf, const Rcl::Doc& doc);
const string& getReason() const {return m_reason;}
static void getMissingExternal(FIMissingStore *st, string& missing); static void getMissingExternal(FIMissingStore *st, string& missing);
static void getMissingDescription(FIMissingStore *st, string& desc); static void getMissingDescription(FIMissingStore *st, string& desc);
// Parse "missing" file contents into memory struct // Parse "missing" file contents into memory struct
static void getMissingFromDescription(FIMissingStore *st, const string& desc); static void getMissingFromDescription(FIMissingStore *st, const string& desc);
bool ok() {return m_ok;}
private: private:
static const unsigned int MAXHANDLERS = 20; static const unsigned int MAXHANDLERS = 20;