comments
This commit is contained in:
parent
8cd40c7cd9
commit
05a39a163f
1 changed files with 68 additions and 43 deletions
|
@ -26,11 +26,10 @@ using std::vector;
|
||||||
using std::map;
|
using std::map;
|
||||||
using std::set;
|
using std::set;
|
||||||
|
|
||||||
#include "pathut.h"
|
|
||||||
#include "Filter.h"
|
#include "Filter.h"
|
||||||
// Beware: the class changes according to RCL_USE_XATTR, so any file
|
// The class changes according to RCL_USE_XATTR
|
||||||
// including this needs autoconfig.h
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
#include "pathut.h"
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
@ -64,9 +63,12 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class to convert data from a datastore (file-system, firefox
|
* Convert data from file-serialized form (either an actual File
|
||||||
* history, etc.) into possibly one or several documents in internal
|
* System file or a memory image) into one or several documents in
|
||||||
* representation, either for indexing or viewing at query time (gui preview).
|
* internal representation (Rcl::Doc). This can be used for indexing,
|
||||||
|
* or viewing at query time (GUI preview), or extracting an internal
|
||||||
|
* document out of a compound file into a simple one.
|
||||||
|
*
|
||||||
* Things work a little differently when indexing or previewing:
|
* Things work a little differently when indexing or previewing:
|
||||||
* - When indexing, all data has to come from the datastore, and it is
|
* - When indexing, all data has to come from the datastore, and it is
|
||||||
* normally desired that all found subdocuments be returned (ie:
|
* normally desired that all found subdocuments be returned (ie:
|
||||||
|
@ -76,27 +78,20 @@ public:
|
||||||
* so that the full doc identifier is passed in: high level url
|
* so that the full doc identifier is passed in: high level url
|
||||||
* (ie: file path) and internal identifier: ipath, ie: message and
|
* (ie: file path) and internal identifier: ipath, ie: message and
|
||||||
* attachment number.
|
* attachment number.
|
||||||
|
*
|
||||||
|
* Internfile is the part of the code which knows about ipath structure.
|
||||||
|
*
|
||||||
|
* The class has a number of static helper methods which could just as well not
|
||||||
|
* be members and are in there just for namespace reasons.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
class FileInterner {
|
class FileInterner {
|
||||||
public:
|
public:
|
||||||
/// Operation modifier flags
|
/** Operation modifier flags */
|
||||||
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
|
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
|
||||||
/// Return values for internfile()
|
/** Return values for internfile() */
|
||||||
enum Status {FIError, FIDone, FIAgain};
|
enum Status {FIError, FIDone, FIAgain};
|
||||||
|
|
||||||
/**
|
|
||||||
* Get immediate parent for document.
|
|
||||||
*
|
|
||||||
* This is not in general the same as the "parent" document used
|
|
||||||
* with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file,
|
|
||||||
* this would be for example the email containing the attachment.
|
|
||||||
*/
|
|
||||||
static bool getEnclosing(const string &url, const string &ipath,
|
|
||||||
string &eurl, string &eipath, string& udi);
|
|
||||||
|
|
||||||
/** Return last element in ipath, like basename */
|
|
||||||
static std::string getLastIpathElt(const std::string& ipath);
|
|
||||||
|
|
||||||
/** Constructors take the initial step to preprocess the data object and
|
/** Constructors take the initial step to preprocess the data object and
|
||||||
* create the top filter */
|
* create the top filter */
|
||||||
|
|
||||||
|
@ -106,9 +101,13 @@ class FileInterner {
|
||||||
* created for previewing a file).
|
* created for previewing a file).
|
||||||
* - Filter output may be different for previewing and indexing.
|
* - Filter output may be different for previewing and indexing.
|
||||||
*
|
*
|
||||||
* @param fn file name
|
* This constructor is now only used for indexing, the form with
|
||||||
|
* an Rcl::Doc parameter to identify the data is always used
|
||||||
|
* at query time.
|
||||||
|
*
|
||||||
|
* @param fn file name.
|
||||||
* @param stp pointer to updated stat struct.
|
* @param stp pointer to updated stat struct.
|
||||||
* @param cnf Recoll configuration
|
* @param cnf Recoll configuration.
|
||||||
* @param td temporary directory to use as working space if
|
* @param td temporary directory to use as working space if
|
||||||
* decompression needed. Must be private and will be wiped clean.
|
* decompression needed. Must be private and will be wiped clean.
|
||||||
* @param mtype mime type if known. For a compressed file this is the
|
* @param mtype mime type if known. For a compressed file this is the
|
||||||
|
@ -121,25 +120,24 @@ class FileInterner {
|
||||||
/**
|
/**
|
||||||
* Alternate constructor for the case where the data is in memory.
|
* Alternate constructor for the case where the data is in memory.
|
||||||
* This is mainly for data extracted from the web cache. The mime type
|
* This is mainly for data extracted from the web cache. The mime type
|
||||||
* must be set, input must be uncompressed.
|
* must be set, input must be already uncompressed.
|
||||||
*/
|
*/
|
||||||
FileInterner(const string &data, RclConfig *cnf, TempDir &td,
|
FileInterner(const string &data, RclConfig *cnf, TempDir &td,
|
||||||
int flags, const string& mtype);
|
int flags, const string& mtype);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Alternate constructor for the case where it is not known where
|
* Alternate constructor used at query time. We don't know where
|
||||||
* the data will come from. We'll use the doc fields and try our
|
* the data was stored, this is determined from the Rcl::Doc data
|
||||||
* best. This is only used at query time, the idoc was built from index
|
*
|
||||||
* data.
|
* @param idoc Rcl::Doc object built from index data. The back-end
|
||||||
|
* storage identifier (rclbes field) is used to build the
|
||||||
|
* appropriate fetcher which uses the rest of the Doc fields (url,
|
||||||
|
* ipath...) to retrieve the file or a file reference, which we
|
||||||
|
* then process normally.
|
||||||
*/
|
*/
|
||||||
FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, TempDir &td,
|
FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, TempDir &td,
|
||||||
int flags);
|
int flags);
|
||||||
|
|
||||||
/**
|
|
||||||
* Build sig for doc coming from rcldb. This is here because we know how
|
|
||||||
* to query the right backend */
|
|
||||||
static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
|
|
||||||
|
|
||||||
~FileInterner();
|
~FileInterner();
|
||||||
|
|
||||||
void setMissingStore(FIMissingStore *st)
|
void setMissingStore(FIMissingStore *st)
|
||||||
|
@ -150,8 +148,9 @@ class FileInterner {
|
||||||
/**
|
/**
|
||||||
* Turn file or file part into Recoll document.
|
* Turn file or file part into Recoll document.
|
||||||
*
|
*
|
||||||
* For multidocument files (ie: mail folder), this must be called multiple
|
* For multidocument files (ie: mail folder), this must be called
|
||||||
* times to retrieve the subdocuments
|
* multiple times to retrieve the subdocuments.
|
||||||
|
*
|
||||||
* @param doc output document
|
* @param doc output document
|
||||||
* @param ipath internal path. If set by caller, the specified subdoc will
|
* @param ipath internal path. If set by caller, the specified subdoc will
|
||||||
* be returned. Else the next document according to current state will
|
* be returned. Else the next document according to current state will
|
||||||
|
@ -169,7 +168,7 @@ class FileInterner {
|
||||||
|
|
||||||
/** We normally always return text/plain data. A caller can request
|
/** We normally always return text/plain data. A caller can request
|
||||||
* that we stop conversion at the native document type (ie: extracting
|
* that we stop conversion at the native document type (ie: extracting
|
||||||
* an email attachment and starting an external viewer)
|
* an email attachment in its native form for an external viewer)
|
||||||
*/
|
*/
|
||||||
void setTargetMType(const string& tp) {m_targetMType = tp;}
|
void setTargetMType(const string& tp) {m_targetMType = tp;}
|
||||||
|
|
||||||
|
@ -182,16 +181,44 @@ class FileInterner {
|
||||||
we keep it around to save work for our caller, which can get it here */
|
we keep it around to save work for our caller, which can get it here */
|
||||||
TempFile get_imgtmp() {return m_imgtmp;}
|
TempFile get_imgtmp() {return m_imgtmp;}
|
||||||
|
|
||||||
|
const string& getReason() const
|
||||||
|
{
|
||||||
|
return m_reason;
|
||||||
|
}
|
||||||
|
bool ok() const
|
||||||
|
{
|
||||||
|
return m_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get immediate parent for document.
|
||||||
|
*
|
||||||
|
* This is not in general the same as the "parent" document used
|
||||||
|
* with Rcl::Db::addOrUpdate(). The latter is the enclosing file,
|
||||||
|
* this would be for example the email containing the attachment.
|
||||||
|
*/
|
||||||
|
static bool getEnclosing(const string &url, const string &ipath,
|
||||||
|
string &eurl, string &eipath, string& udi);
|
||||||
|
|
||||||
|
/** Return last element in ipath, like basename */
|
||||||
|
static std::string getLastIpathElt(const std::string& ipath);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build sig for doc coming from rcldb. This is here because we know how
|
||||||
|
* to query the right backend. Used to check up-to-dateness at query time */
|
||||||
|
static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
|
||||||
|
|
||||||
/** Extract internal document into temporary file.
|
/** Extract internal document into temporary file.
|
||||||
* This is used mainly for starting an external viewer for a
|
* This is used mainly for starting an external viewer for a
|
||||||
* subdocument (ie: mail attachment).
|
* subdocument (ie: mail attachment). This really would not need to be
|
||||||
|
* a member. It creates a FileInterner object to do the actual work.
|
||||||
* @return true for success.
|
* @return true for success.
|
||||||
* @param temp output reference-counted temp file object (goes
|
* @param temp output reference-counted temp file object (goes
|
||||||
* away magically). Only used if tofile.empty()
|
* away magically). Only used if tofile.empty()
|
||||||
* @param tofile output file if not null
|
* @param tofile output file if not empty.
|
||||||
* @param cnf The recoll config
|
* @param cnf The recoll config
|
||||||
* @param doc Doc data taken from the index. We use it to access the
|
* @param doc Doc data taken from the index. We use it to construct a
|
||||||
* actual document (ie: use mtype, fn, ipath...).
|
* FileInterner object.
|
||||||
*/
|
*/
|
||||||
static bool idocToFile(TempFile& temp, const string& tofile,
|
static bool idocToFile(TempFile& temp, const string& tofile,
|
||||||
RclConfig *cnf, const Rcl::Doc& doc);
|
RclConfig *cnf, const Rcl::Doc& doc);
|
||||||
|
@ -209,12 +236,10 @@ class FileInterner {
|
||||||
static bool maybeUncompressToTemp(TempFile& temp, const string& fn,
|
static bool maybeUncompressToTemp(TempFile& temp, const string& fn,
|
||||||
RclConfig *cnf, const Rcl::Doc& doc);
|
RclConfig *cnf, const Rcl::Doc& doc);
|
||||||
|
|
||||||
const string& getReason() const {return m_reason;}
|
|
||||||
static void getMissingExternal(FIMissingStore *st, string& missing);
|
static void getMissingExternal(FIMissingStore *st, string& missing);
|
||||||
static void getMissingDescription(FIMissingStore *st, string& desc);
|
static void getMissingDescription(FIMissingStore *st, string& desc);
|
||||||
// Parse "missing" file contents into memory struct
|
// Parse "missing" file contents into memory struct
|
||||||
static void getMissingFromDescription(FIMissingStore *st, const string& desc);
|
static void getMissingFromDescription(FIMissingStore *st, const string& desc);
|
||||||
bool ok() {return m_ok;}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static const unsigned int MAXHANDLERS = 20;
|
static const unsigned int MAXHANDLERS = 20;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue