/* Copyright (C) 2004 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef _RCLCONFIG_H_INCLUDED_ #define _RCLCONFIG_H_INCLUDED_ #include "autoconfig.h" #include #include #include #include #include #include UNORDERED_SET_INCLUDE using std::string; using std::vector; using std::pair; using std::set; using std::map; #include "conftree.h" #include "smallut.h" class RclConfig; // A small class used for parameters that need to be computed from the // config string, and which can change with the keydir. Minimize work // by using the keydirgen and a saved string to avoid unneeded // recomputations class ParamStale { public: RclConfig *parent; ConfNull *conffile; string paramname; bool active; // Check at init if config defines name at all int savedkeydirgen; string savedvalue; ParamStale(RclConfig *rconf, const string& nm); void init(ConfNull *cnf); bool needrecompute(); }; // Hold the description for an external metadata-gathering command struct MDReaper { string fieldname; vector cmdv; }; // Data associated to a indexed field name: struct FieldTraits { string pfx; // indexing prefix, int wdfinc; // Index time term frequency increment (default 1) double boost; // Query time boost (default 1.0) bool pfxonly; // Suppress prefix-less indexing bool noterms; // Don't add term to highlight data (e.g.: rclbes) FieldTraits() : wdfinc(1), boost(1.0), pfxonly(false), noterms(false) {} }; class RclConfig { public: // Constructor: we normally look for a configuration file, except // if this was specified on the command line and passed through // argcnf RclConfig(const string *argcnf = 0); RclConfig(const RclConfig &r) : m_oldstpsuffstate(this, "recoll_noindex"), m_stpsuffstate(this, "noContentSuffixes"), m_skpnstate(this, "skippedNames"), m_rmtstate(this, "indexedmimetypes"), m_xmtstate(this, "excludedmimetypes"), m_mdrstate(this, "metadatacmds") { initFrom(r); } ~RclConfig() { freeAll(); } // Return a writable clone of the main config. This belongs to the // caller (must delete it when done) ConfNull *cloneMainConfig(); /** (re)Read recoll.conf */ bool updateMainConfig(); bool ok() const {return m_ok;} const string &getReason() const {return m_reason;} /** Return the directory where this configuration is stored. * This was possibly silently created by the rclconfig * constructor it it is the default one (~/.recoll) and it did * not exist yet. */ string getConfDir() const {return m_confdir;} /** Check if the config files were modified since we read them */ bool sourceChanged() const; /** Returns true if this is ~/.recoll */ bool isDefaultConfig() const; /** Get the local value for /usr/local/share/recoll/ */ const string& getDatadir() const {return m_datadir;} /** Set current directory reference, and fetch automatic parameters. */ void setKeyDir(const string &dir); string getKeyDir() const {return m_keydir;} /** Get generic configuration parameter according to current keydir */ bool getConfParam(const string &name, string &value, bool shallow=false) const { if (m_conf == 0) return false; return m_conf->get(name, value, m_keydir, shallow); } /** Variant with autoconversion to int */ bool getConfParam(const string &name, int *value, bool shallow=false) const; /** Variant with autoconversion to bool */ bool getConfParam(const string &name, bool *value, bool shallow=false) const; /** Variant with conversion to vector * (stringToStrings). Can fail if the string is malformed. */ bool getConfParam(const string &name, vector *value, bool shallow=false) const; /** Variant with conversion to vector */ bool getConfParam(const string &name, vector *value, bool shallow=false) const; enum ThrStage {ThrIntern=0, ThrSplit=1, ThrDbWrite=2}; pair getThrConf(ThrStage who) const; /** * Get list of config names under current sk, with possible * wildcard filtering */ vector getConfNames(const char *pattern = 0) const { return m_conf->getNames(m_keydir, pattern); } /** Check if name exists anywhere in config */ bool hasNameAnywhere(const string& nm) const { return m_conf? m_conf->hasNameAnywhere(nm) : false; } /** Get default charset for current keydir (was set during setKeydir) * filenames are handled differently */ const string &getDefCharset(bool filename = false) const; /** Get list of top directories. This is needed from a number of places * and needs some cleaning-up code. An empty list is always an error, no * need for other status */ vector getTopdirs() const; string getConfdirPath(const char *varname, const char *dflt) const; /** Get database directory */ string getDbDir() const; /** Get stoplist file name */ string getStopfile() const; /** Get synonym groups file name */ string getSynGroupsFile() const; /** Get indexing pid file name */ string getPidfile() const; /** Get indexing status file name */ string getIdxStatusFile() const; /** Do path translation according to the ptrans table */ void urlrewrite(const string& dbdir, string& url) const; ConfSimple *getPTrans() { return m_ptrans; } /** Get Web Queue directory name */ string getWebQueueDir() const; /** Get list of skipped file names for current keydir */ vector& getSkippedNames(); /** Get list of skipped paths patterns. Doesn't depend on the keydir */ vector getSkippedPaths() const; /** Get list of skipped paths patterns, daemon version (may add some) Doesn't depend on the keydir */ vector getDaemSkippedPaths() const; /** * mimemap: Check if file name should be ignored because of suffix * * The list of ignored suffixes is initialized on first call, and * not changed for subsequent setKeydirs. */ bool inStopSuffixes(const string& fn); /** * Check in mimeconf if input mime type is a compressed one, and * return command to uncompress if it is. * * The returned command has substitutable places for input file name * and temp dir name, and will return output name */ bool getUncompressor(const string &mtpe, vector& cmd) const; /** mimemap: compute mimetype */ string getMimeTypeFromSuffix(const string &suffix) const; /** mimemap: get a list of all indexable mime types defined */ vector getAllMimeTypes() const; /** mimemap: Get appropriate suffix for mime type. This is inefficient */ string getSuffixFromMimeType(const string &mt) const; /** mimeconf: get input filter for mimetype */ string getMimeHandlerDef(const string &mimetype, bool filtertypes=false); /** For lines like: "name = some value; attr1 = value1; attr2 = val2" * Separate the value and store the attributes in a ConfSimple * @param whole the raw value. No way to escape a semi-colon in there. */ static bool valueSplitAttributes(const string& whole, string& value, ConfSimple& attrs) ; /** Return icon path for mime type and tag */ string getMimeIconPath(const string &mt, const string& apptag) const; /** mimeconf: get list of file categories */ bool getMimeCategories(vector&) const; /** mimeconf: is parameter one of the categories ? */ bool isMimeCategory(string&) const; /** mimeconf: get list of mime types for category */ bool getMimeCatTypes(const string& cat, vector&) const; /** mimeconf: get list of gui filters (doc cats by default */ bool getGuiFilterNames(vector&) const; /** mimeconf: get query lang frag for named filter */ bool getGuiFilter(const string& filtername, string& frag) const; /** fields: get field prefix from field name. Use additional query aliases if isquery is set */ bool getFieldTraits(const string& fldname, const FieldTraits **, bool isquery = false) const; const set& getStoredFields() const {return m_storedFields;} set getIndexedFields() const; /** Get canonic name for possible alias */ string fieldCanon(const string& fld) const; /** Get canonic name for possible alias, including query-only aliases */ string fieldQCanon(const string& fld) const; /** Get xattr name to field names translations */ const map& getXattrToField() const {return m_xattrtofld;} /** Get value of a parameter inside the "fields" file. Only some filters * use this (ie: mh_mail). The information specific to a given filter * is typically stored in a separate section(ie: [mail]) */ vector getFieldSectNames(const string &sk, const char* = 0) const; bool getFieldConfParam(const string &name, const string &sk, string &value) const; /** mimeview: get/set external viewer exec string(s) for mimetype(s) */ string getMimeViewerDef(const string &mimetype, const string& apptag, bool useall) const; string getMimeViewerAllEx() const; bool setMimeViewerAllEx(const string& allex); bool getMimeViewerDefs(vector >&) const; bool setMimeViewerDef(const string& mimetype, const string& cmd); /** Check if mime type is designated as needing no uncompress before view * (if a file of this type is found compressed). Default is true, * exceptions are found in the nouncompforviewmts mimeview list */ bool mimeViewerNeedsUncomp(const string &mimetype) const; /** Retrieve extra metadata-gathering commands */ const vector& getMDReapers(); /** Store/retrieve missing helpers description string */ bool getMissingHelperDesc(string&) const; void storeMissingHelperDesc(const string &s); /** Find exec file for external filter. * * If the input is an absolute path, we just return it. Else We * look in $RECOLL_FILTERSDIR, "filtersdir" from the config file, * $RECOLL_CONFDIR/. If nothing is found, we return the input with * the assumption that this will be used with a PATH-searching * exec. * * @param cmd is normally the command name from the command string * returned by getMimeHandlerDef(), but this could be used for any * command. If cmd begins with a /, we return cmd without * further processing. */ string findFilter(const string& cmd) const; /** Thread config init is not done automatically because not all programs need it and it uses debuglog so that it's better to call it after primary init */ void initThrConf(); const string& getOrigCwd() { return o_origcwd; } RclConfig& operator=(const RclConfig &r) { if (this != &r) { freeAll(); initFrom(r); } return *this; } friend class ParamStale; private: int m_ok; string m_reason; // Explanation for bad state string m_confdir; // User directory where the customized files are stored string m_datadir; // Example: /usr/local/share/recoll string m_keydir; // Current directory used for parameter fetches. int m_keydirgen; // To help with knowing when to update computed data. vector m_cdirs; // directory stack for the confstacks ConfStack *m_conf; // Parsed configuration files ConfStack *mimemap; // The files don't change with keydir, ConfStack *mimeconf; // but their content may depend on it. ConfStack *mimeview; // ConfStack *m_fields; ConfSimple *m_ptrans; // Paths translations map m_fldtotraits; // Field to field params map m_aliastocanon; map m_aliastoqcanon; set m_storedFields; map m_xattrtofld; void *m_stopsuffixes; unsigned int m_maxsufflen; ParamStale m_oldstpsuffstate; // Values from user mimemap, now obsolete ParamStale m_stpsuffstate; ParamStale m_skpnstate; vector m_skpnlist; // Original current working directory. Set once at init before we do any // chdir'ing and used for converting user args to absolute paths. static string o_origcwd; // Parameters auto-fetched on setkeydir string m_defcharset; static string o_localecharset; // Limiting set of mime types to be processed. Normally empty. ParamStale m_rmtstate; STD_UNORDERED_SET m_restrictMTypes; // Exclusion set of mime types. Normally empty ParamStale m_xmtstate; STD_UNORDERED_SET m_excludeMTypes; vector > m_thrConf; // Same idea with the metadata-gathering external commands, // (e.g. used to reap tagging info: "tmsu tags %f") ParamStale m_mdrstate; vector m_mdreapers; /** Create initial user configuration */ bool initUserConfig(); /** Copy from other */ void initFrom(const RclConfig& r); /** Init pointers to 0 */ void zeroMe(); /** Free data then zero pointers */ void freeAll(); bool readFieldsConfig(const string& errloc); }; // This global variable defines if we are running with an index // stripped of accents and case or a raw one. Ideally, it should be // constant, but it needs to be initialized from the configuration, so // there is no way to do this. It never changes after initialization // of course. Changing the value on a given index imposes a // reset. When using multiple indexes, all must have the same value extern bool o_index_stripchars; // This global variable defines if we use mtime instead of ctime for // up-to-date tests. This is mostly incompatible with xattr indexing, // in addition to other issues. See recoll.conf comments. extern bool o_uptodate_test_use_mtime; #endif /* _RCLCONFIG_H_INCLUDED_ */