Use the "charset" extended attribute for text files if it is set.
This commit is contained in:
parent
17ecc9d7bb
commit
78e889d984
2 changed files with 14 additions and 4 deletions
|
@ -33,6 +33,7 @@ using namespace std;
|
|||
#include "readfile.h"
|
||||
#include "md5.h"
|
||||
#include "rclconfig.h"
|
||||
#include "pxattr.h"
|
||||
|
||||
const int MB = 1024*1024;
|
||||
const int KB = 1024;
|
||||
|
@ -53,6 +54,10 @@ bool MimeHandlerText::set_document_file(const string &fn)
|
|||
return false;
|
||||
}
|
||||
|
||||
// Check for charset defined in extended attribute as per:
|
||||
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
||||
|
||||
// Max file size parameter: texts over this size are not indexed
|
||||
int maxmbs = 20;
|
||||
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
||||
|
@ -115,14 +120,18 @@ bool MimeHandlerText::next_document()
|
|||
if (m_havedoc == false)
|
||||
return false;
|
||||
|
||||
// We transcode even if defcharset is supposedly already utf-8:
|
||||
// this validates the encoding.
|
||||
if (m_charsetfromxattr.empty())
|
||||
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||
else
|
||||
m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
|
||||
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
|
||||
size_t srclen = m_text.length();
|
||||
m_metaData[cstr_dj_keycontent].swap(m_text);
|
||||
|
||||
// We transcode even if defcharset is supposedly already utf-8:
|
||||
// this validates the encoding.
|
||||
// txtdcode() truncates the text if transcoding fails
|
||||
(void)txtdcode("mh_text");
|
||||
|
||||
|
|
|
@ -56,6 +56,7 @@ private:
|
|||
string m_fn;
|
||||
off_t m_offs; // Offset of next read in file if we're paging
|
||||
size_t m_pagesz;
|
||||
string m_charsetfromxattr;
|
||||
|
||||
bool readnext();
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue