Use the "charset" extended attribute for text files if it is set
This commit is contained in:
parent
17ecc9d7bb
commit
78e889d984
2 changed files with 14 additions and 4 deletions
|
@ -33,6 +33,7 @@ using namespace std;
|
||||||
#include "readfile.h"
|
#include "readfile.h"
|
||||||
#include "md5.h"
|
#include "md5.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
|
#include "pxattr.h"
|
||||||
|
|
||||||
const int MB = 1024*1024;
|
const int MB = 1024*1024;
|
||||||
const int KB = 1024;
|
const int KB = 1024;
|
||||||
|
@ -53,6 +54,10 @@ bool MimeHandlerText::set_document_file(const string &fn)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for charset defined in extended attribute as per:
|
||||||
|
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||||
|
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
||||||
|
|
||||||
// Max file size parameter: texts over this size are not indexed
|
// Max file size parameter: texts over this size are not indexed
|
||||||
int maxmbs = 20;
|
int maxmbs = 20;
|
||||||
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
||||||
|
@ -115,14 +120,18 @@ bool MimeHandlerText::next_document()
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// We transcode even if defcharset is supposedly already utf-8:
|
if (m_charsetfromxattr.empty())
|
||||||
// this validates the encoding.
|
|
||||||
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||||
|
else
|
||||||
|
m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
|
||||||
|
|
||||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
|
||||||
size_t srclen = m_text.length();
|
size_t srclen = m_text.length();
|
||||||
m_metaData[cstr_dj_keycontent].swap(m_text);
|
m_metaData[cstr_dj_keycontent].swap(m_text);
|
||||||
|
|
||||||
|
// We transcode even if defcharset is supposedly already utf-8:
|
||||||
|
// this validates the encoding.
|
||||||
// txtdcode() truncates the text if transcoding fails
|
// txtdcode() truncates the text if transcoding fails
|
||||||
(void)txtdcode("mh_text");
|
(void)txtdcode("mh_text");
|
||||||
|
|
||||||
|
|
|
@ -56,6 +56,7 @@ private:
|
||||||
string m_fn;
|
string m_fn;
|
||||||
off_t m_offs; // Offset of next read in file if we're paging
|
off_t m_offs; // Offset of next read in file if we're paging
|
||||||
size_t m_pagesz;
|
size_t m_pagesz;
|
||||||
|
string m_charsetfromxattr;
|
||||||
|
|
||||||
bool readnext();
|
bool readnext();
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue