from 1.18 branch: When creating initial config directory (1st exec), initialize specific unac_except_trans for some languages: de, se/no/dk/fi + fix mixup of language and country codes
This commit is contained in:
parent
a11c696554
commit
3da5158e9f
5 changed files with 87 additions and 56 deletions
|
@ -1101,6 +1101,12 @@ static const char blurb0[] =
|
||||||
"# values is identical.\n"
|
"# values is identical.\n"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
// Use uni2ascii -a K to generate these from the utf-8 strings
|
||||||
|
// Swedish and Danish.
|
||||||
|
// Ready-made "unac_except_trans" configuration line for the Nordic case.
// As laid out here, every space-separated entry is one octal-escaped UTF-8
// character immediately followed by the text it is paired with.
// The literal is split into adjacent pieces for readability only; the
// compiler concatenates them into a single string.
static const char swedish_ex[] =
    "unac_except_trans ="
    " \303\244\303\244 \303\204\303\244"   // a with diaeresis (lower, upper)
    " \303\266\303\266 \303\226\303\266"   // o with diaeresis (lower, upper)
    " \303\274\303\274 \303\234\303\274"   // u with diaeresis (lower, upper)
    " \303\237ss"                          // sharp s
    " \305\223oe \305\222oe"               // oe ligature (lower, upper)
    " \303\246ae \303\206ae"               // ae ligature (lower, upper)
    " \357\254\201fi \357\254\202fl"       // fi and fl ligatures
    " \303\245\303\245 \303\205\303\245";  // a with ring above (lower, upper)
|
||||||
|
// German:
|
||||||
|
// Ready-made "unac_except_trans" configuration line for the German case.
// As laid out here, every space-separated entry is one octal-escaped UTF-8
// character immediately followed by the text it is paired with.
// The literal is split into adjacent pieces for readability only; the
// compiler concatenates them into a single string.
static const char german_ex[] =
    "unac_except_trans ="
    " \303\244\303\244 \303\204\303\244"   // a with diaeresis (lower, upper)
    " \303\266\303\266 \303\226\303\266"   // o with diaeresis (lower, upper)
    " \303\274\303\274 \303\234\303\274"   // u with diaeresis (lower, upper)
    " \303\237ss"                          // sharp s
    " \305\223oe \305\222oe"               // oe ligature (lower, upper)
    " \303\246ae \303\206ae"               // ae ligature (lower, upper)
    " \357\254\201fi \357\254\202fl";      // fi and fl ligatures
|
||||||
|
|
||||||
// Create initial user config by creating commented empty files
|
// Create initial user config by creating commented empty files
|
||||||
static const char *configfiles[] = {"recoll.conf", "mimemap", "mimeconf",
|
static const char *configfiles[] = {"recoll.conf", "mimemap", "mimeconf",
|
||||||
"mimeview"};
|
"mimeview"};
|
||||||
|
@ -1121,12 +1127,22 @@ bool RclConfig::initUserConfig()
|
||||||
strerror(errno);
|
strerror(errno);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
string lang = localelang();
|
||||||
for (int i = 0; i < ncffiles; i++) {
|
for (int i = 0; i < ncffiles; i++) {
|
||||||
string dst = path_cat(m_confdir, string(configfiles[i]));
|
string dst = path_cat(m_confdir, string(configfiles[i]));
|
||||||
if (access(dst.c_str(), 0) < 0) {
|
if (access(dst.c_str(), 0) < 0) {
|
||||||
FILE *fp = fopen(dst.c_str(), "w");
|
FILE *fp = fopen(dst.c_str(), "w");
|
||||||
if (fp) {
|
if (fp) {
|
||||||
fprintf(fp, "%s\n", blurb);
|
fprintf(fp, "%s\n", blurb);
|
||||||
|
if (!strcmp(configfiles[i], "recoll.conf")) {
|
||||||
|
// Add improved unac_except_trans for some languages
|
||||||
|
if (lang == "se" || lang == "dk" || lang == "no" ||
|
||||||
|
lang == "fi") {
|
||||||
|
fprintf(fp, "%s\n", swedish_ex);
|
||||||
|
} else if (lang == "de") {
|
||||||
|
fprintf(fp, "%s\n", german_ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
} else {
|
} else {
|
||||||
m_reason += string("fopen ") + dst + ": " + strerror(errno);
|
m_reason += string("fopen ") + dst + ": " + strerror(errno);
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "unac.h"
|
#include "unac.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
|
||||||
static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM,
|
static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM,
|
||||||
SIGUSR1, SIGUSR2};
|
SIGUSR1, SIGUSR2};
|
||||||
|
@ -119,6 +120,9 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||||
if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
|
if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
|
||||||
unac_set_except_translations(unacex.c_str());
|
unac_set_except_translations(unacex.c_str());
|
||||||
|
|
||||||
|
// Init langtocode() static table
|
||||||
|
langtocode("");
|
||||||
|
|
||||||
int flushmb;
|
int flushmb;
|
||||||
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
|
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
|
||||||
LOGDEB1(("rclinit: idxflushmb=%d, set XAPIAN_FLUSH_THRESHOLD to 10E6\n",
|
LOGDEB1(("rclinit: idxflushmb=%d, set XAPIAN_FLUSH_THRESHOLD to 10E6\n",
|
||||||
|
|
|
@ -15,47 +15,12 @@
|
||||||
*/
|
*/
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <tr1/unordered_map>
|
|
||||||
using std::tr1::unordered_map;
|
|
||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "transcode.h"
|
#include "transcode.h"
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
|
||||||
static const char *vcountry_to_code[] = {
|
|
||||||
"fr", "windows-1252",
|
|
||||||
"al", "windows-1252",
|
|
||||||
"dk", "windows-1252",
|
|
||||||
"en", "windows-1252",
|
|
||||||
"de", "windows-1252",
|
|
||||||
"is", "windows-1252",
|
|
||||||
"my", "windows-1252",
|
|
||||||
"ie", "windows-1252",
|
|
||||||
"gb", "windows-1252",
|
|
||||||
"it", "windows-1252",
|
|
||||||
"lu", "windows-1252",
|
|
||||||
"no", "windows-1252",
|
|
||||||
"pt", "windows-1252",
|
|
||||||
"es", "windows-1252",
|
|
||||||
"se", "windows-1252",
|
|
||||||
"ba", "iso-8859-2",
|
|
||||||
"hr", "iso-8859-2",
|
|
||||||
"cz", "iso-8859-2",
|
|
||||||
"hu", "iso-8859-2",
|
|
||||||
"pl", "iso-8859-2",
|
|
||||||
"rs", "iso-8859-2",
|
|
||||||
"sk", "iso-8859-2",
|
|
||||||
"si", "iso-8859-2",
|
|
||||||
"gr", "iso-8859-7",
|
|
||||||
"il", "iso-8859-8",
|
|
||||||
"tr", "iso-8859-9",
|
|
||||||
"th", "iso-8859-11",
|
|
||||||
"lv", "iso-8859-13",
|
|
||||||
"lt", "iso-8859-13",
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// Called after decoding from utf-8 failed. Handle the common case
|
// Called after decoding from utf-8 failed. Handle the common case
|
||||||
// where this is a good old 8bit-encoded text document left-over when
|
// where this is a good old 8bit-encoded text document left-over when
|
||||||
|
@ -64,27 +29,8 @@ static const char *vcountry_to_code[] = {
|
||||||
// heuristic, but may be better than discarding the data.
|
// heuristic, but may be better than discarding the data.
|
||||||
static bool alternate_decode(const string& in, string& out)
|
static bool alternate_decode(const string& in, string& out)
|
||||||
{
|
{
|
||||||
static unordered_map<string, string> country_to_code;
|
string lang = localelang();
|
||||||
if (country_to_code.empty()) {
|
string code = langtocode(lang);
|
||||||
for (unsigned int i = 0;
|
|
||||||
i < sizeof(vcountry_to_code) / sizeof(char *); i += 2) {
|
|
||||||
country_to_code[vcountry_to_code[i]] = vcountry_to_code[i+1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
string locale = setlocale(LC_CTYPE, 0);
|
|
||||||
LOGDEB(("RecollFilter::alternate_dcde: locale: [%s]\n", locale.c_str()));
|
|
||||||
string::size_type under = locale.find_first_of("_");
|
|
||||||
if (under == string::npos)
|
|
||||||
return false;
|
|
||||||
string country = locale.substr(0, under);
|
|
||||||
|
|
||||||
unordered_map<string,string>::const_iterator it =
|
|
||||||
country_to_code.find(country);
|
|
||||||
if (it == country_to_code.end())
|
|
||||||
return false;
|
|
||||||
string code = it->second;
|
|
||||||
|
|
||||||
LOGDEB(("RecollFilter::txtdcode: trying alternate decode from %s\n",
|
LOGDEB(("RecollFilter::txtdcode: trying alternate decode from %s\n",
|
||||||
code.c_str()));
|
code.c_str()));
|
||||||
return transcode(in, out, code, cstr_utf8);
|
return transcode(in, out, code, cstr_utf8);
|
||||||
|
|
|
@ -33,11 +33,14 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <list>
|
#include <list>
|
||||||
|
#include <tr1/unordered_map>
|
||||||
|
using std::tr1::unordered_map;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "utf8iter.h"
|
#include "utf8iter.h"
|
||||||
#include "hldata.h"
|
#include "hldata.h"
|
||||||
|
#include "cstr.h"
|
||||||
|
|
||||||
int stringicmp(const string & s1, const string& s2)
|
int stringicmp(const string & s1, const string& s2)
|
||||||
{
|
{
|
||||||
|
@ -1097,6 +1100,63 @@ void HighlightData::append(const HighlightData& hl)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Flat list of (language code, 8 bit character set name) string pairs:
// even indexes hold the language, the following odd index holds the
// charset. langtocode() loads these pairs into its lookup map.
static const char *vlang_to_code[] = {
    "be", "cp1251",        // Belarusian
    "bg", "cp1251",        // Bulgarian
    "cs", "iso-8859-2",    // Czech
    "el", "iso-8859-7",    // Greek
    "he", "iso-8859-8",    // Hebrew
    "hr", "iso-8859-2",    // Croatian
    "hu", "iso-8859-2",    // Hungarian
    "ja", "eucjp",         // Japanese
    "kk", "pt154",         // Kazakh
    "ko", "euckr",         // Korean
    "lt", "iso-8859-13",   // Lithuanian
    "lv", "iso-8859-13",   // Latvian
    "pl", "iso-8859-2",    // Polish
    // NOTE(review): "rs" looks like the country code for Serbia rather
    // than a language code ("sr" is listed below) -- confirm it is wanted.
    "rs", "iso-8859-2",
    "ro", "iso-8859-2",    // Romanian
    "ru", "koi8-r",        // Russian
    "sk", "iso-8859-2",    // Slovak
    "sl", "iso-8859-2",    // Slovenian
    "sr", "iso-8859-2",    // Serbian
    "th", "iso-8859-11",   // Thai
    "tr", "iso-8859-9",    // Turkish
    "uk", "koi8-u",        // Ukrainian
};
|
||||||
|
|
||||||
|
// Return the name of the 8 bit character set associated with a language
// code. The lookup map is a function-local static filled from
// vlang_to_code on the first call, so calling this once with any argument
// (even "") is enough to build the table.
string langtocode(const string& lang)
{
    typedef unordered_map<string, string> LangCharsetMap;
    static LangCharsetMap charsets;

    // First call only: copy the flat (language, charset) pair list into
    // the map, two array slots per entry.
    if (charsets.empty()) {
        const unsigned int nstrings =
            sizeof(vlang_to_code) / sizeof(vlang_to_code[0]);
        for (unsigned int k = 0; k < nstrings; k += 2) {
            charsets[vlang_to_code[k]] = vlang_to_code[k + 1];
        }
    }

    LangCharsetMap::const_iterator found = charsets.find(lang);
    if (found != charsets.end())
        return found->second;

    // Language not in the table: use the cp1252 default.
    return cstr_cp1252;
}
|
||||||
|
|
||||||
|
// Derive a language code from the LANG environment variable.
//
// Locale names have the general shape
//     language[_territory][.codeset][@modifier]
// so the language is everything before the first '_', '.' or '@'.
// Cutting only at '_' would hand back values such as "de.UTF-8" or
// "sr@latin" unchanged, and those never match a plain language code.
//
// Returns "en" when LANG is unset or empty, when the language part is
// empty, or when it names the "C" / "POSIX" locale (including variants
// like "C.UTF-8"). Otherwise returns the language part as found.
std::string localelang()
{
    const char *lang = getenv("LANG");
    if (lang == 0 || *lang == 0)
        return "en";

    const std::string locale(lang);
    // find_first_of() yields npos when no delimiter is present, in which
    // case substr() keeps the whole string.
    const std::string language =
        locale.substr(0, locale.find_first_of("_.@"));

    if (language.empty() || language == "C" || language == "POSIX")
        return "en";
    return language;
}
|
||||||
|
|
||||||
#else // TEST_SMALLUT
|
#else // TEST_SMALLUT
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
|
@ -53,6 +53,11 @@ extern string stringtolower(const string& io);
|
||||||
// Is one string the end part of the other ?
|
// Is one string the end part of the other ?
|
||||||
extern int stringisuffcmp(const string& s1, const string& s2);
|
extern int stringisuffcmp(const string& s1, const string& s2);
|
||||||
|
|
||||||
|
// Divine language from locale
|
||||||
|
extern std::string localelang();
|
||||||
|
// Divine 8bit charset from language
|
||||||
|
extern std::string langtocode(const string& lang);
|
||||||
|
|
||||||
// Compare charset names, removing the more common spelling variations
|
// Compare charset names, removing the more common spelling variations
|
||||||
extern bool samecharset(const string &cs1, const string &cs2);
|
extern bool samecharset(const string &cs1, const string &cs2);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue