Make path searches case-insensitive on windows.
This commit is contained in:
parent
0294f6ca6d
commit
08a3bd926c
7 changed files with 229 additions and 34 deletions
|
@ -22,6 +22,8 @@
|
|||
#ifndef _WIN32
|
||||
#include <langinfo.h>
|
||||
#include <sys/param.h>
|
||||
#else
|
||||
#include "wincodepages.h"
|
||||
#endif
|
||||
#include <limits.h>
|
||||
#include "safesysstat.h"
|
||||
|
@ -208,11 +210,10 @@ RclConfig::RclConfig(const string *argcnf)
|
|||
o_localecharset = string(cstr_cp1252);
|
||||
}
|
||||
#else
|
||||
// This is quite incorrect, when using the non-unicode (utf16)
|
||||
// interface, Windows will never use utf-8
|
||||
o_localecharset = "UTF-8";
|
||||
o_localecharset = winACPName();
|
||||
#endif
|
||||
LOGDEB1("RclConfig::getDefCharset: localecharset [" << (o_localecharset) << "]\n" );
|
||||
LOGDEB1("RclConfig::getDefCharset: localecharset [" <<
|
||||
o_localecharset << "]\n");
|
||||
}
|
||||
|
||||
const char *cp;
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include "cancelcheck.h"
|
||||
#include "rclinit.h"
|
||||
#include "extrameta.h"
|
||||
#include "utf8fn.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -592,24 +593,6 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
return processonefile(m_config, fn, stp, m_localfields);
|
||||
}
|
||||
|
||||
// File name transcoded to utf8 for indexing. If this fails, the file
|
||||
// name won't be indexed, no big deal. Note that we used to do the full
|
||||
// path here, but I ended up believing that it made more sense to use
|
||||
// only the file name. The charset used is the one from the locale.
|
||||
static string compute_utf8fn(RclConfig *config, const string& fn)
{
    // Source charset: the default charset obtained from the configuration.
    const string charset = config->getDefCharset(true);
    // Only the last path element is transcoded; compute it once and reuse
    // it for the conversion and for every log message.
    const string simplefn = path_getsimple(fn);

    string utf8fn;
    // Initialised so that the error-count test below never reads an
    // indeterminate value.
    int ercnt = 0;
    if (!transcode(simplefn, utf8fn, charset, "UTF-8", &ercnt)) {
        LOGERR("processone: fn transcode failure from [" << charset <<
               "] to UTF-8: " << simplefn << "\n");
    } else if (ercnt) {
        LOGDEB("processone: fn transcode " << ercnt << " errors from [" <<
               charset << "] to UTF-8: " << simplefn << "\n");
    }
    LOGDEB2("processone: fn transcoded from [" << simplefn << "] to [" <<
            utf8fn << "] (" << charset << "->" << "UTF-8" << ")\n");
    // On transcode failure, whatever transcode() left in utf8fn is returned
    // as-is; the failure is only logged.
    return utf8fn;
}
|
||||
|
||||
FsTreeWalker::Status
|
||||
FsIndexer::processonefile(RclConfig *config,
|
||||
const std::string &fn, const struct stat *stp,
|
||||
|
@ -680,9 +663,12 @@ FsIndexer::processonefile(RclConfig *config,
|
|||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
LOGDEB0("processone: processing: [" << (displayableBytes(off_t(stp->st_size))) << "] " << (fn) << "\n" );
|
||||
LOGDEB0("processone: processing: [" <<
|
||||
displayableBytes(off_t(stp->st_size)) << "] " << fn << "\n");
|
||||
|
||||
string utf8fn = compute_utf8fn(config, fn);
|
||||
// Note that we used to do the full path here, but I ended up
|
||||
// believing that it made more sense to use only the file name
|
||||
string utf8fn = compute_utf8fn(config, fn, true);
|
||||
|
||||
// parent_udi is initially the same as udi, it will be used if there
|
||||
// are subdocs.
|
||||
|
|
|
@ -56,6 +56,7 @@ using namespace std;
|
|||
#include "expansiondbs.h"
|
||||
#include "rclinit.h"
|
||||
#include "internfile.h"
|
||||
#include "utf8fn.h"
|
||||
|
||||
// Recoll index format version is stored in user metadata. When this changes,
|
||||
// we can't open the db and will have to reindex.
|
||||
|
@ -159,7 +160,9 @@ Db::Native::~Native()
|
|||
#ifdef IDX_THREADS
|
||||
if (m_havewriteq) {
|
||||
void *status = m_wqueue.setTerminateAndWait();
|
||||
LOGDEB2("Native::~Native: worker status " << (long(status)) << "\n" );
|
||||
if (status) {
|
||||
LOGDEB1("Native::~Native: worker status " << status << "\n");
|
||||
}
|
||||
}
|
||||
#endif // IDX_THREADS
|
||||
}
|
||||
|
@ -1060,7 +1063,6 @@ class TextSplitDb : public TextSplitP {
|
|||
// Reimplement text_to_words to insert the begin and end anchor terms.
|
||||
virtual bool text_to_words(const string &in)
|
||||
{
|
||||
bool ret = false;
|
||||
string ermsg;
|
||||
|
||||
try {
|
||||
|
@ -1089,8 +1091,6 @@ class TextSplitDb : public TextSplitP {
|
|||
goto out;
|
||||
}
|
||||
|
||||
ret = true;
|
||||
|
||||
out:
|
||||
basepos += curpos + 100;
|
||||
return true;
|
||||
|
@ -1296,6 +1296,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||
// Split and index the path from the url for path-based filtering
|
||||
{
|
||||
string path = url_gpathS(doc.url);
|
||||
|
||||
#ifdef _WIN32
|
||||
// Windows file names are case-insensitive, so we
|
||||
// translate to UTF-8 and lowercase
|
||||
string upath = compute_utf8fn(m_config, path, false);
|
||||
unacmaybefold(upath, path, "UTF-8", UNACOP_FOLD);
|
||||
#endif
|
||||
|
||||
vector<string> vpath;
|
||||
stringToTokens(path, vpath, "/");
|
||||
// If vpath is not /, the last elt is the file/dir name, not a
|
||||
|
|
|
@ -937,11 +937,20 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
|
|||
// Translate a dir: path filtering clause. See comments in .h
|
||||
bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
|
||||
{
|
||||
LOGDEB("SearchDataClausePath::toNativeQuery: [" << (m_text) << "]\n" );
|
||||
LOGDEB("SearchDataClausePath::toNativeQuery: [" << m_text << "]\n");
|
||||
Xapian::Query *qp = (Xapian::Query *)p;
|
||||
*qp = Xapian::Query();
|
||||
|
||||
if (m_text.empty()) {
|
||||
string ltext;
|
||||
#ifdef _WIN32
|
||||
// Windows file names are case-insensitive, so we lowercase (same
|
||||
// as when indexing)
|
||||
unacmaybefold(m_text, ltext, "UTF-8", UNACOP_FOLD);
|
||||
#else
|
||||
ltext = m_text;
|
||||
#endif
|
||||
|
||||
if (ltext.empty()) {
|
||||
LOGERR("SearchDataClausePath: empty path??\n" );
|
||||
m_reason = "Empty path ?";
|
||||
return false;
|
||||
|
@ -949,13 +958,13 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
|
|||
|
||||
vector<Xapian::Query> orqueries;
|
||||
|
||||
if (path_isabsolute(m_text))
|
||||
if (path_isabsolute(ltext))
|
||||
orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
|
||||
else
|
||||
m_text = path_tildexpand(m_text);
|
||||
ltext = path_tildexpand(ltext);
|
||||
|
||||
vector<string> vpath;
|
||||
stringToTokens(m_text, vpath, "/");
|
||||
stringToTokens(ltext, vpath, "/");
|
||||
|
||||
for (vector<string>::const_iterator pit = vpath.begin();
|
||||
pit != vpath.end(); pit++){
|
||||
|
@ -967,7 +976,8 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
|
|||
*pit, exp, sterm, wrap_prefix(pathelt_prefix))) {
|
||||
return false;
|
||||
}
|
||||
LOGDEB0("SDataPath::toNative: exp size " << (exp.size()) << ". Exp: " << (stringsToString(exp)) << "\n" );
|
||||
LOGDEB0("SDataPath::toNative: exp size " << exp.size() << ". Exp: " <<
|
||||
stringsToString(exp) << "\n");
|
||||
if (exp.size() == 1)
|
||||
orqueries.push_back(Xapian::Query(exp[0]));
|
||||
else
|
||||
|
|
|
@ -29,6 +29,7 @@ SOURCES += \
|
|||
../../common/syngroups.cpp \
|
||||
../../common/textsplit.cpp \
|
||||
../../common/unacpp.cpp \
|
||||
../../common/utf8fn.cpp \
|
||||
../../index/beaglequeue.cpp \
|
||||
../../index/bglfetcher.cpp \
|
||||
../../index/checkretryfailed.cpp \
|
||||
|
@ -89,6 +90,7 @@ SOURCES += \
|
|||
../../utils/ecrontab.cpp \
|
||||
../../windows/execmd_w.cpp \
|
||||
../../windows/fnmatch.c \
|
||||
../../windows/wincodepages.cpp \
|
||||
../../utils/fileudi.cpp \
|
||||
../../utils/fstreewalk.cpp \
|
||||
../../utils/hldata.cpp \
|
||||
|
|
180
src/windows/wincodepages.cpp
Normal file
180
src/windows/wincodepages.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
#include <unordered_map>
|
||||
#include <string>
|
||||
|
||||
#include "safewindows.h"
|
||||
#include "wincodepages.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Description of one Windows code page.
struct WinCpDef {
    // Charset name for the code page. Empty when the table has no name
    // for this identifier.
    std::string cpname;
    // Human-readable description of the code page.
    std::string cpcomment;
};

// Windows code page identifier -> charset name and description.
// Keys must be written as DECIMAL literals: a leading zero (e.g. 037) makes
// the literal octal and files the entry under the wrong identifier.
static const std::unordered_map<int, WinCpDef> cpdefs {
    {37, {"IBM037", "IBM EBCDIC US-Canada"}},
    {437, {"IBM437", "OEM United States"}},
    {500, {"IBM500", "IBM EBCDIC International"}},
    {708, {"ASMO-708", "Arabic (ASMO 708)"}},
    {709, {"", "Arabic (ASMO-449+, BCON V4)"}},
    {710, {"", "Arabic - Transparent Arabic"}},
    {720, {"DOS-720", "Arabic (Transparent ASMO); Arabic (DOS)"}},
    {737, {"ibm737", "OEM Greek (formerly 437G); Greek (DOS)"}},
    {775, {"ibm775", "OEM Baltic; Baltic (DOS)"}},
    {850, {"ibm850", "OEM Multilingual Latin 1; Western European (DOS)"}},
    {852, {"ibm852", "OEM Latin 2; Central European (DOS)"}},
    {855, {"IBM855", "OEM Cyrillic (primarily Russian)"}},
    {857, {"ibm857", "OEM Turkish; Turkish (DOS)"}},
    {858, {"IBM00858", "OEM Multilingual Latin 1 + Euro symbol"}},
    {860, {"IBM860", "OEM Portuguese; Portuguese (DOS)"}},
    {861, {"ibm861", "OEM Icelandic; Icelandic (DOS)"}},
    {862, {"DOS-862", "OEM Hebrew; Hebrew (DOS)"}},
    {863, {"IBM863", "OEM French Canadian; French Canadian (DOS)"}},
    {864, {"IBM864", "OEM Arabic; Arabic (864)"}},
    {865, {"IBM865", "OEM Nordic; Nordic (DOS)"}},
    {866, {"cp866", "OEM Russian; Cyrillic (DOS)"}},
    {869, {"ibm869", "OEM Modern Greek; Greek, Modern (DOS)"}},
    {870, {"IBM870", "IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2"}},
    {874, {"windows-874", "ANSI/OEM Thai (ISO 8859-11); Thai (Windows)"}},
    {875, {"cp875", "IBM EBCDIC Greek Modern"}},
    {932, {"shift_jis", "ANSI/OEM Japanese; Japanese (Shift-JIS)"}},
    {936, {"gb2312", "ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)"}},
    {949, {"ks_c_5601-1987", "ANSI/OEM Korean (Unified Hangul Code)"}},
    {950, {"big5", "ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)"}},
    {1026, {"IBM1026", "IBM EBCDIC Turkish (Latin 5)"}},
    {1047, {"IBM01047", "IBM EBCDIC Latin 1/Open System"}},
    {1140, {"IBM01140", "IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)"}},
    {1141, {"IBM01141", "IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)"}},
    {1142, {"IBM01142", "IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)"}},
    {1143, {"IBM01143", "IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)"}},
    {1144, {"IBM01144", "IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)"}},
    {1145, {"IBM01145", "IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)"}},
    {1146, {"IBM01146", "IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)"}},
    {1147, {"IBM01147", "IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)"}},
    {1148, {"IBM01148", "IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)"}},
    {1149, {"IBM01149", "IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)"}},
    {1200, {"utf-16", "Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications"}},
    {1201, {"unicodeFFFE", "Unicode UTF-16, big endian byte order; available only to managed applications"}},
    {1250, {"windows-1250", "ANSI Central European; Central European (Windows)"}},
    {1251, {"windows-1251", "ANSI Cyrillic; Cyrillic (Windows)"}},
    {1252, {"windows-1252", "ANSI Latin 1; Western European (Windows)"}},
    {1253, {"windows-1253", "ANSI Greek; Greek (Windows)"}},
    {1254, {"windows-1254", "ANSI Turkish; Turkish (Windows)"}},
    {1255, {"windows-1255", "ANSI Hebrew; Hebrew (Windows)"}},
    {1256, {"windows-1256", "ANSI Arabic; Arabic (Windows)"}},
    {1257, {"windows-1257", "ANSI Baltic; Baltic (Windows)"}},
    {1258, {"windows-1258", "ANSI/OEM Vietnamese; Vietnamese (Windows)"}},
    {1361, {"Johab", "Korean (Johab)"}},
    {10000, {"macintosh", "MAC Roman; Western European (Mac)"}},
    {10001, {"x-mac-japanese", "Japanese (Mac)"}},
    {10002, {"x-mac-chinesetrad", "MAC Traditional Chinese (Big5); Chinese Traditional (Mac)"}},
    {10003, {"x-mac-korean", "Korean (Mac)"}},
    {10004, {"x-mac-arabic", "Arabic (Mac)"}},
    {10005, {"x-mac-hebrew", "Hebrew (Mac)"}},
    {10006, {"x-mac-greek", "Greek (Mac)"}},
    {10007, {"x-mac-cyrillic", "Cyrillic (Mac)"}},
    {10008, {"x-mac-chinesesimp", "MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)"}},
    {10010, {"x-mac-romanian", "Romanian (Mac)"}},
    {10017, {"x-mac-ukrainian", "Ukrainian (Mac)"}},
    {10021, {"x-mac-thai", "Thai (Mac)"}},
    {10029, {"x-mac-ce", "MAC Latin 2; Central European (Mac)"}},
    {10079, {"x-mac-icelandic", "Icelandic (Mac)"}},
    {10081, {"x-mac-turkish", "Turkish (Mac)"}},
    {10082, {"x-mac-croatian", "Croatian (Mac)"}},
    {12000, {"utf-32", "Unicode UTF-32, little endian byte order; available only to managed applications"}},
    {12001, {"utf-32BE", "Unicode UTF-32, big endian byte order; available only to managed applications"}},
    {20000, {"x-Chinese_CNS", "CNS Taiwan; Chinese Traditional (CNS)"}},
    {20001, {"x-cp20001", "TCA Taiwan"}},
    {20002, {"x_Chinese-Eten", "Eten Taiwan; Chinese Traditional (Eten)"}},
    {20003, {"x-cp20003", "IBM5550 Taiwan"}},
    {20004, {"x-cp20004", "TeleText Taiwan"}},
    {20005, {"x-cp20005", "Wang Taiwan"}},
    {20105, {"x-IA5", "IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)"}},
    {20106, {"x-IA5-German", "IA5 German (7-bit)"}},
    {20107, {"x-IA5-Swedish", "IA5 Swedish (7-bit)"}},
    {20108, {"x-IA5-Norwegian", "IA5 Norwegian (7-bit)"}},
    {20127, {"us-ascii", "US-ASCII (7-bit)"}},
    {20261, {"x-cp20261", "T.61"}},
    {20269, {"x-cp20269", "ISO 6937 Non-Spacing Accent"}},
    {20273, {"IBM273", "IBM EBCDIC Germany"}},
    {20277, {"IBM277", "IBM EBCDIC Denmark-Norway"}},
    {20278, {"IBM278", "IBM EBCDIC Finland-Sweden"}},
    {20280, {"IBM280", "IBM EBCDIC Italy"}},
    {20284, {"IBM284", "IBM EBCDIC Latin America-Spain"}},
    {20285, {"IBM285", "IBM EBCDIC United Kingdom"}},
    {20290, {"IBM290", "IBM EBCDIC Japanese Katakana Extended"}},
    {20297, {"IBM297", "IBM EBCDIC France"}},
    {20420, {"IBM420", "IBM EBCDIC Arabic"}},
    {20423, {"IBM423", "IBM EBCDIC Greek"}},
    {20424, {"IBM424", "IBM EBCDIC Hebrew"}},
    {20833, {"x-EBCDIC-KoreanExtended", "IBM EBCDIC Korean Extended"}},
    {20838, {"IBM-Thai", "IBM EBCDIC Thai"}},
    {20866, {"koi8-r", "Russian (KOI8-R); Cyrillic (KOI8-R)"}},
    {20871, {"IBM871", "IBM EBCDIC Icelandic"}},
    {20880, {"IBM880", "IBM EBCDIC Cyrillic Russian"}},
    {20905, {"IBM905", "IBM EBCDIC Turkish"}},
    {20924, {"IBM00924", "IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)"}},
    {20932, {"EUC-JP", "Japanese (JIS 0208-1990 and 0212-1990)"}},
    {20936, {"x-cp20936", "Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)"}},
    {20949, {"x-cp20949", "Korean Wansung"}},
    {21025, {"cp1025", "IBM EBCDIC Cyrillic Serbian-Bulgarian"}},
    {21027, {"", "(deprecated)"}},
    {21866, {"koi8-u", "Ukrainian (KOI8-U); Cyrillic (KOI8-U)"}},
    {28591, {"iso-8859-1", "ISO 8859-1 Latin 1; Western European (ISO)"}},
    {28592, {"iso-8859-2", "ISO 8859-2 Central European; Central European (ISO)"}},
    {28593, {"iso-8859-3", "ISO 8859-3 Latin 3"}},
    {28594, {"iso-8859-4", "ISO 8859-4 Baltic"}},
    {28595, {"iso-8859-5", "ISO 8859-5 Cyrillic"}},
    {28596, {"iso-8859-6", "ISO 8859-6 Arabic"}},
    {28597, {"iso-8859-7", "ISO 8859-7 Greek"}},
    {28598, {"iso-8859-8", "ISO 8859-8 Hebrew; Hebrew (ISO-Visual)"}},
    {28599, {"iso-8859-9", "ISO 8859-9 Turkish"}},
    {28603, {"iso-8859-13", "ISO 8859-13 Estonian"}},
    {28605, {"iso-8859-15", "ISO 8859-15 Latin 9"}},
    {29001, {"x-Europa", "Europa 3"}},
    {38598, {"iso-8859-8-i", "ISO 8859-8 Hebrew; Hebrew (ISO-Logical)"}},
    {50220, {"iso-2022-jp", "ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)"}},
    {50221, {"csISO2022JP", "ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)"}},
    {50222, {"iso-2022-jp", "ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)"}},
    {50225, {"iso-2022-kr", "ISO 2022 Korean"}},
    {50227, {"x-cp50227", "ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)"}},
    {50229, {"", "ISO 2022 Traditional Chinese"}},
    {50930, {"", "EBCDIC Japanese (Katakana) Extended"}},
    {50931, {"", "EBCDIC US-Canada and Japanese"}},
    {50933, {"", "EBCDIC Korean Extended and Korean"}},
    {50935, {"", "EBCDIC Simplified Chinese Extended and Simplified Chinese"}},
    {50936, {"", "EBCDIC Simplified Chinese"}},
    {50937, {"", "EBCDIC US-Canada and Traditional Chinese"}},
    {50939, {"", "EBCDIC Japanese (Latin) Extended and Japanese"}},
    {51932, {"euc-jp", "EUC Japanese"}},
    {51936, {"EUC-CN", "EUC Simplified Chinese; Chinese Simplified (EUC)"}},
    {51949, {"euc-kr", "EUC Korean"}},
    {51950, {"", "EUC Traditional Chinese"}},
    {52936, {"hz-gb-2312", "HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)"}},
    {54936, {"GB18030", "Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)"}},
    {57002, {"x-iscii-de", "ISCII Devanagari"}},
    {57003, {"x-iscii-be", "ISCII Bangla"}},
    {57004, {"x-iscii-ta", "ISCII Tamil"}},
    {57005, {"x-iscii-te", "ISCII Telugu"}},
    {57006, {"x-iscii-as", "ISCII Assamese"}},
    {57007, {"x-iscii-or", "ISCII Odia"}},
    {57008, {"x-iscii-ka", "ISCII Kannada"}},
    {57009, {"x-iscii-ma", "ISCII Malayalam"}},
    {57010, {"x-iscii-gu", "ISCII Gujarati"}},
    {57011, {"x-iscii-pa", "ISCII Punjabi"}},
    {65000, {"utf-7", "Unicode (UTF-7)"}},
    {65001, {"utf-8", "Unicode (UTF-8)"}},
};
|
||||
|
||||
static const string cp1252("CP1252");
|
||||
|
||||
const string& winACPName()
|
||||
{
|
||||
unsigned int acp = GetACP();
|
||||
auto it = cpdefs.find(acp);
|
||||
if (it == cpdefs.end()) {
|
||||
return cp1252;
|
||||
} else {
|
||||
return it->second.cpname;
|
||||
}
|
||||
}
|
8
src/windows/wincodepages.h
Normal file
8
src/windows/wincodepages.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
#ifndef WINCODEPAGES_H_
|
||||
#define WINCODEPAGES_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
// Return the charset name associated with the code page reported by
// GetACP(). "CP1252" is returned when the code page is not found in the
// lookup table. The reference designates static data.
extern const std::string& winACPName();
|
||||
|
||||
#endif // WINCODEPAGES_H_
|
Loading…
Add table
Add a link
Reference in a new issue