Make dehyphenate (co-worker->coworker) optional
This commit is contained in:
parent
5c001aec83
commit
bd3e930533
5 changed files with 38 additions and 8 deletions
|
@ -321,8 +321,9 @@ bool RclConfig::updateMainConfig()
|
||||||
m_mdrstate.init(m_conf);
|
m_mdrstate.init(m_conf);
|
||||||
|
|
||||||
setKeyDir(cstr_null);
|
setKeyDir(cstr_null);
|
||||||
bool nocjk = false;
|
|
||||||
if (getConfParam("nocjk", &nocjk) && nocjk == true) {
|
bool bvalue = false;
|
||||||
|
if (getConfParam("nocjk", &bvalue) && bvalue == true) {
|
||||||
TextSplit::cjkProcessing(false);
|
TextSplit::cjkProcessing(false);
|
||||||
} else {
|
} else {
|
||||||
int ngramlen;
|
int ngramlen;
|
||||||
|
@ -333,14 +334,18 @@ bool RclConfig::updateMainConfig()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool nonum = false;
|
bvalue = false;
|
||||||
if (getConfParam("nonumbers", &nonum) && nonum == true) {
|
if (getConfParam("nonumbers", &bvalue) && bvalue == true) {
|
||||||
TextSplit::noNumbers();
|
TextSplit::noNumbers();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool fnmpathname = true;
|
bvalue = false;
|
||||||
if (getConfParam("skippedPathsFnmPathname", &fnmpathname)
|
if (getConfParam("dehyphenate", &bvalue)) {
|
||||||
&& fnmpathname == false) {
|
TextSplit::deHyphenate(bvalue);
|
||||||
|
}
|
||||||
|
|
||||||
|
bvalue = true;
|
||||||
|
if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) {
|
||||||
FsTreeWalker::setNoFnmPathname();
|
FsTreeWalker::setNoFnmPathname();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -212,6 +212,7 @@ bool TextSplit::isCJK(int c)
|
||||||
bool TextSplit::o_processCJK = true;
|
bool TextSplit::o_processCJK = true;
|
||||||
unsigned int TextSplit::o_CJKNgramLen = 2;
|
unsigned int TextSplit::o_CJKNgramLen = 2;
|
||||||
bool TextSplit::o_noNumbers = false;
|
bool TextSplit::o_noNumbers = false;
|
||||||
|
// Dehyphenation ([co-worker] as input also yields [coworker]) is documented
// as on by default. The flag is only overridden when the "dehyphenate"
// configuration variable is explicitly set, so the default must live here.
bool TextSplit::o_deHyphenate = true;
|
||||||
|
|
||||||
// Final term checkpoint: do some checking (the kind which is simpler
|
// Final term checkpoint: do some checking (the kind which is simpler
|
||||||
// to do here than in the main loop), then send term to our client.
|
// to do here than in the main loop), then send term to our client.
|
||||||
|
@ -309,7 +310,8 @@ bool TextSplit::words_from_span(int bp)
|
||||||
// Byte position of the span start
|
// Byte position of the span start
|
||||||
int spboffs = bp - m_span.size();
|
int spboffs = bp - m_span.size();
|
||||||
|
|
||||||
if (spanwords == 2 && m_span[m_words_in_span[0].second] == '-') {
|
if (o_deHyphenate && spanwords == 2 &&
|
||||||
|
m_span[m_words_in_span[0].second] == '-') {
|
||||||
unsigned int s0 = m_words_in_span[0].first;
|
unsigned int s0 = m_words_in_span[0].first;
|
||||||
unsigned int l0 = m_words_in_span[0].second - m_words_in_span[0].first;
|
unsigned int l0 = m_words_in_span[0].second - m_words_in_span[0].first;
|
||||||
unsigned int s1 = m_words_in_span[1].first;
|
unsigned int s1 = m_words_in_span[1].first;
|
||||||
|
|
|
@ -52,6 +52,13 @@ public:
|
||||||
o_noNumbers = true;
|
o_noNumbers = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Given [co-worker] as input, do we also generate [coworker] ?
|
||||||
|
// Set by rclconfig
|
||||||
|
static bool o_deHyphenate;
|
||||||
|
static void deHyphenate(bool on) {
|
||||||
|
o_deHyphenate = on;
|
||||||
|
}
|
||||||
|
|
||||||
enum Flags {
|
enum Flags {
|
||||||
// Default: will return spans and words (a_b, a, b)
|
// Default: will return spans and words (a_b, a, b)
|
||||||
TXTS_NONE = 0,
|
TXTS_NONE = 0,
|
||||||
|
|
|
@ -5591,6 +5591,15 @@ skippedPaths = ~/somedir/*.txt
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry><term><varname>dehyphenate</varname></term>
|
||||||
|
<listitem><para>Determines if, given an input of
|
||||||
|
<literal>co-worker</literal>, we add a term for
|
||||||
|
<literal>coworker</literal>. This possibility is new in version
|
||||||
|
1.22, and on by default. Setting the variable to off allows
|
||||||
|
restoring the previous behaviour.</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry><term><varname>nocjk</varname></term>
|
<varlistentry><term><varname>nocjk</varname></term>
|
||||||
<listitem><para>If this is set to true, specific east asian
|
<listitem><para>If this is set to true, specific east asian
|
||||||
(Chinese Korean Japanese) characters/word splitting is
|
(Chinese Korean Japanese) characters/word splitting is
|
||||||
|
|
|
@ -134,6 +134,13 @@ indexstemminglanguages = english
|
||||||
# are not performed by unac, but I can't imagine someone typing the composed
|
# are not performed by unac, but I can't imagine someone typing the composed
|
||||||
# forms in a search.
|
# forms in a search.
|
||||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl
|
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl
|
||||||
|
# Turn off the indexing of numbers: may reduce the index size if you have
|
||||||
|
# no use for them
|
||||||
|
# nonumbers = 0
|
||||||
|
|
||||||
|
# Turn off indexing "coworker" for an input of "co-worker" (in addition to
|
||||||
|
# co, worker, "co worker"). Default is on as of version 1.22
|
||||||
|
# dehyphenate = 1
|
||||||
|
|
||||||
# Maximum expansion count for a single term (ie: when using wildcards).
|
# Maximum expansion count for a single term (ie: when using wildcards).
|
||||||
# We used to not limit this at all (except for filenames where the limit
|
# We used to not limit this at all (except for filenames where the limit
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue