Make dehyphenate (co-worker->coworker) optional
This commit is contained in:
parent
5c001aec83
commit
bd3e930533
5 changed files with 38 additions and 8 deletions
|
@ -321,8 +321,9 @@ bool RclConfig::updateMainConfig()
|
|||
m_mdrstate.init(m_conf);
|
||||
|
||||
setKeyDir(cstr_null);
|
||||
bool nocjk = false;
|
||||
if (getConfParam("nocjk", &nocjk) && nocjk == true) {
|
||||
|
||||
bool bvalue = false;
|
||||
if (getConfParam("nocjk", &bvalue) && bvalue == true) {
|
||||
TextSplit::cjkProcessing(false);
|
||||
} else {
|
||||
int ngramlen;
|
||||
|
@ -333,14 +334,18 @@ bool RclConfig::updateMainConfig()
|
|||
}
|
||||
}
|
||||
|
||||
bool nonum = false;
|
||||
if (getConfParam("nonumbers", &nonum) && nonum == true) {
|
||||
bvalue = false;
|
||||
if (getConfParam("nonumbers", &bvalue) && bvalue == true) {
|
||||
TextSplit::noNumbers();
|
||||
}
|
||||
|
||||
bool fnmpathname = true;
|
||||
if (getConfParam("skippedPathsFnmPathname", &fnmpathname)
|
||||
&& fnmpathname == false) {
|
||||
bvalue = false;
|
||||
if (getConfParam("dehyphenate", &bvalue)) {
|
||||
TextSplit::deHyphenate(bvalue);
|
||||
}
|
||||
|
||||
bvalue = true;
|
||||
if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) {
|
||||
FsTreeWalker::setNoFnmPathname();
|
||||
}
|
||||
|
||||
|
|
|
@ -212,6 +212,7 @@ bool TextSplit::isCJK(int c)
|
|||
// Class-wide (static) TextSplit settings and their initial values.
bool TextSplit::o_processCJK = true;
|
||||
unsigned int TextSplit::o_CJKNgramLen = 2;
|
||||
bool TextSplit::o_noNumbers = false;
|
||||
// Gates the special handling of two-word spans joined by '-' in
// words_from_span(); assigned through TextSplit::deHyphenate().
// NOTE(review): initialised to false, and updateMainConfig() only calls
// deHyphenate() when "dehyphenate" is present in the configuration, yet the
// manual entry and the sample configuration describe the feature as on by
// default -- confirm which default is intended.
bool TextSplit::o_deHyphenate = false;
|
||||
|
||||
// Final term checkpoint: do some checking (the kind which is simpler
|
||||
// to do here than in the main loop), then send term to our client.
|
||||
|
@ -309,7 +310,8 @@ bool TextSplit::words_from_span(int bp)
|
|||
// Byte position of the span start
|
||||
int spboffs = bp - m_span.size();
|
||||
|
||||
if (spanwords == 2 && m_span[m_words_in_span[0].second] == '-') {
|
||||
if (o_deHyphenate && spanwords == 2 &&
|
||||
m_span[m_words_in_span[0].second] == '-') {
|
||||
unsigned int s0 = m_words_in_span[0].first;
|
||||
unsigned int l0 = m_words_in_span[0].second - m_words_in_span[0].first;
|
||||
unsigned int s1 = m_words_in_span[1].first;
|
||||
|
|
|
@ -52,6 +52,13 @@ public:
|
|||
o_noNumbers = true;
|
||||
}
|
||||
|
||||
// Dehyphenation switch: given [co-worker] as input, do we also generate
// [coworker] ?
|
||||
// Set by rclconfig, through deHyphenate() below.
|
||||
static bool o_deHyphenate;
|
||||
// Set the class-wide dehyphenation flag: true enables it, false disables it.
static void deHyphenate(bool on) {
|
||||
o_deHyphenate = on;
|
||||
}
|
||||
|
||||
enum Flags {
|
||||
// Default: will return spans and words (a_b, a, b)
|
||||
TXTS_NONE = 0,
|
||||
|
|
|
@ -5591,6 +5591,15 @@ skippedPaths = ~/somedir/*.txt
|
|||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>dehyphenate</varname></term>
|
||||
<listitem><para>Determines if, given an input of
|
||||
<literal>co-worker</literal>, we add a term for
|
||||
<literal>coworker</literal>. This possibility is new in version
|
||||
1.22, and on by default. Setting the variable to off allows
|
||||
restoring the previous behaviour.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>nocjk</varname></term>
|
||||
<listitem><para>If this is set to true, specific East Asian
|
||||
(Chinese Korean Japanese) characters/word splitting is
|
||||
|
|
|
@ -134,6 +134,13 @@ indexstemminglanguages = english
|
|||
# are not performed by unac, but I can't imagine someone typing the composed
|
||||
# forms in a search.
|
||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl
|
||||
# Turn off the indexing of numbers: may reduce the index size if you have
|
||||
# no use for them
|
||||
# nonumbers = 0
|
||||
|
||||
# Turn off indexing "coworker" for an input of "co-worker" (in addition to
|
||||
# co, worker, "co worker"). Default is on as of version 1.22
|
||||
# dehyphenate = 1
|
||||
|
||||
# Maximum expansion count for a single term (ie: when using wildcards).
|
||||
# We used to not limit this at all (except for filenames where the limit
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue