Arrange for ' .net' to be split into both '.net' and 'net'. Previously only '.net' was produced, which meant that matching filename extensions, as in fn:pdf$, did not work well in cases where a special character or a space occurred before the '.'.

This commit is contained in:
Jean-Francois Dockes 2016-06-20 17:25:25 +02:00
parent f3aa385448
commit 3fd6e866a5

View file

@ -326,10 +326,10 @@ bool TextSplit::words_from_span(size_t bp)
for (int i = 0; for (int i = 0;
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
i++, pos++) { i++) {
int deb = m_words_in_span[i].first; int deb = m_words_in_span[i].first;
bool noposinc = m_words_in_span[i].second == deb;
for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i); for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords); j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords);
j++) { j++) {
@ -343,6 +343,8 @@ bool TextSplit::words_from_span(size_t bp)
if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin)) if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin))
return false; return false;
} }
if (!noposinc)
++pos;
} }
return true; return true;
} }
@ -642,8 +644,12 @@ bool TextSplit::text_to_words(const string &in)
// Check for number like .1 // Check for number like .1
if (isdigit(nextwhat, m_flags)) { if (isdigit(nextwhat, m_flags)) {
m_inNumber = true; m_inNumber = true;
}
m_wordLen += it.appendchartostring(m_span); m_wordLen += it.appendchartostring(m_span);
} else {
m_words_in_span.
push_back(pair<int,int>(m_wordStart, m_wordStart));
m_wordStart += it.appendchartostring(m_span);
}
STATS_INC_WORDCHARS; STATS_INC_WORDCHARS;
break; break;
} }