Arrange so that ' .net' is split into two terms, '.net' and 'net'. Previously it only produced '.net', which meant that matching filename extensions, as in fn:pdf$, did not work well in cases where a special character or a space occurred before the dot.

This commit is contained in:
Jean-Francois Dockes 2016-06-20 17:25:25 +02:00
parent f3aa385448
commit 3fd6e866a5

View file

@ -326,23 +326,25 @@ bool TextSplit::words_from_span(size_t bp)
for (int i = 0; for (int i = 0;
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
i++, pos++) { i++) {
int deb = m_words_in_span[i].first; int deb = m_words_in_span[i].first;
bool noposinc = m_words_in_span[i].second == deb;
for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i); for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords); j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords);
j++) { j++) {
int fin = m_words_in_span[j].second; int fin = m_words_in_span[j].second;
//cerr << "i " << i << " j " << j << " deb " << deb << //cerr << "i " << i << " j " << j << " deb " << deb <<
// " fin " << fin << endl; //" fin " << fin << endl;
if (fin - deb > int(m_span.size())) if (fin - deb > int(m_span.size()))
break; break;
string word(m_span.substr(deb, fin-deb)); string word(m_span.substr(deb, fin-deb));
if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin)) if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin))
return false; return false;
} }
if (!noposinc)
++pos;
} }
return true; return true;
} }
@ -642,8 +644,12 @@ bool TextSplit::text_to_words(const string &in)
// Check for number like .1 // Check for number like .1
if (isdigit(nextwhat, m_flags)) { if (isdigit(nextwhat, m_flags)) {
m_inNumber = true; m_inNumber = true;
m_wordLen += it.appendchartostring(m_span);
} else {
m_words_in_span.
push_back(pair<int,int>(m_wordStart, m_wordStart));
m_wordStart += it.appendchartostring(m_span);
} }
m_wordLen += it.appendchartostring(m_span);
STATS_INC_WORDCHARS; STATS_INC_WORDCHARS;
break; break;
} }