merged the case/diac sensitivity code back into trunk
This commit is contained in:
commit
94b571aac6
22 changed files with 743 additions and 271 deletions
|
@ -21,7 +21,10 @@
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <set>
|
//#include <set>
|
||||||
|
#include <tr1/unordered_set>
|
||||||
|
using std::tr1::unordered_set;
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include "textsplit.h"
|
#include "textsplit.h"
|
||||||
|
@ -57,8 +60,8 @@ static int charclasses[charclasses_size];
|
||||||
// with interesting properties. This is far from full-blown management
|
// with interesting properties. This is far from full-blown management
|
||||||
// of Unicode properties, but seems to do the job well enough in most
|
// of Unicode properties, but seems to do the job well enough in most
|
||||||
// common cases
|
// common cases
|
||||||
static set<unsigned int> unicign;
|
static unordered_set<unsigned int> unicign;
|
||||||
static set<unsigned int> visiblewhite;
|
static unordered_set<unsigned int> visiblewhite;
|
||||||
|
|
||||||
class CharClassInit {
|
class CharClassInit {
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -25,24 +25,8 @@
|
||||||
* This is used as a quick fix to the ascii-based code, and is not correct.
|
* This is used as a quick fix to the ascii-based code, and is not correct.
|
||||||
* the correct way would be to do what http://www.unicode.org/reports/tr29/
|
* the correct way would be to do what http://www.unicode.org/reports/tr29/
|
||||||
* says.
|
* says.
|
||||||
*
|
|
||||||
* Data from:
|
|
||||||
# PropList-4.0.1.txt
|
|
||||||
# Date: 2004-03-02, 02:42:40 GMT [MD]
|
|
||||||
#
|
|
||||||
# Unicode Character Database
|
|
||||||
# Copyright (c) 1991-2004 Unicode, Inc.
|
|
||||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
|
||||||
# For documentation, see UCD.html
|
|
||||||
*/
|
*/
|
||||||
static const unsigned int uniign[] = {
|
static const unsigned int uniign[] = {
|
||||||
0x0021, /* ; Terminal_Punctuation # Po EXCLAMATION MARK*/
|
|
||||||
0x002C, /* ; Terminal_Punctuation # Po COMMA*/
|
|
||||||
0x002D, /* ; Dash # Pd HYPHEN-MINUS*/
|
|
||||||
0x002E, /* ; Terminal_Punctuation # Po FULL STOP*/
|
|
||||||
0x003A, /* ; Terminal_Punctuation # Po [2] COLON..SEMICOLON*/
|
|
||||||
0x003B, /* ; Terminal_Punctuation # Po [2] COLON..SEMICOLON*/
|
|
||||||
0x003F, /* ; Terminal_Punctuation # Po QUESTION MARK*/
|
|
||||||
0x0085, /* NEXT LINE NEL;Cc */
|
0x0085, /* NEXT LINE NEL;Cc */
|
||||||
0x00A0, /* NO-BREAK SPACE; Zs */
|
0x00A0, /* NO-BREAK SPACE; Zs */
|
||||||
0x00A1, /* INVERTED EXCLAMATION MARK;Po */
|
0x00A1, /* INVERTED EXCLAMATION MARK;Po */
|
||||||
|
@ -53,85 +37,81 @@ static const unsigned int uniign[] = {
|
||||||
0x00A6, /* BROKEN BAR;So */
|
0x00A6, /* BROKEN BAR;So */
|
||||||
0x00A7, /* SECTION SIGN;So; */
|
0x00A7, /* SECTION SIGN;So; */
|
||||||
0x00A9, /* COPYRIGHT SIGN;So */
|
0x00A9, /* COPYRIGHT SIGN;So */
|
||||||
0x00AB, /* ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK*/
|
0x00AB, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK*/
|
||||||
0x00AC, /* NOT SIGN;Sm */
|
0x00AC, /* NOT SIGN;Sm */
|
||||||
0x00AD, /* ; Hyphen # Cf SOFT HYPHEN*/
|
0x00AD, /* SOFT HYPHEN*/
|
||||||
0x00AE, /* registered sign */
|
0x00AE, /* registered sign */
|
||||||
0x00B0, /* DEGREE SIGN;So;0;ET;;;;;N;;;;; */
|
0x00B0, /* DEGREE SIGN */
|
||||||
0x00B1, /* PLUS-MINUS SIGN;Sm;0;ET;;;;;N;PLUS-OR-MINUS SIGN;;;;*/
|
0x00B1, /* PLUS-MINUS SIGN */
|
||||||
0x00B7, /* MIDDLE DOT;Po;0;ON;;;;;N;;;;;*/
|
0x00B7, /* MIDDLE DOT */
|
||||||
0x00BB, /* ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK*/
|
0x00BB, /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
|
||||||
0x00BF, /* INVERTED QUESTION MARK;Po */
|
0x00BF, /* INVERTED QUESTION MARK; */
|
||||||
0x00D7, /* MULTIPLICATION SIGN;Sm;0;ON;;;;;N;;;;; */
|
0x00D7, /* MULTIPLICATION SIGN */
|
||||||
0x037E, /* ; Terminal_Punctuation # Po GREEK QUESTION MARK*/
|
0x037E, /* GREEK QUESTION MARK */
|
||||||
0x0387, /* ; Terminal_Punctuation # Po GREEK ANO TELEIA*/
|
0x0387, /* GREEK ANO TELEIA */
|
||||||
0x055C, /* ; STerm # Po ARMENIAN EXCLAMATION MARK*/
|
0x055C, /* ARMENIAN EXCLAMATION MARK */
|
||||||
0x055E, /* ; STerm # Po ARMENIAN QUESTION MARK*/
|
0x055E, /* ARMENIAN QUESTION MARK */
|
||||||
0x0589, /* ; STerm # Po ARMENIAN FULL STOP*/
|
0x0589, /* ARMENIAN FULL STOP */
|
||||||
0x0589, /* ; Terminal_Punctuation # Po ARMENIAN FULL STOP*/
|
0x058A, /* ARMENIAN HYPHEN */
|
||||||
0x058A, /* ; Dash # Pd ARMENIAN HYPHEN*/
|
0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */
|
||||||
0x058A, /* ; Hyphen # Pd ARMENIAN HYPHEN*/
|
0x060C, /* ARABIC COMMA */
|
||||||
0x05C3, /* ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ*/
|
0x061B, /* ARABIC SEMICOLON */
|
||||||
0x060C, /* ; Terminal_Punctuation # Po ARABIC COMMA*/
|
0x061F, /* ARABIC QUESTION MARK */
|
||||||
0x061B, /* ; Terminal_Punctuation # Po ARABIC SEMICOLON*/
|
0x06D4, /* ARABIC FULL STOP */
|
||||||
0x061F, /* ; STerm # Po ARABIC QUESTION MARK*/
|
0x166E, /* CANADIAN SYLLABICS FULL STOP */
|
||||||
0x061F, /* ; Terminal_Punctuation # Po ARABIC QUESTION MARK*/
|
0x1680, /* OGHAM SPACE MARK */
|
||||||
0x06D4, /* ; STerm # Po ARABIC FULL STOP*/
|
0x16EB, /* RUNIC SINGLE PUNCTUATION */
|
||||||
0x06D4, /* ; Terminal_Punctuation # Po ARABIC FULL STOP*/
|
0x16EC, /* RUNIC MULTIPLE PUNCTUATION */
|
||||||
0x166E, /* ; STerm # Po CANADIAN SYLLABICS FULL STOP*/
|
0x16ED, /* RUNIC CROSS PUNCTUATION */
|
||||||
0x1680, /* ; White_Space # Zs OGHAM SPACE MARK*/
|
0x1803, /* MONGOLIAN FULL STOP */
|
||||||
0x16EB, /* RUNIC SINGLE PUNCTUATION;Po;0;L;;;;;N;;;;;*/
|
0x1806, /* MONGOLIAN TODO SOFT HYPHEN */
|
||||||
0x16EC, /* RUNIC MULTIPLE PUNCTUATION;Po;0;L;;;;;N;;;;;*/
|
0x1809, /* MONGOLIAN MANCHU FULL STOP */
|
||||||
0x16ED, /* RUNIC CROSS PUNCTUATION;Po;0;L;;;;;N;;;;; */
|
0x180E, /* MONGOLIAN VOWEL SEPARATOR */
|
||||||
0x1803, /* ; STerm # Po MONGOLIAN FULL STOP*/
|
0x2000, /* EN QUAD..HAIR SPACE*/
|
||||||
0x1806, /* ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN*/
|
0x2001, /* EN QUAD..HAIR SPACE*/
|
||||||
0x1809, /* ; STerm # Po MONGOLIAN MANCHU FULL STOP*/
|
0x2002, /* EN QUAD..HAIR SPACE*/
|
||||||
0x180E, /* ; White_Space # Zs MONGOLIAN VOWEL SEPARATOR*/
|
0x2003, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2000, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2004, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2001, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2005, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2002, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2006, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2003, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2007, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2004, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2008, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2005, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2009, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2006, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x200A, /* EN QUAD..HAIR SPACE*/
|
||||||
0x2007, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2010, /* [2] HYPHEN..NON-BREAKING HYPHEN*/
|
||||||
0x2008, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2011, /* [2] HYPHEN..NON-BREAKING HYPHEN*/
|
||||||
0x2009, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2012, /* [6] HYPHEN..HORIZONTAL BAR*/
|
||||||
0x200A, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
|
0x2013, /* [6] HYPHEN..HORIZONTAL BAR*/
|
||||||
0x2010, /* ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN*/
|
0x2014, /* [6] HYPHEN..HORIZONTAL BAR*/
|
||||||
0x2011, /* ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN*/
|
0x2015, /* [6] HYPHEN..HORIZONTAL BAR*/
|
||||||
0x2012, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
|
0x2018, /* LEFT SINGLE QUOTATION MARK*/
|
||||||
0x2013, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
|
0x2019, /* RIGHT SINGLE QUOTATION MARK*/
|
||||||
0x2014, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
|
0x201A, /* SINGLE LOW-9 QUOTATION MARK*/
|
||||||
0x2015, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
|
0x201B, /* SINGLE HIGH-REVERSED-9 QUOTATION MARK*/
|
||||||
0x2018, /* ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK*/
|
0x201C, /* LEFT DOUBLE QUOTATION MARK*/
|
||||||
0x2019, /* ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK*/
|
0x201D, /* RIGHT DOUBLE QUOTATION MARK*/
|
||||||
0x201A, /* ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK*/
|
0x201E, /* DOUBLE LOW-9 QUOTATION MARK*/
|
||||||
0x201B, /* ; Quotation_Mark # Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK*/
|
0x201F, /* DOUBLE HIGH-REVERSED-9 QUOTATION MARK*/
|
||||||
0x201C, /* ; Quotation_Mark # Pi LEFT DOUBLE QUOTATION MARK*/
|
0x2022, /* BULLET */
|
||||||
0x201D, /* ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK*/
|
0x2023, /* TRIANGULAR BULLET*/
|
||||||
0x201E, /* ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK*/
|
|
||||||
0x201F, /* ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK*/
|
|
||||||
0x2022, /* BULLET;Po;0;ON;;;;;N;;;;; */
|
|
||||||
0x2023, /* TRIANGULAR BULLET;Po;0;ON;;;;;N;;;;;*/
|
|
||||||
0x2024, /* ONE DOT LEADER;Po;0;ON;<compat> 002E;;;;N;;;;;*/
|
0x2024, /* ONE DOT LEADER;Po;0;ON;<compat> 002E;;;;N;;;;;*/
|
||||||
0x2025, /* TWO DOT LEADER;Po;0;ON;<compat> 002E 002E;;;;N;;;;; */
|
0x2025, /* TWO DOT LEADER;Po;0;ON;<compat> 002E 002E;;;;N;;;;; */
|
||||||
0x2026, /* HORIZONTAL ELLIPSIS;Po;0;ON;<compat> 002E 002E 002E;;;;N;;;;; */
|
0x2026, /* HORIZONTAL ELLIPSIS;Po;0;ON;<compat> 002E 002E 002E;;;;N;;;;; */
|
||||||
0x2028, /* ; White_Space # Zl LINE SEPARATOR*/
|
0x2028, /* LINE SEPARATOR */
|
||||||
0x2029, /* ; White_Space # Zp PARAGRAPH SEPARATOR*/
|
0x2029, /* PARAGRAPH SEPARATOR */
|
||||||
0x202F, /* ; White_Space # Zs NARROW NO-BREAK SPACE*/
|
0x202F, /* NARROW NO-BREAK SPACE */
|
||||||
0x2032, /* PRIME;Po;0;ET;;;;;N;;;;;*/
|
0x2032, /* PRIME */
|
||||||
0x2039, /* ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK*/
|
0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
|
||||||
0x203A, /* ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK*/
|
0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK*/
|
||||||
0x203C, /* ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
|
0x203C, /* [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
|
||||||
0x203D, /* ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
|
0x203D, /* [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
|
||||||
0x2047, /* ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
|
0x2047, /* [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
|
||||||
0x2048, /* ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
|
0x2048, /* [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
|
||||||
0x2049, /* ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
|
0x2049, /* [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
|
||||||
0x2053, /* ; Dash # Po SWUNG DASH*/
|
0x2053, /* SWUNG DASH*/
|
||||||
0x205F, /* ; White_Space # Zs MEDIUM MATHEMATICAL SPACE*/
|
0x205F, /* MEDIUM MATHEMATICAL SPACE*/
|
||||||
0x207B, /* ; Dash # Sm SUPERSCRIPT MINUS*/
|
0x207B, /* SUPERSCRIPT MINUS*/
|
||||||
0x208B, /* ; Dash # Sm SUBSCRIPT MINUS*/
|
0x208B, /* SUBSCRIPT MINUS*/
|
||||||
0x20A0, /* EURO-CURRENCY SIGN */
|
0x20A0, /* EURO-CURRENCY SIGN */
|
||||||
0x20A1, /* COLON SIGN */
|
0x20A1, /* COLON SIGN */
|
||||||
0x20A2, /* CRUZEIRO SIGN */
|
0x20A2, /* CRUZEIRO SIGN */
|
||||||
|
@ -161,60 +141,156 @@ static const unsigned int uniign[] = {
|
||||||
0x2117, /* SOUND RECORDING COPYRIGHT;So */
|
0x2117, /* SOUND RECORDING COPYRIGHT;So */
|
||||||
0x2122, /* TRADE MARK SIGN;So; */
|
0x2122, /* TRADE MARK SIGN;So; */
|
||||||
0x2192, /* RIGHTWARDS ARROW;Sm;0;ON;;;;;N;RIGHT ARROW;;;;*/
|
0x2192, /* RIGHTWARDS ARROW;Sm;0;ON;;;;;N;RIGHT ARROW;;;;*/
|
||||||
0x2212, /* ; Dash # Sm MINUS SIGN*/
|
0x2212, /* MINUS SIGN*/
|
||||||
|
0x25A0, /* BLACK SQUARE */
|
||||||
|
0x25A1, /* WHITE SQUARE */
|
||||||
|
0x25A2, /* WHITE SQUARE WITH ROUNDED CORNERS */
|
||||||
|
0x25A3, /* WHITE SQUARE CONTAINING BLACK SMALL SQUARE */
|
||||||
|
0x25A4, /* SQUARE WITH HORIZONTAL FILL */
|
||||||
|
0x25A5, /* SQUARE WITH VERTICAL FILL */
|
||||||
|
0x25A6, /* SQUARE WITH ORTHOGONAL CROSSHATCH FILL */
|
||||||
|
0x25A7, /* SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL */
|
||||||
|
0x25A8, /* SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL */
|
||||||
|
0x25A9, /* SQUARE WITH DIAGONAL CROSSHATCH FILL */
|
||||||
|
0x25AA, /* BLACK SMALL SQUARE */
|
||||||
|
0x25AB, /* WHITE SMALL SQUARE */
|
||||||
|
0x25AC, /* BLACK RECTANGLE */
|
||||||
|
0x25AD, /* WHITE RECTANGLE */
|
||||||
|
0x25AE, /* BLACK VERTICAL RECTANGLE */
|
||||||
|
0x25AF, /* WHITE VERTICAL RECTANGLE */
|
||||||
|
0x25B0, /* BLACK PARALLELOGRAM */
|
||||||
|
0x25B1, /* WHITE PARALLELOGRAM */
|
||||||
|
0x25B2, /* BLACK UP-POINTING TRIANGLE */
|
||||||
|
0x25B3, /* WHITE UP-POINTING TRIANGLE */
|
||||||
|
0x25B4, /* BLACK UP-POINTING SMALL TRIANGLE */
|
||||||
|
0x25B5, /* WHITE UP-POINTING SMALL TRIANGLE */
|
||||||
|
0x25B6, /* BLACK RIGHT-POINTING TRIANGLE */
|
||||||
|
0x25B7, /* WHITE RIGHT-POINTING TRIANGLE */
|
||||||
|
0x25B8, /* BLACK RIGHT-POINTING SMALL TRIANGLE */
|
||||||
|
0x25B9, /* WHITE RIGHT-POINTING SMALL TRIANGLE */
|
||||||
|
0x25BA, /* BLACK RIGHT-POINTING POINTER */
|
||||||
|
0x25BB, /* WHITE RIGHT-POINTING POINTER */
|
||||||
|
0x25BC, /* BLACK DOWN-POINTING TRIANGLE */
|
||||||
|
0x25BD, /* WHITE DOWN-POINTING TRIANGLE */
|
||||||
|
0x25BE, /* BLACK DOWN-POINTING SMALL TRIANGLE */
|
||||||
|
0x25BF, /* WHITE DOWN-POINTING SMALL TRIANGLE */
|
||||||
|
0x25C0, /* BLACK LEFT-POINTING TRIANGLE */
|
||||||
|
0x25C1, /* WHITE LEFT-POINTING TRIANGLE */
|
||||||
|
0x25C2, /* BLACK LEFT-POINTING SMALL TRIANGLE */
|
||||||
|
0x25C3, /* WHITE LEFT-POINTING SMALL TRIANGLE */
|
||||||
|
0x25C4, /* BLACK LEFT-POINTING POINTER */
|
||||||
|
0x25C5, /* WHITE LEFT-POINTING POINTER */
|
||||||
|
0x25C6, /* BLACK DIAMOND */
|
||||||
|
0x25C7, /* WHITE DIAMOND */
|
||||||
|
0x25C8, /* WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND */
|
||||||
|
0x25C9, /* FISHEYE */
|
||||||
|
0x25CA, /* LOZENGE */
|
||||||
|
0x25CB, /* WHITE CIRCLE */
|
||||||
|
0x25CC, /* DOTTED CIRCLE */
|
||||||
|
0x25CD, /* CIRCLE WITH VERTICAL FILL */
|
||||||
|
0x25CE, /* BULLSEYE */
|
||||||
|
0x25CF, /* BLACK CIRCLE */
|
||||||
|
0x25D0, /* CIRCLE WITH LEFT HALF BLACK */
|
||||||
|
0x25D1, /* CIRCLE WITH RIGHT HALF BLACK */
|
||||||
|
0x25D2, /* CIRCLE WITH LOWER HALF BLACK */
|
||||||
|
0x25D3, /* CIRCLE WITH UPPER HALF BLACK */
|
||||||
|
0x25D4, /* CIRCLE WITH UPPER RIGHT QUADRANT BLACK */
|
||||||
|
0x25D5, /* CIRCLE WITH ALL BUT UPPER LEFT QUADRANT BLACK */
|
||||||
|
0x25D6, /* LEFT HALF BLACK CIRCLE */
|
||||||
|
0x25D7, /* RIGHT HALF BLACK CIRCLE */
|
||||||
|
0x25D8, /* INVERSE BULLET */
|
||||||
|
0x25D9, /* INVERSE WHITE CIRCLE */
|
||||||
|
0x25DA, /* UPPER HALF INVERSE WHITE CIRCLE */
|
||||||
|
0x25DB, /* LOWER HALF INVERSE WHITE CIRCLE */
|
||||||
|
0x25DC, /* UPPER LEFT QUADRANT CIRCULAR ARC */
|
||||||
|
0x25DD, /* UPPER RIGHT QUADRANT CIRCULAR ARC */
|
||||||
|
0x25DE, /* LOWER RIGHT QUADRANT CIRCULAR ARC */
|
||||||
|
0x25DF, /* LOWER LEFT QUADRANT CIRCULAR ARC */
|
||||||
|
0x25E0, /* UPPER HALF CIRCLE */
|
||||||
|
0x25E1, /* LOWER HALF CIRCLE */
|
||||||
|
0x25E2, /* BLACK LOWER RIGHT TRIANGLE */
|
||||||
|
0x25E3, /* BLACK LOWER LEFT TRIANGLE */
|
||||||
|
0x25E4, /* BLACK UPPER LEFT TRIANGLE */
|
||||||
|
0x25E5, /* BLACK UPPER RIGHT TRIANGLE */
|
||||||
|
0x25E6, /* WHITE BULLET */
|
||||||
|
0x25E7, /* SQUARE WITH LEFT HALF BLACK */
|
||||||
|
0x25E8, /* SQUARE WITH RIGHT HALF BLACK */
|
||||||
|
0x25E9, /* SQUARE WITH UPPER LEFT DIAGONAL HALF BLACK */
|
||||||
|
0x25EA, /* SQUARE WITH LOWER RIGHT DIAGONAL HALF BLACK */
|
||||||
|
0x25EB, /* WHITE SQUARE WITH VERTICAL BISECTING LINE */
|
||||||
|
0x25EC, /* WHITE UP-POINTING TRIANGLE WITH DOT */
|
||||||
|
0x25ED, /* UP-POINTING TRIANGLE WITH LEFT HALF BLACK */
|
||||||
|
0x25EE, /* UP-POINTING TRIANGLE WITH RIGHT HALF BLACK */
|
||||||
|
0x25EF, /* LARGE CIRCLE */
|
||||||
|
0x25F0, /* WHITE SQUARE WITH UPPER LEFT QUADRANT */
|
||||||
|
0x25F1, /* WHITE SQUARE WITH LOWER LEFT QUADRANT */
|
||||||
|
0x25F2, /* WHITE SQUARE WITH LOWER RIGHT QUADRANT */
|
||||||
|
0x25F3, /* WHITE SQUARE WITH UPPER RIGHT QUADRANT */
|
||||||
|
0x25F4, /* WHITE CIRCLE WITH UPPER LEFT QUADRANT */
|
||||||
|
0x25F5, /* WHITE CIRCLE WITH LOWER LEFT QUADRANT */
|
||||||
|
0x25F6, /* WHITE CIRCLE WITH LOWER RIGHT QUADRANT */
|
||||||
|
0x25F7, /* WHITE CIRCLE WITH UPPER RIGHT QUADRANT */
|
||||||
|
0x25F8, /* UPPER LEFT TRIANGLE */
|
||||||
|
0x25F9, /* UPPER RIGHT TRIANGLE */
|
||||||
|
0x25FA, /* LOWER LEFT TRIANGLE */
|
||||||
|
0x25FB, /* WHITE MEDIUM SQUARE */
|
||||||
|
0x25FC, /* BLACK MEDIUM SQUARE */
|
||||||
|
0x25FD, /* WHITE MEDIUM SMALL SQUARE */
|
||||||
|
0x25FE, /* BLACK MEDIUM SMALL SQUARE */
|
||||||
|
0x25FF, /* LOWER RIGHT TRIANGLE */
|
||||||
0x2E2E, /* REVERSED QUESTION MARK;Po;0;ON;;;;;N;;;;; */
|
0x2E2E, /* REVERSED QUESTION MARK;Po;0;ON;;;;;N;;;;; */
|
||||||
0x3000, /* ; White_Space # Zs IDEOGRAPHIC SPACE*/
|
0x3000, /* IDEOGRAPHIC SPACE*/
|
||||||
0x3002, /* ; STerm # Po IDEOGRAPHIC FULL STOP*/
|
0x3002, /* IDEOGRAPHIC FULL STOP*/
|
||||||
0x300C, /* ; Quotation_Mark # Ps LEFT CORNER BRACKET*/
|
0x300C, /* LEFT CORNER BRACKET*/
|
||||||
0x300D, /* ; Quotation_Mark # Pe RIGHT CORNER BRACKET*/
|
0x300D, /* RIGHT CORNER BRACKET*/
|
||||||
0x300E, /* ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET*/
|
0x300E, /* LEFT WHITE CORNER BRACKET*/
|
||||||
0x300F, /* ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET*/
|
0x300F, /* RIGHT WHITE CORNER BRACKET*/
|
||||||
0x301C, /* ; Dash # Pd WAVE DASH*/
|
0x301C, /* WAVE DASH*/
|
||||||
0x301D, /* ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK*/
|
0x301D, /* REVERSED DOUBLE PRIME QUOTATION MARK*/
|
||||||
0x301E, /* ; Quotation_Mark # Pe LOW DOUBLE PRIME QUOTATION MARK*/
|
0x301E, /* LOW DOUBLE PRIME QUOTATION MARK*/
|
||||||
0x3030, /* ; Dash # Pd WAVY DASH*/
|
0x3030, /* WAVY DASH*/
|
||||||
0x30FB, /* ; Hyphen # Pc KATAKANA MIDDLE DOT*/
|
0x30FB, /* KATAKANA MIDDLE DOT*/
|
||||||
0xC2B6, /* PILCROW SIGN;So;0;ON;;;;;N;PARAGRAPH SIGN;;;; */
|
0xC2B6, /* PILCROW SIGN;So;0;ON;;;;;N;PARAGRAPH SIGN;;;; */
|
||||||
0xC3B7, /* DIVISION SIGN;Sm;0;ON;;;;;N;;;;; */
|
0xC3B7, /* DIVISION SIGN;Sm;0;ON;;;;;N;;;;; */
|
||||||
0xFE31, /* ; Dash # Pd PRESENTATION FORM FOR VERTICAL EM DASH*/
|
0xFE31, /* PRESENTATION FORM FOR VERTICAL EM DASH*/
|
||||||
0xFE32, /* ; Dash # Pd PRESENTATION FORM FOR VERTICAL EN DASH*/
|
0xFE32, /* PRESENTATION FORM FOR VERTICAL EN DASH*/
|
||||||
0xFE41, /* ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET*/
|
0xFE41, /* PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET*/
|
||||||
0xFE42, /* ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET*/
|
0xFE42, /* PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET*/
|
||||||
0xFE43, /* ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET*/
|
0xFE43, /* PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET*/
|
||||||
0xFE44, /* ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET*/
|
0xFE44, /* PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET*/
|
||||||
0xFE50, /* ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP*/
|
0xFE50, /* [3] SMALL COMMA..SMALL FULL STOP*/
|
||||||
0xFE51, /* ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP*/
|
0xFE51, /* [3] SMALL COMMA..SMALL FULL STOP*/
|
||||||
0xFE52, /* ; STerm # Po SMALL FULL STOP*/
|
0xFE52, /* STOP*/
|
||||||
0xFE52, /* ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP*/
|
0xFE52, /* [3] SMALL COMMA..SMALL FULL STOP*/
|
||||||
0xFE54, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
0xFE54, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
||||||
0xFE55, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
0xFE55, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
||||||
0xFE56, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
0xFE56, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
||||||
0xFE57, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
0xFE57, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
|
||||||
0xFE58, /* ; Dash # Pd SMALL EM DASH*/
|
0xFE58, /* SMALL EM DASH */
|
||||||
0xFE63, /* ; Hyphen # Pd SMALL HYPHEN-MINUS*/
|
0xFE63, /* SMALL HYPHEN-MINUS */
|
||||||
0xFF01, /* FULLWIDTH EXCLAMATION MARK;Po;0;ON;<wide> 0021;;;;N;;;;; */
|
0xFF01, /* FULLWIDTH EXCLAMATION MARK */
|
||||||
0xFF02, /* FULLWIDTH QUOTATION MARK;Po;0;ON;<wide> 0022;;;;N;;;;; */
|
0xFF02, /* FULLWIDTH QUOTATION MARK */
|
||||||
0xFF03, /* FULLWIDTH NUMBER SIGN;Po;0;ET;<wide> 0023;;;;N;;;;; */
|
0xFF03, /* FULLWIDTH NUMBER SIGN */
|
||||||
0xFF04, /* FULLWIDTH DOLLAR SIGN;Sc;0;ET;<wide> 0024;;;;N;;;;; */
|
0xFF04, /* FULLWIDTH DOLLAR SIGN */
|
||||||
0xFF05, /* FULLWIDTH PERCENT SIGN;Po;0;ET;<wide> 0025;;;;N;;;;; */
|
0xFF05, /* FULLWIDTH PERCENT SIGN */
|
||||||
0xFF06, /* FULLWIDTH AMPERSAND;Po;0;ON;<wide> 0026;;;;N;;;;; */
|
0xFF06, /* FULLWIDTH AMPERSAND */
|
||||||
0xFF07, /* FULLWIDTH APOSTROPHE;Po;0;ON;<wide> 0027;;;;N;;;;; */
|
0xFF07, /* FULLWIDTH APOSTROPHE */
|
||||||
0xFF08, /* FULLWIDTH LEFT PARENTHESIS;Ps;0;ON;<wide> 0028;;;;Y;FULLWIDTH OPENIN*/
|
0xFF08, /* FULLWIDTH LEFT PARENTHESIS */
|
||||||
0xFF09, /* FULLWIDTH RIGHT PARENTHESIS;Pe;0;ON;<wide> 0029;;;;Y;FULLWIDTH CLOS*/
|
0xFF09, /* FULLWIDTH RIGHT PARENTHESIS */
|
||||||
0xFF0A, /* FULLWIDTH ASTERISK;Po;0;ON;<wide> 002A;;;;N;;;;; */
|
0xFF0A, /* FULLWIDTH ASTERISK */
|
||||||
0xFF0B, /* FULLWIDTH PLUS SIGN;Sm;0;ES;<wide> 002B;;;;N;;;;; */
|
0xFF0B, /* FULLWIDTH PLUS SIGN */
|
||||||
0xFF0C, /* FULLWIDTH COMMA;Po;0;CS;<wide> 002C;;;;N;;;;; */
|
0xFF0C, /* FULLWIDTH COMMA */
|
||||||
0xFF0D, /* FULLWIDTH HYPHEN-MINUS;Pd;0;ES;<wide> 002D;;;;N;;;;; */
|
0xFF0D, /* FULLWIDTH HYPHEN-MINUS */
|
||||||
0xFF0E, /* FULLWIDTH FULL STOP;Po;0;CS;<wide> 002E;;;;N;FULLWIDTH PERIOD;;;; */
|
0xFF0E, /* FULLWIDTH FULL STOP */
|
||||||
0xFF0F, /* FULLWIDTH SOLIDUS;Po;0;CS;<wide> 002F;;;;N;FULLWIDTH SLASH;;;; */
|
0xFF0F, /* FULLWIDTH SOLIDUS */
|
||||||
0xFF1A, /* ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
|
0xFF1A, /* [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
|
||||||
0xFF1B, /* ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
|
0xFF1B, /* [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
|
||||||
0xFF1F, /* ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK*/
|
0xFF1F, /* FULLWIDTH QUESTION MARK*/
|
||||||
0xFF61, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP*/
|
0xFF61, /* HALFWIDTH IDEOGRAPHIC FULL STOP*/
|
||||||
0xFF62, /* ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET*/
|
0xFF62, /* HALFWIDTH LEFT CORNER BRACKET*/
|
||||||
0xFF63, /* ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET*/
|
0xFF63, /* HALFWIDTH RIGHT CORNER BRACKET*/
|
||||||
0xFF64, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA*/
|
0xFF64, /* HALFWIDTH IDEOGRAPHIC COMMA*/
|
||||||
0xFF65, /* ; Hyphen # Pc HALFWIDTH KATAKANA MIDDLE DOT*/
|
0xFF65, /* HALFWIDTH KATAKANA MIDDLE DOT*/
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Things that would visibly break a block of text, rendering obvious the need
|
/* Things that would visibly break a block of text, rendering obvious the need
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <QComboBox>
|
||||||
#include <qvariant.h>
|
#include <qvariant.h>
|
||||||
#include <qwidget.h>
|
#include <qwidget.h>
|
||||||
|
|
||||||
|
|
|
@ -301,6 +301,7 @@ void RclMain::init()
|
||||||
connect(restable, SIGNAL(docSaveToFileClicked(Rcl::Doc)),
|
connect(restable, SIGNAL(docSaveToFileClicked(Rcl::Doc)),
|
||||||
this, SLOT(saveDocToFile(Rcl::Doc)));
|
this, SLOT(saveDocToFile(Rcl::Doc)));
|
||||||
|
|
||||||
|
reslist->setRclMain(this);
|
||||||
connect(this, SIGNAL(docSourceChanged(RefCntr<DocSequence>)),
|
connect(this, SIGNAL(docSourceChanged(RefCntr<DocSequence>)),
|
||||||
reslist, SLOT(setDocSource(RefCntr<DocSequence>)));
|
reslist, SLOT(setDocSource(RefCntr<DocSequence>)));
|
||||||
connect(firstPageAction, SIGNAL(activated()),
|
connect(firstPageAction, SIGNAL(activated()),
|
||||||
|
@ -931,8 +932,12 @@ void RclMain::showIndexSched(bool modal)
|
||||||
connect(indexSched->cronCLB, SIGNAL(clicked()),
|
connect(indexSched->cronCLB, SIGNAL(clicked()),
|
||||||
this, SLOT(execCronTool()));
|
this, SLOT(execCronTool()));
|
||||||
if (theconfig && theconfig->isDefaultConfig()) {
|
if (theconfig && theconfig->isDefaultConfig()) {
|
||||||
|
#ifdef RCL_MONITOR
|
||||||
connect(indexSched->rtidxCLB, SIGNAL(clicked()),
|
connect(indexSched->rtidxCLB, SIGNAL(clicked()),
|
||||||
this, SLOT(execRTITool()));
|
this, SLOT(execRTITool()));
|
||||||
|
#else
|
||||||
|
indexSched->rtidxCLB->setEnabled(false);
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
indexSched->rtidxCLB->setEnabled(false);
|
indexSched->rtidxCLB->setEnabled(false);
|
||||||
}
|
}
|
||||||
|
@ -1493,8 +1498,9 @@ static bool lookForHtmlBrowser(string &exefile)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RclMain::startNativeViewer(Rcl::Doc doc)
|
void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum)
|
||||||
{
|
{
|
||||||
|
LOGDEB(("RclMain::startNativeViewer: page %d\n", pagenum));
|
||||||
// Look for appropriate viewer
|
// Look for appropriate viewer
|
||||||
string cmdplusattr;
|
string cmdplusattr;
|
||||||
if (prefs.useDesktopOpen) {
|
if (prefs.useDesktopOpen) {
|
||||||
|
@ -1512,11 +1518,13 @@ void RclMain::startNativeViewer(Rcl::Doc doc)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int pagenum = 1;
|
if (pagenum == -1) {
|
||||||
if (m_source.isNotNull())
|
|
||||||
pagenum = m_source->getFirstMatchPage(doc);
|
|
||||||
if (pagenum == -1)
|
|
||||||
pagenum = 1;
|
pagenum = 1;
|
||||||
|
if (m_source.isNotNull())
|
||||||
|
pagenum = m_source->getFirstMatchPage(doc);
|
||||||
|
if (pagenum == -1)
|
||||||
|
pagenum = 1;
|
||||||
|
}
|
||||||
char cpagenum[20];
|
char cpagenum[20];
|
||||||
sprintf(cpagenum, "%d", pagenum);
|
sprintf(cpagenum, "%d", pagenum);
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ public slots:
|
||||||
virtual void docExpand(Rcl::Doc);
|
virtual void docExpand(Rcl::Doc);
|
||||||
virtual void startPreview(int docnum, Rcl::Doc doc, int keymods);
|
virtual void startPreview(int docnum, Rcl::Doc doc, int keymods);
|
||||||
virtual void startPreview(Rcl::Doc);
|
virtual void startPreview(Rcl::Doc);
|
||||||
virtual void startNativeViewer(Rcl::Doc);
|
virtual void startNativeViewer(Rcl::Doc, int pagenum = -1);
|
||||||
virtual void saveDocToFile(Rcl::Doc);
|
virtual void saveDocToFile(Rcl::Doc);
|
||||||
virtual void previewNextInTab(Preview *, int sid, int docnum);
|
virtual void previewNextInTab(Preview *, int sid, int docnum);
|
||||||
virtual void previewPrevInTab(Preview *, int sid, int docnum);
|
virtual void previewPrevInTab(Preview *, int sid, int docnum);
|
||||||
|
|
|
@ -25,6 +25,7 @@ HEADERS += \
|
||||||
restable.h \
|
restable.h \
|
||||||
rtitool.h \
|
rtitool.h \
|
||||||
searchclause_w.h \
|
searchclause_w.h \
|
||||||
|
snippets_w.h \
|
||||||
spell_w.h \
|
spell_w.h \
|
||||||
ssearch_w.h \
|
ssearch_w.h \
|
||||||
uiprefs_w.h \
|
uiprefs_w.h \
|
||||||
|
@ -46,6 +47,7 @@ SOURCES += \
|
||||||
restable.cpp \
|
restable.cpp \
|
||||||
rtitool.cpp \
|
rtitool.cpp \
|
||||||
searchclause_w.cpp \
|
searchclause_w.cpp \
|
||||||
|
snippets_w.cpp \
|
||||||
spell_w.cpp \
|
spell_w.cpp \
|
||||||
ssearch_w.cpp \
|
ssearch_w.cpp \
|
||||||
uiprefs_w.cpp \
|
uiprefs_w.cpp \
|
||||||
|
@ -64,6 +66,7 @@ FORMS = \
|
||||||
restable.ui \
|
restable.ui \
|
||||||
rtitool.ui \
|
rtitool.ui \
|
||||||
spell.ui \
|
spell.ui \
|
||||||
|
snippets.ui \
|
||||||
ssearchb.ui \
|
ssearchb.ui \
|
||||||
uiprefs.ui \
|
uiprefs.ui \
|
||||||
viewaction.ui \
|
viewaction.ui \
|
||||||
|
|
|
@ -50,6 +50,7 @@
|
||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
#include "internfile.h"
|
#include "internfile.h"
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
|
#include "snippets_w.h"
|
||||||
|
|
||||||
#include "reslist.h"
|
#include "reslist.h"
|
||||||
#include "moc_reslist.cpp"
|
#include "moc_reslist.cpp"
|
||||||
|
@ -281,7 +282,7 @@ static PlainToRichQtReslist g_hiliter;
|
||||||
/////////////////////////////////////
|
/////////////////////////////////////
|
||||||
|
|
||||||
ResList::ResList(QWidget* parent, const char* name)
|
ResList::ResList(QWidget* parent, const char* name)
|
||||||
: RESLIST_PARENTCLASS(parent)
|
: RESLIST_PARENTCLASS(parent), m_parent(0)
|
||||||
{
|
{
|
||||||
if (!name)
|
if (!name)
|
||||||
setObjectName("resList");
|
setObjectName("resList");
|
||||||
|
@ -902,6 +903,9 @@ void ResList::createPopupMenu(const QPoint& pos)
|
||||||
this, SLOT(menuPreviewParent()));
|
this, SLOT(menuPreviewParent()));
|
||||||
popup->addAction(tr("&Open Parent document/folder"),
|
popup->addAction(tr("&Open Parent document/folder"),
|
||||||
this, SLOT(menuOpenParent()));
|
this, SLOT(menuOpenParent()));
|
||||||
|
if (m_source->snippetsCapable())
|
||||||
|
popup->addAction(tr("Open &Snippets window"),
|
||||||
|
this, SLOT(menuOpenSnippets()));
|
||||||
popup->popup(mapToGlobal(pos));
|
popup->popup(mapToGlobal(pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -953,6 +957,20 @@ void ResList::menuOpenParent()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ResList::menuOpenSnippets()
|
||||||
|
{
|
||||||
|
Rcl::Doc doc;
|
||||||
|
if (!getDoc(m_popDoc, doc) || m_source.isNull())
|
||||||
|
return;
|
||||||
|
SnippetsW *sp = new SnippetsW(doc, m_source);
|
||||||
|
if (m_parent) {
|
||||||
|
connect(sp, SIGNAL(startNativeViewer(Rcl::Doc, int)),
|
||||||
|
m_parent, SLOT(startNativeViewer(Rcl::Doc, int)));
|
||||||
|
}
|
||||||
|
|
||||||
|
sp->show();
|
||||||
|
}
|
||||||
|
|
||||||
void ResList::menuEdit()
|
void ResList::menuEdit()
|
||||||
{
|
{
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
|
|
|
@ -41,6 +41,7 @@ using std::pair;
|
||||||
#include "rcldoc.h"
|
#include "rcldoc.h"
|
||||||
#include "reslistpager.h"
|
#include "reslistpager.h"
|
||||||
|
|
||||||
|
class RclMain;
|
||||||
class QtGuiResListPager;
|
class QtGuiResListPager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -66,7 +67,10 @@ class ResList : public RESLIST_PARENTCLASS
|
||||||
int listId() const {return m_listId;}
|
int listId() const {return m_listId;}
|
||||||
int pageFirstDocNum();
|
int pageFirstDocNum();
|
||||||
void setFont();
|
void setFont();
|
||||||
|
void setRclMain(RclMain *m)
|
||||||
|
{
|
||||||
|
m_parent = m;
|
||||||
|
}
|
||||||
public slots:
|
public slots:
|
||||||
virtual void setDocSource(RefCntr<DocSequence> nsource);
|
virtual void setDocSource(RefCntr<DocSequence> nsource);
|
||||||
virtual void resetList(); // Erase current list
|
virtual void resetList(); // Erase current list
|
||||||
|
@ -84,6 +88,7 @@ class ResList : public RESLIST_PARENTCLASS
|
||||||
virtual void menuExpand();
|
virtual void menuExpand();
|
||||||
virtual void menuPreviewParent();
|
virtual void menuPreviewParent();
|
||||||
virtual void menuOpenParent();
|
virtual void menuOpenParent();
|
||||||
|
virtual void menuOpenSnippets();
|
||||||
virtual void previewExposed(int);
|
virtual void previewExposed(int);
|
||||||
virtual void append(const QString &text);
|
virtual void append(const QString &text);
|
||||||
virtual void readDocSource();
|
virtual void readDocSource();
|
||||||
|
@ -132,6 +137,7 @@ class ResList : public RESLIST_PARENTCLASS
|
||||||
// so we store the page and display it when done.
|
// so we store the page and display it when done.
|
||||||
QString m_text;
|
QString m_text;
|
||||||
#endif
|
#endif
|
||||||
|
RclMain *m_parent;
|
||||||
|
|
||||||
virtual void displayPage(); // Display current page
|
virtual void displayPage(); // Display current page
|
||||||
static int newListId();
|
static int newListId();
|
||||||
|
|
67
src/qtgui/snippets.ui
Normal file
67
src/qtgui/snippets.ui
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<ui version="4.0">
|
||||||
|
<class>Snippets</class>
|
||||||
|
<widget class="QDialog" name="Snippets">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>0</x>
|
||||||
|
<y>0</y>
|
||||||
|
<width>640</width>
|
||||||
|
<height>400</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="windowTitle">
|
||||||
|
<string>Snippets</string>
|
||||||
|
</property>
|
||||||
|
<property name="sizeGripEnabled">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
<layout class="QVBoxLayout" name="verticalLayout">
|
||||||
|
<item>
|
||||||
|
<widget class="QWebView" name="webView">
|
||||||
|
<property name="url">
|
||||||
|
<url>
|
||||||
|
<string>about:blank</string>
|
||||||
|
</url>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QDialogButtonBox" name="buttonBox">
|
||||||
|
<property name="orientation">
|
||||||
|
<enum>Qt::Horizontal</enum>
|
||||||
|
</property>
|
||||||
|
<property name="standardButtons">
|
||||||
|
<set>QDialogButtonBox::Close</set>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
|
</widget>
|
||||||
|
<customwidgets>
|
||||||
|
<customwidget>
|
||||||
|
<class>QWebView</class>
|
||||||
|
<extends>QWidget</extends>
|
||||||
|
<header>QtWebKit/QWebView</header>
|
||||||
|
</customwidget>
|
||||||
|
</customwidgets>
|
||||||
|
<resources/>
|
||||||
|
<connections>
|
||||||
|
<connection>
|
||||||
|
<sender>buttonBox</sender>
|
||||||
|
<signal>clicked(QAbstractButton*)</signal>
|
||||||
|
<receiver>Snippets</receiver>
|
||||||
|
<slot>close()</slot>
|
||||||
|
<hints>
|
||||||
|
<hint type="sourcelabel">
|
||||||
|
<x>257</x>
|
||||||
|
<y>369</y>
|
||||||
|
</hint>
|
||||||
|
<hint type="destinationlabel">
|
||||||
|
<x>257</x>
|
||||||
|
<y>197</y>
|
||||||
|
</hint>
|
||||||
|
</hints>
|
||||||
|
</connection>
|
||||||
|
</connections>
|
||||||
|
</ui>
|
124
src/qtgui/snippets_w.cpp
Normal file
124
src/qtgui/snippets_w.cpp
Normal file
|
@ -0,0 +1,124 @@
|
||||||
|
/* Copyright (C) 2012 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "debuglog.h"
|
||||||
|
#include "recoll.h"
|
||||||
|
#include "snippets_w.h"
|
||||||
|
#include "guiutils.h"
|
||||||
|
#include "rcldb.h"
|
||||||
|
#include "rclhelp.h"
|
||||||
|
#include "plaintorich.h"
|
||||||
|
|
||||||
|
class PlainToRichQtSnippets : public PlainToRich {
|
||||||
|
public:
|
||||||
|
virtual string startMatch(unsigned int)
|
||||||
|
{
|
||||||
|
return string("<span class='rclmatch' style='color: ")
|
||||||
|
+ string((const char *)prefs.qtermcolor.toAscii()) + string("'>");
|
||||||
|
}
|
||||||
|
virtual string endMatch()
|
||||||
|
{
|
||||||
|
return string("</span>");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
static PlainToRichQtSnippets g_hiliter;
|
||||||
|
|
||||||
|
void SnippetsW::init()
|
||||||
|
{
|
||||||
|
if (m_source.isNull())
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Make title out of file name if none yet
|
||||||
|
string titleOrFilename;
|
||||||
|
string utf8fn;
|
||||||
|
m_doc.getmeta(Rcl::Doc::keytt, &titleOrFilename);
|
||||||
|
m_doc.getmeta(Rcl::Doc::keyfn, &utf8fn);
|
||||||
|
if (titleOrFilename.empty()) {
|
||||||
|
titleOrFilename = utf8fn;
|
||||||
|
}
|
||||||
|
|
||||||
|
setWindowTitle(QString::fromUtf8(titleOrFilename.c_str()));
|
||||||
|
|
||||||
|
vector<pair<int, string> > vpabs;
|
||||||
|
m_source->getAbstract(m_doc, vpabs);
|
||||||
|
|
||||||
|
HighlightData hdata;
|
||||||
|
m_source->getTerms(hdata);
|
||||||
|
|
||||||
|
QString html = QString::fromAscii(
|
||||||
|
"<html><head>"
|
||||||
|
"<meta http-equiv=\"content-type\" "
|
||||||
|
"content=\"text/html; charset=utf-8\"></head>"
|
||||||
|
"<body style='overflow-x: scroll; white-space: nowrap'>"
|
||||||
|
"<table>"
|
||||||
|
);
|
||||||
|
|
||||||
|
g_hiliter.set_inputhtml(false);
|
||||||
|
|
||||||
|
for (vector<pair<int, string> >::const_iterator it = vpabs.begin();
|
||||||
|
it != vpabs.end(); it++) {
|
||||||
|
html += "<tr><td>";
|
||||||
|
if (it->first > 0) {
|
||||||
|
char buf[100];
|
||||||
|
sprintf(buf, "P. %d", it->first);
|
||||||
|
html += "<a href=\"";
|
||||||
|
html += buf;
|
||||||
|
html += "\">";
|
||||||
|
html += buf;
|
||||||
|
html += "</a>";
|
||||||
|
}
|
||||||
|
html += "</td><td>";
|
||||||
|
list<string> lr;
|
||||||
|
g_hiliter.plaintorich(it->second, lr, hdata);
|
||||||
|
html.append(QString::fromUtf8(lr.front().c_str()));
|
||||||
|
html.append("</td></tr>\n");
|
||||||
|
}
|
||||||
|
html.append("</body></html>");
|
||||||
|
webView->setHtml(html);
|
||||||
|
connect(webView, SIGNAL(linkClicked(const QUrl &)),
|
||||||
|
this, SLOT(linkWasClicked(const QUrl &)));
|
||||||
|
webView->page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void SnippetsW::linkWasClicked(const QUrl &url)
|
||||||
|
{
|
||||||
|
string ascurl = (const char *)url.toString().toAscii();;
|
||||||
|
LOGDEB(("Snippets::linkWasClicked: [%s]\n", ascurl.c_str()));
|
||||||
|
|
||||||
|
if (ascurl.size() > 3) {
|
||||||
|
int what = ascurl[0];
|
||||||
|
switch (what) {
|
||||||
|
case 'P':
|
||||||
|
{
|
||||||
|
int page = atoi(ascurl.c_str()+2);
|
||||||
|
emit startNativeViewer(m_doc, page);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOGERR(("Snippets::linkWasClicked: bad link [%s]\n", ascurl.c_str()));
|
||||||
|
}
|
||||||
|
|
50
src/qtgui/snippets_w.h
Normal file
50
src/qtgui/snippets_w.h
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
/* Copyright (C) 2012 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#ifndef _SNIPPETS_W_H_INCLUDED_
|
||||||
|
#define _SNIPPETS_W_H_INCLUDED_
|
||||||
|
|
||||||
|
#include "rcldoc.h"
|
||||||
|
#include "refcntr.h"
|
||||||
|
#include "docseq.h"
|
||||||
|
#include "rclmain_w.h"
|
||||||
|
|
||||||
|
#include "ui_snippets.h"
|
||||||
|
|
||||||
|
class SnippetsW : public QWidget, public Ui::Snippets
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
SnippetsW(Rcl::Doc doc, RefCntr<DocSequence> source, QWidget* parent = 0)
|
||||||
|
: QWidget(parent), m_doc(doc), m_source(source)
|
||||||
|
{
|
||||||
|
setupUi((QDialog*)this);
|
||||||
|
init();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected slots:
|
||||||
|
virtual void linkWasClicked(const QUrl &);
|
||||||
|
|
||||||
|
signals:
|
||||||
|
void startNativeViewer(Rcl::Doc, int pagenum);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void init();
|
||||||
|
Rcl::Doc m_doc;
|
||||||
|
RefCntr<DocSequence> m_source;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _SNIPPETS_W_H_INCLUDED_ */
|
|
@ -95,6 +95,13 @@ class DocSequence {
|
||||||
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
virtual bool getAbstract(Rcl::Doc& doc,
|
||||||
|
std::vector<std::pair<int, std::string> >& abs)
|
||||||
|
{
|
||||||
|
abs.push_back(std::pair<int, std::string>(0,
|
||||||
|
doc.meta[Rcl::Doc::keyabs]));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
virtual int getFirstMatchPage(Rcl::Doc&)
|
virtual int getFirstMatchPage(Rcl::Doc&)
|
||||||
{
|
{
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -106,8 +113,16 @@ class DocSequence {
|
||||||
virtual int getResCnt() = 0;
|
virtual int getResCnt() = 0;
|
||||||
|
|
||||||
/** Get title for result list */
|
/** Get title for result list */
|
||||||
virtual std::string title() {return m_title;}
|
virtual std::string title()
|
||||||
|
{
|
||||||
|
return m_title;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Can do snippets ? */
|
||||||
|
virtual bool snippetsCapable()
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
/** Get description for underlying query */
|
/** Get description for underlying query */
|
||||||
virtual std::string getDescription() = 0;
|
virtual std::string getDescription() = 0;
|
||||||
|
|
||||||
|
@ -157,6 +172,20 @@ public:
|
||||||
return false;
|
return false;
|
||||||
return m_seq->getAbstract(doc, abs);
|
return m_seq->getAbstract(doc, abs);
|
||||||
}
|
}
|
||||||
|
virtual bool getAbstract(Rcl::Doc& doc,
|
||||||
|
std::vector<std::pair<int, std::string> >& abs)
|
||||||
|
{
|
||||||
|
if (m_seq.isNull())
|
||||||
|
return false;
|
||||||
|
return m_seq->getAbstract(doc, abs);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool snippetsCapable()
|
||||||
|
{
|
||||||
|
if (m_seq.isNull())
|
||||||
|
return false;
|
||||||
|
return m_seq->snippetsCapable();
|
||||||
|
}
|
||||||
virtual std::string getDescription()
|
virtual std::string getDescription()
|
||||||
{
|
{
|
||||||
if (m_seq.isNull())
|
if (m_seq.isNull())
|
||||||
|
|
|
@ -65,6 +65,32 @@ int DocSequenceDb::getResCnt()
|
||||||
return m_rescnt;
|
return m_rescnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This one only gets called to fill-up the snippets window
|
||||||
|
// We ignore most abstract/snippets preferences.
|
||||||
|
bool DocSequenceDb::getAbstract(Rcl::Doc &doc,
|
||||||
|
vector<pair<int, string> >& vpabs)
|
||||||
|
{
|
||||||
|
LOGDEB(("DocSequenceDb::getAbstract/pair\n"));
|
||||||
|
setQuery();
|
||||||
|
|
||||||
|
// Have to put the limit somewhere.
|
||||||
|
int maxoccs = 500;
|
||||||
|
Rcl::abstract_result ret = Rcl::ABSRES_ERROR;
|
||||||
|
if (m_q->whatDb()) {
|
||||||
|
ret = m_q->whatDb()->makeDocAbstract(doc, m_q.getptr(), vpabs,
|
||||||
|
maxoccs,
|
||||||
|
m_q->whatDb()->getAbsCtxLen()+ 2);
|
||||||
|
}
|
||||||
|
if (vpabs.empty())
|
||||||
|
vpabs.push_back(pair<int, string>(0, doc.meta[Rcl::Doc::keyabs]));
|
||||||
|
|
||||||
|
// If the list was probably truncated, indicate it.
|
||||||
|
if (ret == Rcl::ABSRES_TRUNC)
|
||||||
|
vpabs.push_back(pair<int, string>(-1, "[...]"));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
|
bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
|
||||||
{
|
{
|
||||||
setQuery();
|
setQuery();
|
||||||
|
|
|
@ -31,6 +31,11 @@ class DocSequenceDb : public DocSequence {
|
||||||
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
|
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
|
||||||
virtual int getResCnt();
|
virtual int getResCnt();
|
||||||
virtual void getTerms(HighlightData& hld);
|
virtual void getTerms(HighlightData& hld);
|
||||||
|
|
||||||
|
// Called to fill-up the snippets window. Ignores
|
||||||
|
// buildabstract/replaceabstract and syntabslen
|
||||||
|
virtual bool getAbstract(Rcl::Doc &doc, vector<pair<int, string> >&);
|
||||||
|
|
||||||
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
|
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
|
||||||
virtual int getFirstMatchPage(Rcl::Doc&);
|
virtual int getFirstMatchPage(Rcl::Doc&);
|
||||||
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
|
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
|
||||||
|
@ -45,6 +50,11 @@ class DocSequenceDb : public DocSequence {
|
||||||
m_queryBuildAbstract = qba;
|
m_queryBuildAbstract = qba;
|
||||||
m_queryReplaceAbstract = qra;
|
m_queryReplaceAbstract = qra;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual bool snippetsCapable()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
virtual string title();
|
virtual string title();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -230,7 +230,9 @@ static void listList(const string&, const vector<string>&)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Retrieve and store db-wide frequencies for the query terms.
|
// Retrieve db-wide frequencies for the query terms and store them in
|
||||||
|
// the query object. This is done at most once for a query, and the data is used
|
||||||
|
// while computing abstracts for the different result documents.
|
||||||
void Db::Native::setDbWideQTermsFreqs(Query *query)
|
void Db::Native::setDbWideQTermsFreqs(Query *query)
|
||||||
{
|
{
|
||||||
// Do it once only for a given query.
|
// Do it once only for a given query.
|
||||||
|
@ -252,7 +254,7 @@ void Db::Native::setDbWideQTermsFreqs(Query *query)
|
||||||
for (vector<string>::const_iterator qit = qterms.begin();
|
for (vector<string>::const_iterator qit = qterms.begin();
|
||||||
qit != qterms.end(); qit++) {
|
qit != qterms.end(); qit++) {
|
||||||
query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
|
query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
|
||||||
LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
|
LOGABS(("set..QTermFreqs: [%s] db freq %.1e\n", qit->c_str(),
|
||||||
query->m_nq->termfreqs[*qit]));
|
query->m_nq->termfreqs[*qit]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -306,6 +308,7 @@ double Db::Native::qualityTerms(Xapian::docid docid,
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUGABSTRACT
|
#ifdef DEBUGABSTRACT
|
||||||
|
LOGDEB(("Db::qualityTerms:\n"));
|
||||||
for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();
|
for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();
|
||||||
qit != byQ.rend(); qit++) {
|
qit != byQ.rend(); qit++) {
|
||||||
LOGDEB(("%.1e->[%s]\n", qit->first, qit->second.c_str()));
|
LOGDEB(("%.1e->[%s]\n", qit->first, qit->second.c_str()));
|
||||||
|
@ -317,6 +320,7 @@ double Db::Native::qualityTerms(Xapian::docid docid,
|
||||||
// Return the positions list for the page break term
|
// Return the positions list for the page break term
|
||||||
bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos)
|
bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos)
|
||||||
{
|
{
|
||||||
|
vpos.clear();
|
||||||
// Need to retrieve the document record to check for multiple page breaks
|
// Need to retrieve the document record to check for multiple page breaks
|
||||||
// that we store there for lack of better place
|
// that we store there for lack of better place
|
||||||
map<int, int> mbreaksmap;
|
map<int, int> mbreaksmap;
|
||||||
|
@ -422,25 +426,26 @@ int Db::Native::getFirstMatchPage(Xapian::docid docid, Query *query)
|
||||||
//
|
//
|
||||||
// DatabaseModified and other general exceptions are catched and
|
// DatabaseModified and other general exceptions are catched and
|
||||||
// possibly retried by our caller
|
// possibly retried by our caller
|
||||||
vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
abstract_result Db::Native::makeAbstract(Xapian::docid docid, Query *query,
|
||||||
|
vector<pair<int, string> >& vabs,
|
||||||
|
int imaxoccs, int ictxwords)
|
||||||
{
|
{
|
||||||
Chrono chron;
|
Chrono chron;
|
||||||
LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d imaxoccs %d\n", chron.ms(),
|
||||||
m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
|
m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen, imaxoccs));
|
||||||
|
|
||||||
// The (unprefixed) terms matched by this document
|
// The (unprefixed) terms matched by this document
|
||||||
vector<string> terms;
|
vector<string> matchedTerms;
|
||||||
|
|
||||||
{
|
{
|
||||||
vector<string> iterms;
|
vector<string> iterms;
|
||||||
query->getMatchTerms(docid, iterms);
|
query->getMatchTerms(docid, iterms);
|
||||||
noPrefixList(iterms, terms);
|
noPrefixList(iterms, matchedTerms);
|
||||||
if (terms.empty()) {
|
if (matchedTerms.empty()) {
|
||||||
LOGDEB(("makeAbstract::Empty term list\n"));
|
LOGDEB(("makeAbstract::Empty term list\n"));
|
||||||
return vector<string>();
|
return ABSRES_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
listList("Match terms: ", terms);
|
listList("Match terms: ", matchedTerms);
|
||||||
|
|
||||||
// Retrieve the term freqencies for the query terms. This is
|
// Retrieve the term freqencies for the query terms. This is
|
||||||
// actually computed only once for a query, and for all terms in
|
// actually computed only once for a query, and for all terms in
|
||||||
|
@ -455,12 +460,12 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
// removing its meaning from the maximum occurrences per term test
|
// removing its meaning from the maximum occurrences per term test
|
||||||
// used while walking the list below)
|
// used while walking the list below)
|
||||||
multimap<double, string> byQ;
|
multimap<double, string> byQ;
|
||||||
double totalweight = qualityTerms(docid, query, terms, byQ);
|
double totalweight = qualityTerms(docid, query, matchedTerms, byQ);
|
||||||
LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
|
LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
|
||||||
// This can't happen, but would crash us
|
// This can't happen, but would crash us
|
||||||
if (totalweight == 0.0) {
|
if (totalweight == 0.0) {
|
||||||
LOGERR(("makeAbstract: totalweight == 0.0 !\n"));
|
LOGERR(("makeAbstract: totalweight == 0.0 !\n"));
|
||||||
return vector<string>();
|
return ABSRES_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
@ -473,21 +478,25 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
// terms, at their positions around the search terms positions:
|
// terms, at their positions around the search terms positions:
|
||||||
map<unsigned int, string> sparseDoc;
|
map<unsigned int, string> sparseDoc;
|
||||||
|
|
||||||
// All the chosen query term positions.
|
// Total number of occurrences for all terms. We stop when we have too many
|
||||||
vector<unsigned int> qtermposs;
|
unsigned int totaloccs = 0;
|
||||||
|
|
||||||
// Limit the total number of slots we populate. The 7 is taken as
|
// Limit the total number of slots we populate. The 7 is taken as
|
||||||
// average word size. It was a mistake to have the user max
|
// average word size. It was a mistake to have the user max
|
||||||
// abstract size parameter in characters, we basically only deal
|
// abstract size parameter in characters, we basically only deal
|
||||||
// with words. We used to limit the character size at the end, but
|
// with words. We used to limit the character size at the end, but
|
||||||
// this damaged our careful selection of terms
|
// this damaged our careful selection of terms
|
||||||
const unsigned int maxtotaloccs =
|
const unsigned int maxtotaloccs = imaxoccs > 0 ? imaxoccs :
|
||||||
m_rcldb->m_synthAbsLen /(7 * (m_rcldb->m_synthAbsWordCtxLen+1));
|
m_rcldb->m_synthAbsLen /(7 * (m_rcldb->m_synthAbsWordCtxLen+1));
|
||||||
LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
|
int ctxwords = ictxwords == -1 ? m_rcldb->m_synthAbsWordCtxLen : ictxwords;
|
||||||
|
LOGABS(("makeAbstract:%d: mxttloccs %d ctxwords %d\n",
|
||||||
|
chron.ms(), maxtotaloccs, ctxwords));
|
||||||
|
|
||||||
// This is used to mark positions overlapped by a multi-word match term
|
// This is used to mark positions overlapped by a multi-word match term
|
||||||
const string occupiedmarker("?");
|
const string occupiedmarker("?");
|
||||||
|
|
||||||
|
abstract_result ret = ABSRES_OK;
|
||||||
|
|
||||||
// Let's go populate
|
// Let's go populate
|
||||||
for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();
|
for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();
|
||||||
qit != byQ.rend(); qit++) {
|
qit != byQ.rend(); qit++) {
|
||||||
|
@ -508,7 +517,10 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
|
|
||||||
Xapian::PositionIterator pos;
|
Xapian::PositionIterator pos;
|
||||||
// There may be query terms not in this doc. This raises an
|
// There may be query terms not in this doc. This raises an
|
||||||
// exception when requesting the position list, we catch it.
|
// exception when requesting the position list, we catch it ??
|
||||||
|
// Not clear how this can happen because we are walking the
|
||||||
|
// match list returned by Xapian. Maybe something with the
|
||||||
|
// fields?
|
||||||
string emptys;
|
string emptys;
|
||||||
try {
|
try {
|
||||||
unsigned int occurrences = 0;
|
unsigned int occurrences = 0;
|
||||||
|
@ -519,14 +531,14 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
continue;
|
continue;
|
||||||
LOGABS(("makeAbstract: [%s] at %d occurrences %d maxoccs %d\n",
|
LOGABS(("makeAbstract: [%s] at %d occurrences %d maxoccs %d\n",
|
||||||
qterm.c_str(), ipos, occurrences, maxoccs));
|
qterm.c_str(), ipos, occurrences, maxoccs));
|
||||||
// Remember the term position
|
|
||||||
qtermposs.push_back(ipos);
|
totaloccs++;
|
||||||
|
|
||||||
// Add adjacent slots to the set to populate at next
|
// Add adjacent slots to the set to populate at next
|
||||||
// step by inserting empty strings. Special provisions
|
// step by inserting empty strings. Special provisions
|
||||||
// for adding ellipsis and for positions overlapped by
|
// for adding ellipsis and for positions overlapped by
|
||||||
// the match term.
|
// the match term.
|
||||||
unsigned int sta = MAX(0, ipos-m_rcldb->m_synthAbsWordCtxLen);
|
unsigned int sta = MAX(0, ipos - ctxwords);
|
||||||
unsigned int sto = ipos + qtrmwrdcnt-1 +
|
unsigned int sto = ipos + qtrmwrdcnt-1 +
|
||||||
m_rcldb->m_synthAbsWordCtxLen;
|
m_rcldb->m_synthAbsWordCtxLen;
|
||||||
for (unsigned int ii = sta; ii <= sto; ii++) {
|
for (unsigned int ii = sta; ii <= sto; ii++) {
|
||||||
|
@ -552,23 +564,29 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
|
|
||||||
// Limit to allocated occurences and total size
|
// Limit to allocated occurences and total size
|
||||||
if (++occurrences >= maxoccs ||
|
if (++occurrences >= maxoccs ||
|
||||||
qtermposs.size() >= maxtotaloccs)
|
totaloccs >= maxtotaloccs) {
|
||||||
|
ret = ABSRES_TRUNC;
|
||||||
|
LOGDEB(("Db::makeAbstract: max occurrences cutoff\n"));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
// Term does not occur. No problem.
|
// Term does not occur. No problem.
|
||||||
}
|
}
|
||||||
if (qtermposs.size() >= maxtotaloccs)
|
if (totaloccs >= maxtotaloccs) {
|
||||||
|
ret = ABSRES_TRUNC;
|
||||||
|
LOGDEB(("Db::makeAbstract: max1 occurrences cutoff\n"));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
LOGABS(("makeAbstract:%d:chosen number of positions %d\n",
|
LOGABS(("makeAbstract:%d:chosen number of positions %d\n",
|
||||||
chron.millis(), qtermposs.size()));
|
chron.millis(), totaloccs));
|
||||||
|
|
||||||
// This can happen if there are term occurences in the keywords
|
// This can happen if there are term occurences in the keywords
|
||||||
// etc. but not elsewhere ?
|
// etc. but not elsewhere ?
|
||||||
if (qtermposs.size() == 0) {
|
if (totaloccs == 0) {
|
||||||
LOGDEB1(("makeAbstract: no occurrences\n"));
|
LOGDEB1(("makeAbstract: no occurrences\n"));
|
||||||
return vector<string>();
|
return ABSRES_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Walk all document's terms position lists and populate slots
|
// Walk all document's terms position lists and populate slots
|
||||||
|
@ -586,6 +604,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
if (has_prefix(*term))
|
if (has_prefix(*term))
|
||||||
continue;
|
continue;
|
||||||
if (cutoff-- < 0) {
|
if (cutoff-- < 0) {
|
||||||
|
ret = ABSRES_TRUNC;
|
||||||
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -594,6 +613,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
for (pos = xrdb.positionlist_begin(docid, *term);
|
for (pos = xrdb.positionlist_begin(docid, *term);
|
||||||
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
||||||
if (cutoff-- < 0) {
|
if (cutoff-- < 0) {
|
||||||
|
ret = ABSRES_TRUNC;
|
||||||
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -604,8 +624,8 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
// at the same position, we want to keep only the
|
// at the same position, we want to keep only the
|
||||||
// first one (ie: dockes and dockes@wanadoo.fr)
|
// first one (ie: dockes and dockes@wanadoo.fr)
|
||||||
if (vit->second.empty()) {
|
if (vit->second.empty()) {
|
||||||
LOGABS(("makeAbstract: populating: [%s] at %d\n",
|
LOGDEB2(("makeAbstract: populating: [%s] at %d\n",
|
||||||
(*term).c_str(), *pos));
|
(*term).c_str(), *pos));
|
||||||
sparseDoc[*pos] = *term;
|
sparseDoc[*pos] = *term;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -637,19 +657,19 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
LOGABS(("makeAbstract:%d: extracting. Got %u pages\n", chron.millis(),
|
LOGABS(("makeAbstract:%d: extracting. Got %u pages\n", chron.millis(),
|
||||||
vpbreaks.size()));
|
vpbreaks.size()));
|
||||||
// Finally build the abstract by walking the map (in order of position)
|
// Finally build the abstract by walking the map (in order of position)
|
||||||
vector<string> vabs;
|
vabs.clear();
|
||||||
string chunk;
|
string chunk;
|
||||||
bool incjk = false;
|
bool incjk = false;
|
||||||
|
int page = 0;
|
||||||
for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
|
for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
|
||||||
it != sparseDoc.end(); it++) {
|
it != sparseDoc.end(); it++) {
|
||||||
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
|
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
|
||||||
if (!occupiedmarker.compare(it->second))
|
if (!occupiedmarker.compare(it->second))
|
||||||
continue;
|
continue;
|
||||||
if (chunk.empty() && !vpbreaks.empty()) {
|
if (chunk.empty() && !vpbreaks.empty()) {
|
||||||
int pnum = getPageNumberForPosition(vpbreaks, it->first);
|
page = getPageNumberForPosition(vpbreaks, it->first);
|
||||||
ostringstream ss;
|
if (page < 0)
|
||||||
ss << pnum;
|
page = 0;
|
||||||
chunk += string(" [p ") + ss.str() + "] ";
|
|
||||||
}
|
}
|
||||||
Utf8Iter uit(it->second);
|
Utf8Iter uit(it->second);
|
||||||
bool newcjk = false;
|
bool newcjk = false;
|
||||||
|
@ -659,7 +679,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
chunk += " ";
|
chunk += " ";
|
||||||
incjk = newcjk;
|
incjk = newcjk;
|
||||||
if (it->second == cstr_ellipsis) {
|
if (it->second == cstr_ellipsis) {
|
||||||
vabs.push_back(chunk);
|
vabs.push_back(pair<int,string>(page, chunk));
|
||||||
chunk.clear();
|
chunk.clear();
|
||||||
} else {
|
} else {
|
||||||
if (it->second.compare(end_of_field_term) &&
|
if (it->second.compare(end_of_field_term) &&
|
||||||
|
@ -668,10 +688,10 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!chunk.empty())
|
if (!chunk.empty())
|
||||||
vabs.push_back(chunk);
|
vabs.push_back(pair<int, string>(page, chunk));
|
||||||
|
|
||||||
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
|
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
|
||||||
return vabs;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Rcl::Db methods ///////////////////////////////// */
|
/* Rcl::Db methods ///////////////////////////////// */
|
||||||
|
@ -1516,6 +1536,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||||
if (!tpidx.m_pageincrvec.empty()) {
|
if (!tpidx.m_pageincrvec.empty()) {
|
||||||
ostringstream multibreaks;
|
ostringstream multibreaks;
|
||||||
for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) {
|
for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) {
|
||||||
|
if (i != 0)
|
||||||
|
multibreaks << ",";
|
||||||
multibreaks << tpidx.m_pageincrvec[i].first << "," <<
|
multibreaks << tpidx.m_pageincrvec[i].first << "," <<
|
||||||
tpidx.m_pageincrvec[i].second;
|
tpidx.m_pageincrvec[i].second;
|
||||||
}
|
}
|
||||||
|
@ -2168,31 +2190,59 @@ bool Db::stemDiffers(const string& lang, const string& word,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute the abstract for doc in the context of query, as a list of
// (page number, text) pairs stored into abstract.
// maxoccs and ctxwords are passed unchanged to Native::makeAbstract().
// Returns ABSRES_ERROR if the db is not open or if the Xapian call left an
// error message in m_reason, else the value returned by makeAbstract().
abstract_result Db::makeDocAbstract(Doc &doc, Query *query,
                                    vector<pair<int, string> >& abstract,
                                    int maxoccs, int ctxwords)
{
    LOGDEB(("makeDocAbstract: maxoccs %d ctxwords %d\n", maxoccs, ctxwords));

    const bool dbusable = m_ndb && m_ndb->m_isopen;
    if (!dbusable) {
        LOGERR(("Db::makeDocAbstract: no db\n"));
        return ABSRES_ERROR;
    }

    abstract_result status = ABSRES_ERROR;
    XAPTRY(status = m_ndb->makeAbstract(doc.xdocid, query, abstract,
                                        maxoccs, ctxwords),
           m_ndb->xrdb, m_reason);

    // A non-empty m_reason means that the Xapian call failed
    return m_reason.empty() ? status : ABSRES_ERROR;
}
|
||||||
|
|
||||||
// Compute the abstract for doc in the context of query, as a list of text
// chunks. This is a wrapper around the (page, text) pairs overload: chunks
// with a positive page number get a " [p N] " prefix.
// On success the previous contents of abstract are replaced (the vector is
// cleared before being filled), as was the case when the result was assigned
// to it. Returns false, leaving abstract untouched, if the db is not open or
// if the abstract could not be computed.
bool Db::makeDocAbstract(Doc &doc, Query *query, vector<string>& abstract)
{
    if (!m_ndb || !m_ndb->m_isopen) {
        LOGERR(("Db::makeDocAbstract: no db\n"));
        return false;
    }

    vector<pair<int, string> > vpabs;
    // Compare against the error value explicitly rather than relying on its
    // numeric value being 0.
    if (makeDocAbstract(doc, query, vpabs) == ABSRES_ERROR)
        return false;

    // Replace, don't accumulate: callers may pass in a non-empty vector
    abstract.clear();
    for (vector<pair<int, string> >::const_iterator it = vpabs.begin();
         it != vpabs.end(); ++it) {
        string chunk;
        if (it->first > 0) {
            ostringstream ss;
            ss << it->first;
            chunk += string(" [p ") + ss.str() + "] ";
        }
        chunk += it->second;
        abstract.push_back(chunk);
    }
    return true;
}
|
||||||
|
|
||||||
bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
|
bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
|
||||||
{
|
{
|
||||||
LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
|
|
||||||
if (!m_ndb || !m_ndb->m_isopen) {
|
if (!m_ndb || !m_ndb->m_isopen) {
|
||||||
LOGERR(("Db::makeDocAbstract: no db\n"));
|
LOGERR(("Db::makeDocAbstract: no db\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
vector<string> vab;
|
vector<pair<int, string> > vpabs;
|
||||||
XAPTRY(vab = m_ndb->makeAbstract(doc.xdocid, query),
|
if (!makeDocAbstract(doc, query, vpabs))
|
||||||
m_ndb->xrdb, m_reason);
|
return false;
|
||||||
for (vector<string>::const_iterator it = vab.begin();
|
for (vector<pair<int, string> >::const_iterator it = vpabs.begin();
|
||||||
it != vab.end(); it++) {
|
it != vpabs.end(); it++) {
|
||||||
abstract.append(*it);
|
abstract.append(it->second);
|
||||||
abstract.append(cstr_ellipsis);
|
abstract.append(cstr_ellipsis);
|
||||||
}
|
}
|
||||||
return m_reason.empty() ? true : false;
|
return m_reason.empty() ? true : false;
|
||||||
|
|
|
@ -68,6 +68,11 @@ enum value_slot {
|
||||||
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size
|
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum abstract_result {
|
||||||
|
ABSRES_ERROR = 0,
|
||||||
|
ABSRES_OK = 1,
|
||||||
|
ABSRES_TRUNC = 2
|
||||||
|
};
|
||||||
class SearchData;
|
class SearchData;
|
||||||
class TermIter;
|
class TermIter;
|
||||||
class Query;
|
class Query;
|
||||||
|
@ -291,11 +296,21 @@ class Db {
|
||||||
|
|
||||||
/** Set parameters for synthetic abstract generation */
|
/** Set parameters for synthetic abstract generation */
|
||||||
void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);
|
void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);
|
||||||
|
int getAbsCtxLen() const
|
||||||
|
{
|
||||||
|
return m_synthAbsWordCtxLen;
|
||||||
|
}
|
||||||
|
|
||||||
/** Build synthetic abstract for document, extracting chunks relevant for
|
/** Build synthetic abstract for document, extracting chunks relevant for
|
||||||
* the input query. This uses index data only (no access to the file) */
|
* the input query. This uses index data only (no access to the file) */
|
||||||
|
// Abstract return as one string
|
||||||
bool makeDocAbstract(Doc &doc, Query *query, string& abstract);
|
bool makeDocAbstract(Doc &doc, Query *query, string& abstract);
|
||||||
|
// Returned as a snippets vector
|
||||||
bool makeDocAbstract(Doc &doc, Query *query, vector<string>& abstract);
|
bool makeDocAbstract(Doc &doc, Query *query, vector<string>& abstract);
|
||||||
|
// Returned as a vector of pair<page,snippet> page is 0 if unknown
|
||||||
|
abstract_result makeDocAbstract(Doc &doc, Query *query,
|
||||||
|
vector<pair<int, string> >& abstract,
|
||||||
|
int maxoccs= -1, int ctxwords = -1);
|
||||||
/** Retrieve detected page breaks positions */
|
/** Retrieve detected page breaks positions */
|
||||||
int getFirstMatchPage(Doc &doc, Query *query);
|
int getFirstMatchPage(Doc &doc, Query *query);
|
||||||
|
|
||||||
|
|
|
@ -94,7 +94,9 @@ class Db::Native {
|
||||||
const vector<string>& terms,
|
const vector<string>& terms,
|
||||||
std::multimap<double, string>& byQ);
|
std::multimap<double, string>& byQ);
|
||||||
void setDbWideQTermsFreqs(Query *query);
|
void setDbWideQTermsFreqs(Query *query);
|
||||||
vector<string> makeAbstract(Xapian::docid id, Query *query);
|
abstract_result makeAbstract(Xapian::docid id, Query *query,
|
||||||
|
vector<pair<int, string> >&, int maxoccs = -1,
|
||||||
|
int ctxwords = -1);
|
||||||
bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
|
bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
|
||||||
int getFirstMatchPage(Xapian::docid docid, Query *query);
|
int getFirstMatchPage(Xapian::docid docid, Query *query);
|
||||||
int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
|
int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
|
||||||
|
|
|
@ -81,6 +81,8 @@ indexstemminglanguages = english
|
||||||
# unac_except_trans = Ää Öö Üü ää öö üü ßss
|
# unac_except_trans = Ää Öö Üü ää öö üü ßss
|
||||||
# In French, you probably want to decompose oe and ae
|
# In French, you probably want to decompose oe and ae
|
||||||
# unac_except_trans = œoe Œoe æae Æae
|
# unac_except_trans = œoe Œoe æae Æae
|
||||||
|
# Actually, this seems a reasonable default for all until someone protests.
|
||||||
|
unac_except_trans = åå Åå ää Ää öö Öö üü Üü ßss œoe Œoe æae ÆAE fifi flfl
|
||||||
|
|
||||||
# Where to store the database (directory). This may be an absolute path,
|
# Where to store the database (directory). This may be an absolute path,
|
||||||
# else it is taken as relative to the configuration directory (-c argument
|
# else it is taken as relative to the configuration directory (-c argument
|
||||||
|
|
|
@ -31,9 +31,9 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <tr1/unordered_map>
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::vector;
|
using std::tr1::unordered_map;
|
||||||
using std::map;
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -41,20 +41,16 @@ using std::map;
|
||||||
should not be translated according to what UnicodeData says, but
|
should not be translated according to what UnicodeData says, but
|
||||||
instead according to some local rule. There will usually be very
|
instead according to some local rule. There will usually be very
|
||||||
few of them, but they must be looked up for every translated char.
|
few of them, but they must be looked up for every translated char.
|
||||||
|
|
||||||
We use a sorted vector for fastest elimination by binary search and
|
|
||||||
a vector<string> to store the translations
|
|
||||||
*/
|
*/
|
||||||
static vector<unsigned short> except_chars;
|
unordered_map<unsigned short, string> except_trans;
|
||||||
static vector<string> except_trans;
|
static inline bool is_except_char(unsigned short c, string& trans)
|
||||||
static inline size_t is_except_char(unsigned short c)
|
|
||||||
{
|
{
|
||||||
vector<unsigned short>::iterator it =
|
unordered_map<unsigned short, string>::const_iterator it
|
||||||
std::lower_bound(except_chars.begin(), except_chars.end(), c);
|
= except_trans.find(c);
|
||||||
if (it == except_chars.end() || *it != c) {
|
if (it == except_trans.end())
|
||||||
return (size_t(-1));
|
return false;
|
||||||
}
|
trans = it->second;
|
||||||
return std::distance(except_chars.begin(), it);
|
return true;
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* RECOLL_DATADIR */
|
||||||
|
|
||||||
|
@ -12715,21 +12711,18 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||||
// - unaccenting: do nothing (copy original char)
|
// - unaccenting: do nothing (copy original char)
|
||||||
// - unac+fold: use table
|
// - unac+fold: use table
|
||||||
// - fold: use the unicode data.
|
// - fold: use the unicode data.
|
||||||
size_t idx;
|
string trans;
|
||||||
if (what != UNAC_FOLD && except_chars.size() != 0 &&
|
if (what != UNAC_FOLD && except_trans.size() != 0 &&
|
||||||
(idx=is_except_char(c)) != (size_t)-1) {
|
is_except_char(c, trans)) {
|
||||||
if (what == UNAC_UNAC) {
|
if (what == UNAC_UNAC) {
|
||||||
// Unaccent only. Do nothing
|
// Unaccent only. Do nothing
|
||||||
p = 0;
|
p = 0;
|
||||||
l = 0;
|
l = 0;
|
||||||
} else {
|
} else {
|
||||||
// Has to be UNAC_UNACFOLD: use table
|
// Has to be UNAC_UNACFOLD: use table
|
||||||
p = (unsigned short *)(except_trans[idx].c_str() + 2);
|
p = (unsigned short *)trans.c_str();
|
||||||
l = (except_trans[idx].size() - 2) / 2;
|
l = trans.size() / 2;
|
||||||
}
|
}
|
||||||
/* if (p) {unsigned char *cp = (unsigned char *)p;
|
|
||||||
fprintf(stderr, "l %d cp[0] %x cp[1] %x\n", l, (unsigned int)cp[0],
|
|
||||||
(unsigned int)cp[1]);}*/
|
|
||||||
} else {
|
} else {
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* RECOLL_DATADIR */
|
||||||
unac_uf_char_utf16_(c, p, l, what)
|
unac_uf_char_utf16_(c, p, l, what)
|
||||||
|
@ -13076,7 +13069,6 @@ const char* unac_version(void)
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef RECOLL_DATADIR
|
||||||
void unac_set_except_translations(const char *spectrans)
|
void unac_set_except_translations(const char *spectrans)
|
||||||
{
|
{
|
||||||
except_chars.clear();
|
|
||||||
except_trans.clear();
|
except_trans.clear();
|
||||||
if (!spectrans || !spectrans[0])
|
if (!spectrans || !spectrans[0])
|
||||||
return;
|
return;
|
||||||
|
@ -13123,14 +13115,8 @@ void unac_set_except_translations(const char *spectrans)
|
||||||
else
|
else
|
||||||
ch = (out[0] << 8) | (out[1] & 0xff);
|
ch = (out[0] << 8) | (out[1] & 0xff);
|
||||||
|
|
||||||
/* fprintf(stderr, "outsize %d Ch is 0x%hx\n", int(outsize), ch);*/
|
except_trans[ch] = string((const char *)(out + 2), outsize-2);
|
||||||
except_chars.push_back(ch);
|
|
||||||
// We keep ch as the first 2 bytes in the translation so that
|
|
||||||
// both vectors sort identically
|
|
||||||
except_trans.push_back(string((const char *)out, outsize));
|
|
||||||
free(out);
|
free(out);
|
||||||
}
|
}
|
||||||
std::sort(except_chars.begin(), except_chars.end());
|
|
||||||
std::sort(except_trans.begin(), except_trans.end());
|
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* RECOLL_DATADIR */
|
||||||
|
|
|
@ -5,20 +5,23 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
/** Store about user terms and their expansions. This is used mostly for
|
/** Store data about user search terms and their expansions. This is used
|
||||||
* highlighting result text and walking the matches.
|
* mostly for highlighting result text and walking the matches, generating
|
||||||
|
* spelling suggestions.
|
||||||
*/
|
*/
|
||||||
struct HighlightData {
|
struct HighlightData {
|
||||||
/** The user terms, excluding those with wildcards.
|
/** The user terms, excluding those with wildcards. This list is
|
||||||
* This list is intended for orthographic suggestions but the terms are
|
* intended for orthographic suggestions so the terms are always
|
||||||
* unaccented lowercased anyway because they are compared to the dictionary
|
* lowercased, unaccented or not depending on the type of index
|
||||||
* generated from the index term list (which is unaccented).
|
* (as the spelling dictionary is generated from the index terms).
|
||||||
*/
|
*/
|
||||||
std::set<std::string> uterms;
|
std::set<std::string> uterms;
|
||||||
|
|
||||||
/** The original user terms-or-groups. This is for displaying the matched
|
/** The original user terms-or-groups. This is for display
|
||||||
* terms or groups, ie in relation with highlighting or skipping to the
|
* purposes: ie when creating a menu to look for a specific
|
||||||
* next match. These are raw, diacritics and case preserved.
|
* matched group inside a preview window. We want to show the
|
||||||
|
* user-entered data in the menu, not some transformation, so
|
||||||
|
* these are always raw, diacritics and case preserved.
|
||||||
*/
|
*/
|
||||||
std::vector<std::vector<std::string> > ugroups;
|
std::vector<std::vector<std::string> > ugroups;
|
||||||
|
|
||||||
|
@ -35,7 +38,7 @@ struct HighlightData {
|
||||||
|
|
||||||
/** Index into ugroups for each group. Parallel to groups. As a
|
/** Index into ugroups for each group. Parallel to groups. As a
|
||||||
* user term or group may generate many processed/expanded terms
|
* user term or group may generate many processed/expanded terms
|
||||||
* or groups, this is how we relate them
|
* or groups, this is how we relate an expansion to its source.
|
||||||
*/
|
*/
|
||||||
std::vector<unsigned int> grpsugidx;
|
std::vector<unsigned int> grpsugidx;
|
||||||
|
|
||||||
|
|
46
unac/unac.c
46
unac/unac.c
|
@ -31,9 +31,9 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <tr1/unordered_map>
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::vector;
|
using std::tr1::unordered_map;
|
||||||
using std::map;
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -41,20 +41,16 @@ using std::map;
|
||||||
should not be translated according to what UnicodeData says, but
|
should not be translated according to what UnicodeData says, but
|
||||||
instead according to some local rule. There will usually be very
|
instead according to some local rule. There will usually be very
|
||||||
few of them, but they must be looked up for every translated char.
|
few of them, but they must be looked up for every translated char.
|
||||||
|
|
||||||
We use a sorted vector for fastest elimination by binary search and
|
|
||||||
a vector<string> to store the translations
|
|
||||||
*/
|
*/
|
||||||
static vector<unsigned short> except_chars;
|
unordered_map<unsigned short, string> except_trans;
|
||||||
static vector<string> except_trans;
|
static inline bool is_except_char(unsigned short c, string& trans)
|
||||||
static inline size_t is_except_char(unsigned short c)
|
|
||||||
{
|
{
|
||||||
vector<unsigned short>::iterator it =
|
unordered_map<unsigned short, string>::const_iterator it
|
||||||
std::lower_bound(except_chars.begin(), except_chars.end(), c);
|
= except_trans.find(c);
|
||||||
if (it == except_chars.end() || *it != c) {
|
if (it == except_trans.end())
|
||||||
return (size_t(-1));
|
return false;
|
||||||
}
|
trans = it->second;
|
||||||
return std::distance(except_chars.begin(), it);
|
return true;
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* RECOLL_DATADIR */
|
||||||
|
|
||||||
|
@ -12715,21 +12711,18 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||||
// - unaccenting: do nothing (copy original char)
|
// - unaccenting: do nothing (copy original char)
|
||||||
// - unac+fold: use table
|
// - unac+fold: use table
|
||||||
// - fold: use the unicode data.
|
// - fold: use the unicode data.
|
||||||
size_t idx;
|
string trans;
|
||||||
if (what != UNAC_FOLD && except_chars.size() != 0 &&
|
if (what != UNAC_FOLD && except_trans.size() != 0 &&
|
||||||
(idx=is_except_char(c)) != (size_t)-1) {
|
is_except_char(c, trans)) {
|
||||||
if (what == UNAC_UNAC) {
|
if (what == UNAC_UNAC) {
|
||||||
// Unaccent only. Do nothing
|
// Unaccent only. Do nothing
|
||||||
p = 0;
|
p = 0;
|
||||||
l = 0;
|
l = 0;
|
||||||
} else {
|
} else {
|
||||||
// Has to be UNAC_UNACFOLD: use table
|
// Has to be UNAC_UNACFOLD: use table
|
||||||
p = (unsigned short *)(except_trans[idx].c_str() + 2);
|
p = (unsigned short *)trans.c_str();
|
||||||
l = (except_trans[idx].size() - 2) / 2;
|
l = trans.size() / 2;
|
||||||
}
|
}
|
||||||
/* if (p) {unsigned char *cp = (unsigned char *)p;
|
|
||||||
fprintf(stderr, "l %d cp[0] %x cp[1] %x\n", l, (unsigned int)cp[0],
|
|
||||||
(unsigned int)cp[1]);}*/
|
|
||||||
} else {
|
} else {
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* RECOLL_DATADIR */
|
||||||
unac_uf_char_utf16_(c, p, l, what)
|
unac_uf_char_utf16_(c, p, l, what)
|
||||||
|
@ -13076,7 +13069,6 @@ const char* unac_version(void)
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef RECOLL_DATADIR
|
||||||
void unac_set_except_translations(const char *spectrans)
|
void unac_set_except_translations(const char *spectrans)
|
||||||
{
|
{
|
||||||
except_chars.clear();
|
|
||||||
except_trans.clear();
|
except_trans.clear();
|
||||||
if (!spectrans || !spectrans[0])
|
if (!spectrans || !spectrans[0])
|
||||||
return;
|
return;
|
||||||
|
@ -13123,14 +13115,8 @@ void unac_set_except_translations(const char *spectrans)
|
||||||
else
|
else
|
||||||
ch = (out[0] << 8) | (out[1] & 0xff);
|
ch = (out[0] << 8) | (out[1] & 0xff);
|
||||||
|
|
||||||
/* fprintf(stderr, "outsize %d Ch is 0x%hx\n", int(outsize), ch);*/
|
except_trans[ch] = string((const char *)(out + 2), outsize-2);
|
||||||
except_chars.push_back(ch);
|
|
||||||
// We keep ch as the first 2 bytes in the translation so that
|
|
||||||
// both vectors sort identically
|
|
||||||
except_trans.push_back(string((const char *)out, outsize));
|
|
||||||
free(out);
|
free(out);
|
||||||
}
|
}
|
||||||
std::sort(except_chars.begin(), except_chars.end());
|
|
||||||
std::sort(except_trans.begin(), except_trans.end());
|
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* RECOLL_DATADIR */
|
||||||
|
|
|
@ -86,6 +86,13 @@
|
||||||
<h2>News</h2>
|
<h2>News</h2>
|
||||||
<div class="news">
|
<div class="news">
|
||||||
<ul>
|
<ul>
|
||||||
|
<li>2012-09-21: an
|
||||||
|
<a href="https://bitbucket.org/medoc/recoll/wiki/ElinksBeagle">easy
|
||||||
|
way</a> to extend the "Beagle queue"
|
||||||
|
Recoll web history indexing mechanism to other browsers than
|
||||||
|
Firefox (Elinks in this case).
|
||||||
|
</li>
|
||||||
|
|
||||||
<li>2012-09-13: the next Recoll version will maybe acquire switchable
|
<li>2012-09-13: the next Recoll version will maybe acquire switchable
|
||||||
case and diacritics sensitivity. I am writing
|
case and diacritics sensitivity. I am writing
|
||||||
a few pages about the
|
a few pages about the
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue