merged the case/diac sensitivity code back into trunk

2012-09-25 19:20:24 +02:00 · 2012-09-25 19:20:24 +02:00 · 94b571aac6
commit 94b571aac6
parent f896f41d93 603a26f67b
22 changed files with 743 additions and 271 deletions
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@ -21,7 +21,10 @@
 #include <iostream>
 #include <string>
-#include <set>
+//#include <set>
 #include <tr1/unordered_set>
 using std::tr1::unordered_set;
 #include <cstring>
 #include "textsplit.h"
@ -57,8 +60,8 @@ static int charclasses[charclasses_size];
 // with interesting properties. This is far from full-blown management
 // of Unicode properties, but seems to do the job well enough in most
 // common cases
-static set<unsigned int> unicign;
+static unordered_set<unsigned int> unicign;
-static set<unsigned int> visiblewhite;
+static unordered_set<unsigned int> visiblewhite;
 class CharClassInit {
 public:
--- a/src/common/uproplist.h
+++ b/src/common/uproplist.h
@ -25,24 +25,8 @@
 * This is used as a quick fix to the ASCII-based code, and is not correct.
 * The correct way would be to do what http://www.unicode.org/reports/tr29/
 * says.
 * 
 * Data from:
 # PropList-4.0.1.txt
 # Date: 2004-03-02, 02:42:40 GMT [MD]
 #
 # Unicode Character Database
 # Copyright (c) 1991-2004 Unicode, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 # For documentation, see UCD.html
 */
 static const unsigned int uniign[] = {
    0x0021, /*  ; Terminal_Punctuation # Po       EXCLAMATION MARK*/
    0x002C, /*  ; Terminal_Punctuation # Po       COMMA*/
    0x002D, /*  ; Dash # Pd       HYPHEN-MINUS*/
    0x002E, /*  ; Terminal_Punctuation # Po       FULL STOP*/
    0x003A, /*  ; Terminal_Punctuation # Po   [2] COLON..SEMICOLON*/
    0x003B, /*  ; Terminal_Punctuation # Po   [2] COLON..SEMICOLON*/
    0x003F, /*  ; Terminal_Punctuation # Po       QUESTION MARK*/
    0x0085, /* NEXT LINE NEL;Cc */
    0x00A0, /* NO-BREAK SPACE; Zs */
    0x00A1, /* INVERTED EXCLAMATION MARK;Po */
@ -53,85 +37,81 @@ static const unsigned int uniign[] = {
    0x00A6, /* BROKEN BAR;So */
    0x00A7, /* SECTION SIGN;So; */
    0x00A9, /* COPYRIGHT SIGN;So */
-    0x00AB, /*  ; Quotation_Mark # Pi       LEFT-POINTING DOUBLE ANGLE QUOTATION MARK*/
+    0x00AB, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK*/
    0x00AC, /* NOT SIGN;Sm */
-    0x00AD, /*  ; Hyphen # Cf       SOFT HYPHEN*/
+    0x00AD, /* SOFT HYPHEN*/
    0x00AE, /* registered sign */
-    0x00B0, /* DEGREE SIGN;So;0;ET;;;;;N;;;;; */
+    0x00B0, /* DEGREE SIGN */
-    0x00B1, /* PLUS-MINUS SIGN;Sm;0;ET;;;;;N;PLUS-OR-MINUS SIGN;;;;*/
+    0x00B1, /* PLUS-MINUS SIGN */
-    0x00B7, /* MIDDLE DOT;Po;0;ON;;;;;N;;;;;*/
+    0x00B7, /* MIDDLE DOT */
-    0x00BB, /*  ; Quotation_Mark # Pf       RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK*/
+    0x00BB, /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
-    0x00BF, /*                   INVERTED QUESTION MARK;Po */
+    0x00BF, /* INVERTED QUESTION MARK; */
-    0x00D7, /* MULTIPLICATION SIGN;Sm;0;ON;;;;;N;;;;; */
+    0x00D7, /* MULTIPLICATION SIGN */
-    0x037E, /*  ; Terminal_Punctuation # Po       GREEK QUESTION MARK*/
+    0x037E, /* GREEK QUESTION MARK */
-    0x0387, /*  ; Terminal_Punctuation # Po       GREEK ANO TELEIA*/
+    0x0387, /* GREEK ANO TELEIA */
-    0x055C, /*  ; STerm # Po       ARMENIAN EXCLAMATION MARK*/
+    0x055C, /* ARMENIAN EXCLAMATION MARK */
-    0x055E, /*  ; STerm # Po       ARMENIAN QUESTION MARK*/
+    0x055E, /* ARMENIAN QUESTION MARK */
-    0x0589, /*  ; STerm # Po       ARMENIAN FULL STOP*/
+    0x0589, /* ARMENIAN FULL STOP */
-    0x0589, /*  ; Terminal_Punctuation # Po       ARMENIAN FULL STOP*/
+    0x058A, /* ARMENIAN HYPHEN */
-    0x058A, /*  ; Dash # Pd       ARMENIAN HYPHEN*/
+    0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */
-    0x058A, /*  ; Hyphen # Pd       ARMENIAN HYPHEN*/
+    0x060C, /* ARABIC COMMA */
-    0x05C3, /*  ; Terminal_Punctuation # Po       HEBREW PUNCTUATION SOF PASUQ*/
+    0x061B, /* ARABIC SEMICOLON */
-    0x060C, /*  ; Terminal_Punctuation # Po       ARABIC COMMA*/
+    0x061F, /* ARABIC QUESTION MARK */
-    0x061B, /*  ; Terminal_Punctuation # Po       ARABIC SEMICOLON*/
+    0x06D4, /* ARABIC FULL STOP */
-    0x061F, /*  ; STerm # Po       ARABIC QUESTION MARK*/
+    0x166E, /* CANADIAN SYLLABICS FULL STOP */
-    0x061F, /*  ; Terminal_Punctuation # Po       ARABIC QUESTION MARK*/
+    0x1680, /* OGHAM SPACE MARK */
-    0x06D4, /*  ; STerm # Po       ARABIC FULL STOP*/
+    0x16EB, /* RUNIC SINGLE PUNCTUATION */
-    0x06D4, /*  ; Terminal_Punctuation # Po       ARABIC FULL STOP*/
+    0x16EC, /* RUNIC MULTIPLE PUNCTUATION */
-    0x166E, /*  ; STerm # Po       CANADIAN SYLLABICS FULL STOP*/
+    0x16ED, /* RUNIC CROSS PUNCTUATION */
-    0x1680, /*  ; White_Space # Zs       OGHAM SPACE MARK*/
+    0x1803, /* MONGOLIAN FULL STOP */
-    0x16EB, /* RUNIC SINGLE PUNCTUATION;Po;0;L;;;;;N;;;;;*/
+    0x1806, /* MONGOLIAN TODO SOFT HYPHEN */
-    0x16EC, /* RUNIC MULTIPLE PUNCTUATION;Po;0;L;;;;;N;;;;;*/
+    0x1809, /* MONGOLIAN MANCHU FULL STOP */
-    0x16ED, /* RUNIC CROSS PUNCTUATION;Po;0;L;;;;;N;;;;; */
+    0x180E, /* MONGOLIAN VOWEL SEPARATOR */
-    0x1803, /*  ; STerm # Po       MONGOLIAN FULL STOP*/
+    0x2000, /* EN QUAD..HAIR SPACE*/
-    0x1806, /*  ; Hyphen # Pd       MONGOLIAN TODO SOFT HYPHEN*/
+    0x2001, /* EN QUAD..HAIR SPACE*/
-    0x1809, /*  ; STerm # Po       MONGOLIAN MANCHU FULL STOP*/
+    0x2002, /* EN QUAD..HAIR SPACE*/
-    0x180E, /*  ; White_Space # Zs       MONGOLIAN VOWEL SEPARATOR*/
+    0x2003, /* EN QUAD..HAIR SPACE*/
-    0x2000, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2004, /* EN QUAD..HAIR SPACE*/
-    0x2001, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2005, /* EN QUAD..HAIR SPACE*/
-    0x2002, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2006, /* EN QUAD..HAIR SPACE*/
-    0x2003, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2007, /* EN QUAD..HAIR SPACE*/
-    0x2004, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2008, /* EN QUAD..HAIR SPACE*/
-    0x2005, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2009, /* EN QUAD..HAIR SPACE*/
-    0x2006, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x200A, /* EN QUAD..HAIR SPACE*/
-    0x2007, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2010, /* [2] HYPHEN..NON-BREAKING HYPHEN*/
-    0x2008, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2011, /* [2] HYPHEN..NON-BREAKING HYPHEN*/
-    0x2009, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2012, /* [6] HYPHEN..HORIZONTAL BAR*/
-    0x200A, /*  ; White_Space # Zs  [11] EN QUAD..HAIR SPACE*/
+    0x2013, /* [6] HYPHEN..HORIZONTAL BAR*/
-    0x2010, /*  ; Hyphen # Pd   [2] HYPHEN..NON-BREAKING HYPHEN*/
+    0x2014, /* [6] HYPHEN..HORIZONTAL BAR*/
-    0x2011, /*  ; Hyphen # Pd   [2] HYPHEN..NON-BREAKING HYPHEN*/
+    0x2015, /* [6] HYPHEN..HORIZONTAL BAR*/
-    0x2012, /*  ; Dash # Pd   [6] HYPHEN..HORIZONTAL BAR*/
+    0x2018, /* LEFT SINGLE QUOTATION MARK*/
-    0x2013, /*  ; Dash # Pd   [6] HYPHEN..HORIZONTAL BAR*/
+    0x2019, /* RIGHT SINGLE QUOTATION MARK*/
-    0x2014, /*  ; Dash # Pd   [6] HYPHEN..HORIZONTAL BAR*/
+    0x201A, /* SINGLE LOW-9 QUOTATION MARK*/
-    0x2015, /*  ; Dash # Pd   [6] HYPHEN..HORIZONTAL BAR*/
+    0x201B, /* SINGLE HIGH-REVERSED-9 QUOTATION MARK*/
-    0x2018, /*  ; Quotation_Mark # Pi       LEFT SINGLE QUOTATION MARK*/
+    0x201C, /* LEFT DOUBLE QUOTATION MARK*/
-    0x2019, /*  ; Quotation_Mark # Pf       RIGHT SINGLE QUOTATION MARK*/
+    0x201D, /* RIGHT DOUBLE QUOTATION MARK*/
-    0x201A, /*  ; Quotation_Mark # Ps       SINGLE LOW-9 QUOTATION MARK*/
+    0x201E, /* DOUBLE LOW-9 QUOTATION MARK*/
-    0x201B, /*  ; Quotation_Mark # Pi       SINGLE HIGH-REVERSED-9 QUOTATION MARK*/
+    0x201F, /* DOUBLE HIGH-REVERSED-9 QUOTATION MARK*/
-    0x201C, /*  ; Quotation_Mark # Pi       LEFT DOUBLE QUOTATION MARK*/
+    0x2022, /* BULLET */
-    0x201D, /*  ; Quotation_Mark # Pf       RIGHT DOUBLE QUOTATION MARK*/
+    0x2023, /* TRIANGULAR BULLET*/
    0x201E, /*  ; Quotation_Mark # Ps       DOUBLE LOW-9 QUOTATION MARK*/
    0x201F, /*  ; Quotation_Mark # Pi       DOUBLE HIGH-REVERSED-9 QUOTATION MARK*/
    0x2022, /* BULLET;Po;0;ON;;;;;N;;;;; */
    0x2023, /* TRIANGULAR BULLET;Po;0;ON;;;;;N;;;;;*/
    0x2024, /* ONE DOT LEADER;Po;0;ON;<compat> 002E;;;;N;;;;;*/
    0x2025, /* TWO DOT LEADER;Po;0;ON;<compat> 002E 002E;;;;N;;;;; */
    0x2026, /* HORIZONTAL ELLIPSIS;Po;0;ON;<compat> 002E 002E 002E;;;;N;;;;; */
-    0x2028, /*  ; White_Space # Zl       LINE SEPARATOR*/
+    0x2028, /* LINE SEPARATOR */
-    0x2029, /*  ; White_Space # Zp       PARAGRAPH SEPARATOR*/
+    0x2029, /* PARAGRAPH SEPARATOR */
-    0x202F, /*  ; White_Space # Zs       NARROW NO-BREAK SPACE*/
+    0x202F, /* NARROW NO-BREAK SPACE */
-    0x2032, /* PRIME;Po;0;ET;;;;;N;;;;;*/
+    0x2032, /* PRIME */
-    0x2039, /*  ; Quotation_Mark # Pi       SINGLE LEFT-POINTING ANGLE QUOTATION MARK*/
+    0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
-    0x203A, /*  ; Quotation_Mark # Pf       SINGLE RIGHT-POINTING ANGLE QUOTATION MARK*/
+    0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK*/
-    0x203C, /*  ; STerm # Po   [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
+    0x203C, /* [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
-    0x203D, /*  ; STerm # Po   [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
+    0x203D, /* [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
-    0x2047, /*  ; Terminal_Punctuation # Po   [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
+    0x2047, /* [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
-    0x2048, /*  ; Terminal_Punctuation # Po   [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
+    0x2048, /* [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
-    0x2049, /*  ; Terminal_Punctuation # Po   [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
+    0x2049, /* [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
-    0x2053, /*  ; Dash # Po       SWUNG DASH*/
+    0x2053, /* SWUNG DASH*/
-    0x205F, /*  ; White_Space # Zs       MEDIUM MATHEMATICAL SPACE*/
+    0x205F, /* MEDIUM MATHEMATICAL SPACE*/
-    0x207B, /*  ; Dash # Sm       SUPERSCRIPT MINUS*/
+    0x207B, /* SUPERSCRIPT MINUS*/
-    0x208B, /*  ; Dash # Sm       SUBSCRIPT MINUS*/
+    0x208B, /*  SUBSCRIPT MINUS*/
    0x20A0, /* EURO-CURRENCY SIGN */
    0x20A1, /* COLON SIGN */
    0x20A2, /* CRUZEIRO SIGN */
@ -161,60 +141,156 @@ static const unsigned int uniign[] = {
    0x2117, /* SOUND RECORDING COPYRIGHT;So */
    0x2122, /* TRADE MARK SIGN;So; */
    0x2192, /* RIGHTWARDS ARROW;Sm;0;ON;;;;;N;RIGHT ARROW;;;;*/
-    0x2212, /*  ; Dash # Sm       MINUS SIGN*/
+    0x2212, /* MINUS SIGN*/
    0x25A0, /* BLACK SQUARE */
    0x25A1, /* WHITE SQUARE */
    0x25A2, /* WHITE SQUARE WITH ROUNDED CORNERS */
    0x25A3, /* WHITE SQUARE CONTAINING BLACK SMALL SQUARE */
    0x25A4, /* SQUARE WITH HORIZONTAL FILL */
    0x25A5, /* SQUARE WITH VERTICAL FILL */
    0x25A6, /* SQUARE WITH ORTHOGONAL CROSSHATCH FILL */
    0x25A7, /* SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL */
    0x25A8, /* SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL */
    0x25A9, /* SQUARE WITH DIAGONAL CROSSHATCH FILL */
    0x25AA, /* BLACK SMALL SQUARE */
    0x25AB, /* WHITE SMALL SQUARE */
    0x25AC, /* BLACK RECTANGLE */
    0x25AD, /* WHITE RECTANGLE */
    0x25AE, /* BLACK VERTICAL RECTANGLE */
    0x25AF, /* WHITE VERTICAL RECTANGLE */
    0x25B0, /* BLACK PARALLELOGRAM */
    0x25B1, /* WHITE PARALLELOGRAM */
    0x25B2, /* BLACK UP-POINTING TRIANGLE */
    0x25B3, /* WHITE UP-POINTING TRIANGLE */
    0x25B4, /* BLACK UP-POINTING SMALL TRIANGLE */
    0x25B5, /* WHITE UP-POINTING SMALL TRIANGLE */
    0x25B6, /* BLACK RIGHT-POINTING TRIANGLE */
    0x25B7, /* WHITE RIGHT-POINTING TRIANGLE */
    0x25B8, /* BLACK RIGHT-POINTING SMALL TRIANGLE */
    0x25B9, /* WHITE RIGHT-POINTING SMALL TRIANGLE */
    0x25BA, /* BLACK RIGHT-POINTING POINTER */
    0x25BB, /* WHITE RIGHT-POINTING POINTER */
    0x25BC, /* BLACK DOWN-POINTING TRIANGLE */
    0x25BD, /* WHITE DOWN-POINTING TRIANGLE */
    0x25BE, /* BLACK DOWN-POINTING SMALL TRIANGLE */
    0x25BF, /* WHITE DOWN-POINTING SMALL TRIANGLE */
    0x25C0, /* BLACK LEFT-POINTING TRIANGLE */
    0x25C1, /* WHITE LEFT-POINTING TRIANGLE */
    0x25C2, /* BLACK LEFT-POINTING SMALL TRIANGLE */
    0x25C3, /* WHITE LEFT-POINTING SMALL TRIANGLE */
    0x25C4, /* BLACK LEFT-POINTING POINTER */
    0x25C5, /* WHITE LEFT-POINTING POINTER */
    0x25C6, /* BLACK DIAMOND */
    0x25C7, /* WHITE DIAMOND */
    0x25C8, /* WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND */
    0x25C9, /* FISHEYE */
    0x25CA, /* LOZENGE */
    0x25CB, /* WHITE CIRCLE */
    0x25CC, /* DOTTED CIRCLE */
    0x25CD, /* CIRCLE WITH VERTICAL FILL */
    0x25CE, /* BULLSEYE */
    0x25CF, /* BLACK CIRCLE */
    0x25D0, /* CIRCLE WITH LEFT HALF BLACK */
    0x25D1, /* CIRCLE WITH RIGHT HALF BLACK */
    0x25D2, /* CIRCLE WITH LOWER HALF BLACK */
    0x25D3, /* CIRCLE WITH UPPER HALF BLACK */
    0x25D4, /* CIRCLE WITH UPPER RIGHT QUADRANT BLACK */
    0x25D5, /* CIRCLE WITH ALL BUT UPPER LEFT QUADRANT BLACK */
    0x25D6, /* LEFT HALF BLACK CIRCLE */
    0x25D7, /* RIGHT HALF BLACK CIRCLE */
    0x25D8, /* INVERSE BULLET */
    0x25D9, /* INVERSE WHITE CIRCLE */
    0x25DA, /* UPPER HALF INVERSE WHITE CIRCLE */
    0x25DB, /* LOWER HALF INVERSE WHITE CIRCLE */
    0x25DC, /* UPPER LEFT QUADRANT CIRCULAR ARC */
    0x25DD, /* UPPER RIGHT QUADRANT CIRCULAR ARC */
    0x25DE, /* LOWER RIGHT QUADRANT CIRCULAR ARC */
    0x25DF, /* LOWER LEFT QUADRANT CIRCULAR ARC */
    0x25E0, /* UPPER HALF CIRCLE */
    0x25E1, /* LOWER HALF CIRCLE */
    0x25E2, /* BLACK LOWER RIGHT TRIANGLE */
    0x25E3, /* BLACK LOWER LEFT TRIANGLE */
    0x25E4, /* BLACK UPPER LEFT TRIANGLE */
    0x25E5, /* BLACK UPPER RIGHT TRIANGLE */
    0x25E6, /* WHITE BULLET */
    0x25E7, /* SQUARE WITH LEFT HALF BLACK */
    0x25E8, /* SQUARE WITH RIGHT HALF BLACK */
    0x25E9, /* SQUARE WITH UPPER LEFT DIAGONAL HALF BLACK */
    0x25EA, /* SQUARE WITH LOWER RIGHT DIAGONAL HALF BLACK */
    0x25EB, /* WHITE SQUARE WITH VERTICAL BISECTING LINE */
    0x25EC, /* WHITE UP-POINTING TRIANGLE WITH DOT */
    0x25ED, /* UP-POINTING TRIANGLE WITH LEFT HALF BLACK */
    0x25EE, /* UP-POINTING TRIANGLE WITH RIGHT HALF BLACK */
    0x25EF, /* LARGE CIRCLE */
    0x25F0, /* WHITE SQUARE WITH UPPER LEFT QUADRANT */
    0x25F1, /* WHITE SQUARE WITH LOWER LEFT QUADRANT */
    0x25F2, /* WHITE SQUARE WITH LOWER RIGHT QUADRANT */
    0x25F3, /* WHITE SQUARE WITH UPPER RIGHT QUADRANT */
    0x25F4, /* WHITE CIRCLE WITH UPPER LEFT QUADRANT */
    0x25F5, /* WHITE CIRCLE WITH LOWER LEFT QUADRANT */
    0x25F6, /* WHITE CIRCLE WITH LOWER RIGHT QUADRANT */
    0x25F7, /* WHITE CIRCLE WITH UPPER RIGHT QUADRANT */
    0x25F8, /* UPPER LEFT TRIANGLE */
    0x25F9, /* UPPER RIGHT TRIANGLE */
    0x25FA, /* LOWER LEFT TRIANGLE */
    0x25FB, /* WHITE MEDIUM SQUARE */
    0x25FC, /* BLACK MEDIUM SQUARE */
    0x25FD, /* WHITE MEDIUM SMALL SQUARE */
    0x25FE, /* BLACK MEDIUM SMALL SQUARE */
    0x25FF, /* LOWER RIGHT TRIANGLE */
    0x2E2E, /* REVERSED QUESTION MARK;Po;0;ON;;;;;N;;;;; */
-    0x3000, /*  ; White_Space # Zs       IDEOGRAPHIC SPACE*/
+    0x3000, /* IDEOGRAPHIC SPACE*/
-    0x3002, /*  ; STerm # Po       IDEOGRAPHIC FULL STOP*/
+    0x3002, /* IDEOGRAPHIC FULL STOP*/
-    0x300C, /*  ; Quotation_Mark # Ps       LEFT CORNER BRACKET*/
+    0x300C, /* LEFT CORNER BRACKET*/
-    0x300D, /*  ; Quotation_Mark # Pe       RIGHT CORNER BRACKET*/
+    0x300D, /* RIGHT CORNER BRACKET*/
-    0x300E, /*  ; Quotation_Mark # Ps       LEFT WHITE CORNER BRACKET*/
+    0x300E, /* LEFT WHITE CORNER BRACKET*/
-    0x300F, /*  ; Quotation_Mark # Pe       RIGHT WHITE CORNER BRACKET*/
+    0x300F, /* RIGHT WHITE CORNER BRACKET*/
-    0x301C, /*  ; Dash # Pd       WAVE DASH*/
+    0x301C, /* WAVE DASH*/
-    0x301D, /*  ; Quotation_Mark # Ps       REVERSED DOUBLE PRIME QUOTATION MARK*/
+    0x301D, /* REVERSED DOUBLE PRIME QUOTATION MARK*/
-    0x301E, /*  ; Quotation_Mark # Pe       LOW DOUBLE PRIME QUOTATION MARK*/
+    0x301E, /* LOW DOUBLE PRIME QUOTATION MARK*/
-    0x3030, /*  ; Dash # Pd       WAVY DASH*/
+    0x3030, /* WAVY DASH*/
-    0x30FB, /*  ; Hyphen # Pc       KATAKANA MIDDLE DOT*/
+    0x30FB, /* KATAKANA MIDDLE DOT*/
    0xC2B6, /* PILCROW SIGN;So;0;ON;;;;;N;PARAGRAPH SIGN;;;; */
    0xC3B7, /* DIVISION SIGN;Sm;0;ON;;;;;N;;;;; */
-    0xFE31, /*  ; Dash # Pd       PRESENTATION FORM FOR VERTICAL EM DASH*/
+    0xFE31, /* PRESENTATION FORM FOR VERTICAL EM DASH*/
-    0xFE32, /*  ; Dash # Pd       PRESENTATION FORM FOR VERTICAL EN DASH*/
+    0xFE32, /* PRESENTATION FORM FOR VERTICAL EN DASH*/
-    0xFE41, /*  ; Quotation_Mark # Ps       PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET*/
+    0xFE41, /* PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET*/
-    0xFE42, /*  ; Quotation_Mark # Pe       PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET*/
+    0xFE42, /* PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET*/
-    0xFE43, /*  ; Quotation_Mark # Ps       PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET*/
+    0xFE43, /* PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET*/
-    0xFE44, /*  ; Quotation_Mark # Pe       PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET*/
+    0xFE44, /* PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET*/
-    0xFE50, /*  ; Terminal_Punctuation # Po   [3] SMALL COMMA..SMALL FULL STOP*/
+    0xFE50, /* [3] SMALL COMMA..SMALL FULL STOP*/
-    0xFE51, /*  ; Terminal_Punctuation # Po   [3] SMALL COMMA..SMALL FULL STOP*/
+    0xFE51, /* [3] SMALL COMMA..SMALL FULL STOP*/
-    0xFE52, /*  ; STerm # Po       SMALL FULL STOP*/
+    0xFE52, /* STOP*/
-    0xFE52, /*  ; Terminal_Punctuation # Po   [3] SMALL COMMA..SMALL FULL STOP*/
+    0xFE52, /* [3] SMALL COMMA..SMALL FULL STOP*/
-    0xFE54, /*  ; Terminal_Punctuation # Po   [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
+    0xFE54, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
-    0xFE55, /*  ; Terminal_Punctuation # Po   [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
+    0xFE55, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
-    0xFE56, /*  ; Terminal_Punctuation # Po   [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
+    0xFE56, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
-    0xFE57, /*  ; Terminal_Punctuation # Po   [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
+    0xFE57, /* [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
-    0xFE58, /*  ; Dash # Pd       SMALL EM DASH*/
+    0xFE58, /* SMALL EM DASH */
-    0xFE63, /*  ; Hyphen # Pd       SMALL HYPHEN-MINUS*/
+    0xFE63, /* SMALL HYPHEN-MINUS */
-    0xFF01, /* FULLWIDTH EXCLAMATION MARK;Po;0;ON;<wide> 0021;;;;N;;;;; */
+    0xFF01, /* FULLWIDTH EXCLAMATION MARK */
-    0xFF02, /* FULLWIDTH QUOTATION MARK;Po;0;ON;<wide> 0022;;;;N;;;;; */
+    0xFF02, /* FULLWIDTH QUOTATION MARK */
-    0xFF03, /* FULLWIDTH NUMBER SIGN;Po;0;ET;<wide> 0023;;;;N;;;;; */
+    0xFF03, /* FULLWIDTH NUMBER SIGN */
-    0xFF04, /* FULLWIDTH DOLLAR SIGN;Sc;0;ET;<wide> 0024;;;;N;;;;; */
+    0xFF04, /* FULLWIDTH DOLLAR SIGN */
-    0xFF05, /* FULLWIDTH PERCENT SIGN;Po;0;ET;<wide> 0025;;;;N;;;;; */
+    0xFF05, /* FULLWIDTH PERCENT SIGN */
-    0xFF06, /* FULLWIDTH AMPERSAND;Po;0;ON;<wide> 0026;;;;N;;;;; */
+    0xFF06, /* FULLWIDTH AMPERSAND */
-    0xFF07, /* FULLWIDTH APOSTROPHE;Po;0;ON;<wide> 0027;;;;N;;;;; */
+    0xFF07, /* FULLWIDTH APOSTROPHE */
-    0xFF08, /* FULLWIDTH LEFT PARENTHESIS;Ps;0;ON;<wide> 0028;;;;Y;FULLWIDTH OPENIN*/
+    0xFF08, /* FULLWIDTH LEFT PARENTHESIS */
-    0xFF09, /* FULLWIDTH RIGHT PARENTHESIS;Pe;0;ON;<wide> 0029;;;;Y;FULLWIDTH CLOS*/
+    0xFF09, /* FULLWIDTH RIGHT PARENTHESIS */
-    0xFF0A, /* FULLWIDTH ASTERISK;Po;0;ON;<wide> 002A;;;;N;;;;; */
+    0xFF0A, /* FULLWIDTH ASTERISK */
-    0xFF0B, /* FULLWIDTH PLUS SIGN;Sm;0;ES;<wide> 002B;;;;N;;;;; */
+    0xFF0B, /* FULLWIDTH PLUS SIGN */
-    0xFF0C, /* FULLWIDTH COMMA;Po;0;CS;<wide> 002C;;;;N;;;;; */
+    0xFF0C, /* FULLWIDTH COMMA */
-    0xFF0D, /* FULLWIDTH HYPHEN-MINUS;Pd;0;ES;<wide> 002D;;;;N;;;;; */
+    0xFF0D, /* FULLWIDTH HYPHEN-MINUS */
-    0xFF0E, /* FULLWIDTH FULL STOP;Po;0;CS;<wide> 002E;;;;N;FULLWIDTH PERIOD;;;; */
+    0xFF0E, /* FULLWIDTH FULL STOP */
-    0xFF0F, /* FULLWIDTH SOLIDUS;Po;0;CS;<wide> 002F;;;;N;FULLWIDTH SLASH;;;; */
+    0xFF0F, /* FULLWIDTH SOLIDUS  */
-    0xFF1A, /*  ; Terminal_Punctuation # Po   [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
+    0xFF1A, /* [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
-    0xFF1B, /*  ; Terminal_Punctuation # Po   [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
+    0xFF1B, /* [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
-    0xFF1F, /*  ; Terminal_Punctuation # Po       FULLWIDTH QUESTION MARK*/
+    0xFF1F, /* FULLWIDTH QUESTION MARK*/
-    0xFF61, /*  ; Terminal_Punctuation # Po       HALFWIDTH IDEOGRAPHIC FULL STOP*/
+    0xFF61, /* HALFWIDTH IDEOGRAPHIC FULL STOP*/
-    0xFF62, /*  ; Quotation_Mark # Ps       HALFWIDTH LEFT CORNER BRACKET*/
+    0xFF62, /* HALFWIDTH LEFT CORNER BRACKET*/
-    0xFF63, /*  ; Quotation_Mark # Pe       HALFWIDTH RIGHT CORNER BRACKET*/
+    0xFF63, /* HALFWIDTH RIGHT CORNER BRACKET*/
-    0xFF64, /*  ; Terminal_Punctuation # Po       HALFWIDTH IDEOGRAPHIC COMMA*/
+    0xFF64, /* HALFWIDTH IDEOGRAPHIC COMMA*/
-    0xFF65, /*  ; Hyphen # Pc       HALFWIDTH KATAKANA MIDDLE DOT*/
+    0xFF65, /* HALFWIDTH KATAKANA MIDDLE DOT*/
 };
 /* Things that would visibly break a block of text, rendering obvious the need
--- a/src/qtgui/preview_w.h
+++ b/src/qtgui/preview_w.h
@ -25,6 +25,7 @@
 #include <stdio.h>
 #include <QComboBox>
 #include <qvariant.h>
 #include <qwidget.h>
--- a/src/qtgui/rclmain_w.cpp
+++ b/src/qtgui/rclmain_w.cpp
@ -301,6 +301,7 @@ void RclMain::init()
    connect(restable, SIGNAL(docSaveToFileClicked(Rcl::Doc)), 
 	    this, SLOT(saveDocToFile(Rcl::Doc)));
    reslist->setRclMain(this);
    connect(this, SIGNAL(docSourceChanged(RefCntr<DocSequence>)),
 	    reslist, SLOT(setDocSource(RefCntr<DocSequence>)));
    connect(firstPageAction, SIGNAL(activated()), 
@ -931,8 +932,12 @@ void RclMain::showIndexSched(bool modal)
 	connect(indexSched->cronCLB, SIGNAL(clicked()), 
 		this, SLOT(execCronTool()));
 	if (theconfig && theconfig->isDefaultConfig()) {
 #ifdef RCL_MONITOR
 	    connect(indexSched->rtidxCLB, SIGNAL(clicked()), 
 		    this, SLOT(execRTITool()));
 #else
 	    indexSched->rtidxCLB->setEnabled(false);
 #endif
 	} else {
 	    indexSched->rtidxCLB->setEnabled(false);
 	}
@ -1493,8 +1498,9 @@ static bool lookForHtmlBrowser(string &exefile)
    return false;
 }
-void RclMain::startNativeViewer(Rcl::Doc doc)
+void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum)
 {
    LOGDEB(("RclMain::startNativeViewer: page %d\n", pagenum));
    // Look for appropriate viewer
    string cmdplusattr;
    if (prefs.useDesktopOpen) {
@ -1512,11 +1518,13 @@ void RclMain::startNativeViewer(Rcl::Doc doc)
 	return;
    }
-    int pagenum = 1;
+    if (pagenum == -1) {
    if (m_source.isNotNull())
 	pagenum = m_source->getFirstMatchPage(doc);
    if (pagenum == -1)
 	pagenum = 1;
 	if (m_source.isNotNull())
 	    pagenum = m_source->getFirstMatchPage(doc);
 	if (pagenum == -1)
 	    pagenum = 1;
    }
    char cpagenum[20];
    sprintf(cpagenum, "%d", pagenum);
--- a/src/qtgui/rclmain_w.h
+++ b/src/qtgui/rclmain_w.h
@ -119,7 +119,7 @@ public slots:
    virtual void docExpand(Rcl::Doc);
    virtual void startPreview(int docnum, Rcl::Doc doc, int keymods);
    virtual void startPreview(Rcl::Doc);
-    virtual void startNativeViewer(Rcl::Doc);
+    virtual void startNativeViewer(Rcl::Doc, int pagenum = -1);
    virtual void saveDocToFile(Rcl::Doc);
    virtual void previewNextInTab(Preview *, int sid, int docnum);
    virtual void previewPrevInTab(Preview *, int sid, int docnum);
--- a/src/qtgui/recoll.pro.in
+++ b/src/qtgui/recoll.pro.in
@ -25,6 +25,7 @@ HEADERS += \
        restable.h \
        rtitool.h \
        searchclause_w.h \
        snippets_w.h \
        spell_w.h \
        ssearch_w.h \
        uiprefs_w.h \
@ -46,6 +47,7 @@ SOURCES += \
        restable.cpp \
        rtitool.cpp \
        searchclause_w.cpp \
        snippets_w.cpp \
        spell_w.cpp \
        ssearch_w.cpp \
        uiprefs_w.cpp \
@ -64,6 +66,7 @@ FORMS   = \
        restable.ui \
        rtitool.ui \
        spell.ui \
        snippets.ui \
        ssearchb.ui \
        uiprefs.ui \
        viewaction.ui \
--- a/src/qtgui/reslist.cpp
+++ b/src/qtgui/reslist.cpp
@ -50,6 +50,7 @@
 #include "refcntr.h"
 #include "internfile.h"
 #include "indexer.h"
 #include "snippets_w.h"
 #include "reslist.h"
 #include "moc_reslist.cpp"
@ -281,7 +282,7 @@ static PlainToRichQtReslist g_hiliter;
 /////////////////////////////////////
 ResList::ResList(QWidget* parent, const char* name)
-    : RESLIST_PARENTCLASS(parent)
+    : RESLIST_PARENTCLASS(parent), m_parent(0)
 {
    if (!name)
 	setObjectName("resList");
@ -902,6 +903,9 @@ void ResList::createPopupMenu(const QPoint& pos)
 		      this, SLOT(menuPreviewParent()));
    popup->addAction(tr("&Open Parent document/folder"), 
 		     this, SLOT(menuOpenParent()));
    if (m_source->snippetsCapable()) 
 	popup->addAction(tr("Open &Snippets window"), 
 			 this, SLOT(menuOpenSnippets()));
    popup->popup(mapToGlobal(pos));
 }
@ -953,6 +957,20 @@ void ResList::menuOpenParent()
    }
 }
 void ResList::menuOpenSnippets()
 {
    Rcl::Doc doc;
    if (!getDoc(m_popDoc, doc) || m_source.isNull()) 
 	return;
    SnippetsW *sp = new SnippetsW(doc, m_source);
    if (m_parent) {
 	connect(sp, SIGNAL(startNativeViewer(Rcl::Doc, int)),
 		m_parent, SLOT(startNativeViewer(Rcl::Doc, int)));
    }
    sp->show();
 }
 void ResList::menuEdit()
 {
    Rcl::Doc doc;
--- a/src/qtgui/reslist.h
+++ b/src/qtgui/reslist.h
@ -41,6 +41,7 @@ using std::pair;
 #include "rcldoc.h"
 #include "reslistpager.h"
 class RclMain;
 class QtGuiResListPager;
 /**
@ -66,7 +67,10 @@ class ResList : public RESLIST_PARENTCLASS
    int listId() const {return m_listId;}
    int pageFirstDocNum();
    void setFont();
-
+    void setRclMain(RclMain *m) 
    {
	// Remember the main window object. The pointer is only stored here,
	// not dereferenced.
 	m_parent = m;
    }
 public slots:
    virtual void setDocSource(RefCntr<DocSequence> nsource);
    virtual void resetList();     // Erase current list
@ -84,6 +88,7 @@ class ResList : public RESLIST_PARENTCLASS
    virtual void menuExpand();
    virtual void menuPreviewParent();
    virtual void menuOpenParent();
    virtual void menuOpenSnippets();
    virtual void previewExposed(int);
    virtual void append(const QString &text);
    virtual void readDocSource();
@ -132,6 +137,7 @@ class ResList : public RESLIST_PARENTCLASS
    // so we store the page and display it when done.
    QString    m_text; 
 #endif
    RclMain   *m_parent;
    virtual void displayPage(); // Display current page
    static int newListId();
--- a/src/qtgui/snippets.ui
+++ b/src/qtgui/snippets.ui
@ -0,0 +1,67 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <ui version="4.0">
 <class>Snippets</class>
 <widget class="QDialog" name="Snippets">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>640</width>
    <height>400</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Snippets</string>
  </property>
  <property name="sizeGripEnabled">
   <bool>true</bool>
  </property>
  <layout class="QVBoxLayout" name="verticalLayout">
   <item>
    <widget class="QWebView" name="webView">
     <property name="url">
      <url>
       <string>about:blank</string>
      </url>
     </property>
    </widget>
   </item>
   <item>
    <widget class="QDialogButtonBox" name="buttonBox">
     <property name="orientation">
      <enum>Qt::Horizontal</enum>
     </property>
     <property name="standardButtons">
      <set>QDialogButtonBox::Close</set>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <customwidgets>
  <customwidget>
   <class>QWebView</class>
   <extends>QWidget</extends>
   <header>QtWebKit/QWebView</header>
  </customwidget>
 </customwidgets>
 <resources/>
 <connections>
  <connection>
   <sender>buttonBox</sender>
   <signal>clicked(QAbstractButton*)</signal>
   <receiver>Snippets</receiver>
   <slot>close()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>257</x>
     <y>369</y>
    </hint>
    <hint type="destinationlabel">
     <x>257</x>
     <y>197</y>
    </hint>
   </hints>
  </connection>
 </connections>
 </ui>
--- a/src/qtgui/snippets_w.cpp
+++ b/src/qtgui/snippets_w.cpp
@ -0,0 +1,124 @@
 /* Copyright (C) 2012 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 #include "autoconfig.h"
 #include <unistd.h>
 #include <stdio.h>
 #include <string>
 #include <vector>
 using namespace std;
 #include "debuglog.h"
 #include "recoll.h"
 #include "snippets_w.h"
 #include "guiutils.h"
 #include "rcldb.h"
 #include "rclhelp.h"
 #include "plaintorich.h"
 /**
  * PlainToRich specialization used for the snippets window: matched terms
  * are wrapped in a <span class='rclmatch'> element whose color comes from
  * the prefs.qtermcolor preference.
  */
 class PlainToRichQtSnippets : public PlainToRich {
 public:
    /** Opening markup inserted before a matched term. */
    virtual string startMatch(unsigned int)
    {
        string tag("<span class='rclmatch' style='color: ");
        tag += string((const char *)prefs.qtermcolor.toAscii());
        tag += string("'>");
        return tag;
    }
    /** Closing markup inserted after a matched term. */
    virtual string endMatch() 
    {
        static const char closing[] = "</span>";
        return string(closing);
    }
 };
 // Single highlighter instance shared by all snippets windows in this file
 static PlainToRichQtSnippets g_hiliter;
 /**
  * Build the snippets page for m_doc and load it into the web view.
  *
  * Does nothing if no document source is set. Otherwise:
  *  - sets the window title from the Rcl::Doc::keytt metadata, falling
  *    back to Rcl::Doc::keyfn when the former is empty;
  *  - asks the source for the abstract as (integer, text) entries and
  *    emits one HTML table row per entry. Entries with an integer > 0 get
  *    a "P. N" link in the first column (handled by linkWasClicked());
  *  - highlights each text with g_hiliter and the source's terms;
  *  - delegates all link clicks to linkWasClicked().
  */
 void SnippetsW::init()
 {
    if (m_source.isNull())
        return;
    // Make title out of file name if none yet
    string titleOrFilename;
    string utf8fn;
    m_doc.getmeta(Rcl::Doc::keytt, &titleOrFilename);
    m_doc.getmeta(Rcl::Doc::keyfn, &utf8fn);
    if (titleOrFilename.empty()) {
        titleOrFilename = utf8fn;
    }
    setWindowTitle(QString::fromUtf8(titleOrFilename.c_str()));
    vector<pair<int, string> > vpabs;
    m_source->getAbstract(m_doc, vpabs);
    HighlightData hdata;
    m_source->getTerms(hdata);
    QString html = QString::fromAscii(
        "<html><head>"
        "<meta http-equiv=\"content-type\" "
        "content=\"text/html; charset=utf-8\"></head>"
        "<body style='overflow-x: scroll; white-space: nowrap'>"
        "<table>"
                                      );
    g_hiliter.set_inputhtml(false);
    for (vector<pair<int, string> >::const_iterator it = vpabs.begin(); 
         it != vpabs.end(); ++it) {
        html += "<tr><td>";
        if (it->first > 0) {
            // The same text is used as link target and as link label
            char buf[100];
            snprintf(buf, sizeof(buf), "P.&nbsp;%d", it->first);
            html += "<a href=\"";
            html += buf;
            html += "\">";
            html += buf;
            html += "</a>";
        }
        html += "</td><td>";
        list<string> lr;
        g_hiliter.plaintorich(it->second, lr, hdata);
        // Calling front() on an empty list is undefined: only use the
        // output if the highlighter produced some.
        if (!lr.empty())
            html.append(QString::fromUtf8(lr.front().c_str()));
        html.append("</td></tr>\n");
    }
    // Close the table opened in the page header before closing the body
    html.append("</table></body></html>");
    webView->setHtml(html);
    connect(webView, SIGNAL(linkClicked(const QUrl &)), 
            this, SLOT(linkWasClicked(const QUrl &)));
    // Without this, the view would try to navigate to the links itself
    // instead of emitting linkClicked()
    webView->page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks);
 }
 void SnippetsW::linkWasClicked(const QUrl &url)
 {
    string ascurl = (const char *)url.toString().toAscii();;
    LOGDEB(("Snippets::linkWasClicked: [%s]\n", ascurl.c_str()));
    if (ascurl.size() > 3) {
 	int what = ascurl[0];
 	switch (what) {
 	case 'P': 
 	{
 	    int page = atoi(ascurl.c_str()+2);
 	    emit startNativeViewer(m_doc, page);
 	    return;
 	}
 	}
    }
    LOGERR(("Snippets::linkWasClicked: bad link [%s]\n", ascurl.c_str()));
 }
--- a/src/qtgui/snippets_w.h
+++ b/src/qtgui/snippets_w.h
@ -0,0 +1,50 @@
 /* Copyright (C) 2012 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 #ifndef _SNIPPETS_W_H_INCLUDED_
 #define _SNIPPETS_W_H_INCLUDED_
 #include "rcldoc.h"
 #include "refcntr.h"
 #include "docseq.h"
 #include "rclmain_w.h"
 #include "ui_snippets.h"
 /** 
  * Snippets window: displays the list of abstract snippets for one
  * result document, as obtained from the document sequence it came from.
  * Clicking a page link in the list emits startNativeViewer().
  */
 class SnippetsW : public QWidget, public Ui::Snippets
 {
    Q_OBJECT
 public:
    /** 
     * @param doc the result document for which we display snippets
     * @param source the document sequence which will be asked for the 
     *   abstract and the search terms
     * @param parent optional Qt parent widget
     */
    SnippetsW(Rcl::Doc doc, RefCntr<DocSequence> source, QWidget* parent = 0) 
 	: QWidget(parent), m_doc(doc), m_source(source)
    {
 	// NOTE(review): this object is a QWidget, not a QDialog. The cast
 	// presumably matches the base class declared in the .ui file -
 	// confirm.
 	setupUi((QDialog*)this);
 	init();
    }
 protected slots:
    /** Called when a link is clicked in the snippets view */
    virtual void linkWasClicked(const QUrl &);
 signals:
    /** Request opening the document in its native viewer at pagenum */
    void startNativeViewer(Rcl::Doc, int pagenum);
 private:
    // Fill up the view. Called once from the constructor
    void init();
    // The document we are showing snippets for
    Rcl::Doc m_doc;
    // Where the document came from: used to build the snippets
    RefCntr<DocSequence> m_source;
 };
 #endif /* _SNIPPETS_W_H_INCLUDED_ */
--- a/src/query/docseq.h
+++ b/src/query/docseq.h
@ -95,6 +95,13 @@ class DocSequence {
 	abs.push_back(doc.meta[Rcl::Doc::keyabs]);
 	return true;
    }
    /** Default (page, snippet) abstract: a single entry holding the
     * abstract stored in the document metadata, with page number 0. */
    virtual bool getAbstract(Rcl::Doc& doc, 
 			     std::vector<std::pair<int, std::string> >& abs) 
    {
 	std::pair<int, std::string> entry(0, doc.meta[Rcl::Doc::keyabs]);
 	abs.push_back(entry);
 	return true;
    }
    virtual int getFirstMatchPage(Rcl::Doc&) 
    {
 	return -1;
@ -106,8 +113,16 @@ class DocSequence {
    virtual int getResCnt() = 0;
    /** Get title for result list */
-    virtual std::string title() {return m_title;}
+    virtual std::string title() 
    {
 	return m_title;
    }
    /** Can do snippets ? This base implementation answers no: it has
     * to be overridden by the sequence types which can. */
    virtual bool snippetsCapable()
    {
 	return false;
    }
    /** Get description for underlying query */
    virtual std::string getDescription() = 0;
@ -157,6 +172,20 @@ public:
 	    return false;
 	return m_seq->getAbstract(doc, abs);
    }
    /** Forward the (page, snippet) abstract request to the underlying
     * sequence. Fails if there is none. */
    virtual bool getAbstract(Rcl::Doc& doc, 
 			     std::vector<std::pair<int, std::string> >& abs) 
    {
 	return !m_seq.isNull() && m_seq->getAbstract(doc, abs);
    }
    /** We can do snippets if we have an underlying sequence and it can */
    virtual bool snippetsCapable()
    {
 	return !m_seq.isNull() && m_seq->snippetsCapable();
    }
    virtual std::string getDescription() 
    {
 	if (m_seq.isNull())
--- a/src/query/docseqdb.cpp
+++ b/src/query/docseqdb.cpp
@ -65,6 +65,32 @@ int DocSequenceDb::getResCnt()
    return m_rescnt;
 }
 // Build the list of (page number, snippet) pairs for a document. This
 // variant is only used for filling up the snippets window, and it
 // ignores most of the abstract/snippets preferences.
 // Always returns true: if nothing could be generated, the abstract
 // stored in the document metadata is used as the only entry.
 bool DocSequenceDb::getAbstract(Rcl::Doc &doc, 
 				vector<pair<int, string> >& vpabs)
 {
    LOGDEB(("DocSequenceDb::getAbstract/pair\n"));
    setQuery();

    // Have to put the limit somewhere. 
    const int maxoccs = 500;

    Rcl::abstract_result ret = Rcl::ABSRES_ERROR;
    if (m_q->whatDb()) {
 	// Use a slightly wider context than the db default value
 	const int ctxwords = m_q->whatDb()->getAbsCtxLen() + 2;
 	ret = m_q->whatDb()->makeDocAbstract(doc, m_q.getptr(), vpabs, 
 					     maxoccs, ctxwords);
    } 

    if (vpabs.empty()) {
 	// Nothing was produced: fall back to the stored abstract
 	vpabs.push_back(pair<int, string>(0, doc.meta[Rcl::Doc::keyabs]));
    }

    if (ret == Rcl::ABSRES_TRUNC) {
 	// If the list was probably truncated, indicate it.
 	vpabs.push_back(pair<int, string>(-1, "[...]"));
    }
    return true;
 }
 bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
 {
    setQuery();
--- a/src/query/docseqdb.h
+++ b/src/query/docseqdb.h
@ -31,6 +31,11 @@ class DocSequenceDb : public DocSequence {
    virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
    virtual int getResCnt();
    virtual void getTerms(HighlightData& hld);
    // Called to fill up the snippets window. Ignores
    // buildabstract/replaceabstract and syntabslen
    virtual bool getAbstract(Rcl::Doc &doc, vector<pair<int, string> >&);
    virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
    virtual int getFirstMatchPage(Rcl::Doc&);
    virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
@ -45,6 +50,11 @@ class DocSequenceDb : public DocSequence {
        m_queryBuildAbstract = qba;
        m_queryReplaceAbstract = qra;
    }
    // Database-backed sequences can always produce snippets
    virtual bool snippetsCapable()
    {
 	return true;
    }
    virtual string title();
 private:
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -230,7 +230,9 @@ static void listList(const string&, const vector<string>&)
 }
 #endif
-// Retrieve and store db-wide frequencies for the query terms.
+// Retrieve db-wide frequencies for the query terms and store them in
 // the query object. This is done at most once for a query, and the data is used
 // while computing abstracts for the different result documents.
 void Db::Native::setDbWideQTermsFreqs(Query *query)
 {
    // Do it once only for a given query.
@ -252,7 +254,7 @@ void Db::Native::setDbWideQTermsFreqs(Query *query)
    for (vector<string>::const_iterator qit = qterms.begin(); 
 	 qit != qterms.end(); qit++) {
 	query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
-	LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(), 
+	LOGABS(("set..QTermFreqs: [%s] db freq %.1e\n", qit->c_str(), 
 		query->m_nq->termfreqs[*qit]));
    }
 }
@ -306,6 +308,7 @@ double Db::Native::qualityTerms(Xapian::docid docid,
    }
 #ifdef DEBUGABSTRACT
    LOGDEB(("Db::qualityTerms:\n"));
    for (multimap<double, string>::reverse_iterator qit = byQ.rbegin(); 
 	 qit != byQ.rend(); qit++) {
 	LOGDEB(("%.1e->[%s]\n", qit->first, qit->second.c_str()));
@ -317,6 +320,7 @@ double Db::Native::qualityTerms(Xapian::docid docid,
 // Return the positions list for the page break term
 bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos)
 {
    vpos.clear();
    // Need to retrieve the document record to check for multiple page breaks
    // that we store there for lack of better place
    map<int, int> mbreaksmap;
@ -422,25 +426,26 @@ int Db::Native::getFirstMatchPage(Xapian::docid docid, Query *query)
 //
 // DatabaseModified and other general exceptions are caught and
 // possibly retried by our caller
-vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
+abstract_result Db::Native::makeAbstract(Xapian::docid docid, Query *query, 
 					 vector<pair<int, string> >& vabs, 
 					 int imaxoccs, int ictxwords)
 {
    Chrono chron;
-    LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
+    LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d imaxoccs %d\n", chron.ms(),
-	     m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
+	     m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen, imaxoccs));
    // The (unprefixed) terms matched by this document
-    vector<string> terms;
+    vector<string> matchedTerms;
    {
        vector<string> iterms;
        query->getMatchTerms(docid, iterms);
-        noPrefixList(iterms, terms);
+        noPrefixList(iterms, matchedTerms);
-        if (terms.empty()) {
+        if (matchedTerms.empty()) {
            LOGDEB(("makeAbstract::Empty term list\n"));
-            return vector<string>();
+            return ABSRES_ERROR;
        }
    }
-    listList("Match terms: ", terms);
+    listList("Match terms: ", matchedTerms);
    // Retrieve the term freqencies for the query terms. This is
    // actually computed only once for a query, and for all terms in
@ -455,12 +460,12 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
    // removing its meaning from the maximum occurrences per term test
    // used while walking the list below)
    multimap<double, string> byQ;
-    double totalweight = qualityTerms(docid, query, terms, byQ);
+    double totalweight = qualityTerms(docid, query, matchedTerms, byQ);
    LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
    // This can't happen, but would crash us
    if (totalweight == 0.0) {
 	LOGERR(("makeAbstract: totalweight == 0.0 !\n"));
-	return vector<string>();
+	return ABSRES_ERROR;
    }
    ///////////////////
@ -473,21 +478,25 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
    // terms, at their positions around the search terms positions:
    map<unsigned int, string> sparseDoc;
-    // All the chosen query term positions. 
+    // Total number of occurences for all terms. We stop when we have too much
-    vector<unsigned int> qtermposs; 
+    unsigned int totaloccs = 0;
    // Limit the total number of slots we populate. The 7 is taken as
    // average word size. It was a mistake to have the user max
    // abstract size parameter in characters, we basically only deal
    // with words. We used to limit the character size at the end, but
    // this damaged our careful selection of terms
-    const unsigned int maxtotaloccs = 
+    const unsigned int maxtotaloccs = imaxoccs > 0 ? imaxoccs :
 	m_rcldb->m_synthAbsLen /(7 * (m_rcldb->m_synthAbsWordCtxLen+1));
-    LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
+    int ctxwords = ictxwords == -1 ? m_rcldb->m_synthAbsWordCtxLen : ictxwords;
    LOGABS(("makeAbstract:%d: mxttloccs %d ctxwords %d\n", 
 	    chron.ms(), maxtotaloccs, ctxwords));
    // This is used to mark positions overlapped by a multi-word match term
    const string occupiedmarker("?");
    abstract_result ret = ABSRES_OK;
    // Let's go populate
    for (multimap<double, string>::reverse_iterator qit = byQ.rbegin(); 
 	 qit != byQ.rend(); qit++) {
@ -508,7 +517,10 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 	Xapian::PositionIterator pos;
 	// There may be query terms not in this doc. This raises an
-	// exception when requesting the position list, we catch it.
+	// exception when requesting the position list, we catch it ??
 	// Not clear how this can happen because we are walking the
 	// match list returned by Xapian. Maybe something with the
 	// fields?
 	string emptys;
 	try {
 	    unsigned int occurrences = 0;
@ -519,14 +531,14 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 		    continue;
 		LOGABS(("makeAbstract: [%s] at %d occurrences %d maxoccs %d\n",
 			qterm.c_str(), ipos, occurrences, maxoccs));
-		// Remember the term position
+
-		qtermposs.push_back(ipos);
+		totaloccs++;
 		// Add adjacent slots to the set to populate at next
 		// step by inserting empty strings. Special provisions
 		// for adding ellipsis and for positions overlapped by
 		// the match term.
-		unsigned int sta = MAX(0, ipos-m_rcldb->m_synthAbsWordCtxLen);
+		unsigned int sta = MAX(0, ipos - ctxwords);
 		unsigned int sto = ipos + qtrmwrdcnt-1 + 
 		    m_rcldb->m_synthAbsWordCtxLen;
 		for (unsigned int ii = sta; ii <= sto;  ii++) {
@ -552,23 +564,29 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 		// Limit to allocated occurences and total size
 		if (++occurrences >= maxoccs || 
-		    qtermposs.size() >= maxtotaloccs)
+		    totaloccs >= maxtotaloccs) {
 		    ret = ABSRES_TRUNC;
 		    LOGDEB(("Db::makeAbstract: max occurrences cutoff\n"));
 		    break;
 		}
 	    }
 	} catch (...) {
 	    // Term does not occur. No problem.
 	}
-	if (qtermposs.size() >= maxtotaloccs)
+	if (totaloccs >= maxtotaloccs) {
 	    ret = ABSRES_TRUNC;
 	    LOGDEB(("Db::makeAbstract: max1 occurrences cutoff\n"));
 	    break;
 	}
    }
    LOGABS(("makeAbstract:%d:chosen number of positions %d\n", 
-	    chron.millis(), qtermposs.size()));
+	    chron.millis(), totaloccs));
    // This can happen if there are term occurences in the keywords
    // etc. but not elsewhere ?
-    if (qtermposs.size() == 0) {
+    if (totaloccs == 0) {
 	LOGDEB1(("makeAbstract: no occurrences\n"));
-	return vector<string>();
+	return ABSRES_ERROR;
    }
    // Walk all document's terms position lists and populate slots
@ -586,6 +604,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 	    if (has_prefix(*term))
 		continue;
 	    if (cutoff-- < 0) {
 		ret = ABSRES_TRUNC;
 		LOGDEB0(("makeAbstract: max term count cutoff\n"));
 		break;
 	    }
@ -594,6 +613,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 	    for (pos = xrdb.positionlist_begin(docid, *term); 
 		 pos != xrdb.positionlist_end(docid, *term); pos++) {
 		if (cutoff-- < 0) {
 		    ret = ABSRES_TRUNC;
 		    LOGDEB0(("makeAbstract: max term count cutoff\n"));
 		    break;
 		}
@ -604,8 +624,8 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 		    // at the same position, we want to keep only the
 		    // first one (ie: dockes and dockes@wanadoo.fr)
 		    if (vit->second.empty()) {
-			LOGABS(("makeAbstract: populating: [%s] at %d\n", 
+			LOGDEB2(("makeAbstract: populating: [%s] at %d\n", 
-				(*term).c_str(), *pos));
+				 (*term).c_str(), *pos));
 			sparseDoc[*pos] = *term;
 		    }
 		}
@ -637,19 +657,19 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
    LOGABS(("makeAbstract:%d: extracting. Got %u pages\n", chron.millis(),
 	    vpbreaks.size()));
    // Finally build the abstract by walking the map (in order of position)
-    vector<string> vabs;
+    vabs.clear();
    string chunk;
    bool incjk = false;
    int page = 0;
    for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
 	 it != sparseDoc.end(); it++) {
 	LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
 	if (!occupiedmarker.compare(it->second))
 	    continue;
 	if (chunk.empty() && !vpbreaks.empty()) {
-	    int pnum =  getPageNumberForPosition(vpbreaks, it->first);
+	    page =  getPageNumberForPosition(vpbreaks, it->first);
-	    ostringstream ss;
+	    if (page < 0) 
-	    ss << pnum;
+		page = 0;
 	    chunk += string(" [p ") + ss.str() + "] ";
 	}
 	Utf8Iter uit(it->second);
 	bool newcjk = false;
@ -659,7 +679,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 	    chunk += " ";
 	incjk = newcjk;
 	if (it->second == cstr_ellipsis) {
-	    vabs.push_back(chunk);
+	    vabs.push_back(pair<int,string>(page, chunk));
 	    chunk.clear();
 	} else {
 	    if (it->second.compare(end_of_field_term) && 
@ -668,10 +688,10 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
 	}
    }
    if (!chunk.empty())
-	vabs.push_back(chunk);
+	vabs.push_back(pair<int, string>(page, chunk));
    LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
-    return vabs;
+    return ret;
 }
 /* Rcl::Db methods ///////////////////////////////// */
@ -1516,6 +1536,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
    if (!tpidx.m_pageincrvec.empty()) {
 	ostringstream multibreaks;
 	for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) {
 	    if (i != 0)
 		multibreaks << ",";
 	    multibreaks << tpidx.m_pageincrvec[i].first << "," << 
 		tpidx.m_pageincrvec[i].second;
 	}
@ -2168,31 +2190,59 @@ bool Db::stemDiffers(const string& lang, const string& word,
    return true;
 }
 // Build the abstract for a document as a list of (page, snippet) pairs,
 // by calling the native makeAbstract() under XAPTRY.
 // maxoccs and ctxwords are passed through unchanged.
 // Returns ABSRES_ERROR if the db is not open or if m_reason is set
 // after the call, else the value returned by makeAbstract().
 abstract_result Db::makeDocAbstract(Doc &doc, Query *query, 
 				    vector<pair<int, string> >& abstract, 
 				    int maxoccs, int ctxwords)
 {
    LOGDEB(("makeDocAbstract: maxoccs %d ctxwords %d\n", maxoccs, ctxwords));

    const bool dbusable = m_ndb && m_ndb->m_isopen;
    if (!dbusable) {
 	LOGERR(("Db::makeDocAbstract: no db\n"));
 	return ABSRES_ERROR;
    }

    // NOTE(review): XAPTRY presumably catches the Xapian exceptions and
    // stores the error message in m_reason - confirm against the macro
    // definition.
    abstract_result result = ABSRES_ERROR;
    XAPTRY(result = m_ndb->makeAbstract(doc.xdocid, query, abstract, 
 					maxoccs, ctxwords),
           m_ndb->xrdb, m_reason);

    return m_reason.empty() ? result : ABSRES_ERROR;
 }
 bool Db::makeDocAbstract(Doc &doc, Query *query, vector<string>& abstract)
 {
    LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
    if (!m_ndb || !m_ndb->m_isopen) {
 	LOGERR(("Db::makeDocAbstract: no db\n"));
 	return false;
    }
-    XAPTRY(abstract = m_ndb->makeAbstract(doc.xdocid, query),
+    vector<pair<int, string> > vpabs;
-           m_ndb->xrdb, m_reason);
+    if (!makeDocAbstract(doc, query, vpabs)) 
-    return m_reason.empty() ? true : false;
+	return false;
    for (vector<pair<int, string> >::const_iterator it = vpabs.begin();
 	 it != vpabs.end(); it++) {
 	string chunk;
 	if (it->first > 0) {
 	    ostringstream ss;
 	    ss << it->first;
 	    chunk += string(" [p ") + ss.str() + "] ";
 	}
 	chunk += it->second;
 	abstract.push_back(chunk);
    }
    return true;
 }
 bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
 {
    LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
    if (!m_ndb || !m_ndb->m_isopen) {
 	LOGERR(("Db::makeDocAbstract: no db\n"));
 	return false;
    }
-    vector<string> vab;
+    vector<pair<int, string> > vpabs;
-    XAPTRY(vab = m_ndb->makeAbstract(doc.xdocid, query),
+    if (!makeDocAbstract(doc, query, vpabs))
-           m_ndb->xrdb, m_reason);
+	return false;
-    for (vector<string>::const_iterator it = vab.begin(); 
+    for (vector<pair<int, string> >::const_iterator it = vpabs.begin(); 
-	 it != vab.end(); it++) {
+	 it != vpabs.end(); it++) {
-	abstract.append(*it);
+	abstract.append(it->second);
 	abstract.append(cstr_ellipsis);
    }
    return m_reason.empty() ? true : false;
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -68,6 +68,11 @@ enum value_slot {
    VALUE_SIG = 10      // Doc sig as chosen by app (ex: mtime+size
 };
 /** Status returned by the abstract building methods. ABSRES_ERROR is 0, 
  * so that the value can also be tested as a boolean success indicator */
 enum abstract_result {
    // No abstract could be built (no db, no usable terms, error...)
    ABSRES_ERROR = 0,
    // Abstract built normally
    ABSRES_OK = 1,
    // Abstract built, but one of the size limits was hit while doing so
    ABSRES_TRUNC = 2
 };
 class SearchData;
 class TermIter;
 class Query;
@ -291,11 +296,21 @@ class Db {
    /** Set parameters for synthetic abstract generation */
    void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);
    /** Return the current value of the synthetic abstract context length
     * parameter (m_synthAbsWordCtxLen) */
    int getAbsCtxLen() const 
    {
 	return m_synthAbsWordCtxLen;
    }
    /** Build synthetic abstract for document, extracting chunks relevant for
     * the input query. This uses index data only (no access to the file) */
    // Abstract return as one string
    bool makeDocAbstract(Doc &doc, Query *query, string& abstract);
    // Returned as a snippets vector
    bool makeDocAbstract(Doc &doc, Query *query, vector<string>& abstract);
    // Returned as a vector of pair<page,snippet> page is 0 if unknown
    abstract_result makeDocAbstract(Doc &doc, Query *query, 
 				    vector<pair<int, string> >& abstract, 
 				    int maxoccs= -1, int ctxwords = -1);
    /** Retrieve detected page breaks positions */
    int getFirstMatchPage(Doc &doc, Query *query);
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@ -94,7 +94,9 @@ class Db::Native {
 			const vector<string>& terms,
 			std::multimap<double, string>& byQ);
    void setDbWideQTermsFreqs(Query *query);
-    vector<string> makeAbstract(Xapian::docid id, Query *query);
+    abstract_result makeAbstract(Xapian::docid id, Query *query, 
 				 vector<pair<int, string> >&, int maxoccs = -1,
 				 int ctxwords = -1);
    bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
    int getFirstMatchPage(Xapian::docid docid, Query *query);
    int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
--- a/src/sampleconf/recoll.conf.in
+++ b/src/sampleconf/recoll.conf.in
@ -81,6 +81,8 @@ indexstemminglanguages = english
 # unac_except_trans = Ää Öö Üü ää öö üü ßss
 # In French, you probably want to decompose oe and ae
 # unac_except_trans = œoe Œoe æae Æae
 # Actually, this seems a reasonable default for all until someone protests.
 unac_except_trans = åå Åå ää Ää öö Öö üü Üü ßss œoe Œoe æae ÆAE ﬁfi ﬂfl
 # Where to store the database (directory). This may be an absolute path,
 # else it is taken as relative to the configuration directory (-c argument
--- a/src/unac/unac.c
+++ b/src/unac/unac.c
@ -31,9 +31,9 @@
 #include <map>
 #include <string>
 #include <algorithm>
 #include <tr1/unordered_map>
 using std::string;
-using std::vector;
+using std::tr1::unordered_map;
 using std::map;
 #include "smallut.h"
 /* 
@ -41,20 +41,16 @@ using std::map;
   should not be translated according to what UnicodeData says, but
   instead according to some local rule. There will usually be very
   few of them, but they must be looked up for every translated char.
   We use a sorted vector for fastest elimination by binary search and
   a vector<string> to store the translations
 */
-static vector<unsigned short> except_chars;
+unordered_map<unsigned short, string> except_trans;
-static vector<string> except_trans;
+static inline bool is_except_char(unsigned short c, string& trans)
 static inline size_t is_except_char(unsigned short c)
 {
-    vector<unsigned short>::iterator it = 
+    unordered_map<unsigned short, string>::const_iterator it 
-	std::lower_bound(except_chars.begin(), except_chars.end(), c);
+	= except_trans.find(c);
-    if (it == except_chars.end() || *it != c) {
+    if (it == except_trans.end())
-	return (size_t(-1));
+	return false;
-    }
+    trans = it->second;
-    return std::distance(except_chars.begin(), it);
+    return true;
 }
 #endif /* RECOLL_DATADIR */
@ -12715,21 +12711,18 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
    //   - unaccenting: do nothing (copy original char)
    //   - unac+fold: use table
    //   - fold: use the unicode data.
-    size_t idx;
+    string trans;
-    if (what != UNAC_FOLD && except_chars.size() != 0 && 
+    if (what != UNAC_FOLD && except_trans.size() != 0 && 
-	(idx=is_except_char(c)) != (size_t)-1) {
+	is_except_char(c, trans)) {
 	if (what == UNAC_UNAC) {
 	    // Unaccent only. Do nothing
 	    p = 0;
 	    l = 0;
 	} else {
 	    // Has to be UNAC_UNACFOLD: use table
-	    p = (unsigned short *)(except_trans[idx].c_str() + 2);
+	    p = (unsigned short *)trans.c_str();
-	    l = (except_trans[idx].size() - 2) / 2;
+	    l = trans.size() / 2;
 	}
 	/* if (p) {unsigned char *cp = (unsigned char *)p;
 	   fprintf(stderr, "l %d cp[0] %x cp[1] %x\n", l, (unsigned int)cp[0], 
 	   (unsigned int)cp[1]);}*/
    } else {
 #endif /* RECOLL_DATADIR */
 	unac_uf_char_utf16_(c, p, l, what)
@ -13076,7 +13069,6 @@ const char* unac_version(void)
 #ifdef RECOLL_DATADIR
 void unac_set_except_translations(const char *spectrans)
 {
    except_chars.clear();
    except_trans.clear();
    if (!spectrans || !spectrans[0])
 	return;
@ -13123,14 +13115,8 @@ void unac_set_except_translations(const char *spectrans)
 	else
 	    ch = (out[0] << 8) | (out[1] & 0xff);
-	/* fprintf(stderr, "outsize %d Ch is 0x%hx\n", int(outsize), ch);*/
+	except_trans[ch] = string((const char *)(out + 2), outsize-2);
 	except_chars.push_back(ch);
 	// We keep ch as the first 2 bytes in the translation so that 
 	// both vectors sort identically
 	except_trans.push_back(string((const char *)out, outsize));
 	free(out);
    }
    std::sort(except_chars.begin(), except_chars.end());
    std::sort(except_trans.begin(), except_trans.end());
 }
 #endif /* RECOLL_DATADIR */
--- a/src/utils/hldata.h
+++ b/src/utils/hldata.h
@ -5,20 +5,23 @@
 #include <string>
 #include <set>
-/** Store about user terms and their expansions. This is used mostly for
+/** Store data about user search terms and their expansions. This is used
- *  highlighting result text and walking the matches.
+ * mostly for highlighting result text and walking the matches, generating 
 * spelling suggestions.
 */
 struct HighlightData {
-    /** The user terms, excluding those with wildcards. 
+    /** The user terms, excluding those with wildcards. This list is
-     * This list is intended for orthographic suggestions but the terms are
+     * intended for orthographic suggestions so the terms are always
-     * unaccented lowercased anyway because they are compared to the dictionary
+     * lowercased, unaccented or not depending on the type of index 
-     * generated from the index term list (which is unaccented).
+     * (as the spelling dictionary is generated from the index terms).
     */
    std::set<std::string> uterms;
-    /** The original user terms-or-groups. This is for displaying the matched
+    /** The original user terms-or-groups. This is for display
-     * terms or groups, ie in relation with highlighting or skipping to the 
+     * purposes: ie when creating a menu to look for a specific
-     * next match. These are raw, diacritics and case preserved.
+     * matched group inside a preview window. We want to show the
     * user-entered data in the menu, not some transformation, so
     * these are always raw, diacritics and case preserved.
     */
    std::vector<std::vector<std::string> > ugroups;
@ -35,7 +38,7 @@ struct HighlightData {
    /** Index into ugroups for each group. Parallel to groups. As a
     * user term or group may generate many processed/expanded terms
-     * or groups, this is how we relate them 
+     * or groups, this is how we relate an expansion to its source.
     */
    std::vector<unsigned int> grpsugidx;
--- a/unac/unac.c
+++ b/unac/unac.c
@ -31,9 +31,9 @@
 #include <map>
 #include <string>
 #include <algorithm>
 #include <tr1/unordered_map>
 using std::string;
-using std::vector;
+using std::tr1::unordered_map;
 using std::map;
 #include "smallut.h"
 /* 
@ -41,20 +41,16 @@ using std::map;
   should not be translated according to what UnicodeData says, but
   instead according to some local rule. There will usually be very
   few of them, but they must be looked up for every translated char.
   We use a sorted vector for fastest elimination by binary search and
   a vector<string> to store the translations
 */
-static vector<unsigned short> except_chars;
+unordered_map<unsigned short, string> except_trans;
-static vector<string> except_trans;
+static inline bool is_except_char(unsigned short c, string& trans)
 static inline size_t is_except_char(unsigned short c)
 {
-    vector<unsigned short>::iterator it = 
+    unordered_map<unsigned short, string>::const_iterator it 
-	std::lower_bound(except_chars.begin(), except_chars.end(), c);
+	= except_trans.find(c);
-    if (it == except_chars.end() || *it != c) {
+    if (it == except_trans.end())
-	return (size_t(-1));
+	return false;
-    }
+    trans = it->second;
-    return std::distance(except_chars.begin(), it);
+    return true;
 }
 #endif /* RECOLL_DATADIR */
@ -12715,21 +12711,18 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
    //   - unaccenting: do nothing (copy original char)
    //   - unac+fold: use table
    //   - fold: use the unicode data.
-    size_t idx;
+    string trans;
-    if (what != UNAC_FOLD && except_chars.size() != 0 && 
+    if (what != UNAC_FOLD && except_trans.size() != 0 && 
-	(idx=is_except_char(c)) != (size_t)-1) {
+	is_except_char(c, trans)) {
 	if (what == UNAC_UNAC) {
 	    // Unaccent only. Do nothing
 	    p = 0;
 	    l = 0;
 	} else {
 	    // Has to be UNAC_UNACFOLD: use table
-	    p = (unsigned short *)(except_trans[idx].c_str() + 2);
+	    p = (unsigned short *)trans.c_str();
-	    l = (except_trans[idx].size() - 2) / 2;
+	    l = trans.size() / 2;
 	}
 	/* if (p) {unsigned char *cp = (unsigned char *)p;
 	   fprintf(stderr, "l %d cp[0] %x cp[1] %x\n", l, (unsigned int)cp[0], 
 	   (unsigned int)cp[1]);}*/
    } else {
 #endif /* RECOLL_DATADIR */
 	unac_uf_char_utf16_(c, p, l, what)
@ -13076,7 +13069,6 @@ const char* unac_version(void)
 #ifdef RECOLL_DATADIR
 void unac_set_except_translations(const char *spectrans)
 {
    except_chars.clear();
    except_trans.clear();
    if (!spectrans || !spectrans[0])
 	return;
@ -13123,14 +13115,8 @@ void unac_set_except_translations(const char *spectrans)
 	else
 	    ch = (out[0] << 8) | (out[1] & 0xff);
-	/* fprintf(stderr, "outsize %d Ch is 0x%hx\n", int(outsize), ch);*/
+	except_trans[ch] = string((const char *)(out + 2), outsize-2);
 	except_chars.push_back(ch);
 	// We keep ch as the first 2 bytes in the translation so that 
 	// both vectors sort identically
 	except_trans.push_back(string((const char *)out, outsize));
 	free(out);
    }
    std::sort(except_chars.begin(), except_chars.end());
    std::sort(except_trans.begin(), except_trans.end());
 }
 #endif /* RECOLL_DATADIR */
--- a/website/index.html.en
+++ b/website/index.html.en
@ -86,6 +86,13 @@
      <h2>News</h2>
      <div class="news">
      <ul>
        <li>2012-09-21: an
          <a href="https://bitbucket.org/medoc/recoll/wiki/ElinksBeagle">easy
                 way</a> to extend the "Beagle queue"
         Recoll web history indexing mechanism to other browsers than
         Firefox (Elinks in this case).
         </li>
        <li>2012-09-13: the next Recoll version will maybe acquire switchable
          case and diacritics sensitivity. I am writing 
          a few pages about the