Add a new environment variable "RECOLL_ACTIVE_EXTRA_DBS", which helps

choose the active external indexes list.
This commit is contained in:
hxcan 2012-03-29 16:56:38 +08:00
commit 5bd071c5a6
788 changed files with 332998 additions and 0 deletions

102
.hgignore Normal file
View file

@ -0,0 +1,102 @@
syntax: glob
*.o
*.dep
*.dep.stamp
*~
\#*
src/Makefile
src/autom4te.cache
src/bincimapmime/alldeps
src/common/autoconfig.h
src/common/rclversion.h
src/config.log
src/config.status
src/desktop/unity-lens-recoll/Makefile
src/desktop/unity-lens-recoll/autom4te.cache
src/desktop/unity-lens-recoll/bin/unity-recoll-daemon
src/desktop/unity-lens-recoll/config.log
src/desktop/unity-lens-recoll/config.status
src/desktop/unity-lens-recoll/data/recoll.lens
src/desktop/unity-lens-recoll/data/unity-lens-recoll.service
src/doc/user/HTML.manifest
src/doc/user/index.html
src/doc/user/rcl.indexing.beaglequeue.html
src/doc/user/rcl.indexing.config.html
src/doc/user/rcl.indexing.html
src/doc/user/rcl.indexing.monitor.html
src/doc/user/rcl.indexing.periodic.html
src/doc/user/rcl.indexing.storage.html
src/doc/user/rcl.install.building.html
src/doc/user/rcl.install.config.html
src/doc/user/rcl.install.external.html
src/doc/user/rcl.install.html
src/doc/user/rcl.introduction.html
src/doc/user/rcl.introduction.recoll.html
src/doc/user/rcl.introduction.search.html
src/doc/user/rcl.kicker-applet.html
src/doc/user/rcl.program.api.html
src/doc/user/rcl.program.fields.html
src/doc/user/rcl.program.html
src/doc/user/rcl.search.anchorwild.html
src/doc/user/rcl.search.commandline.html
src/doc/user/rcl.search.complex.html
src/doc/user/rcl.search.custom.html
src/doc/user/rcl.search.desktop.html
src/doc/user/rcl.search.history.html
src/doc/user/rcl.search.html
src/doc/user/rcl.search.lang.html
src/doc/user/rcl.search.multidb.html
src/doc/user/rcl.search.preview.html
src/doc/user/rcl.search.reslist.html
src/doc/user/rcl.search.sort.html
src/doc/user/rcl.search.termexplorer.html
src/doc/user/rcl.search.tips.html
src/doc/user/rcl.search.wildcards.html
src/doc/user/rcl.searchkcl.html
src/doc/user/rcl.searchkio.html
src/doc/user/rcl.searchkio.searchabledocs.html
src/doc/user/usermanual.aux
src/doc/user/usermanual.html
src/doc/user/usermanual.html-text
src/doc/user/usermanual.log
src/doc/user/usermanual.out
src/doc/user/usermanual.pdf
src/doc/user/usermanual.tex-pdf
src/doc/user/usermanual.tex-pdf-tmp
src/doc/user/usermanual.txt
src/filters/rclexecm.pyc
src/filters/rcllatinclass.pyc
src/index/alldeps
src/index/alldeps.stamp
src/index/recollindex
src/kde/kioslave/kio_recoll/builddir
src/lib/alldeps
src/lib/librcl.a
src/mk/localdefs
src/mk/sysconf
src/python/recoll/build
src/python/recoll/setup.py
src/qtgui/.moc/*
src/qtgui/.obj/*
src/qtgui/.ui/*
src/qtgui/Makefile
src/qtgui/i18n/*.qm
src/qtgui/qrc_recoll.cpp
src/qtgui/recoll
src/qtgui/recoll.app
src/qtgui/recoll.pro
src/query/alldeps
src/query/recollq
src/query/xadump
src/recollinstall
src/sampleconf/rclmon.sh
src/sampleconf/recoll.conf
tests/config/aspdict.en.rws
tests/config/history
tests/config/idxstatus.txt
tests/config/index.pid
tests/config/missing
tests/config/xapiandb
tests/indexedmimetypes/idxstatus.txt
tests/indexedmimetypes/index.pid
website/usermanual/*

View file

@ -0,0 +1,34 @@
# New ports collection makefile for: recoll
# Date created: 5 December 2005
# Whom: J.F. Dockes <jean-francois.dockes@wanadoo.fr>
#
# $FreeBSD: ports/deskutils/recoll/Makefile,v 1.44 2010/10/09 17:52:42 makc Exp $
#
# Port identity and the site the distribution file is fetched from.
PORTNAME= recoll
PORTVERSION= 1.16.2
CATEGORIES= deskutils
MASTER_SITES= http://www.lesbonscomptes.com/recoll/
MAINTAINER= jf@dockes.org
COMMENT= A personal full text search package, based on QT and Xapian
# xapian-core (1.0.12 or newer) is needed to build; the run-time dependency
# list is set to the same value, expanded immediately (:=) from BUILD_DEPENDS.
BUILD_DEPENDS= xapian-core>=1.0.12:${PORTSDIR}/databases/xapian-core
RUN_DEPENDS:= ${BUILD_DEPENDS}
# Qt 4 is used, with the GUI library and the qmake/uic/moc/rcc build tools.
USE_QT_VER= 4
QT_COMPONENTS= gui qmake_build uic_build moc_build rcc_build
# Build-system and framework knobs handled by bsd.port.mk (included below).
GNU_CONFIGURE= yes
USE_GMAKE= yes
USE_ICONV= yes
INSTALLS_ICONS= yes
USE_FAM= yes
# Manual pages installed by the port, listed per manual section.
MAN1= recoll.1 recollindex.1
MAN5= recoll.conf.5
# After the patch phase, turn the "CXXFLAGS =" assignment in mk/localdefs.in
# into "CXXFLAGS ?=" -- presumably so that a CXXFLAGS value supplied by the
# ports framework is not overwritten (NOTE(review): confirm intent).
post-patch:
${REINPLACE_CMD} -e 's/^CXXFLAGS =/CXXFLAGS ?=/' \
${WRKSRC}/mk/localdefs.in
.include <bsd.port.mk>

View file

@ -0,0 +1,2 @@
SHA256 (recoll-1.16.2.tar.gz) = f0f29dff2d82ef8541c51963870f31daf28472f3c8822c81c17c346769b77355
SIZE (recoll-1.16.2.tar.gz) = 1422148

View file

@ -0,0 +1,24 @@
Recoll is a personal full text search package with a QT graphical
interface. It is based on a very strong backend (Xapian), for which it
provides an easy to use and feature-rich interface.
Features:
* Free, GPL license.
* QT-based GUI.
* Supports the following document types (and their compressed versions):
- Natively: text, html, OpenOffice files, maildir and mailbox
(Mozilla and Thunderbird mail ok) with attachments, gaim log files.
- With external helpers: pdf (pdftotext), postscript (ghostscript),
msword (antiword), excel, ppt (catdoc), rtf (unrtf).
* Powerful query facilities, with boolean searches, phrases, filter on
file types and directory tree.
* Support for multiple charsets. Internal processing and storage uses
Unicode UTF-8.
* Stemming performed at query time (can switch stemming language after
indexing)
* Easy installation. No database daemon, web server or exotic language
necessary.
* An indexer which runs either as a thread inside the GUI or as an
external, cron'able program.
WWW: http://www.lesbonscomptes.com/recoll/

View file

@ -0,0 +1,99 @@
bin/recoll
bin/recollindex
share/applications/recoll-searchgui.desktop
share/icons/hicolor/48x48/apps/recoll.png
share/pixmaps/recoll.png
%%DATADIR%%/doc/docbook.css
%%DATADIR%%/doc/usermanual.html
%%DATADIR%%/examples/fields
%%DATADIR%%/examples/mimeconf
%%DATADIR%%/examples/mimemap
%%DATADIR%%/examples/mimeview
%%DATADIR%%/examples/rclmon.sh
%%DATADIR%%/examples/recoll.conf
%%DATADIR%%/filters/hotrecoll.py
%%DATADIR%%/filters/rclabw
%%DATADIR%%/filters/rclaptosidman
%%DATADIR%%/filters/rclaudio
%%DATADIR%%/filters/rclchm
%%DATADIR%%/filters/rcldjvu
%%DATADIR%%/filters/rcldoc
%%DATADIR%%/filters/rcldvi
%%DATADIR%%/filters/rclexecm.py
%%DATADIR%%/filters/rclfb2
%%DATADIR%%/filters/rclflac
%%DATADIR%%/filters/rclgaim
%%DATADIR%%/filters/rclics
%%DATADIR%%/filters/rclid3
%%DATADIR%%/filters/rclimg
%%DATADIR%%/filters/rclinfo
%%DATADIR%%/filters/rclkar
%%DATADIR%%/filters/rclkwd
%%DATADIR%%/filters/rcllatinclass.py
%%DATADIR%%/filters/rcllatinstops.zip
%%DATADIR%%/filters/rcllyx
%%DATADIR%%/filters/rclman
%%DATADIR%%/filters/rclnull
%%DATADIR%%/filters/rclogg
%%DATADIR%%/filters/rclopxml
%%DATADIR%%/filters/rclpdf
%%DATADIR%%/filters/rclppt
%%DATADIR%%/filters/rclps
%%DATADIR%%/filters/rclpurple
%%DATADIR%%/filters/rclpython
%%DATADIR%%/filters/rclrar
%%DATADIR%%/filters/rclrtf
%%DATADIR%%/filters/rclscribus
%%DATADIR%%/filters/rclshowinfo
%%DATADIR%%/filters/rclsiduxman
%%DATADIR%%/filters/rclsoff
%%DATADIR%%/filters/rclsvg
%%DATADIR%%/filters/rcltex
%%DATADIR%%/filters/rcltext
%%DATADIR%%/filters/rcluncomp
%%DATADIR%%/filters/rclwar
%%DATADIR%%/filters/rclwpd
%%DATADIR%%/filters/rclxls
%%DATADIR%%/filters/rclzip
%%DATADIR%%/filters/xdg-open
%%DATADIR%%/images/aptosid-book.png
%%DATADIR%%/images/aptosid-manual.png
%%DATADIR%%/images/document.png
%%DATADIR%%/images/drawing.png
%%DATADIR%%/images/folder.png
%%DATADIR%%/images/html.png
%%DATADIR%%/images/image.png
%%DATADIR%%/images/message.png
%%DATADIR%%/images/mozilla_doc.png
%%DATADIR%%/images/pdf.png
%%DATADIR%%/images/pidgin.png
%%DATADIR%%/images/postscript.png
%%DATADIR%%/images/presentation.png
%%DATADIR%%/images/sidux-book.png
%%DATADIR%%/images/source.png
%%DATADIR%%/images/sownd.png
%%DATADIR%%/images/soffice.png
%%DATADIR%%/images/spreadsheet.png
%%DATADIR%%/images/text-x-python.png
%%DATADIR%%/images/txt.png
%%DATADIR%%/images/wordprocessing.png
%%DATADIR%%/translations/recoll_cs.qm
%%DATADIR%%/translations/recoll_fr.qm
%%DATADIR%%/translations/recoll_it.qm
%%DATADIR%%/translations/recoll_lt.qm
%%DATADIR%%/translations/recoll_de.qm
%%DATADIR%%/translations/recoll_uk.qm
%%DATADIR%%/translations/recoll_ru.qm
%%DATADIR%%/translations/recoll_tr.qm
%%DATADIR%%/translations/recoll_xx.qm
@dirrm %%DATADIR%%/doc
@dirrm %%DATADIR%%/examples
@dirrm %%DATADIR%%/filters
@dirrm %%DATADIR%%/images
@dirrm %%DATADIR%%/translations
@dirrm %%DATADIR%%
@dirrmtry share/applications
@dirrmtry share/icons/hicolor/48x48/apps
@dirrmtry share/icons/hicolor/48x48
@dirrmtry share/icons/hicolor
@dirrmtry share/icons

View file

@ -0,0 +1,86 @@
#!/bin/sh
# Build Debian source packages for recoll, the recoll KIO slave and the
# recoll Unity lens, once per Ubuntu series, and upload each of them to a
# PPA with dput.
#
# The script works relative to the current directory, which must hold the
# unpacked source trees (recoll-$RCLVERS, recoll-lens-$LENSVERS) and the
# packaging templates (debianrclqt4, debiankio, debianunitylens). Each
# template changelog carries the literal placeholders SERIES and PPAVERS,
# which are substituted per series below.
#
# Packages needed
# sudo apt-get install g++ gnupg dput lintian mini-dinstall yaclc bzr devscripts
# For the kio: (and kdesdk?)
# sudo apt-get install pkg-kde-tools cdbs

RCLVERS=1.17.1
LENSVERS=1.17.1.2654
PPAVERS=1

# The target PPA is chosen from the recoll version.
case $RCLVERS in
    [23]*) PPANAME=recollexp-ppa;;
    1.14*) PPANAME=recoll-ppa;;
    *)     PPANAME=recoll15-ppa;;
esac
#PPANAME=recollexp-ppa

echo "PPA: $PPANAME. Type CR if Ok, else ^C"
# The answer is not used: this is only a pause letting the user abort.
read rep

####### QT4
debdir=debianrclqt4
# Full list, currently not built: "lucid maverick natty oneiric precise"
series4="natty oneiric precise"
for series in $series4 ; do
    # Start from a fresh copy of the packaging template.
    rm -rf recoll-${RCLVERS}/debian
    cp -rp ${debdir}/ recoll-${RCLVERS}/debian
    # Prefer a series-specific control file when the template has one.
    if test -f $debdir/control-$series ; then
        cp -f -p $debdir/control-$series recoll-${RCLVERS}/debian/control
    else
        cp -f -p $debdir/control recoll-${RCLVERS}/debian/control
    fi
    sed -e s/SERIES/${series}/g \
        -e s/PPAVERS/${PPAVERS}/g \
        < ${debdir}/changelog > recoll-${RCLVERS}/debian/changelog
    # '&&' and not ';': if the cd fails, debuild must not run in the
    # current directory. Any failure stops the loop.
    (cd recoll-${RCLVERS} && debuild -S -sa) || break
    dput $PPANAME recoll_${RCLVERS}-1~ppa${PPAVERS}~${series}1_source.changes
done

### KIO
# Currently disabled (empty list). Full list would be:
# "lucid maverick natty oneiric precise"
seriesk=""
debdir=debiankio
for series in $seriesk ; do
    rm -rf recoll-${RCLVERS}/debian
    cp -rp ${debdir}/ recoll-${RCLVERS}/debian
    sed -e s/SERIES/$series/g \
        -e s/PPAVERS/${PPAVERS}/g \
        < ${debdir}/changelog > recoll-${RCLVERS}/debian/changelog
    (cd recoll-${RCLVERS} && debuild -S -sa) || break
    dput $PPANAME kio-recoll_${RCLVERS}-0~ppa${PPAVERS}~${series}1_source.changes
done

### Unity Lens
seriesl="natty oneiric precise"
#seriesl="oneiric"
debdir=debianunitylens
for series in $seriesl ; do
    rm -rf recoll-lens-${LENSVERS}/debian
    cp -rp ${debdir}/ recoll-lens-${LENSVERS}/debian
    sed -e s/SERIES/$series/g \
        -e s/PPAVERS/${PPAVERS}/g \
        < ${debdir}/changelog > recoll-lens-${LENSVERS}/debian/changelog
    (cd recoll-lens-${LENSVERS} && debuild -S -sa) || break
    dput $PPANAME recoll-lens_${LENSVERS}-1~ppa${PPAVERS}~${series}1_source.changes
done

View file

@ -0,0 +1,48 @@
kio-recoll (1.17.0-0~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.17.0
-- Jean-Francois Dockes <jf@dockes.org> Sun, 25 Mar 2012 18:05:00 +0200
kio-recoll (1.16.2-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.16.2
-- Jean-Francois Dockes <jf@dockes.org> Mon, 07 Nov 2011 17:57:00 +0200
kio-recoll (1.16.1-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.16.1
-- Jean-Francois Dockes <jf@dockes.org> Wed, 28 Sep 2011 15:07:00 +0200
kio-recoll (1.16.0-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.16.0
-- Jean-Francois Dockes <jf@dockes.org> Wed, 07 Sep 2011 18:30:00 +0200
kio-recoll (1.15.8-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.15.8
-- Jean-Francois Dockes <jf@dockes.org> Mon, 02 May 2011 17:27:00 +0200
kio-recoll (1.15.5-0~ppa1~SERIES1) SERIES; urgency=low
* Update to recoll version 1.15.5
-- Jean-Francois Dockes <jf@dockes.org> Fri, 04 Mar 2011 13:54:00 +0200
kio-recoll (1.15.2-0~ppa1~SERIES1) SERIES; urgency=low
* Update to recoll version 1.15.2
-- Jean-Francois Dockes <jf@dockes.org> Tue, 15 Feb 2011 08:43:00 +0200
kio-recoll (1.15.0-0~ppa1~SERIES1) SERIES; urgency=low
* Update to recoll version 1.15.0
-- Jean-Francois Dockes <jf@dockes.org> Wed, 02 Feb 2011 09:48:00 +0200
kio-recoll (1.14.3-0~ppa1~SERIES1) SERIES; urgency=low
* Update to recoll version 1.14.3
-- Jean-Francois Dockes <jf@dockes.org> Thu, 25 Nov 2010 10:25:00 +0200
kio-recoll (1.14.2-0~ppa1~SERIES1) SERIES; urgency=low
* Update to recoll version 1.14.2
-- Jean-Francois Dockes <jf@dockes.org> Sat, 25 Sep 2010 09:37:20 +0200
kio-recoll (1.13.01-0~ppa4~SERIES1) SERIES; urgency=low
* Update to recoll version 1.13.01
-- Jean-Francois Dockes <jf@dockes.org> Thu, 07 Jan 2010 11:26:00 +0100
kio-recoll (1.12.3-0~ppa4~jaunty1) jaunty; urgency=low
* Initial release
-- Jean-Francois Dockes <jf@dockes.org> Tue, 24 Nov 2009 08:55:00 +0100

View file

@ -0,0 +1 @@
7

View file

@ -0,0 +1,25 @@
Source: kio-recoll
Section: kde
Priority: extra
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
Build-Depends: cdbs, cmake, debhelper (>= 7), kdelibs5-dev (>= 4:4.2.2), pkg-kde-tools (>= 0.4.0), libxapian-dev, libz-dev
Standards-Version: 3.8.1
Homepage: http://www.recoll.org/
Package: kio-recoll
Architecture: any
Depends: ${misc:Depends}, ${shlibs:Depends}
Description: A Recoll KIO slave for KDE 4
A Recoll KIO slave for KDE 4, allows performing a Recoll search by
entering an appropriate URL in a KDE open dialog, or with an HTML-based
interface displayed in Konqueror.
The HTML-based interface is similar to the Recoll GUI QT-based interface,
slightly less powerful. It allows performing a search while staying fully
within the KDE framework: drag and drop from the result list works
normally and you have your normal choice of applications for opening files.
An alternative interface uses a directory view of search results. Due to
limitations in the current KIO slave interface, it is currently not
obviously useful.
The interface is described in more detail inside a help file which you can
access by entering recoll:/ inside the konqueror URL line (this works only
if the recoll KIO slave has been previously installed).

View file

@ -0,0 +1,113 @@
This package was debianized by Jean-Francois Dockes <jfd@recoll.org> on
Wed, 10 Jan 2007 16:04:13 +0100.
It was downloaded from http://www.recoll.org
Upstream Author: Jean-Francois Dockes <jfd@recoll.org>
Copyright: (C) 2005,2006, Jean-Francois Dockes <jfd@recoll.org>
License:
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this package; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
On Debian systems, the complete text of the GNU General
Public License can be found in `/usr/share/common-licenses/GPL'.
The Debian packaging is (C) 2007, Jean-Francois Dockes <jfd@recoll.org> and
is licensed under the GPL, see above.
Portions of the software are:
Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL:
/* This file is part of The New Aspell
* Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL
* license version 2.0 or 2.1. You should have received a copy of the
* LGPL license along with this library if you did not you can find it
* at http://www.gnu.org/.
On Debian systems, the complete text of the GNU LGPL
can be found in `/usr/share/common-licenses/LGPL-2'.
Copyright 2002-2005 Andreas Aardal Hanssen
Copyright (C) 2000-2004 Mikio Hirabayashi
Copyright 1999,2000,2001 BrightStation PLC
Copyright 2001 Ananova Ltd
Copyright 2002 Olly Betts
Copyright (C) 2000, 2001, 2002 Loic Dachary <loic@senga.org>
- GPL V2 or later, same license text as above
Copyright (c) 1991-2004 Unicode, Inc.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2006 Unicode, Inc. All rights reserved. Distributed under
the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining a
copy of the Unicode data files and any associated documentation (the "Data
Files") or Unicode software and any associated documentation (the
"Software") to deal in the Data Files or Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, and/or sell copies of the Data Files or Software, and
to permit persons to whom the Data Files or Software are furnished to do
so, provided that (a) the above copyright notice(s) and this permission
notice appear with all copies of the Data Files or Software, (b) both the
above copyright notice(s) and this permission notice appear in associated
documentation, and (c) there is clear notice in each modified Data File or
in the Software as well as in the documentation associated with the Data
File(s) or Software that the data or software has been modified.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
/*
* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
*
* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
* rights reserved.
*
* License to copy and use this software is granted provided that it
* is identified as the "RSA Data Security, Inc. MD5 Message-Digest
* Algorithm" in all material mentioning or referencing this software
* or this function.
*
* License is also granted to make and use derivative works provided
* that such works are identified as "derived from the RSA Data
* Security, Inc. MD5 Message-Digest Algorithm" in all material
* mentioning or referencing the derived work.
*
* RSA Data Security, Inc. makes no representations concerning either
* the merchantability of this software or the suitability of this
* software for any particular purpose. It is provided "as is"
* without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this
* documentation and/or software.
*/

View file

@ -0,0 +1,2 @@
usr/lib/kde4
usr/share/kde4/services

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,6 @@
#!/usr/bin/make -f
# CDBS-style rules file: there are no explicit targets here, the build logic
# comes from the two included makefile fragments.
include /usr/share/cdbs/1/rules/debhelper.mk
include /usr/share/pkg-kde-tools/makefiles/1/cdbs/kde.mk
# Subdirectory of the source tree holding the KIO slave sources; presumably
# read by the included fragments as the directory to build (TODO confirm
# against the CDBS documentation).
DEB_SRCDIR = kde/kioslave/kio_recoll

View file

@ -0,0 +1,4 @@
version=3
http://www.recoll.org/download.html recoll-(.*)\.tar\.gz

View file

@ -0,0 +1,59 @@
recoll (1.14.3-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.14.3
-- Jean-Francois Dockes <jf@dockes.org> Thu, 25 Nov 2010 10:25:00 +0200
recoll (1.14.2-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.14.2
-- Jean-Francois Dockes <jf@dockes.org> Sat, 25 Sep 2010 09:37:20 +0200
recoll (1.13.04-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.13.04
-- Jean-Francois Dockes <jf@dockes.org> Wed, 14 Apr 2010 13:42:00 +0200
recoll (1.13.00-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.13.00
-- Jean-Francois Dockes <jf@dockes.org> Tue, 05 Jan 2010 09:52:20 +0100
recoll (1.12.4-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.12.4
-- Jean-Francois Dockes <jf@dockes.org> Wed, 28 Oct 2009 17:16:20 +0200
recoll (1.12.3-0~ppa3~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.12.3
-- Jean-Francois Dockes <jf@dockes.org> Wed, 28 Oct 2009 17:16:20 +0200
recoll (1.12.2-0~ppa3~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.12.2
-- Jean-Francois Dockes <jf@dockes.org> Mon, 19 Oct 2009 16:17:46 +0200
recoll (1.11.0-0ubuntu1) dapper; urgency=low
* Updated package to recoll version 1.11.0
-- Jean-Francois Dockes <jfd@recoll.org> Sun, 19 Oct 2008 09:57:13 +0200
recoll (1.10.6-0ubuntu1) dapper; urgency=low
* Updated package to recoll version 1.10.6
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 12 Sep 2008 10:14:20 +0200
recoll (1.10.4-0ubuntu1) hardy; urgency=low
* Updated package to recoll version 1.10.4
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 29 Aug 2008 15:39:40 +0200
recoll (1.10.1-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.10.1
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 01 Feb 2008 11:30:01 +0100
recoll (1.10.0-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.10.0
-- Jean-Francois Dockes <jfd@recoll.org> Sun, 11 Nov 2007 15:34:51 +0200
recoll (1.9.0-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.9.0
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 7 Sep 2007 15:34:51 +0200
recoll (1.8.1-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.8.1
-- Jean-Francois Dockes <jfd@recoll.org> Wed, 7 Mar 2007 09:08:05 +0100
recoll (1.7.5-0ubuntu1) feisty; urgency=low
* Initial release
-- Jean-Francois Dockes <jfd@recoll.org> Wed, 10 Jan 2007 16:04:13 +0100

View file

@ -0,0 +1 @@
5

View file

@ -0,0 +1,35 @@
Source: recoll
Section: x11
Priority: optional
Maintainer: Jean-Francois Dockes <jf@dockes.org>
Build-Depends: debhelper (>= 5), libqt3-headers, libqt3-mt-dev, qt3-dev-tools, libxapian-dev
Standards-Version: 3.7.2
Package: recoll
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Recommends: aspell
Description: a personal full text search package with a QT GUI
The Recoll personal full text search package is based on a very strong
backend (Xapian), for which it provides an easy to use and feature-rich
interface.
.
Features:
* QT-based GUI.
* Supports the following document types (and their compressed versions):
- Natively: text, html, OpenOffice files, maildir and mailbox
(Mozilla and Thunderbird mail ok) with attachments, gaim log files.
- With external helpers: pdf (pdftotext), postscript (ghostscript),
msword (antiword), excel, ppt (catdoc), rtf (unrtf).
* Powerful query facilities, with boolean searches, phrases, filter on
file types and directory tree.
* Support for multiple charsets. Internal processing and storage uses
Unicode UTF-8.
* Stemming performed at query time (can switch stemming language after
indexing)
* Easy installation. No database daemon, web server or exotic language
necessary.
* An indexer which runs either as a thread inside the GUI or as an
external, cron'able program.
.
Homepage: <http://www.lesbonscomptes.com/recoll>

View file

@ -0,0 +1,113 @@
This package was debianized by Jean-Francois Dockes <jfd@recoll.org> on
Wed, 10 Jan 2007 16:04:13 +0100.
It was downloaded from http://www.recoll.org
Upstream Author: Jean-Francois Dockes <jfd@recoll.org>
Copyright: (C) 2005,2006, Jean-Francois Dockes <jfd@recoll.org>
License:
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this package; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
On Debian systems, the complete text of the GNU General
Public License can be found in `/usr/share/common-licenses/GPL'.
The Debian packaging is (C) 2007, Jean-Francois Dockes <jfd@recoll.org> and
is licensed under the GPL, see above.
Portions of the software are:
Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL:
/* This file is part of The New Aspell
* Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL
* license version 2.0 or 2.1. You should have received a copy of the
* LGPL license along with this library if you did not you can find it
* at http://www.gnu.org/.
On Debian systems, the complete text of the GNU LGPL
can be found in `/usr/share/common-licenses/LGPL-2'.
Copyright 2002-2005 Andreas Aardal Hanssen
Copyright (C) 2000-2004 Mikio Hirabayashi
Copyright 1999,2000,2001 BrightStation PLC
Copyright 2001 Ananova Ltd
Copyright 2002 Olly Betts
Copyright (C) 2000, 2001, 2002 Loic Dachary <loic@senga.org>
- GPL V2 or later, same license text as above
Copyright (c) 1991-2004 Unicode, Inc.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2006 Unicode, Inc. All rights reserved. Distributed under
the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining a
copy of the Unicode data files and any associated documentation (the "Data
Files") or Unicode software and any associated documentation (the
"Software") to deal in the Data Files or Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, and/or sell copies of the Data Files or Software, and
to permit persons to whom the Data Files or Software are furnished to do
so, provided that (a) the above copyright notice(s) and this permission
notice appear with all copies of the Data Files or Software, (b) both the
above copyright notice(s) and this permission notice appear in associated
documentation, and (c) there is clear notice in each modified Data File or
in the Software as well as in the documentation associated with the Data
File(s) or Software that the data or software has been modified.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
/*
* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
*
* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
* rights reserved.
*
* License to copy and use this software is granted provided that it
* is identified as the "RSA Data Security, Inc. MD5 Message-Digest
* Algorithm" in all material mentioning or referencing this software
* or this function.
*
* License is also granted to make and use derivative works provided
* that such works are identified as "derived from the RSA Data
* Security, Inc. MD5 Message-Digest Algorithm" in all material
* mentioning or referencing the derived work.
*
* RSA Data Security, Inc. makes no representations concerning either
* the merchantability of this software or the suitability of this
* software for any particular purpose. It is provided "as is"
* without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this
* documentation and/or software.
*/

View file

@ -0,0 +1 @@
README

View file

@ -0,0 +1,2 @@
?package(recoll):needs="X11" section="Apps/Databases"\
title="Personal Search Tool" command="/usr/bin/recoll"

View file

@ -0,0 +1,69 @@
#!/usr/bin/make -f
# Debian packaging rules for the recoll package: configure, build, install
# into debian/recoll and assemble the binary package with debhelper tools.
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
# This has to be exported to make some magic below work.
export DH_OPTIONS
# These are used for cross-compiling and for saving the configure script
# from having to guess our platform (since we know it already)
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
# Compiler flags: warnings and debug info always; optimisation is disabled
# when DEB_BUILD_OPTIONS contains "noopt", and set to -O2 otherwise.
CFLAGS = -Wall -g
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
CFLAGS += -O0
else
CFLAGS += -O2
endif
# Run configure with the flags above, for installation under /usr, with the
# manual pages under ${prefix}/share/man.
config.status: configure
dh_testdir
./configure CFLAGS="$(CFLAGS)" LDFLAGS="-Wl,-z,defs" \
--host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) \
--prefix=/usr --mandir=\$${prefix}/share/man
# The build-stamp file records that the build was done, so that "build"
# does not run make again once it exists.
build: build-stamp
build-stamp: config.status
dh_testdir
$(MAKE)
touch $@
# Remove the stamp and the build products; the leading '-' lets make ignore
# a failing "distclean".
clean:
dh_testdir
dh_testroot
rm -f build-stamp
-$(MAKE) distclean
dh_clean
# Install the built tree into the package staging directory
# debian/recoll/usr.
install:
dh_testdir
dh_testroot
dh_clean -k
dh_installdirs
$(MAKE) prefix=$(CURDIR)/debian/recoll/usr install
# Assemble the architecture-dependent binary package from the staged files.
binary-arch: build install
dh_testdir
dh_testroot
dh_installchangelogs ChangeLog
dh_installdocs
dh_installmenu
dh_installman
dh_link
dh_strip
dh_compress
dh_fixperms
dh_installdeb
dh_shlibdeps
dh_gencontrol
dh_md5sums
dh_builddeb
# No commands of its own: only the build and install prerequisites.
binary-indep: build install
binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install

View file

@ -0,0 +1,12 @@
# Example watch control file for uscan
# Rename this file to "watch" and then you can run the "uscan" command
# to check for upstream updates and more.
# See uscan(1) for format
# Compulsory line, this is a version 3 file
version=3
# Uncomment to examine a Webpage
# <Webpage URL> <string match>
http://www.recoll.org/download.html recoll-(.*)\.tar\.gz

View file

@ -0,0 +1,118 @@
recoll (1.17.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.17.1: unity lens support
-- Jean-Francois Dockes <jf@dockes.org> Tue, 27 Mar 2012 16:21:00 +0200
recoll (1.17.0-0~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.17.0
-- Jean-Francois Dockes <jf@dockes.org> Sun, 18 Mar 2012 16:50:00 +0200
recoll (1.16.2-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.16.2
-- Jean-Francois Dockes <jf@dockes.org> Mon, 07 Nov 2011 17:50:00 +0200
recoll (1.16.1-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.16.1
-- Jean-Francois Dockes <jf@dockes.org> Wed, 28 Sep 2011 15:07:00 +0200
recoll (1.16.0-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.16.0
-- Jean-Francois Dockes <jf@dockes.org> Wed, 07 Sep 2011 18:30:00 +0200
recoll (1.15.8-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.15.8
-- Jean-Francois Dockes <jf@dockes.org> Mon, 02 May 2011 17:27:00 +0200
recoll (1.15.7-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.15.7
-- Jean-Francois Dockes <jf@dockes.org> Thu, 10 Mar 2011 10:54:00 +0200
recoll (1.15.5-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.15.5
-- Jean-Francois Dockes <jf@dockes.org> Fri, 04 Mar 2011 13:54:00 +0200
recoll (1.15.2-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.15.2
-- Jean-Francois Dockes <jf@dockes.org> Mon, 14 Feb 2011 21:54:00 +0200
recoll (1.15.1-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.15.1 (fixes qt 4.4 build issue on karmic)
-- Jean-Francois Dockes <jf@dockes.org> Wed, 02 Feb 2011 15:48:00 +0200
recoll (1.15.0-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.15.0
-- Jean-Francois Dockes <jf@dockes.org> Wed, 02 Feb 2011 09:48:00 +0200
recoll (1.14.3-0~ppa1~SERIES1) SERIES; urgency=low
* Update to release 1.14.3
-- Jean-Francois Dockes <jf@dockes.org> Thu, 25 Nov 2010 10:25:00 +0200
recoll (1.14.2-0~ppa1~SERIES1) SERIES; urgency=low
* Update to release 1.14.2
-- Jean-Francois Dockes <jf@dockes.org> Sat, 25 Sep 2010 09:37:20 +0200
recoll (1.13.04-0~ppa2~SERIES1) SERIES; urgency=low
* Switch to qt4 on Jaunty.
-- Jean-Francois Dockes <jf@dockes.org> Sat, 01 May 2010 12:15:00 +0200
recoll (1.13.04-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.13.04
-- Jean-Francois Dockes <jf@dockes.org> Thu, 14 Apr 2010 13:42:00 +0200
recoll (1.13.02-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.13.02
* Imported current goodness from debian maintainer control and rules
files. Thanks to Kartik Mistry <kartik@debian.org>
-- Jean-Francois Dockes <jf@dockes.org> Wed, 03 Feb 2010 16:21:00 +0100
recoll (1.13.01-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.13.01
-- Jean-Francois Dockes <jf@dockes.org> Thu, 07 Jan 2010 10:52:00 +0100
recoll (1.13.00-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.13.00
-- Jean-Francois Dockes <jf@dockes.org> Tue, 05 Jan 2010 09:52:20 +0100
recoll (1.12.4-0~ppa1~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.12.4
-- Jean-Francois Dockes <jf@dockes.org> Wed, 28 Oct 2009 17:16:20 +0200
recoll (1.12.3-0~ppa3~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.12.3
-- Jean-Francois Dockes <jf@dockes.org> Wed, 28 Oct 2009 17:16:20 +0200
recoll (1.12.2-0~ppa3~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.12.2
-- Jean-Francois Dockes <jf@dockes.org> Mon, 19 Oct 2009 16:17:46 +0200
recoll (1.11.0-0ubuntu1) dapper; urgency=low
* Updated package to recoll version 1.11.0
-- Jean-Francois Dockes <jfd@recoll.org> Sun, 19 Oct 2008 09:57:13 +0200
recoll (1.10.6-0ubuntu1) dapper; urgency=low
* Updated package to recoll version 1.10.6
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 12 Sep 2008 10:14:20 +0200
recoll (1.10.4-0ubuntu1) hardy; urgency=low
* Updated package to recoll version 1.10.4
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 29 Aug 2008 15:39:40 +0200
recoll (1.10.1-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.10.1
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 01 Feb 2008 11:30:01 +0100
recoll (1.10.0-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.10.0
-- Jean-Francois Dockes <jfd@recoll.org> Wed, 11 Nov 2007 15:34:51 +0200
recoll (1.9.0-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.9.0
-- Jean-Francois Dockes <jfd@recoll.org> Fri, 7 Sep 2007 15:34:51 +0200
recoll (1.8.1-0ubuntu1) gutsy; urgency=low
* Updated package to recoll version 1.8.1
-- Jean-Francois Dockes <jfd@recoll.org> Wed, 7 Mar 2007 09:08:05 +0100
recoll (1.7.5-0ubuntu1) feisty; urgency=low
* Initial release
-- Jean-Francois Dockes <jfd@recoll.org> Wed, 10 Jan 2007 16:04:13 +0100

View file

@ -0,0 +1 @@
5

View file

@ -0,0 +1,44 @@
Source: recoll
Section: x11
Priority: optional
Maintainer: Jean-Francois Dockes <jf@dockes.org>
Build-Depends: debhelper (>= 7),
autotools-dev,
libqt4-dev,
libqtwebkit-dev,
libxapian-dev (>= 1.0.15),
libx11-dev,
libz-dev,
python-dev,
quilt
Standards-Version: 3.8.3
Package: recoll
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Recommends: aspell, python, xsltproc
Suggests: antiword, catdoc, ghostscript, libimage-exiftool-perl, poppler-utils, unrtf, python-mutagen
Description: a personal full text search package with a QT GUI
The Recoll personal full text search package is based on a very strong
backend (Xapian), for which it provides an easy to use and feature-rich
interface.
.
Features:
* QT-based GUI.
* Supports the following document types (and their compressed versions):
- Natively: text, html, OpenOffice files, maildir and mailbox
(Mozilla and Thunderbird mail ok) with attachments, gaim log files.
- With external helpers: pdf (pdftotext), postscript (ghostscript),
msword (antiword), excel, ppt (catdoc), rtf (unrtf).
* Powerful query facilities, with boolean searches, phrases, filter on
file types and directory tree.
* Support for multiple charsets. Internal processing and storage uses
Unicode UTF-8.
* Stemming performed at query time (can switch stemming language after
indexing)
* Easy installation. No database daemon, web server or exotic language
necessary.
* An indexer which runs either as a thread inside the GUI or as an
external, cron'able program.
.
Homepage: <http://www.lesbonscomptes.com/recoll>

View file

@ -0,0 +1,43 @@
Source: recoll
Section: x11
Priority: optional
Maintainer: Jean-Francois Dockes <jf@dockes.org>
Build-Depends: debhelper (>= 7),
autotools-dev,
libqt4-dev,
libxapian-dev (>= 1.0.15),
libx11-dev,
libz-dev,
python-dev,
quilt
Standards-Version: 3.8.3
Package: recoll
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Recommends: aspell, python, xsltproc
Suggests: antiword, catdoc, ghostscript, libimage-exiftool-perl, poppler-utils, unrtf, python-mutagen
Description: a personal full text search package with a QT GUI
The Recoll personal full text search package is based on a very strong
backend (Xapian), for which it provides an easy to use and feature-rich
interface.
.
Features:
* QT-based GUI.
* Supports the following document types (and their compressed versions):
- Natively: text, html, OpenOffice files, maildir and mailbox
(Mozilla and Thunderbird mail ok) with attachments, gaim log files.
- With external helpers: pdf (pdftotext), postscript (ghostscript),
msword (antiword), excel, ppt (catdoc), rtf (unrtf).
* Powerful query facilities, with boolean searches, phrases, filter on
file types and directory tree.
* Support for multiple charsets. Internal processing and storage uses
Unicode UTF-8.
* Stemming performed at query time (can switch stemming language after
indexing)
* Easy installation. No database daemon, web server or exotic language
necessary.
* An indexer which runs either as a thread inside the GUI or as an
external, cron'able program.
.
Homepage: <http://www.lesbonscomptes.com/recoll>

View file

@ -0,0 +1,113 @@
This package was debianized by Jean-Francois Dockes <jfd@recoll.org> on
Wed, 10 Jan 2007 16:04:13 +0100.
It was downloaded from http://www.recoll.org
Upstream Author: Jean-Francois Dockes <jfd@recoll.org>
Copyright: (C) 2005,2006, Jean-Francois Dockes <jfd@recoll.org>
License:
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this package; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
On Debian systems, the complete text of the GNU General
Public License can be found in `/usr/share/common-licenses/GPL'.
The Debian packaging is (C) 2007, Jean-Francois Dockes <jfd@recoll.org> and
is licensed under the GPL, see above.
Portions of the software are:
Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL:
/* This file is part of The New Aspell
* Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL
* license version 2.0 or 2.1. You should have received a copy of the
* LGPL license along with this library if you did not you can find it
* at http://www.gnu.org/.
On Debian systems, the complete text of the GNU LGPL
can be found in `/usr/share/common-licenses/LGPL-2'.
Copyright 2002-2005 Andreas Aardal Hanssen
Copyright (C) 2000-2004 Mikio Hirabayashi
Copyright 1999,2000,2001 BrightStation PLC
Copyright 2001 Ananova Ltd
Copyright 2002 Olly Betts
Copyright (C) 2000, 2001, 2002 Loic Dachary <loic@senga.org>
- GPL V2 or later, same license text as above
Copyright (c) 1991-2004 Unicode, Inc.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2006 Unicode, Inc. All rights reserved. Distributed under
the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining a
copy of the Unicode data files and any associated documentation (the "Data
Files") or Unicode software and any associated documentation (the
"Software") to deal in the Data Files or Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, and/or sell copies of the Data Files or Software, and
to permit persons to whom the Data Files or Software are furnished to do
so, provided that (a) the above copyright notice(s) and this permission
notice appear with all copies of the Data Files or Software, (b) both the
above copyright notice(s) and this permission notice appear in associated
documentation, and (c) there is clear notice in each modified Data File or
in the Software as well as in the documentation associated with the Data
File(s) or Software that the data or software has been modified.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
/*
* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
*
* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
* rights reserved.
*
* License to copy and use this software is granted provided that it
* is identified as the "RSA Data Security, Inc. MD5 Message-Digest
* Algorithm" in all material mentioning or referencing this software
* or this function.
*
* License is also granted to make and use derivative works provided
* that such works are identified as "derived from the RSA Data
* Security, Inc. MD5 Message-Digest Algorithm" in all material
* mentioning or referencing the derived work.
*
* RSA Data Security, Inc. makes no representations concerning either
* the merchantability of this software or the suitability of this
* software for any particular purpose. It is provided "as is"
* without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this
* documentation and/or software.
*/

View file

@ -0,0 +1 @@
README

View file

@ -0,0 +1,2 @@
?package(recoll):needs="X11" section="Apps/Databases"\
title="Personal Search Tool" command="/usr/bin/recoll"

View file

@ -0,0 +1,12 @@
setup.py --root and --user options interfere with Debian wanting a
dist-packages, not site-packages, installation for Python modules
--- a/recollinstall.in
+++ b/recollinstall.in
@@ -139,5 +139,4 @@
${datadir}/recoll/translations/recoll_zh.qm || exit 1
-@NOPYTHON@(cd python/recoll;python setup.py install \
- --prefix=${REALPREFIX} ${ROOTFORPYTHON})
+#@NOPYTHON@(cd python/recoll;python setup.py install \
+# --prefix=${REALPREFIX} ${ROOTFORPYTHON})

View file

@ -0,0 +1 @@
fix-python-install.patch

View file

@ -0,0 +1,72 @@
#!/usr/bin/make -f
# Debhelper-driven build rules for the recoll package, quilt-patched variant:
# apply the patch series, configure, build, stage the install (including the
# Python module) under debian/recoll, then run the dh_* packaging steps.
# NOTE(review): the "unpatch" target and $(QUILT_STAMPFN) used below are not
# defined in this file; presumably they come from the included quilt.make.
include /usr/share/quilt/quilt.make
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
# Warnings and debug info are always on. The optimisation level is appended
# below: -O0 when DEB_BUILD_OPTIONS contains "noopt", -O2 otherwise.
CFLAGS = -Wall -g
# With the assignment below commented out, configure receives whatever value
# LDFLAGS already has (possibly empty).
#LDFLAGS = -Wl,-z,defs
#build qt4 UI only
export QMAKE=qmake-qt4
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
CFLAGS += -O0
else
CFLAGS += -O2
endif
# Run configure for a /usr prefix. The \$$ escaping hands a literal ${prefix}
# to configure instead of letting make or the shell expand it.
config.status: configure
dh_testdir
./configure CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" \
--host=$(DEB_HOST_GNU_TYPE) \
--build=$(DEB_BUILD_GNU_TYPE) \
--mandir=\$${prefix}/share/man \
--prefix=/usr
# build-stamp is a marker file: once it exists, "build" does not re-run $(MAKE).
# The quilt stamp is listed first so patches are applied before configuring.
build: build-stamp
build-stamp: $(QUILT_STAMPFN) config.status
dh_testdir
$(MAKE)
touch $@
# distclean is only attempted when a Makefile exists; the generated Makefile
# itself is then removed by dh_clean.
clean: unpatch
dh_testdir
dh_testroot
rm -f build-stamp config.log
[ ! -f Makefile ] || $(MAKE) distclean
dh_clean Makefile
# Stage the upstream "make install" into debian/recoll/usr, then install the
# Python module separately with the Debian layout, rooted at debian/recoll/.
install:
dh_testdir
dh_testroot
dh_prep
dh_installdirs
$(MAKE) prefix=$(CURDIR)/debian/recoll/usr install
(cd python/recoll;python setup.py install --install-layout=deb --root=$(CURDIR)/debian/recoll/ )
# All packaging commands in this file live under binary-arch.
binary-arch: build install
dh_testdir
dh_testroot
dh_installchangelogs ChangeLog
dh_installdocs README
dh_installmenu
dh_installman
dh_link
dh_strip
dh_compress
dh_fixperms
dh_installdeb
dh_shlibdeps
dh_gencontrol
dh_md5sums
dh_builddeb
# No recipe here: binary-indep only triggers the build and install prerequisites.
binary-indep: build install
binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install

View file

@ -0,0 +1,12 @@
# Example watch control file for uscan
# Rename this file to "watch" and then you can run the "uscan" command
# to check for upstream updates and more.
# See uscan(1) for format
# Compulsory line, this is a version 3 file
version=3
# Uncomment to examine a Webpage
# <Webpage URL> <string match>
http://www.recoll.org/download.html recoll-(.*)\.tar\.gz

View file

@ -0,0 +1,12 @@
recoll-lens (1.17.1.2654-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Updated lens to vers. 1.17.1.2654 : display/open results for embedded files
-- Jean-Francois Dockes <jf@dockes.org> Tue, 27 Mar 2012 16:22:00 +0200
recoll-lens (1.17.0.2648-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Updated lens to vers. 1.17.0.2648 : don't use app shortcut 'a'
-- Jean-Francois Dockes <jf@dockes.org> Mon, 26 Mar 2012 22:05:00 +0200
recoll-lens (1.17.0.2646-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Updated lens to vers. 1.17.0.2646 : Unity 5.0/Ubuntu 12.04 compatibility
-- Jean-Francois Dockes <jf@dockes.org> Sun, 25 Mar 2012 16:42:00 +0200
recoll-lens (1.17.0.2645-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Updated package to recoll version 1.17.0
-- Jean-Francois Dockes <jf@dockes.org> Sun, 25 Mar 2012 16:42:00 +0200

View file

@ -0,0 +1 @@
5

View file

@ -0,0 +1,17 @@
Source: recoll-lens
Section: x11
Priority: optional
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
Build-Depends: debhelper (>= 7),
autotools-dev,
recoll,
python
Standards-Version: 3.9.2
Package: recoll-lens
Architecture: all
Depends: ${misc:Depends}, python, recoll, unity
Description: Unity Lens for searching the Recoll index.
Allows querying the Recoll index from the Unity Dash, optionally
filtering on file category.
Homepage: http://www.recoll.org

View file

@ -0,0 +1,20 @@
This package was debianized by Jean-Francois Dockes <jfd@recoll.org> on
Sun, 25 Mar 2012 16:31:00 +0200.
It was downloaded from http://www.recoll.org
Upstream Author: Jean-Francois Dockes <jfd@recoll.org>
Copyright: (C) 2012 Jean-Francois Dockes <jfd@recoll.org>
License: GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
On Debian systems, the complete text of the GNU General
Public License can be found in `/usr/share/common-licenses/GPL'.
Debian packaging is (C) 2012, Jean-Francois Dockes, same license.
Derived from Original bliss apps lens by Mikkel Kamstrup Erlandsen:
Copyright 2011, Canonical Ltd
Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@canonical.com>
Distribute under the terms of the GNU General Public License v3

View file

@ -0,0 +1 @@
README

View file

@ -0,0 +1,51 @@
#!/usr/bin/make -f
# Debhelper-driven build rules for the recoll-lens package: configure, build,
# stage the install under debian/recoll-lens/usr, then run the dh_* packaging
# steps from binary-indep.
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
# Run configure for a /usr prefix with configuration under /etc. The \$$
# escaping hands a literal ${prefix} to configure instead of letting make or
# the shell expand it.
config.status: configure
dh_testdir
./configure --host=$(DEB_HOST_GNU_TYPE) \
--build=$(DEB_BUILD_GNU_TYPE) \
--mandir=\$${prefix}/share/man \
--prefix=/usr \
--sysconfdir=/etc
# build-stamp is a marker file: once it exists, "build" does not re-run $(MAKE).
build: build-stamp
build-stamp: config.status
dh_testdir
$(MAKE)
touch $@
# distclean is only attempted when a Makefile exists; the generated Makefile
# itself is then removed by dh_clean.
clean:
dh_testdir
dh_testroot
rm -f build-stamp config.log
[ ! -f Makefile ] || $(MAKE) distclean
dh_clean Makefile
# Stage the upstream "make install" into the package tree debian/recoll-lens/usr.
install:
dh_testdir
dh_testroot
dh_prep
dh_installdirs
$(MAKE) prefix=$(CURDIR)/debian/recoll-lens/usr install
# All packaging commands in this file live under binary-indep.
binary-indep: build install
dh_testdir
dh_testroot
dh_installchangelogs ChangeLog
dh_installdocs README
dh_link
dh_compress
dh_fixperms
dh_installdeb
dh_gencontrol
dh_md5sums
dh_builddeb
# Everything this file packages is assembled by binary-indep, so the
# architecture-dependent target is an explicit no-op. It is defined (rather
# than only named in .PHONY) so that "debian/rules binary-arch" has a real rule.
binary-arch:
binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install

View file

@ -0,0 +1,12 @@
# Example watch control file for uscan
# Rename this file to "watch" and then you can run the "uscan" command
# to check for upstream updates and more.
# See uscan(1) for format
# Compulsory line, this is a version 3 file
version=3
# Uncomment to examine a Webpage
# <Webpage URL> <string match>
http://www.recoll.org/download.html recoll-lens-(.*)\.tar\.gz

View file

@ -0,0 +1,24 @@
To use/test the port out of the official macports tree:
- Edit /opt/local/etc/macports/sources.conf, and insert a URL
pointing to your local repository before the rsync one:
file:///Users/dockes/projets/fulltext/recoll/packaging/macports
rsync://rsync.macports.org/release/ports [default]
(inserting before ensures it's used before the macports one)
- The port should live under category/portname (i.e. textproc/recoll)
- After you create or update your Portfile, use the MacPorts portindex
command in the local repository's directory to create or update the index
of the ports in your local repository.
%% cd ~/path/to/macports
%% portindex
Once the local port is added to the PortIndex, it becomes available for
searching or installation as with any other Portfile in the MacPorts
tree. See:
http://guide.macports.org/#development.local-repositories

View file

@ -0,0 +1,46 @@
# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
# $Id$
PortSystem 1.0
PortGroup app 1.0
name recoll
version 1.16.2
categories textproc
platforms darwin
license GPL-2+
maintainers dockes.org:jf openmaintainer
description Desktop full text search
long_description Recoll is a desktop search tool based on Xapian
homepage http://www.recoll.org/
master_sites ${homepage}
checksums sha1 40c18a958eeecbb70cbdf14fa7319b54525537fa \
rmd160 fb598b9c637cab49734547a41f2e8ec232f89dbe
depends_lib port:xapian-core \
port:qt4-mac \
port:aspell \
port:libiconv \
port:zlib
depends_run port:antiword \
port:catdoc \
port:libxslt \
port:poppler \
port:unrtf \
port:unzip
patchfiles patch-configure.diff \
patch-sampleconf-mimeview.diff
configure.args --without-x \
--disable-x11mon
build.args CC=${configure.cc} CXX=${configure.cxx}
# g++-4.2: -E, -S, -save-temps and -M options are not allowed with multiple -arch flags
universal_variant no

View file

@ -0,0 +1,11 @@
--- configure.orig 2011-10-11 06:25:31.000000000 -0500
+++ configure 2011-11-27 20:02:11.000000000 -0600
@@ -4366,7 +4366,7 @@
LIBICONV=""
S_LDFLAGS=$LDFLAGS
S_CPPFLAGS=$CPPFLAGS
-for dir in ${libdir} /opt/local/lib /usr/local/lib ;do
+for dir in ${libdir} ;do
CPPFLAGS="$S_CPPFLAGS -I$dir/../include"
LDFLAGS="$S_LDFLAGS -L$dir"

View file

@ -0,0 +1,134 @@
--- sampleconf/mimeview 2011-10-11 08:44:09.000000000 +0200
+++ sampleconf/mimeview.mac 2011-11-27 17:55:42.000000000 +0100
@@ -2,7 +2,8 @@
## ##########################################
# External viewers, launched by the recoll GUI when you click on a result
-# 'edit' link
+# 'Open' link - MAC version
+# On the MAC, we basically use "open" for everything...
# Mime types which we should not uncompress if they are found gzipped or
# bzipped because the native viewer knows how to handle. These would be
@@ -11,74 +12,71 @@
[view]
# Pseudo entry used if the 'use desktop' preference is set in the GUI
-application/x-all = xdg-open %f
+application/x-all = open %f
application/x-kword = kword %f
application/x-abiword = abiword %f
-application/pdf = okular %f
-application/postscript = okular %f
-application/x-dvi = okular %f
+application/pdf = open %f
+application/postscript = open %f
+application/x-dvi = open %f
application/x-lyx = lyx %f
application/x-scribus = scribus %f
-application/msword = libreoffice %f
-application/vnd.ms-excel = libreoffice %f
-application/vnd.ms-powerpoint = libreoffice %f
+application/msword = open %f
+application/vnd.ms-excel = open %f
+application/vnd.ms-powerpoint = open %f
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
- libreoffice %f
+ open %f
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
- libreoffice %f
+ open %f
application/vnd.openxmlformats-officedocument.presentationml.template = \
- libreoffice %f
+ open %f
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
- libreoffice %f
+ open %f
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
- libreoffice %f
+ open %f
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
- libreoffice %f
-application/vnd.sun.xml.calc = libreoffice %f
-application/vnd.sun.xml.calc.template = libreoffice %f
-application/vnd.sun.xml.draw = libreoffice %f
-application/vnd.sun.xml.draw.template = libreoffice %f
-application/vnd.sun.xml.impress = libreoffice %f
-application/vnd.sun.xml.impress.template = libreoffice %f
-application/vnd.sun.xml.math = libreoffice %f
-application/vnd.sun.xml.writer = libreoffice %f
-application/vnd.sun.xml.writer.global = libreoffice %f
-application/vnd.sun.xml.writer.template = libreoffice %f
-application/vnd.wordperfect = libreoffice %f
+ open %f
+application/vnd.sun.xml.calc = open %f
+application/vnd.sun.xml.calc.template = open %f
+application/vnd.sun.xml.draw = open %f
+application/vnd.sun.xml.draw.template = open %f
+application/vnd.sun.xml.impress = open %f
+application/vnd.sun.xml.impress.template = open %f
+application/vnd.sun.xml.math = open %f
+application/vnd.sun.xml.writer = open %f
+application/vnd.sun.xml.writer.global = open %f
+application/vnd.sun.xml.writer.template = open %f
+application/vnd.wordperfect = open %f
application/x-chm = kchmviewer %f
-application/x-fsdirectory = dolphin %f
+application/x-fsdirectory = open %f
application/x-gnuinfo = xterm -e "info -f %f"
-application/x-flac = rhythmbox %f
-audio/mpeg = rhythmbox %f
-application/ogg = rhythmbox %f
-
-image/jpeg = gwenview %f
-image/png = gwenview %f
-image/tiff = gwenview %f
-image/gif = gwenview %f
-image/svg+xml = inkview %f
-image/vnd.djvu = djview %f
-image/x-xcf = gimp %f
-image/bmp = gwenview %f
-image/x-ms-bmp = gwenview %f
-image/x-xpmi = gwenview %f
-
-# Or firefox -remote "openFile(%u)"
-application/x-tex = emacsclient %f
-text/x-tex = emacsclient %f
-text/html = firefox %u
+application/x-flac = open %f
+audio/mpeg = open %f
+application/ogg = open %f
+
+image/jpeg = open %f
+image/png = open %f
+image/tiff = open %f
+image/gif = open %f
+image/svg+xml = open %f
+image/vnd.djvu = open %f
+image/x-xcf = open %f
+image/bmp = open %f
+image/x-ms-bmp = open %f
+image/x-xpmi = open %f
+
+application/x-tex = open %f
+text/x-tex = open %f
+text/html = open %u
text/html|gnuinfo = rclshowinfo %F %(title)
-text/plain = emacsclient %f
-text/x-c = emacsclient %f
-text/x-c+ = emacsclient %f
-text/x-c++ = emacsclient %f
-text/x-html-sidux-man = konqueror %f
-text/x-html-aptosid-man = iceweasel %f
+text/plain = open %f
+text/x-c = open %f
+text/x-c+ = open %f
+text/x-c++ = open %f
text/x-python = idle %f

View file

@ -0,0 +1,77 @@
%define name kio_recoll
%define version 1.17.0
%define release 0
Name: %{name}
Version: %{version}
Release: %{release}
Summary: KIO slave for the Recoll full text search tool
Source0: http://www.recoll.org/recoll-%{version}.tar.gz
URL: http://www.recoll.org/
Group: Applications/Databases
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
License: GPL
BuildRequires: libkde4-devel zlib-devel xapian-core-devel libuuid-devel
Requires: recoll
%description
Recoll is a personal full text search package for Linux, FreeBSD and
other Unix systems. It is based on a very strong backend (Xapian), for
which it provides an easy to use, feature-rich, easy administration
interface.
# ---------------------------------------------------------------------------
%prep
%setup -q -n recoll-%{version}
# ---------------------------------------------------------------------------
%build
%cmake_kde4 kde/kioslave/kio_recoll
pwd
make %{?_smp_mflags}
# ---------------------------------------------------------------------------
%install
rm -rf $RPM_BUILD_ROOT
%makeinstall
# ---------------------------------------------------------------------------
%clean
rm -rf $RPM_BUILD_ROOT
# ---------------------------------------------------------------------------
%files
%defattr(-,root,root,-)
%{_libdir}/kde4/kio_recoll.so
%{_datadir}/kde4/apps/kio_recoll
%{_datadir}/kde4/apps/kio_recoll/help.html
%{_datadir}/kde4/apps/kio_recoll/welcome.html
%{_datadir}/kde4/services/recoll.protocol
%{_datadir}/kde4/services/recollf.protocol
%if 0%{?suse_version} > 1120
%dir %{_datadir}/kde4/apps
%dir %{_datadir}/kde4/services
%dir %{_libdir}/kde4
%endif
# ---------------------------------------------------------------------------
%changelog
* Sun Mar 18 2012 Jean-Francois Dockes <jfd@recoll.org> 1.17.0-0
- 1.17.0
* Mon Nov 07 2011 Jean-Francois Dockes <jfd@recoll.org> 1.16.2-0
- 1.16.2
* Mon May 02 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.8-0
- 1.15.8
* Sun Mar 06 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.5-0
- Initial spec file for kio

131
packaging/rpm/recoll.spec Normal file
View file

@ -0,0 +1,131 @@
%define name recoll
%define version 1.17.0
%define release 0
Name: %{name}
Version: %{version}
Release: %{release}
Summary: Desktop Full Text Search Tool with a QT Gui
Source0: http://www.recoll.org/%{name}-%{version}.tar.gz
URL: http://www.recoll.org/
Group: Applications/Databases
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
License: GPL
# libxapian-devel or xapian-core-devel?
BuildRequires: libqt4-devel zlib-devel libxapian-devel libuuid-devel python-devel
%description
Recoll is a personal full text search package for Linux, FreeBSD and
other Unix systems. It is based on a very strong backend (Xapian), for
which it provides an easy to use, feature-rich, easy administration
interface.
# ---------------------------------------------------------------------------
%prep
%setup -q
# ---------------------------------------------------------------------------
%build
%configure
make %{?_smp_mflags}
# ---------------------------------------------------------------------------
%install
rm -rf $RPM_BUILD_ROOT
%makeinstall
# ---------------------------------------------------------------------------
%clean
rm -rf $RPM_BUILD_ROOT
# ---------------------------------------------------------------------------
%files
%defattr(-,root,root,-)
%{_bindir}/*
%{python_sitearch}/
%{_datadir}/%{name}
%{_datadir}/applications/recoll-searchgui.desktop
%{_datadir}/icons/hicolor/48x48/apps/recoll.png
%{_datadir}/pixmaps/recoll.png
%{_mandir}/man1/recoll*
%{_mandir}/man5/recoll*
%if 0%{?suse_version} > 1120
%dir %{_datadir}/applications
%dir %{_datadir}/pixmaps
%endif
%dir %{_datadir}/icons/hicolor/48x48/apps
%dir %{_datadir}/icons/hicolor/48x48
%dir %{_datadir}/icons/hicolor
# ---------------------------------------------------------------------------
%changelog
* Sun Mar 18 2012 Jean-Francois Dockes <jfd@recoll.org> 1.17.0-0
- Update to release 1.17.0
* Mon Nov 07 2011 Jean-Francois Dockes <jfd@recoll.org> 1.16.2-0
- Update to release 1.16.2
* Mon May 02 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.8-0
- Update to release 1.15.8
* Fri Mar 04 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.5-0
- Update to release 1.15.5
* Thu Feb 15 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.2-0
- Update to release 1.15.2
* Thu Feb 2 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.0-0
- Update to release 1.15.0
* Thu Oct 28 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.3-1
- Update to release 1.14.3
* Thu Oct 28 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.2-1
- Rebuilt with a Xapian built with --disable-sse
* Sat Sep 24 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.2-0
- Update to release 1.14.2
* Thu Apr 14 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.04-1
- Update to release 1.13.04
* Thu Jan 07 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.01-1
- Update to release 1.13.01
* Thu Dec 10 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.4-1
- Update to release 1.12.4
* Wed Oct 28 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.3-1
- Update to release 1.12.3
* Tue Sep 20 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.2-1
- Update to release 1.12.2
* Thu Jan 29 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.0-1
- Update to release 1.12.0
* Mon Oct 13 2008 Jean-Francois Dockes <jfd@recoll.org> 1.11.0-1
- Update to release 1.11.0
* Thu Sep 11 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.6-1
- Update to release 1.10.6
* Thu May 27 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.2-1
- Update to release 1.10.2
* Thu Jan 31 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.1-1
- Update to release 1.10.1
* Wed Nov 21 2007 Jean-Francois Dockes <jfd@recoll.org> 1.10.0-1
- Update to release 1.10.0
* Tue Sep 11 2007 Jean-Francois Dockes <jfd@recoll.org> 1.9.0-1
- Update to release 1.9.0
* Tue Mar 6 2007 Jean-Francois Dockes <jfd@recoll.org> 1.8.1-1
- Update to release 1.8.1
* Mon Jan 15 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.5-1
- Update to release 1.7.5
* Mon Jan 08 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.3-1
- Update to release 1.7.3
* Tue Nov 28 2006 Jean-Francois Dockes <jfd@recoll.org> 1.6.1-1
- Update to release 1.6.1
* Mon Oct 2 2006 Jean-Francois Dockes <jfd@recoll.org> 1.5.3-1
- Update to release 1.5.3
* Sun May 7 2006 Jean-Francois Dockes <jfd@recoll.org> 1.4.3-1
- Update to release 1.4.3
* Fri Mar 31 2006 Jean-Francois Dockes <jfd@recoll.org> 1.3.3-1
- Update to release 1.3.3
* Thu Feb 2 2006 Jean-Francois Dockes <jfd@recoll.org> 1.2.2-1
- Update to release 1.2.2
* Thu Jan 10 2006 Jean-Francois Dockes <jfd@recoll.org> 1.1.0-1
- Initial packaging

View file

@ -0,0 +1,88 @@
# RPM spec for recoll 1.12.0.
# NOTE(review): the mkrel, configure2_5x and makeinstall_std macros used below
# look Mandriva-specific - confirm they exist before building on another distribution.
Summary: Desktop full text search tool with a qt gui
Name: recoll
Version: 1.12.0
Release: %mkrel 1
License: GPL
Group: Databases
URL: http://www.recoll.org/
Source0: http://www.lesbonscomptes.com/recoll/%{name}-%{version}.tar.bz2
Patch1: %{name}-configure.patch
# Build-time dependencies: Xapian, FAM, Qt (3.3.7 or later) and aspell
# development packages.
BuildRequires: libxapian-devel
BuildRequires: libfam-devel
BuildRequires: libqt-devel >= 3.3.7
BuildRequires: libaspell-devel
Requires: xapian
BuildRoot: %{_tmppath}/%{name}-%{version}--buildroot
%description
Recoll is a personal full text search tool for Unix/Linux.
It is based on the very strong Xapian backend, for which
it provides an easy to use, feature-rich, easy administration,
QT graphical interface.
%prep
# Unpack the source tarball quietly, then apply Patch1 with strip level 0.
%setup -q
%patch1 -p0
%build
# Configure with the FAM and aspell options switched on, then build.
%configure2_5x \
--with-fam \
--with-aspell
%make
%install
# Start from an empty buildroot; the "/" comparison guards against wiping the
# real root directory if the buildroot macro is misdefined.
[ "%{buildroot}" != "/" ] && rm -rf %{buildroot}
%makeinstall_std
# Re-process the installed desktop entries in place (source and target
# directory are the same), adding the Mandriva menu category.
desktop-file-install --vendor="" \
--add-category="X-MandrivaLinux-MoreApplications-Databases" \
--dir %{buildroot}%{_datadir}/applications %{buildroot}%{_datadir}/applications/*
%clean
# Same guarded buildroot removal as in the install step.
[ "%{buildroot}" != "/" ] && rm -rf %{buildroot}
%files
# Defaults: files 644, directories 755, owner and group root. Entries that
# must be executable carry an explicit 755 attr.
%defattr(644,root,root,755)
%doc %{_datadir}/%{name}/doc
%attr(755,root,root) %{_bindir}/%{name}*
%{_datadir}/applications/recoll-searchgui.desktop
%{_datadir}/icons/hicolor/48x48/apps/recoll-searchgui.png
# Directories are listed on their own so the package owns them; their
# contents are enumerated by the glob entries that follow.
%dir %{_datadir}/%{name}
%dir %{_datadir}/%{name}/examples
%dir %{_datadir}/%{name}/filters
%dir %{_datadir}/%{name}/images
%dir %{_datadir}/%{name}/translations
%{_datadir}/%{name}/examples/mime*
%{_datadir}/%{name}/examples/*.conf
%attr(755,root,root) %{_datadir}/%{name}/examples/rclmon.sh
%attr(755,root,root) %{_datadir}/%{name}/filters/rc*
%{_datadir}/%{name}/filters/xdg-open
%{_datadir}/%{name}/images/*png
%{_mandir}/man1/recoll*
%{_mandir}/man5/recoll*
%{_datadir}/%{name}/translations/*.qm
%changelog
* Fri Apr 20 2007 Tomasz Pawel Gajc <tpg@mandriva.org> 1.12.1-1mdv2008.0
+ Revision: 16093
- new version
- drop P0
+ Mandriva <devel@mandriva.com>
* Tue Mar 06 2007 Tomasz Pawel Gajc <tpg@mandriva.org> 1.7.5-2mdv2007.0
+ Revision: 134128
- rebuild
* Tue Jan 30 2007 Tomasz Pawel Gajc <tpg@mandriva.org> 1.7.5-1mdv2007.1
+ Revision: 115423
- add patch 1 - fix build on x86_64
- add patch 0 - fix menu entry
- fix group
- add buildrequires
- set correct bits on files
- Import recoll

View file

@ -0,0 +1,93 @@
# RPM spec for recoll 1.14.3, built with the "static" make target.
Name: recoll
Version: 1.14.3
Release: 1%{?dist}
Summary: Desktop full text search tool with a qt gui
Group: Applications/Databases
License: GPL
URL: http://www.recoll.org/
Source0: http://www.recoll.org/recoll-1.14.3.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
# No Xapian BuildRequires is declared: a xapian-core rpm may be hard to find,
# and many builders will find it easier to build Xapian by hand. The run-time
# binaries are linked statically against Xapian, so they do not depend on
# libxapian.so.
BuildRequires: qt-devel
Requires: qt
%description
Recoll is a personal full text search package for Linux, FreeBSD and
other Unix systems. It is based on a very strong backend (Xapian), for
which it provides an easy to use, feature-rich, easy administration
interface.
%prep
# Unpack the source tarball quietly.
%setup -q
%build
# Source the Qt profile script only when QTDIR is not already set.
[ -n "$QTDIR" ] || . %{_sysconfdir}/profile.d/qt.sh
%configure
# Build the "static" make target (see the Xapian note in the preamble).
make %{?_smp_mflags} static
%install
# Always install into a freshly emptied build root.
rm -rf $RPM_BUILD_ROOT
%makeinstall
%clean
rm -rf $RPM_BUILD_ROOT
%files
# Keep the modes set by the install step; force owner and group to root.
%defattr(-,root,root,-)
%{_bindir}/*
%{_datadir}/%{name}
%{_datadir}/applications/%{name}-searchgui.desktop
%{_datadir}/icons/hicolor/48x48/apps/%{name}.png
%{_datadir}/pixmaps/%{name}.png
%{_mandir}/man1/recoll*
%{_mandir}/man5/recoll*
%doc
%changelog
* Thu Nov 25 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.3-1
- Update to release 1.14.3
* Sat Sep 24 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.2-2
- Rebuilt with xapian 1.0.21 configured with the -disable-sse flag to avoid the "illegal instruction" problem on older CPUs
* Sat Sep 24 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.2-1
- Update to release 1.14.2
* Thu Apr 14 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.04-1
- Update to release 1.13.04
* Thu Jan 07 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.01-1
- Update to release 1.13.01
* Wed Oct 28 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.3-1
- Update to release 1.12.3
* Thu Jan 29 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.0-1
- Update to release 1.12.0
* Mon Oct 13 2008 Jean-Francois Dockes <jfd@recoll.org> 1.11.0-1
- Update to release 1.11.0
* Fri Sep 12 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.6-1
- Update to release 1.10.6
* Thu May 27 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.2-1
- Update to release 1.10.2
* Thu Jan 31 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.1-1
- Update to release 1.10.1
* Wed Nov 21 2007 Jean-Francois Dockes <jfd@recoll.org> 1.10.0-1
- Update to release 1.10.0
* Tue Sep 11 2007 Jean-Francois Dockes <jfd@recoll.org> 1.9.0-1
- Update to release 1.9.0
* Tue Mar 6 2007 Jean-Francois Dockes <jfd@recoll.org> 1.8.1-1
- Update to release 1.8.1
* Mon Jan 15 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.5-1
- Update to release 1.7.5
* Mon Jan 08 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.3-1
- Update to release 1.7.3
* Tue Nov 28 2006 Jean-Francois Dockes <jfd@recoll.org> 1.6.1-1
- Update to release 1.6.1
* Mon Nov 20 2006 Jean-Francois Dockes <jfd@recoll.org> 1.5.11-1
- Update to release 1.5.11
* Mon Oct 2 2006 Jean-Francois Dockes <jfd@recoll.org> 1.5.2-1
- Update to release 1.5.2
* Fri Mar 31 2006 Jean-Francois Dockes <jfd@recoll.org> 1.3.2-1
- Update to release 1.3.1
* Wed Feb 1 2006 Jean-Francois Dockes <jfd@recoll.org> 1.2.0-1
- Initial packaging

View file

@ -0,0 +1,112 @@
# RPM spec for recoll 1.17.0, built against Qt 4 (qmake-qt4) with Xapian and
# zlib development packages.
Name: recoll
Version: 1.17.0
Release: 1%{?dist}
Summary: Desktop full text search tool with a qt gui
Group: Applications/Databases
License: GPLv2
URL: http://www.recoll.org/
Source0: http://www.recoll.org/recoll-1.17.0.tar.gz
# desktop-file-utils is listed because the install step calls
# desktop-file-install.
BuildRequires: qt-devel qt-webkit-devel xapian-core-devel zlib-devel desktop-file-utils
%description
Recoll is a personal full text search package for Linux, FreeBSD and
other Unix systems. It is based on a very strong backend (Xapian), for
which it provides an easy to use, feature-rich, easy administration
interface.
%prep
# Unpack the source tarball quietly.
%setup -q
%build
# Export QMAKE so the build uses the Qt 4 qmake binary
# (presumably read by the configure script - confirm).
QMAKE=qmake-qt4
export QMAKE
%configure
# No smp flags: the build fails for some reason if -j is used.
make
%install
# Always install into a freshly emptied build root.
rm -rf %{buildroot}
make install DESTDIR=%{buildroot}
# Validate and re-install the desktop entry into the same directory,
# removing the copy written by "make install".
desktop-file-install --delete-original \
--dir=%{buildroot}/%{_datadir}/applications \
%{buildroot}/%{_datadir}/applications/%{name}-searchgui.desktop
%clean
rm -rf %{buildroot}
%files
# Keep the modes set by the install step; force owner and group to root.
%defattr(-,root,root,-)
%{_bindir}/*
%{_datadir}/%{name}
%{_datadir}/applications/%{name}-searchgui.desktop
%{_datadir}/icons/hicolor/48x48/apps/%{name}.png
%{_datadir}/pixmaps/%{name}.png
%{_mandir}/man1/recoll*
%{_mandir}/man5/recoll*
%doc
%changelog
* Sun Mar 18 2012 J.F. Dockes <jfd@recoll.org> 1.17.0-1
- updated to release 1.17.0
* Wed Sep 28 2011 J.F. Dockes <jfd@recoll.org> 1.16.1-1
- updated to release 1.16.1
* Wed Sep 21 2011 J.F. Dockes <jfd@recoll.org> 1.16.0-1
- updated to release 1.16.0
* Mon May 02 2011 J.F. Dockes <jfd@recoll.org> 1.15.8-1
- updated to release 1.15.8
* Fri Mar 15 2011 J.F. Dockes <jfd@recoll.org> 1.15.5-1
- updated to release 1.15.5
* Thu Feb 15 2011 J.F. Dockes <jfd@recoll.org> 1.15.2-1
- updated to release 1.15.2
* Wed Feb 02 2011 J.F. Dockes <jfd@recoll.org> 1.15.0-1
- updated to release 1.15.0
* Mon Sep 13 2010 J.F. Dockes <jfd@recoll.org> 1.14.0-1
- updated to release 1.14.0
* Sun May 9 2010 J.F. Dockes <jfd@recoll.org> 1.13.04-2
- Bumped the release number to issue new rpms for fc10
* Sun May 9 2010 J.F. Dockes 1.13.04
- spec file updated to recoll release 1.13.04.
* Fri Feb 12 2010 Terry Duell 1.13.02
- updated to release 1.13.02
* Mon Jan 12 2010 Terry Duell 1.13.01-3
- rpm spec file updated to fix Fedora desktop-file-install and install icon
* Sun Jan 10 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.01-2
- Rpm Spec file updated for recent fedoras: depend on xapian packages, use qt4
* Thu Jan 07 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.01-1
- Update to release 1.13.01
* Thu Dec 10 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.4-1
- Update to release 1.12.4
* Thu Jan 29 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.0-1
- Update to release 1.12.0
* Mon Oct 13 2008 Jean-Francois Dockes <jfd@recoll.org> 1.11.0-1
- Update to release 1.11.0
* Fri Sep 12 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.6-1
- Update to release 1.10.6
* Thu May 27 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.2-1
- Update to release 1.10.2
* Thu Jan 31 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.1-1
- Update to release 1.10.1
* Wed Nov 21 2007 Jean-Francois Dockes <jfd@recoll.org> 1.10.0-1
- Update to release 1.10.0
* Tue Sep 11 2007 Jean-Francois Dockes <jfd@recoll.org> 1.9.0-1
- Update to release 1.9.0
* Tue Mar 6 2007 Jean-Francois Dockes <jfd@recoll.org> 1.8.1-1
- Update to release 1.8.1
* Mon Jan 15 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.5-1
- Update to release 1.7.5
* Mon Jan 08 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.3-1
- Update to release 1.7.3
* Tue Nov 28 2006 Jean-Francois Dockes <jfd@recoll.org> 1.6.1-1
- Update to release 1.6.1
* Mon Nov 20 2006 Jean-Francois Dockes <jfd@recoll.org> 1.5.11-1
- Update to release 1.5.11
* Mon Oct 2 2006 Jean-Francois Dockes <jfd@recoll.org> 1.5.2-1
- Update to release 1.5.2
* Fri Mar 31 2006 Jean-Francois Dockes <jfd@recoll.org> 1.3.2-1
- Update to release 1.3.1
* Wed Feb 1 2006 Jean-Francois Dockes <jfd@recoll.org> 1.2.0-1
- Initial packaging

View file

@ -0,0 +1,123 @@
# RPM spec for recoll 1.17.0. Name, version and release are defined once as
# macros and reused by the tags below.
# NOTE(review): the mkrel macro looks Mandriva-specific - confirm.
%define name recoll
%define version 1.17.0
%define release %mkrel 1
Name: %{name}
Version: %{version}
Release: %{release}
Summary: Desktop full text search tool with a qt gui
Source0: http://www.recoll.org/%{name}-%{version}.tar.gz
URL: http://www.recoll.org/
Group: Applications/Databases
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
License: GPL
Requires: xapian-core
%description
Recoll is a personal full text search package for Linux, FreeBSD and
other Unix systems. It is based on a very strong backend (Xapian), for
which it provides an easy to use, feature-rich, easy administration
interface.
# ---------------------------------------------------------------------------
%prep
# Unpack the source tarball quietly.
%setup -q
# ---------------------------------------------------------------------------
%build
# Source the Qt 4 profile script only when QTDIR is not already set.
[ -n "$QTDIR" ] || . %{_sysconfdir}/profile.d/60qt4.sh
%configure
make %{?_smp_mflags}
# ---------------------------------------------------------------------------
%install
# Always install into a freshly emptied build root.
rm -rf $RPM_BUILD_ROOT
%makeinstall
# ---------------------------------------------------------------------------
%clean
rm -rf $RPM_BUILD_ROOT
# ---------------------------------------------------------------------------
%files
# Keep the modes set by the install step; force owner and group to root.
%defattr(-,root,root,-)
%{_bindir}/*
%{_datadir}/applications/recoll-searchgui.desktop
%{_datadir}/icons/hicolor/48x48/apps/recoll.png
%{_datadir}/pixmaps/recoll.png
%{_datadir}/%{name}
%{_mandir}/man1/recoll*
%{_mandir}/man5/recoll*
# ---------------------------------------------------------------------------
%changelog
* Sun Mar 18 2012 Jean-Francois Dockes <jfd@recoll.org> 1.17.0-1
- Update to release 1.17.0
* Mon Nov 07 2011 Jean-Francois Dockes <jfd@recoll.org> 1.16.2-1
- Update to release 1.16.2
* Wed Sep 28 2011 Jean-Francois Dockes <jfd@recoll.org> 1.16.1-1
- Update to release 1.16.1
* Wed Sep 21 2011 Jean-Francois Dockes <jfd@recoll.org> 1.16.0-1
- Update to release 1.16.0
* Tue May 03 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.8-1
- Update to release 1.15.8
* Fri Mar 04 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.5-1
- Update to release 1.15.5
* Thu Feb 15 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.2-1
- Update to release 1.15.2
* Wed Feb 02 2011 Jean-Francois Dockes <jfd@recoll.org> 1.15.0-1
- Update to release 1.15.0
* Thu Nov 25 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.3-1
- Update to release 1.14.3
* Sat Sep 24 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.2-1
- Update to release 1.14.2
* Mon Sep 13 2010 Jean-Francois Dockes <jfd@recoll.org> 1.14.0-1
- Update to release 1.14.0
* Thu Apr 14 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.04-1
- Update to release 1.13.04
* Thu Jan 07 2010 Jean-Francois Dockes <jfd@recoll.org> 1.13.01-1
- Update to release 1.13.01
* Thu Dec 10 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.4-1
- Update to release 1.12.4
* Mon Oct 19 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.2-1
- Update to release 1.12.2
* Thu Jan 29 2009 Jean-Francois Dockes <jfd@recoll.org> 1.12.0-1
- Update to release 1.12.0
* Mon Oct 13 2008 Jean-Francois Dockes <jfd@recoll.org> 1.11.0-1
- Update to release 1.11.0
* Thu May 27 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.2-1
- Update to release 1.10.2
* Thu Jan 31 2008 Jean-Francois Dockes <jfd@recoll.org> 1.10.1-1
- Update to release 1.10.1
* Wed Nov 21 2007 Jean-Francois Dockes <jfd@recoll.org> 1.10.0-1
- Update to release 1.10.0
* Tue Sep 11 2007 Jean-Francois Dockes <jfd@recoll.org> 1.9.0-1
- Update to release 1.9.0
* Tue Mar 6 2007 Jean-Francois Dockes <jfd@recoll.org> 1.8.1-1
- Update to release 1.8.1
* Mon Jan 15 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.5-1
- Update to release 1.7.5
* Mon Jan 08 2007 Jean-Francois Dockes <jfd@recoll.org> 1.7.3-1
- Update to release 1.7.3
* Tue Nov 28 2006 Jean-Francois Dockes <jfd@recoll.org> 1.6.1-1
- Update to release 1.6.1
* Mon Nov 20 2006 Jean-Francois Dockes <jfd@recoll.org> 1.5.11-1
- Update to release 1.5.11
* Mon Oct 2 2006 Jean-Francois Dockes <jfd@recoll.org> 1.5.2-1
- Update to release 1.5.2
* Sun May 7 2006 Jean-Francois Dockes <jfd@recoll.org> 1.4.3-1
- Update to release 1.4.3
* Fri Mar 31 2006 Jean-Francois Dockes <jfd@recoll.org> 1.3.3-1
- Update to release 1.3.3
* Thu Feb 2 2006 Jean-Francois Dockes <jfd@recoll.org> 1.2.2-1
- Update to release 1.2.2
* Thu Jan 10 2006 Jean-Francois Dockes <jfd@recoll.org> 1.1.0-1
- Initial packaging

340
src/COPYING Normal file
View file

@ -0,0 +1,340 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

10552
src/ChangeLog Normal file

File diff suppressed because it is too large Load diff

961
src/INSTALL Normal file
View file

@ -0,0 +1,961 @@
More documentation can be found in the doc/ directory or at http://www.recoll.org
Link: HOME
Link: PREVIOUS
Link: NEXT
Recoll user manual
Prev Next
--------------------------------------------------------------------------
Chapter 5. Installation and configuration
Table of Contents
5.1. Installing a binary copy
5.2. Supporting packages
5.3. Building from source
5.4. Configuration overview
5.1. Installing a binary copy
There are three types of binary Recoll installations:
* Through your system's normal software distribution framework (ie,
Debian/Ubuntu apt, FreeBSD ports, etc.).
* From a package downloaded from the Recoll web site.
* From a prebuilt tree downloaded from the Recoll web site.
In all cases, the strict software dependencies (ie on Xapian or iconv)
will be automatically satisfied, you should not have to worry about them.
You will only have to check or install supporting applications for the
file types that you want to index beyond those that are natively processed
by Recoll (text, HTML, mail files, and a few others).
You should also maybe have a look at the configuration section (but this
may not be necessary for a quick test with default parameters). Most
parameters can be more conveniently set from the GUI interface.
5.1.1. Installing through a package system
If you use a BSD-type port system or a prebuilt package (DEB, RPM,
manually or through the system software configuration utility), just
follow the usual procedure for your system.
5.1.2. Installing a prebuilt Recoll
The unpackaged binary versions on the Recoll web site are just compressed
tar files of a build tree, where only the useful parts were kept
(executables and sample configuration).
The executable binary files are built with a static link to libxapian and
libiconv, to make installation easier (no dependencies).
After extracting the tar file, you can proceed with installation as if you
had built the package from source (that is, just type make install). The
binary trees are built for installation to /usr/local.
--------------------------------------------------------------------------
Prev Home Next
API Supporting packages
Link: HOME
Link: UP
Link: PREVIOUS
Link: NEXT
Recoll user manual
Prev Chapter 5. Installation and configuration Next
--------------------------------------------------------------------------
5.2. Supporting packages
Recoll uses external applications to index some file types. You need to
install them for the file types that you wish to have indexed (these are
run-time optional dependencies. None is needed for building or running
Recoll except for indexing their specific file type).
After an indexing pass, the commands that were found missing can be
displayed from the recoll File menu. The list is stored in the missing
text file inside the configuration directory.
A list of common file types which need external commands follows. Many of
the filters need the iconv command, which is not always listed as a
dependency.
Please note that, due to the relatively dynamic nature of this
information, the most up to date version is now kept on the Recoll helper
applications page along with links to the home pages or best
source/patches pages, and misc tips. The list below is not updated often
and may be quite stale.
For many Linux distributions, most of the commands listed can be installed
from the package repositories. However, the packages are sometimes
outdated, or not the best version for Recoll, so you should take a look at
the Recoll helper applications page if a file type is important to you.
As of Recoll release 1.14, a number of XML-based formats that were handled
by ad hoc filter code now use the xsltproc command, which usually comes
with libxslt. These are: abiword, fb2 (ebooks), kword, openoffice, svg.
Now for the list:
* Openoffice files need unzip and xsltproc.
* PDF files need pdftotext which is part of the Xpdf or Poppler
packages.
* Postscript files need pstotext. The original version has an issue with
shell characters in file names, which is corrected in recent packages.
See the Recoll helper applications page for more detail.
* MS Word needs antiword. It is also useful to have wvWare installed as
it may be used as a fallback for some files which antiword does not
handle.
* MS Excel and PowerPoint need catdoc.
* MS Open XML (docx) needs xsltproc.
* Wordperfect files need wpd2html from the libwpd (or libwpd-tools on
Ubuntu) package.
* RTF files need unrtf, which, in its standard version, has much trouble
with non-western character sets. Check the Recoll helper applications
page.
* TeX files need untex or detex. Check the Recoll helper applications
page for sources if it's not packaged for your distribution.
* dvi files need dvips.
* djvu files need djvutxt and djvused from the DjVuLibre package.
* Audio files: Recoll releases before 1.13 used the id3info command from
the id3lib package to extract mp3 tag information, metaflac (standard
flac tools) for flac files, and ogginfo (vorbis tools) for ogg files.
Releases 1.14 and later use a single Python filter based on mutagen
for all audio file types.
* Pictures: Recoll uses the Exiftool Perl package to extract tag
information. Most image file formats are supported. Note that there
may not be much interest in indexing the technical tags (image size,
aperture, etc.). This is only of interest if you store personal tags
or textual descriptions inside the image files.
* chm: files in microsoft help format need Python and the pychm module
(which needs chmlib).
* ICS: up to Recoll 1.13, iCalendar files need Python and the icalendar
module. icalendar is not needed for newer versions, which use internal
code.
* Zip archives need Python (and the standard zipfile module).
* Rar archives need Python, the rarfile Python module and the unrar
utility.
* Midi karaoke files need Python and the Midi module
* Konqueror webarchive format with Python (uses the Tarfile module).
* mimehtml web archive format (support based on the mail filter, which
introduces some mild weirdness, but still usable).
Text, HTML, mail folders, and Scribus files are processed internally. Lyx
is used to index Lyx files. Many filters need iconv and the standard sed
and awk.
--------------------------------------------------------------------------
Prev Home Next
Installation and configuration Up Building from source
Link: HOME
Link: UP
Link: PREVIOUS
Link: NEXT
Recoll user manual
Prev Chapter 5. Installation and configuration Next
--------------------------------------------------------------------------
5.3. Building from source
5.3.1. Prerequisites
C++ compiler. Up to Recoll version 1.13.04, its absence can manifest
itself by strange messages about a missing iconv_open.
Development files for Xapian core.
Important: If you are building Xapian for an older CPU (before Pentium 4
or Athlon 64), you need to add the --disable-sse flag to the configure
command. Otherwise all Xapian applications will crash with an illegal
instruction error.
Development files for Qt .
Development files for X11 and zlib.
Check the Recoll download page for up to date version information.
You will most probably be able to find a binary package for Qt for your
system. You may have to compile Xapian but this is not difficult (if you
are using FreeBSD, there is a port).
You may also need libiconv. Recoll currently uses version 1.9 (this should
not be critical). On Linux systems, the iconv interface is part of libc
and you should not need to do anything special.
5.3.2. Building
Recoll has been built on Linux, FreeBSD, Mac OS X, and Solaris, most
versions after 2005 should be ok, maybe some older ones too (Solaris 8 is
ok). If you build on another system, and need to modify things, I would
very much welcome patches.
Depending on the Qt 3 configuration on your system, you may have to set
the QTDIR and QMAKESPECS variables in your environment:
* QTDIR should point to the directory above the one that holds the qt
include files (ie: if qt.h is /usr/local/qt/include/qt.h, QTDIR should
be /usr/local/qt).
* QMAKESPECS should be set to the name of one of the qt mkspecs
sub-directories (ie: linux-g++).
On many Linux systems, QTDIR is set by the login scripts, and QMAKESPECS
is not needed because there is a default link in mkspecs/.
Neither QTDIR nor QMAKESPECS should be needed with Qt 4, configuration
details are entirely determined by qmake (which is quite often installed
as qmake-qt4).
Configure options:
* --without-aspell will disable the code for phonetic matching of search
terms.
* --with-fam or --with-inotify will enable the code for real time
indexing. Inotify support is enabled by default on recent Linux
systems.
* --disable-webkit is available from version 1.17 to implement the
result list with a Qt QTextBrowser instead of a WebKit widget if you
do not or can't depend on the latter.
* --enable-xattr will enable code to fetch data from file extended
attributes. This is only useful if some application stores data in
there, and also needs some simple configuration (see comments in the
fields configuration file).
* --enable-camelcase will enable splitting camelCase words. This is not
enabled by default as it has the unfortunate side-effect of making
some phrase searches quite confusing: ie, "MySQL manual" would be
matched by "MySQL manual" and "my sql manual" but not "mysql manual"
(only inside phrase searches).
* --with-file-command Specify the version of the 'file' command to use
(ie: --with-file-command=/usr/local/bin/file). Can be useful to enable
the gnu version on systems where the native one is bad.
* --disable-qtgui Disable the Qt interface. Will allow building the
indexer and the command line search program in absence of a Qt
environment.
* --disable-x11mon Disable X11 connection monitoring inside recollindex.
Together with --disable-qtgui, this allows building recoll without Qt
and X11.
* Of course the usual autoconf configure options, like --prefix apply.
Normal procedure:
cd recoll-xxx
configure
make
(practices usual hardship-repelling invocations)
There is little auto-configuration. The configure script will mainly link
one of the system-specific files in the mk directory to mk/sysconf. If
your system is not known yet, it will tell you as much, and you may want
to manually copy and modify one of the existing files (the new file name
should be the output of uname -s).
5.3.3. Installation
Either type make install or execute recollinstall prefix, in the root of
the source tree. This will copy the commands to prefix/bin and the sample
configuration files, scripts and other shared data to prefix/share/recoll.
If the installation prefix given to recollinstall is different from either
the system default or the value which was specified when executing
configure (as in configure --prefix /some/path), you will have to set the
RECOLL_DATADIR environment variable to indicate where the shared data is
to be found (ie for (ba)sh: export
RECOLL_DATADIR=/some/path/share/recoll).
You can then proceed to configuration.
--------------------------------------------------------------------------
Prev Home Next
Supporting packages Up Configuration overview
Link: HOME
Link: UP
Link: PREVIOUS
Recoll user manual
Prev Chapter 5. Installation and configuration
--------------------------------------------------------------------------
5.4. Configuration overview
Most of the parameters specific to the recoll GUI are set through the
Preferences menu and stored in the standard Qt place
($HOME/.config/Recoll.org/recoll.conf). You probably do not want to edit
this by hand.
Recoll indexing options are set inside text configuration files located in
a configuration directory. There can be several such directories, each of
which defines the parameters for one index.
The configuration files can be edited by hand or through the Indexing
configuration dialog (Preferences menu). The GUI tool will try to respect
your formatting and comments as much as possible, so it is quite possible
to use both ways.
The most accurate documentation for the configuration parameters is given
by comments inside the default files, and we will just give a general
overview here.
For each index, there are two sets of configuration files. System-wide
configuration files are kept in a directory named like
/usr/[local/]share/recoll/examples, and define default values, shared by
all indexes. For each index, a parallel set of files defines the
customized parameters.
The default location of the configuration is the .recoll directory in your
home. Most people will only use this directory.
This location can be changed, or others can be added with the
RECOLL_CONFDIR environment variable or the -c option parameter to recoll
and recollindex.
If the .recoll directory does not exist when recoll or recollindex are
started, it will be created with a set of empty configuration files.
recoll will give you a chance to edit the configuration file before
starting indexing. recollindex will proceed immediately. To avoid
mistakes, the automatic directory creation will only occur for the default
location, not if -c or RECOLL_CONFDIR were used (in the latter cases, you
will have to create the directory).
All configuration files share the same format. For example, a short
extract of the main configuration file might look as follows:
# Space-separated list of directories to index.
topdirs = ~/docs /usr/share/doc
[~/somedirectory-with-utf8-txt-files]
defaultcharset = utf-8
There are three kinds of lines:
* Comment (starts with #) or empty.
* Parameter assignment (name = value).
* Section definition ([somedirname]).
Depending on the type of configuration file, section definitions either
separate groups of parameters or allow redefining some parameters for a
directory sub-tree. They stay in effect until another section definition,
or the end of file, is encountered. Some of the parameters used for
indexing are looked up hierarchically from the current directory location
upwards. Not all parameters can be meaningfully redefined, this is
specified for each in the next section.
When found at the beginning of a file path, the tilde character (~) is
expanded to the name of the user's home directory, as a shell would do.
White space is used for separation inside lists. List elements with
embedded spaces can be quoted using double-quotes.
5.4.1. Main configuration file
recoll.conf is the main configuration file. It defines things like what to
index (top directories and things to ignore), and the default character
set to use for document types which do not specify it internally.
The default configuration will index your home directory. If this is not
appropriate, start recoll to create a blank configuration, click Cancel,
and edit the configuration file before restarting the command. This will
start the initial indexing, which may take some time.
Most of the following parameters can be changed from the Index
Configuration menu in the recoll interface. Some can only be set by
editing the configuration file.
5.4.1.1. Parameters affecting what documents we index:
topdirs
Specifies the list of directories or files to index (recursively
for directories). You can use symbolic links as elements of this
list. See the followLinks option about following symbolic links
found under the top elements (not followed by default).
skippedNames
A space-separated list of patterns for names of files or
directories that should be completely ignored. The list defined in
the default file is:
skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
*~ .beagle .git .hg .bzr loop.ps .xsession-errors \
.recoll* xapiandb recollrc recoll.conf
The list can be redefined at any sub-directory in the indexed
area.
The top-level directories are not affected by this list (that is,
a directory in topdirs might match and would still be indexed).
The list in the default configuration does not exclude hidden
directories (names beginning with a dot), which means that it may
index quite a few things that you do not want. On the other hand,
mail user agents like thunderbird usually store messages in hidden
directories, and you probably want this indexed. One possible
solution is to have .* in skippedNames, and add things like
~/.thunderbird or ~/.evolution in topdirs.
Not even the file names are indexed for patterns in this list. See
the recoll_noindex variable in mimemap for an alternative approach
which indexes the file names.
skippedPaths and daemSkippedPaths
A space-separated list of patterns for paths of files or
directories that should be skipped. There is no default in the
sample configuration file, but the code always adds the
configuration and database directories in there.
skippedPaths is used both by batch and real time indexing.
daemSkippedPaths can be used to specify things that should be
indexed at startup, but not monitored.
Example of use for skipping text files only in a specific
directory:
skippedPaths = ~/somedir/*.txt
skippedPathsFnmPathname
The values in the *skippedPaths variables are matched by default
with fnmatch(3), with the FNM_PATHNAME and FNM_LEADING_DIR flags.
This means that '/' characters must be matched explicitly. You
can set skippedPathsFnmPathname to 0 to disable the use of
FNM_PATHNAME (meaning that /*/dir3 will match /dir1/dir2/dir3).
followLinks
Specifies if the indexer should follow symbolic links while
walking the file tree. The default is to ignore symbolic links to
avoid multiple indexing of linked files. No effort is made to
avoid duplication when this option is set to true. This option can
be set individually for each of the topdirs members by using
sections. It can not be changed below the topdirs level.
indexedmimetypes
Recoll normally indexes any file which it knows how to read. This
list lets you restrict the indexed mime types to what you specify.
If the variable is unspecified or the list empty (the default),
all supported types are processed.
compressedfilemaxkbs
Size limit for compressed (.gz or .bz2) files. These need to be
decompressed in a temporary directory for identification, which
can be very wasteful if 'uninteresting' big compressed files are
present. Negative means no limit, 0 means no processing of any
compressed file. Defaults to -1.
textfilemaxmbs
Maximum size for text files. Very big text files are often
uninteresting logs. Set to -1 to disable (default 20MB).
textfilepagekbs
If set to other than -1, text files will be indexed as multiple
documents of the given page size. This may be useful if you do
want to index very big text files as it will both reduce memory
usage at index time and help with loading data to the preview
window. A size of a few megabytes would seem reasonable (default:
1MB).
indexallfilenames
Recoll indexes file names in a special section of the database to
allow specific file names searches using wild cards. This
parameter decides if file name indexing is performed only for
files with mime types that would qualify them for full text
indexing, or for all files inside the selected subtrees,
independently of mime type.
usesystemfilecommand
Decide if we use the file -i system command as a final step for
determining the mime type for a file (the main procedure uses
suffix associations as defined in the mimemap file). This can be
useful for files with suffix-less names, but it will also cause
the indexing of many bogus "text" files.
processbeaglequeue
If this is set, process the directory where Beagle Web browser
plugins copy visited pages for indexing. Of course, Beagle MUST
NOT be running, else things will behave strangely.
beaglequeuedir
The path to the Beagle indexing queue. This is hard-coded in the
Beagle plugin as ~/.beagle/ToIndex so there should be no need to
change it.
5.4.1.2. Parameters affecting how we generate terms:
Changing some of these parameters will imply a full reindex. Also, when
using multiple indexes, it may not make sense to search indexes that don't
share the values for these parameters, because they usually affect both
search and index operations.
nonumbers
If this is set to true, no terms will be generated for numbers. For
example "123", "1.5e6", 192.168.1.4, would not be indexed
("value123" would still be). Numbers are often quite interesting
to search for, and this should probably not be set except for
special situations, ie, scientific documents with huge amounts of
numbers in them. This can only be set for a whole index, not for a
subtree.
nocjk
If this is set to true, specific east asian (Chinese Korean Japanese)
characters/word splitting is turned off. This will save a small
amount of cpu if you have no CJK documents. If your document base
does include such text but you are not interested in searching it,
setting nocjk may be a significant time and space saver.
cjkngramlen
This lets you adjust the size of n-grams used for indexing CJK
text. The default value of 2 is probably appropriate in most
cases. A value of 3 would allow more precision and efficiency on
longer words, but the index will be approximately twice as large.
indexstemminglanguages
A list of languages for which the stem expansion databases will be
built. See recollindex(1) or use the recollindex -l command for
possible values. You can add a stem expansion database for a
different language by using recollindex -s, but it will be deleted
during the next indexing. Only languages listed in the
configuration file are permanent.
defaultcharset
The name of the character set used for files that do not contain a
character set definition (ie: plain text files). This can be
redefined for any sub-directory. If it is not set at all, the
character set used is the one defined by the nls environment
(LC_ALL, LC_CTYPE, LANG), or iso8859-1 if nothing is set.
maildefcharset
This can be used to define the default character set specifically
for mail messages which don't specify it. This is mainly useful
for readpst (libpst) dumps, which are utf-8 but do not say so.
localfields
This allows setting fields for all documents under a given
directory. Typical usage would be to set an "rclaptg" field, to be
used in mimeview to select a specific viewer. If several fields
are to be set, they should be separated with a colon (':')
character (which there is currently no way to escape). Ie:
localfields= rclaptg=gnus:other = val, then select specific
viewer with mimetype|tag=... in mimeview.
5.4.1.3. Parameters affecting where and how we store things:
dbdir
The name of the Xapian data directory. It will be created if
needed when the index is initialized. If this is not an absolute
path, it will be interpreted relative to the configuration
directory. The value can have embedded spaces but starting or
trailing spaces will be trimmed. You cannot use quotes here.
idxstatusfile
The name of the scratch file where the indexer process updates its
status. Default: idxstatus.txt inside the configuration directory.
maxfsoccuppc
Maximum file system occupation before we stop indexing. The value
is a percentage, corresponding to what the "Capacity" df output
column shows. The default value is 0, meaning no checking.
mboxcachedir
The directory where mbox message offsets cache files are held.
This is normally $RECOLL_CONFDIR/mboxcache, but it may be useful
to share a directory between different configurations.
mboxcacheminmbs
The minimum mbox file size over which we cache the offsets. There
is really no sense in caching offsets for small files. The default
is 5 MB.
webcachedir
This is only used by the Beagle web browser plugin indexing code,
and defines where the cache for visited pages will live. Default:
$RECOLL_CONFDIR/webcache
webcachemaxmbs
This is only used by the Beagle web browser plugin indexing code,
and defines the maximum size for the web page cache. Default: 40
MB.
idxflushmb
Threshold (megabytes of new text data) where we flush from memory
to disk index. Setting this can help control memory usage. A value
of 0 means no explicit flushing, letting Xapian use its own
default, which is flushing every 10000 (or XAPIAN_FLUSH_THRESHOLD)
documents, which gives little memory usage control, as memory
usage depends on average document size. The default value is 10.
5.4.1.4. Miscellaneous parameters:
loglevel,daemloglevel
Verbosity level for recoll and recollindex. A value of 4 lists
quite a lot of debug/information messages. 2 only lists errors.
The daem-prefixed version is specific to the indexing monitor daemon.
logfilename, daemlogfilename
Where the messages should go. 'stderr' can be used as a special
value, and is the default. The daem-prefixed version is specific to the
indexing monitor daemon.
mondelaypatterns
This allows specifying wildcard path patterns (processed with
fnmatch(3) with 0 flag), to match files which change too often and
for which a delay should be observed before re-indexing. This is a
space-separated list, each entry being a pattern and a time in
seconds, separated by a colon. You can use double quotes if a path
entry contains white space. Example:
mondelaypatterns = *.log:20 "this one has spaces*:10"
monixinterval
Minimum interval (seconds) for processing the indexing queue. The
real time monitor does not process each event when it comes in,
but will wait this time for the queue to accumulate to diminish
overhead and in order to aggregate multiple events to the same
file. Default 30 S.
monauxinterval
Period (in seconds) at which the real time monitor will regenerate
the auxiliary databases (spelling, stemming) if needed. The
default is one hour.
filtermaxseconds
Maximum filter execution time, after which it is aborted. Some
postscript programs just loop...
filtersdir
A directory to search for the external filter scripts used to
index some types of files. The value should not be changed, except
if you want to modify one of the default scripts. The value can be
redefined for any sub-directory.
iconsdir
The name of the directory where recoll result list icons are
stored. You can change this if you want different images.
idxabsmlen
Recoll stores an abstract for each indexed file inside the
database. The text can come from an actual 'abstract' section in
the document or will just be the beginning of the document. It is
stored in the index so that it can be displayed inside the result
lists without decoding the original file. The idxabsmlen parameter
defines the size of the stored abstract. The default value is 250
bytes. The search interface gives you the choice to display this
stored text or a synthetic abstract built by extracting text
around the search terms. If you always prefer the synthetic
abstract, you can reduce this value and save a little space.
aspellLanguage
Language definitions to use when creating the aspell dictionary.
The value must match a set of aspell language definition files.
You can type "aspell config" to see where these are installed
(look for data-dir). The default if the variable is not set is to
use your desktop national language environment to guess the value.
noaspell
If this is set, the aspell dictionary generation is turned off.
Useful for cases where you don't need the functionality or when it
is unusable because aspell crashes during dictionary generation.
5.4.2. The fields file
This file contains information about dynamic fields handling in Recoll.
Some very basic fields have hard-wired behaviour, and, mostly, you should
not change the original data inside the fields file. But you can create
custom fields fitting your data and handle them just like they were native
ones.
The fields file has several sections, which each define an aspect of
fields processing. Quite often, you'll have to modify several sections to
obtain the desired behaviour.
We will only give a short description here, you should refer to the
comments inside the file for more detailed information.
Field names should be lowercase alphabetic ASCII.
[prefixes]
A field becomes indexed (searchable) by having a prefix defined in
this section.
[stored]
A field becomes stored (displayable inside results) by having its
name listed in this section (typically with an empty value).
[aliases]
This section defines lists of synonyms for the canonical names
used inside the [prefixes] and [stored] sections
filter-specific sections
Some filters may need specific configuration for handling fields.
Only the mail message filter currently has such a section (named
[mail]). It allows indexing arbitrary mail headers in addition to
the ones indexed by default. Other such sections may appear in the
future.
Here follows a small example of a personal fields file. This would extract
a specific mail header and use it as a searchable field, with data
displayable inside result lists. (Side note: as the mail filter does no
decoding on the values, only plain ascii headers can be indexed, and only
the first occurrence will be used for headers that occur several times).
[prefixes]
# Index mailmytag contents (with the given prefix)
mailmytag = XMTAG
[stored]
# Store mailmytag inside the document data record (so that it can be
# displayed - as %(mailmytag) - in result lists).
mailmytag =
[mail]
# Extract the X-My-Tag mail header, and use it internally with the
# mailmytag field name
x-my-tag = mailmytag
5.4.3. The mimemap file
mimemap specifies the file name extension to mime type mappings.
For file names without an extension, or with an unknown one, the system's
file -i command will be executed to determine the mime type (this can be
switched off inside the main configuration file).
The mappings can be specified on a per-subtree basis, which may be useful
in some cases. Example: gaim logs have a .txt extension but should be
handled specially, which is possible because they are usually all located
in one place.
mimemap also has a recoll_noindex variable which is a list of suffixes.
Matching files will be skipped (which avoids unnecessary decompressions or
file executions). This is partially redundant with skippedNames in the
main configuration file, with a few differences: it will not affect
directories, it cannot be made dependent on the file-system location (it
is a configuration-wide parameter), and the file names will still be
indexed (not even the file names are indexed for patterns in skippedNames).
recoll_noindex is used mostly for things known to be unindexable by a
given Recoll version. Having it there avoids cluttering the more
user-oriented and locally customized skippedNames.
5.4.4. The mimeconf file
mimeconf specifies how the different mime types are handled for indexing,
and which icons are displayed in the recoll result lists.
Changing the parameters in the [index] section is probably not a good idea
except if you are a Recoll developer.
The [icons] section allows you to change the icons which are displayed by
recoll in the result lists (the values are the basenames of the png images
inside the iconsdir directory, which is specified in recoll.conf).
5.4.5. The mimeview file
mimeview specifies which programs are started when you click on an Open
link in a result list. Ie: HTML is normally displayed using firefox, but
you may prefer Konqueror, your openoffice.org program might be named
ooffice instead of openoffice etc.
Changes to this file can be done by direct editing, or through the recoll
user preferences dialog.
If Use desktop preferences to choose document editor is checked in the
Recoll GUI user preferences, all mimeview entries will be ignored except
the one labelled application/x-all (which is set to use xdg-open by
default).
As for the other configuration files, the normal usage is to have a
mimeview inside your own configuration directory, with just the
non-default entries, which will override those from the central
configuration file.
Please note that these entries must be placed under a [view] section.
The keys in the file are normally mime types. You can add an application
tag to specialize the choice for an area of the filesystem (using a
localfields specification in mimeconf). The syntax for the key is
mimetype|tag
The nouncompforviewmts entry, (placed at the top level, outside of the
[view] section), holds a list of mime types that should not be
uncompressed before starting the viewer (if they are found compressed, ie:
mydoc.doc.gz).
The right side of each assignment holds a command to be executed for
opening the file. The following substitutions are performed:
* %D. Document date
* %f. File name. This may be the name of a temporary file if it was
necessary to create one (ie: to extract a subdocument from a
container).
* %F. Original file name. Same as %f except if a temporary file is used.
* %i. Internal path, for subdocuments of containers. The format depends
on the container type. If this appears in the command line, Recoll
will not create a temporary file to extract the subdocument, expecting
the called application (possibly a script) to be able to handle it.
* %M. Mime type
* %U, %u. Url.
In addition to the predefined values above, all strings like %(fieldname)
will be replaced by the value of the field named fieldname for the
document. This could be used in combination with field customisation to
help with opening the document.
5.4.6. Examples of configuration adjustments
5.4.6.1. Adding an external viewer for a non-indexed type
Imagine that you have some kind of file which does not have indexable
content, but for which you would like to have a functional Open link in
the result list (when found by file name). The file names end in .blob and
can be displayed by application blobviewer.
You need two entries in the configuration files for this to work:
* In $RECOLL_CONFDIR/mimemap (typically ~/.recoll/mimemap), add the
following line:
.blob = application/x-blobapp
Note that the mime type is made up here, and you could call it
diesel/oil just the same.
* In $RECOLL_CONFDIR/mimeview under the [view] section, add:
application/x-blobapp = blobviewer %f
We are supposing that blobviewer wants a file name parameter here, you
would use %u if it liked URLs better.
If you just wanted to change the application used by Recoll to display a
mime type which it already knows, you would just need to edit mimeview.
The entries you add in your personal file override those in the central
configuration, which you do not need to alter. mimeview can also be
modified from the Gui.
5.4.6.2. Adding indexing support for a new file type
Let us now imagine that the above .blob files actually contain indexable
text and that you know how to extract it with a command line program.
Getting Recoll to index the files is easy. You need to perform the above
alteration, and also to add data to the mimeconf file (typically in
~/.recoll/mimeconf):
* Under the [index] section, add the following line (more about the
rclblob indexing script later):
application/x-blobapp = exec rclblob
* Under the [icons] section, you should choose an icon to be displayed
for the files inside the result lists. Icons are normally 64x64 pixels
PNG files which live in /usr/[local/]share/recoll/images.
* Under the [categories] section, you should add the mime type where it
makes sense (you can also create a category). Categories may be used
for filtering in advanced search.
The rclblob filter should be an executable program or script which exists
inside /usr/[local/]share/recoll/filters. It will be given a file name as
argument and should output the text or html contents on the standard
output.
The filter programming section describes in more detail how to write a
filter.
--------------------------------------------------------------------------
Prev Home
Building from source Up

81
src/Makefile.in Normal file
View file

@ -0,0 +1,81 @@
# Copyright (C) 2005 J.F.Dockes
# Installation directories and tool names. Every @name@ token is a
# placeholder; this file is a template (Makefile.in) and the mk/sysconf rule
# further down tells the user to run configure first.
prefix = @prefix@
exec_prefix = @exec_prefix@
bindir = @bindir@
datadir = @datadir@
datarootdir = @datarootdir@
mandir = @mandir@
QMAKE = @QMAKE@
QTGUI = @QTGUI@
# Default target: run make in lib, index (recollindex), the Qt GUI directory,
# python/recoll and query (recollq), in that order. Requires mk/sysconf.
# NOTE(review): @NOQTMAKE@ and @NOPYTHON@ are substituted at configure time;
# presumably they become a comment marker when the matching component is
# disabled -- confirm in the configure script.
all: mk/sysconf
${MAKE} -C lib
${MAKE} -C index recollindex
@NOQTMAKE@(cd $(QTGUI); ${QMAKE} recoll.pro)
@NOQTMAKE@${MAKE} -C $(QTGUI) depth=..
@NOPYTHON@${MAKE} -C python/recoll
${MAKE} -C query recollq
# mk/sysconf is expected to exist already. If make ever has to "build" it,
# print a hint and fail instead.
mk/sysconf:
@echo "You need to run configure first" ; exit 1
# Rebuild recollindex and the Qt GUI with BSTATIC/BDYNAMIC set to the
# -Wl,-Bstatic / -Wl,-Bdynamic linker switches. The existing index/recollindex
# and $(QTGUI)/recoll binaries are removed first so that they get relinked.
static: mk/sysconf
${MAKE} -C lib
rm -f index/recollindex
${MAKE} -C index BSTATIC=-Wl,-Bstatic BDYNAMIC=-Wl,-Bdynamic \
LIBXAPIANSTATICEXTRA="@LIBXAPIANSTATICEXTRA@" \
recollindex
@NOQTMAKE@(cd $(QTGUI); $(QMAKE) recoll.pro)
@NOQTMAKE@rm -f $(QTGUI)/recoll
@NOQTMAKE@${MAKE} -C $(QTGUI) BSTATIC=-Wl,-Bstatic \
@NOQTMAKE@ BDYNAMIC=-Wl,-Bdynamic depth=.. \
@NOQTMAKE@ LIBXAPIANSTATICEXTRA="@LIBXAPIANSTATICEXTRA@"
# Remove build products: run "clean" in every sub-directory, then delete the
# generated qtgui Makefiles, binaries and intermediate directories.
# Recipe lines starting with "-" have their errors ignored, so a sub-directory
# that cannot be cleaned does not abort the whole target.
clean:
	${MAKE} -C common clean
	${MAKE} -C index clean
	${MAKE} -C internfile clean
	${MAKE} -C lib clean
	${MAKE} -C query clean
	${MAKE} -C utils clean
	-${MAKE} -C desktop/unity-lens-recoll clean
	-${MAKE} -C python/recoll clean
	@NOQTMAKE@@-${MAKE} -C $(QTGUI) clean
	rm -f qtgui/Makefile qtgui/confgui/Makefile qtgui/recoll
	rm -f filters/rclexecm.pyc
	rm -rf qtgui/.moc qtgui/.ui qtgui/.obj
	rm -rf qtgui/confgui/.moc qtgui/confgui/.ui
	rm -rf python/recoll/build
	rm -rf $(QTGUI)/recoll.app
# Everything "clean" does, plus removal of the files listed below
# (mk/sysconf, mk/localdefs, config.log, config.status, the generated
# headers, the dependency files and autom4te.cache).
# Note: we don't remove the top Makefile, to keep the "clean" targets
# available but a "Make" won't work without a configure anyway
distclean: clean
-${MAKE} -C desktop/unity-lens-recoll distclean
-${MAKE} -C python/recoll distclean
rm -f mk/sysconf mk/localdefs sampleconf/recoll.conf \
qtgui/recoll.pro \
config.log config.status \
recollinstall \
lib/*.dep common/autoconfig.h
rm -f common/rclversion.h
rm -f index/alldeps lib/alldeps query/alldeps bincimapmime/alldeps
rm -rf autom4te.cache
# Everything "distclean" does, plus removal of the generated user
# documentation (doc/user) and the compiled translations (qtgui/i18n/*.qm).
maintainer-clean: distclean
rm -f doc/user/*.html* doc/user/*.txt doc/user/HTML.manifest
rm -f qtgui/i18n/*.qm
# recollinstall can be executed by the user and will compute 'normal'
# values for bindir etc., relative to the prefix argument. When executed
# here, we pass a bunch of variables in the environment, the values will
# override the computed defaults.
# The target depends on "all", so everything is built before the script runs.
install: all
DESTDIR=${DESTDIR} bindir=${bindir} datadir=${datadir} \
mandir=${mandir} \
/bin/sh ./recollinstall ${prefix}
# Targets that name an action rather than a file; declaring them phony keeps
# a stray file of the same name from making the target look up to date.
.PHONY: all static clean distclean maintainer-clean install

3338
src/README Normal file

File diff suppressed because it is too large Load diff

1
src/VERSION Normal file
View file

@ -0,0 +1 @@
1.17.1

31
src/aspell/Makefile Normal file
View file

@ -0,0 +1,31 @@
# Builds rclaspell, a program made from rclaspell.cpp compiled with
# -DTEST_RCLASPELL and linked against $(BIGLIB).
depth = ..
include $(depth)/mk/sysconf
PROGS = rclaspell
SRCS = rclaspell.cpp
all: depend $(BIGLIB) $(PROGS)
RCLASPELL_OBJS= trrclaspell.o $(BIGLIB)
rclaspell : $(RCLASPELL_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o rclaspell $(RCLASPELL_OBJS) \
$(LIBXAPIAN) $(LIBICONV)
# Same source as the library object, but with TEST_RCLASPELL defined; hence
# the distinct object name.
trrclaspell.o : rclaspell.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_RCLASPELL -c -o trrclaspell.o \
rclaspell.cpp
# "force" has neither prerequisites nor a recipe, so this rule runs on every
# invocation and lets the lib directory decide what needs rebuilding.
$(BIGLIB): force
cd $(depth)/lib;$(MAKE)
force:
# Dependency tracking: alldeps is regenerated whenever a source changes and is
# included at the bottom of this file.
depend: alldeps.stamp
alldeps.stamp : $(SRCS)
$(CXX) -M $(ALL_CXXFLAGS) $(SRCS) > alldeps
touch alldeps.stamp
# alldeps is emptied rather than deleted -- presumably so that the
# "include alldeps" line keeps working after a clean.
clean:
cp /dev/null alldeps
rm -f alldeps.stamp
rm -f *.o $(PROGS)
include alldeps

729
src/aspell/aspell-local.h Normal file
View file

@ -0,0 +1,729 @@
/* Automatically generated file. Do not edit directly. */
/* This file is part of The New Aspell
* Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL
* license version 2.0 or 2.1. You should have received a copy of the
* LGPL license along with this library if you did not you can find it
* at http://www.gnu.org/. */
#ifndef ASPELL_ASPELL__H
#define ASPELL_ASPELL__H
#ifdef __cplusplus
extern "C" {
#endif
/******************************* type id *******************************/
/* Type identifier. Being a union, the same storage can be read either
* as an unsigned integer (num) or as a four-character array (str). */
union AspellTypeId {
unsigned int num;
char str[4];
};
typedef union AspellTypeId AspellTypeId;
/************************** mutable container **************************/
typedef struct AspellMutableContainer AspellMutableContainer;
int aspell_mutable_container_add(struct AspellMutableContainer * ths, const char * to_add);
int aspell_mutable_container_remove(struct AspellMutableContainer * ths, const char * to_rem);
void aspell_mutable_container_clear(struct AspellMutableContainer * ths);
struct AspellMutableContainer * aspell_mutable_container_to_mutable_container(struct AspellMutableContainer * ths);
/******************************* key info *******************************/
/* The kinds of value a configuration key can hold. */
enum AspellKeyInfoType {AspellKeyInfoString, AspellKeyInfoInt, AspellKeyInfoBool, AspellKeyInfoList};
typedef enum AspellKeyInfoType AspellKeyInfoType;
/* Description of a single configuration key. */
struct AspellKeyInfo {
/* The name of the key. */
const char * name;
/* The key type. */
enum AspellKeyInfoType type;
/* The default value of the key. */
const char * def;
/* A brief description of the key or NULL if internal value. */
const char * desc;
/* NOTE(review): the meaning of flags and other_data is not documented
* in this header. */
int flags;
int other_data;
};
typedef struct AspellKeyInfo AspellKeyInfo;
/******************************** config ********************************/
typedef struct AspellKeyInfoEnumeration AspellKeyInfoEnumeration;
int aspell_key_info_enumeration_at_end(const struct AspellKeyInfoEnumeration * ths);
const struct AspellKeyInfo * aspell_key_info_enumeration_next(struct AspellKeyInfoEnumeration * ths);
void delete_aspell_key_info_enumeration(struct AspellKeyInfoEnumeration * ths);
struct AspellKeyInfoEnumeration * aspell_key_info_enumeration_clone(const struct AspellKeyInfoEnumeration * ths);
void aspell_key_info_enumeration_assign(struct AspellKeyInfoEnumeration * ths, const struct AspellKeyInfoEnumeration * other);
typedef struct AspellConfig AspellConfig;
/* Returns a new AspellConfig object (see delete_aspell_config()).
* Declared with (void) so that C compilers see a real prototype and
* reject calls that pass arguments; the meaning is unchanged in C++. */
struct AspellConfig * new_aspell_config(void);
void delete_aspell_config(struct AspellConfig * ths);
struct AspellConfig * aspell_config_clone(const struct AspellConfig * ths);
void aspell_config_assign(struct AspellConfig * ths, const struct AspellConfig * other);
unsigned int aspell_config_error_number(const struct AspellConfig * ths);
const char * aspell_config_error_message(const struct AspellConfig * ths);
const struct AspellError * aspell_config_error(const struct AspellConfig * ths);
/* Sets extra keys which this config class should
* accept. begin and end are expected to point to
* the beginning and ending of an array of Aspell
* Key Info. */
void aspell_config_set_extra(struct AspellConfig * ths, const struct AspellKeyInfo * begin, const struct AspellKeyInfo * end);
/* Returns the KeyInfo object for the
* corresponding key or returns NULL and sets
* error_num to PERROR_UNKNOWN_KEY if the key is
* not valid. The pointer returned is valid for
* the lifetime of the object. */
const struct AspellKeyInfo * aspell_config_keyinfo(struct AspellConfig * ths, const char * key);
/* Returns a newly allocated enumeration of all
* the possible objects this config class uses. */
struct AspellKeyInfoEnumeration * aspell_config_possible_elements(struct AspellConfig * ths, int include_extra);
/* Returns the default value for the given key, which
* may involve substituting variables; thus it is
* not the same as keyinfo(key)->def. Returns NULL
* and sets error_num to PERROR_UNKNOWN_KEY if
* the key is not valid. Uses the temporary
* string. */
const char * aspell_config_get_default(struct AspellConfig * ths, const char * key);
/* Returns a newly allocated enumeration of all
* the key/value pairs. This DOES not include ones
* which are set to their default values. */
struct AspellStringPairEnumeration * aspell_config_elements(struct AspellConfig * ths);
/* Inserts an item; if the item already exists it
* will be replaced. Returns TRUE if it succeeded
* or FALSE on error. If the key is not valid it
* sets error_num to PERROR_UNKNOWN_KEY, if the
* value is not valid it will set error_num to
* PERROR_BAD_VALUE, if the value can not be
* changed it sets error_num to
* PERROR_CANT_CHANGE_VALUE, and if the value is
* a list and you are trying to set its directory,
* it sets error_num to PERROR_LIST_SET */
int aspell_config_replace(struct AspellConfig * ths, const char * key, const char * value);
/* Remove a key and returns TRUE if it exists
* otherwise return FALSE. This effectively sets
* the key to its default value. Calling replace
* with a value of "<default>" will also call
* remove. If the key does not exist then it sets
* error_num to 0 or PERROR_NOT, if the key is
* not valid then it sets error_num to
* PERROR_UNKNOWN_KEY, if the value can not be
* changed then it sets error_num to
* PERROR_CANT_CHANGE_VALUE */
int aspell_config_remove(struct AspellConfig * ths, const char * key);
int aspell_config_have(const struct AspellConfig * ths, const char * key);
/* Returns NULL on error. */
const char * aspell_config_retrieve(struct AspellConfig * ths, const char * key);
int aspell_config_retrieve_list(struct AspellConfig * ths, const char * key, struct AspellMutableContainer * lst);
/* Return -1 on error, 0 if false, 1 if true. */
int aspell_config_retrieve_bool(struct AspellConfig * ths, const char * key);
/* Return -1 on error. */
int aspell_config_retrieve_int(struct AspellConfig * ths, const char * key);
/******************************** error ********************************/
/* An error: a message plus a pointer to the AspellErrorInfo entry that
* identifies what kind of error it is. */
struct AspellError {
const char * mesg;
const struct AspellErrorInfo * err;
};
typedef struct AspellError AspellError;
int aspell_error_is_a(const struct AspellError * ths, const struct AspellErrorInfo * e);
/* Description of one kind of error. isa points to another AspellErrorInfo,
* presumably the more general kind this one belongs to (TODO confirm).
* parms has room for three strings; num_parms presumably tells how many
* of them are used (TODO confirm). */
struct AspellErrorInfo {
const struct AspellErrorInfo * isa;
const char * mesg;
unsigned int num_parms;
const char * parms[3];
};
typedef struct AspellErrorInfo AspellErrorInfo;
/**************************** can have error ****************************/
typedef struct AspellCanHaveError AspellCanHaveError;
unsigned int aspell_error_number(const struct AspellCanHaveError * ths);
const char * aspell_error_message(const struct AspellCanHaveError * ths);
const struct AspellError * aspell_error(const struct AspellCanHaveError * ths);
void delete_aspell_can_have_error(struct AspellCanHaveError * ths);
/******************************** errors ********************************/
extern const struct AspellErrorInfo * const aerror_other;
extern const struct AspellErrorInfo * const aerror_operation_not_supported;
extern const struct AspellErrorInfo * const aerror_cant_copy;
extern const struct AspellErrorInfo * const aerror_unimplemented_method;
extern const struct AspellErrorInfo * const aerror_file;
extern const struct AspellErrorInfo * const aerror_cant_open_file;
extern const struct AspellErrorInfo * const aerror_cant_read_file;
extern const struct AspellErrorInfo * const aerror_cant_write_file;
extern const struct AspellErrorInfo * const aerror_invalid_name;
extern const struct AspellErrorInfo * const aerror_bad_file_format;
extern const struct AspellErrorInfo * const aerror_dir;
extern const struct AspellErrorInfo * const aerror_cant_read_dir;
extern const struct AspellErrorInfo * const aerror_config;
extern const struct AspellErrorInfo * const aerror_unknown_key;
extern const struct AspellErrorInfo * const aerror_cant_change_value;
extern const struct AspellErrorInfo * const aerror_bad_key;
extern const struct AspellErrorInfo * const aerror_bad_value;
extern const struct AspellErrorInfo * const aerror_duplicate;
extern const struct AspellErrorInfo * const aerror_key_not_string;
extern const struct AspellErrorInfo * const aerror_key_not_int;
extern const struct AspellErrorInfo * const aerror_key_not_bool;
extern const struct AspellErrorInfo * const aerror_key_not_list;
extern const struct AspellErrorInfo * const aerror_no_value_reset;
extern const struct AspellErrorInfo * const aerror_no_value_enable;
extern const struct AspellErrorInfo * const aerror_no_value_disable;
extern const struct AspellErrorInfo * const aerror_no_value_clear;
extern const struct AspellErrorInfo * const aerror_language_related;
extern const struct AspellErrorInfo * const aerror_unknown_language;
extern const struct AspellErrorInfo * const aerror_unknown_soundslike;
extern const struct AspellErrorInfo * const aerror_language_not_supported;
extern const struct AspellErrorInfo * const aerror_no_wordlist_for_lang;
extern const struct AspellErrorInfo * const aerror_mismatched_language;
extern const struct AspellErrorInfo * const aerror_affix;
extern const struct AspellErrorInfo * const aerror_corrupt_affix;
extern const struct AspellErrorInfo * const aerror_invalid_cond;
extern const struct AspellErrorInfo * const aerror_invalid_cond_strip;
extern const struct AspellErrorInfo * const aerror_incorrect_encoding;
extern const struct AspellErrorInfo * const aerror_encoding;
extern const struct AspellErrorInfo * const aerror_unknown_encoding;
extern const struct AspellErrorInfo * const aerror_encoding_not_supported;
extern const struct AspellErrorInfo * const aerror_conversion_not_supported;
extern const struct AspellErrorInfo * const aerror_pipe;
extern const struct AspellErrorInfo * const aerror_cant_create_pipe;
extern const struct AspellErrorInfo * const aerror_process_died;
extern const struct AspellErrorInfo * const aerror_bad_input;
extern const struct AspellErrorInfo * const aerror_invalid_string;
extern const struct AspellErrorInfo * const aerror_invalid_word;
extern const struct AspellErrorInfo * const aerror_invalid_affix;
extern const struct AspellErrorInfo * const aerror_inapplicable_affix;
extern const struct AspellErrorInfo * const aerror_unknown_unichar;
extern const struct AspellErrorInfo * const aerror_word_list_flags;
extern const struct AspellErrorInfo * const aerror_invalid_flag;
extern const struct AspellErrorInfo * const aerror_conflicting_flags;
extern const struct AspellErrorInfo * const aerror_version_control;
extern const struct AspellErrorInfo * const aerror_bad_version_string;
extern const struct AspellErrorInfo * const aerror_filter;
extern const struct AspellErrorInfo * const aerror_cant_dlopen_file;
extern const struct AspellErrorInfo * const aerror_empty_filter;
extern const struct AspellErrorInfo * const aerror_no_such_filter;
extern const struct AspellErrorInfo * const aerror_confusing_version;
extern const struct AspellErrorInfo * const aerror_bad_version;
extern const struct AspellErrorInfo * const aerror_identical_option;
extern const struct AspellErrorInfo * const aerror_options_only;
extern const struct AspellErrorInfo * const aerror_invalid_option_modifier;
extern const struct AspellErrorInfo * const aerror_cant_describe_filter;
extern const struct AspellErrorInfo * const aerror_filter_mode_file;
extern const struct AspellErrorInfo * const aerror_mode_option_name;
extern const struct AspellErrorInfo * const aerror_no_filter_to_option;
extern const struct AspellErrorInfo * const aerror_bad_mode_key;
extern const struct AspellErrorInfo * const aerror_expect_mode_key;
extern const struct AspellErrorInfo * const aerror_mode_version_requirement;
extern const struct AspellErrorInfo * const aerror_confusing_mode_version;
extern const struct AspellErrorInfo * const aerror_bad_mode_version;
extern const struct AspellErrorInfo * const aerror_missing_magic_expression;
extern const struct AspellErrorInfo * const aerror_empty_file_ext;
extern const struct AspellErrorInfo * const aerror_filter_mode_expand;
extern const struct AspellErrorInfo * const aerror_unknown_mode;
extern const struct AspellErrorInfo * const aerror_mode_extend_expand;
extern const struct AspellErrorInfo * const aerror_filter_mode_magic;
extern const struct AspellErrorInfo * const aerror_file_magic_pos;
extern const struct AspellErrorInfo * const aerror_file_magic_range;
extern const struct AspellErrorInfo * const aerror_missing_magic;
extern const struct AspellErrorInfo * const aerror_bad_magic;
extern const struct AspellErrorInfo * const aerror_expression;
extern const struct AspellErrorInfo * const aerror_invalid_expression;
/******************************* speller *******************************/
typedef struct AspellSpeller AspellSpeller;
struct AspellCanHaveError * new_aspell_speller(struct AspellConfig * config);
struct AspellSpeller * to_aspell_speller(struct AspellCanHaveError * obj);
void delete_aspell_speller(struct AspellSpeller * ths);
unsigned int aspell_speller_error_number(const struct AspellSpeller * ths);
const char * aspell_speller_error_message(const struct AspellSpeller * ths);
const struct AspellError * aspell_speller_error(const struct AspellSpeller * ths);
struct AspellConfig * aspell_speller_config(struct AspellSpeller * ths);
/* Returns 0 if it is not in the dictionary,
* 1 if it is, or -1 on error. */
int aspell_speller_check(struct AspellSpeller * ths, const char * word, int word_size);
/* Add this word to your own personal word list. */
int aspell_speller_add_to_personal(struct AspellSpeller * ths, const char * word, int word_size);
/* Add this word to the current spelling session. */
int aspell_speller_add_to_session(struct AspellSpeller * ths, const char * word, int word_size);
/* This is your own personal word list file plus
* any extra words added during this session to
* your own personal word list. */
const struct AspellWordList * aspell_speller_personal_word_list(struct AspellSpeller * ths);
/* This is a list of words added to this session
* that are not in the main word list or in your
* own personal list but are considered valid for
* this spelling session. */
const struct AspellWordList * aspell_speller_session_word_list(struct AspellSpeller * ths);
/* This is the main list of words used during this
* spelling session. */
const struct AspellWordList * aspell_speller_main_word_list(struct AspellSpeller * ths);
int aspell_speller_save_all_word_lists(struct AspellSpeller * ths);
int aspell_speller_clear_session(struct AspellSpeller * ths);
/* Return NULL on error.
* The word list returned by suggest is only
* valid until the next call to suggest. */
const struct AspellWordList * aspell_speller_suggest(struct AspellSpeller * ths, const char * word, int word_size);
int aspell_speller_store_replacement(struct AspellSpeller * ths, const char * mis, int mis_size, const char * cor, int cor_size);
/******************************** filter ********************************/
typedef struct AspellFilter AspellFilter;
void delete_aspell_filter(struct AspellFilter * ths);
unsigned int aspell_filter_error_number(const struct AspellFilter * ths);
const char * aspell_filter_error_message(const struct AspellFilter * ths);
const struct AspellError * aspell_filter_error(const struct AspellFilter * ths);
struct AspellFilter * to_aspell_filter(struct AspellCanHaveError * obj);
/*************************** document checker ***************************/
/* Token returned by aspell_document_checker_next_misspelling().
* NOTE(review): offset and len presumably locate the word inside the
* string last given to aspell_document_checker_process(); the unit
* (bytes or characters) is not visible in this header -- confirm. */
struct AspellToken {
unsigned int offset;
unsigned int len;
};
typedef struct AspellToken AspellToken;
typedef struct AspellDocumentChecker AspellDocumentChecker;
void delete_aspell_document_checker(struct AspellDocumentChecker * ths);
unsigned int aspell_document_checker_error_number(const struct AspellDocumentChecker * ths);
const char * aspell_document_checker_error_message(const struct AspellDocumentChecker * ths);
const struct AspellError * aspell_document_checker_error(const struct AspellDocumentChecker * ths);
/* Creates a new document checker.
* The speller class is expected to last until
* this class is destroyed.
* If config is given it will be used to override
* any relevant options set by this speller class.
* The config class is not used once this function is done.
* If filter is given then it will take ownership of
* the filter class and use it to do the filtering.
* You are expected to free the checker when done. */
struct AspellCanHaveError * new_aspell_document_checker(struct AspellSpeller * speller);
struct AspellDocumentChecker * to_aspell_document_checker(struct AspellCanHaveError * obj);
/* Reset the internal state of the filter.
* Should be called whenever a new document is
* being filtered. */
void aspell_document_checker_reset(struct AspellDocumentChecker * ths);
/* Process a string.
* The string passed in should only be split on
* white space characters. Furthermore, between
* calls to reset, each string should be passed
* in exactly once and in the order they appeared
* in the document. Passing in strings out of
* order, skipping strings or passing them in
* more than once may lead to undefined results. */
void aspell_document_checker_process(struct AspellDocumentChecker * ths, const char * str, int size);
/* Returns the next misspelled word in the
* processed string. If there are no more
* misspelled words, then the returned token's
* len will be 0 (struct AspellToken only has
* the offset and len members). */
struct AspellToken aspell_document_checker_next_misspelling(struct AspellDocumentChecker * ths);
/* Returns the underlying filter class. */
struct AspellFilter * aspell_document_checker_filter(struct AspellDocumentChecker * ths);
/****************************** word list ******************************/
typedef struct AspellWordList AspellWordList;
int aspell_word_list_empty(const struct AspellWordList * ths);
unsigned int aspell_word_list_size(const struct AspellWordList * ths);
struct AspellStringEnumeration * aspell_word_list_elements(const struct AspellWordList * ths);
/************************** string enumeration **************************/
typedef struct AspellStringEnumeration AspellStringEnumeration;
void delete_aspell_string_enumeration(struct AspellStringEnumeration * ths);
struct AspellStringEnumeration * aspell_string_enumeration_clone(const struct AspellStringEnumeration * ths);
void aspell_string_enumeration_assign(struct AspellStringEnumeration * ths, const struct AspellStringEnumeration * other);
int aspell_string_enumeration_at_end(const struct AspellStringEnumeration * ths);
const char * aspell_string_enumeration_next(struct AspellStringEnumeration * ths);
/********************************* info *********************************/
/* Information about one module, as returned by
* aspell_module_info_enumeration_next(). Judging by the member names it
* carries the module name, its library directory and lists of dictionary
* directories and extensions -- TODO confirm.
* NOTE(review): the meaning of order_num is not visible in this header. */
struct AspellModuleInfo {
const char * name;
double order_num;
const char * lib_dir;
struct AspellStringList * dict_dirs;
struct AspellStringList * dict_exts;
};
typedef struct AspellModuleInfo AspellModuleInfo;
struct AspellDictInfo {
/* The Name to identify this dictionary by. */
const char * name;
/* The language code to identify this dictionary.
* A two letter UPPER-CASE ISO 639 language code
* and an optional two letter ISO 3166 country
* code after a dash or underscore. */
const char * code;
/* Any extra information to distinguish this
* variety of dictionary from other dictionaries
* which may have the same language and size. */
const char * jargon;
int size;
/* A two char digit code describing the size of
* the dictionary: 10=tiny, 20=really small,
* 30=small, 40=med-small, 50=med, 60=med-large,
* 70=large, 80=huge, 90=insane. Please check
* the README in aspell-lang-200?????.tar.bz2 or
* see SCOWL (http://wordlist.sourceforge.net)
* for an example of how these sizes are used. */
const char * size_str;
struct AspellModuleInfo * module;
};
typedef struct AspellDictInfo AspellDictInfo;
typedef struct AspellModuleInfoList AspellModuleInfoList;
struct AspellModuleInfoList * get_aspell_module_info_list(struct AspellConfig * config);
int aspell_module_info_list_empty(const struct AspellModuleInfoList * ths);
unsigned int aspell_module_info_list_size(const struct AspellModuleInfoList * ths);
struct AspellModuleInfoEnumeration * aspell_module_info_list_elements(const struct AspellModuleInfoList * ths);
typedef struct AspellDictInfoList AspellDictInfoList;
struct AspellDictInfoList * get_aspell_dict_info_list(struct AspellConfig * config);
int aspell_dict_info_list_empty(const struct AspellDictInfoList * ths);
unsigned int aspell_dict_info_list_size(const struct AspellDictInfoList * ths);
struct AspellDictInfoEnumeration * aspell_dict_info_list_elements(const struct AspellDictInfoList * ths);
typedef struct AspellModuleInfoEnumeration AspellModuleInfoEnumeration;
int aspell_module_info_enumeration_at_end(const struct AspellModuleInfoEnumeration * ths);
const struct AspellModuleInfo * aspell_module_info_enumeration_next(struct AspellModuleInfoEnumeration * ths);
void delete_aspell_module_info_enumeration(struct AspellModuleInfoEnumeration * ths);
struct AspellModuleInfoEnumeration * aspell_module_info_enumeration_clone(const struct AspellModuleInfoEnumeration * ths);
void aspell_module_info_enumeration_assign(struct AspellModuleInfoEnumeration * ths, const struct AspellModuleInfoEnumeration * other);
typedef struct AspellDictInfoEnumeration AspellDictInfoEnumeration;
int aspell_dict_info_enumeration_at_end(const struct AspellDictInfoEnumeration * ths);
const struct AspellDictInfo * aspell_dict_info_enumeration_next(struct AspellDictInfoEnumeration * ths);
void delete_aspell_dict_info_enumeration(struct AspellDictInfoEnumeration * ths);
struct AspellDictInfoEnumeration * aspell_dict_info_enumeration_clone(const struct AspellDictInfoEnumeration * ths);
void aspell_dict_info_enumeration_assign(struct AspellDictInfoEnumeration * ths, const struct AspellDictInfoEnumeration * other);
/***************************** string list *****************************/
typedef struct AspellStringList AspellStringList;
/* Returns a new AspellStringList object (see delete_aspell_string_list()).
* Declared with (void) so that C compilers see a real prototype and
* reject calls that pass arguments; the meaning is unchanged in C++. */
struct AspellStringList * new_aspell_string_list(void);
int aspell_string_list_empty(const struct AspellStringList * ths);
unsigned int aspell_string_list_size(const struct AspellStringList * ths);
struct AspellStringEnumeration * aspell_string_list_elements(const struct AspellStringList * ths);
int aspell_string_list_add(struct AspellStringList * ths, const char * to_add);
int aspell_string_list_remove(struct AspellStringList * ths, const char * to_rem);
void aspell_string_list_clear(struct AspellStringList * ths);
struct AspellMutableContainer * aspell_string_list_to_mutable_container(struct AspellStringList * ths);
void delete_aspell_string_list(struct AspellStringList * ths);
struct AspellStringList * aspell_string_list_clone(const struct AspellStringList * ths);
void aspell_string_list_assign(struct AspellStringList * ths, const struct AspellStringList * other);
/****************************** string map ******************************/
typedef struct AspellStringMap AspellStringMap;
/* Returns a new AspellStringMap object (see delete_aspell_string_map()).
* Declared with (void) so that C compilers see a real prototype and
* reject calls that pass arguments; the meaning is unchanged in C++. */
struct AspellStringMap * new_aspell_string_map(void);
int aspell_string_map_add(struct AspellStringMap * ths, const char * to_add);
int aspell_string_map_remove(struct AspellStringMap * ths, const char * to_rem);
void aspell_string_map_clear(struct AspellStringMap * ths);
struct AspellMutableContainer * aspell_string_map_to_mutable_container(struct AspellStringMap * ths);
void delete_aspell_string_map(struct AspellStringMap * ths);
struct AspellStringMap * aspell_string_map_clone(const struct AspellStringMap * ths);
void aspell_string_map_assign(struct AspellStringMap * ths, const struct AspellStringMap * other);
int aspell_string_map_empty(const struct AspellStringMap * ths);
unsigned int aspell_string_map_size(const struct AspellStringMap * ths);
struct AspellStringPairEnumeration * aspell_string_map_elements(const struct AspellStringMap * ths);
/* Insert a new element.
* Will NOT overwrite an existing entry.
* Returns FALSE if the element already exists. */
int aspell_string_map_insert(struct AspellStringMap * ths, const char * key, const char * value);
/* Insert a new element.
* Will overwrite an existing entry.
* Always returns TRUE. */
int aspell_string_map_replace(struct AspellStringMap * ths, const char * key, const char * value);
/* Looks up an element and returns the value.
* Returns NULL if the element does not exist.
* Returns an empty string if the element exists
* but has a NULL value. */
const char * aspell_string_map_lookup(const struct AspellStringMap * ths, const char * key);
/***************************** string pair *****************************/
/* A pair of strings; this is the element type returned by
* aspell_string_pair_enumeration_next(). */
struct AspellStringPair {
const char * first;
const char * second;
};
typedef struct AspellStringPair AspellStringPair;
/*********************** string pair enumeration ***********************/
typedef struct AspellStringPairEnumeration AspellStringPairEnumeration;
int aspell_string_pair_enumeration_at_end(const struct AspellStringPairEnumeration * ths);
struct AspellStringPair aspell_string_pair_enumeration_next(struct AspellStringPairEnumeration * ths);
void delete_aspell_string_pair_enumeration(struct AspellStringPairEnumeration * ths);
struct AspellStringPairEnumeration * aspell_string_pair_enumeration_clone(const struct AspellStringPairEnumeration * ths);
void aspell_string_pair_enumeration_assign(struct AspellStringPairEnumeration * ths, const struct AspellStringPairEnumeration * other);
/******************************** cache ********************************/
/* Reset the global cache(s) so that cache queries will
* create a new object. If existing objects are still in
* use they are not deleted. If which is NULL then all
* caches will be reset. Current caches are "encode",
* "decode", "dictionary", "language", and "keyboard". */
int aspell_reset_cache(const char * which);
#ifdef __cplusplus
}
#endif
#endif /* ASPELL_ASPELL__H */

520
src/aspell/rclaspell.cpp Normal file
View file

@ -0,0 +1,520 @@
#ifndef TEST_RCLASPELL
/* Copyright (C) 2006 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "autoconfig.h"
#endif
#ifdef RCL_USE_ASPELL
#include <unistd.h>
#include <dlfcn.h>
#include <iostream>
#include <stdlib.h>
#include <vector>
#include ASPELL_INCLUDE
#include "pathut.h"
#include "execmd.h"
#include "rclaspell.h"
#include "debuglog.h"
#include "ptmutex.h"
// Just a place where we keep the Aspell library entry points together.
// Every member is a pointer to the aspell function of the same name; the
// NMTOPTR macro fills a member in by looking its name up with dlsym().
class AspellApi {
public:
struct AspellConfig *(*new_aspell_config)();
int (*aspell_config_replace)(struct AspellConfig *, const char * key,
const char * value);
struct AspellCanHaveError *(*new_aspell_speller)(struct AspellConfig *);
void (*delete_aspell_config)(struct AspellConfig *);
void (*delete_aspell_can_have_error)(struct AspellCanHaveError *);
struct AspellSpeller * (*to_aspell_speller)(struct AspellCanHaveError *);
struct AspellConfig * (*aspell_speller_config)(struct AspellSpeller *);
const struct AspellWordList * (*aspell_speller_suggest)
(struct AspellSpeller *, const char *, int);
int (*aspell_speller_check)(struct AspellSpeller *, const char *, int);
struct AspellStringEnumeration * (*aspell_word_list_elements)
(const struct AspellWordList * ths);
const char * (*aspell_string_enumeration_next)
(struct AspellStringEnumeration * ths);
void (*delete_aspell_string_enumeration)(struct AspellStringEnumeration *);
const struct AspellError *(*aspell_error)
(const struct AspellCanHaveError *);
const char *(*aspell_error_message)(const struct AspellCanHaveError *);
const char *(*aspell_speller_error_message)(const struct AspellSpeller *);
void (*delete_aspell_speller)(struct AspellSpeller *);
};
// The single, file-wide table of aspell entry points.
static AspellApi aapi;
// Locked by Aspell::init() for its whole duration.
static PTMutexInit o_aapi_mutex;
// NMTOPTR(NM, TP): look symbol NM up in m_data->m_handle with dlsym(), cast
// the result with TP and store it in aapi.NM. If the lookup yields 0 the name
// is appended to the local string "badnames", which the caller must declare.
// NOTE(review): the expansion is a bare "if" statement, not wrapped in
// do { } while (0); avoid using it as the body of an unbraced if/else.
#define NMTOPTR(NM, TP) \
if ((aapi.NM = TP dlsym(m_data->m_handle, #NM)) == 0) { \
badnames += #NM + string(" "); \
}
// Candidate file name suffixes for the aspell shared library, and how many
// of them there are.
static const char *aspell_lib_suffixes[] = {".so", ".so.15", ".so.16"};
static const unsigned int nlibsuffs =
    sizeof(aspell_lib_suffixes) / sizeof(aspell_lib_suffixes[0]);
// Implementation details of Aspell that we do not want to expose in the .h
// (possible system dependencies, etc.)
class AspellData {
public:
    // Handle of the aspell shared library (passed to dlsym()/dlclose()),
    // 0 when no library is open.
    void *m_handle;
    // Path of the aspell executable.
    string m_exec;
    // Speller object, 0 when there is none.
    AspellSpeller *m_speller;

    AspellData() : m_handle(0), m_speller(0) {}

    ~AspellData() {
        LOGDEB2(("~AspellData\n"));
        if (m_handle != 0) {
            dlclose(m_handle);
            m_handle = 0;
        }
        if (m_speller == 0)
            return;
        // The speller is forgotten without calling
        // aapi.delete_aspell_speller(m_speller): the original author found
        // that the call dumps core.
        // NOTE(review): the library handle has already been closed at this
        // point, which may well be the reason for that crash -- confirm
        // before re-enabling the call.
        m_speller = 0;
        LOGDEB2(("~AspellData: speller done\n"));
    }
};
// Only remember the configuration: nothing is located or loaded before
// init() is called, so the private data pointer starts out null.
Aspell::Aspell(RclConfig *cnf)
    : m_config(cnf),
      m_data(0)
{}
// Dispose of the private data (library handle, speller) if init() created it.
Aspell::~Aspell()
{
    deleteZ(m_data);
}
bool Aspell::init(string &reason)
{
PTMutexLocker locker(o_aapi_mutex);
deleteZ(m_data);
// Language: we get this from the configuration, else from the NLS
// environment. The aspell language names used for selecting language
// definition files (used to create dictionaries) are like en, fr
if (!m_config->getConfParam("aspellLanguage", m_lang) || m_lang.empty()) {
string lang = "en";
const char *cp;
if ((cp = getenv("LC_ALL")))
lang = cp;
else if ((cp = getenv("LANG")))
lang = cp;
if (!lang.compare("C"))
lang = "en";
m_lang = lang.substr(0, lang.find_first_of("_"));
}
m_data = new AspellData;
const char *aspell_prog_from_env = getenv("ASPELL_PROG");
if (aspell_prog_from_env && access(aspell_prog_from_env, X_OK) == 0) {
m_data->m_exec = aspell_prog_from_env;
#ifdef ASPELL_PROG
} else if (access(ASPELL_PROG, X_OK) == 0) {
m_data->m_exec = ASPELL_PROG;
#endif // ASPELL_PROG
} else {
ExecCmd::which("aspell", m_data->m_exec);
}
if (m_data->m_exec.empty()) {
reason = "aspell program not found or not executable";
deleteZ(m_data);
return false;
}
// We first look for the aspell library in libdir, and also try to
// be clever with ASPELL_PROG.
vector<string> libdirs;
libdirs.push_back(LIBDIR);
// If not in the standard place, the aspell library has to live
// under the same prefix as the aspell program.
{
string aspellPrefix = path_getfather(path_getfather(m_data->m_exec));
// This would probably require some more tweaking on solaris/irix etc.
string dir = sizeof(long) > 4 ? "lib64" : "lib";
string libaspell = path_cat(aspellPrefix, dir);
if (libaspell != LIBDIR)
libdirs.push_back(libaspell);
}
reason = "Could not open shared library ";
for (vector<string>::iterator it = libdirs.begin();
it != libdirs.end(); it++) {
string libbase = path_cat(*it, "libaspell");
string lib;
for (unsigned int i = 0; i < nlibsuffs; i++) {
lib = libbase + aspell_lib_suffixes[i];
reason += string("[") + lib + "] ";
if ((m_data->m_handle = dlopen(lib.c_str(), RTLD_LAZY)) != 0) {
reason.erase();
goto found;
}
}
}
found:
if (m_data->m_handle == 0) {
reason += string(" : ") + dlerror();
deleteZ(m_data);
return false;
}
string badnames;
NMTOPTR(new_aspell_config, (struct AspellConfig *(*)()));
NMTOPTR(aspell_config_replace, (int (*)(struct AspellConfig *,
const char *, const char *)));
NMTOPTR(new_aspell_speller,
(struct AspellCanHaveError *(*)(struct AspellConfig *)));
NMTOPTR(delete_aspell_config,
(void (*)(struct AspellConfig *)));
NMTOPTR(delete_aspell_can_have_error,
(void (*)(struct AspellCanHaveError *)));
NMTOPTR(to_aspell_speller,
(struct AspellSpeller *(*)(struct AspellCanHaveError *)));
NMTOPTR(aspell_speller_config,
(struct AspellConfig *(*)(struct AspellSpeller *)));
NMTOPTR(aspell_speller_suggest,
(const struct AspellWordList *(*)(struct AspellSpeller *,
const char *, int)));
NMTOPTR(aspell_speller_check,
(int (*)(struct AspellSpeller *, const char *, int)));
NMTOPTR(aspell_word_list_elements,
(struct AspellStringEnumeration *(*)
(const struct AspellWordList *)));
NMTOPTR(aspell_string_enumeration_next,
(const char * (*)(struct AspellStringEnumeration *)));
NMTOPTR(delete_aspell_string_enumeration,
(void (*)(struct AspellStringEnumeration *)));
NMTOPTR(aspell_error,
(const struct AspellError*(*)(const struct AspellCanHaveError *)));
NMTOPTR(aspell_error_message,
(const char *(*)(const struct AspellCanHaveError *)));
NMTOPTR(aspell_speller_error_message,
(const char *(*)(const struct AspellSpeller *)));
NMTOPTR(delete_aspell_speller, (void (*)(struct AspellSpeller *)));
if (!badnames.empty()) {
reason = string("Aspell::init: symbols not found:") + badnames;
deleteZ(m_data);
return false;
}
return true;
}
// Health check: true only when init() left us with private data holding
// an open library handle.
bool Aspell::ok() const
{
    if (m_data == 0)
        return false;
    return m_data->m_handle != 0;
}
// Path of the aspell dictionary for the current language: a file named
// "aspdict.<lang>.rws" inside the configuration directory.
string Aspell::dicPath()
{
    string fname("aspdict.");
    fname += m_lang;
    fname += string(".rws");
    return path_cat(m_config->getConfDir(), fname);
}
// Input provider for the aspell "create dictionary" command: it feeds the
// command with the index terms, one per line, leaving out what is probably
// not a word.
// Note that the manual for the current version (0.60) of aspell
// states that utf-8 is not well supported, so that we should maybe
// also filter all 8bit chars.
class AspExecPv : public ExecCmdProvide {
public:
    string *m_input; // buffer shared with the command: next chunk of input
    Rcl::TermIter *m_tit; // term walk position
    Rcl::Db &m_db; // index being walked

    AspExecPv(string *i, Rcl::TermIter *tit, Rcl::Db &db)
        : m_input(i), m_tit(tit), m_db(db)
    {}

    // Put the next acceptable term, newline-terminated, in the input
    // buffer. An empty buffer signals the end of data to the caller.
    void newData() {
        for (;;) {
            if (!m_db.termWalkNext(m_tit, *m_input)) {
                // End of data. Tell so. Exec will close cmd.
                m_input->erase();
                return;
            }
            if (m_input->empty())
                continue;
            // Terms starting with an uppercase ASCII letter are prefixed
            // terms. They also exist without the prefix: skip them.
            const char first = m_input->at(0);
            if ('A' <= first && first <= 'Z')
                continue;
            if (!Rcl::Db::isSpellingCandidate(*m_input))
                continue;
            // Non-empty, sort-of appropriate term: send it to aspell
            m_input->append("\n");
            return;
        }
    }
};
// Build the aspell dictionary out of the index term list.
// We create the dictionary by executing the aspell command:
//   aspell --lang=[lang] --encoding=utf-8 create master [dictPath]
// and feeding it the filtered index terms on its standard input.
// Returns false if the object is not initialized (reason untouched), if the
// term walk can't be opened, or if the command fails (reason set in both
// cases). The term walk is closed whether the command succeeds or not.
bool Aspell::buildDict(Rcl::Db &db, string &reason)
{
    if (!ok())
        return false;

    ExecCmd aspell;
    list<string> args;
    args.push_back(string("--lang=")+ m_lang);
    args.push_back("--encoding=utf-8");
    args.push_back("create");
    args.push_back("master");
    args.push_back(dicPath());
    aspell.setStderr("/dev/null");

    Rcl::TermIter *tit = db.termWalkOpen();
    if (tit == 0) {
        reason = "termWalkOpen failed\n";
        return false;
    }

    // The provider refills termbuf with one term per call
    string termbuf;
    AspExecPv pv(&termbuf, tit, db);
    aspell.setProvide(&pv);
    const bool failed = aspell.doexec(m_data->m_exec, args, &termbuf) != 0;

    // Release the iterator before looking at the status, so that it is not
    // leaked on the error path.
    db.termWalkClose(tit);

    if (failed) {
        reason = string("aspell dictionary creation command failed.\n"
                        "One possible reason might be missing language "
                        "data files for lang = ") + m_lang;
        return false;
    }
    return true;
}
// Create the speller object if this was not done already.
// Returns true when a speller is available. Returns false when the object is
// not initialized, or when aspell could not create the speller, in which
// case 'reason' receives the aspell error message.
bool Aspell::make_speller(string& reason)
{
    if (!ok())
        return false;
    if (m_data->m_speller != 0)
        return true;

    // Describe the speller we want: our language, utf-8 input, our own
    // dictionary as master word list, fast suggestion mode.
    AspellConfig *cfg = aapi.new_aspell_config();
    aapi.aspell_config_replace(cfg, "lang", m_lang.c_str());
    aapi.aspell_config_replace(cfg, "encoding", "utf-8");
    aapi.aspell_config_replace(cfg, "master", dicPath().c_str());
    aapi.aspell_config_replace(cfg, "sug-mode", "fast");
    // aapi.aspell_config_replace(cfg, "sug-edit-dist", "2");

    AspellCanHaveError *res = aapi.new_aspell_speller(cfg);
    // The configuration object is released right after the creation call
    aapi.delete_aspell_config(cfg);

    if (aapi.aspell_error(res) == 0) {
        m_data->m_speller = aapi.to_aspell_speller(res);
        return true;
    }

    reason = aapi.aspell_error_message(res);
    aapi.delete_aspell_can_have_error(res);
    return false;
}
// Check that 'term' is in the dictionary.
// Returns true if aspell knows the word (or if the term is empty). A false
// return with an empty 'reason' means "not in dictionary"; a false return
// with 'reason' set signals an error. The 'db' parameter is not used here.
bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
{
    LOGDEB2(("Aspell::check [%s]\n", term.c_str()));
    if (!ok())
        return false;
    if (!make_speller(reason))
        return false;
    if (term.empty())
        return true; //??

    const int status = aapi.aspell_speller_check(m_data->m_speller,
                                                 term.c_str(), term.length());
    reason.clear();
    if (status == 1)
        return true;
    if (status == 0)
        return false;

    // -1 or anything unexpected: report the speller error
    reason.append("Aspell error: ");
    reason.append(aapi.aspell_speller_error_message(m_data->m_speller));
    return false;
}
// Return a list of possible expansions for a given word.
// Candidates proposed by aspell are appended to 'suggestions' only if they
// exist as terms in the index and stem differently from 'term'.
// Returns false if the speller is not available or if aspell fails ('reason'
// is then set from the speller error message); returns true otherwise,
// including for an empty term (nothing is appended in that case).
bool Aspell::suggest(Rcl::Db &db, const string &term,
                     list<string>& suggestions, string& reason)
{
    if (!ok() || !make_speller(reason))
        return false;
    if (term.empty())
        return true; //??

    const AspellWordList *wl =
        aapi.aspell_speller_suggest(m_data->m_speller,
                                    term.c_str(), term.length());
    if (wl == 0) {
        reason = aapi.aspell_speller_error_message(m_data->m_speller);
        return false;
    }

    AspellStringEnumeration *els = aapi.aspell_word_list_elements(wl);
    const char *word;
    while ((word = aapi.aspell_string_enumeration_next(els)) != 0) {
        // stemDiffers checks that the word exists (we don't want
        // aspell computed stuff, only exact terms from the dictionary),
        // and that it stems differently to the base word (else it's not
        // useful to expand the search). Or is it ?
        // ******** This should depend if
        // stemming is turned on or not for querying *******
        string sw(word);
        if (db.termExists(sw) && db.stemDiffers("english", sw, term))
            suggestions.push_back(sw);
    }
    aapi.delete_aspell_string_enumeration(els);
    return true;
}
#endif // RCL_USE_ASPELL
#else // TEST_RCLASPELL test driver ->
#ifdef HAVE_CONFIG_H
#include "autoconfig.h"
#endif
#ifdef RCL_USE_ASPELL
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <iostream>
using namespace std;
#include "rclinit.h"
#include "rclconfig.h"
#include "rcldb.h"
#include "rclaspell.h"
// Program name (argv[0]), set at the top of main() and used by Usage()
static char *thisprog;
// Configuration object, set from recollinit() in main()
RclConfig *rclconfig;
// The index, opened read-only by main()
Rcl::Db rcldb;
// Option summary printed by Usage()
static char usage [] =
" -b : build dictionary\n"
" -s <term>: suggestions for term\n"
"\n\n"
;
// Print the program name and the option summary on stderr, then exit
// with status 1. Does not return.
static void Usage(void)
{
    fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
    exit(1);
}
// Bit set of the options seen on the command line (OPT_* values below)
static int op_flags;
// Not referenced in the visible part of this test driver
#define OPT_MOINS 0x1
// -s <term>: print suggestions for term
#define OPT_s 0x2
// -b: build the dictionary
#define OPT_b 0x4
// Test driver: "-b" builds the aspell dictionary from the index, and
// "-s <term>" prints the suggestions for a term. When both are given, only
// the build is performed. Exits with status 0 on success, 1 on any error.
int main(int argc, char **argv)
{
    string word;

    thisprog = argv[0];
    argc--; argv++;

    // Option parsing. Several single-letter options may be grouped behind
    // one '-'. The 's' option takes its value from the next argument and
    // ends the processing of the current group.
    while (argc > 0 && **argv == '-') {
        (*argv)++;
        if (!(**argv)) {
            /* A lone "-" argument (the "adb - core" case) is not accepted */
            Usage();
        }
        while (**argv) {
            switch (*(*argv)++) {
            case 'b':
                op_flags |= OPT_b;
                break;
            case 's':
                op_flags |= OPT_s;
                if (argc < 2)
                    Usage();
                word = *(++argv);
                argc--;
                goto b1;
            default:
                Usage();
                break;
            }
        }
    b1: argc--; argv++;
    }

    // No positional arguments are accepted, and at least one option is needed
    if (argc != 0 || op_flags == 0)
        Usage();

    string reason;
    rclconfig = recollinit(0, 0, reason);
    if (!rclconfig || !rclconfig->ok()) {
        fprintf(stderr, "Configuration problem: %s\n", reason.c_str());
        exit(1);
    }

    string dbdir = rclconfig->getDbDir();
    if (dbdir.empty()) {
        // The trailing newline was missing from this message
        fprintf(stderr, "No db directory in configuration\n");
        exit(1);
    }

    if (!rcldb.open(dbdir, Rcl::Db::DbRO, 0)) {
        fprintf(stderr, "Could not open database in %s\n", dbdir.c_str());
        exit(1);
    }

    Aspell aspell(rclconfig);
    if (!aspell.init(reason)) {
        cerr << "Init failed: " << reason << endl;
        exit(1);
    }

    if (op_flags & OPT_b) {
        if (!aspell.buildDict(rcldb, reason)) {
            cerr << "buildDict failed: " << reason << endl;
            exit(1);
        }
    } else {
        list<string> suggs;
        if (!aspell.suggest(rcldb, word, suggs, reason)) {
            cerr << "suggest failed: " << reason << endl;
            exit(1);
        }
        cout << "Suggestions for " << word << ":" << endl;
        for (list<string>::iterator it = suggs.begin();
             it != suggs.end(); it++) {
            cout << *it << endl;
        }
    }
    exit(0);
}
#else
// Test driver stub for builds without aspell support: there is nothing to
// test, so just report failure.
int main(int argc, char **argv)
{
    return 1;
}
#endif // RCL_USE_ASPELL
#endif // TEST_RCLASPELL test driver

79
src/aspell/rclaspell.h Normal file
View file

@ -0,0 +1,79 @@
/* Copyright (C) 2006 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _RCLASPELL_H_INCLUDED_
#define _RCLASPELL_H_INCLUDED_
/* autoconfig.h must be included before this file */
#ifdef RCL_USE_ASPELL
/**
* Aspell speller interface class.
*
* Aspell is used to let the user find out about spelling variations that may
* exist in the document set for a given word.
* A specific aspell dictionary is created out of all the terms in the
* xapian index, and we then use it to expand a term to spelling neighbours.
* We use the aspell C api for term expansion, but have
* to execute the program to create dictionaries.
*/
#include <string>
#include <list>
#include "rclconfig.h"
#include "rcldb.h"
#ifndef NO_NAMESPACES
using std::string;
using std::list;
#endif // NO_NAMESPACES
class AspellData;
class Aspell {
public:
/** Store the configuration pointer. Nothing is loaded until init() */
Aspell(RclConfig *cnf);
/** Release the private data, if any */
~Aspell();
/** Check health */
bool ok() const;
/** Find the aspell command and shared library, init function pointers */
bool init(string &reason);
/** Build dictionary out of index term list. This is done at the end
* of an indexing pass. */
bool buildDict(Rcl::Db &db, string &reason);
/** Check that word is in dictionary. ret==false && !reason.empty() => err*/
bool check(Rcl::Db &db, const string& term, string& reason);
/** Return a list of possible expansions for a given word */
bool suggest(Rcl::Db &db, const string& term, list<string> &suggestions,
string &reason);
private:
/** Path of the dictionary file for the current language */
string dicPath();
/** Configuration, as passed to the constructor */
RclConfig *m_config;
/** Aspell language name (like en, fr), set by init() */
string m_lang;
/** Implementation data, hidden from this header; null until init() */
AspellData *m_data;
/** Create the speller object if it does not exist yet */
bool make_speller(string& reason);
};
#endif /* RCL_USE_ASPELL */
#endif /* _RCLASPELL_H_INCLUDED_ */

View file

@ -0,0 +1,3 @@
Most of the code in this directory was taken from the Binc IMAP project
(http://www.bincimap.org/), version 1.3.3

45
src/bincimapmime/AUTHORS Normal file
View file

@ -0,0 +1,45 @@
The following parties have participated in writing code or otherwise
contributed to the Binc IMAP project:
Author:
Andreas Aardal Hanssen <andreas-binc@bincimap.org>
Several users have been very helpful with bug reports and suggestions, and
the author is very grateful for their contributions.
Some users have also gone to the extra effort of debugging the cause of a
bug, or have found a way of implementing a feature, and have either provided
a very good description of what is needed, or they have actually provided a
patch that has been added to Binc IMAP.
While adding extra value to the discussion around the discovery of a bug or
the evaluation of a new feature, these contributors also take some load off
the author's back, so they deserve extra thanks.
In this list are also included people who have contributed with mirrors and
translations of the web pages.
Henry Baragar <Henry.Baragar (a) Instantiated.Ca>
Jürgen Botz <jurgen (a) botz.org>
Charlie Brady <charlieb (a) e-smith.com>
Caskey Dickson <caskey (a) technocage.com>
Ketil Froyn <ketil (a) froyn.name>
Gary Gordon <gv-mail (a) mygirlfriday.info>
Marek Gutkowski <marek (a) moveo.pl>
Daniel James <daniel_james (a) eml.cc>
Zak Johnson <zakj (a) nox.cx>
Sergei Kolobov <sergei (a) kolobov.com>
Rafal Kupka <kupson (a) kupson.fdns.net>
Eivind Kvedalen <bincimap (a) eivind.kvedalen.name>
HIROSHIMA Naoki <naokih (a) iron-horse.org>
Greger Stolt Nilsen <greger (a) psychoproject.net>
John Starks <jstarks (a) starksnet.net>
Peter Stuge <stuge-binc (a) cdy.org>
Gerrit Pape <pape (a) smarden.org>
Jeremy Rossi <jeremy (a) jeremyrossi.com>
Dale Woolridge <dale-contrib-bincimap (a) woolridge.org>
If you have contributed to the Binc IMAP project but are not listed here
(this happens quite often), please send a mail to andreas-binc@bincimap.org
and I'll add you to the list.

356
src/bincimapmime/COPYING Normal file
View file

@ -0,0 +1,356 @@
This software is released under the GPL. Find a full copy of the GNU
General Public License below.
In addition, as a special exception, Andreas Aardal Hanssen, author of
Binc IMAP, gives permission to link the code of this program with the
OpenSSL library (or with modified versions of OpenSSL that use the
same license as OpenSSL, listed in the included COPYING.OpenSSL file),
and distribute linked combinations including the two.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify this file, you may
extend this exception to your version of the file, but you are not
obligated to do so. If you do not wish to do so, delete this exception
statement from your version.
---------------------------------------------------------------------
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

39
src/bincimapmime/Makefile Normal file
View file

@ -0,0 +1,39 @@
# @(#$Id: Makefile,v 1.6 2006-01-19 12:01:42 dockes Exp $ (C) 2005 J.F.Dockes
# Builds the static library libmime.a from the C++ sources listed in SRCS.
# depth is the relative path to the directory that holds mk/sysconf.
depth = ..
# NOTE(review): CXX, AR and ALL_CXXFLAGS are presumably set by mk/sysconf;
# they are not defined in this file.
include $(depth)/mk/sysconf
LIBS = libmime.a
PROGS = trbinc
# Default target: refresh the dependency file, then build the library.
# PROGS (trbinc) is not part of "all".
all: depend $(LIBS)
SRCS = mime-getpart.cc mime-parsefull.cc mime-parseonlyheader.cc \
mime-printbody.cc mime-printdoc.cc mime-printheader.cc mime.cc \
convert.cc iodevice.cc iofactory.cc
OBJS = mime-getpart.o mime-parsefull.o mime-parseonlyheader.o \
mime-printbody.o mime-printdoc.o mime-printheader.o mime.o \
convert.o iodevice.o iofactory.o
# Archive all objects into the static library.
libmime.a : $(OBJS)
$(AR) ru libmime.a $(OBJS)
# Suffix rule: compile one .cc file into the matching .o file.
.cc.o:
$(CXX) $(ALL_CXXFLAGS) -c $<
# NOTE(review): TRBINCOBJS is defined but not referenced by any rule below.
TRBINCOBJS = trbinc.o
# Test program. It links against libmime.a but only lists trbinc.o as a
# prerequisite, so the library must already have been built.
trbinc: trbinc.o
$(CXX) -o trbinc trbinc.o libmime.a
# Dependency tracking: "alldeps" holds the compiler-generated (-M) dependency
# rules; alldeps.stamp records when they were last regenerated.
depend: alldeps.stamp
alldeps.stamp : $(SRCS)
$(CXX) -M $(ALL_CXXFLAGS) $(SRCS) > alldeps
touch alldeps.stamp
# clean empties alldeps instead of deleting it, presumably so that the
# "include alldeps" line at the bottom keeps finding the file.
clean:
cp /dev/null alldeps
rm -f alldeps.stamp
rm -f ${LIBS} ${PROGS} ${OBJS}
include alldeps

View file

@ -0,0 +1,90 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* address.cc
*
* Description:
* Implementation of the Address class.
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "address.h"
#include "convert.h"
#include <string>
#ifndef NO_NAMESPACES
using namespace ::std;
using namespace Binc;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
// Builds an address from a display name and an addr-spec ("local@host").
// Without an '@' the whole addr becomes the local part and host stays empty.
Address::Address(const string &name, const string &addr)
{
  this->name = name;

  const string::size_type at = addr.find('@');
  if (at == string::npos) {
    this->local = addr;
    return;
  }

  this->local = addr.substr(0, at);
  this->host = addr.substr(at + 1);
}
//------------------------------------------------------------------------
// Builds an address from a full mailbox string such as
//   "Display Name" <local@host>
// Everything before the first '<' is the display name (surrounding
// whitespace and double quotes removed); the rest, with '<' and '>'
// trimmed from its ends, is the addr-spec.
// When the addr-spec has no '@', it becomes the local part and host is
// left empty, matching the two-argument constructor. (Previously
// substr(npos + 1) wrapped around to substr(0), so host ended up as a
// copy of the whole address.)
Address::Address(const string &wholeaddress)
{
  const string::size_type open = wholeaddress.find('<');

  string addr;
  if (open != string::npos) {
    name = wholeaddress.substr(0, open);
    addr = wholeaddress.substr(open + 1);
  } else {
    // No angle brackets: the entire input is the addr-spec, no name.
    name = string();
    addr = wholeaddress;
  }

  trim(addr, "<>");
  trim(name);
  trim(name, "\"");

  const string::size_type at = addr.find('@');
  if (at != string::npos) {
    local = addr.substr(0, at);
    host = addr.substr(at + 1);
  } else {
    local = addr;
    host = string();
  }

  trim(local);
  trim(host);
  trim(name);
}
//------------------------------------------------------------------------
// Renders the address as a parenthesised four-element list:
//   (name NIL local host)
// An empty name is written as NIL; an empty local part or host is written
// as an empty quoted string. Non-empty fields go through toImapString().
string Address::toParenList(void) const
{
  const string namePart = name.empty() ? "NIL" : toImapString(name);
  const string localPart = local.empty() ? "\"\"" : toImapString(local);
  const string hostPart = host.empty() ? "\"\"" : toImapString(host);

  string out;
  out += "(";
  out += namePart;
  out += " NIL ";
  out += localPart;
  out += " ";
  out += hostPart;
  out += ")";
  return out;
}

View file

@ -0,0 +1,52 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* src/mailbox/address.h
*
* Description:
* Declaration of the Address class.
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifndef address_h_included
#define address_h_included
#include <string>
namespace Binc {
//------------------------------------------------------------------------
// A mail address split into its display name, local part and host.
class Address {
public:
// Display name; may be empty.
std::string name;
// Part of the address before the '@'.
std::string local;
// Part of the address after the '@'.
std::string host;
//--
// Returns the address formatted as "(name NIL local host)".
std::string toParenList(void) const;
//--
// Builds an address from a display name and a "local@host" string.
Address(const std::string &name, const std::string &addr);
// Builds an address by parsing a string that may contain a display name
// followed by the address in angle brackets. Not declared explicit, so it
// also acts as an implicit conversion from std::string.
Address(const std::string &wholeaddr);
};
}
#endif

View file

133
src/bincimapmime/convert.cc Normal file
View file

@ -0,0 +1,133 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* convert.cc
*
* Description:
* Implementation of miscellaneous conversion functions.
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "convert.h"
#include <string>
#ifndef NO_NAMESPACES
using namespace ::std;
using namespace Binc;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
// Constructs an empty stream; the buffer string is default-constructed.
BincStream::BincStream(void)
{
}
//------------------------------------------------------------------------
// Empties the buffer before the object is destroyed.
BincStream::~BincStream(void)
{
clear();
}
//------------------------------------------------------------------------
// Removes up to size characters from the front of the buffer and returns
// them. A request larger than the buffer yields the whole buffer.
string BincStream::popString(unsigned int size)
{
  const string::size_type available = nstr.length();
  const string::size_type take = size > available ? available : size;

  const string head(nstr, 0, take);
  nstr.erase(0, take);
  return head;
}
//------------------------------------------------------------------------
// Removes and returns the first buffered character.
// Returns '\0' when the buffer is empty, which is indistinguishable from a
// buffered NUL byte.
char BincStream::popChar(void)
{
  if (nstr.empty())
    return '\0';

  const char first = nstr[0];
  nstr.erase(0, 1);
  return first;
}
//------------------------------------------------------------------------
// Puts c back at the front of the buffer, so the next pop returns it.
void BincStream::unpopChar(char c)
{
  nstr.insert(nstr.begin(), c);
}
//------------------------------------------------------------------------
void BincStream::unpopStr(const string &s)
{
nstr = s + nstr;
}
//------------------------------------------------------------------------
// Returns a reference to the internal buffer; nothing is consumed.
const string &BincStream::str(void) const
{
return nstr;
}
//------------------------------------------------------------------------
// Discards all buffered data.
void BincStream::clear(void)
{
nstr.clear();
}
//------------------------------------------------------------------------
// Returns the number of buffered bytes. The length is cast to unsigned int,
// so it would be truncated if the buffer exceeded that type's range.
unsigned int BincStream::getSize(void) const
{
return (unsigned int) nstr.length();
}
//------------------------------------------------------------------------
// Stream-manipulator overload. The manipulator argument is neither called
// nor inspected: any manipulator appends a CRLF pair to the buffer.
BincStream &BincStream::operator << (std::ostream&(*)(std::ostream&))
{
  nstr.append("\r\n");
  return *this;
}
//------------------------------------------------------------------------
// Appends the string t to the end of the buffer.
BincStream &BincStream::operator << (const string &t)
{
  nstr.append(t);
  return *this;
}
//------------------------------------------------------------------------
// Appends the decimal text form of the signed integer t.
BincStream &BincStream::operator << (int t)
{
  const string digits = toString(t);
  nstr.append(digits);
  return *this;
}
//------------------------------------------------------------------------
// Appends the decimal text form of the unsigned integer t.
BincStream &BincStream::operator << (unsigned int t)
{
  const string digits = toString(t);
  nstr.append(digits);
  return *this;
}
//------------------------------------------------------------------------
// Appends the single character t to the end of the buffer.
BincStream &BincStream::operator << (char t)
{
  nstr.push_back(t);
  return *this;
}

323
src/bincimapmime/convert.h Normal file
View file

@ -0,0 +1,323 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* src/util/convert.h
*
* Description:
* Declaration of miscellaneous conversion functions.
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifndef convert_h_included
#define convert_h_included
#include <string>
#include <vector>
#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <sys/stat.h>
#include <cstdlib>
#include <cstring>
#include "address.h"
//#include "depot.h"
namespace Binc {
//----------------------------------------------------------------------
// Formats a signed integer as decimal text.
inline std::string toString(int i_in)
{
  // snprintf never writes more than sizeof(text) bytes, terminator included.
  char text[16];
  snprintf(text, sizeof text, "%d", i_in);

  std::string result(text);
  return result;
}
//----------------------------------------------------------------------
// Formats an unsigned integer as decimal text.
inline std::string toString(unsigned int i_in)
{
  // snprintf never writes more than sizeof(text) bytes, terminator included.
  char text[16];
  snprintf(text, sizeof text, "%u", i_in);

  std::string result(text);
  return result;
}
//----------------------------------------------------------------------
// Formats an unsigned long as decimal text.
inline std::string toString(unsigned long i_in)
{
  // snprintf never writes more than sizeof(text) bytes, terminator included.
  char text[40];
  snprintf(text, sizeof text, "%lu", i_in);

  std::string result(text);
  return result;
}
//----------------------------------------------------------------------
// Converts a C string to std::string.
// A null pointer yields an empty string; constructing std::string directly
// from a null pointer is undefined behaviour.
inline std::string toString(const char *i_in)
{
  if (i_in == 0)
    return std::string();
  return std::string(i_in);
}
//----------------------------------------------------------------------
// std::string convenience overload that forwards to the C library's
// ::atoi(), inheriting its behaviour: no error reporting for non-numeric
// or out-of-range input.
inline int atoi(const std::string &s_in)
{
return ::atoi(s_in.c_str());
}
//----------------------------------------------------------------------
// Encodes every byte of s as two lowercase hexadecimal digits,
// high nibble first.
inline std::string toHex(const std::string &s)
{
  static const char digits[] = "0123456789abcdef";

  std::string hex;
  hex.reserve(s.size() * 2);
  for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
    const unsigned char byte = static_cast<unsigned char>(s[pos]);
    hex.push_back(digits[byte >> 4]);
    hex.push_back(digits[byte & 0x0f]);
  }
  return hex;
}
//----------------------------------------------------------------------
// Decodes a string of lowercase hexadecimal digit pairs (as produced by
// toHex) back into raw bytes.
//
// - Digits are consumed two at a time; a trailing unpaired character is
//   ignored.
// - Only the digits 0-9 and a-f are accepted. On any other character the
//   function returns the literal string "out of range" (an in-band error
//   value kept for compatibility with existing callers).
//
// A NUL byte in the input is rejected explicitly: strchr() also matches the
// terminator of the digit table, which previously made an embedded NUL
// decode as if it were a digit with value 16.
inline std::string fromHex(const std::string &s)
{
  static const char hexchars[] = "0123456789abcdef";

  std::string tmp;
  for (std::string::size_type pos = 0; pos + 1 < s.size(); pos += 2) {
    const char hi = s[pos];
    const char lo = s[pos + 1];

    const char *hiDigit = hi != '\0' ? strchr(hexchars, hi) : 0;
    if (hiDigit == 0)
      return "out of range";

    const char *loDigit = lo != '\0' ? strchr(hexchars, lo) : 0;
    if (loDigit == 0)
      return "out of range";

    // Both offsets are in 0..15, so the combined value is always 0..255.
    const int n = (int) ((hiDigit - hexchars) << 4) + (int) (loDigit - hexchars);
    tmp += (char) n;
  }
  return tmp;
}
//----------------------------------------------------------------------
// Formats s_in as a double-quoted string when every byte is in the range
// 32..126 and is neither '"' nor '\'. Otherwise it is emitted in literal
// form: "{<length>}" followed by CRLF and the raw bytes.
inline std::string toImapString(const std::string &s_in)
{
  bool needsLiteral = false;
  for (std::string::size_type pos = 0; pos < s_in.size(); ++pos) {
    const unsigned char c = static_cast<unsigned char>(s_in[pos]);
    if (c < 32 || c > 126 || c == '\"' || c == '\\') {
      needsLiteral = true;
      break;
    }
  }

  if (needsLiteral)
    return "{" + toString(s_in.length()) + "}\r\n" + s_in;
  return "\"" + s_in + "\"";
}
//----------------------------------------------------------------------
// Converts input to upper case in place, using the C library's toupper().
// Each character is passed as unsigned char: handing toupper() a negative
// value (possible when plain char is signed and the byte is >= 0x80) is
// undefined behaviour.
inline void uppercase(std::string &input)
{
  for (std::string::iterator i = input.begin(); i != input.end(); ++i)
    *i = static_cast<char>(toupper(static_cast<unsigned char>(*i)));
}
//----------------------------------------------------------------------
// Converts input to lower case in place, using the C library's tolower().
// Each character is passed as unsigned char: handing tolower() a negative
// value (possible when plain char is signed and the byte is >= 0x80) is
// undefined behaviour.
inline void lowercase(std::string &input)
{
  for (std::string::iterator i = input.begin(); i != input.end(); ++i)
    *i = static_cast<char>(tolower(static_cast<unsigned char>(*i)));
}
//----------------------------------------------------------------------
// Removes trailing characters that occur in chars from s_in, in place.
//
// The first character of the string is never removed, so a one-character
// string is left untouched even if that character is in chars. This
// matches the historical behaviour of this function.
// NOTE(review): that looks like an off-by-one, but callers may rely on
// it - confirm before changing.
//
// The length is tracked as size_type (it used to be narrowed to int) and
// the string is resized once instead of once per removed character.
inline void chomp(std::string &s_in, const std::string &chars = " \t\r\n")
{
  std::string::size_type keep = s_in.length();
  while (keep > 1 && chars.find(s_in[keep - 1]) != std::string::npos)
    --keep;
  s_in.resize(keep);
}
//----------------------------------------------------------------------
// Removes leading and trailing characters that occur in chars from s_in,
// in place. Leading characters are removed here; trailing ones are handled
// by chomp().
//
// The leading run is erased in one step. The previous loop rebuilt the
// string with substr(1) for every stripped character, which is quadratic
// in the length of the run.
inline void trim(std::string &s_in, const std::string &chars = " \t\r\n")
{
  // find_first_not_of() returns npos when every character is in chars (or
  // the string is empty); erase(0, npos) then clears the whole string.
  s_in.erase(0, s_in.find_first_not_of(chars));
  chomp(s_in, chars);
}
//----------------------------------------------------------------------
// Returns a copy of a with every CR (0x0d) and LF (0x0a) byte removed, then
// trimmed of surrounding whitespace via trim().
//
// When removecomment is true and the scan is outside double quotes, '(' is
// replaced by a single space and ')' is dropped.
// NOTE(review): only the parentheses themselves are affected; the text
// between them is still copied to the result. Confirm whether the comment
// body was meant to be removed too.
//
// Double quotes toggle the "inside quotes" state, except while incomment is
// set. Parentheses inside quotes are copied unchanged.
inline const std::string unfold(const std::string &a,
bool removecomment = true)
{
std::string tmp;
bool incomment = false;
bool inquotes = false;
for (std::string::const_iterator i = a.begin(); i != a.end(); ++i) {
unsigned char c = (unsigned char)*i;
if (!inquotes && removecomment) {
if (c == '(') {
incomment = true;
tmp += " ";
} else if (c == ')') {
incomment = false;
} else if (c != 0x0a && c != 0x0d) {
tmp += *i;
}
} else if (c != 0x0a && c != 0x0d) {
tmp += *i;
}
// Quote state is updated after the character has been copied, so the
// quote characters themselves are kept in the output.
if (!incomment) {
if (*i == '\"')
inquotes = !inquotes;
}
}
trim(tmp);
return tmp;
}
//----------------------------------------------------------------------
// Splits s_in at every character that occurs in delim and appends the
// pieces to dest (existing elements of dest are kept).
//
// With skipempty set, empty pieces are dropped. With skipempty cleared,
// empty pieces between delimiters are kept, but a trailing empty piece
// (input ending in a delimiter, or empty input) is still not appended.
inline void split(const std::string &s_in, const std::string &delim,
                  std::vector<std::string> &dest, bool skipempty = true)
{
  std::string current;
  const std::string::size_type total = s_in.size();

  for (std::string::size_type pos = 0; pos < total; ++pos) {
    const char ch = s_in[pos];
    if (delim.find(ch) == std::string::npos) {
      current.push_back(ch);
      continue;
    }
    // Delimiter: flush the piece collected so far.
    if (!current.empty() || !skipempty)
      dest.push_back(current);
    current.clear();
  }

  if (!current.empty())
    dest.push_back(current);
}
//----------------------------------------------------------------------
// Splits a comma-separated address list and appends the pieces to dest.
// Commas inside double quotes do not split; the quote characters stay part
// of the piece. Pieces are not trimmed.
//
// With skipempty set, empty pieces are dropped. With skipempty cleared,
// empty pieces between commas are kept, but a trailing empty piece is
// still not appended.
inline void splitAddr(const std::string &s_in,
                      std::vector<std::string> &dest, bool skipempty = true)
{
  std::string current;
  bool quoted = false;

  for (std::string::size_type pos = 0; pos < s_in.size(); ++pos) {
    const char ch = s_in[pos];
    if (ch == '\"')
      quoted = !quoted;

    if (quoted || ch != ',') {
      current.push_back(ch);
      continue;
    }
    // Unquoted comma: flush the piece collected so far.
    if (!current.empty() || !skipempty)
      dest.push_back(current);
    current.clear();
  }

  if (!current.empty())
    dest.push_back(current);
}
//----------------------------------------------------------------------
// Canonicalises a mailbox name.
// - Names containing ".." are rejected: an empty string is returned.
// - If the first five characters spell "INBOX" in any letter case, they
//   are replaced by upper-case "INBOX"; the remainder is kept as is.
// - Every other name is returned unchanged.
inline std::string toCanonMailbox(const std::string &s_in)
{
  if (s_in.find("..") != std::string::npos)
    return std::string();

  if (s_in.length() < 5)
    return s_in;

  std::string prefix(s_in, 0, 5);
  uppercase(prefix);
  if (prefix != "INBOX")
    return s_in;

  return prefix + s_in.substr(5);
}
//------------------------------------------------------------------------
// Translates a mailbox wildcard pattern into an anchored regular expression.
//
// - The characters . [ ] { } ( ) ^ $ ? + \ are escaped with a backslash.
// - '*' becomes the lazy wildcard ".*?".
// - '%' becomes "(\D){0,1}[^\D]*?", where D is the delimiter argument.
// - Everything else is copied unchanged.
//
// The result starts with '^' and ends with '$'. If the pattern ends in a
// wildcard, the trailing lazy '?' is replaced by the '$' anchor, since a
// lazy quantifier directly before the end anchor serves no purpose.
//
// The replacement is now driven by whether the last pattern character
// really was a wildcard. It used to look only at the last character of the
// generated regex, so a pattern ending in a literal '?' (emitted as "\?")
// had its '?' overwritten, producing "\$" and losing the end anchor.
inline std::string toRegex(const std::string &s_in, char delimiter)
{
  std::string regex = "^";
  bool endsWithLazyWildcard = false;

  for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) {
    const char ch = *i;
    endsWithLazyWildcard = false;

    switch (ch) {
    case '.': case '[': case ']': case '{': case '}':
    case '(': case ')': case '^': case '$': case '?':
    case '+': case '\\':
      regex += "\\";
      regex += ch;
      break;
    case '*':
      regex += ".*?";
      endsWithLazyWildcard = true;
      break;
    case '%':
      regex += "(\\";
      regex += delimiter;
      regex += "){0,1}";
      regex += "[^\\";
      regex += delimiter;
      regex += "]*?";
      endsWithLazyWildcard = true;
      break;
    default:
      regex += ch;
      break;
    }
  }

  if (endsWithLazyWildcard)
    regex[regex.length() - 1] = '$';
  else
    regex += "$";
  return regex;
}
//------------------------------------------------------------------------
// A simple string-backed FIFO byte buffer: data is appended at the end with
// operator<< and consumed from the front with popString()/popChar().
class BincStream {
private:
// The buffered data.
std::string nstr;
public:
//--
// Appends "\r\n" for any stream manipulator (the argument is not inspected).
BincStream &operator << (std::ostream&(*)(std::ostream&));
// Appends a string.
BincStream &operator << (const std::string &t);
// Appends the decimal text form of an unsigned integer.
BincStream &operator << (unsigned int t);
// Appends the decimal text form of a signed integer.
BincStream &operator << (int t);
// Appends a single character.
BincStream &operator << (char t);
//--
// Removes and returns up to size characters from the front.
std::string popString(unsigned int size);
//--
// Removes and returns the first character, or '\0' if the buffer is empty.
char popChar(void);
// Puts a character back at the front of the buffer.
void unpopChar(char c);
// Puts a string back at the front of the buffer.
void unpopStr(const std::string &s);
//--
// Read-only access to the whole buffer.
const std::string &str(void) const;
//--
// Number of buffered bytes.
unsigned int getSize(void) const;
//--
// Discards all buffered data.
void clear(void);
//--
BincStream(void);
~BincStream(void);
};
}
#endif

View file

@ -0,0 +1,322 @@
/*-*-mode:c++-*-*/
/* --------------------------------------------------------------------
* Filename:
* src/iodevice.cc
*
* Description:
* Implementation of the IODevice class.
* --------------------------------------------------------------------
* Copyright 2002, 2003 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#include "iodevice.h"
#include "convert.h" // BincStream
//#include "session.h" // getEnv/hasEnv
#include <stdlib.h>
#include <unistd.h>
#ifndef NO_NAMESPACES
using namespace ::std;
using namespace ::Binc;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
// Constructs a device with the given flags; IsEnabled is always added.
// Buffer limits, timeout and byte counters start at zero, both output
// levels start at ErrorLevel, and the last error is "Unknown error".
// dumpfd is 0, which the read/write paths treat as "no dump file".
IODevice::IODevice(int f) : flags(f | IsEnabled),
maxInputBufferSize(0),
maxOutputBufferSize(0),
timeout(0),
readCount(0), writeCount(0),
outputLevel(ErrorLevel),
outputLevelLimit(ErrorLevel),
error(Unknown), errorString("Unknown error"),
dumpfd(0)
{
}
//------------------------------------------------------------------------
// Does nothing. In particular the output buffer is not flushed and dumpfd
// is not closed here.
IODevice::~IODevice(void)
{
}
//------------------------------------------------------------------------
// Stream-manipulator overload. Only std::endl has an effect: it appends a
// CRLF pair to the output buffer (and to the dump file, if one is open).
// Afterwards the buffer is flushed if FlushesOnEndl is set, or if
// HasOutputLimit is set and the buffer has grown past maxOutputBufferSize.
// Nothing happens when the device is disabled or the current output level
// is above the output level limit.
IODevice &IODevice::operator <<(ostream &(*source)(ostream &))
{
  const bool enabled = (flags & IsEnabled) != 0;
  if (!enabled || outputLevel > outputLevelLimit)
    return *this;

  // Taking the address through a typed pointer selects the matching
  // instantiation of the endl template for the comparison below.
  static std::ostream &(*endl_funcptr)(ostream &) = endl;
  if (source != endl_funcptr)
    return *this;

  outputBuffer << "\r\n";
  if (dumpfd)
    ::write(dumpfd, "\r\n", 2);

  if (flags & FlushesOnEndl) {
    flush();
  } else if ((flags & HasOutputLimit)
             && outputBuffer.getSize() > maxOutputBufferSize) {
    flush();
  }
  return *this;
}
//------------------------------------------------------------------------
// Base implementation: always reports that no data can be read.
bool IODevice::canRead(void) const
{
return false;
}
//------------------------------------------------------------------------
// Discards everything in the input and output buffers.
// A disabled device is left untouched.
void IODevice::clear()
{
  if (flags & IsEnabled) {
    inputBuffer.clear();
    outputBuffer.clear();
  }
}
//------------------------------------------------------------------------
bool IODevice::flush()
{
if (!(flags & IsEnabled))
return true;
WriteResult writeResult = WriteWait;
do {
unsigned int s = outputBuffer.getSize();
if (s == 0)
break;
if (!waitForWrite())
return false;
writeResult = write();
if (writeResult == WriteError)
return false;
writeCount += s - outputBuffer.getSize();
} while (outputBuffer.getSize() > 0 && writeResult == WriteWait);
outputBuffer.clear();
return true;
}
//------------------------------------------------------------------------
// Turns on every flag bit that is set in f; other bits are unchanged.
void IODevice::setFlags(unsigned int f)
{
flags |= f;
}
//------------------------------------------------------------------------
// Turns off every flag bit that is set in f; other bits are unchanged.
void IODevice::clearFlags(unsigned int f)
{
flags &= ~f;
}
//------------------------------------------------------------------------
// Stores the input buffer size limit. This only records the value; the
// HasInputLimit flag is not changed here.
void IODevice::setMaxInputBufferSize(unsigned int max)
{
maxInputBufferSize = max;
}
//------------------------------------------------------------------------
// Stores the output buffer size limit. This only records the value; the
// HasOutputLimit flag is not changed here.
void IODevice::setMaxOutputBufferSize(unsigned int max)
{
maxOutputBufferSize = max;
}
//------------------------------------------------------------------------
// Stores the timeout and keeps the HasTimeout flag in sync with it:
// the flag is set for a non-zero timeout and cleared for zero.
void IODevice::setTimeout(unsigned int t)
{
  timeout = t;

  if (t == 0) {
    flags &= ~HasTimeout;
    return;
  }
  flags |= HasTimeout;
}
//------------------------------------------------------------------------
// Returns the stored timeout value.
unsigned int IODevice::getTimeout(void) const
{
return timeout;
}
//------------------------------------------------------------------------
// Sets the level that applies to subsequent output operations.
void IODevice::setOutputLevel(LogLevel level)
{
outputLevel = level;
}
//------------------------------------------------------------------------
// Returns the current output level.
IODevice::LogLevel IODevice::getOutputLevel(void) const
{
return outputLevel;
}
//------------------------------------------------------------------------
// Sets the limit that the output level is compared against; the endl
// operator ignores output whose level is above this limit.
void IODevice::setOutputLevelLimit(LogLevel level)
{
outputLevelLimit = level;
}
//------------------------------------------------------------------------
// Returns the current output level limit.
IODevice::LogLevel IODevice::getOutputLevelLimit(void) const
{
return outputLevelLimit;
}
//------------------------------------------------------------------------
// Moves data from the input buffer to the end of *dest.
//
// max == 0: takes whatever is buffered; if the buffer is empty it is
//           refilled once first.
// max != 0: refills until at least max bytes are buffered, then takes
//           exactly max bytes.
//
// Returns false, leaving *dest unchanged, if a refill fails. The bytes
// taken are also written to the dump file when one is open, and are added
// to readCount. dest must not be null.
bool IODevice::readStr(string *dest, unsigned int max)
{
  if (max == 0) {
    if (inputBuffer.getSize() == 0 && !fillInputBuffer())
      return false;
  } else {
    while (inputBuffer.getSize() < max) {
      if (!fillInputBuffer())
        return false;
    }
  }

  const unsigned int bytesToRead = max ? max : inputBuffer.getSize();
  const string chunk = inputBuffer.str().substr(0, bytesToRead);

  *dest += chunk;
  if (dumpfd)
    ::write(dumpfd, chunk.c_str(), bytesToRead);

  inputBuffer.popString(bytesToRead);
  readCount += bytesToRead;
  return true;
}
//------------------------------------------------------------------------
// Consumes one character from the input buffer, refilling the buffer first
// if it is empty. The character is stored in *dest unless dest is null,
// written to the dump file when one is open, and counted in readCount.
// Returns false if the buffer is empty and cannot be refilled.
bool IODevice::readChar(char *dest)
{
  if (inputBuffer.getSize() == 0) {
    if (!fillInputBuffer())
      return false;
  }

  char ch = inputBuffer.popChar();
  if (dest != 0)
    *dest = ch;
  if (dumpfd != 0)
    ::write(dumpfd, &ch, 1);

  readCount += 1;
  return true;
}
//------------------------------------------------------------------------
// Puts c back at the front of the input buffer so that it is the next
// character read. readCount is not adjusted.
void IODevice::unreadChar(char c)
{
inputBuffer.unpopChar(c);
}
//------------------------------------------------------------------------
// Puts s back at the front of the input buffer so that it is read again
// next. readCount is not adjusted.
void IODevice::unreadStr(const string &s)
{
inputBuffer.unpopStr(s);
}
//------------------------------------------------------------------------
// Reads and discards characters up to and including the first occurrence
// of c. Returns false if the input runs out (readChar() fails) before c is
// found.
//
// readChar() already copies every character it consumes to the dump file,
// so no extra dump write is done here; the previous version wrote each
// skipped character to the dump a second time.
bool IODevice::skipTo(char c)
{
  char dest = '\0';
  do {
    if (!readChar(&dest))
      return false;
  } while (c != dest);
  return true;
}
//------------------------------------------------------------------------
// Base implementation: reports the service name "nul".
string IODevice::service(void) const
{
return "nul";
}
//------------------------------------------------------------------------
// Base implementation: never becomes writable, always returns false.
bool IODevice::waitForWrite(void) const
{
return false;
}
//------------------------------------------------------------------------
// Base implementation: never becomes readable, always returns false.
bool IODevice::waitForRead(void) const
{
return false;
}
//------------------------------------------------------------------------
// Base implementation: writes nothing and always reports WriteError.
IODevice::WriteResult IODevice::write(void)
{
return WriteError;
}
//------------------------------------------------------------------------
// Base implementation: reads nothing and always returns false.
bool IODevice::fillInputBuffer(void)
{
return false;
}
//------------------------------------------------------------------------
// Returns the stored error code (Unknown until something sets it).
IODevice::Error IODevice::getLastError(void) const
{
return error;
}
//------------------------------------------------------------------------
// Returns a copy of the stored error description
// ("Unknown error" until something sets it).
string IODevice::getLastErrorString(void) const
{
return errorString;
}
//------------------------------------------------------------------------
// Returns the running total of bytes consumed by readStr() and readChar().
unsigned int IODevice::getReadCount(void) const
{
return readCount;
}
//------------------------------------------------------------------------
// Returns the running total of bytes that flush() has written out.
unsigned int IODevice::getWriteCount(void) const
{
return writeCount;
}
//------------------------------------------------------------------------
// Currently a no-op: the implementation is compiled out with "#if 0"
// (it depends on Session, whose header is not included in this file).
//
// The disabled code would create a uniquely named dump file under /tmp and
// store its descriptor in dumpfd; 0 means "no dump file".
//
// Two defects in the disabled code are fixed so it is correct if it is
// ever re-enabled:
//  - the buffer returned by strdup() is released with free(); it was
//    released with delete, which does not match malloc()-style allocation;
//  - a failed strdup() no longer passes a null pointer to mkstemp().
void IODevice::enableProtocolDumping(void)
{
#if 0
  BincStream ss;
  ss << "/tmp/bincimap-dump-" << (int) time(0) << "-"
     << Session::getInstance().getIP() << "-XXXXXX";
  char *safename = strdup(ss.str().c_str());
  if (safename == 0)
    return;
  dumpfd = mkstemp(safename);
  if (dumpfd == -1)
    dumpfd = 0;
  free(safename);
#endif
}

401
src/bincimapmime/iodevice.h Normal file
View file

@ -0,0 +1,401 @@
/*-*-mode:c++;c-basic-offset:2-*-*/
/* --------------------------------------------------------------------
* Filename:
* src/iodevice.h
*
* Description:
* Declaration of the IODevice class.
* --------------------------------------------------------------------
* Copyright 2002, 2003 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifndef iodevice_h_included
#define iodevice_h_included
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "convert.h" // BincStream
#include <string>
#include <unistd.h> // ::write
namespace Binc {
/*!
\class IODevice
\brief The IODevice class provides a framework for reading and
writing to device.
Implement new devices by inheriting this class and overloading all
virtual methods.
service() returns the service that the specific device is used
for. Two values are "log" and "client".
\sa IOFactory, MultilogDevice, SyslogDevice, StdIODevice, SSLDevice
*/
class IODevice {
public:
/*!
Standard options for an IODevice.
*/
enum Flags {
None = 0,
FlushesOnEndl = 1 << 0,
HasInputLimit = 1 << 1,
HasOutputLimit = 1 << 2,
IsEnabled = 1 << 3,
HasTimeout = 1 << 4
};
/*!
Errors from when an operation returned false.
*/
enum Error {
Unknown,
Timeout
};
/*!
Constructs an invalid IODevice.
Instances of IODevice perform no operations, and all boolean
functions always return false. This constructor is only useful
if called from a subclass that reimplements all virtual methods.
*/
IODevice(int f = 0);
/*!
Destructs an IODevice; does nothing.
*/
virtual ~IODevice(void);
/*!
Clears all data in the input and output buffers.
*/
void clear(void);
/*!
Sets one or more flags.
\param f A bitwise OR of flags from the Flags enum.
*/
void setFlags(unsigned int f);
/*!
Clears one or more flags.
\param f A bitwise OR of flags from the Flags enum.
*/
void clearFlags(unsigned int f);
/*!
Sets the maximum allowed input buffer size. If this size is
non-zero and exceeded, reading from the device will fail. This
functionality is used to prevent clients from forcing this class
to consume so much memory that the program crashes.
Setting the max input buffer size to 0 disables the input size
limit.
\param max The maximum input buffer size in bytes.
*/
void setMaxInputBufferSize(unsigned int max);
/*!
Sets the maximum allowed output buffer size. If this size is
non-zero and exceeded, flush() is called implicitly.
Setting the max output buffer size to 0 disables the output size
limit. This is generally discouraged.
As a contrast to setMaxInputBufferSize(), this function is used
to bundle up consecutive write calls, allowing more efficient use
of the underlying device as larger blocks of data are written at
a time.
\param max The maximum output buffer size in bytes.
*/
void setMaxOutputBufferSize(unsigned int max);
/*!
Sets the device's internal timeout in seconds. This timeout is
used both when waiting for data to read and for waiting for the
ability to write.
If this timeout is exceeded, the read or write function that
triggered the timeout will fail.
Setting the timeout to 0 disables the timeout.
\param t The timeout in seconds.
\sa getTimeout()
*/
void setTimeout(unsigned int t);
/*!
Returns the timeout in seconds, or 0 if there is no timeout.
\sa setTimeout()
*/
unsigned int getTimeout(void) const;
enum LogLevel {
ErrorLevel,
InfoLevel,
WarningLevel,
DebugLevel
};
/*!
Sets the output level for the following write operations on this
device.
The output level is a number which gives the following write
operations a priority. You can use setOutputLevelLimit() to
filter the write operations valid for different operating modes.
This enables you to have certain write operations ignored.
For instance, if the output level is set to 0, then "Hello" is
written, and the output level is set to 1, followed by writing
"Daisy", the output level limit value will decide whether only
"Hello" is written, or if also "Daisy" is written.
A low value of the level gives higher priority, and a high level
will give low priority. The default value is 0, and write
operations that are done with output level 0 are never ignored.
\param level The output level
\sa getOutputLevel(), setOutputLevelLimit()
*/
void setOutputLevel(LogLevel level);
/*!
Returns the current output level.
\sa setOutputLevel()
*/
LogLevel getOutputLevel(void) const;
/*!
Sets the current output level limit. Write operations with a
level higher than the output level limit are ignored.
\param level The output level limit
\sa setOutputLevel()
*/
void setOutputLevelLimit(LogLevel level);
/*!
Returns the current output level limit.
\sa setOutputLevelLimit()
*/
LogLevel getOutputLevelLimit(void) const;
/*!
Returns the number of bytes that have been read from this device
since it was created.
*/
unsigned int getReadCount(void) const;
/*!
Returns the number of bytes that have been written to this
device since it was created.
*/
unsigned int getWriteCount(void) const;
/*!
Calling this function enables the built-in protocol dumping feature in
the device. All input and output to this device will be dumped to a file
in /tmp.
*/
void enableProtocolDumping(void);
/*!
Writes data to the device. Depending on the value of the max
output buffer size, the data may not be written immediately.
\sa setMaxOutputBufferSize()
*/
template <class T> IODevice &operator <<(const T &source);
/*!
Writes data to the device. This function specializes on standard
ostream manipulators, such as std::endl.
*/
IODevice &operator <<(std::ostream &(*source)(std::ostream &));
/*!
Returns true if data can be read from the device; otherwise
returns false.
*/
virtual bool canRead(void) const;
/*!
Reads data from the device, and stores this in a string. Returns
true on success; otherwise returns false.
\param dest The incoming data is stored in this string.
\param max No more than this number of bytes is read from the
device.
*/
bool readStr(std::string *dest, unsigned int max = 0);
/*!
Reads exactly one byte from the device and stores this in a
char. Returns true on success; otherwise returns false.
\param dest The incoming byte is stored in this char.
*/
bool readChar(char *dest = 0);
/*!
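Puts a single character back at the front of the input buffer, so
that it is the next character read.
\param c The character to put back.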
*/
void unreadChar(char c);
/*!
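Puts a string back at the front of the input buffer, so that it is
read again before any other buffered data.
\param s The string to put back.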
*/
void unreadStr(const std::string &s);
/*!
Reads characters from the device, until and including one
certain character is found. All read characters are discarded.
This function can be used to skip to the beginning of a line,
with the terminating character being '\n'.
\param c The character to skip to.
*/
bool skipTo(char c);
/*!
Flushes the output buffer. Writes all data in the output buffer
to the device.
*/
bool flush(void);
/*!
Returns the type of error that most recently occurred.
*/
Error getLastError(void) const;
/*!
Returns a human readable description of the error that most
recently occurred. If no known error has occurred, this method
returns "Unknown error".
*/
std::string getLastErrorString(void) const;
/*!
Returns the type of service provided by this device. Two valid
return values are "client" and "log".
*/
virtual std::string service(void) const;
protected:
/*!
Waits until data can be written to the device. If the timeout is
0, this function waits indefinitely. Otherwise, it waits until
the timeout has expired.
If this function returns true, data can be written to the
device; otherwise, getLastError() must be checked to determine
whether a timeout occurred or whether an error with the device
prevents further writing.
*/
virtual bool waitForWrite(void) const;
/*!
Waits until data can be read from the device.
\sa waitForWrite()
*/
virtual bool waitForRead(void) const;
/*!
Types of results from a write.
*/
enum WriteResult {
WriteWait = 0,
WriteDone = 1 << 0,
WriteError = 1 << 1
};
/*!
Writes as much data as possible to the device. If some but not
all data was written, returns WriteWait. If all data was
written, returns WriteDone. If an error occurred, returns
WriteError.
*/
virtual WriteResult write(void);
/*!
Reads data from the device, and stores it in the input buffer.
Returns true on success; otherwise returns false.
This method will fail if there is no more data available, if a
timeout occurred or if an error with the device prevents more
data from being read.
The number of bytes read from the device is undefined.
*/
virtual bool fillInputBuffer(void);
BincStream inputBuffer;
BincStream outputBuffer;
protected:
unsigned int flags;
unsigned int maxInputBufferSize;
unsigned int maxOutputBufferSize;
unsigned int timeout;
unsigned int readCount;
unsigned int writeCount;
LogLevel outputLevel;
LogLevel outputLevelLimit;
mutable Error error;
mutable std::string errorString;
int dumpfd;
};
//----------------------------------------------------------------------
/*!
  Appends \a source to the output buffer, provided the device is enabled
  and the current output level does not exceed the output level limit;
  otherwise the data is dropped.

  When a dump descriptor is set, the formatted data is also written to it.
  If the limit flag is set and the output buffer has grown past
  maxOutputBufferSize, the buffer is flushed.

  NOTE(review): the flush check tests HasInputLimit although it guards the
  output buffer; confirm against the flag definitions that this is intended.
*/
template <class T> IODevice &IODevice::operator <<(const T &source)
{
  // Disabled device or message above the configured level: nothing to do.
  if (!(flags & IsEnabled) || outputLevel > outputLevelLimit)
    return *this;

  outputBuffer << source;

  if (dumpfd) {
    // Format once more into a scratch stream to obtain the exact bytes.
    BincStream formatted;
    formatted << source;
    ::write(dumpfd, formatted.str().c_str(), formatted.getSize());
  }

  if ((flags & HasInputLimit)
      && outputBuffer.getSize() > maxOutputBufferSize)
    flush();

  return *this;
}
}
#endif

View file

@ -0,0 +1,87 @@
/*-*-mode:c++-*-*/
/* --------------------------------------------------------------------
* Filename:
* src/iofactory.cc
*
* Description:
* Implementation of the IOFactory class.
* --------------------------------------------------------------------
* Copyright 2002, 2003 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#include "iofactory.h"
#include "iodevice.h"
#ifndef NO_NAMESPACES
using namespace ::Binc;
using namespace ::std;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
// Default constructor: performs no work; the device map starts out empty.
IOFactory::IOFactory(void)
{
}
//------------------------------------------------------------------------
// Destructor: performs no work. Devices registered through addDevice() are
// not deleted here.
IOFactory::~IOFactory(void)
{
}
//------------------------------------------------------------------------
// Returns the single shared IOFactory, created on the first call.
IOFactory &IOFactory::getInstance(void)
{
  static IOFactory theFactory;
  return theFactory;
}
//------------------------------------------------------------------------
// Registers dev under the service name it reports, replacing (and deleting)
// any device previously registered for that service.
//
// A null dev is ignored. Registering the very same pointer again is a no-op:
// previously this deleted the device and then stored the dangling pointer.
void IOFactory::addDevice(IODevice *dev)
{
  if (!dev)
    return;

  // operator[] creates a null entry when the service is not registered yet.
  IODevice *&slot = IOFactory::getInstance().devices[dev->service()];
  if (slot == dev)
    return;

  // FIXME: Delete correct object. Now, only IODevice's destructor is
  // called, and only IODevice's memory is freed.
  delete slot;
  slot = dev;
}
//------------------------------------------------------------------------
// Returns the device registered for the "client" service, or a shared
// default-constructed IODevice when none has been registered.
IODevice &IOFactory::getClient(void)
{
  static IODevice nulDevice;

  std::map<std::string, IODevice *> &registry =
    IOFactory::getInstance().devices;
  std::map<std::string, IODevice *>::iterator found = registry.find("client");

  if (found == registry.end())
    return nulDevice;
  return *found->second;
}
//------------------------------------------------------------------------
// Returns the device registered for the "log" service, or a shared
// default-constructed IODevice when none has been registered.
IODevice &IOFactory::getLogger(void)
{
  static IODevice nulDevice;

  std::map<std::string, IODevice *> &registry =
    IOFactory::getInstance().devices;
  std::map<std::string, IODevice *>::iterator found = registry.find("log");

  if (found == registry.end())
    return nulDevice;
  return *found->second;
}

View file

@ -0,0 +1,69 @@
/*-*-mode:c++-*-*/
/* --------------------------------------------------------------------
* Filename:
* src/iofactory.h
*
* Description:
* Declaration of the IOFactory class.
* --------------------------------------------------------------------
* Copyright 2002, 2003 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifndef IOFACTORY_H_INCLUDED
#define IOFACTORY_H_INCLUDED
#include <map>
#include <string>
#include "iodevice.h"
namespace Binc {
class IOFactory {
public:
~IOFactory(void);
static void addDevice(IODevice *dev);
static IOFactory &getInstance(void);
static IODevice &getClient(void);
static IODevice &getLogger(void);
private:
IOFactory(void);
std::map<std::string, IODevice *> devices;
};
}
/*
 * Stream-style logging shortcuts, used as:  bincInfo << "text" << value;
 *
 * Each logger macro expands to ONE expression (a comma expression that sets
 * the output level and then yields the logger), so it stays correct inside
 * an unbraced if/else or loop body. The former "set level; logger" expansion
 * was two statements, of which only the first was governed by an enclosing
 * unbraced `if`.
 *
 * Without DEBUG, the error/warning/debug macros compile to a branch that is
 * never taken. The `if (true) {} else` form is used so that an `else`
 * written after the logging statement cannot bind to the macro's own `if`.
 */
#define bincClient \
  IOFactory::getClient()

#if !defined (DEBUG)
#define bincError   if (true) {} else std::cout
#define bincWarning if (true) {} else std::cout
#define bincDebug   if (true) {} else std::cout
#else
#define bincError \
  (IOFactory::getLogger().setOutputLevel(IODevice::ErrorLevel), IOFactory::getLogger())
#define bincWarning \
  (IOFactory::getLogger().setOutputLevel(IODevice::WarningLevel), IOFactory::getLogger())
#define bincDebug \
  (IOFactory::getLogger().setOutputLevel(IODevice::DebugLevel), IOFactory::getLogger())
#endif

#define bincInfo \
  (IOFactory::getLogger().setOutputLevel(IODevice::InfoLevel), IOFactory::getLogger())
#endif

View file

@ -0,0 +1,96 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime-getpart.cc
*
* Description:
* Implementation of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "mime.h"
#include "convert.h"
#include <string>
#include <vector>
#include <map>
#include <exception>
#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
// Locates the sub-part whose dotted part number equals findpart.
//
// findpart  - part specifier being searched for, e.g. "1.2".
// genpart   - part specifier generated so far for this node ("" at the top).
// fetchType - when FetchHeader and the match is a message/rfc822 part, the
//             enclosed message (its first member) is returned instead.
//
// Returns a pointer to the matching part, or 0 when there is none.
const Binc::MimePart *Binc::MimePart::getPart(const string &findpart,
                                              string genpart,
                                              FetchType fetchType) const
{
  if (findpart == genpart)
    return this;

  if (isMultipart()) {
    // Children of a multipart are numbered 1..n below the current specifier.
    int part = 1;
    for (vector<MimePart>::const_iterator i = members.begin();
         i != members.end(); ++i, ++part) {
      BincStream ss;
      ss << genpart;
      if (genpart != "")
        ss << ".";
      ss << part;

      const MimePart *m = (*i).getPart(findpart, ss.str());
      if (m == 0)
        continue;

      // Only descend into the enclosed message when it actually exists;
      // indexing an empty member list would be out of bounds.
      if (fetchType == FetchHeader && m->isMessageRFC822()
          && !m->members.empty())
        m = &m->members[0];
      return m;
    }
  } else if (isMessageRFC822()) {
    // An enclosed message does not add a level to the part number.
    if (members.size() == 1)
      return members[0].getPart(findpart, genpart);
    return 0;
  } else {
    // Singlepart
    if (genpart != "")
      genpart += ".";
    genpart += "1";
    if (findpart == genpart)
      return this;
  }

  return 0;
}

View file

@ -0,0 +1,216 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* src/mime-inputsource.h
*
* Description:
* The base class of the MIME input source
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifndef mime_inputsource_h_included
#define mime_inputsource_h_included
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <unistd.h>
#include <iostream>
namespace Binc {
/*
 * Character source for the MIME parser, reading from a file descriptor.
 *
 * Raw input is fetched in chunks, converted so that line ends are CRLF, and
 * stored in a 16 KiB ring buffer (data) indexed by head (read position) and
 * tail (write position). offset counts the characters handed out so far.
 */
class MimeInputSource {
public:
  inline MimeInputSource(int fd, unsigned int start = 0);
  virtual inline ~MimeInputSource();

  // Reads up to nbytes raw bytes into raw.
  virtual inline size_t fillRaw(char *raw, size_t nbytes);
  // Rewinds to the beginning of the input.
  virtual inline void reset();
  // Fetches and converts one more chunk of input into the ring buffer.
  virtual inline bool fillInputBuffer();

  // Moves the read position to the given offset.
  inline void seek(unsigned int offset);
  // Stores the next character in *c; returns false when none is available.
  inline bool getChar(char *c);
  // Steps back by one character.
  inline void ungetChar();

  inline int getFileDescriptor() const;
  inline unsigned int getOffset() const;

private:
  int fd;
  char data[16384];
  unsigned int offset;
  unsigned int tail;
  unsigned int head;
  unsigned int start;
  char lastChar;
};
// Creates a source reading from fd, with an empty zero-filled buffer, and
// then advances to offset start.
inline MimeInputSource::MimeInputSource(int fd, unsigned int start)
  : fd(fd), offset(0), tail(0), head(0), start(start), lastChar('\0')
{
  memset(data, '\0', sizeof(data));
  seek(start);
}
// Destructor: performs no work; in particular the file descriptor is not closed.
inline MimeInputSource::~MimeInputSource(void)
{
}
// Reads up to nbytes bytes from the file descriptor into raw and returns
// read()'s result converted to size_t (so a failure of -1 becomes the
// largest size_t value).
inline size_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
{
  const ssize_t got = read(fd, raw, nbytes);
  return static_cast<size_t>(got);
}
// Fetches one raw chunk with fillRaw() and appends it to the ring buffer,
// rewriting line ends on the way:
//   - '\n' is stored as "\r\n";
//   - a '\r' is held back (nothing stored) until the next character is seen;
//   - a held-back '\r' followed by anything but '\n' is stored as "\r\n".
// Returns false when fillRaw() delivered nothing or failed, true otherwise.
inline bool MimeInputSource::fillInputBuffer(void)
{
  char raw[4096];
  const ssize_t nbytes = fillRaw(raw, sizeof(raw));
  if (nbytes <= 0) {
    // FIXME: If ferror(crlffile) we should log this.
    return false;
  }

  // The buffer size is a power of two, so masking wraps the index.
  const unsigned int mask = sizeof(data) - 1;

  for (ssize_t pos = 0; pos < nbytes; ++pos) {
    const char c = raw[pos];
    const bool pendingCR = (lastChar == '\r');

    if (c == '\n' || pendingCR) {
      data[tail++ & mask] = '\r';
      data[tail++ & mask] = '\n';
    }
    if (c != '\r' && c != '\n')
      data[tail++ & mask] = c;

    lastChar = c;
  }
  return true;
}
// Discards all buffered data, zeroes the offset and, when a real file
// descriptor is attached (fd != -1), rewinds it to the start of the file.
inline void MimeInputSource::reset(void)
{
  head = 0;
  tail = 0;
  offset = 0;
  lastChar = '\0';

  if (fd == -1)
    return;
  lseek(fd, 0, SEEK_SET);
}
// Positions the source at seekToOffset. Seeking backwards restarts from the
// beginning; the target is then reached by reading and discarding
// characters, stopping early if the input runs out.
inline void MimeInputSource::seek(unsigned int seekToOffset)
{
  if (seekToOffset < offset)
    reset();

  char discarded;
  while (offset < seekToOffset && getChar(&discarded))
    ;
}
// Stores the next character of the (CRLF-converted) input in *c and advances
// the offset. Returns false when no more input is available.
inline bool MimeInputSource::getChar(char *c)
{
  // fillInputBuffer() may succeed without producing any output (a chunk that
  // consists of a single '\r' is only remembered, not stored). Keep refilling
  // until data is really available instead of handing out a stale byte.
  while (head == tail) {
    if (!fillInputBuffer())
      return false;
  }
  *c = data[head++ & (0x4000-1)];
  ++offset;
  return true;
}
// Steps back one character by decrementing both the read position and the
// offset. No check is made that a character was read before.
inline void MimeInputSource::ungetChar()
{
--head;
--offset;
}
// Returns the file descriptor given to the constructor.
inline int MimeInputSource::getFileDescriptor(void) const
{
return fd;
}
// Returns the current offset: characters handed out by getChar() minus
// those given back with ungetChar() since the last reset.
inline unsigned int MimeInputSource::getOffset(void) const
{
return offset;
}
///////////////////////////////////
class MimeInputSourceStream : public MimeInputSource {
public:
inline MimeInputSourceStream(istream& s, unsigned int start = 0);
virtual inline size_t fillRaw(char *raw, size_t nb);
virtual inline void reset(void);
private:
istream& s;
};
inline MimeInputSourceStream::MimeInputSourceStream(istream& si,
unsigned int start)
: MimeInputSource(-1, start), s(si)
{
}
inline size_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
{
// Why can't streams tell how many characters were actually read
// when hitting eof ?
std::streampos st = s.tellg();
s.seekg(0, ios::end);
std::streampos lst = s.tellg();
s.seekg(st);
size_t nbytes = lst - st;
if (nbytes > nb) {
nbytes = nb;
}
if (nbytes <= 0) {
return (size_t)-1;
}
s.read(raw, nbytes);
return nbytes;
}
inline void MimeInputSourceStream::reset(void)
{
MimeInputSource::reset();
s.seekg(0);
}
}
extern Binc::MimeInputSource *mimeSource;
#endif

View file

@ -0,0 +1,631 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime-parsefull.cc
*
* Description:
* Implementation of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "mime.h"
#include "mime-utils.h"
#include "mime-inputsource.h"
#include "convert.h"
#include <string>
#include <vector>
#include <map>
#include <exception>
#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
// Input source shared by the parsing routines in this file. It is created,
// replaced or reset at the start of MimeDocument::parseFull().
Binc::MimeInputSource *mimeSource = 0;
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
// Debug tracing for the full parser. MPF is undefined right here, so
// MPFDEB(X) expands to nothing; replace the #undef by a #define to turn every
// MPFDEB((stderr, fmt, ...)) into an fprintf call.
#undef MPF
#ifdef MPF
#define MPFDEB(X) fprintf X
#else
#define MPFDEB(X)
#endif
//------------------------------------------------------------------------
void Binc::MimeDocument::parseFull(int fd) const
{
if (allIsParsed)
return;
allIsParsed = true;
if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
delete mimeSource;
mimeSource = new MimeInputSource(fd);
} else {
mimeSource->reset();
}
headerstartoffsetcrlf = 0;
headerlength = 0;
bodystartoffsetcrlf = 0;
bodylength = 0;
size = 0;
messagerfc822 = false;
multipart = false;
int bsize = 0;
string bound;
MimePart::parseFull(bound, bsize);
// eat any trailing junk to get the correct size
char c;
while (mimeSource->getChar(&c));
size = mimeSource->getOffset();
}
// Parses the complete document (headers and all body parts) from stream s.
// Does nothing when the document has already been fully parsed.
void Binc::MimeDocument::parseFull(istream& s) const
{
  if (allIsParsed)
    return;
  allIsParsed = true;

  // A stream source is never reused: always install a fresh one.
  delete mimeSource;
  mimeSource = new MimeInputSourceStream(s);

  // Start from a clean state.
  headerstartoffsetcrlf = 0;
  headerlength = 0;
  bodystartoffsetcrlf = 0;
  bodylength = 0;
  size = 0;
  multipart = false;
  messagerfc822 = false;

  // The top-level part has no enclosing boundary.
  string noBoundary;
  int boundarySize = 0;
  MimePart::parseFull(noBoundary, boundarySize);

  // eat any trailing junk to get the correct size
  char ignored;
  while (mimeSource->getChar(&ignored))
    ;
  size = mimeSource->getOffset();
}
//------------------------------------------------------------------------
// Reads one header field (name, ':' and possibly folded value) from
// mimeSource and adds it to *header.
//
// header - receives the parsed name/value pair.
// nlines - incremented for every '\n' consumed.
//
// Returns true when a further header field may follow, false when the end of
// the header section or the end of input was reached.
static bool parseOneHeaderLine(Binc::Header *header, unsigned int *nlines)
{
using namespace ::Binc;
char c;
bool eof = false;
// Sliding window over the last four characters of the value, used to spot
// the blank line ("\r\n\r\n") that terminates the header section.
char cqueue[4];
string name;
string content;
while (mimeSource->getChar(&c)) {
// If we encounter a \r before we got to the first ':', then
// rewind back to the start of the line and assume we're at the
// start of the body.
if (c == '\r') {
for (int i = 0; i < (int) name.length() + 1; ++i)
mimeSource->ungetChar();
return false;
}
// A colon marks the end of the header name
if (c == ':') break;
// Otherwise add to the header name
name += c;
}
cqueue[0] = '\0';
cqueue[1] = '\0';
cqueue[2] = '\0';
cqueue[3] = '\0';
// Read until the end of the header.
bool endOfHeaders = false;
while (!endOfHeaders) {
if (!mimeSource->getChar(&c)) {
eof = true;
break;
}
if (c == '\n') ++*nlines;
// Shift the window left by one and append the new character.
for (int i = 0; i < 3; ++i)
cqueue[i] = cqueue[i + 1];
cqueue[3] = c;
if (strncmp(cqueue, "\r\n\r\n", 4) == 0) {
endOfHeaders = true;
break;
}
// If the last character was a newline, and the first now is not
// whitespace, then rewind one character and store the current
// key,value pair.
if (cqueue[2] == '\n' && c != ' ' && c != '\t') {
// Drop the last two stored characters (the line end) from the value.
if (content.length() > 2)
content.resize(content.length() - 2);
trim(content);
header->add(name, content);
if (c != '\r') {
// c starts the next field: give it back and undo its line count.
mimeSource->ungetChar();
if (c == '\n') --*nlines;
return true;
}
// c is '\r' at the start of a line: consume one more character
// (presumably the '\n' of the blank line) and stop.
mimeSource->getChar(&c);
return false;
}
content += c;
}
// Reached through end of input or the blank line: store what was collected.
if (name != "") {
if (content.length() > 2)
content.resize(content.length() - 2);
header->add(name, content);
}
return !(eof || endOfHeaders);
}
//------------------------------------------------------------------------
// Reads header fields into *header until parseOneHeaderLine() reports that
// no further field follows. nlines is passed through for line counting.
static void parseHeader(Binc::Header *header, unsigned int *nlines)
{
  bool more;
  do {
    more = parseOneHeaderLine(header, nlines);
  } while (more);
}
//------------------------------------------------------------------------
// Inspects the Content-Type header to classify a MIME part.
//
// header        - parsed headers of the part.
// multipart     - set to true for a "multipart/..." type.
// messagerfc822 - set to true for "message/rfc822".
// subtype       - receives the lower-cased subtype of a multipart type.
// boundary      - receives the value of a "boundary" parameter, if present.
//
// Outputs are left untouched when there is no Content-Type header. A missing
// type or subtype is treated as "text" / "plain" respectively.
static void analyzeHeader(Binc::Header *header, bool *multipart,
                          bool *messagerfc822, string *subtype,
                          string *boundary)
{
  using namespace ::Binc;

  HeaderItem ctype;
  if (!header->getFirstHeader("content-type", ctype))
    return;

  // The header value is a ';' separated list: media type first, then
  // parameters.
  vector<string> fields;
  split(ctype.getValue(), ";", fields);

  if (!fields.empty()) {
    // first element should describe content type
    string mediaType = fields[0];
    trim(mediaType);

    vector<string> halves;
    split(mediaType, "/", halves);

    string major = halves.empty() ? "text" : halves[0];
    string minor = (halves.size() > 1) ? halves[1] : "plain";
    lowercase(major);

    if (major == "multipart") {
      *multipart = true;
      lowercase(minor);
      *subtype = minor;
    } else if (major == "message") {
      lowercase(minor);
      if (minor == "rfc822")
        *messagerfc822 = true;
    }
  }

  // Look through every element for a key=value pair named "boundary".
  for (vector<string>::size_type n = 0; n < fields.size(); ++n) {
    string param = fields[n];
    trim(param);

    const string::size_type eq = param.find('=');
    if (eq == string::npos)
      continue;

    string key = param.substr(0, eq);
    string value = param.substr(eq + 1);
    lowercase(key);
    trim(key);
    if (key != "boundary")
      continue;

    trim(value, " \"");
    *boundary = value;
  }
}
// Parses the body of a message/rfc822 part, i.e. a complete enclosed MIME
// document, by running the parser recursively with the enclosing boundary.
//
// members        - the parsed enclosed message is appended here.
// foundendofpart - set to true when the recursive parse returns non-zero.
// bodylength     - receives the body length, excluding the terminating
//                  boundary.
// nbodylines     - increased by the line count of the enclosed message.
// toboundary     - boundary string of the enclosing multipart.
static void parseMessageRFC822(vector<Binc::MimePart> *members,
                               bool *foundendofpart,
                               unsigned int *bodylength,
                               unsigned int *nbodylines,
                               const string &toboundary)
{
  using namespace ::Binc;

  MimePart enclosed;
  const unsigned int bodyStart = mimeSource->getOffset();

  // parsefull returns the number of bytes that need to be removed
  // from the body because of the terminating boundary string.
  int trailerSize = 0;
  if (enclosed.parseFull(toboundary, trailerSize))
    *foundendofpart = true;

  // Compute end - start - trailer, clamping at zero so that the unsigned
  // arithmetic can never wrap around.
  const unsigned int bodyEnd = mimeSource->getOffset();
  unsigned int length = 0;
  if (bodyEnd >= bodyStart) {
    length = bodyEnd - bodyStart;
    if (length >= (unsigned int) trailerSize)
      length -= (unsigned int) trailerSize;
    else
      length = 0;
  }
  *bodylength = length;

  *nbodylines += enclosed.getNofLines();
  members->push_back(enclosed);
}
// Consumes input up to and including the first occurrence of delimiter.
// Anything before the delimiter is ignored (it's usually a text message
// intended for non-mime clients).
//
// delimiter - string to look for; when empty, input is read to the end.
// nlines    - incremented for every '\n' consumed.
// eof       - set to true when the input ends before a match.
//
// Returns true when the delimiter was found.
static bool skipUntilBoundary(const string &delimiter,
                              unsigned int *nlines, bool *eof)
{
  int queueLen = delimiter.length();
  const char *wanted = delimiter.c_str();

  // Circular buffer holding the most recent queueLen characters.
  char *queue = 0;
  if (delimiter != "") {
    queue = new char[queueLen];
    memset(queue, 0, queueLen);
  }

  int writePos = 0;
  bool matched = false;
  char c;
  while (!matched) {
    if (!mimeSource->getChar(&c)) {
      *eof = true;
      break;
    }
    if (c == '\n')
      ++*nlines;

    // if there is no delimiter, we just read until the end of the
    // file.
    if (queue == 0)
      continue;

    queue[writePos] = c;
    if (++writePos == queueLen)
      writePos = 0;

    if (compareStringToQueue(wanted, queue, writePos, queueLen))
      matched = true;
  }

  delete [] queue;
  return matched;
}
// JFD: Things we do after finding a boundary (something like CRLF--somestring)
// Need to see if this is a final one (with an additional -- at the end),
// and need to check if it is immediately followed by another boundary
// (in this case, we give up our final CRLF in its favour)
//
// eof            - set to true when the input ends while reading ahead.
// nlines         - incremented for '\n' characters read in the first two
//                  look-ahead pairs.
// boundarysize   - increased by 2 for a closing "--" and by 2 for a CRLF
//                  that is kept as part of this boundary.
// foundendofpart - set to true when the boundary is followed by "--".
static inline void postBoundaryProcessing(bool *eof,
unsigned int *nlines,
int *boundarysize,
bool *foundendofpart)
{
// Read two more characters. This may be CRLF, it may be "--" and
// it may be any other two characters.
char a = '\0';
if (!mimeSource->getChar(&a))
*eof = true;
if (a == '\n')
++*nlines;
char b = '\0';
if (!mimeSource->getChar(&b))
*eof = true;
if (b == '\n')
++*nlines;
// If eof, we're done here
if (*eof)
return;
// If we find two dashes after the boundary, then this is the end
// of boundary marker, and we need to get 2 more chars
if (a == '-' && b == '-') {
*foundendofpart = true;
*boundarysize += 2;
if (!mimeSource->getChar(&a))
*eof = true;
if (a == '\n')
++*nlines;
if (!mimeSource->getChar(&b))
*eof = true;
if (b == '\n')
++*nlines;
}
// If the boundary is followed by CRLF, we need to handle the
// special case where another boundary line follows
// immediately. In this case we consider the CRLF to be part of
// the NEXT boundary.
if (a == '\r' && b == '\n') {
// Get 2 more
if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b)) {
*eof = true;
} else if (a == '-' && b == '-') {
MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n"));
// Give back all four characters: the CRLF and the following "--".
mimeSource->ungetChar();
mimeSource->ungetChar();
mimeSource->ungetChar();
mimeSource->ungetChar();
} else {
// We unget the 2 chars, and keep our crlf (increasing our own size)
MPFDEB((stderr, "BINC: keeping my CRLF\n"));
mimeSource->ungetChar();
mimeSource->ungetChar();
*boundarysize += 2;
}
} else {
// Boundary string not followed by CRLF, don't read more and let
// others skip the rest. Note that this is allowed but quite uncommon
mimeSource->ungetChar();
mimeSource->ungetChar();
}
}
static void parseMultipart(const string &boundary,
const string &toboundary,
bool *eof,
unsigned int *nlines,
int *boundarysize,
bool *foundendofpart,
unsigned int *bodylength,
vector<Binc::MimePart> *members)
{
MPFDEB((stderr, "BINC: ParseMultipart: boundary [%s], toboundary[%s]\n",
boundary.c_str(),
toboundary.c_str()));
using namespace ::Binc;
unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
// multipart parsing starts with skipping to the first
// boundary. then we call parse() for all parts. the last parse()
// command will return a code indicating that it found the last
// boundary of this multipart. Note that the first boundary does
// not have to start with CRLF.
string delimiter = "--" + boundary;
skipUntilBoundary(delimiter, nlines, eof);
if (!eof)
*boundarysize = delimiter.size();
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
// read all mime parts.
if (!*foundendofpart && !*eof) {
bool quit = false;
do {
MimePart m;
// If parseFull returns != 0, then it encountered the multipart's
// final boundary.
int bsize = 0;
if (m.parseFull(boundary, bsize)) {
quit = true;
*boundarysize = bsize;
}
members->push_back(m);
} while (!quit);
}
if (!*foundendofpart && !*eof) {
// multipart parsing starts with skipping to the first
// boundary. then we call parse() for all parts. the last parse()
// command will return a code indicating that it found the last
// boundary of this multipart. Note that the first boundary does
// not have to start with CRLF.
string delimiter = "\r\n--" + toboundary;
skipUntilBoundary(delimiter, nlines, eof);
if (!*eof)
*boundarysize = delimiter.size();
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
}
// make sure bodylength doesn't overflow
*bodylength = mimeSource->getOffset();
if (*bodylength >= bodystartoffsetcrlf) {
*bodylength -= bodystartoffsetcrlf;
if (*bodylength >= (unsigned int) *boundarysize) {
*bodylength -= (unsigned int) *boundarysize;
} else {
*bodylength = 0;
}
} else {
*bodylength = 0;
}
MPFDEB((stderr, "BINC: ParseMultipart return\n"));
}
static void parseSinglePart(const string &toboundary,
int *boundarysize,
unsigned int *nbodylines,
unsigned int *nlines,
bool *eof, bool *foundendofpart,
unsigned int *bodylength)
{
MPFDEB((stderr, "BINC: parseSinglePart, boundary [%s]\n",
toboundary.c_str()));
using namespace ::Binc;
unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
// If toboundary is empty, then we read until the end of the
// file. Otherwise we will read until we encounter toboundary.
string _toboundary;
if (toboundary != "") {
_toboundary = "\r\n--";
_toboundary += toboundary;
}
// if (skipUntilBoundary(_toboundary, nlines, eof))
// *boundarysize = _toboundary.length();
char *boundaryqueue = 0;
int endpos = _toboundary.length();
if (toboundary != "") {
boundaryqueue = new char[endpos];
memset(boundaryqueue, 0, endpos);
}
*boundarysize = 0;
const char *_toboundaryStr = _toboundary.c_str();
string line;
bool toboundaryIsEmpty = (toboundary == "");
char c;
int boundarypos = 0;
while (mimeSource->getChar(&c)) {
if (c == '\n') { ++*nbodylines; ++*nlines; }
if (toboundaryIsEmpty)
continue;
// find boundary
boundaryqueue[boundarypos++] = c;
if (boundarypos == endpos)
boundarypos = 0;
if (compareStringToQueue(_toboundaryStr, boundaryqueue,
boundarypos, endpos)) {
*boundarysize = _toboundary.length();
break;
}
}
delete [] boundaryqueue;
if (toboundary != "") {
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
} else {
// Recoll: in the case of a multipart body with a null
// boundary (probably illegal but wtf), eof was not set and
// multipart went into a loop until bad alloc.
*eof = true;
}
// make sure bodylength doesn't overflow
*bodylength = mimeSource->getOffset();
if (*bodylength >= bodystartoffsetcrlf) {
*bodylength -= bodystartoffsetcrlf;
if (*bodylength >= (unsigned int) *boundarysize) {
*bodylength -= (unsigned int) *boundarysize;
} else {
*bodylength = 0;
}
} else {
*bodylength = 0;
}
MPFDEB((stderr, "BINC: parseSimple ret: bodylength %d, boundarysize %d\n",
*bodylength, *boundarysize));
}
//------------------------------------------------------------------------
// Parses one MIME part (headers and body) from the current position of
// mimeSource.
//
// toboundary   - boundary of the enclosing multipart ("" at top level).
// boundarysize - receives the size of the boundary that ended this part.
//
// Returns 1 when the end of the input or a closing boundary was reached,
// 0 otherwise.
int Binc::MimePart::parseFull(const string &toboundary,
                              int &boundarysize) const
{
  MPFDEB((stderr, "BINC: parsefull, toboundary[%s]\n", toboundary.c_str()));

  // Parse the header of this mime part.
  headerstartoffsetcrlf = mimeSource->getOffset();
  parseHeader(&h, &nlines);

  // Headerlength includes the separating CRLF. Body starts after the
  // CRLF.
  const unsigned int afterHeader = mimeSource->getOffset();
  headerlength = afterHeader - headerstartoffsetcrlf;
  bodystartoffsetcrlf = afterHeader;
  bodylength = 0;

  // Determine the type of mime part by looking at fields in the
  // header.
  analyzeHeader(&h, &multipart, &messagerfc822, &subtype, &boundary);

  // Hand the body to the parser matching the part type.
  bool eof = false;
  bool foundendofpart = false;
  if (messagerfc822) {
    parseMessageRFC822(&members, &foundendofpart, &bodylength,
                       &nbodylines, toboundary);
  } else if (multipart) {
    parseMultipart(boundary, toboundary, &eof, &nlines, &boundarysize,
                   &foundendofpart, &bodylength,
                   &members);
  } else {
    parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
                    &eof, &foundendofpart, &bodylength);
  }

  MPFDEB((stderr, "BINC: parsefull ret, toboundary[%s]\n", toboundary.c_str()));
  if (eof || foundendofpart)
    return 1;
  return 0;
}

View file

@ -0,0 +1,196 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime-parseonlyheader.cc
*
* Description:
* Implementation of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "mime.h"
#include "mime-utils.h"
#include "mime-inputsource.h"
#include "convert.h"
#include <string>
#include <vector>
#include <map>
#include <exception>
#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
void Binc::MimeDocument::parseOnlyHeader(int fd) const
{
if (allIsParsed || headerIsParsed)
return;
headerIsParsed = true;
if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
delete mimeSource;
mimeSource = new MimeInputSource(fd);
} else {
mimeSource->reset();
}
headerstartoffsetcrlf = 0;
headerlength = 0;
bodystartoffsetcrlf = 0;
bodylength = 0;
messagerfc822 = false;
multipart = false;
nlines = 0;
nbodylines = 0;
MimePart::parseOnlyHeader("");
}
// Parses only the top-level header of the document read from stream s.
// Does nothing when the header or the whole document was parsed before.
void Binc::MimeDocument::parseOnlyHeader(istream& s) const
{
  if (allIsParsed || headerIsParsed)
    return;
  headerIsParsed = true;

  // A stream source is never reused: always install a fresh one.
  delete mimeSource;
  mimeSource = new MimeInputSourceStream(s);

  // Start from a clean state.
  headerstartoffsetcrlf = 0;
  headerlength = 0;
  bodystartoffsetcrlf = 0;
  bodylength = 0;
  nlines = 0;
  nbodylines = 0;
  multipart = false;
  messagerfc822 = false;

  MimePart::parseOnlyHeader("");
}
//------------------------------------------------------------------------
// Parses the header section of this part from the current position of
// mimeSource, adding every field to h and counting lines in nlines.
// On return headerlength holds the number of characters consumed.
//
// toboundary - not used by this function.
//
// Always returns 1.
int Binc::MimePart::parseOnlyHeader(const string &toboundary) const
{
  string name;
  string content;

  // Sliding window over the last four characters of a field value, used to
  // spot the blank line ("\r\n\r\n") that ends the header section.
  char cqueue[4];
  memset(cqueue, 0, sizeof(cqueue));

  headerstartoffsetcrlf = mimeSource->getOffset();

  bool quit = false;
  char c = '\0';

  while (!quit) {
    // read name
    while (1) {
      if (!mimeSource->getChar(&c)) {
        quit = true;
        break;
      }

      if (c == '\n') ++nlines;
      if (c == ':') break;
      if (c == '\n') {
        // A line without ':' is not a header field: give back the
        // characters collected so far and stop.
        for (int i = name.length() - 1; i >= 0; --i)
          mimeSource->ungetChar();

        quit = true;
        name.clear();
        break;
      }

      name += c;

      if (name.length() == 2 && name.substr(0, 2) == "\r\n") {
        name.clear();
        quit = true;
        break;
      }
    }

    if (name.length() == 1 && name[0] == '\r') {
      name.clear();
      break;
    }

    if (quit) break;

    // read the (possibly folded) value
    while (!quit) {
      if (!mimeSource->getChar(&c)) {
        quit = true;
        break;
      }

      if (c == '\n') ++nlines;

      // Shift the window left by one and append the new character.
      for (int i = 0; i < 3; ++i)
        cqueue[i] = cqueue[i + 1];
      cqueue[3] = c;

      if (strncmp(cqueue, "\r\n\r\n", 4) == 0) {
        quit = true;
        break;
      }

      if (cqueue[2] == '\n') {
        // guess the mime rfc says what can not appear on the beginning
        // of a line.
        // The cast keeps isspace() well defined for 8-bit characters, which
        // are negative where plain char is signed.
        if (!isspace((unsigned char) cqueue[3])) {
          // Drop the last two stored characters (the line end).
          if (content.length() > 2)
            content.resize(content.length() - 2);

          trim(content);
          h.add(name, content);

          // c is already the first character of the next field name.
          name = c;
          content.clear();
          break;
        }
      }

      content += c;
    }
  }

  // Store the field that was still being collected when the loop ended.
  if (name != "") {
    if (content.length() > 2)
      content.resize(content.length() - 2);
    h.add(name, content);
  }

  headerlength = mimeSource->getOffset() - headerstartoffsetcrlf;

  return 1;
}

View file

@ -0,0 +1,107 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime-printbody.cc
*
* Description:
* Implementation of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "mime.h"
#include "mime-utils.h"
#include "mime-inputsource.h"
#include "convert.h"
#include "iodevice.h"
#include "iofactory.h"
#include <string>
#include <vector>
#include <map>
#include <exception>
#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
/**
 * Write part of this part's body to output, one character at a time.
 *
 * mimeSource is re-created when it is missing or bound to another
 * descriptor, then rewound and positioned at bodystartoffsetcrlf +
 * startoffset.
 *
 * @param fd          descriptor the body is read from.
 * @param output      device receiving the characters.
 * @param startoffset offset of the first character, relative to the body.
 * @param length      maximum number of characters to write; clamped so that
 *                    the range never extends past bodylength. Nothing is
 *                    written when startoffset is at or beyond bodylength.
 */
void Binc::MimePart::printBody(int fd, IODevice &output,
                               unsigned int startoffset,
                               unsigned int length) const
{
  if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
    delete mimeSource;
    mimeSource = new MimeInputSource(fd);
  }

  mimeSource->reset();
  mimeSource->seek(bodystartoffsetcrlf + startoffset);

  // Clamp without relying on unsigned arithmetic: "bodylength - startoffset"
  // wraps around when startoffset > bodylength, and "startoffset + length"
  // can overflow for very large lengths.
  if (startoffset >= bodylength)
    length = 0;
  else if (length > bodylength - startoffset)
    length = bodylength - startoffset;

  char c = '\0';
  for (unsigned int i = 0; i < length; ++i) {
    if (!mimeSource->getChar(&c))
      break;
    output << (char)c;
  }
}
/**
 * Append part of this part's body to s, reading from descriptor fd.
 *
 * Makes sure mimeSource reads from fd (it is re-created when missing or
 * bound to another descriptor), then delegates to the three-argument
 * getBody().
 */
void Binc::MimePart::getBody(int fd, string &s,
                             unsigned int startoffset,
                             unsigned int length) const
{
  const bool usable = mimeSource && mimeSource->getFileDescriptor() == fd;
  if (!usable) {
    delete mimeSource;
    mimeSource = new MimeInputSource(fd);
  }

  getBody(s, startoffset, length);
}
/**
 * Append part of this part's body to s, reading from the current
 * mimeSource (which must already exist).
 *
 * The source is rewound and positioned at bodystartoffsetcrlf + startoffset.
 *
 * @param s           string the characters are appended to.
 * @param startoffset offset of the first character, relative to the body.
 * @param length      maximum number of characters to append; clamped so that
 *                    the range never extends past bodylength. Nothing is
 *                    appended when startoffset is at or beyond bodylength.
 */
void Binc::MimePart::getBody(string &s,
                             unsigned int startoffset,
                             unsigned int length) const
{
  mimeSource->reset();
  mimeSource->seek(bodystartoffsetcrlf + startoffset);

  // Clamp first, without unsigned wrap-around, so that the reservation
  // below is never attempted with a caller-supplied "read everything" value.
  if (startoffset >= bodylength)
    length = 0;
  else if (length > bodylength - startoffset)
    length = bodylength - startoffset;

  // Characters are appended, so make room on top of what s already holds.
  s.reserve(s.size() + length);

  char c = '\0';
  for (unsigned int i = 0; i < length; ++i) {
    if (!mimeSource->getChar(&c))
      break;
    s += (char)c;
  }
}

View file

@ -0,0 +1,72 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime-printdoc.cc
*
* Description:
* Implementation of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "mime.h"
#include "mime-utils.h"
#include "mime-inputsource.h"
#include "convert.h"
#include "iodevice.h"
#include "iofactory.h"
#include <string>
#include <vector>
#include <map>
#include <exception>
#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
/**
 * Write up to length characters of this part to output, starting at the
 * beginning of its header (headerstartoffsetcrlf).
 *
 * mimeSource is re-created when it is missing or bound to another
 * descriptor, then rewound before seeking.
 *
 * NOTE(review): startoffset is accepted but never used here; confirm
 * whether that is intended.
 */
void Binc::MimePart::printDoc(int fd, IODevice &output,
                              unsigned int startoffset,
                              unsigned int length) const
{
  const bool usable = mimeSource && mimeSource->getFileDescriptor() == fd;
  if (!usable) {
    delete mimeSource;
    mimeSource = new MimeInputSource(fd);
  }

  mimeSource->reset();
  mimeSource->seek(headerstartoffsetcrlf);

  char ch;
  unsigned int remaining = length;
  // Stop after length characters or at the first failed read.
  while (remaining > 0 && mimeSource->getChar(&ch)) {
    output << (char)ch;
    --remaining;
  }
}

View file

@ -0,0 +1,200 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime-printheader.cc
*
* Description:
* Implementation of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "mime.h"
#include "mime-utils.h"
#include "mime-inputsource.h"
#include "convert.h"
#include "iodevice.h"
#include "iofactory.h"
#include <string>
#include <vector>
#include <map>
#include <exception>
#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
// Collect header fields of this part into "store".
//
// The header is re-read from mimeSource, starting at headerstartoffsetcrlf.
// For each field, its name is compared case-insensitively with the entries
// of "headers"; the field is kept when (match found) == includeheaders, or
// when "headers" is empty. Kept fields are treated as one byte stream:
// bytes are counted in processedbytes until startoffset is reached, then
// appended to "store" until "length" bytes have been written.
//
// Note that nothing is written to "output" in this function; the result is
// only returned through "store". nlines is incremented for every '\n' read
// while scanning field contents.
void Binc::MimePart::printHeader(int fd, IODevice &output,
vector<string> headers, bool includeheaders,
unsigned int startoffset,
unsigned int length, string &store) const
{
// (Re)create the input source when it is missing or bound to another fd.
if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
delete mimeSource;
mimeSource = new MimeInputSource(fd);
}
mimeSource->seek(headerstartoffsetcrlf);
string name;
string content;
char cqueue[2];
memset(cqueue, 0, sizeof(cqueue));
bool quit = false;
char c = '\0';
unsigned int wrotebytes = 0;
unsigned int processedbytes = 0;
bool hasHeaderSeparator = false;
while (!quit) {
// read name
while (1) {
// allow EOF to end the header
if (!mimeSource->getChar(&c)) {
quit = true;
break;
}
// assume this character is part of the header name.
name += c;
// break on the first colon
if (c == ':')
break;
// break if a '\n' turned up.
if (c == '\n') {
// end of headers detected
if (name == "\r\n") {
hasHeaderSeparator = true;
quit = true;
break;
}
// put all data back in the buffer to the beginning of this
// line.
// NOTE(review): this loop runs name.length() + 1 times although only
// name.length() characters were read for this line -- confirm the
// intended count.
for (int i = name.length(); i >= 0; --i)
mimeSource->ungetChar();
// abort printing of header. note that in this case, the
// headers will not end with a separate \r\n.
quit = true;
name.clear();
break;
}
}
if (quit) break;
// at this point, we have a name, that is - the start of a
// header. we'll read until the end of the header.
while (!quit) {
// allow EOF to end the header.
if (!mimeSource->getChar(&c)) {
quit = true;
break;
}
if (c == '\n') ++nlines;
// make a fifo queue of the last 2 characters.
cqueue[0] = cqueue[1];
cqueue[1] = c;
// print header
if (cqueue[0] == '\n' && cqueue[1] != '\t' && cqueue[1] != ' ') {
// it wasn't a space, so put it back as it is most likely
// the start of a header name. in any case it terminates the
// content part of this header.
mimeSource->ungetChar();
// compare the lowercased name, stripped of ':' and blanks, with the
// lowercased entries of the caller's list.
string lowername = name;
lowercase(lowername);
trim(lowername, ": \t");
bool foundMatch = false;
for (vector<string>::const_iterator i = headers.begin();
i != headers.end(); ++i) {
string nametmp = *i;
lowercase(nametmp);
if (nametmp == lowername) {
foundMatch = true;
break;
}
}
if (foundMatch == includeheaders || headers.size() == 0) {
string out = name + content;
// skip bytes until startoffset is reached, then store at most
// "length" bytes. The inner test repeats the outer one.
for (string::const_iterator i = out.begin(); i != out.end(); ++i)
if (processedbytes >= startoffset && wrotebytes < length) {
if (processedbytes >= startoffset) {
store += *i;
++wrotebytes;
}
} else
++processedbytes;
}
// move on to the next header
content.clear();
name.clear();
break;
}
content += c;
}
}
// Handle what is left in "name" when the loop ended: either a field cut
// short by EOF, or the "\r\n" header separator (hasHeaderSeparator), which
// is always emitted regardless of the header list.
if (name != "") {
string lowername = name;
lowercase(lowername);
trim(lowername, ": \t");
bool foundMatch = false;
for (vector<string>::const_iterator i = headers.begin();
i != headers.end(); ++i) {
string nametmp = *i;
lowercase(nametmp);
if (nametmp == lowername) {
foundMatch = true;
break;
}
}
if (hasHeaderSeparator || foundMatch == includeheaders || headers.size() == 0) {
string out = name + content;
for (string::const_iterator i = out.begin(); i != out.end(); ++i)
if (processedbytes >= startoffset && wrotebytes < length) {
store += *i;
++wrotebytes;
} else
++processedbytes;
}
}
}

View file

@ -0,0 +1,55 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime-utils.h
*
* Description:
* Small helper functions shared by the mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifndef mime_utils_h_included
#define mime_utils_h_included
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
/**
 * Compare the first "size" characters of s_in with the contents of the
 * circular buffer bqueue, reading the buffer from index pos and wrapping
 * to index 0 after index size - 1.
 *
 * @param s_in   reference string, at least size characters long.
 * @param bqueue circular buffer of size characters.
 * @param pos    index of the first buffer character to compare.
 * @param size   number of characters to compare (also the buffer size).
 * @return true when all size characters match (always true for size <= 0).
 */
inline bool compareStringToQueue(const char *s_in, char *bqueue,
                                 int pos, int size)
{
  int cursor = pos;
  int matched = 0;
  while (matched < size) {
    if (bqueue[cursor] != s_in[matched])
      return false;
    ++matched;
    ++cursor;
    if (cursor == size)
      cursor = 0;
  }
  return true;
}
#endif

159
src/bincimapmime/mime.cc Normal file
View file

@ -0,0 +1,159 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* mime.cc
*
* Description:
* Implementation of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "mime.h"
#include "convert.h"
#include <string>
#include <vector>
#include <map>
#include <exception>
#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */
//------------------------------------------------------------------------
// Construct an empty document: neither the header nor the full document
// has been parsed yet.
Binc::MimeDocument::MimeDocument(void)
  : MimePart(), headerIsParsed(false), allIsParsed(false)
{
}
//------------------------------------------------------------------------
// Destructor: empty body, nothing is released explicitly here.
Binc::MimeDocument::~MimeDocument(void)
{
}
//------------------------------------------------------------------------
// Forget everything parsed so far: both "parsed" flags are lowered and the
// header fields and sub-parts are dropped.
void Binc::MimeDocument::clear(void) const
{
  allIsParsed = false;
  headerIsParsed = false;
  h.clear();
  members.clear();
}
//------------------------------------------------------------------------
// Drop the header fields and the sub-parts of this part. Offsets, lengths
// and line counters are left as they are.
void Binc::MimePart::clear(void) const
{
  h.clear();
  members.clear();
}
//------------------------------------------------------------------------
// Construct an empty part. Every scalar member is given a defined value,
// including the header/body offsets and lengths, so that accessors such as
// getSize() and getBodyLength() are safe to call before any parsing.
// subtype and boundary are default-constructed (empty).
Binc::MimePart::MimePart(void)
  : multipart(false),
    messagerfc822(false),
    headerstartoffsetcrlf(0),
    headerlength(0),
    bodystartoffsetcrlf(0),
    bodylength(0),
    nlines(0),
    nbodylines(0),
    size(0)
{
}
//------------------------------------------------------------------------
// Destructor: empty body, nothing is released explicitly here.
Binc::MimePart::~MimePart(void)
{
}
//------------------------------------------------------------------------
// Default constructor: key and value are left default-constructed (empty).
Binc::HeaderItem::HeaderItem(void)
{
}
//------------------------------------------------------------------------
// Build a header item holding copies of the given key and value.
Binc::HeaderItem::HeaderItem(const string &key, const string &value)
  : key(key), value(value)
{
}
//------------------------------------------------------------------------
// Default constructor: the header starts with no items.
Binc::Header::Header(void)
{
}
//------------------------------------------------------------------------
// Destructor: empty body, nothing is released explicitly here.
Binc::Header::~Header(void)
{
}
//------------------------------------------------------------------------
bool Binc::Header::getFirstHeader(const string &key, HeaderItem &dest) const
{
string k = key;
lowercase(k);
for (vector<HeaderItem>::const_iterator i = content.begin();
i != content.end(); ++i) {
string tmp = (*i).getKey();
lowercase(tmp);
if (tmp == k) {
dest = *i;
return true;
}
}
return false;
}
//------------------------------------------------------------------------
bool Binc::Header::getAllHeaders(const string &key, vector<HeaderItem> &dest) const
{
string k = key;
lowercase(k);
for (vector<HeaderItem>::const_iterator i = content.begin();
i != content.end(); ++i) {
string tmp = (*i).getKey();
lowercase(tmp);
if (tmp == k)
dest.push_back(*i);
}
return (dest.size() != 0);
}
//------------------------------------------------------------------------
// Remove all header items. Callable on a const Header because "content"
// is declared mutable.
void Binc::Header::clear(void) const
{
content.clear();
}
//------------------------------------------------------------------------
// Append a new item built from key and value. Existing items with the same
// key are kept: no replacement or merging is done.
void Binc::Header::add(const string &key, const string &value)
{
content.push_back(HeaderItem(key, value));
}

147
src/bincimapmime/mime.h Normal file
View file

@ -0,0 +1,147 @@
/* -*- mode:c++;c-basic-offset:2 -*- */
/* --------------------------------------------------------------------
* Filename:
* src/parsers/mime/mime.h
*
* Description:
* Declaration of main mime parser components
* --------------------------------------------------------------------
* Copyright 2002-2005 Andreas Aardal Hanssen
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
* --------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifndef mime_h_included
#define mime_h_included
#include <string>
#include <vector>
#include <map>
#include <stdio.h>
namespace Binc {
//----------------------------------------------------------------------
// One header field: a key (field name) and its value, both stored as
// strings and exposed through read-only accessors.
class HeaderItem {
private:
mutable std::string key;
mutable std::string value;
public:
// Accessors return references to the stored strings (no copy).
inline const std::string &getKey(void) const { return key; }
inline const std::string &getValue(void) const { return value; }
//--
HeaderItem(void);
HeaderItem(const std::string &key, const std::string &value);
};
//----------------------------------------------------------------------
// Ordered collection of HeaderItem objects. Lookups compare keys without
// regard to case (see the definitions in mime.cc).
class Header {
private:
// mutable so that clear() can be const.
mutable std::vector<HeaderItem> content;
public:
// Copy the first item matching key into dest; true when found.
bool getFirstHeader(const std::string &key, HeaderItem &dest) const;
// Append all items matching key to dest; true when dest is non-empty.
bool getAllHeaders(const std::string &key, std::vector<HeaderItem> &dest) const;
// Append a new item.
void add(const std::string &name, const std::string &content);
// Remove all items.
void clear(void) const;
//--
Header(void);
~Header(void);
};
//----------------------------------------------------------------------
class IODevice;
class MimeDocument;
// One part of a MIME document: its header fields, its position inside the
// input (offsets and lengths) and its sub-parts. All data members are
// mutable and public, and the member functions are const.
class MimePart {
protected:
public:
mutable bool multipart;
mutable bool messagerfc822;
mutable std::string subtype;
mutable std::string boundary;
// Position and size of the header and of the body inside the input.
mutable unsigned int headerstartoffsetcrlf;
mutable unsigned int headerlength;
mutable unsigned int bodystartoffsetcrlf;
mutable unsigned int bodylength;
// Line counters.
mutable unsigned int nlines;
mutable unsigned int nbodylines;
mutable unsigned int size;
public:
enum FetchType {
FetchBody,
FetchHeader,
FetchMime
};
// Header fields of this part.
mutable Header h;
// Sub-parts of this part.
mutable std::vector<MimePart> members;
inline const std::string &getSubType(void) const { return subtype; }
inline bool isMultipart(void) const { return multipart; }
inline bool isMessageRFC822(void) const { return messagerfc822; }
// Note: returns bodylength, not the "size" member.
inline unsigned int getSize(void) const { return bodylength; }
inline unsigned int getNofLines(void) const { return nlines; }
inline unsigned int getNofBodyLines(void) const { return nbodylines; }
inline unsigned int getBodyLength(void) const { return bodylength; }
inline unsigned int getBodyStartOffset(void) const { return bodystartoffsetcrlf; }
// Output / extraction of the body, the header fields or the whole part.
void printBody(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
void getBody(int fd, std::string& s, unsigned int startoffset, unsigned int length) const;
void getBody(std::string& s, unsigned int startoffset, unsigned int length) const;
void printHeader(int fd, Binc::IODevice &output, std::vector<std::string> headers, bool includeheaders, unsigned int startoffset, unsigned int length, std::string &storage) const;
void printDoc(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
// Drop header fields and sub-parts.
virtual void clear(void) const;
const MimePart *getPart(const std::string &findpart, std::string genpart, FetchType fetchType = FetchBody) const;
// Parsers working on the current input source.
virtual int parseOnlyHeader(const std::string &toboundary) const;
virtual int parseFull(const std::string &toboundary, int &boundarysize) const;
MimePart(void);
virtual ~MimePart(void);
};
//----------------------------------------------------------------------
// A complete MIME document: the top-level MimePart plus the bookkeeping
// telling whether its header, or the whole document, has been parsed.
class MimeDocument : public MimePart {
private:
  mutable bool headerIsParsed;
  mutable bool allIsParsed;
public:
  // Parse from a file descriptor or from a stream. The header-only
  // variants return at once when the header or the whole document has
  // already been parsed.
  void parseOnlyHeader(int fd) const;
  void parseFull(int fd) const;
  void parseOnlyHeader(std::istream& s) const;
  void parseFull(std::istream& s) const;
  // Drop the parsed data and lower both "parsed" flags.
  void clear(void) const;
  // State accessors; const like the rest of the interface, so that they
  // can also be called through a const MimeDocument.
  inline bool isHeaderParsed(void) const { return headerIsParsed; }
  inline bool isAllParsed(void) const { return allIsParsed; }
  //--
  MimeDocument(void);
  ~MimeDocument(void);
};
};
#endif

126
src/bincimapmime/trbinc.cc Normal file
View file

@ -0,0 +1,126 @@
/* Copyright (C) 2006 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <sstream>
#ifndef NO_NAMESPACES
using namespace std;
#endif /* NO_NAMESPACES */
#include "mime.h"
// Program name, taken from argv[0] at the start of main().
static char *thisprog;
// Command line synopsis printed by Usage().
static char usage [] =
"trbinc <mboxfile> \n\n"
;
// Print the usage message on stderr and terminate with exit status 1.
static void
Usage(void)
{
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
exit(1);
}
// Option bits accumulated while main() parses the command line.
static int op_flags;
// NOTE(review): OPT_MOINS is not referenced by the code visible in this
// file; only OPT_s and OPT_b are set.
#define OPT_MOINS 0x1
#define OPT_s 0x2
#define OPT_b 0x4
// Initial value of main()'s "count" variable (replaced by the -b argument).
#define DEFCOUNT 10
// Headers that main() looks up and prints once the document is parsed.
const char *hnames[] = {"Subject", "Content-type"};
// Number of entries in hnames.
int nh = sizeof(hnames) / sizeof(char *);
// Test driver: parse the message file named on the command line and print
// the headers listed in hnames.
//
// Options: -s sets OPT_s, -b <n> sets OPT_b and stores n in "count".
// Exit status is 0 on success and 1 on any error (bad usage, I/O failure,
// parse failure or missing header).
int main(int argc, char **argv)
{
  int count = DEFCOUNT;
  thisprog = argv[0];
  argc--; argv++;

  while (argc > 0 && **argv == '-') {
    (*argv)++;
    if (!(**argv))
      /* A lone "-" argument (as in "adb - core") is rejected */
      Usage();
    while (**argv)
      switch (*(*argv)++) {
      case 's': op_flags |= OPT_s; break;
      case 'b': op_flags |= OPT_b; if (argc < 2) Usage();
        if ((sscanf(*(++argv), "%d", &count)) != 1)
          Usage();
        argc--;
        goto b1;
      default: Usage(); break;
      }
  b1: argc--; argv++;
  }

  if (argc != 1)
    Usage();

  char *mfile = *argv++;argc--;
  int fd;
  if ((fd = open(mfile, 0)) < 0) {
    perror("Opening");
    exit(1);
  }
  Binc::MimeDocument doc;

#if 0
  doc.parseFull(fd);
#else
  // Read the whole file in memory and parse it through a string stream.
  char *cp;
  // lseek() returns -1 on error: check before using the result as a size.
  off_t fsize = lseek(fd, 0, SEEK_END);
  if (fsize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
    perror("Seeking");
    exit(1);
  }
  int size = (int)fsize;
  if ((off_t)size != fsize) {
    fprintf(stderr, "File too big\n");
    exit(1);
  }
  fprintf(stderr, "Size: %d\n", size);
  // malloc(0) may legitimately return a null pointer: always ask for at
  // least one byte so that an empty file is not reported as a failure.
  cp = (char *)malloc(size > 0 ? size : 1);
  if (cp==0) {
    fprintf(stderr, "Malloc %d failed\n", size);
    exit(1);
  }
  int n;
  if ((n=read(fd, cp, size)) != size) {
    fprintf(stderr, "Read failed: requested %d, got %d\n", size, n);
    exit(1);
  }
  // The string owns a copy of the data, the raw buffer can go.
  const std::string data(cp, size);
  free(cp);
  std::stringstream s(data, std::ios::in);
  doc.parseFull(s);
#endif

  if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
    fprintf(stderr, "Parse error\n");
    exit(1);
  }
  close(fd);

  Binc::HeaderItem hi;
  for (int i = 0; i < nh ; i++) {
    if (!doc.h.getFirstHeader(hnames[i], hi)) {
      fprintf(stderr, "No %s\n", hnames[i]);
      exit(1);
    }
    printf("%s: %s\n", hnames[i], hi.getValue().c_str());
  }
  exit(0);
}

38
src/common/Makefile Normal file
View file

@ -0,0 +1,38 @@
# @(#$Id: Makefile,v 1.16 2008-12-17 14:26:49 dockes Exp $ (C) 2005 J.F.Dockes
depth = ..
include $(depth)/mk/sysconf
# Only test executables get built in here
PROGS = unacpp textsplit rclconfig
all: $(BIGLIB) $(PROGS)
# The library is always rebuilt through the lib directory makefile
# ("force" has no prerequisites and no recipe).
$(BIGLIB): force
cd $(depth)/lib;$(MAKE)
force:
# unacpp: unacpp.cpp compiled with -DTEST_UNACPP
UNACPP_OBJS= trunacpp.o $(BIGLIB)
unacpp : $(UNACPP_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o unacpp $(UNACPP_OBJS) \
$(LIBICONV)
trunacpp.o : unacpp.cpp unacpp.h
$(CXX) $(ALL_CXXFLAGS) -DTEST_UNACPP -c -o trunacpp.o unacpp.cpp
# textsplit: textsplit.cpp compiled with -DTEST_TEXTSPLIT
TEXTSPLIT_OBJS= trtextsplit.o $(BIGLIB)
textsplit : $(TEXTSPLIT_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o textsplit $(TEXTSPLIT_OBJS) $(LIBICONV)
trtextsplit.o : textsplit.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_TEXTSPLIT -c -o trtextsplit.o \
textsplit.cpp
# rclconfig: rclconfig.cpp compiled with -DTEST_RCLCONFIG
RCLCONFIG_OBJS= trrclconfig.o $(BIGLIB)
rclconfig : $(RCLCONFIG_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o rclconfig $(RCLCONFIG_OBJS) \
$(LIBICONV) $(LIBSYS)
trrclconfig.o : rclconfig.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_RCLCONFIG -c -o trrclconfig.o \
rclconfig.cpp
# Remove the objects and the test executables
clean:
rm -f *.o $(PROGS)

116
src/common/autoconfig.h.in Normal file
View file

@ -0,0 +1,116 @@
/* common/autoconfig.h.in. Generated from configure.ac by autoheader. */
/* Path to the aspell api include file */
#undef ASPELL_INCLUDE
/* Path to the aspell program */
#undef ASPELL_PROG
/* No X11 session monitoring support */
#undef DISABLE_X11MON
/* Path to the fam api include file */
#undef FAM_INCLUDE
/* Path to the file program */
#undef FILE_PROG
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the `mkdtemp' function. */
#undef HAVE_MKDTEMP
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/mount.h> header file. */
#undef HAVE_SYS_MOUNT_H
/* Define to 1 if you have the <sys/statfs.h> header file. */
#undef HAVE_SYS_STATFS_H
/* Define to 1 if you have the <sys/statvfs.h> header file. */
#undef HAVE_SYS_STATVFS_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/vfs.h> header file. */
#undef HAVE_SYS_VFS_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* putenv parameter is const */
#undef PUTENV_ARG_CONST
/* iconv parameter 2 is const char** */
#undef RCL_ICONV_INBUF_CONST
/* Real time monitoring option */
#undef RCL_MONITOR
/* Split camelCase words */
#undef RCL_SPLIT_CAMELCASE
/* Compile the aspell interface */
#undef RCL_USE_ASPELL
/* Compile the fam interface */
#undef RCL_USE_FAM
/* Compile the inotify interface */
#undef RCL_USE_INOTIFY
/* Use file extended attributes */
#undef RCL_USE_XATTR
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Enable using the system's 'file' command to id mime if we fail internally
*/
#undef USE_SYSTEM_FILE_COMMAND
/* Define to 1 if the X Window System is missing or not being used. */
#undef X_DISPLAY_MISSING
/* NOTE(review): like the other #undef entries of this template, these two
   are presumably rewritten at configure time -- confirm. */
#undef _FILE_OFFSET_BITS
#undef _LARGE_FILES
/* OFFTPC: printf conversion string, "%lld" when _FILE_OFFSET_BITS is 64 or
   on Apple / OpenBSD systems, "%ld" otherwise.
   NOTE(review): the name suggests it is meant for printing off_t values --
   confirm against its users. */
#if _FILE_OFFSET_BITS == 64 || defined(__APPLE__) || defined(__OpenBSD__)
#define OFFTPC "%lld"
#else
#define OFFTPC "%ld"
#endif

View file

@ -0,0 +1,80 @@
/* Copyright (C) 2011 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include "cstr.h"
#include "beaglequeuecache.h"
#include "circache.h"
#include "debuglog.h"
#include "rclconfig.h"
#include "pathut.h"
#include "rcldoc.h"
const string cstr_bgc_mimetype("mimetype");
// Set up the cache object from the configuration.
// The directory comes from the "webcachedir" parameter ("webcache" when
// unset), is tilde-expanded and, when not absolute, taken relative to the
// configuration directory. The maximum size comes from "webcachemaxmbs"
// (40 when unset).
// NOTE(review): the result of CirCache::create() is not checked here.
BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
{
  string cachedir;
  cnf->getConfParam("webcachedir", cachedir);
  if (cachedir.empty()) {
    cachedir = "webcache";
  }
  cachedir = path_tildexpand(cachedir);

  // If not an absolute path, compute relative to config dir
  const bool absolute = (cachedir.at(0) == '/');
  if (!absolute) {
    cachedir = path_cat(cnf->getConfDir(), cachedir);
  }

  int maxmbs = 40;
  cnf->getConfParam("webcachemaxmbs", &maxmbs);

  m_cache = new CirCache(cachedir);
  m_cache->create(off_t(maxmbs)*1000*1024, CirCache::CC_CRUNIQUE);
}
// Release the CirCache object allocated by the constructor.
BeagleQueueCache::~BeagleQueueCache()
{
delete m_cache;
}
// Read document from cache. Return the metadata as an Rcl::Doc
// @param htt Beagle Hit Type
bool BeagleQueueCache::getFromCache(const string& udi, Rcl::Doc &dotdoc,
string& data, string *htt)
{
string dict;
if (!m_cache->get(udi, dict, data))
return false;
ConfSimple cf(dict, 1);
if (htt)
cf.get(Rcl::Doc::keybght, *htt, cstr_null);
// Build a doc from saved metadata
cf.get(cstr_url, dotdoc.url, cstr_null);
cf.get(cstr_bgc_mimetype, dotdoc.mimetype, cstr_null);
cf.get(cstr_fmtime, dotdoc.fmtime, cstr_null);
cf.get(cstr_fbytes, dotdoc.pcbytes, cstr_null);
dotdoc.sig.clear();
list<string> names = cf.getNames(cstr_null);
for (list<string>::const_iterator it = names.begin();
it != names.end(); it++) {
cf.get(*it, dotdoc.meta[*it], cstr_null);
}
dotdoc.meta[Rcl::Doc::keyudi] = udi;
return true;
}

View file

@ -0,0 +1,50 @@
/* Copyright (C) 2009 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _beaglequeuecache_h_included_
#define _beaglequeuecache_h_included_
#include <string>
using std::string;
class RclConfig;
namespace Rcl {
class Db;
class Doc;
}
class CirCache;
/**
* Manage the CirCache for the Beagle Queue indexer. Separated from the main
* indexer code because it's also used for querying (getting the data for a
* preview).
*/
class BeagleQueueCache {
public:
// Creates the underlying CirCache from the configuration.
BeagleQueueCache(RclConfig *config);
// Deletes the underlying CirCache.
~BeagleQueueCache();
// Fetch the entry stored under udi: metadata goes to doc, content to data
// and, when hittype is non-null, the hit type to *hittype. Returns false
// when the entry cannot be read.
bool getFromCache(const string& udi, Rcl::Doc &doc, string& data,
string *hittype = 0);
// We could write proxies for all the circache ops, but why bother?
CirCache *cc() {return m_cache;}
private:
// Allocated by the constructor, released by the destructor.
CirCache *m_cache;
};
extern const string cstr_bgc_mimetype;
#endif /* _beaglequeuecache_h_included_ */

6
src/common/cstr.cpp Normal file
View file

@ -0,0 +1,6 @@
// First inclusion: DEF_CSTR expands to extern declarations.
#include "cstr.h"
// Second inclusion, with RCLIN_CSTR_CPPFILE set and the include guard
// removed: DEF_CSTR now expands to the definitions of the same strings.
#define RCLIN_CSTR_CPPFILE
#undef _CSTR_H_INCLUDED_
#include "cstr.h"

78
src/common/cstr.h Normal file
View file

@ -0,0 +1,78 @@
/* Copyright (C) 2011 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _CSTR_H_INCLUDED_
#define _CSTR_H_INCLUDED_
// recoll mostly uses STL strings. In many places we had automatic
// conversion from a C string to an STL one. This costs, and can
// become significant if used often.
//
// This file and the associated .cpp file declares/defines constant
// strings used in the program. Strings are candidates for a move here
// when they are used in a fast loop or are shared.
#include <string>
using std::string;
// The following slightly hacky preprocessing directives and the
// companion code in the cpp file look complicated, but they just
// ensure that we only have to write the strings once to get both the
// extern declaration and the definition:
//  - Normal inclusion: DEF_CSTR(NM, STR) expands to an extern declaration
//    of cstr_NM (STR is ignored).
//  - Inclusion with RCLIN_CSTR_CPPFILE defined (done once, from the cpp
//    file): the previous DEF_CSTR is discarded and the macro now expands to
//    the definition of cstr_NM, initialized from STR.
#ifdef RCLIN_CSTR_CPPFILE
#undef DEF_CSTR
#define DEF_CSTR(NM, STR) const string cstr_##NM(STR)
#else
#define DEF_CSTR(NM, STR) extern const string cstr_##NM
#endif
// General purpose constants. Each line yields a constant named cstr_<first
// argument> holding the second argument.
DEF_CSTR(caption, "caption");
DEF_CSTR(dmtime, "dmtime");
DEF_CSTR(dquote, "\"");
DEF_CSTR(fbytes, "fbytes");
DEF_CSTR(fileu, "file://");
DEF_CSTR(fmtime, "fmtime");
DEF_CSTR(iso_8859_1, "ISO-8859-1");
DEF_CSTR(minwilds, "*?[");
DEF_CSTR(newline, "\n");
DEF_CSTR(null, "");
DEF_CSTR(plus, "+");
DEF_CSTR(textplain, "text/plain");
DEF_CSTR(url, "url");
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
// used to store all data generated by format-translating filters. It is
// different from Rcl::Doc for mostly historical reasons. The translation
// from Filter to Doc occurs inside internfile.cpp
DEF_CSTR(dj_keyds, "description");
DEF_CSTR(dj_keyfn, "filename");
DEF_CSTR(dj_keymd, "modificationdate");
DEF_CSTR(dj_keyorigcharset, "origcharset");
DEF_CSTR(dj_keytitle, "title");
DEF_CSTR(dj_keyrecipient, "recipient");
DEF_CSTR(dj_keymsgid, "msgid");
DEF_CSTR(dj_keyabstract, "abstract");
DEF_CSTR(dj_keyauthor, "author");
DEF_CSTR(dj_keycharset, "charset");
DEF_CSTR(dj_keycontent, "content");
DEF_CSTR(dj_keyipath, "ipath");
DEF_CSTR(dj_keymd5, "md5");
DEF_CSTR(dj_keymt, "mimetype");
DEF_CSTR(dj_keydocsize, "docsize");
#endif /* _CSTR_H_INCLUDED_ */

1300
src/common/rclconfig.cpp Normal file

File diff suppressed because it is too large Load diff

307
src/common/rclconfig.h Normal file
View file

@ -0,0 +1,307 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _RCLCONFIG_H_INCLUDED_
#define _RCLCONFIG_H_INCLUDED_
#include <list>
#include <string>
#include <vector>
#include <set>
#include <utility>
#include <map>
#include <set>
#ifndef NO_NAMESPACES
using std::list;
using std::string;
using std::vector;
using std::pair;
using std::set;
using std::map;
using std::set;
#endif
#include "conftree.h"
#include "smallut.h"
class RclConfig;
// A small class used for parameters that need to be computed from the
// config string, and which can change with the keydir. Minimize work
// by using the keydirgen and a saved string to avoid unneeded
// recomputations
// Bookkeeping for one configuration parameter whose computed form is cached
// by RclConfig. All members are public: RclConfig holds several of these and
// declares this class as a friend.
class ParamStale {
public:
// Owning configuration object
RclConfig *parent;
// Configuration data from which the parameter is read
ConfNull *conffile;
// Name of the tracked parameter
string paramname;
// Keydir generation recorded by the last check.
// NOTE(review): presumably compared with the parent's m_keydirgen inside
// needrecompute() -- confirm in rclconfig.cpp.
int savedkeydirgen;
// Parameter string value recorded by the last check
string savedvalue;
// Set the owner, the source configuration and the parameter name.
// NOTE(review): whether this also resets savedkeydirgen/savedvalue is not
// visible from this header -- confirm.
void init(RclConfig *rconf, ConfNull *cnf, const string& nm);
// NOTE(review): presumably returns true when the parameter value has changed
// since the last call, so that the derived data must be rebuilt -- confirm.
bool needrecompute();
};
// Data associated to a indexed field name:
struct FieldTraits {
    // Prefix used when indexing the field
    string pfx;
    // Term frequency increment applied at index time. Defaults to 1
    int wdfinc;
    // Boost applied at query time. Defaults to 1.0
    double boost;

    // All defaults: empty prefix, increment 1, boost 1.0
    FieldTraits()
        : wdfinc(1), boost(1.0)
    {
    }
    // Explicit increment and boost, empty prefix
    FieldTraits(int i, double f)
        : wdfinc(i), boost(f)
    {
    }
    // Explicit prefix, default increment and boost
    FieldTraits(const string& s)
        : pfx(s), wdfinc(1), boost(1.0)
    {
    }
};
/**
 * Access to the recoll configuration: the main configuration (recoll.conf)
 * and the mimemap, mimeconf, mimeview and fields files.
 *
 * Many parameter lookups depend on the current "keydir" (see setKeyDir()).
 */
class RclConfig {
public:

    // Constructor: we normally look for a configuration file, except
    // if this was specified on the command line and passed through
    // argcnf
    RclConfig(const string *argcnf = 0);

    // Return a writable clone of the main config. This belongs to the
    // caller (must delete it when done)
    ConfNull *cloneMainConfig();

    /** (re)Read recoll.conf */
    bool updateMainConfig();

    /** Tell if the object is usable. See getReason() if it is not */
    bool ok() {return m_ok;}
    /** Explanation for a bad state */
    const string &getReason() {return m_reason;}

    /** Return the directory where this configuration is stored.
     *  This was possibly silently created by the rclconfig
     *  constructor if it is the default one (~/.recoll) and it did
     *  not exist yet. */
    string getConfDir() {return m_confdir;}

    /** Check if the config files were modified since we read them */
    bool sourceChanged();

    /** Returns true if this is ~/.recoll */
    bool isDefaultConfig();
    /** Get the local value for /usr/local/share/recoll/ */
    const string& getDatadir() {return m_datadir;}

    /** Set current directory reference, and fetch automatic parameters. */
    void setKeyDir(const string &dir);
    string getKeyDir() const {return m_keydir;}

    /** Get generic configuration parameter according to current keydir.
     *  @return false if there is no main configuration or (as reported by
     *    the configuration object) the value could not be fetched. */
    bool getConfParam(const string &name, string &value)
    {
        if (m_conf == 0)
            return false;
        return m_conf->get(name, value, m_keydir);
    }
    /** Variant with autoconversion to int */
    bool getConfParam(const string &name, int *value);
    /** Variant with autoconversion to bool */
    bool getConfParam(const string &name, bool *value);
    /** Variant with conversion to string list/vector
     *  (stringToStrings). Can fail if the string is malformed. */
    bool getConfParam(const string &name, vector<string> *value);
    bool getConfParam(const string &name, list<string> *value);

    /**
     * Get list of config names under current sk, with possible
     * wildcard filtering.
     *
     * Returns an empty list if there is no main configuration, like the
     * other inline accessors which test m_conf before using it.
     */
    list<string> getConfNames(const char *pattern = 0) {
        if (m_conf == 0)
            return list<string>();
        return m_conf->getNames(m_keydir, pattern);
    }

    /** Check if name exists anywhere in config */
    bool hasNameAnywhere(const string& nm)
    {
        return m_conf? m_conf->hasNameAnywhere(nm) : false;
    }

    /** Get default charset for current keydir (was set during setKeydir)
     * filenames are handled differently */
    const string &getDefCharset(bool filename = false);
    /** Get list of top directories. This is needed from a number of places
     * and needs some cleaning-up code. An empty list is always an error, no
     * need for other status */
    list<string> getTopdirs();
    /** Get database directory */
    string getDbDir();
    /** Get stoplist file name */
    string getStopfile();
    /** Get indexing pid file name */
    string getPidfile();
    /** Get indexing status file name */
    string getIdxStatusFile();

    /** Get list of skipped file names for current keydir */
    list<string>& getSkippedNames();

    /** Get list of skipped paths patterns. Doesn't depend on the keydir */
    list<string> getSkippedPaths();
    /** Get list of skipped paths patterns, daemon version (may add some)
        Doesn't depend on the keydir */
    list<string> getDaemSkippedPaths();

    /** conf: Add local fields to target dic */
    bool addLocalFields(map<string, string> *tgt);

    /**
     * mimemap: Check if file name should be ignored because of suffix
     *
     * The list of ignored suffixes is initialized on first call, and
     * not changed for subsequent setKeydirs.
     */
    bool inStopSuffixes(const string& fn);

    /**
     * Check in mimeconf if input mime type is a compressed one, and
     * return command to uncompress if it is.
     *
     * The returned command has substitutable places for input file name
     * and temp dir name, and will return output name
     */
    bool getUncompressor(const string &mtpe, list<string>& cmd);

    /** mimemap: compute mimetype */
    string getMimeTypeFromSuffix(const string &suffix);
    /** mimemap: get a list of all indexable mime types defined */
    list<string> getAllMimeTypes();
    /** mimemap: Get appropriate suffix for mime type. This is inefficient */
    string getSuffixFromMimeType(const string &mt);

    /** mimeconf: get input filter for mimetype */
    string getMimeHandlerDef(const string &mimetype, bool filtertypes=false);

    /** For lines like: "name = some value; attr1 = value1; attr2 = val2"
     * Separate the value and store the attributes in a ConfSimple
     * @param whole the raw value. No way to escape a semi-colon in there.
     */
    bool valueSplitAttributes(const string& whole, string& value,
                              ConfSimple& attrs);

    /** mimeconf: get icon name for mimetype */
    string getMimeIconName(const string &mtype, string *path = 0);

    /** mimeconf: get list of file categories */
    bool getMimeCategories(list<string>&);
    /** mimeconf: is parameter one of the categories ? */
    bool isMimeCategory(string&);
    /** mimeconf: get list of mime types for category */
    bool getMimeCatTypes(const string& cat, list<string>&);

    /** mimeconf: get list of gui filters (doc cats by default) */
    bool getGuiFilterNames(list<string>&);
    /** mimeconf: get query lang frag for named filter */
    bool getGuiFilter(const string& filtername, string& frag);

    /** fields: get field prefix from field name */
    bool getFieldTraits(const string& fldname, const FieldTraits **);
    const set<string>& getStoredFields() {return m_storedFields;}
    set<string> getIndexedFields();
    /** Get canonic name for possible alias */
    string fieldCanon(const string& fld);
    /** Get xattr name to field names translations */
    const map<string, string>& getXattrToField() {return m_xattrtofld;}
    /** Get value of a parameter inside the "fields" file. Only some filters
        use this (ie: mh_mail). The information specific to a given filter
        is typically stored in a separate section(ie: [mail]) */
    list<string> getFieldSectNames(const string &sk, const char* = 0);
    bool getFieldConfParam(const string &name, const string &sk, string &value);

    /** mimeview: get/set external viewer exec string(s) for mimetype(s) */
    string getMimeViewerDef(const string &mimetype, const string& apptag);
    bool getMimeViewerDefs(vector<pair<string, string> >&);
    bool setMimeViewerDef(const string& mimetype, const string& cmd);
    /** Check if mime type is designated as needing no uncompress before view
     * (if a file of this type is found compressed). Default is true,
     *  exceptions are found in the nouncompforviewmts mimeview list */
    bool mimeViewerNeedsUncomp(const string &mimetype);

    /** Store/retrieve missing helpers description string */
    string getMissingHelperDesc();
    void storeMissingHelperDesc(const string &s);

    /** Find exec file for external filter. cmd is the command name from the
     * command string returned by getMimeHandlerDef */
    string findFilter(const string& cmd);

    ~RclConfig() {
        freeAll();
    }

    // Copy: everything is delegated to initFrom()
    RclConfig(const RclConfig &r) {
        initFrom(r);
    }
    // Assignment: release our own data then copy, except for self-assignment
    RclConfig& operator=(const RclConfig &r) {
        if (this != &r) {
            freeAll();
            initFrom(r);
        }
        return *this;
    }

    friend class ParamStale;

private:
    int m_ok;
    string m_reason;    // Explanation for bad state
    string m_confdir;   // User directory where the customized files are stored
    string m_datadir;   // Example: /usr/local/share/recoll
    string m_keydir;    // Current directory used for parameter fetches.
    int    m_keydirgen; // To help with knowing when to update computed data.

    list<string> m_cdirs; // directory stack for the confstacks

    ConfStack<ConfTree> *m_conf;     // Parsed configuration files
    ConfStack<ConfTree> *mimemap;    // The files don't change with keydir,
    ConfStack<ConfSimple> *mimeconf; // but their content may depend on it.
    ConfStack<ConfSimple> *mimeview; //
    ConfStack<ConfSimple> *m_fields;
    map<string, FieldTraits> m_fldtotraits; // Field to field params
    map<string, string> m_aliastocanon;
    set<string> m_storedFields;
    map<string, string> m_xattrtofld;

    void *m_stopsuffixes;
    unsigned int m_maxsufflen;
    ParamStale m_stpsuffstate;

    ParamStale m_skpnstate;
    list<string> m_skpnlist;

    // Parameters auto-fetched on setkeydir
    string defcharset;

    // Limiting set of mime types to be processed. Normally empty.
    ParamStale m_rmtstate;
    set<string> m_restrictMTypes;

    /** Create initial user configuration */
    bool initUserConfig();
    /** Copy from other */
    void initFrom(const RclConfig& r);
    /** Init pointers to 0 */
    void zeroMe();
    /** Free data then zero pointers */
    void freeAll();
    bool readFieldsConfig(const string& errloc);
};
#endif /* _RCLCONFIG_H_INCLUDED_ */

138
src/common/rclinit.cpp Normal file
View file

@ -0,0 +1,138 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <stdio.h>
#include <signal.h>
#include <locale.h>
#include <pthread.h>
#include <cstdlib>
#if !defined(PUTENV_ARG_CONST)
#include <string.h>
#endif
#include "debuglog.h"
#include "rclconfig.h"
#include "rclinit.h"
#include "pathut.h"
#include "unac.h"
// The signals for which recollinit() installs the caller-supplied handler,
// and which recoll_threadinit() blocks in the calling thread.
static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM,
SIGUSR1, SIGUSR2};
// Global initialization: exit and signal handlers, configuration, logging,
// locale.
//
// @param flags RCLINIT_DAEMON makes us look at daemlogfilename/daemloglevel
//    before the generic logfilename/loglevel.
// @param cleanup if not null, registered with atexit().
// @param sigcleanup if not null, installed as handler for the signals in
//    catchedSigs, except for those which were ignored when we were called.
// @param reason error explanation, set only when the call fails.
// @param argcnf passed as is to the RclConfig constructor.
// @return the configuration object, or 0 if it could not be built. No
//    object is left allocated in the error case.
RclConfig *recollinit(RclInitFlags flags,
                      void (*cleanup)(void), void (*sigcleanup)(int),
                      string &reason, const string *argcnf)
{
    if (cleanup)
        atexit(cleanup);

    // We ignore SIGPIPE always. All pieces of code which can write to a pipe
    // must check write() return values.
    signal(SIGPIPE, SIG_IGN);

    // We would like to block SIGCHLD globally, but we can't because
    // QT uses it. Have to block it inside execmd.cpp

    // Install signal handler
    if (sigcleanup) {
        struct sigaction action;
        action.sa_handler = sigcleanup;
        action.sa_flags = 0;
        sigemptyset(&action.sa_mask);
        for (unsigned int i = 0;
             i < sizeof(catchedSigs) / sizeof(catchedSigs[0]); i++) {
            // signal() returns the previous disposition: signals which were
            // already ignored stay ignored, the others get the handler.
            if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
                if (sigaction(catchedSigs[i], &action, 0) < 0) {
                    perror("Sigaction failed");
                }
            }
        }
    }

    // Temporary logging setup, until we have read the configuration
    DebugLog::getdbl()->setloglevel(DEBDEB1);
    DebugLog::setfilename("stderr");

    RclConfig *config = new RclConfig(argcnf);
    if (!config || !config->ok()) {
        reason = "Configuration could not be built:\n";
        if (config)
            reason += config->getReason();
        else
            reason += "Out of memory ?";
        // The caller only gets a null pointer back: free the unusable object
        // here, nobody else can (the reason text was copied above).
        delete config;
        return 0;
    }

    // Retrieve the log file name and level. The daemon-specific values, if
    // set, override the generic ones.
    string logfilename, loglevel;
    if (flags & RCLINIT_DAEMON) {
        config->getConfParam(string("daemlogfilename"), logfilename);
        config->getConfParam(string("daemloglevel"), loglevel);
    }
    if (logfilename.empty())
        config->getConfParam(string("logfilename"), logfilename);
    if (loglevel.empty())
        config->getConfParam(string("loglevel"), loglevel);

    // Initialize logging
    if (!logfilename.empty()) {
        logfilename = path_tildexpand(logfilename);
        // If this is neither an absolute path nor one of the special names
        // known to DebugLog, compute relative to config dir
        if (logfilename.at(0) != '/' &&
            !DebugLog::DebugLog::isspecialname(logfilename.c_str())) {
            logfilename = path_cat(config->getConfDir(), logfilename);
        }
        DebugLog::setfilename(logfilename.c_str());
    }
    if (!loglevel.empty()) {
        int lev = atoi(loglevel.c_str());
        DebugLog::getdbl()->setloglevel(lev);
    }

    // Make sure the locale is set. This is only for converting file names
    // to utf8 for indexing.
    setlocale(LC_CTYPE, "");

    // Make sure the locale charset is initialized (so that multiple
    // threads don't try to do it at once).
    config->getDefCharset();

    // Init unac locking
    unac_init_mt();

    int flushmb = 0;
    if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
        LOGDEB1(("rclinit: idxflushmb=%d, set XAPIAN_FLUSH_THRESHOLD to "
                 "1000000\n", flushmb));
        static const char *cp = "XAPIAN_FLUSH_THRESHOLD=1000000";
        // Depending on the platform, putenv() takes a const or non-const
        // pointer. In the second case it gets its own copy of the string.
#ifdef PUTENV_ARG_CONST
        ::putenv(cp);
#else
        ::putenv(strdup(cp));
#endif
    }

    return config;
}
// Signals are handled by the main thread. All others should call this routine
// to block possible signals
void recoll_threadinit()
{
    // Build the set of all signals listed in catchedSigs...
    sigset_t blocked;
    sigemptyset(&blocked);
    const unsigned int nsigs = sizeof(catchedSigs) / sizeof(int);
    unsigned int idx = 0;
    while (idx < nsigs) {
        sigaddset(&blocked, catchedSigs[idx]);
        ++idx;
    }
    // ... and add it to the calling thread's blocked signals
    pthread_sigmask(SIG_BLOCK, &blocked, 0);
}

57
src/common/rclinit.h Normal file
View file

@ -0,0 +1,57 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _RCLINIT_H_INCLUDED_
#define _RCLINIT_H_INCLUDED_
#include <string>
#ifndef NO_NAMESPACES
using std::string;
#endif
class RclConfig;
/**
* Initialize by reading configuration, opening log file, etc.
*
* This must be called from the main thread before starting any others. It sets
* up the global signal handling. other threads must call recoll_threadinit()
* when starting.
*
* @param flags misc modifiers
* @param cleanup function to call before exiting (atexit)
* @param sigcleanup function to call on terminal signal (INT/HUP...). This
* should typically set a flag which tells the program (recoll,
* recollindex etc.) to exit as soon as possible (after closing the db,
* etc.). cleanup will then be called by exit().
* @param reason in case of error: output string explaining things
* @param argcnf Configuration directory name from the command line (overriding
* default and environment)
* @return the parsed configuration.
*/
// Modifiers for recollinit(). The values are tested as bits in the flags
// parameter.
enum RclInitFlags {RCLINIT_NONE=0, RCLINIT_DAEMON=1};
// Full version of the initialization call, with explicit flags.
extern RclConfig *recollinit(RclInitFlags flags,
void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf = 0);
// Shorthand for callers which need no special flags: same as calling the
// full version with RCLINIT_NONE.
inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int),
                             string &reason, const string *argcnf = 0) {
    const RclInitFlags noflags = RCLINIT_NONE;
    return recollinit(noflags, cleanup, sigcleanup, reason, argcnf);
}
// Threads need to call this to block signals.
// The main thread handles all signals.
extern void recoll_threadinit();
#endif /* _RCLINIT_H_INCLUDED_ */

View file

@ -0,0 +1 @@
// Program version string.
// NOTE(review): the value below looks like a placeholder which the build
// replaces with the actual version -- confirm against the configure script.
static const char *rclversionstr = "@RCLVERSION@";

987
src/common/textsplit.cpp Normal file
View file

@ -0,0 +1,987 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_TEXTSPLIT
#include "autoconfig.h"
#include <assert.h>
#include <iostream>
#include <string>
#include <set>
#include <cstring>
#include "textsplit.h"
#include "debuglog.h"
//#define UTF8ITER_CHECK
#include "utf8iter.h"
#include "uproplist.h"
#ifndef NO_NAMESPACES
using namespace std;
#endif /* NO_NAMESPACES */
/**
* Splitting a text into words. The code in this file works with utf-8
* in a semi-clean way (see uproplist.h). Ascii still gets special treatment.
*/
// Character classes: we have three main groups, and then some chars
// are their own class because they want special handling.
//
// We have an array with 256 slots where we keep the character types.
// The array could be fully static, but we use a small function to fill it
// once.
// The array is actually a remnant of the original version which did no utf8.
// Only the lower 128 slots (0-127) are now meaningful, but keep it at 256
// because it makes some tests in the code simpler.
// Number of slots in the character class table
const unsigned int charclasses_size = 256;
// The generic classes. The values begin at 256 so that they can't be confused
// with the entries for characters which are their own class: these are stored
// in the table as the character value itself.
enum CharClass {LETTER=256, SPACE=257, DIGIT=258, WILD=259,
A_ULETTER=260, A_LLETTER=261};
// The table, indexed by character value. Filled once by CharClassInit.
static int charclasses[charclasses_size];
// Real UTF-8 characters are handled with sets holding all characters
// with interesting properties. This is far from full-blown management
// of Unicode properties, but seems to do the job well enough in most
// common cases
static set<unsigned int> unicign;
static set<unsigned int> visiblewhite;
class CharClassInit {
public:
CharClassInit()
{
unsigned int i;
// Set default value for all: SPACE
for (i = 0 ; i < 256 ; i ++)
charclasses[i] = SPACE;
char digits[] = "0123456789";
for (i = 0; i < strlen(digits); i++)
charclasses[int(digits[i])] = DIGIT;
char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
for (i = 0; i < strlen(upper); i++)
charclasses[int(upper[i])] = A_ULETTER;
char lower[] = "abcdefghijklmnopqrstuvwxyz";
for (i = 0; i < strlen(lower); i++)
charclasses[int(lower[i])] = A_LLETTER;
char wild[] = "*?[]";
for (i = 0; i < strlen(wild); i++)
charclasses[int(wild[i])] = WILD;
char special[] = ".@+-,#'_\n\r";
for (i = 0; i < strlen(special); i++)
charclasses[int(special[i])] = special[i];
for (i = 0; i < sizeof(uniign) / sizeof(int); i++) {
unicign.insert(uniign[i]);
}
unicign.insert((unsigned int)-1);
for (i = 0; i < sizeof(avsbwht) / sizeof(int); i++) {
visiblewhite.insert(avsbwht[i]);
}
}
};
static const CharClassInit charClassInitInstance;
// Return the class for a character: table lookup for ASCII. Any other code
// point is a LETTER, except for the members of the unicign set, which are
// handled as SPACE.
static inline int whatcc(unsigned int c)
{
    if (c <= 127)
        return charclasses[c];
    return unicign.find(c) == unicign.end() ? LETTER : SPACE;
}
// CJK Unicode character detection:
//
// 2E80..2EFF; CJK Radicals Supplement
// 3000..303F; CJK Symbols and Punctuation
// 3040..309F; Hiragana
// 30A0..30FF; Katakana
// 3100..312F; Bopomofo
// 3130..318F; Hangul Compatibility Jamo
// 3190..319F; Kanbun
// 31A0..31BF; Bopomofo Extended
// 31C0..31EF; CJK Strokes
// 31F0..31FF; Katakana Phonetic Extensions
// 3200..32FF; Enclosed CJK Letters and Months
// 3300..33FF; CJK Compatibility
// 3400..4DBF; CJK Unified Ideographs Extension A
// 4DC0..4DFF; Yijing Hexagram Symbols
// 4E00..9FFF; CJK Unified Ideographs
// A700..A71F; Modifier Tone Letters
// AC00..D7AF; Hangul Syllables
// F900..FAFF; CJK Compatibility Ideographs
// FE30..FE4F; CJK Compatibility Forms
// FF00..FFEF; Halfwidth and Fullwidth Forms
// 20000..2A6DF; CJK Unified Ideographs Extension B
// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
// Note: the p > 127 test is not necessary, but optimizes away the ascii case
// The 0x3000-0x9FFF test covers in one go all the contiguous blocks listed
// above from "CJK Symbols and Punctuation" to "CJK Unified Ideographs".
// The argument is evaluated several times: don't use an expression with side
// effects.
#define UNICODE_IS_CJK(p) \
((p) > 127 && \
(((p) >= 0x2E80 && (p) <= 0x2EFF) || \
((p) >= 0x3000 && (p) <= 0x9FFF) || \
((p) >= 0xA700 && (p) <= 0xA71F) || \
((p) >= 0xAC00 && (p) <= 0xD7AF) || \
((p) >= 0xF900 && (p) <= 0xFAFF) || \
((p) >= 0xFE30 && (p) <= 0xFE4F) || \
((p) >= 0xFF00 && (p) <= 0xFFEF) || \
((p) >= 0x20000 && (p) <= 0x2A6DF) || \
((p) >= 0x2F800 && (p) <= 0x2FA1F)))
// Function version of the UNICODE_IS_CJK() test: true if the code point is
// inside one of the ranges checked by the macro.
bool TextSplit::isCJK(int c)
{
    if (UNICODE_IS_CJK(c))
        return true;
    return false;
}
// If set, text_to_words() hands CJK characters off to cjk_to_words()
bool TextSplit::o_processCJK = true;
// NOTE(review): presumably the n-gram length used by cjk_to_words(), which is
// not visible here -- confirm.
unsigned int TextSplit::o_CJKNgramLen = 2;
// If set, doemit() emits neither the words being parsed as numbers nor the
// spans which consist only of such a word.
bool TextSplit::o_noNumbers = false;
// Do some checking (the kind which is simpler to do here than in the
// main loop), then send term to our client.
// @param isspan not used by the checks below.
// @param w the term. Dropped if empty, or not shorter than m_maxWordLength,
//    or a single byte which is not an ascii letter or digit, or if it has the
//    same position and length as the term previously passed on.
// @param pos term position.
// @param btstart, btend byte offsets, passed through to takeword().
// @return the takeword() return value if the term was passed on, else true
//    (dropping a term is not an error).
inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
                                int btstart, int btend)
{
    LOGDEB3(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));

    unsigned int l = w.length();
    if (l > 0 && l < (unsigned)m_maxWordLength) {
        // 1 byte word: we index single ascii letters and digits, but
        // nothing else. We might want to turn this into a test for a
        // single utf8 character instead ?
        if (l == 1) {
            // Go through unsigned char: plain char may be signed, and a
            // byte with the high bit set would then yield a negative
            // index. The table has a slot for every unsigned char value.
            unsigned int c = (unsigned char)w[0];
            if (charclasses[c] != A_ULETTER && charclasses[c] != A_LLETTER &&
                charclasses[c] != DIGIT) {
                return true;
            }
        }
        // Don't emit the same thing twice in a row
        if (pos != m_prevpos || l != m_prevlen) {
            bool ret = takeword(w, pos, btstart, btend);
            m_prevpos = pos;
            m_prevlen = w.length();
            return ret;
        }
        LOGDEB2(("TextSplit::emitterm:dup: [%s] pos %d\n", w.c_str(), pos));
    }
    return true;
}
/**
* A routine called from different places in text_to_words(), to
* adjust the current state of the parser, and call the word
* handler/emitter. Emit and reset the current word, possibly emit the current
* span (if different). In query mode, words are not emitted, only final spans
*
* This is purely for factoring common code from different places in
* text_to_words().
*
* @return true if ok, false for error. Splitting should stop in this case.
* @param spanerase Set if the current span is at its end. Reset it.
* @param bp The current BYTE position in the stream
* @param spanemit This is set for intermediate spans: glue char changed.
*/
inline bool TextSplit::doemit(bool spanerase, int bp, bool spanemit)
{
LOGDEB2(("TextSplit::doemit: sper %d bp %d spem %d. spp %d wS %d wL %d "
"inn %d span [%s]\n",
spanerase, bp, spanemit, m_spanpos, m_wordStart, m_wordLen,
m_inNumber, m_span.c_str()));
// Emit span? When splitting for query, we only emit final spans
// (spanerase)
// The span is skipped altogether if spans are disabled, or if the span is
// just the current word and this is a number while numbers are not wanted.
bool spanemitted = false;
if (!(m_flags & TXTS_NOSPANS) &&
!((m_wordLen == m_span.length()) &&
(o_noNumbers) && m_inNumber) &&
((spanemit && !(m_flags & TXTS_ONLYSPANS)) || spanerase) ) {
// Check for an acronym/abbreviation ie I.B.M.
// Only for final spans longer than the current word, 3 to 20 bytes long
if (spanerase && m_wordLen != m_span.length() && m_span.length() > 2
&& m_span.length() <= 20) {
// Acronym if all the bytes at odd offsets are dots
bool acron = true;
for (unsigned int i = 1 ; i < m_span.length(); i += 2) {
if (m_span[i] != '.') {
acron = false;
break;
}
}
if (acron) {
// The term is made of the bytes at even offsets: IBM for I.B.M.
string acronym;
for (unsigned int i = 0; i < m_span.length(); i += 2) {
acronym += m_span[i];
}
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(),
bp))
return false;
}
}
// Maybe trim at end. These are chars that we would keep inside
// a span, but not at the end
// The byte position moves back along with each removed character, but
// never below 0
while (m_span.length() > 0) {
switch (m_span[m_span.length()-1]) {
case '.':
case '-':
case ',':
case '@':
case '_':
case '\'':
m_span.resize(m_span.length()-1);
if (--bp < 0)
bp = 0;
break;
default:
// A break here would only leave the switch: the goto ends the loop
goto breakloop1;
}
}
breakloop1:
// Note that the flag is set whatever emitterm() then does with the span
spanemitted = true;
if (!emitterm(true, m_span, m_spanpos, bp - m_span.length(), bp))
return false;
}
// Emit word if different from span and not 'no words' mode
// Also not emitted if this is a number and numbers are not wanted
if (!(m_flags & TXTS_ONLYSPANS) && m_wordLen &&
!(o_noNumbers && m_inNumber) &&
(!spanemitted || m_wordLen != m_span.length())) {
string s(m_span.substr(m_wordStart, m_wordLen));
if (!emitterm(false, s, m_wordpos, bp - m_wordLen, bp))
return false;
}
// Adjust state
// The word position only moves if there actually was a current word
if (m_wordLen) {
m_wordpos++;
m_wordLen = 0;
}
if (spanerase) {
discardspan();
} else {
// The span goes on: the next word begins at its current end
m_wordStart = m_span.length();
}
return true;
}
// Forget the current span and word. The next span will begin at the current
// word position.
void TextSplit::discardspan()
{
    m_wordLen = 0;
    m_wordStart = 0;
    m_spanpos = m_wordpos;
    m_span.erase();
}
/**
* Splitting a text into terms to be indexed.
* We basically emit a word every time we see a separator, but some chars are
* handled specially so that special cases, ie, c++ and jfd@recoll.com etc,
* are handled properly,
*/
bool TextSplit::text_to_words(const string &in)
{
LOGDEB1(("TextSplit::text_to_words: docjk %d (%d) %s%s%s [%s]\n",
o_processCJK, o_CJKNgramLen,
m_flags & TXTS_NOSPANS ? " nospans" : "",
m_flags & TXTS_ONLYSPANS ? " onlyspans" : "",
m_flags & TXTS_KEEPWILD ? " keepwild" : "",
in.substr(0,50).c_str()));
m_span.erase();
m_inNumber = false;
m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0;
int curspanglue = 0;
// Running count of non-alphanum chars. Reset when we see one;
int nonalnumcnt = 0;
Utf8Iter it(in);
for (; !it.eof(); it++) {
unsigned int c = *it;
nonalnumcnt++;
if (c == (unsigned int)-1) {
LOGERR(("Textsplit: error occured while scanning UTF-8 string\n"));
return false;
}
if (o_processCJK && UNICODE_IS_CJK(c)) {
// CJK character hit.
// Do like at EOF with the current non-cjk data.
if (m_wordLen || m_span.length()) {
if (!doemit(true, it.getBpos()))
return false;
}
// Hand off situation to the cjk routine.
if (!cjk_to_words(&it, &c)) {
LOGERR(("Textsplit: scan error in cjk handler\n"));
return false;
}
// Check for eof, else c contains the first non-cjk
// character after the cjk sequence, just go on.
if (it.eof())
break;
}
int cc = whatcc(c);
switch (cc) {
case DIGIT:
if (m_wordLen == 0)
m_inNumber = true;
m_wordLen += it.appendchartostring(m_span);
nonalnumcnt = 0;
break;
case SPACE:
SPACE:
curspanglue = 0;
nonalnumcnt = 0;
if (m_wordLen || m_span.length()) {
if (!doemit(true, it.getBpos()))
return false;
m_inNumber = false;
}
break;
case WILD:
if (m_flags & TXTS_KEEPWILD)
goto NORMALCHAR;
else
goto SPACE;
break;
case '-':
case '+':
curspanglue = cc;
if (m_wordLen == 0) {
// + or - don't start a term except if this looks like
// it's going to be to be a number
if (whatcc(it[it.getCpos()+1]) == DIGIT) {
// -10
m_inNumber = true;
m_wordLen += it.appendchartostring(m_span);
} else {
goto SPACE;
}
} else if (m_inNumber && (m_span[m_span.length() - 1] == 'e' ||
m_span[m_span.length() - 1] == 'E')) {
if (whatcc(it[it.getCpos()+1]) == DIGIT) {
m_wordLen += it.appendchartostring(m_span);
} else {
goto SPACE;
}
} else {
if (!doemit(false, it.getBpos()))
return false;
m_inNumber = false;
m_wordStart += it.appendchartostring(m_span);
}
break;
case '.':
case ',':
{
// Need a little lookahead here. At worse this gets the end null
int nextc = it[it.getCpos()+1];
int nextwhat = whatcc(nextc);
if (m_inNumber) {
// we're eliminating 132.jpg here. Good idea ?
if (nextwhat != DIGIT && nextc != 'e' && nextc != 'E')
goto SPACE;
m_wordLen += it.appendchartostring(m_span);
curspanglue = cc;
break;
} else {
// If . inside a word, it's spanglue, else, it's whitespace.
// We also keep an initial '.' for catching .net, but this adds
// quite a few spurious terms !
// Another problem is that something like .x-errs
// will be split as .x-errs, x, errs but not x-errs
// A final comma in a word will be removed by doemit
// Only letters and digits make sense after
if (nextwhat != A_LLETTER && nextwhat != A_ULETTER &&
nextwhat != DIGIT && nextwhat != LETTER)
goto SPACE;
if (cc == '.') {
// Check for number like .1
if (m_span.length() == 0 && nextwhat == DIGIT) {
m_inNumber = true;
m_wordLen += it.appendchartostring(m_span);
curspanglue = cc;
break;
}
if (m_wordLen) {
// Disputable special case: set spanemit to
// true when encountering a '.' while spanglue
// is '_'. Think of a_b.c Done to
// avoid breaking stuff after changing '_'
// from wordchar to spanglue
if (!doemit(false, it.getBpos(), curspanglue == '_'))
return false;
curspanglue = cc;
// span length could have been adjusted by trimming
// inside doemit
if (m_span.length())
m_wordStart += it.appendchartostring(m_span);
break;
} else {
m_wordStart += it.appendchartostring(m_span);
curspanglue = cc;
break;
}
}
}
goto SPACE;
}
break;
case '@':
if (m_wordLen) {
if (!doemit(false, it.getBpos()))
return false;
curspanglue = cc;
m_inNumber = false;
}
m_wordStart += it.appendchartostring(m_span);
break;
case '_':
if (m_wordLen) {
if (!doemit(false, it.getBpos()))
return false;
curspanglue = cc;
m_inNumber = false;
}
m_wordStart += it.appendchartostring(m_span);
break;
case '\'':
// If in word, potential span: o'brien, else, this is more
// whitespace
if (m_wordLen) {
if (!doemit(false, it.getBpos()))
return false;
curspanglue = cc;
m_inNumber = false;
m_wordStart += it.appendchartostring(m_span);
}
break;
case '#':
// Keep it only at end of word ... Special case for c# you see...
if (m_wordLen > 0) {
int w = whatcc(it[it.getCpos()+1]);
if (w == SPACE || w == '\n' || w == '\r') {
m_wordLen += it.appendchartostring(m_span);
break;
}
}
goto SPACE;
break;
case '\n':
case '\r':
if (m_span.length() && m_span[m_span.length() - 1] == '-') {
// if '-' is the last char before end of line, just
// ignore the line change. This is the right thing to
// do almost always. We'd then need a way to check if
// the - was added as part of the word hyphenation, or was
// there in the first place, but this would need a dictionary.
// Also we'd need to check for a soft-hyphen and remove it,
// but this would require more utf-8 magic
} else {
// Handle like a normal separator
goto SPACE;
}
break;
#ifdef RCL_SPLIT_CAMELCASE
// Camelcase handling.
// If we get uppercase ascii after lowercase ascii, emit word.
// This emits "camel" when hitting the 'C' of camelCase
// Not enabled by defaults as this makes phrase searches quite
// confusing.
// ie "MySQL manual" is matched by "MySQL manual" and
// "my sql manual" but not "mysql manual"
case A_ULETTER:
if (m_span.length() &&
charclasses[(unsigned char)m_span[m_span.length() - 1]] ==
A_LLETTER) {
if (m_wordLen) {
if (!doemit(false, it.getBpos()))
return false;
}
}
goto NORMALCHAR;
// CamelCase handling.
// If we get lowercase after uppercase and the current
// word length is bigger than one, it means we had a
// string of several upper-case letters: an
// acronym (readHTML) or a single letter article (ALittleHelp).
// Emit the uppercase word before proceeding
case A_LLETTER:
if (m_span.length() &&
charclasses[(unsigned char)m_span[m_span.length() - 1]] ==
A_ULETTER && m_wordLen > 1) {
// Multiple upper-case letters. Single letter word
// or acronym which we want to emit now
m_wordLen--;
if (!doemit(false, it.getBpos()))
return false;
m_wordStart--;
m_wordLen++;
}
goto NORMALCHAR;
#endif /* CAMELCASE */
default:
NORMALCHAR:
if (m_inNumber && c != 'e' && c != 'E') {
m_inNumber = false;
}
m_wordLen += it.appendchartostring(m_span);
nonalnumcnt = 0;
break;
}
}
if (m_wordLen || m_span.length()) {
if (!doemit(true, it.getBpos()))
return false;
}
return true;
}
// Split a run of CJK text into character n-grams.
//
// Using an Utf8Iter pointer just to avoid needing its definition in
// textsplit.h
//
// We output n-grams: for example, for char input a b c and ngramlen == 2,
// we generate: a ab b bc c as words
//
// This is very different from the normal behaviour, so we don't use
// the doemit() and emitterm() routines
//
// The routine is sort of a mess and goes to show that we'd probably
// be better off converting the whole buffer to utf32 on entry...
//
// itp: iterator positioned inside the text; it is advanced until a non-CJK
//      character (left as the current character) or the end of input.
// cp:  receives the last character value read from the iterator (0 if
//      nothing was read).
// Returns false as soon as takeword() returns false, true otherwise.
bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
{
    LOGDEB1(("cjk_to_words: m_wordpos %d\n", m_wordpos));
    Utf8Iter &it = *itp;

    // We use an offset buffer to remember the starts of the utf-8
    // characters which we still need to use.
    // cjkProcessing() accepts n-gram lengths up to and including
    // o_CJKMaxNgramLen, and boffs has room for that many offsets, so the
    // maximum must pass the check. A zero length is rejected: it would
    // make 'nchars-1' wrap around in the shift loop below.
    assert(o_CJKNgramLen > 0 && o_CJKNgramLen <= o_CJKMaxNgramLen);
    unsigned int boffs[o_CJKMaxNgramLen+1];

    // Current number of valid offsets;
    unsigned int nchars = 0;
    unsigned int c = 0;
    for (; !it.eof(); it++) {
        c = *it;
        if (!UNICODE_IS_CJK(c)) {
            // Return to normal handler
            break;
        }
        if (whatcc(c) == SPACE) {
            // Drop the pending offsets and go on
            nchars = 0;
            continue;
        }
        if (nchars == o_CJKNgramLen) {
            // Offset buffer full, shift it. Might be more efficient
            // to have a circular one, but things are complicated
            // enough already...
            for (unsigned int i = 0; i < nchars-1; i++) {
                boffs[i] = boffs[i+1];
            }
        } else {
            nchars++;
        }
        // Take note of byte offset for this character.
        boffs[nchars-1] = it.getBpos();

        // Output all new ngrams: they begin at each existing position
        // and end after the new character. onlyspans->only output
        // maximum words, nospans=> single chars
        if (!(m_flags & TXTS_ONLYSPANS) || nchars == o_CJKNgramLen) {
            unsigned int btend = it.getBpos() + it.getBlen();
            unsigned int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
            unsigned int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
            for (unsigned int i = loopbeg; i < loopend; i++) {
                if (!takeword(it.buffer().substr(boffs[i],
                                                 btend-boffs[i]),
                              m_wordpos - (nchars-i-1), boffs[i], btend)) {
                    return false;
                }
            }
            if ((m_flags & TXTS_ONLYSPANS)) {
                // Only spans: don't overlap: flush buffer
                nchars = 0;
            }
        }
        // Increase word position by one, other words are at an
        // existing position. This could be subject to discussion...
        m_wordpos++;
    }

    // If onlyspans is set, there may be things to flush in the buffer
    // first
    if ((m_flags & TXTS_ONLYSPANS) && nchars > 0 && nchars != o_CJKNgramLen) {
        unsigned int btend = it.getBpos(); // Current char is out
        if (!takeword(it.buffer().substr(boffs[0], btend-boffs[0]),
                      m_wordpos - nchars,
                      boffs[0], btend)) {
            return false;
        }
    }

    // Reset the span/word state before returning to the caller
    m_span.erase();
    m_inNumber = false;
    m_wordStart = m_wordLen = m_prevpos = m_prevlen = 0;
    m_spanpos = m_wordpos;
    *cp = c;
    return true;
}
// Word-counting splitter used by TextSplit::countWords(): its takeword()
// only tallies the terms it is handed.
class TextSplitCW : public TextSplit {
public:
    // Number of terms received so far
    int wcnt;

    TextSplitCW(Flags flags)
        : TextSplit(flags), wcnt(0)
    {
    }

    // Count the term and always ask the splitter to continue. The term
    // text, position and byte offsets are not looked at.
    bool takeword(const string& /*term*/, int /*pos*/, int /*bts*/,
                  int /*bte*/)
    {
        ++wcnt;
        return true;
    }
};
int TextSplit::countWords(const string& s, TextSplit::Flags flgs)
{
TextSplitCW splitter(flgs);
splitter.text_to_words(s);
return splitter.wcnt;
}
// Tell whether 'in' contains at least one character from the visiblewhite
// set. Returns false if there is none, or if the UTF-8 scan reports an
// error.
bool TextSplit::hasVisibleWhite(const string &in)
{
    Utf8Iter it(in);
    for (; !it.eof(); it++) {
        // Keep the full value returned by the iterator. Narrowing it to
        // unsigned char would make the error test below impossible to
        // satisfy and would compare only the low 8 bits against the set.
        unsigned int c = *it;
        LOGDEB3(("TextSplit::hasVisibleWhite: testing 0x%04x\n", c));
        if (c == (unsigned int)-1) {
            LOGERR(("hasVisibleWhite: error while scanning UTF-8 string\n"));
            return false;
        }
        if (visiblewhite.find(c) != visiblewhite.end())
            return true;
    }
    return false;
}
// Split the UTF-8 string 's' into tokens at white space, storing them in
// 'tokens' (cleared first). Characters found in the visiblewhite set are
// handled as a space. A substring enclosed in double quotes yields a single
// token; inside quotes, a backslash escapes the next character. Outside
// quotes, a backslash is kept as an ordinary character.
// Returns false on a UTF-8 scanning error or if the input ends inside a
// quoted area, true otherwise.
template <class T> bool u8stringToStrings(const string &s, T &tokens)
{
    Utf8Iter it(s);
    string current;
    tokens.clear();
    enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
    states state = SPACE;
    for (; !it.eof(); it++) {
        unsigned int c = *it;
        if (visiblewhite.find(c) != visiblewhite.end())
            c = ' ';
        LOGDEB3(("TextSplit::stringToStrings: 0x%04x\n", c));
        if (c == (unsigned int)-1) {
            LOGERR(("TextSplit::stringToStrings: error while "
                    "scanning UTF-8 string\n"));
            return false;
        }
        switch (c) {
        case '"':
            switch(state) {
            case SPACE: state = INQUOTE; continue;
            case TOKEN: goto push_char;
            case ESCAPE: state = INQUOTE; goto push_char;
            case INQUOTE: tokens.push_back(current);current.clear();
                state = SPACE; continue;
            }
            break;
        case '\\':
            switch(state) {
            case SPACE:
            case TOKEN: state=TOKEN; goto push_char;
            case INQUOTE: state = ESCAPE; continue;
            case ESCAPE: state = INQUOTE; goto push_char;
            }
            break;
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            switch(state) {
            case SPACE: continue;
            case TOKEN: tokens.push_back(current); current.clear();
                state = SPACE; continue;
            case INQUOTE: goto push_char;
                // An escaped white space character consumes the escape,
                // like any other escaped character. Staying in ESCAPE
                // would make the following character (possibly the
                // closing quote) escaped too.
            case ESCAPE: state = INQUOTE; goto push_char;
            }
            break;
        default:
            switch(state) {
            case ESCAPE: state = INQUOTE; break;
            case SPACE: state = TOKEN; break;
            case TOKEN:
            case INQUOTE: break;
            }
        push_char:
            // Append the original character from the input (not 'c', which
            // may have been replaced by a plain space above)
            it.appendchartostring(current);
        }
    }

    // End of string. Process residue, and possible error (unfinished quote)
    switch(state) {
    case SPACE: break;
    case TOKEN: tokens.push_back(current); break;
    case INQUOTE:
    case ESCAPE: return false;
    }
    return true;
}
// list<string> front-end to the u8stringToStrings() template.
bool TextSplit::stringToStrings(const string &s, list<string> &tokens)
{
    // The template argument is deduced from 'tokens' as list<string>
    return u8stringToStrings(s, tokens);
}
#else // TEST driver ->
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <iostream>
#include "textsplit.h"
#include "readfile.h"
#include "debuglog.h"
#include "transcode.h"
#include "unacpp.h"
#include "termproc.h"
using namespace std;
class myTermProc : public Rcl::TermProc {
int first;
bool nooutput;
public:
myTermProc() : TermProc(0), first(1), nooutput(false) {}
void setNoOut(bool val) {nooutput = val;}
virtual bool takeword(const string &term, int pos, int bs, int be)
{
if (nooutput)
return true;
FILE *fp = stdout;
if (first) {
fprintf(fp, "%3s %-20s %4s %4s\n", "pos", "Term", "bs", "be");
first = 0;
}
fprintf(fp, "%3d %-20s %4d %4d\n", pos, term.c_str(), bs, be);
return true;
}
};
// Built-in sample text: main() splits this when it is not given exactly
// one file name argument.
static string teststring =
"Un bout de texte \nnormal. 2eme phrase.3eme;quatrieme.\n"
"\"Jean-Francois Dockes\" <jfd@okyz.com>\n"
"n@d @net .net t@v@c c# c++ o'brien 'o'brien' l'ami\n"
"data123\n"
"134 +134 -14 0.1 .1 2. -1.5 +1.5 1,2 1.54e10 1,2e30 .1e10 1.e-8\n"
"@^#$(#$(*)\n"
"192.168.4.1 one\n\rtwo\r"
"Debut-\ncontinue\n"
"[olala][ululu] (valeur) (23)\n"
"utf-8 ucs-4© \\nodef\n"
"A b C 2 . +"
"','this\n"
" ,able,test-domain "
" -wl,--export-dynamic "
" ~/.xsession-errors "
;
// Second, minimal sample. Not referenced by the code visible in this file
// section.
static string teststring1 = " nouvel-an ";
// Program name (argv[0]), set by main() and used by Usage()
static string thisprog;
// Help text printed by Usage()
static string usage =
" textsplit [opts] [filename]\n"
" -q : no output\n"
" -s : only spans\n"
" -w : only words\n"
" -n : no numbers\n"
" -k : preserve wildcards (?*)\n"
" -c : just count words\n"
" -u : use unac\n"
" -C [charset] : input charset\n"
" -S [stopfile] : stopfile to use for commongrams\n"
" if filename is 'stdin', will read stdin for data (end with ^D)\n"
" \n\n"
;
// Print the program name and the usage text on stderr, then exit with
// status 1.
static void Usage(void)
{
    cerr << thisprog << ": usage:\n";
    cerr << usage;
    exit(1);
}
// Bit mask of the command line options seen by main(); one OPT_x bit per
// option letter x.
static int op_flags;
#define OPT_s 0x1
#define OPT_w 0x2
#define OPT_q 0x4
#define OPT_c 0x8
#define OPT_k 0x10
#define OPT_C 0x20
#define OPT_n 0x40
#define OPT_S 0x80
#define OPT_u 0x100
// Test driver: parse the options, read the input (file, stdin, or the
// built-in teststring), optionally transcode it to UTF-8, then either count
// the words or print the terms produced by the splitter.
int main(int argc, char **argv)
{
    string charset, stopfile;

    thisprog = argv[0];
    argc--; argv++;

    // Options: single letters which may be grouped after a '-'. -C and -S
    // take their value from the next argument.
    while (argc > 0 && **argv == '-') {
        (*argv)++;
        if (!(**argv))
            /* Case of a lone "-" argument, as in "adb - core" */
            Usage();
        while (**argv)
            switch (*(*argv)++) {
            case 'c': op_flags |= OPT_c; break;
            case 'C': op_flags |= OPT_C; if (argc < 2) Usage();
                charset = *(++argv); argc--;
                goto b1;
            case 'k': op_flags |= OPT_k; break;
            case 'n': op_flags |= OPT_n; break;
            case 'q': op_flags |= OPT_q; break;
            case 's': op_flags |= OPT_s; break;
            case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
                stopfile = *(++argv); argc--;
                goto b1;
            case 'u': op_flags |= OPT_u; break;
            case 'w': op_flags |= OPT_w; break;
            default: Usage(); break;
            }
    b1: argc--; argv++;
    }

    DebugLog::getdbl()->setloglevel(DEBDEB1);
    DebugLog::setfilename("stderr");

    // Translate the options into splitter flags. -s wins over -w.
    TextSplit::Flags flags = TextSplit::TXTS_NONE;
    if (op_flags&OPT_s)
        flags = TextSplit::TXTS_ONLYSPANS;
    else if (op_flags&OPT_w)
        flags = TextSplit::TXTS_NOSPANS;
    if (op_flags & OPT_k)
        flags = (TextSplit::Flags)(flags | TextSplit::TXTS_KEEPWILD);
    if (op_flags & OPT_n)
        TextSplit::noNumbers();

    Rcl::StopList stoplist;
    if (op_flags & OPT_S) {
        if (!stoplist.setFile(stopfile)) {
            cerr << "Can't read stopfile: " << stopfile << endl;
            exit(1);
        }
    }

    // Get the input data
    string odata, reason;
    if (argc == 1) {
        const char *filename = *argv++; argc--;
        if (!strcmp(filename, "stdin")) {
            char buf[1024];
            ssize_t nread;
            while ((nread = read(0, buf, sizeof(buf))) > 0) {
                odata.append(buf, nread);
            }
            // A read error must not be mistaken for end of input:
            // we would silently process truncated data.
            if (nread < 0) {
                cerr << "Failed: read(stdin) failed: errno " << errno << endl;
                exit(1);
            }
        } else if (!file_to_string(filename, odata, &reason)) {
            cerr << "Failed: file_to_string(" << filename << ") failed: "
                 << reason << endl;
            exit(1);
        }
    } else {
        cout << endl << teststring << endl << endl;
        odata = teststring;
    }

    // 'data' is an alias for odata: assigning to it below replaces the
    // original bytes with the transcoded ones.
    string& data = odata;
    string ndata;
    if ((op_flags & OPT_C)) {
        if (!transcode(odata, ndata, charset, "UTF-8")) {
            cerr << "Failed: transcode error" << endl;
            exit(1);
        } else {
            data = ndata;
        }
    }

    if (op_flags & OPT_c) {
        int n = TextSplit::countWords(data, flags);
        cout << n << " words" << endl;
    } else {
        // Build the processing chain backwards from the printer. Each
        // optional stage is only linked in when its option is set.
        myTermProc printproc;
        Rcl::TermProc *nxt = &printproc;
        Rcl::TermProcCommongrams commonproc(nxt, stoplist);
        if (op_flags & OPT_S)
            nxt = &commonproc;
        Rcl::TermProcPrep preproc(nxt);
        if (op_flags & OPT_u)
            nxt = &preproc;
        Rcl::TextSplitP splitter(nxt, flags);
        if (op_flags & OPT_q)
            printproc.setNoOut(true);
        splitter.text_to_words(data);
    }
    return 0;
}
#endif // TEST

134
src/common/textsplit.h Normal file
View file

@ -0,0 +1,134 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _TEXTSPLIT_H_INCLUDED_
#define _TEXTSPLIT_H_INCLUDED_
#include <string>
#include <list>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
#endif
class Utf8Iter;
/**
 * Split text into words.
 * See comments at top of .cpp for more explanations.
 * This uses a callback function (the takeword() virtual method). It could
 * be done with an iterator instead, but it's much simpler this way...
 */
class TextSplit {
public:
    // Should we activate special processing of Chinese characters ? This
    // needs a little more cpu, so it can be turned off globally. This is set
    // by rclconfig, changing it means reindexing
    static bool o_processCJK;
    static unsigned int o_CJKNgramLen;
    static const unsigned int o_CJKMaxNgramLen = 5;

    /** Turn CJK processing on or off and set the n-gram length. The length
     * is clamped to the range [1, o_CJKMaxNgramLen]: a zero length would
     * make the n-gram offset buffer handling in cjk_to_words() index out
     * of bounds. */
    static void cjkProcessing(bool onoff, unsigned int ngramlen = 2)
    {
        o_processCJK = onoff;
        if (ngramlen < 1)
            ngramlen = 1;
        o_CJKNgramLen = ngramlen <= o_CJKMaxNgramLen ?
            ngramlen : o_CJKMaxNgramLen;
    }

    // Are we indexing numbers ? Set by rclconfig. Change needs reindex
    static bool o_noNumbers;
    static void noNumbers()
    {
        o_noNumbers = true;
    }

    enum Flags {TXTS_NONE = 0,
                TXTS_ONLYSPANS = 1, // Only return maximum spans (a@b.com)
                TXTS_NOSPANS = 2, // Only return atomic words (a, b, com)
                TXTS_KEEPWILD = 4 // Handle wildcards as letters
    };

    /** All scalar state is given a defined initial value so that no member
     * is ever read while indeterminate. */
    TextSplit(Flags flags = Flags(TXTS_NONE))
        : m_flags(flags), m_maxWordLength(40), m_wordStart(0), m_wordLen(0),
          m_inNumber(false), m_wordpos(0), m_spanpos(0), m_prevpos(-1),
          m_prevlen(0)
    {
    }
    virtual ~TextSplit() {}

    /** Split text, emit words and positions. */
    virtual bool text_to_words(const string &in);

    /** Process one output word: to be implemented by the actual user class */
    virtual bool takeword(const string& term,
                          int pos, // term pos
                          int bts, // byte offset of first char in term
                          int bte // byte offset of first char after term
                          ) = 0;

    // Static utility functions:

    /** Count words in string, as the splitter would generate them */
    static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);

    /** Check if this is visibly not a single block of text */
    static bool hasVisibleWhite(const string &in);

    /** Split text span into strings, at white space, allowing for substrings
     * quoted with " . Escaping with \ works as usual inside the quoted areas.
     * This has to be kept separate from smallut.cpp's stringsToStrings, which
     * basically works only if whitespace is ascii, and which processes
     * non-utf-8 input (iso-8859 config files work ok). This hopefully
     * handles all Unicode whitespace, but needs correct utf-8 input
     */
    static bool stringToStrings(const string &s, list<string> &tokens);

    /** Is char CJK ? */
    static bool isCJK(int c);

private:
    Flags m_flags;
    int m_maxWordLength;

    // Current span. Might be jf.dockes@wanadoo.f
    string m_span;

    // Current word: no punctuation at all in there. Byte offset
    // relative to the current span and byte length
    int m_wordStart;
    unsigned int m_wordLen;

    // Currently inside number
    bool m_inNumber;

    // Term position of current word and span
    int m_wordpos;
    int m_spanpos;

    // It may happen that our cleanup would result in emitting the
    // same term twice. We try to avoid this
    int m_prevpos;
    unsigned int m_prevlen;

    // This processes cjk text:
    bool cjk_to_words(Utf8Iter *it, unsigned int *cp);

    bool emitterm(bool isspan, string &term, int pos, int bs, int be);
    bool doemit(bool spanerase, int bp, bool spanemit=false);
    void discardspan();
};
#endif /* _TEXTSPLIT_H_INCLUDED_ */

128
src/common/unacpp.cpp Normal file
View file

@ -0,0 +1,128 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_UNACPP
#include <stdio.h>
#include <cstdlib>
#include <errno.h>
#include <string>
#include "unacpp.h"
#include "unac.h"
#include "debuglog.h"
#include "utf8iter.h"
bool unacmaybefold(const string &in, string &out,
const char *encoding, bool dofold)
{
char *cout = 0;
size_t out_len;
int status;
status = dofold ?
unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len) :
unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
if (status < 0) {
if (cout)
free(cout);
char cerrno[20];
sprintf(cerrno, "%d", errno);
out = string("unac_string failed, errno : ") + cerrno;
return false;
}
out.assign(cout, out_len);
if (cout)
free(cout);
return true;
}
bool unaciscapital(const string& in)
{
if (in.empty())
return false;
Utf8Iter it(in);
string shorter;
it.appendchartostring(shorter);
string noacterm, noaclowterm;
if (!unacmaybefold(shorter, noacterm, "UTF-8", false)) {
LOGINFO(("unaciscapital: unac failed for [%s]\n", in.c_str()));
return false;
}
if (!unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) {
LOGINFO(("unaciscapital: unacfold failed for [%s]\n", in.c_str()));
return false;
}
Utf8Iter it1(noacterm);
Utf8Iter it2(noaclowterm);
if (*it1 != *it2)
return true;
else
return false;
}
#else // not testing
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <iostream>
using namespace std;
#include "unacpp.h"
#include "readfile.h"
// Test driver: unacpp <encoding> <infile> <outfile>
// Reads infile, runs unacmaybefold() on it with folding enabled, and writes
// the result to outfile, which must not already exist (O_EXCL).
int main(int argc, char **argv)
{
    bool dofold = true;
    if (argc != 4) {
        cerr << "Usage: unacpp <encoding> <infile> <outfile>" << endl;
        exit(1);
    }
    const char *encoding = argv[1];
    string ifn = argv[2];
    const char *ofn = argv[3];

    string odata;
    if (!file_to_string(ifn, odata)) {
        // NOTE(review): this prints the data buffer, not an error reason.
        // Presumably file_to_string() can also return a reason string;
        // verify against readfile.h.
        cerr << "file_to_string: " << odata << endl;
        exit(1);
    }
    string ndata;
    if (!unacmaybefold(odata, ndata, encoding, dofold)) {
        cerr << "unac: " << ndata << endl;
        exit(1);
    }

    int fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
    if (fd < 0) {
        cerr << "Open/Create " << ofn << " failed: " << strerror(errno)
             << endl;
        exit(1);
    }

    // write() may legitimately write less than requested: loop until
    // everything is out instead of reporting a short write as an error
    // with a meaningless errno.
    const char *cp = ndata.c_str();
    size_t remaining = ndata.length();
    while (remaining > 0) {
        ssize_t nw = write(fd, cp, remaining);
        if (nw < 0) {
            if (errno == EINTR)
                continue;
            cerr << "Write(2) failed: " << strerror(errno) << endl;
            exit(1);
        }
        if (nw == 0) {
            // No progress and no error code: give up rather than spin
            cerr << "Write(2) failed: no data written" << endl;
            exit(1);
        }
        cp += nw;
        remaining -= (size_t)nw;
    }
    if (close(fd) < 0) {
        cerr << "Close failed: " << strerror(errno) << endl;
        exit(1);
    }
    exit(0);
}
#endif

33
src/common/unacpp.h Normal file
View file

@ -0,0 +1,33 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _UNACPP_H_INCLUDED_
#define _UNACPP_H_INCLUDED_
#include <string>
#ifndef NO_NAMESPACES
using std::string;
#endif /* NO_NAMESPACES */
// A small stringified wrapper for unac.c: converts 'in' (text in
// 'encoding') with unacfold_string() if 'dofold' is true, else with
// unac_string(). Returns true with the result in 'out', or false with an
// error message in 'out'.
extern bool unacmaybefold(const string& in, string& out,
const char *encoding, bool dofold);
// Utility function to determine if string begins with capital: true if
// the first character, once unaccented, is changed by folding. The input
// is processed as UTF-8. Returns false for an empty string or on
// conversion failure.
extern bool unaciscapital(const string& in);
#endif /* _UNACPP_H_INCLUDED_ */

220
src/common/uproplist.h Normal file
View file

@ -0,0 +1,220 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _PROPLIST_H_INCLUDED_
#define _PROPLIST_H_INCLUDED_
/**
 * A subset of Unicode chars that we consider word breaks when we
 * split text in words.
 *
 * This is used as a quick fix to the ascii-based code, and is not correct.
 * the correct way would be to do what http://www.unicode.org/reports/tr29/
 * says.
 *
 * All values are Unicode code points, NOT UTF-8 byte sequences: the
 * pilcrow sign is 0x00B6 (not 0xC2B6, which is its UTF-8 encoding and also
 * the code point of an unrelated Hangul syllable), and the division sign
 * is 0x00F7 (not 0xC3B7). The table is kept in ascending order and contains
 * a few duplicates (characters listed under several properties).
 *
 * Data from:
 # PropList-4.0.1.txt
 # Date: 2004-03-02, 02:42:40 GMT [MD]
 #
 # Unicode Character Database
 # Copyright (c) 1991-2004 Unicode, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 # For documentation, see UCD.html
 */
static const unsigned int uniign[] = {
    0x0021, /* ; Terminal_Punctuation # Po EXCLAMATION MARK*/
    0x002C, /* ; Terminal_Punctuation # Po COMMA*/
    0x002D, /* ; Dash # Pd HYPHEN-MINUS*/
    0x002E, /* ; Terminal_Punctuation # Po FULL STOP*/
    0x003A, /* ; Terminal_Punctuation # Po [2] COLON..SEMICOLON*/
    0x003B, /* ; Terminal_Punctuation # Po [2] COLON..SEMICOLON*/
    0x003F, /* ; Terminal_Punctuation # Po QUESTION MARK*/
    0x0085, /* NEXT LINE NEL;Cc */
    0x00A0, /* NO-BREAK SPACE; Zs */
    0x00A1, /* INVERTED EXCLAMATION MARK;Po */
    0x00A2, /* CENT SIGN;Sc */
    0x00A3, /* POUND SIGN;Sc; */
    0x00A4, /* CURRENCY SIGN;Sc; */
    0x00A5, /* YEN SIGN;Sc; */
    0x00A6, /* BROKEN BAR;So */
    0x00A7, /* SECTION SIGN;So; */
    0x00A9, /* COPYRIGHT SIGN;So */
    0x00AB, /* ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK*/
    0x00AC, /* NOT SIGN;Sm */
    0x00AD, /* ; Hyphen # Cf SOFT HYPHEN*/
    0x00AE, /* registered sign */
    0x00B0, /* DEGREE SIGN;So;0;ET;;;;;N;;;;; */
    0x00B1, /* PLUS-MINUS SIGN;Sm;0;ET;;;;;N;PLUS-OR-MINUS SIGN;;;;*/
    0x00B6, /* PILCROW SIGN;So;0;ON;;;;;N;PARAGRAPH SIGN;;;; */
    0x00B7, /* MIDDLE DOT;Po;0;ON;;;;;N;;;;;*/
    0x00BB, /* ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK*/
    0x00BF, /* INVERTED QUESTION MARK;Po */
    0x00D7, /* MULTIPLICATION SIGN;Sm;0;ON;;;;;N;;;;; */
    0x00F7, /* DIVISION SIGN;Sm;0;ON;;;;;N;;;;; */
    0x037E, /* ; Terminal_Punctuation # Po GREEK QUESTION MARK*/
    0x0387, /* ; Terminal_Punctuation # Po GREEK ANO TELEIA*/
    0x055C, /* ; STerm # Po ARMENIAN EXCLAMATION MARK*/
    0x055E, /* ; STerm # Po ARMENIAN QUESTION MARK*/
    0x0589, /* ; STerm # Po ARMENIAN FULL STOP*/
    0x0589, /* ; Terminal_Punctuation # Po ARMENIAN FULL STOP*/
    0x058A, /* ; Dash # Pd ARMENIAN HYPHEN*/
    0x058A, /* ; Hyphen # Pd ARMENIAN HYPHEN*/
    0x05C3, /* ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ*/
    0x060C, /* ; Terminal_Punctuation # Po ARABIC COMMA*/
    0x061B, /* ; Terminal_Punctuation # Po ARABIC SEMICOLON*/
    0x061F, /* ; STerm # Po ARABIC QUESTION MARK*/
    0x061F, /* ; Terminal_Punctuation # Po ARABIC QUESTION MARK*/
    0x06D4, /* ; STerm # Po ARABIC FULL STOP*/
    0x06D4, /* ; Terminal_Punctuation # Po ARABIC FULL STOP*/
    0x166E, /* ; STerm # Po CANADIAN SYLLABICS FULL STOP*/
    0x1680, /* ; White_Space # Zs OGHAM SPACE MARK*/
    0x16EB, /* RUNIC SINGLE PUNCTUATION;Po;0;L;;;;;N;;;;;*/
    0x16EC, /* RUNIC MULTIPLE PUNCTUATION;Po;0;L;;;;;N;;;;;*/
    0x16ED, /* RUNIC CROSS PUNCTUATION;Po;0;L;;;;;N;;;;; */
    0x1803, /* ; STerm # Po MONGOLIAN FULL STOP*/
    0x1806, /* ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN*/
    0x1809, /* ; STerm # Po MONGOLIAN MANCHU FULL STOP*/
    0x180E, /* ; White_Space # Zs MONGOLIAN VOWEL SEPARATOR*/
    0x2000, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2001, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2002, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2003, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2004, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2005, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2006, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2007, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2008, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2009, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x200A, /* ; White_Space # Zs [11] EN QUAD..HAIR SPACE*/
    0x2010, /* ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN*/
    0x2011, /* ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN*/
    0x2012, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
    0x2013, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
    0x2014, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
    0x2015, /* ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR*/
    0x2018, /* ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK*/
    0x2019, /* ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK*/
    0x201A, /* ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK*/
    0x201B, /* ; Quotation_Mark # Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK*/
    0x201C, /* ; Quotation_Mark # Pi LEFT DOUBLE QUOTATION MARK*/
    0x201D, /* ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK*/
    0x201E, /* ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK*/
    0x201F, /* ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK*/
    0x2022, /* BULLET;Po;0;ON;;;;;N;;;;; */
    0x2023, /* TRIANGULAR BULLET;Po;0;ON;;;;;N;;;;;*/
    0x2024, /* ONE DOT LEADER;Po;0;ON;<compat> 002E;;;;N;;;;;*/
    0x2025, /* TWO DOT LEADER;Po;0;ON;<compat> 002E 002E;;;;N;;;;; */
    0x2026, /* HORIZONTAL ELLIPSIS;Po;0;ON;<compat> 002E 002E 002E;;;;N;;;;; */
    0x2028, /* ; White_Space # Zl LINE SEPARATOR*/
    0x2029, /* ; White_Space # Zp PARAGRAPH SEPARATOR*/
    0x202F, /* ; White_Space # Zs NARROW NO-BREAK SPACE*/
    0x2032, /* PRIME;Po;0;ET;;;;;N;;;;;*/
    0x2039, /* ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK*/
    0x203A, /* ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK*/
    0x203C, /* ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
    0x203D, /* ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG*/
    0x2047, /* ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
    0x2048, /* ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
    0x2049, /* ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK*/
    0x2053, /* ; Dash # Po SWUNG DASH*/
    0x205F, /* ; White_Space # Zs MEDIUM MATHEMATICAL SPACE*/
    0x207B, /* ; Dash # Sm SUPERSCRIPT MINUS*/
    0x208B, /* ; Dash # Sm SUBSCRIPT MINUS*/
    0x2117, /* SOUND RECORDING COPYRIGHT;So */
    0x2122, /* TRADE MARK SIGN;So; */
    0x2192, /* RIGHTWARDS ARROW;Sm;0;ON;;;;;N;RIGHT ARROW;;;;*/
    0x2212, /* ; Dash # Sm MINUS SIGN*/
    0x2E2E, /* REVERSED QUESTION MARK;Po;0;ON;;;;;N;;;;; */
    0x3000, /* ; White_Space # Zs IDEOGRAPHIC SPACE*/
    0x3002, /* ; STerm # Po IDEOGRAPHIC FULL STOP*/
    0x300C, /* ; Quotation_Mark # Ps LEFT CORNER BRACKET*/
    0x300D, /* ; Quotation_Mark # Pe RIGHT CORNER BRACKET*/
    0x300E, /* ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET*/
    0x300F, /* ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET*/
    0x301C, /* ; Dash # Pd WAVE DASH*/
    0x301D, /* ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK*/
    0x301E, /* ; Quotation_Mark # Pe LOW DOUBLE PRIME QUOTATION MARK*/
    0x3030, /* ; Dash # Pd WAVY DASH*/
    0x30FB, /* ; Hyphen # Pc KATAKANA MIDDLE DOT*/
    0xFE31, /* ; Dash # Pd PRESENTATION FORM FOR VERTICAL EM DASH*/
    0xFE32, /* ; Dash # Pd PRESENTATION FORM FOR VERTICAL EN DASH*/
    0xFE41, /* ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET*/
    0xFE42, /* ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET*/
    0xFE43, /* ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET*/
    0xFE44, /* ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET*/
    0xFE50, /* ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP*/
    0xFE51, /* ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP*/
    0xFE52, /* ; STerm # Po SMALL FULL STOP*/
    0xFE52, /* ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP*/
    0xFE54, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
    0xFE55, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
    0xFE56, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
    0xFE57, /* ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK*/
    0xFE58, /* ; Dash # Pd SMALL EM DASH*/
    0xFE63, /* ; Hyphen # Pd SMALL HYPHEN-MINUS*/
    0xFF01, /* FULLWIDTH EXCLAMATION MARK;Po;0;ON;<wide> 0021;;;;N;;;;; */
    0xFF02, /* FULLWIDTH QUOTATION MARK;Po;0;ON;<wide> 0022;;;;N;;;;; */
    0xFF03, /* FULLWIDTH NUMBER SIGN;Po;0;ET;<wide> 0023;;;;N;;;;; */
    0xFF04, /* FULLWIDTH DOLLAR SIGN;Sc;0;ET;<wide> 0024;;;;N;;;;; */
    0xFF05, /* FULLWIDTH PERCENT SIGN;Po;0;ET;<wide> 0025;;;;N;;;;; */
    0xFF06, /* FULLWIDTH AMPERSAND;Po;0;ON;<wide> 0026;;;;N;;;;; */
    0xFF07, /* FULLWIDTH APOSTROPHE;Po;0;ON;<wide> 0027;;;;N;;;;; */
    0xFF08, /* FULLWIDTH LEFT PARENTHESIS;Ps;0;ON;<wide> 0028;;;;Y;FULLWIDTH OPENIN*/
    0xFF09, /* FULLWIDTH RIGHT PARENTHESIS;Pe;0;ON;<wide> 0029;;;;Y;FULLWIDTH CLOS*/
    0xFF0A, /* FULLWIDTH ASTERISK;Po;0;ON;<wide> 002A;;;;N;;;;; */
    0xFF0B, /* FULLWIDTH PLUS SIGN;Sm;0;ES;<wide> 002B;;;;N;;;;; */
    0xFF0C, /* FULLWIDTH COMMA;Po;0;CS;<wide> 002C;;;;N;;;;; */
    0xFF0D, /* FULLWIDTH HYPHEN-MINUS;Pd;0;ES;<wide> 002D;;;;N;;;;; */
    0xFF0E, /* FULLWIDTH FULL STOP;Po;0;CS;<wide> 002E;;;;N;FULLWIDTH PERIOD;;;; */
    0xFF0F, /* FULLWIDTH SOLIDUS;Po;0;CS;<wide> 002F;;;;N;FULLWIDTH SLASH;;;; */
    0xFF1A, /* ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
    0xFF1B, /* ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON*/
    0xFF1F, /* ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK*/
    0xFF61, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP*/
    0xFF62, /* ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET*/
    0xFF63, /* ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET*/
    0xFF64, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA*/
    0xFF65, /* ; Hyphen # Pc HALFWIDTH KATAKANA MIDDLE DOT*/
};
/* Code points of the white space characters which visibly break a block
 * of text: seeing one of them makes it obvious that quoting is needed if
 * a phrase search is wanted. */
static const unsigned int avsbwht[] = {
    /* ASCII controls and plain space */
    0x0009, /* U+0009 CHARACTER TABULATION */
    0x000A, /* U+000A LINE FEED */
    0x000D, /* U+000D CARRIAGE RETURN */
    0x0020, /* U+0020 SPACE */
    /* Other space separators below U+2000 */
    0x00A0, /* U+00A0 NO-BREAK SPACE */
    0x1680, /* U+1680 OGHAM SPACE MARK */
    0x180E, /* U+180E MONGOLIAN VOWEL SEPARATOR */
    /* General Punctuation block spaces */
    0x2000, /* U+2000 EN QUAD */
    0x2001, /* U+2001 EM QUAD */
    0x2002, /* U+2002 EN SPACE */
    0x2003, /* U+2003 EM SPACE */
    0x2004, /* U+2004 THREE-PER-EM SPACE */
    0x2005, /* U+2005 FOUR-PER-EM SPACE */
    0x2006, /* U+2006 SIX-PER-EM SPACE */
    0x2007, /* U+2007 FIGURE SPACE */
    0x2008, /* U+2008 PUNCTUATION SPACE */
    0x2009, /* U+2009 THIN SPACE */
    0x200A, /* U+200A HAIR SPACE */
    0x202F, /* U+202F NARROW NO-BREAK SPACE */
    0x205F, /* U+205F MEDIUM MATHEMATICAL SPACE */
    /* CJK */
    0x3000, /* U+3000 IDEOGRAPHIC SPACE */
};
#endif // _PROPLIST_H_INCLUDED_

Some files were not shown because too many files have changed in this diff Show more