Make the Python recoll interface a package with two modules: recoll and rclextract

This commit is contained in:
Jean-Francois Dockes 2012-12-20 17:05:34 +01:00
parent fc68d31151
commit 66e8817f77
7 changed files with 51 additions and 15 deletions

View file

@ -4,7 +4,7 @@ include $(depth)/mk/sysconf
LIBS = librcl.a
all: $(LIBS)
all: $(LIBS)
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rclquery.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS)
ar ru librcl.a $(OBJS)
$(RANLIB) librcl.a
# Future
#all: librecoll.so
#librecoll.so : $(DEPS) $(OBJS)
# g++ -shared -Wl,--no-undefined # -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) # -lxapian -lz -lX11 -lpthread -ldl
rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs

View file

@ -117,7 +117,7 @@ include \$(depth)/mk/sysconf
LIBS = librcl.a
all: \$(LIBS)
all: \$(LIBS)
OBJS = $OBJS
DEPS = $DEPS
@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS)
ar ru librcl.a \$(OBJS)
\$(RANLIB) librcl.a
# Future
#all: librecoll.so
#librecoll.so : \$(DEPS) \$(OBJS)
# g++ -shared -Wl,--no-undefined \
# -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \
# -lxapian -lz -lX11 -lpthread -ldl
EOF
for c in $SRC_CPP;do

View file

@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
outfile.assign(soutfile);
if (self->xtr == 0) {
PyErr_SetString(PyExc_AttributeError, "extract: null object");
PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
return 0;
}
if (ipath.empty()) {
PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath");
return 0;
}
self->xtr->setTargetMType(mimetype);
TempFile temp;
bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
if (!status) {
@ -285,5 +291,5 @@ initrclextract(void)
Py_INCREF(&rclx_ExtractorType);
PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);
recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0);
recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
}

View file

@ -1703,6 +1703,6 @@ initrecoll(void)
// Export a few pointers for the benefit of other recoll python modules
PyObject* doctypecapsule =
PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0);
PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
PyModule_AddObject(m, "doctypeptr", doctypecapsule);
}

View file

@ -28,4 +28,6 @@ typedef struct {
RclConfig *rclconfig;
} recoll_DocObject;
#define PYRECOLL_PACKAGE "recoll."
#endif // _PYRECOLL_H_INCLUDED_

View file

@ -50,8 +50,7 @@ module1 = Extension('recoll',
],
libraries = libs,
library_dirs = libdirs,
sources = ['pyrecoll.cpp',
])
sources = ['pyrecoll.cpp'])
module2 = Extension('rclextract',
define_macros = [('MAJOR_VERSION', '1'),
@ -67,14 +66,17 @@ module2 = Extension('rclextract',
],
libraries = libs,
library_dirs = libdirs,
sources = ['pyrclextract.cpp',
])
sources = ['pyrclextract.cpp'])
setup (name = 'Recoll',
version = '1.0',
description = 'Query/Augment a Recoll full text index',
author = 'J.F. Dockes',
author_email = 'jfd@recoll.org',
url = 'http://www.recoll.org',
license = 'GPL',
long_description = '''
''',
packages = ['recoll'],
ext_package = 'recoll',
ext_modules = [module1, module2])

View file

@ -6,9 +6,15 @@ This could actually be useful for something after some customization
import sys
from getopt import getopt
import recoll
import rclextract
try:
from recoll import recoll
from recoll import rclextract
hasextract = True
except:
import recoll
hasextract = False
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
"ipath", "fbytes", "dbytes", "relevancyrating")
@ -27,9 +33,15 @@ class ptrmeths:
def extract(doc):
extractor = rclextract.Extractor(doc)
newdoc = extractor.extract(doc.ipath)
newdoc = extractor.textextract(doc.ipath)
return newdoc
def extractofile(doc, outfilename=""):
extractor = rclextract.Extractor(doc)
outfilename = extractor.idoctofile(doc.ipath, doc.mimetype, \
ofilename=outfilename)
return outfilename
def doquery(db, q):
# Get query object
query = db.query()
@ -48,9 +60,11 @@ def doquery(db, q):
while query.next >= 0 and query.next < nres:
doc = query.fetchone()
print query.next, ":",
# for k,v in doc.items().items():
# print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
# continue
#for k,v in doc.items().items():
#print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
#continue
#outfile = extractofile(doc)
#print "outfile:", outfile, "url", doc.url.encode("utf-8")
for k in ("title", "mtime", "author"):
value = getattr(doc, k)
# value = doc.get(k)