Make python recoll a package with 2 modules: recoll and rclextract

This commit is contained in:
Jean-Francois Dockes 2012-12-20 17:05:34 +01:00
parent fc68d31151
commit 66e8817f77
7 changed files with 51 additions and 15 deletions

View file

@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS)
ar ru librcl.a $(OBJS) ar ru librcl.a $(OBJS)
$(RANLIB) librcl.a $(RANLIB) librcl.a
# Future
#all: librecoll.so
#librecoll.so : $(DEPS) $(OBJS)
# g++ -shared -Wl,--no-undefined # -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) # -lxapian -lz -lX11 -lpthread -ldl
rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp $(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs

View file

@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS)
ar ru librcl.a \$(OBJS) ar ru librcl.a \$(OBJS)
\$(RANLIB) librcl.a \$(RANLIB) librcl.a
# Future
#all: librecoll.so
#librecoll.so : \$(DEPS) \$(OBJS)
# g++ -shared -Wl,--no-undefined \
# -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \
# -lxapian -lz -lX11 -lpthread -ldl
EOF EOF
for c in $SRC_CPP;do for c in $SRC_CPP;do

View file

@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
outfile.assign(soutfile); outfile.assign(soutfile);
if (self->xtr == 0) { if (self->xtr == 0) {
PyErr_SetString(PyExc_AttributeError, "extract: null object"); PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
return 0; return 0;
} }
if (ipath.empty()) {
PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath");
return 0;
}
self->xtr->setTargetMType(mimetype);
TempFile temp; TempFile temp;
bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype); bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
if (!status) { if (!status) {
@ -285,5 +291,5 @@ initrclextract(void)
Py_INCREF(&rclx_ExtractorType); Py_INCREF(&rclx_ExtractorType);
PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType); PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);
recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0); recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
} }

View file

@ -1703,6 +1703,6 @@ initrecoll(void)
// Export a few pointers for the benefit of other recoll python modules // Export a few pointers for the benefit of other recoll python modules
PyObject* doctypecapsule = PyObject* doctypecapsule =
PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0); PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
PyModule_AddObject(m, "doctypeptr", doctypecapsule); PyModule_AddObject(m, "doctypeptr", doctypecapsule);
} }

View file

@ -28,4 +28,6 @@ typedef struct {
RclConfig *rclconfig; RclConfig *rclconfig;
} recoll_DocObject; } recoll_DocObject;
#define PYRECOLL_PACKAGE "recoll."
#endif // _PYRECOLL_H_INCLUDED_ #endif // _PYRECOLL_H_INCLUDED_

View file

@ -50,8 +50,7 @@ module1 = Extension('recoll',
], ],
libraries = libs, libraries = libs,
library_dirs = libdirs, library_dirs = libdirs,
sources = ['pyrecoll.cpp', sources = ['pyrecoll.cpp'])
])
module2 = Extension('rclextract', module2 = Extension('rclextract',
define_macros = [('MAJOR_VERSION', '1'), define_macros = [('MAJOR_VERSION', '1'),
@ -67,14 +66,17 @@ module2 = Extension('rclextract',
], ],
libraries = libs, libraries = libs,
library_dirs = libdirs, library_dirs = libdirs,
sources = ['pyrclextract.cpp', sources = ['pyrclextract.cpp'])
])
setup (name = 'Recoll', setup (name = 'Recoll',
version = '1.0', version = '1.0',
description = 'Query/Augment a Recoll full text index', description = 'Query/Augment a Recoll full text index',
author = 'J.F. Dockes', author = 'J.F. Dockes',
author_email = 'jfd@recoll.org', author_email = 'jfd@recoll.org',
url = 'http://www.recoll.org',
license = 'GPL',
long_description = ''' long_description = '''
''', ''',
packages = ['recoll'],
ext_package = 'recoll',
ext_modules = [module1, module2]) ext_modules = [module1, module2])

View file

@ -6,8 +6,14 @@ This could actually be useful for something after some customization
import sys import sys
from getopt import getopt from getopt import getopt
# Prefer the packaged layout (recoll.recoll + recoll.rclextract). If that
# import fails, fall back to the plain top-level recoll module, in which
# case no extractor is available and hasextract is False.
try:
    from recoll import recoll
    from recoll import rclextract
    hasextract = True
except ImportError:
    # Only a failed import triggers the fallback; any other exception
    # (KeyboardInterrupt, errors raised inside the extension) propagates.
    import recoll
    hasextract = False
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime", allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
"ipath", "fbytes", "dbytes", "relevancyrating") "ipath", "fbytes", "dbytes", "relevancyrating")
@ -27,9 +33,15 @@ class ptrmeths:
def extract(doc): def extract(doc):
extractor = rclextract.Extractor(doc) extractor = rclextract.Extractor(doc)
newdoc = extractor.extract(doc.ipath) newdoc = extractor.textextract(doc.ipath)
return newdoc return newdoc
def extractofile(doc, outfilename=""):
    """Run Extractor.idoctofile() for doc and return its result.

    An rclextract.Extractor is built from doc, then idoctofile() is called
    with doc.ipath and doc.mimetype. outfilename (default: empty string) is
    forwarded as the ofilename keyword argument. Whatever idoctofile()
    returns is handed back to the caller (presumably the name of the file
    that was written -- confirm against the rclextract module).
    """
    return rclextract.Extractor(doc).idoctofile(
        doc.ipath, doc.mimetype, ofilename=outfilename)
def doquery(db, q): def doquery(db, q):
# Get query object # Get query object
query = db.query() query = db.query()
@ -48,9 +60,11 @@ def doquery(db, q):
while query.next >= 0 and query.next < nres: while query.next >= 0 and query.next < nres:
doc = query.fetchone() doc = query.fetchone()
print query.next, ":", print query.next, ":",
# for k,v in doc.items().items(): #for k,v in doc.items().items():
# print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8') #print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
# continue #continue
#outfile = extractofile(doc)
#print "outfile:", outfile, "url", doc.url.encode("utf-8")
for k in ("title", "mtime", "author"): for k in ("title", "mtime", "author"):
value = getattr(doc, k) value = getattr(doc, k)
# value = doc.get(k) # value = doc.get(k)