Make the Python recoll binding a package containing two modules: recoll and rclextract

This commit is contained in:
Jean-Francois Dockes 2012-12-20 17:05:34 +01:00
parent fc68d31151
commit 66e8817f77
7 changed files with 51 additions and 15 deletions

View file

@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS)
ar ru librcl.a $(OBJS)
$(RANLIB) librcl.a
# Future
#all: librecoll.so
#librecoll.so : $(DEPS) $(OBJS)
# g++ -shared -Wl,--no-undefined # -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) # -lxapian -lz -lX11 -lpthread -ldl
rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs

View file

@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS)
ar ru librcl.a \$(OBJS)
\$(RANLIB) librcl.a
# Future
#all: librecoll.so
#librecoll.so : \$(DEPS) \$(OBJS)
# g++ -shared -Wl,--no-undefined \
# -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \
# -lxapian -lz -lX11 -lpthread -ldl
EOF
for c in $SRC_CPP;do

View file

@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
outfile.assign(soutfile);
if (self->xtr == 0) {
PyErr_SetString(PyExc_AttributeError, "extract: null object");
PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
return 0;
}
if (ipath.empty()) {
PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath");
return 0;
}
self->xtr->setTargetMType(mimetype);
TempFile temp;
bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
if (!status) {
@ -285,5 +291,5 @@ initrclextract(void)
Py_INCREF(&rclx_ExtractorType);
PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);
recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0);
recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
}

View file

@ -1703,6 +1703,6 @@ initrecoll(void)
// Export a few pointers for the benefit of other recoll python modules
PyObject* doctypecapsule =
PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0);
PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
PyModule_AddObject(m, "doctypeptr", doctypecapsule);
}

View file

@ -28,4 +28,6 @@ typedef struct {
RclConfig *rclconfig;
} recoll_DocObject;
// Package prefix for the names of capsules exported by the Python modules.
// It is concatenated directly with "recoll.doctypeptr" to build the capsule
// name, hence the trailing dot.
// NOTE(review): presumably this must match ext_package in setup.py -- confirm
// before renaming the package.
#define PYRECOLL_PACKAGE "recoll."
#endif // _PYRECOLL_H_INCLUDED_

View file

@ -50,8 +50,7 @@ module1 = Extension('recoll',
],
libraries = libs,
library_dirs = libdirs,
sources = ['pyrecoll.cpp',
])
sources = ['pyrecoll.cpp'])
module2 = Extension('rclextract',
define_macros = [('MAJOR_VERSION', '1'),
@ -67,14 +66,17 @@ module2 = Extension('rclextract',
],
libraries = libs,
library_dirs = libdirs,
sources = ['pyrclextract.cpp',
])
sources = ['pyrclextract.cpp'])
# Build and install both extension modules inside the 'recoll' package
# (ext_package), so that they are reachable as recoll.recoll and
# recoll.rclextract.
setup(
    name='Recoll',
    version='1.0',
    description='Query/Augment a Recoll full text index',
    author='J.F. Dockes',
    author_email='jfd@recoll.org',
    url='http://www.recoll.org',
    license='GPL',
    long_description='''
''',
    packages=['recoll'],
    ext_package='recoll',
    ext_modules=[module1, module2])

View file

@ -6,8 +6,14 @@ This could actually be useful for something after some customization
import sys
from getopt import getopt
# Prefer the packaged layout (recoll.recoll / recoll.rclextract) and fall
# back to plain top-level modules when the package import fails.
# Only ImportError triggers the fallback: a bare "except:" would also hide
# KeyboardInterrupt, SystemExit and genuine errors raised while loading.
# NOTE(review): hasextract is set to False on the fallback path even though
# rclextract is imported there -- confirm this is intended.
try:
    from recoll import recoll
    from recoll import rclextract
    hasextract = True
except ImportError:
    import recoll
    import rclextract
    hasextract = False
# Names of the document metadata fields known to this script (the code that
# consumes this tuple is outside the visible part of the file).
allmeta = (
    "title",
    "keywords",
    "abstract",
    "url",
    "mimetype",
    "mtime",
    "ipath",
    "fbytes",
    "dbytes",
    "relevancyrating",
)
@ -27,9 +33,15 @@ class ptrmeths:
def extract(doc):
    """Return the result of Extractor.textextract() for *doc*.

    An rclextract.Extractor is built from doc and its textextract() method
    is called with doc.ipath; the value it returns is passed back unchanged.
    The earlier extractor.extract() call is gone: its result was immediately
    overwritten by the textextract() result, so it was never used.
    """
    extractor = rclextract.Extractor(doc)
    return extractor.textextract(doc.ipath)
def extractofile(doc, outfilename=""):
    """Extract *doc* to a file through rclextract and return the file name.

    An rclextract.Extractor is built from doc, then idoctofile() is called
    with doc.ipath, doc.mimetype and ofilename=outfilename. Whatever that
    call returns is handed back to the caller.
    NOTE(review): an empty outfilename presumably lets the extractor pick
    the output file itself -- confirm against the rclextract module.
    """
    extractor = rclextract.Extractor(doc)
    return extractor.idoctofile(doc.ipath, doc.mimetype,
                                ofilename=outfilename)
def doquery(db, q):
# Get query object
query = db.query()
@ -51,6 +63,8 @@ def doquery(db, q):
#for k,v in doc.items().items():
#print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
#continue
#outfile = extractofile(doc)
#print "outfile:", outfile, "url", doc.url.encode("utf-8")
for k in ("title", "mtime", "author"):
value = getattr(doc, k)
# value = doc.get(k)