Make Python recoll a package with two modules: recoll and rclextract
This commit is contained in:
parent
fc68d31151
commit
66e8817f77
7 changed files with 51 additions and 15 deletions
|
@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS)
|
||||||
ar ru librcl.a $(OBJS)
|
ar ru librcl.a $(OBJS)
|
||||||
$(RANLIB) librcl.a
|
$(RANLIB) librcl.a
|
||||||
|
|
||||||
|
# Future
|
||||||
|
#all: librecoll.so
|
||||||
|
#librecoll.so : $(DEPS) $(OBJS)
|
||||||
|
# g++ -shared -Wl,--no-undefined # -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) # -lxapian -lz -lX11 -lpthread -ldl
|
||||||
|
|
||||||
rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
|
rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
|
||||||
$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
|
||||||
beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs
|
beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs
|
||||||
|
|
|
@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS)
|
||||||
ar ru librcl.a \$(OBJS)
|
ar ru librcl.a \$(OBJS)
|
||||||
\$(RANLIB) librcl.a
|
\$(RANLIB) librcl.a
|
||||||
|
|
||||||
|
# Future
|
||||||
|
#all: librecoll.so
|
||||||
|
#librecoll.so : \$(DEPS) \$(OBJS)
|
||||||
|
# g++ -shared -Wl,--no-undefined \
|
||||||
|
# -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \
|
||||||
|
# -lxapian -lz -lX11 -lpthread -ldl
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
for c in $SRC_CPP;do
|
for c in $SRC_CPP;do
|
||||||
|
|
|
@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
|
||||||
outfile.assign(soutfile);
|
outfile.assign(soutfile);
|
||||||
|
|
||||||
if (self->xtr == 0) {
|
if (self->xtr == 0) {
|
||||||
PyErr_SetString(PyExc_AttributeError, "extract: null object");
|
PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (ipath.empty()) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
self->xtr->setTargetMType(mimetype);
|
||||||
TempFile temp;
|
TempFile temp;
|
||||||
bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
|
bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
|
||||||
if (!status) {
|
if (!status) {
|
||||||
|
@ -285,5 +291,5 @@ initrclextract(void)
|
||||||
Py_INCREF(&rclx_ExtractorType);
|
Py_INCREF(&rclx_ExtractorType);
|
||||||
PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);
|
PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);
|
||||||
|
|
||||||
recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0);
|
recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1703,6 +1703,6 @@ initrecoll(void)
|
||||||
|
|
||||||
// Export a few pointers for the benefit of other recoll python modules
|
// Export a few pointers for the benefit of other recoll python modules
|
||||||
PyObject* doctypecapsule =
|
PyObject* doctypecapsule =
|
||||||
PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0);
|
PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
|
||||||
PyModule_AddObject(m, "doctypeptr", doctypecapsule);
|
PyModule_AddObject(m, "doctypeptr", doctypecapsule);
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,4 +28,6 @@ typedef struct {
|
||||||
RclConfig *rclconfig;
|
RclConfig *rclconfig;
|
||||||
} recoll_DocObject;
|
} recoll_DocObject;
|
||||||
|
|
||||||
|
#define PYRECOLL_PACKAGE "recoll."
|
||||||
|
|
||||||
#endif // _PYRECOLL_H_INCLUDED_
|
#endif // _PYRECOLL_H_INCLUDED_
|
||||||
|
|
|
@ -50,8 +50,7 @@ module1 = Extension('recoll',
|
||||||
],
|
],
|
||||||
libraries = libs,
|
libraries = libs,
|
||||||
library_dirs = libdirs,
|
library_dirs = libdirs,
|
||||||
sources = ['pyrecoll.cpp',
|
sources = ['pyrecoll.cpp'])
|
||||||
])
|
|
||||||
|
|
||||||
module2 = Extension('rclextract',
|
module2 = Extension('rclextract',
|
||||||
define_macros = [('MAJOR_VERSION', '1'),
|
define_macros = [('MAJOR_VERSION', '1'),
|
||||||
|
@ -67,14 +66,17 @@ module2 = Extension('rclextract',
|
||||||
],
|
],
|
||||||
libraries = libs,
|
libraries = libs,
|
||||||
library_dirs = libdirs,
|
library_dirs = libdirs,
|
||||||
sources = ['pyrclextract.cpp',
|
sources = ['pyrclextract.cpp'])
|
||||||
])
|
|
||||||
|
|
||||||
setup (name = 'Recoll',
|
setup (name = 'Recoll',
|
||||||
version = '1.0',
|
version = '1.0',
|
||||||
description = 'Query/Augment a Recoll full text index',
|
description = 'Query/Augment a Recoll full text index',
|
||||||
author = 'J.F. Dockes',
|
author = 'J.F. Dockes',
|
||||||
author_email = 'jfd@recoll.org',
|
author_email = 'jfd@recoll.org',
|
||||||
|
url = 'http://www.recoll.org',
|
||||||
|
license = 'GPL',
|
||||||
long_description = '''
|
long_description = '''
|
||||||
''',
|
''',
|
||||||
|
packages = ['recoll'],
|
||||||
|
ext_package = 'recoll',
|
||||||
ext_modules = [module1, module2])
|
ext_modules = [module1, module2])
|
||||||
|
|
|
@ -6,8 +6,14 @@ This could actually be useful for something after some customization
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from getopt import getopt
|
from getopt import getopt
|
||||||
import recoll
|
|
||||||
import rclextract
|
try:
|
||||||
|
from recoll import recoll
|
||||||
|
from recoll import rclextract
|
||||||
|
hasextract = True
|
||||||
|
except:
|
||||||
|
import recoll
|
||||||
|
hasextract = False
|
||||||
|
|
||||||
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
||||||
"ipath", "fbytes", "dbytes", "relevancyrating")
|
"ipath", "fbytes", "dbytes", "relevancyrating")
|
||||||
|
@ -27,9 +33,15 @@ class ptrmeths:
|
||||||
|
|
||||||
def extract(doc):
|
def extract(doc):
|
||||||
extractor = rclextract.Extractor(doc)
|
extractor = rclextract.Extractor(doc)
|
||||||
newdoc = extractor.extract(doc.ipath)
|
newdoc = extractor.textextract(doc.ipath)
|
||||||
return newdoc
|
return newdoc
|
||||||
|
|
||||||
|
def extractofile(doc, outfilename=""):
|
||||||
|
extractor = rclextract.Extractor(doc)
|
||||||
|
outfilename = extractor.idoctofile(doc.ipath, doc.mimetype, \
|
||||||
|
ofilename=outfilename)
|
||||||
|
return outfilename
|
||||||
|
|
||||||
def doquery(db, q):
|
def doquery(db, q):
|
||||||
# Get query object
|
# Get query object
|
||||||
query = db.query()
|
query = db.query()
|
||||||
|
@ -48,9 +60,11 @@ def doquery(db, q):
|
||||||
while query.next >= 0 and query.next < nres:
|
while query.next >= 0 and query.next < nres:
|
||||||
doc = query.fetchone()
|
doc = query.fetchone()
|
||||||
print query.next, ":",
|
print query.next, ":",
|
||||||
# for k,v in doc.items().items():
|
#for k,v in doc.items().items():
|
||||||
# print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
|
#print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
|
||||||
# continue
|
#continue
|
||||||
|
#outfile = extractofile(doc)
|
||||||
|
#print "outfile:", outfile, "url", doc.url.encode("utf-8")
|
||||||
for k in ("title", "mtime", "author"):
|
for k in ("title", "mtime", "author"):
|
||||||
value = getattr(doc, k)
|
value = getattr(doc, k)
|
||||||
# value = doc.get(k)
|
# value = doc.get(k)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue