1
0
Fork 0
mirror of https://github.com/futurepress/epub.js.git synced 2025-10-04 15:09:16 +02:00
epub.js/tools/appcache.py

111 lines
No EOL
3.6 KiB
Python

#!/usr/bin/python
"""WARNING: Script is in beta and needs to be tested thoroughly.
The script generates a rudimentary appcache file from the manifest of a content.opf file, read from either
an uncompressed epub directory or a compressed epub file, and places it in the current directory.
Usage: appcache.py --input='/path/to/content.opf' to use the content.opf inside an uncompressed epub directory,
OR --input='/path/to/book.epub' to use a compressed epub file.
"""
__author__ = 'Luis Aguilar'
__email__ = 'luis@berkeley.edu'
import os
import xml.etree.ElementTree as ET
import zipfile
import datetime
import epub
from optparse import OptionParser
def get_parameters():
    """
    Parse and validate the command-line options.

    Options are read from sys.argv through optparse. ``--input`` must name
    an existing file whose name ends in ``.epub`` or ``.opf`` (case
    insensitive); ``--output`` defaults to ``'.'``.

    Returns a dict with the keys:
        'input'  -- the input path exactly as given
        'output' -- the value of --output
        'file'   -- 'pub' for an .epub input, 'opf' for an .opf input

    On invalid input parser.error() prints the usage message and exits
    (SystemExit), so this function only returns for valid input.
    """
    parser = OptionParser()
    parser.add_option('-i', '--input', dest='input')
    parser.add_option('-o', '--output', dest='output', default='.')
    options, _ = parser.parse_args()

    if not options.input:
        parser.error('input path is empty, use --input="path.to.opf.or.epub.filename"')

    # Match the full extension rather than just the last three characters,
    # so a name such as "notes.pub" is rejected here instead of being
    # handed to the epub parser.
    lowered = options.input.lower()
    if not lowered.endswith(('.epub', '.opf')):
        parser.error('Please include opf or epub filename in path')
    if not os.path.isfile(options.input):
        parser.error('input epub or content.opf file could not be found, please verify path and filename')

    # The last three characters ('pub' / 'opf') are the tag callers use to
    # decide how the input has to be processed.
    return {'input': options.input, 'output': options.output, 'file': lowered[-3:]}
def process_extracted_opf(userParams):
    """
    Collect the manifest item hrefs from an extracted content.opf file.

    userParams -- dict whose 'input' entry is the path to the content.opf.

    Returns a list with the 'href' attribute of every manifest/item
    element in the OPF namespace, in document order. Raises KeyError if an
    item has no 'href' attribute.

    ET.parse() is handed a path, so it opens and closes the file itself;
    no explicit close is needed here.
    """
    namespaces = {'xmlns': 'http://www.idpf.org/2007/opf',
                  'dc': 'http://purl.org/dc/elements/1.1/',
                  'dcterms': 'http://purl.org/dc/terms/'}
    # Call form of print: valid on Python 2 and Python 3 alike.
    print("Parsing content.opf file at " + userParams['input'])
    root = ET.parse(userParams['input']).getroot()
    items = root.findall('xmlns:manifest/xmlns:item', namespaces=namespaces)
    return [item.attrib['href'] for item in items]
def process_epub(userParams):
    """
    Collect the manifest item hrefs from a compressed epub file.

    userParams -- dict whose 'input' entry is the path to the .epub file.

    Opens the book with epub.open_epub() and returns a list holding the
    'href' of every value in book.opf.manifest.
    """
    book = epub.open_epub(userParams['input'])
    # Call form of print: valid on Python 2 and Python 3 alike.
    print("Parsing epub file at " + userParams['input'])
    return [item.href for item in book.opf.manifest.values()]
def write_appcache(itemHrefs, outputDir='.'):
    """
    Write the 'epub.appcache' manifest listing the extracted hrefs.

    itemHrefs -- iterable of href strings, written one per line.
    outputDir -- existing directory in which the file is created;
                 defaults to the current directory.

    The file starts with the 'CACHE MANIFEST' header, followed by a
    comment line holding the current timestamp and then the hrefs. A file
    of the same name already present in outputDir is overwritten.
    """
    fileName = os.path.join(outputDir, 'epub.appcache')
    cacheHeader = 'CACHE MANIFEST\n'
    # TODO: add functionality that checks for an existing appcache
    # The with-block closes the file even if one of the writes fails.
    with open(fileName, "w") as f_appcache:
        f_appcache.write(cacheHeader)
        f_appcache.write('# ' + str(datetime.datetime.now()) + '\n')
        for href in itemHrefs:
            f_appcache.write(href + '\n')


def main():
    """
    Generate the appcache for the epub or content.opf named on the
    command line and write it into the --output directory.
    """
    # get user defined parameters
    userParams = get_parameters()
    # process the epub or the content file extracted from an epub
    if userParams['file'] == 'pub':
        itemHrefs = process_epub(userParams)
    elif userParams['file'] == 'opf':
        itemHrefs = process_extracted_opf(userParams)
    else:
        # Fail with a clear message instead of an unbound-variable error
        # should a new file tag ever be introduced without a handler.
        raise ValueError('unsupported input type: ' + userParams['file'])
    # take extracted items and generate the appcache in the directory
    # requested with --output ('.' when the option is not given)
    write_appcache(itemHrefs, userParams['output'])


if __name__ == '__main__':
    main()