Added beta script that generates appcache file from opf or epub files.

2025-10-04 15:09:16 +02:00 · 2013-07-19 14:06:57 -07:00 · 2013-07-19 14:06:57 -07:00 · aeca141508
commit aeca141508
parent 8f06e86d9c
1 changed files with 111 additions and 0 deletions
--- a/tools/appcache.py
+++ b/tools/appcache.py
@ -0,0 +1,111 @@
+#!/usr/bin/python
+ 
+"""WARNING:  Script is in beta and needs to be tested thoroughly.
+
+The script generates a rudimentary appcache file (epub.appcache) from the manifest in a content.opf file,
+taken from either an uncompressed epub directory or a compressed epub file, and places it in the current directory.
+
+Usage: appcache.py --input='/path/to/content.opf' for the content.opf inside an uncompressed epub directory
+   OR  appcache.py --input='/path/to/book.epub' for a compressed epub file
+"""
+ 
+__author__ = 'Luis Aguilar'
+__email__ = 'luis@berkeley.edu'
+
+import os
+import xml.etree.ElementTree as ET
+import zipfile
+import datetime
+import epub
+from optparse import OptionParser
+
+def get_parameters():
+    """
+        Parse and validate the command line options.
+
+        Returns a dict with three keys:
+          'input'  - path given with -i/--input,
+          'output' - value given with -o/--output (defaults to '.'),
+          'file'   - 'pub' for a .epub input, 'opf' for a .opf input.
+
+        A missing input, an unsupported extension or a path that is not an
+        existing file is reported through parser.error(), which prints the
+        usage message and exits.
+    """
+    parser = OptionParser()
+    parser.add_option('-i', '--input', dest='input')
+    parser.add_option('-o', '--output', dest='output', default='.')
+    options, _args = parser.parse_args()
+
+    if not options.input:
+        return parser.error('input path is empty, use --input="path.to.opf.or.epub.filename"')
+
+    # Compare the real extension: matching on the last three characters alone
+    # would also accept unrelated names such as "flyer.pub".
+    extension = os.path.splitext(options.input)[1].lower()
+    if extension not in ('.epub', '.opf'):
+        return parser.error('Please include opf or epub filename in path')
+    if not os.path.isfile(options.input):
+        return parser.error('input epub or content.opf file could not be found, please verify path and filename')
+
+    # 'file' carries the three-letter tag ('pub' / 'opf') that main() dispatches on.
+    return {'input': options.input, 'output': options.output, 'file': extension[-3:]}
+
+def process_extracted_opf(userParams):
+    """
+        Collect the manifest item hrefs from an extracted content.opf file.
+
+        userParams['input'] is the path of the content.opf file.  The path is
+        handed straight to ElementTree.parse(), which manages the file handle
+        itself, so no explicit close is needed here.
+
+        Returns the 'href' attribute of every <manifest>/<item> element, in
+        document order.  An item without an href raises KeyError.
+    """
+    # The OPF elements live in the IDPF package namespace.
+    namespaces = {'opf': 'http://www.idpf.org/2007/opf'}
+
+    # print() with a single argument behaves the same on Python 2 and 3.
+    print("Parsing content.opf file at " + userParams['input'])
+
+    root = ET.parse(userParams['input']).getroot()
+    return [item.attrib['href']
+            for item in root.findall('opf:manifest/opf:item', namespaces=namespaces)]
+
+def process_epub(userParams):
+    """
+        Collect the manifest item hrefs from a compressed epub file.
+
+        userParams['input'] is the path of the .epub file, which is opened
+        with the third-party epub library.
+
+        Returns a list with the href of every entry in the book's OPF
+        manifest.
+    """
+    book = epub.open_epub(userParams['input'])
+
+    # print() with a single argument behaves the same on Python 2 and 3.
+    print("Parsing epub file at " + userParams['input'])
+
+    return [item.href for item in book.opf.manifest.values()]
+
+def write_appcache(itemHrefs, outputDir='.'):
+    """
+        Create the epub.appcache file from the extracted hrefs.
+
+        itemHrefs - iterable of href strings, written one per line.
+        outputDir - directory that receives epub.appcache; defaults to the
+                    current directory.  It is not created if missing.
+
+        The file starts with the 'CACHE MANIFEST' header followed by a
+        comment line holding the current timestamp.  An existing
+        epub.appcache in outputDir is overwritten.
+    """
+    fileName = os.path.join(outputDir, 'epub.appcache')
+    cacheHeader = 'CACHE MANIFEST\n'
+
+    # The with-block closes the file even if a write fails part-way.
+    with open(fileName, "w") as f_appcache:
+        f_appcache.write(cacheHeader)
+        f_appcache.write('# ' + str(datetime.datetime.now()) + '\n')
+        for href in itemHrefs:
+            f_appcache.write(href + '\n')
+
+def main():
+    """
+        Read the command line, extract the manifest hrefs and write the
+        appcache file into the requested output directory.
+    """
+    # get user defined parameters
+    userParams = get_parameters()
+
+    # process the epub or the content file extracted from an epub
+    fileType = userParams['file']
+    if fileType == 'pub':
+        itemHrefs = process_epub(userParams)
+    elif fileType == 'opf':
+        itemHrefs = process_extracted_opf(userParams)
+    else:
+        # Fail loudly on an unexpected tag instead of reaching
+        # write_appcache() with itemHrefs unbound.
+        raise ValueError('unsupported input type: %r' % (fileType,))
+
+    # take extracted items and generate the appcache in the --output directory
+    write_appcache(itemHrefs, userParams['output'])
+ 
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()