mirror of
https://github.com/Yetangitu/owncloud-apps.git
synced 2025-10-02 14:49:17 +02:00
- add rudimentary epub parser for metadata extraction
- add cover images - add configurable preview settings (should probably be in core or in a separate app) - add some metadata to feed template (file size, type and filename)
This commit is contained in:
parent
64cfb7925e
commit
007b7c7791
13 changed files with 704 additions and 12 deletions
332
files_opds/lib/epub.php
Normal file
332
files_opds/lib/epub.php
Normal file
|
@ -0,0 +1,332 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* ownCloud - Files_Opds App
|
||||
*
|
||||
* @author Frank de Lange
|
||||
* @copyright 2014 Frank de Lange
|
||||
*
|
||||
* This file is licensed under the Affero General Public License version 3 or
|
||||
* later.
|
||||
*/
|
||||
|
||||
namespace OCA\Files_Opds;
|
||||
|
||||
use \DOMXpath;
|
||||
use \DOMElement;
|
||||
use \DOMDocument;
|
||||
|
||||
/**
|
||||
* Epub class, a simpleminded read-only epub parser intended for metadata extraction
|
||||
* based on https://github.com/splitbrain/php-epub-meta
|
||||
*/
|
||||
class Epub {
    /** @var \DOMDocument|null parsed OPF package document, null if loading failed */
    protected $xml;
    /** @var EPubDOMXPath|null XPath helper bound to $xml, null if loading failed */
    protected $xpath;
    /** @var string path to the epub file */
    protected $file;
    /** @var string|null path of the OPF package document inside the archive */
    protected $meta;

    /**
     * @brief Constructor
     *
     * Opens the epub, locates the OPF package document through
     * META-INF/container.xml and parses it. Failures are logged and leave the
     * object without metadata; in that state all getters return empty values.
     *
     * @param string $file path to epub file to work on
     */
    public function __construct($file) {
        $this->file = $file;

        $zip = new \ZipArchive();
        // ZipArchive::open() returns true on success and a non-zero (truthy)
        // error code on failure, so it has to be compared against true.
        if ($zip->open($this->file) !== true) {
            \OC_Log::write('epub', "Failed to read epub file", \OC_Log::ERROR);
            return;
        }

        $this->load($zip);
        // close the archive on success as well as on every failure path
        $zip->close();
    }

    /**
     * @brief read container and package metadata from an opened archive
     *
     * Sets $this->meta, $this->xml and $this->xpath on success; logs and
     * returns early on failure. The caller owns (and closes) the archive.
     *
     * @param \ZipArchive $zip opened epub archive
     */
    protected function load(\ZipArchive $zip) {
        // read container data
        $data = $zip->getFromName('META-INF/container.xml');
        if (!$data) {
            \OC_Log::write('epub', "Failed to access epub container data", \OC_Log::ERROR);
            return;
        }

        $container = new \DOMDocument();
        $container->registerNodeClass('DOMElement','\OCA\Files_Opds\EPubDOMElement');
        if (!$container->loadXML($data)) {
            \OC_Log::write('epub', 'Failed to parse epub container data', \OC_Log::ERROR);
            return;
        }

        $xpath = new EPubDOMXPath($container);
        $rootfiles = $xpath->query('//n:rootfiles/n:rootfile[@media-type="application/oebps-package+xml"]');
        if (!$rootfiles || $rootfiles->length === 0) {
            \OC_Log::write('epub', 'Failed to locate epub metadata', \OC_Log::ERROR);
            return;
        }

        $this->meta = (string) $rootfiles->item(0)->attr('full-path');
        if ($this->meta === '') {
            \OC_Log::write('epub', 'Failed to locate epub metadata', \OC_Log::ERROR);
            return;
        }

        // load metadata
        $data = $zip->getFromName($this->meta);
        if (!$data) {
            \OC_Log::write('epub', 'Failed to access epub metadata', \OC_Log::ERROR);
            return;
        }

        $package = new \DOMDocument();
        $package->registerNodeClass('DOMElement','\OCA\Files_Opds\EPubDOMElement');
        if (!$package->loadXML($data)) {
            \OC_Log::write('epub', 'Failed to parse epub metadata', \OC_Log::ERROR);
            return;
        }
        $package->formatOutput = true;

        // only publish the document once it has been parsed successfully
        $this->xml = $package;
        $this->xpath = new EPubDOMXPath($package);
    }

    /**
     * @brief run an XPath query against the package document
     *
     * @param string $expression XPath expression
     * @return array list of matching nodes; empty if no metadata was loaded,
     *               the expression is invalid or nothing matches
     */
    protected function nodes($expression) {
        if ($this->xpath === null) {
            return array();
        }
        $result = $this->xpath->query($expression);
        if (!$result) {
            return array();
        }
        return iterator_to_array($result, false);
    }

    /**
     * @brief file name getter
     * @return string filename
     */
    public function file() {
        return $this->file;
    }

    /**
     * @brief get author(s)
     *
     * Prefers dc:creator elements with role "aut"; if there are none, all
     * dc:creator elements are used.
     *
     * @return array $authors map of "file-as" name (falls back to the display
     *                        name) => display name
     */
    public function Authors() {
        $nodes = $this->nodes('//opf:metadata/dc:creator[@opf:role="aut"]');
        if (count($nodes) === 0) {
            // no nodes were found, let's try again without role
            $nodes = $this->nodes('//opf:metadata/dc:creator');
        }

        $authors = array();
        foreach ($nodes as $node) {
            $name = $node->nodeValue;
            $as = $node->attr('opf:file-as');
            if (!$as) {
                $as = $name;
            }
            $authors[$as] = $name;
        }
        return $authors;
    }

    /**
     * @brief get book title
     *
     * @return string $title
     */
    public function Title(){
        return $this->get('dc:title');
    }

    /**
     * @brief get language
     *
     * @return string $lang
     */
    public function Language(){
        return $this->get('dc:language');
    }

    /**
     * @brief get publisher info
     *
     * @return string $publisher
     */
    public function Publisher(){
        return $this->get('dc:publisher');
    }

    /**
     * @brief get copyright info
     *
     * @return string $rights
     */
    public function Copyright(){
        return $this->get('dc:rights');
    }

    /**
     * @brief get description
     *
     * @return string $description
     */
    public function Description(){
        return $this->get('dc:description');
    }

    /**
     * @brief get ISBN number
     *
     * @return string $isbn
     */
    public function ISBN(){
        return $this->get('dc:identifier','opf:scheme','ISBN');
    }

    /**
     * @brief get Google Books ID
     *
     * @return string $google
     */
    public function Google(){
        return $this->get('dc:identifier','opf:scheme','GOOGLE');
    }

    /**
     * @brief get Amazon ID
     *
     * @return string $amazon
     */
    public function Amazon(){
        return $this->get('dc:identifier','opf:scheme','AMAZON');
    }

    /**
     * @brief get subjects (aka. tags)
     *
     * @return array $subjects
     */
    public function Subjects(){
        $subjects = array();
        foreach ($this->nodes('//opf:metadata/dc:subject') as $node) {
            $subjects[] = $node->nodeValue;
        }
        return $subjects;
    }

    /**
     * @brief get cover data
     *
     * Returns an associative array with the following keys:
     *
     *   mime  - media type declared in the manifest (usually image/jpeg)
     *   data  - binary image data, or false if the file could not be read
     *           from the archive
     *   found - internal path of the cover image
     *
     * @return array or null if no cover is declared or the archive cannot
     *               be opened
     */
    public function Cover(){
        $metas = $this->nodes('//opf:metadata/opf:meta[@name="cover"]');
        if (count($metas) === 0) {
            return null;
        }

        $coverid = (string) $metas[0]->attr('opf:content');
        if ($coverid === '') {
            return null;
        }

        // The cover id comes from the (untrusted) epub, so it is compared in
        // PHP rather than being concatenated into an XPath expression.
        $cover = null;
        foreach ($this->nodes('//opf:manifest/opf:item') as $item) {
            if ($item->getAttribute('id') === $coverid) {
                $cover = $item;
                break;
            }
        }
        if ($cover === null) {
            return null;
        }

        $mime = $cover->attr('opf:media-type');
        $href = (string) $cover->attr('opf:href');
        if ($href === '') {
            return null;
        }
        // image path is relative to meta file
        $path = ltrim(dirname('/'.$this->meta).'/'.$href, '/');

        $zip = new \ZipArchive();
        if ($zip->open($this->file) !== true) {
            return null;
        }
        $data = $zip->getFromName($path);
        $zip->close();

        return array(
            'mime'  => $mime,
            'data'  => $data,
            'found' => $path
        );
    }

    /**
     * @brief simple getter for simple meta attributes
     *
     * It should only be used for attributes that are expected to be unique
     *
     * @param string $item XML node to get
     * @param string $att Attribute name
     * @param string $aval Attribute value
     * @return string node value, or '' if there is no matching node
     */
    protected function get($item, $att=false, $aval=false){
        $expression = '//opf:metadata/'.$item;
        if ($att) {
            $expression .= "[@$att=\"$aval\"]";
        }

        $nodes = $this->nodes($expression);
        if (count($nodes) === 0) {
            return '';
        }
        return $nodes[0]->nodeValue;
    }
}
|
||||
|
||||
/**
 * DOMXPath variant that registers the EPub namespace prefixes declared on the
 * document's root element, when that root is an EPubDOMElement.
 */
class EPubDOMXPath extends DOMXPath {
    /**
     * @brief Constructor
     *
     * @param DOMDocument $doc document to run queries against
     */
    public function __construct(DOMDocument $doc){
        parent::__construct($doc);

        $root = $doc->documentElement;
        // without an EPub-aware root element there is no prefix map to register
        if(!is_a($root, '\OCA\Files_Opds\EPubDOMElement')){
            return;
        }

        foreach($root->namespaces as $prefix => $uri){
            $this->registerNamespace($prefix, $uri);
        }
    }
}
|
||||
|
||||
/**
 * DOMElement variant that knows the namespace prefixes used in EPub container
 * and package documents and offers a prefix-aware attribute getter.
 */
class EPubDOMElement extends DOMElement {
    /** @var array map of namespace prefix => namespace URI */
    public $namespaces = array(
        'n'   => 'urn:oasis:names:tc:opendocument:xmlns:container',
        'opf' => 'http://www.idpf.org/2007/opf',
        'dc'  => 'http://purl.org/dc/elements/1.1/'
    );

    /**
     * @brief Constructor
     *
     * @param string $name element name, optionally prefixed ("dc:title")
     * @param string $value element content; special characters are escaped
     * @param string $namespaceURI explicit namespace URI; if empty it is
     *               derived from a known prefix in $name
     */
    public function __construct($name, $value='', $namespaceURI=''){
        list($ns,$name) = $this->splitns($name);
        $value = htmlspecialchars($value);
        // an unknown prefix leaves the element without a namespace instead of
        // raising an undefined-index notice and passing null to DOMElement
        if(!$namespaceURI && $ns && isset($this->namespaces[$ns])){
            $namespaceURI = $this->namespaces[$ns];
        }
        parent::__construct($name, $value, (string) $namespaceURI);
    }

    /**
     * @brief split given name in namespace prefix and local part
     *
     * @param string $name
     * @return array (namespace, name); namespace is '' if $name has no prefix
     */
    public function splitns($name){
        $list = explode(':',$name,2);
        if(count($list) < 2) {
            array_unshift($list,'');
        }
        return $list;
    }

    /**
     * @brief simple EPub namespace aware attribute getter
     *
     * The attribute is looked up without a namespace if it has no prefix,
     * if the prefix is unknown, or if the prefix maps to the namespace this
     * element is already in. Otherwise a namespaced lookup is done.
     *
     * @param string $attr attribute name, optionally prefixed ("opf:role")
     * @return string attribute value
     */
    public function attr($attr){
        list($ns,$attr) = $this->splitns($attr);

        $nsuri = '';
        if($ns && isset($this->namespaces[$ns])){
            $nsuri = $this->namespaces[$ns];
            if(!$this->namespaceURI){
                if($this->isDefaultNamespace($nsuri)){
                    $nsuri = '';
                }
            }elseif($this->namespaceURI == $nsuri){
                $nsuri = '';
            }
        }

        if($nsuri){
            return $this->getAttributeNS($nsuri,$attr);
        }
        return $this->getAttribute($attr);
    }
}
|
||||
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue