mirror of
https://github.com/Yetangitu/owncloud-apps.git
synced 2025-10-02 14:49:17 +02:00
files_opds: add FictionBook 2 (.fb2) metadata parser
This commit is contained in:
parent
0d1e761354
commit
c73612586c
2 changed files with 245 additions and 1 deletions
217
files_opds/lib/fb2.php
Normal file
217
files_opds/lib/fb2.php
Normal file
|
@ -0,0 +1,217 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Nextcloud - Files_Opds App
|
||||
*
|
||||
* @author Frank de Lange
|
||||
* @copyright 2016 Frank de Lange
|
||||
*
|
||||
* This file is licensed under the Affero General Public License version 3 or
|
||||
* later.
|
||||
*/
|
||||
|
||||
namespace OCA\Files_Opds;
|
||||
|
||||
/**
|
||||
* FB2 class, a simpleminded read-only fb2 parser intended for metadata extraction
|
||||
*/
|
||||
class FB2 {
    /** @var string path to the fb2 file this instance was opened on */
    protected $file;

    /** @var \SimpleXMLElement parsed FB2 document (root element) */
    protected $xml;

    /**
     * @brief Constructor
     *
     * Parses the given file with SimpleXML and keeps the document for the
     * metadata getters.
     *
     * @param string $file path to fb2 file to work on
     * @throws \Exception if the file could not be parsed or has no content
     */
    public function __construct($file) {
        $this->file = $file;

        $xml = simplexml_load_file($file);
        // false means the parse failed; an element without children or
        // attributes evaluates to false too, so empty documents are
        // rejected here as well
        if (!$xml) {
            throw new \Exception("Failed to read FB2 file");
        }

        $this->xml = $xml;
    }

    /**
     * @brief file name getter
     * @return string filename
     */
    public function file() {
        return $this->file;
    }

    /**
     * @brief get author(s)
     *
     * Every author is rendered as the space separated concatenation of the
     * non-empty first-name, middle-name, last-name and nickname elements.
     *
     * @return array $authors list of strings, empty if no author is set
     */
    public function Authors() {
        $authors = array();
        $entries = $this->get_field('title-info', 'author');
        if ($entries === null) {
            return $authors;
        }

        foreach ($entries as $author) {
            $parts = array();
            foreach (array('first-name', 'middle-name', 'last-name', 'nickname') as $part) {
                $value = (string) $author->{$part};
                // compare against '' instead of using empty(), which would
                // also discard a name part consisting of "0"
                if ($value !== '') {
                    $parts[] = $value;
                }
            }
            $authors[] = implode(' ', $parts);
        }

        return $authors;
    }

    /**
     * @brief get book title
     *
     * @return string $title, empty string if not set
     */
    public function Title() {
        return $this->get_string($this->get_field('title-info', 'book-title'));
    }

    /**
     * @brief get language
     *
     * @return string $lang, empty string if not set
     */
    public function Language() {
        return $this->get_string($this->get_field('title-info', 'lang'));
    }

    /**
     * @brief get date
     *
     * Reads the date element from the document-info section.
     *
     * @return string $date, empty string if not set
     */
    public function Date() {
        return $this->get_string($this->get_field('document-info', 'date'));
    }

    /**
     * @brief get publisher info
     *
     * @return string $publisher, empty string if not set
     */
    public function Publisher() {
        return $this->get_string($this->get_field('publish-info', 'publisher'));
    }

    /**
     * @brief get copyright info
     *
     * @return string $rights always the empty string
     */
    public function Copyright() {
        /* no copyright info in fb2 files */
        return '';
    }

    /**
     * @brief get description
     *
     * Returns the inner XML (markup included) of the annotation element.
     *
     * @return string or null $description, null if there is no annotation
     */
    public function Description() {
        $annotation = $this->get_field('title-info', 'annotation');

        return ($annotation === null) ? null : $this->get_innerxml($annotation);
    }

    /**
     * @brief get ISBN number
     *
     * @return string $isbn, empty string if not set
     */
    public function ISBN() {
        return $this->get_string($this->get_field('publish-info', 'isbn'));
    }

    /**
     * @brief get subjects (aka. tags)
     *
     * Returns the text of every keywords element as one entry.
     * NOTE(review): FB2 appears to store keywords as a single comma
     * separated text field; the text is returned unsplit - confirm this is
     * what callers expect.
     *
     * @return array $subjects list of strings, empty if no keywords are set
     */
    public function Subjects() {
        $subjects = array();
        $entries = $this->get_field('title-info', 'keywords');
        if ($entries === null) {
            return $subjects;
        }

        foreach ($entries as $keyword) {
            $subjects[] = (string) $keyword;
        }

        return $subjects;
    }

    /**
     * @brief get cover data
     *
     * Looks up the binary element whose id matches the image referenced
     * from the coverpage element.
     *
     * Returns an associative array with the following keys:
     *
     *   mime  - content-type attribute of the binary (usually image/jpeg)
     *   data  - binary image data (base64 decoded)
     *   found - id of the binary element holding the cover
     *
     * @return array or null, null if no cover is referenced or the
     *         referenced binary does not exist
     */
    public function Cover() {
        $cover_link = $this->cover_href();
        // without a reference there is nothing to look up; this also keeps
        // binaries lacking an id from matching an empty reference
        if ($cover_link === '' || !isset($this->xml->binary)) {
            return null;
        }

        foreach ($this->xml->binary as $binary) {
            // cast instead of calling __toString(): a missing attribute
            // yields null, on which a method call would be a fatal error
            if ((string) $binary['id'] === $cover_link) {
                return array(
                    'mime' => (string) $binary['content-type'],
                    'data' => base64_decode((string) $binary),
                    'found' => $cover_link
                );
            }
        }

        return null;
    }

    /**
     * @brief get id of the binary referenced as cover image
     *
     * Reads the href attribute of the first coverpage image. The attribute
     * is looked up by namespace prefix 'l', then 'xlink', and finally by the
     * xlink namespace URI so documents using another prefix work as well.
     *
     * @return string id without leading '#', empty string if not found
     */
    protected function cover_href() {
        if (!isset($this->xml->description->{'title-info'}->coverpage->image)) {
            return '';
        }
        $image = $this->xml->description->{'title-info'}->coverpage->image;

        // 'l' takes precedence over 'xlink' when both are present
        foreach (array('l', 'xlink') as $prefix) {
            $attributes = $image->attributes($prefix, true);
            if ($attributes !== null && isset($attributes->href)) {
                return ltrim((string) $attributes->href, '#');
            }
        }

        $attributes = $image->attributes('http://www.w3.org/1999/xlink');
        if ($attributes !== null && isset($attributes->href)) {
            return ltrim((string) $attributes->href, '#');
        }

        return '';
    }

    /**
     * @brief get element from one of the description sections
     *
     * Uses isset() on the whole chain so a missing section or field does
     * not raise warnings.
     *
     * @param string $section child of description (eg. 'title-info')
     * @param string $field child of $section (eg. 'book-title')
     * @return \SimpleXMLElement or null if section or field is missing
     */
    protected function get_field($section, $field) {
        if (!isset($this->xml->description->{$section}->{$field})) {
            return null;
        }

        return $this->xml->description->{$section}->{$field};
    }

    /**
     * @brief get innerXML as string
     *
     * Returns the innerXML for an XML element as string, ie. the
     * serialization of all child nodes without the element's own tags.
     *
     * @param \SimpleXMLElement $element
     * @return string
     */
    protected function get_innerxml($element) {
        $innerxml = '';
        foreach (dom_import_simplexml($element)->childNodes as $child) {
            $innerxml .= $child->ownerDocument->saveXML($child);
        }

        return $innerxml;
    }

    /**
     * @brief get element as string
     *
     * Returns simplexml element as string, can cope with null elements
     *
     * @param \SimpleXMLElement|null $element
     * @return string text content, empty string for null/missing elements
     */
    protected function get_string($element) {
        return empty($element) ? '' : (string) $element;
    }
}
|
||||
|
|
@ -205,7 +205,6 @@ class Meta
|
|||
return $meta;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief check epub for metadata
|
||||
*
|
||||
|
@ -234,6 +233,34 @@ class Meta
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief check fb2 for metadata
|
||||
*
|
||||
* @param string $path path to fb2
|
||||
* @param arrayref $meta reference to array of metadata
|
||||
*/
|
||||
public static function fb2($path,&$meta) {
    // Fills $meta in place; nothing is returned. Any exception raised while
    // reading the file is logged and swallowed so one unreadable book does
    // not abort the caller.
    try {
        $fb2 = new FB2($path);
        $isbn = $fb2->ISBN();

        /* first try ISBN */
        if (!($isbn && Isbn::get($isbn, $meta))) {
            /* use FB2 internal metadata instead */
            $meta['author'] = json_encode($fb2->Authors());
            $meta['title'] = $fb2->Title();
            $meta['date'] = $fb2->Date();
            $meta['publisher'] = $fb2->Publisher();
            $meta['copyright'] = $fb2->Copyright();
            $meta['language'] = $fb2->Language();
            // Description() yields null when the book has no annotation;
            // cast so strip_tags() always receives a string
            $meta['description'] = strip_tags((string) $fb2->Description());
            $meta['isbn'] = $isbn;
            $meta['subjects'] = json_encode($fb2->Subjects());
        }
    } catch (\Exception $e) {
        // __CLASS__ equals what the argument-less get_class() returned here,
        // without relying on that deprecated call form
        \OCP\Util::writeLog(__CLASS__, $e->getMessage(), \OCP\Util::ERROR);
    }
}
|
||||
|
||||
/**
|
||||
* @brief check pdf for metadata
|
||||
*
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue