1
0
Fork 0
mirror of https://github.com/Yetangitu/owncloud-apps.git synced 2025-10-02 14:49:17 +02:00

Added ISBN metadata gathering, currently using ISBNdb and Google Books, more sources to be added. ISBN is scraped from text or discovered through embedded metadata.

This commit is contained in:
frankdelange 2014-12-20 01:22:06 +01:00
parent b6ac51058d
commit a161bd0d05
12 changed files with 527 additions and 54 deletions

BIN
dist/files_opds-0.5.tar.gz vendored Normal file

Binary file not shown.

View file

@ -25,6 +25,7 @@ $formats = array(
$tmpl = new \OCP\Template('files_opds', 'admin');
$tmpl->assign('feedSubtitle', Config::getApp('feed-subtitle', $l->t("%s OPDS catalog", $defaults->getName())));
$tmpl->assign('isbndbKey', Config::getApp('isbndb-key', ''));
$tmpl->assign('previewFormats', $formats);
$tmpl->assign('cover-x', Config::getApp('cover-x', '200'));
$tmpl->assign('cover-y', Config::getApp('cover-y', '200'));

View file

@ -25,12 +25,14 @@ if (isset($_POST['opdsCoverX'])) {
$opdsThumbX = isset($_POST['opdsThumbX']) ? (int) $_POST['opdsThumbX'] : 36;
$opdsThumbY = isset($_POST['opdsThumbY']) ? (int) $_POST['opdsThumbY'] : 36;
$opdsFeedSubtitle = isset($_POST['opdsFeedSubtitle']) ? $_POST['opdsFeedSubtitle'] : $l->t("%s OPDS catalog", $defaults->getName());
$opdsIsbndbKey = isset($_POST['opdsIsbndbKey']) ? $_POST['opdsIsbndbKey'] : '';
Config::setApp('cover-x', $opdsCoverX);
Config::setApp('cover-y', $opdsCoverY);
Config::setApp('thumb-x', $opdsThumbX);
Config::setApp('thumb-y', $opdsThumbX);
Config::setApp('feed_subtitle', $opdsFeedSubtitle);
Config::setApp('isbndb-key', $opdsIsbndbKey);
} else {
// set preview preferences
$opdsPreviewEpub = $_POST['opdsPreviewEpub'];

View file

@ -12,8 +12,8 @@
<!-- file ID, maps to OC file ID -->
<name>id</name>
<type>integer</type>
<default>0</default>
<notnull>true</notnull>
<primary>true</primary>
<length>11</length>
</field>
@ -105,6 +105,22 @@
<notnull>true</notnull>
<length>1024</length>
</field>
<field>
<!-- rescan (rescan if passed) -->
<name>rescan</name>
<type>timestamp</type>
<default></default>
<notnull>false</notnull>
</field>
<index>
<name>opds_metadata_id_index</name>
<field>
<name>id</name>
</field>
</index>
</declaration>
</table>
</database>

View file

@ -4,11 +4,20 @@
<name>OPDS catalog</name>
<description>Personal OPDS catalog</description>
<licence>AGPL</licence>
<version>0.4</version>
<version>0.5</version>
<author>Frank de Lange</author>
<requiremin>7.0</requiremin>
<shipped>true</shipped>
<default_enable/>
<repository type="git">https://github.com/Yetangitu/owncloud-apps</repository>
<bugs>https://github.com/Yetangitu/owncloud-apps/issues</bugs>
<homepage>https://github.com/Yetangitu/owncloud-apps</homepage>
<dependencies>
<php min-version="5.4" />
<database>pgsql</database>
<database>sqlite3</database>
<database>mysql</database>
</dependencies>
<ocsid>168132</ocsid>
</info>

View file

@ -27,7 +27,8 @@ $(document).ready(function(){
opdsCoverY : $('#opds-cover-y').val(),
opdsThumbX : $('#opds-thumb-x').val(),
opdsThumbY : $('#opds-thumb-y').val(),
opdsFeedSubtitle : $('#opds-feed-subtitle').val()
opdsFeedSubtitle : $('#opds-feed-subtitle').val(),
opdsIsbndbKey : $('#opds-isbndb-key').val()
};
OC.msg.startSaving('#opds-admin .msg');
$.post(OC.filePath('files_opds', 'ajax', 'admin.php'), data, opdsAdminCoverSettings.afterSave);
@ -42,8 +43,8 @@ $(document).ready(function(){
$('#opds-preview-opendocument').on("change", opdsAdminSettings.save);
$('#opds-preview-msoffice').on("change", opdsAdminSettings.save);
$('#opds-cover-x,#opds-cover-y,#opds-thumb-x,#opds-thumb-y,#opds-feed-subtitle').blur(opdsAdminCoverSettings.save);
$('#opds-cover-x,#opds-cover-y,#opds-thumb-x,#opds-thumb-y,#opds-feed-subtitle').keypress(function( event ) {
$('#opds-cover-x,#opds-cover-y,#opds-thumb-x,#opds-thumb-y,#opds-feed-subtitle,#opds-isbndb-key').blur(opdsAdminCoverSettings.save);
$('#opds-cover-x,#opds-cover-y,#opds-thumb-x,#opds-thumb-y,#opds-feed-subtitle,#opds-isbndb-key').keypress(function( event ) {
if (event.which == 13) {
event.preventDefault();
opdsAdminCoverSettings.save();

92
files_opds/lib/google.php Normal file
View file

@ -0,0 +1,92 @@
<?php
/**
* ownCloud - Files_Opds App
*
* @author Frank de Lange
* @copyright 2014 Frank de Lange
*
* This file is licensed under the Affero General Public License version 3 or
* later.
*/
namespace OCA\Files_Opds;
/**
 * Google class for OPDS
 *
 * Looks up book metadata for an ISBN through the Google Books volumes API
 * and copies the fields of interest into an OPDS metadata array.
 */
class Google
{
	/**
	 * @brief get ISBN data for $isbn at Google (books)
	 *
	 * On failure $meta['rescan'] is set to the time after which the lookup
	 * should be retried.
	 *
	 * @param string $isbn ISBN to search for
	 * @param arrayref &$meta OPDS metadata array, filled in on success
	 * @return int $status (Isbn::SUCCESS (0) on success, Isbn::NOT_FOUND or
	 *         Isbn::ERROR otherwise)
	 */
	public static function get($isbn,&$meta) {
		/* only digits and X can be part of an ISBN; dropping everything else
		 * keeps arbitrary text out of the request URL */
		$isbn = preg_replace('/[^0-9X]/i', '', (string) $isbn);
		$command = 'https://www.googleapis.com/books/v1/volumes?q=isbn:' . $isbn;

		$response = file_get_contents($command);
		$data = (false === $response) ? null : json_decode($response, true);
		if (!is_array($data)) {
			/* request failed or the body was not valid JSON */
			$meta['rescan'] = date("Y-m-d\TH:i:sP", time() + Isbn::RESCAN_ERROR);
			return Isbn::ERROR;
		}

		$found = isset($data['totalItems'])
			&& $data['totalItems'] > 0
			&& isset($data['items'][0]['volumeInfo'])
			&& is_array($data['items'][0]['volumeInfo']);
		if ($found) {
			/* the bibliographic fields live in the volumeInfo of each item;
			 * use the first (best) match */
			self::parse($data['items'][0]['volumeInfo'], $meta);
			return Isbn::SUCCESS;
		}

		$meta['rescan'] = date("Y-m-d\TH:i:sP", time() + Isbn::RESCAN_NOT_FOUND);
		return Isbn::NOT_FOUND;
	}

	/**
	 * @brief parse Google response into OPDS $meta array
	 *
	 * Unknown keys are ignored. 'industryIdentifiers' is skipped on purpose:
	 * the ISBN is what led to this record and is already set by the caller.
	 *
	 * @param array $data volumeInfo part of a Google Books item (json_decoded into array)
	 * @param arrayref &$meta OPDS metadata array
	 * @return bool true if $data was an array and has been processed
	 */
	public static function parse($data,&$meta) {
		if (!is_array($data)) {
			return false;
		}

		foreach ($data as $key => $value) {
			switch ($key) {
				case 'description':
					$meta['description'] = $value;
					if (array_key_exists('notes', $data)) {
						/* separate notes from a non-empty description by a blank line */
						$meta['description'] .= ((trim($value) == false) ? '' : "\n\n") . $data['notes'];
					}
					break;
				case 'categories':
					/* Google Books lists subjects under 'categories' */
				case 'subject_ids':
					$meta['subjects'] = json_encode($value);
					break;
				case 'title':
					$meta['title'] = $value;
					break;
				case 'authors':
					$meta['author'] = json_encode($value);
					break;
				case 'language':
					$meta['language'] = $value;
					break;
				case 'publisher':
					$meta['publisher'] = $value;
					break;
				case 'publishedDate':
					$meta['date'] = $value;
					break;
			}
		}

		return true;
	}
}

154
files_opds/lib/isbn.php Normal file
View file

@ -0,0 +1,154 @@
<?php
/**
* ownCloud - Files_Opds App
*
* @author Frank de Lange
* @copyright 2014 Frank de Lange
*
* This file is licensed under the Affero General Public License version 3 or
* later.
*/
namespace OCA\Files_Opds;
/**
 * ISBN class for OPDS
 *
 * Finds ISBNs in text, validates ISBN-10 and ISBN-13 check digits and looks
 * up metadata for an ISBN through the ISBNdb and Google providers.
 */
class Isbn
{
	/* status codes returned by the metadata providers */
	const SUCCESS = 0;
	const REQUEST_LIMIT_EXCEEDED = -1;
	const NOT_FOUND = -2;
	const ERROR = -3;

	/* number of seconds to wait before a failed lookup is retried */
	const RESCAN_LIMIT_EXCEEDED = 86400;
	const RESCAN_NOT_FOUND = 604800;
	const RESCAN_ERROR = 86400;

	/**
	 * @brief try to find a valid ISBN in the given text, using a cascade of
	 * regexps. Can be optimized.
	 *
	 * Every pass selects candidate lines, extracts the most likely ISBN from
	 * each of them and returns the first candidate with a valid check digit.
	 *
	 * @param string|array $text text to search through, either a single
	 *        string (split on line breaks) or an array of lines
	 * @return string $hit ISBN (digits and 'X' only) on success, false otherwise
	 */
	public static function scan($text) {
		/* preg_grep() only works on arrays, so split plain strings into lines */
		$lines = is_array($text) ? $text : preg_split('/\R/', (string) $text);
		if (!is_array($lines)) {
			return false;
		}

		/* select pattern, extract pattern, pattern of characters to strip */
		$passes = array(
			/* lines which mention 'ISBN' */
			array(
				'/isbn/i',
				'/.*ISBN(?:[ -]?1[03]:)?\s*([xX0-9-]{10,17}).*/i',
				'/[^0-9X]/i'
			),
			/* single ISBN-13 targeted pattern */
			array(
				'/\d{3}[ -]?\d[ -]?\d{4}[ -]?\d{4}[ -]?\d/',
				'/.*(\d{3}[ -]?\d[ -]?\d{4}[ -]?\d{4}[ -]?\d).*/',
				'/[^0-9]/i'
			),
			/* single ISBN-10 targeted pattern */
			array(
				'/\d[\d -]{8,11}[\dX]/i',
				'/.*(\d[\d -]{8,11}[\dX]).*/',
				'/[^0-9X]/i'
			)
		);

		foreach ($passes as $pass) {
			$hit = self::firstValid($lines, $pass[0], $pass[1], $pass[2]);
			if (false !== $hit) {
				return $hit;
			}
		}

		return false;
	}

	/**
	 * @brief run a single scan pass over $lines
	 *
	 * @param array $lines lines of text
	 * @param string $select regexp a line must match to be considered
	 * @param string $extract regexp whose first group captures the candidate
	 * @param string $strip regexp matching characters to remove from the candidate
	 * @return string first valid ISBN found, false if there is none
	 */
	private static function firstValid($lines, $select, $extract, $strip) {
		$hits = preg_grep($select, $lines);
		if (!$hits) {
			return false;
		}

		foreach ($hits as $hit) {
			$hit = preg_replace($extract, '$1', $hit, 1);
			/* normalise a lower case check character so 'x' is not rejected */
			$hit = strtoupper(preg_replace($strip, '', $hit));
			if (self::validate($hit)) {
				return $hit;
			}
		}

		return false;
	}

	/**
	 * @brief get metadata for given ISBN
	 *
	 * @param string $isbn ISBN to use
	 * @param arrayref &$meta OPDS metadata
	 * @return bool $success (true if metadata found)
	 */
	public static function get($isbn,&$meta) {
		/* set ISBN in metadata; can be overwritten later with ISBN13 */
		$meta['isbn'] = $isbn;

		/* Try ISBNdb, then Google. The comparison is strict on purpose: a
		 * provider result of false loosely equals SUCCESS (0) and would
		 * otherwise be mistaken for a hit. */
		if (self::SUCCESS === Isbndb::get($isbn, $meta)) {
			return true;
		}

		return self::SUCCESS === Google::get($isbn, $meta);
	}

	/**
	 * @brief validate ISBN
	 *
	 * @param string $isbn ISBN to validate (digits and 'X' only, no separators)
	 * @return bool true if ISBN is valid
	 */
	public static function validate($isbn) {
		if (null === $isbn || '' === $isbn) {
			return false;
		}

		$isbn = (string) $isbn;
		switch (strlen($isbn)) {
			case 10:
				return self::isIsbn10($isbn);
			case 13:
				return self::isIsbn13($isbn);
		}

		return false;
	}

	/**
	 * @brief check for valid ISBN10
	 *
	 * The weighted sum of the digits (weights 10 down to 1) must be a
	 * multiple of 11; 'X' stands for 10 and is only valid as check digit.
	 *
	 * @param string $isbn ISBN to check
	 * @return bool true if valid ISBN-10
	 */
	public static function isIsbn10 ($isbn) {
		$isbn = strtoupper((string) $isbn);
		if (10 !== strlen($isbn)) {
			return false;
		}

		$checksum = 0;
		for ($i = 0; $i < 10; ++$i) {
			$char = $isbn[$i];
			if ('X' === $char && 9 === $i) {
				$digit = 10;
			} elseif (ctype_digit($char)) {
				$digit = (int) $char;
			} else {
				return false;
			}
			$checksum += $digit * (10 - $i);
		}

		return 0 === $checksum % 11;
	}

	/**
	 * @brief check for valid ISBN13
	 *
	 * The sum of the digits, weighted alternately by 1 and 3, must be a
	 * multiple of 10.
	 *
	 * @param string $isbn ISBN to check
	 * @return bool true if valid ISBN-13
	 */
	public static function isIsbn13 ($isbn) {
		$isbn = (string) $isbn;
		if (13 !== strlen($isbn) || !ctype_digit($isbn)) {
			return false;
		}

		$checksum = 0;
		for ($i = 0; $i < 13; ++$i) {
			$weight = (0 === $i % 2) ? 1 : 3;
			$checksum += (int) $isbn[$i] * $weight;
		}

		return 0 === $checksum % 10;
	}
}

119
files_opds/lib/isbndb.php Normal file
View file

@ -0,0 +1,119 @@
<?php
/**
* ownCloud - Files_Opds App
*
* @author Frank de Lange
* @copyright 2014 Frank de Lange
*
* This file is licensed under the Affero General Public License version 3 or
* later.
*/
namespace OCA\Files_Opds;
/**
 * ISBNdb class for OPDS
 *
 * Looks up book metadata for an ISBN through the ISBNdb JSON API, using one
 * of the API keys stored in the 'isbndb-key' app setting.
 */
class Isbndb
{
	/**
	 * @brief get ISBN data for $isbn at ISBNdb
	 *
	 * The 'isbndb-key' setting may hold several comma-separated keys, one of
	 * which is picked at random for each request. When the request itself
	 * fails, $meta['rescan'] is set to the time after which the lookup should
	 * be retried. No rescan is scheduled when there is no key or the ISBN is
	 * invalid, as retrying would not change the outcome.
	 *
	 * @param string $isbn ISBN to search for
	 * @param arrayref &$meta OPDS metadata array, filled in on success
	 * @return int $status (Isbn::SUCCESS (0) on success, ERRORCODE otherwise)
	 */
	public static function get($isbn,&$meta) {
		$keyString = Config::getApp('isbndb-key','');
		if (!$keyString) {
			/* lookup disabled; return a real error code since false would
			 * loosely equal Isbn::SUCCESS */
			return Isbn::ERROR;
		}

		$keys = array_values(array_filter(array_map('trim', explode(',', $keyString)), 'strlen'));
		if (!$keys) {
			return Isbn::ERROR;
		}
		$key = $keys[array_rand($keys)];

		$isbn = preg_replace('/[^0-9X]/i', '', (string) $isbn);
		if (!Isbn::validate($isbn)) {
			return Isbn::ERROR;
		}

		$command = 'http://isbndb.com/api/v2/json/' . rawurlencode($key) . '/book/' . $isbn;
		$response = file_get_contents($command);
		$data = (false === $response) ? null : json_decode($response, true);

		if (!is_array($data)) {
			/* request failed or the body was not valid JSON */
			$status = Isbn::ERROR;
		} elseif (isset($data['error'])) {
			Util::logWarn("ISBNDB: " . $data['error']);
			if (false !== stripos($data['error'], 'Daily request limit exceeded')) {
				$status = Isbn::REQUEST_LIMIT_EXCEEDED;
			} elseif (false !== stripos($data['error'], 'Unable to locate')) {
				$status = Isbn::NOT_FOUND;
			} else {
				$status = Isbn::ERROR;
			}
		} elseif (!isset($data['data'][0]) || !is_array($data['data'][0])) {
			/* no error reported, but no book record either */
			$status = Isbn::NOT_FOUND;
		} else {
			self::parse($data['data'][0], $meta);
			return Isbn::SUCCESS;
		}

		/* hand the error code to parse() so it schedules the rescan */
		self::parse($status, $meta);
		return $status;
	}

	/**
	 * @brief parse ISBNdb response into OPDS $meta array
	 *
	 * When $data is one of the Isbn error codes instead of a book record,
	 * only $meta['rescan'] is set.
	 *
	 * @param array|int $data ISBNdb book record (json_decoded into array),
	 *        or an Isbn error code
	 * @param arrayref &$meta OPDS metadata array
	 * @return bool true if a book record was processed, false otherwise
	 */
	public static function parse($data,&$meta) {
		/* did the call succeed? If not, schedule a rescan */
		if (is_int($data)) {
			switch ($data) {
				case Isbn::REQUEST_LIMIT_EXCEEDED:
					$delay = Isbn::RESCAN_LIMIT_EXCEEDED;
					break;
				case Isbn::NOT_FOUND:
					$delay = Isbn::RESCAN_NOT_FOUND;
					break;
				case Isbn::ERROR:
					$delay = Isbn::RESCAN_ERROR;
					break;
				default:
					return false;
			}
			$meta['rescan'] = date("Y-m-d\TH:i:sP", time() + $delay);
			return false;
		}

		if (!is_array($data)) {
			return false;
		}

		foreach ($data as $key => $value) {
			switch ($key) {
				case 'summary':
					$meta['description'] = $value;
					if (array_key_exists('notes', $data)) {
						/* separate notes from a non-empty summary by a blank line */
						$meta['description'] .= ((trim($value) == false) ? '' : "\n\n") . $data['notes'];
					}
					break;
				case 'subject_ids':
					$meta['subjects'] = json_encode($value);
					break;
				case 'isbn10':
					/* ISBN-13 takes precedence when both are present */
					if (!array_key_exists('isbn13', $data)) {
						$meta['isbn'] = $value;
					}
					break;
				case 'isbn13':
					$meta['isbn'] = $value;
					break;
				case 'title':
					/* the long title takes precedence when both are present */
					if (!array_key_exists('title_long', $data)) {
						$meta['title'] = $value;
					}
					break;
				case 'title_long':
					$meta['title'] = $value;
					break;
				case 'author_data':
					/* map author id => name; done by hand because
					 * array_column() is not available before PHP 5.5 */
					$authors = array();
					if (is_array($value)) {
						foreach ($value as $author) {
							if (!is_array($author) || !array_key_exists('name', $author)) {
								continue;
							}
							if (isset($author['id'])) {
								$authors[$author['id']] = $author['name'];
							} else {
								$authors[] = $author['name'];
							}
						}
					}
					$meta['author'] = json_encode($authors);
					break;
				case 'language':
					$meta['language'] = $value;
					break;
				case 'publisher_name':
					$meta['publisher'] = $value;
					if (array_key_exists('publisher_text', $data)) {
						$meta['publisher'] .= ((trim($value) == false) ? '' : ', ') . $data['publisher_text'];
					}
					break;
			}
		}

		return true;
	}
}

View file

@ -40,6 +40,7 @@ class Meta
$meta['copyright'] = '';
$meta['description'] = '';
$meta['subjects'] = '';
$meta['rescan'] = null;
return $meta;
}
@ -65,20 +66,45 @@ class Meta
* @return OC_DB_StatementWrapper
*/
protected static function save($meta) {
$sql = "INSERT INTO *PREFIX*opds_metadata (`id`, `updated`, `date`, `author`, `title`, `language`, `publisher`, `isbn`, `copyright`, `description`, `subjects`) VALUES (?,?,?,?,?,?,?,?,?,?,?)";
$args = array(
$meta['id'],
$meta['updated'],
$meta['date'],
$meta['author'],
$meta['title'],
$meta['language'],
$meta['publisher'],
$meta['isbn'],
$meta['copyright'],
$meta['description'],
$meta['subjects']
);
$sql = "SELECT `id` FROM *PREFIX*opds_metadata WHERE `id`=?";
$args = array($meta['id']);
$query = \OCP\DB::prepare($sql);
$result = $query->execute($args);
$data = $result->fetchRow();
if (isset($data['id'])) {
$sql = "UPDATE *PREFIX*opds_metadata SET `updated`=?, `date`=?, `author`=?, `title`=?, `language`=?, `publisher`=?, `isbn`=?, `copyright`=?, `description`=?, `subjects`=?, `rescan`=? WHERE id=?";
$args = array(
$meta['updated'],
$meta['date'],
$meta['author'],
$meta['title'],
$meta['language'],
$meta['publisher'],
$meta['isbn'],
$meta['copyright'],
$meta['description'],
$meta['subjects'],
$meta['rescan'],
$meta['id']
);
} else {
$sql = "INSERT INTO *PREFIX*opds_metadata (`id`, `updated`, `date`, `author`, `title`, `language`, `publisher`, `isbn`, `copyright`, `description`, `subjects`, `rescan`) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)";
$args = array(
$meta['id'],
$meta['updated'],
$meta['date'],
$meta['author'],
$meta['title'],
$meta['language'],
$meta['publisher'],
$meta['isbn'],
$meta['copyright'],
$meta['description'],
$meta['subjects'],
$meta['rescan']
);
}
$query = \OCP\DB::prepare($sql);
return $query->execute($args);
@ -93,7 +119,10 @@ class Meta
* @return array of metadata
*/
public static function get($id) {
if (!($meta = self::load($id))) {
if (!($meta = self::load($id)) || (isset($meta['rescan']) && time() > $meta['rescan'])) {
if(isset($meta['rescan'])) {
$meta['rescan'] = null;
}
$meta = self::scan($id);
}
return $meta;
@ -101,8 +130,6 @@ class Meta
/**
* @brief scan files for metadata
* PLAN: use search_lucene to extract metadata? Does not seem to support PDF1.6?
* solution: first ask search_lucene, if no data then scan file?
*
* @param int $id fileid
* @return array $meta metadata
@ -110,16 +137,19 @@ class Meta
public static function scan($id) {
$meta = self::create($id);
$path = \OC\Files\Filesystem::getLocalFile(\OC\Files\Filesystem::getPath($id));
switch (strtolower(substr(strrchr($path, "."), 1))) {
case 'epub':
self::epub($path,$meta);
break;
case 'pdf':
self::pdf($path,$meta);
break;
}
/* try to call function named 'type' with signature type($path,$meta)
* eg, pdf(), epub(), etc
*/
$type = strtolower(substr(strrchr($path, "."), 1));
if(is_callable(array(__CLASS__, $type))) {
try {
self::$type($path,$meta);
} catch (Exception $e) {
Util::logWarn("no metadata scanner for format " . $type);
}
}
/* if title is not set, assume metadata was invalid or not present
* use filename as title
*/
@ -127,7 +157,7 @@ class Meta
$info = pathinfo($path);
$meta['title'] = basename($path,'.'.$info['extension']);
}
// self::save($meta);
self::save($meta);
return $meta;
}
@ -137,21 +167,23 @@ class Meta
*
* @param string $path path to epub
* @param arrayref $meta reference to array of metadata
* @return bool $success (true if metadata found)
*/
public static function epub($path,&$meta) {
$success = false;
$epub = new Epub($path);
$meta['author'] = json_encode($epub->Authors());
$meta['title'] = $epub->Title();
$meta['date'] = $epub->Date();
$meta['publisher'] = $epub->Publisher();
$meta['copyright'] = $epub->Copyright();
$meta['language'] = $epub->Language();
$meta['description'] = strip_tags($epub->Description());
$meta['isbn'] = $epub->ISBN();
$meta['subjects'] = $epub->Subjects();
return true;
/* first try ISBN */
if(!(($isbn = $epub->ISBN()) && (Isbn::get($isbn, $meta)))) {
/* use EPUB internal metadata instead */
$meta['author'] = json_encode($epub->Authors());
$meta['title'] = $epub->Title();
$meta['date'] = $epub->Date();
$meta['publisher'] = $epub->Publisher();
$meta['copyright'] = $epub->Copyright();
$meta['language'] = $epub->Language();
$meta['description'] = strip_tags($epub->Description());
$meta['isbn'] = $epub->ISBN();
$meta['subjects'] = json_encode($epub->Subjects());
}
}
/**
@ -159,10 +191,45 @@ class Meta
*
* @param string $path path to pdf
* @param arrayref $meta reference to array of metadata
* @return bool $success (true if metadata found)
*/
public static function pdf($path,&$meta) {
if(\OC_Util::runningOnWindows()) {
/* not supported when running on Windows due to use of exec() */
return;
}
return false;
/* first, try to get metadata through ISBN */
$command = ['pdftotext -l 10 "','" -'];
$output=array();
exec($command[0] . $path . $command[1], $output);
if (!(($output) && ($isbn = Isbn::scan($output)) && (Isbn::get($isbn,$meta)))) {
/* No ISBN, try PDF metadata */
$output=array();
$command = ["pdfinfo '","'|grep -we '^\(Title\|Author\|Subject\|Keywords\|CreationDate\|ModDate\)'"];
exec($command[0] . $path . $command[1], $output);
foreach($output as $data) {
list($key, $value) = explode(':',$data,2);
$value = trim($value);
}
if (!($value == '')) {
switch ($key) {
case 'Title':
$meta['title'] = $value;
break;
case 'Author':
$meta['author'] = $value;
break;
case 'Subject':
case 'Keywords':
$meta['subjects'] .= $value;
break;
case 'CreationDate':
case 'ModDate':
$meta['date'] = strtotime($value);
break;
}
}
}
}
}

View file

@ -23,10 +23,16 @@ function checkBox($format) {
<div class="section" id="opds-admin">
<h2><?php p($l->t('OPDS')); ?><span class="msg"></span></h2>
<div>
<label for="opds-feed-subtitle"><?php p($l->t('Feed subtitle:')) ?></label>
<input type="text" id="opds-feed-subtitle" title="<?php p($l->t("Enter subtitle for OPDS catalog.")); ?>" value="<?php p($_['feedSubtitle']) ?>" />
</div>
<table>
<tr>
<td><label for="opds-feed-subtitle"><?php p($l->t('Feed subtitle:')) ?></label></td>
<td><input type="text" id="opds-feed-subtitle" title="<?php p($l->t("Enter subtitle for OPDS catalog.")); ?>" value="<?php p($_['feedSubtitle']) ?>" /></td>
</tr>
<tr>
<td><label for="opds-isbndb-key"><?php p($l->t('ISBNdb key:')) ?></label></td>
<td><input type="text" id="opds-isbndb-key" title="<?php p($l->t("Enter ISBNdb key to use for metadata lookup. Leave blank to disable ISBNdb lookup.")); ?>" value="<?php p($_['isbndbKey']) ?>" /></td>
</tr>
</table>
<br>
<p><?php p($l->t('Enable preview for:')); ?></p>
<div class="indent">

View file

@ -3,15 +3,21 @@
<updated><?php p(date("Y-m-d\TH:i:sP",strtotime($_['file']['meta']['updated']))); ?></updated>
<id>id:<?php p($_['file']['id']); ?></id>
<dcterms:extent><?php p($_['file']['humansize']); ?></dcterms:extent>
<dc:language><?php p($_['file']['meta']['language']); ?></dc:language>
<?php foreach (json_decode($_['file']['meta']['author'],true) as $author): ?>
<author>
<name><?php p($author); ?></name>
</author>
<dc:identifier>urn:isbn:<?php p($_['file']['meta']['isbn']); ?></dc:identifier>
<dc:publisher><?php p($_['file']['meta']['publisher']); ?></dc:publisher>
<dc:issued><?php p($_['file']['meta']['date']); ?></dc:issued>
<?php endforeach; ?>
<?php if($_['file']['meta']['isbn']): ?>
<dc:identifier>urn:isbn:<?php p($_['file']['meta']['isbn']); ?></dc:identifier>
<?php endif; ?>
<?php if($_['file']['meta']['publisher']): ?>
<dc:publisher><?php p($_['file']['meta']['publisher']); ?></dc:publisher>
<?php endif; ?>
<?php if($_['file']['meta']['language']): ?>
<dc:language><?php p($_['file']['meta']['language']); ?></dc:language>
<?php endif; ?>
<dc:issued><?php p(date("Y-m-d\TH:i:sP",strtotime($_['file']['meta']['date']))); ?></dc:issued>
<link type="<?php p($_['file']['mimetype']); ?>"
rel="alternate"
href="?id=<?php p($_['file']['id']); ?>"/>