mirror of
https://github.com/Yetangitu/owncloud-apps.git
synced 2025-10-02 14:49:17 +02:00
Improved ISBN regexes, again
This commit is contained in:
parent
84b7c25638
commit
e85212bb66
3 changed files with 32 additions and 33 deletions
BIN
dist/files_opds-0.5.2.tar.gz
vendored
Normal file
BIN
dist/files_opds-0.5.2.tar.gz
vendored
Normal file
Binary file not shown.
|
@ -4,7 +4,7 @@
|
|||
<name>OPDS catalog</name>
|
||||
<description>Personal OPDS catalog</description>
|
||||
<licence>AGPL</licence>
|
||||
<version>0.5.1</version>
|
||||
<version>0.5.2</version>
|
||||
<author>Frank de Lange</author>
|
||||
<requiremin>7.0</requiremin>
|
||||
<shipped>true</shipped>
|
||||
|
|
|
@ -35,38 +35,8 @@ class Isbn
|
|||
public static function scan($text) {
|
||||
$match = array();
|
||||
foreach($text as $line) {
|
||||
/* generic ISBN 10/13 pattern */
|
||||
if(preg_match_all('/ISBN(?:[ -]?[1[03]]?)?:?\s*((97[89])?[X0-9-–]{10,14})/i',$line,$match)) {
|
||||
foreach($match[1] as $hit) {
|
||||
$hit = preg_replace('/[^0-9X]/i','',$hit);
|
||||
if(self::validate($hit)) {
|
||||
return $hit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* single ISBN-13 targeted pattern (canonical format) */
|
||||
if(preg_match_all('/(97[89][ –-]?\d[ –-]?\d{4}[ –-]?\d{4}[ –-]?\d)/',$line,$match)) {
|
||||
foreach($match[1] as $hit) {
|
||||
$hit = preg_replace('/[^0-9]/','',$hit);
|
||||
if(self::validate($hit)) {
|
||||
return $hit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* single ISBN-13 targeted pattern (free format) */
|
||||
if(preg_match_all('/(9[\d –-]{11,15}\d)/',$line,$match)) {
|
||||
foreach($match[1] as $hit) {
|
||||
$hit = preg_replace('/[^0-9]/','',$hit);
|
||||
if(self::validate($hit)) {
|
||||
return $hit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* single ISBN-10 targeted pattern */
|
||||
if(preg_match_all('/(\d[\d –-]{8,11}[\dX])/i',$line,$match)) {
|
||||
/* generic ISBN 10/13 pattern. Checks for unicode dashes ('‒–—―‑‐﹣--') as well as regular hyphens. */
|
||||
if(preg_match_all('/ISBN(?:[‒–—―‑‐﹣--]?(?:1[03])?)?:?\s*(?=[\d‒–—―‑‐﹣--]{10,17})(((?:97[89])[0-9‒–—―‑‐﹣--]{9,14})|([\d‒–—―‑‐﹣--]{9,12}[\dXx]))/u', $line, $match)) {
|
||||
foreach($match[1] as $hit) {
|
||||
$hit = preg_replace('/[^0-9X]/i','',$hit);
|
||||
if(self::validate($hit)) {
|
||||
|
@ -76,6 +46,35 @@ class Isbn
|
|||
}
|
||||
}
|
||||
|
||||
/* If nothing is found, try prefix-less versions. Even though ISBNs should be
|
||||
* presented with an 'ISBN' prefix, some publications omit this. These patterns
|
||||
* are liable to generate false positives, so they should only be run after the
|
||||
* prefixed version has exhausted the search without returning results.
|
||||
*/
|
||||
|
||||
foreach($text as $line) {
|
||||
/* prefix-less ISBN-13 targeted pattern */
|
||||
if(preg_match_all('/(97[89][\d‒–—―‑‐﹣--]\d{9,13}\d)/u',$line,$match)) {
|
||||
foreach($match[1] as $hit) {
|
||||
$hit = preg_replace('/[^0-9]/','',$hit);
|
||||
if(self::validate($hit)) {
|
||||
return $hit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* prefix-less ISBN-10 targeted pattern */
|
||||
if(preg_match_all('/(\d[\d‒–—―‑‐﹣--]{8,11}[\dXx])/u',$line,$match)) {
|
||||
foreach($match[1] as $hit) {
|
||||
$hit = preg_replace('/[^0-9X]/i','',$hit);
|
||||
if(self::validate($hit)) {
|
||||
return $hit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* No ISBN found */
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue