1
0
Fork 0
mirror of https://github.com/futurepress/epub.js.git synced 2025-10-05 15:32:55 +02:00

Add location generation in utils

This commit is contained in:
Fred Chasen 2022-02-08 21:43:58 -08:00
parent 8c9c8250a1
commit 9388decbfd
15 changed files with 438 additions and 552 deletions

View file

@ -24,16 +24,18 @@
<div id="viewer" class="spreads"></div>
<a id="prev" href="#prev" class="arrow"></a>
<a id="next" href="#next" class="arrow"></a>
<div id="controls">
<input id="current-percent" size="3" value="0" /> %
</div>
<script type="module">
import { Manifest, Rendition } from "../src/index.js"
import { generateLocations } from "../src/utils/locations.js"
const params = URLSearchParams && new URLSearchParams(document.location.search.substring(1));
const url = params && params.get("url") && decodeURIComponent(params.get("url"));
const currentSectionIndex = (params && params.get("loc")) ? params.get("loc") : undefined;
// Load the opf
// let book = await new Epub(url || "https://s3.amazonaws.com/epubjs/books/moby-dick/OPS/package.opf");
// Load the manifest
let book = await new Manifest(url || "/books/alice-manifest/manifest.jsonld");
book.opened.then(() => {
@ -158,6 +160,85 @@
return false;
};
// Progress controls: a range slider plus a text field showing the reading position in percent
let controls = document.getElementById("controls");
let currentPage = document.getElementById("current-percent");
let slider = document.createElement("input");
// True while the mouse button is held on the slider, so relocation events do not move it
let mouseDown = false;

// Convert a rendition location into a whole-number percentage (0 - 100).
// percentageFromCfi is scaled by 100 here; a null result (no locations unpacked) becomes 0.
let toPercentage = function(location){
	if (location.atEnd) {
		return 100;
	}
	let fraction = rendition.locators.percentageFromCfi(location.start.cfi);
	return Math.floor(fraction * 100);
};

// Jump to the position selected on the slider
let slide = function(){
	var cfi = rendition.locators.cfiFromPercentage(slider.value / 100);
	rendition.display(cfi);
};

/*
	// Generate and Save locations
	let key = book.key()+'-locations';
	let stored = localStorage.getItem(key);
	if (stored) {
		rendition.locators.unpackLocations(JSON.parse(stored));
	} else {
		// Or generate the locations on the fly
		// Can pass an optional number of chars to break sections by
		// default is 150 chars
		let locations = await generateLocations(book.sections, 1600);
		rendition.locators.unpackLocations(locations);
		// Save out the generated locations to JSON
		localStorage.setItem(key, JSON.stringify(locations));
	}
*/

controls.style.display = "block";
slider.setAttribute("type", "range");
slider.setAttribute("min", 0);
slider.setAttribute("max", 100);
slider.setAttribute("step", 1);
slider.setAttribute("value", 0);

slider.addEventListener("change", slide, false);
slider.addEventListener("mousedown", function(){
	mouseDown = true;
}, false);
slider.addEventListener("mouseup", function(){
	mouseDown = false;
}, false);

// Show the starting position, using the same 0 - 100 scale as the relocated handler below
let currentLocation = rendition.currentLocation();
let loc = toPercentage(currentLocation);
if (currentPage !== null) {
	slider.value = `${loc}`;
	currentPage.value = `${loc}`;
}

controls.appendChild(slider);

// Jump to the percentage typed into the text field
currentPage.addEventListener("change", function(){
	let cfi = rendition.locators.cfiFromPercentage(currentPage.value/100);
	rendition.display(cfi);
}, false);

// Listen for location changed event, get percentage from CFI
rendition.on('relocated', function(location){
	let percentage = toPercentage(location);
	// Leave the slider alone while the user is dragging it
	if(!mouseDown) {
		slider.value = percentage;
	}
	currentPage.value = percentage;
	console.log(location);
});
</script>
</body>

View file

@ -1,6 +1,6 @@
import EventEmitter from "../utils/eventemitter.js";
import Publication from "../publication/publication.js";
import Locations from "./locations.js";
import Locations from "../publication/locations-old.js";
import Container from "./container.js";
import Packaging from "./packaging.js";
import Navigation from "./navigation.js";

View file

@ -1,518 +0,0 @@
import { qs, sprint, locationOf, defer } from "../utils/core.js";
import Queue from "../utils/queue.js";
import EpubCFI from "../utils/epubcfi.js";
import { EVENTS } from "../utils/constants.js";
import EventEmitter from "../utils/eventemitter.js";
/**
 * Find Locations for a Book
 * @param {request} request method handed to section.load() when a section is processed
 * @param {number} [pause=100] milliseconds to wait after each section before continuing
 */
class Locations {
	constructor(request, pause) {
		// The spine is not a constructor argument; it is supplied through generate()
		this.spine = undefined;
		this.request = request;
		this.pause = pause || 100;

		this.q = new Queue(this);
		this.epubcfi = new EpubCFI();

		this._locations = [];
		this._locationsWords = [];
		this.total = 0;

		this.break = 150;

		this._current = 0;
		this._wordCounter = 0;

		// Assigning currentLocation goes through the setter below (setCurrent)
		this.currentLocation = '';
		this._currentCfi ='';
		this.processingTimeout = undefined;
	}

	/**
	 * Load all of sections in the book to generate locations
	 * @param {Spine} spine spine whose linear sections are processed; kept on the instance
	 * @param {int} chars how many chars to split on
	 * @return {Promise<Array<string>>} locations
	 */
	generate(spine, chars) {
		if (spine) {
			this.spine = spine;
		}

		if (chars) {
			this.break = chars;
		}

		this.q.pause();

		this.spine.each((section) => {
			if (section.linear) {
				this.q.enqueue(this.process.bind(this), section);
			}
		});

		return this.q.run().then(() => {
			this.total = this._locations.length - 1;

			if (this._currentCfi) {
				this.currentLocation = this._currentCfi;
			}

			return this._locations;
		});
	}

	/**
	 * Create an empty range-like object with unset start and end points
	 * @return {object} range
	 */
	createRange () {
		return {
			startContainer: undefined,
			startOffset: undefined,
			endContainer: undefined,
			endOffset: undefined
		};
	}

	/**
	 * Load one section, collect its locations and unload it again
	 * @param {Section} section
	 * @return {Promise<Array<string>>} locations found in the section, resolved after `pause` ms
	 */
	process(section) {
		return section.load(this.request)
			.then((contents) => {
				const locations = this.parse(contents, section.canonical);
				this._locations = this._locations.concat(locations);

				section.unload();

				// Resolve only after the pause; the timer id is kept so destroy() can clear it
				return new Promise((resolve) => {
					this.processingTimeout = setTimeout(() => resolve(locations), this.pause);
				});
			});
	}

	/**
	 * Walk the text nodes of a section and emit a range CFI roughly every `chars` characters
	 * @param {Node} contents node whose ownerDocument holds the section body
	 * @param {string} cfiBase base component used for every generated CFI
	 * @param {int} [chars] characters per location, defaults to this.break
	 * @return {Array<string>} locations
	 */
	parse(contents, cfiBase, chars) {
		const locations = [];
		let range;
		const doc = contents.ownerDocument;
		const body = qs(doc, "body");
		// Characters consumed since the last emitted location
		let counter = 0;
		let prev;
		const _break = chars || this.break;

		const parser = (node) => {
			const len = node.length;
			let dist;
			let pos = 0;

			if (node.textContent.trim().length === 0) {
				return false; // continue
			}

			// Start range
			if (counter == 0) {
				range = this.createRange();
				range.startContainer = node;
				range.startOffset = 0;
			}

			dist = _break - counter;

			// Node is smaller than a break,
			// skip over it
			if(dist > len){
				counter += len;
				pos = len;
			}

			while (pos < len) {
				dist = _break - counter;

				if (counter === 0) {
					// Start new range
					pos += 1;
					range = this.createRange();
					range.startContainer = node;
					range.startOffset = pos;
				}

				// Gone over
				if(pos + dist >= len){
					// Continue counter for next node
					counter += len - pos;
					// break
					pos = len;
				// At End
				} else {
					// Advance pos
					pos += dist;

					// End the previous range
					range.endContainer = node;
					range.endOffset = pos;
					let cfi = new EpubCFI(range, cfiBase).toString();
					locations.push(cfi);
					counter = 0;
				}
			}
			prev = node;
		};

		sprint(body, parser);

		// Close remaining
		if (range && range.startContainer && prev) {
			range.endContainer = prev;
			range.endOffset = prev.length;
			let cfi = new EpubCFI(range, cfiBase).toString();
			locations.push(cfi);
			counter = 0;
		}

		return locations;
	}

	/**
	 * Load all of sections in the book to generate locations
	 * @param {string} startCfi start position
	 * @param {int} wordCount how many words to split on
	 * @param {int} count result count
	 * @return {object} locations
	 */
	generateFromWords(startCfi, wordCount, count) {
		const start = startCfi ? new EpubCFI(startCfi) : undefined;
		this.q.pause();
		this._locationsWords = [];
		this._wordCounter = 0;

		this.spine.each((section) => {
			if (!section.linear) {
				return;
			}
			// With a start position, sections before it are skipped
			if (start && section.index < start.spinePos) {
				return;
			}
			this.q.enqueue(this.processWords.bind(this), section, wordCount, start, count);
		});

		return this.q.run().then(() => {
			if (this._currentCfi) {
				this.currentLocation = this._currentCfi;
			}

			return this._locationsWords;
		});
	}

	/**
	 * Load one section and append its word-based locations, never collecting more than `count`
	 * @param {Section} section
	 * @param {int} wordCount how many words to split on
	 * @param {EpubCFI} [startCfi] start position
	 * @param {int} [count] maximum number of locations to collect in total
	 * @return {Promise} resolved with the section's locations after `pause` ms
	 */
	processWords(section, wordCount, startCfi, count) {
		if (count && this._locationsWords.length >= count) {
			return Promise.resolve();
		}

		return section.load(this.request)
			.then((contents) => {
				const locations = this.parseWords(contents, section, wordCount, startCfi);

				// Cap against what is still missing, not against the size of this one section
				let toAdd = locations;
				if (count) {
					const remainingCount = Math.max(count - this._locationsWords.length, 0);
					toAdd = locations.slice(0, remainingCount);
				}
				this._locationsWords = this._locationsWords.concat(toAdd);

				section.unload();

				return new Promise((resolve) => {
					this.processingTimeout = setTimeout(() => resolve(locations), this.pause);
				});
			});
	}

	/**
	 * Count whitespace-separated words
	 * @param {string} s
	 * @return {number} number of words, 0 for an empty or whitespace-only string
	 */
	countWords(s) {
		const trimmed = s.trim();
		if (trimmed.length === 0) {
			return 0;
		}
		// Any run of whitespace (spaces, tabs, newlines) separates two words
		return trimmed.split(/\s+/).length;
	}

	/**
	 * Walk the text nodes of a section and emit a CFI roughly every `wordCount` words
	 * @param {Node} contents node whose ownerDocument holds the section body
	 * @param {Section} section
	 * @param {int} wordCount how many words to split on
	 * @param {EpubCFI} [startCfi] start position; earlier nodes of its section are skipped
	 * @return {Array<object>} objects with `cfi` and `wordCount`
	 */
	parseWords(contents, section, wordCount, startCfi) {
		const cfiBase = section.cfiBase;
		const locations = [];
		const doc = contents.ownerDocument;
		const body = qs(doc, "body");
		const _break = wordCount;
		let foundStartNode = startCfi ? startCfi.spinePos !== section.index : true;
		let startNode;

		if (startCfi && section.index === startCfi.spinePos) {
			startNode = startCfi.findNode(startCfi.range ? startCfi.path.steps.concat(startCfi.start.steps) : startCfi.path.steps, contents.ownerDocument);
		}

		const parser = (node) => {
			if (!foundStartNode) {
				if (node === startNode) {
					foundStartNode = true;
				} else {
					return false;
				}
			}

			// Only short nodes are checked for being whitespace-only
			if (node.textContent.length < 10) {
				if (node.textContent.trim().length === 0) {
					return false;
				}
			}

			const len = this.countWords(node.textContent);
			let dist;
			let pos = 0;

			if (len === 0) {
				return false; // continue
			}

			dist = _break - this._wordCounter;

			// Node is smaller than a break,
			// skip over it
			if (dist > len) {
				this._wordCounter += len;
				pos = len;
			}

			while (pos < len) {
				dist = _break - this._wordCounter;

				// Gone over
				if (pos + dist >= len) {
					// Continue counter for next node
					this._wordCounter += len - pos;
					// break
					pos = len;
				// At End
				} else {
					// Advance pos
					pos += dist;

					let cfi = new EpubCFI(node, cfiBase);
					locations.push({ cfi: cfi.toString(), wordCount: this._wordCounter });
					this._wordCounter = 0;
				}
			}
		};

		sprint(body, parser);

		return locations;
	}

	/**
	 * Get a location from an EpubCFI
	 * @param {EpubCFI} cfi
	 * @return {number} location index, or -1 when no locations are loaded
	 */
	locationFromCfi(cfi){
		if (EpubCFI.prototype.isCfiString(cfi)) {
			cfi = new EpubCFI(cfi);
		}

		// Check if the location has not been set yet
		if(this._locations.length === 0) {
			return -1;
		}

		const loc = locationOf(cfi, this._locations, this.epubcfi.compare);

		if (loc > this.total) {
			return this.total;
		}

		return loc;
	}

	/**
	 * Get a percentage position in locations from an EpubCFI
	 * @param {EpubCFI} cfi
	 * @return {number} fraction between 0 and 1, or null when no locations are loaded
	 */
	percentageFromCfi(cfi) {
		if(this._locations.length === 0) {
			return null;
		}
		// Find closest cfi
		const loc = this.locationFromCfi(cfi);
		// Get percentage in total
		return this.percentageFromLocation(loc);
	}

	/**
	 * Get a percentage position from a location index
	 * @param {number} location
	 * @return {number} fraction between 0 and 1
	 */
	percentageFromLocation(loc) {
		if (!loc || !this.total) {
			return 0;
		}

		return (loc / this.total);
	}

	/**
	 * Get an EpubCFI from location index
	 * @param {number} loc
	 * @return {EpubCFI} cfi, or -1 when the index is out of range
	 */
	cfiFromLocation(loc){
		let cfi = -1;
		// check that pg is an int
		if(typeof loc != "number"){
			loc = parseInt(loc, 10);
		}

		if(loc >= 0 && loc < this._locations.length) {
			cfi = this._locations[loc];
		}

		return cfi;
	}

	/**
	 * Get an EpubCFI from location percentage
	 * @param {number} percentage fraction between 0 and 1
	 * @return {EpubCFI} cfi
	 */
	cfiFromPercentage(percentage){
		if (percentage > 1) {
			console.warn("Normalize cfiFromPercentage value to between 0 - 1");
		}

		// Make sure 1 goes to very end
		if (percentage >= 1) {
			let cfi = new EpubCFI(this._locations[this.total]);
			cfi.collapse();
			return cfi.toString();
		}

		const loc = Math.ceil(this.total * percentage);
		return this.cfiFromLocation(loc);
	}

	/**
	 * Load locations from JSON
	 * @param {json} locations JSON string or an already parsed array
	 * @return {Array<string>} locations
	 */
	load(locations){
		if (typeof locations === "string") {
			this._locations = JSON.parse(locations);
		} else {
			this._locations = locations;
		}
		this.total = this._locations.length - 1;
		return this._locations;
	}

	/**
	 * Save locations to JSON
	 * @alias toJSON
	 * @return {json}
	 */
	save(){
		return this.toJSON();
	}

	/**
	 * Get the current location index
	 * @return {number}
	 */
	getCurrent(){
		return this._current;
	}

	/**
	 * Set the current location from a CFI string or a location index and
	 * emit EVENTS.LOCATIONS.CHANGED once locations are available
	 * @param {string|number} curr
	 */
	setCurrent(curr){
		let loc;

		if(typeof curr == "string"){
			this._currentCfi = curr;
		} else if (typeof curr == "number") {
			this._current = curr;
		} else {
			return;
		}

		// Nothing to resolve against until locations have been generated or loaded
		if(this._locations.length === 0) {
			return;
		}

		if(typeof curr == "string"){
			loc = this.locationFromCfi(curr);
			this._current = loc;
		} else {
			loc = curr;
		}

		this.emit(EVENTS.LOCATIONS.CHANGED, {
			percentage: this.percentageFromLocation(loc)
		});
	}

	/**
	 * Get the current location
	 */
	get currentLocation() {
		return this._current;
	}

	/**
	 * Set the current location
	 */
	set currentLocation(curr) {
		this.setCurrent(curr);
	}

	/**
	 * Locations length
	 */
	length () {
		return this._locations.length;
	}

	/**
	 * Export locations as an Array
	 * @return {array}
	 */
	toArray() {
		return this._locations;
	}

	/**
	 * Export locations as JSON
	 * @return {json}
	 */
	toJSON() {
		return JSON.stringify(this._locations);
	}

	/**
	 * Stop the queue, clear the pending timer and drop all references
	 */
	destroy () {
		this.spine = undefined;
		this.request = undefined;
		this.pause = undefined;

		this.q.stop();
		this.q = undefined;
		this.epubcfi = undefined;

		this._locations = undefined
		this._locationsWords = undefined;
		this.total = undefined;

		this.break = undefined;
		this._current = undefined;

		this.currentLocation = undefined;
		this._currentCfi = undefined;
		clearTimeout(this.processingTimeout);
	}
}
EventEmitter(Locations.prototype);
export default Locations;

View file

@ -35,7 +35,7 @@ class Spine {
let manifestItem = this.manifest[item.idref];
item.id = item.idref;
item.canonical = this.epubcfi.generateChapterComponent(this.spineNodeIndex, item.index, item.idref);
item.cfiBase = this.epubcfi.generateChapterComponent(this.spineNodeIndex, item.index, item.idref);
item.cfiPos = index;
if(manifestItem) {

View file

@ -726,7 +726,7 @@ class DefaultViewManager {
pages.push(pg);
}
let mapping = this.mapping.page(view.contents, view.section.canonical, startPos, endPos);
let mapping = this.mapping.page(view.contents, view.section.cfiBase, startPos, endPos);
return {
index,

View file

@ -434,7 +434,7 @@ class IframeView {
this.window = this.iframe.contentWindow;
this.document = this.iframe.contentDocument;
this.contents = new Contents(this.document, this.document.body, this.section.canonical, this.section.index);
this.contents = new Contents(this.document, this.document.body, this.section.cfiBase, this.section.index);
this.rendering = false;

View file

@ -48,6 +48,12 @@ class Manifest extends Publication {
this.pagelist = await this.loadNavigation(pagelistUrl, "pagelist");
}
const locationsUrl = this.locationsUrl;
console.log(locationsUrl);
if (locationsUrl) {
this.locations = await await this.load(locationsUrl, "json");
}
}
async open(url) {
@ -224,6 +230,35 @@ class Manifest extends Publication {
return pagelistUrl && pagelistUrl.url;
}
/**
* Get or set the locations url
* @param {string} [url]
* @return {string} pagelistUrl
*/
get locationsUrl() {
let locationsUrl = this.resources.find((resource) => {
return resource.rel.includes("locations");
});
return locationsUrl && locationsUrl.url;
}
set locationsUrl(url) {
let locationsUrl = this.resources.find((resource) => {
return resource.rel.includes("locations");
});
if (locationsUrl) {
locationsUrl.url = url;
} else {
locationsUrl = {
rel: ["locations"],
url: url
};
this.resources.push(locationsUrl);
}
return locationsUrl && locationsUrl.url;
}
/**
 * The publication's sections, exposed under the name `readingOrder`
 */
get readingOrder() {
return this.sections;
}

View file

@ -41,6 +41,14 @@ class Locator {
this.data.id = id;
}
/**
 * Get or set the `cfi` value stored in this locator's data
 */
get cfi() {
return this.data.cfi;
}
set cfi(cfi) {
this.data.cfi = cfi;
}
/**
 * Get the `type` value stored in this locator's data
 */
get type() {
return this.data.type;
}

View file

@ -159,7 +159,8 @@ class Publication {
const id = encodeURIComponent(filename(item.url).split(".")[0]);
item.id = id;
// Index 2 for Sections
item.canonical = item.canonical || `2/${index * 2}[${id}]`;
item.cfiBase = item.cfiBase || `2/${index * 2}[${id}]`
item.canonical = item.canonical || item.cfiBase;
const resource = new Resource(item);
this.data.sections.append(resource);
@ -211,7 +212,8 @@ class Publication {
const id = encodeURIComponent(filename(item.url).split(".")[0]);
item.id = id;
// Index 4 for Resources
item.canonical = item.canonical || `4/${index * 2}[${id}]`;
item.cfiBase = item.cfiBase || `4/${index * 2}[${id}]`
item.canonical = item.canonical || item.cfiBase;
const resource = new Resource(item);
this.data.resources.add(resource);
@ -313,10 +315,7 @@ class Publication {
}
for (const item of items) {
item.url = this.resolve(item.url);
item.canonical = item.canonical || item.url;
const loc = new Locator(item);
const loc = new Locator({ url: item, cfi: item});
this.data.locations.append(loc);
}
@ -397,7 +396,7 @@ class Publication {
* @return {string} key
*/
key(identifier) {
var ident = identifier || this.metadata.identifier;
let ident = identifier || this.metadata.get("id") || this.metadata.get("identifier");
return `epubjs-${EPUBJS_VERSION}-${ident}`;
}

View file

@ -22,7 +22,8 @@ class Resource {
properites: item.properites,
rel: item.rel || [],
name: item.name,
cfiPos: item.cfiPos
cfiPos: item.cfiPos,
cfiBase: item.cfiBase
}
}
@ -106,6 +107,14 @@ class Resource {
this.data.cfiPos = pos;
}
/**
 * Get or set the `cfiBase` value stored in this resource's data
 * (presumably the base component used when building EpubCFIs for the resource — confirm against callers)
 */
get cfiBase() {
return this.data.cfiBase;
}
set cfiBase(base) {
this.data.cfiBase = base;
}
/**
* Load the resource from its url
*/

View file

@ -24,12 +24,27 @@ class Locators {
}
unpackLocations(locations) {
this.locations = locations;
if (!locations) {
return;
}
this.locations = [];
for (const [key, location] of locations) {
if (EpubCFI.prototype.isCfiString(location)) {
this.locations.push(location);
} else if (location.cfi) {
this.locations.push(location.cfi);
}
}
this.totalLocations = this.locations.length - 1;
}
unpackPages(pages) {
if (!pages) {
return;
}
this.pages = pages;
this.pageLocations = [];
this.firstPage = parseInt(this.pages[0]);
this.lastPage = parseInt(this.pages[this.pages.length-1]);
this.totalPages = this.lastPage - this.firstPage;
@ -71,11 +86,11 @@ class Locators {
* @return {number}
*/
percentageFromCfi(cfi) {
if(this.locations.length === 0) {
if(!this.locations || this.locations.length === 0) {
return null;
}
// Find closest cfi
var loc = this.locationFromCfi(cfi);
let loc = this.locationFromCfi(cfi);
// Get percentage in total
return this.percentageFromLocation(loc);
}

View file

@ -22,7 +22,7 @@ class Mapping {
*/
section(view) {
var ranges = this.findRanges(view);
var map = this.rangeListToCfiList(view.section.canonical, ranges);
var map = this.rangeListToCfiList(view.section.cfiBase, ranges);
return map;
}

View file

@ -158,15 +158,12 @@ class Rendition {
// Hold queue until publication is opened
this.q.enqueue(this.publication.opened);
// this.starting = new defer();
// /**
// * @member {promise} started returns after the rendition has started
// * @memberof Rendition
// */
// this.started = this.starting.promise;
// Block the queue until rendering is started
this.q.enqueue(this.start);
/**
* @member {promise} started returns after the rendition has started
* @memberof Rendition
*/
this.started = this.q.enqueue(this.start);
// Start rendering
if (this.settings.element) {
@ -352,8 +349,8 @@ class Rendition {
this.displaying = displaying;
// Check if this is a publication percentage
if (this.publication.locations.length && isFloat(target)) {
target = this.publication.locations.cfiFromPercentage(parseFloat(target));
if (this.locators.locations.length && isFloat(target)) {
target = this.locators.cfiFromPercentage(parseFloat(target));
}
if (typeof target === "undefined") {
@ -848,7 +845,7 @@ class Rendition {
}
};
if (this.publication.locations.length) {
if (this.locators.locations.length) {
let locationStart = this.locators.locationFromCfi(start.mapping.start);
let locationEnd = this.locators.locationFromCfi(end.mapping.end);
@ -862,7 +859,7 @@ class Rendition {
}
}
if (this.publication.pagelist.length) {
if (this.locators.pages.length) {
let pageStart = this.locators.pageFromCfi(start.mapping.start);
let pageEnd = this.locators.pageFromCfi(end.mapping.end);

View file

@ -29,7 +29,7 @@ class Section {
this.next = item.next;
this.prev = item.prev;
this.canonical = item.canonical;
this.cfiBase = item.cfiBase;
if (hooks) {
this.hooks = hooks;
@ -218,7 +218,7 @@ class Section {
* @return {string} cfi an EpubCFI string
*/
cfiFromRange(_range) {
return new EpubCFI(_range, this.canonical).toString();
return new EpubCFI(_range, this.cfiBase).toString();
}
/**
@ -227,7 +227,7 @@ class Section {
* @return {string} cfi an EpubCFI string
*/
cfiFromElement(el) {
return new EpubCFI(el, this.canonical).toString();
return new EpubCFI(el, this.cfiBase).toString();
}
/**

260
src/utils/locations.js Normal file
View file

@ -0,0 +1,260 @@
import {qs, sprint} from "./core.js";
import Queue from "./queue.js";
import EpubCFI from "./epubcfi.js";
import request from "./request.js";
/**
 * Generate locations for every section by queueing one processing job per section
 * @param {Iterable} sections iterable of [key, section] pairs
 * @param {object|number} [options] settings, or just the number of chars to break on
 * @param {number} [options.chars=150] how many chars to split on
 * @param {function} [options.request] method used to load a section url, defaults to the imported request
 * @param {number} [options.pause=100] value handed to each processing job
 * @return {Promise<Array<string>>} locations of all sections, in section order
 */
export async function generateLocations(sections, options={}) {
	// A bare number is shorthand for { chars: number }
	const settings = (typeof options === "number") ? { chars: options } : (options || {});
	const q = new Queue();
	const chars = settings.chars || 150;
	const requestMethod = settings.request || request;
	const pause = settings.pause || 100;
	const processing = [];
	const locations = [];

	// Queue is paused while the jobs are added, then started with run()
	q.pause();

	for (const [, section] of sections) {
		processing.push(q.enqueue(process, section, chars, requestMethod, pause));
	}

	q.run();

	const processed = await Promise.all(processing);

	for (const group of processed) {
		locations.push(...group);
	}

	return locations;
}
/**
 * Build an empty range-like object: start and end container/offset, all still unset
 * @return {object} range
 */
function createRange() {
	const range = {};
	for (const edge of ["start", "end"]) {
		range[`${edge}Container`] = undefined;
		range[`${edge}Offset`] = undefined;
	}
	return range;
}
/**
 * Load one section and generate its locations
 * @param {object} section needs `url` and `cfiBase`
 * @param {number} chars how many chars to split on
 * @param {function} requestMethod called with the section url, resolves to the section document
 * @param {number} pause accepted for callers, currently not applied here
 * @return {Promise<Array<string>>} locations of the section
 */
async function process(section, chars, requestMethod, pause) {
	const contents = await requestMethod(section.url);
	// chars has to be forwarded, otherwise parse has no break size to work with
	return parse(contents, section.cfiBase, chars);
}
/**
 * Walk the text nodes of a document body and emit a range CFI roughly every `chars` characters
 * @param {Document} doc document of the section
 * @param {string} cfiBase base component used for every generated CFI
 * @param {number} [chars=150] how many chars to split on
 * @return {Array<string>} locations, empty when the document has no body
 */
function parse(doc, cfiBase, chars) {
	const locations = [];
	let range;
	const body = qs(doc, "body");
	// Characters consumed since the last emitted location
	let counter = 0;
	let prev;
	// Without a break size every distance below would be NaN
	const _break = chars || 150;

	if (!body) {
		return locations;
	}

	const parser = function(node) {
		const len = node.length;
		let dist;
		let pos = 0;

		if (node.textContent.trim().length === 0) {
			return false; // continue
		}

		// Start range
		if (counter == 0) {
			range = createRange();
			range.startContainer = node;
			range.startOffset = 0;
		}

		dist = _break - counter;

		// Node is smaller than a break,
		// skip over it
		if(dist > len){
			counter += len;
			pos = len;
		}

		while (pos < len) {
			dist = _break - counter;

			if (counter === 0) {
				// Start new range
				pos += 1;
				range = createRange();
				range.startContainer = node;
				range.startOffset = pos;
			}

			// Gone over
			if(pos + dist >= len){
				// Continue counter for next node
				counter += len - pos;
				// break
				pos = len;
			// At End
			} else {
				// Advance pos
				pos += dist;

				// End the previous range
				range.endContainer = node;
				range.endOffset = pos;
				let cfi = new EpubCFI(range, cfiBase).toString();
				locations.push(cfi);
				counter = 0;
			}
		}
		prev = node;
	};

	sprint(body, parser);

	// Close remaining
	if (range && range.startContainer && prev) {
		range.endContainer = prev;
		range.endOffset = prev.length;
		let cfi = new EpubCFI(range, cfiBase).toString();
		locations.push(cfi);
		counter = 0;
	}

	return locations;
}
/**
 * Generate word-based locations by queueing one processing job per section
 * @param {Iterable} sections iterable of [key, section] pairs
 * @param {object} [options]
 * @param {string} [options.startCfi] start position; sections whose cfiPos lies before it are skipped
 * @param {int} [options.wordCount] how many words to split on
 * @param {int} [options.count] maximum number of locations to return
 * @param {function} [options.request] method used to load a section url, defaults to the imported request
 * @param {number} [options.pause=100] read from the options, currently not handed to the jobs
 * @return {Promise<Array<object>>} locations
 */
export async function generateLocationsFromWords(sections, options={}) {
	const q = new Queue();
	const wordCount = options.wordCount;
	const count = options.count;
	const startCfi = options.startCfi;
	const requestMethod = options.request || request;
	const pause = options.pause || 100;
	const processing = [];
	const locations = [];
	const start = startCfi ? new EpubCFI(startCfi) : undefined;
	// Passed by value, so every section job starts counting from 0
	const wordCounter = 0;

	q.pause();

	for (const [, section] of sections) {
		if (start && section.cfiPos < start.spinePos) {
			continue;
		}
		processing.push(q.enqueue(processWords, section, wordCount, start, wordCounter, requestMethod));
	}

	q.run();

	const processed = await Promise.all(processing);

	for (const group of processed) {
		if (!count) {
			locations.push(...group);
			continue;
		}
		// Cap against what is still missing, not against the size of this one group
		const remainingCount = count - locations.length;
		if (remainingCount <= 0) {
			break;
		}
		locations.push(...group.slice(0, remainingCount));
	}

	return locations;
}
/**
 * Fetch a section's document and hand it to parseWords
 * @param {object} section needs `url`; passed through to parseWords
 * @param {int} wordCount how many words to split on
 * @param {EpubCFI} [startCfi] start position
 * @param {number} wordCounter words already counted before this section
 * @param {function} requestMethod called with the section url
 * @return {Promise<Array<object>>} locations of the section
 */
async function processWords(section, wordCount, startCfi, wordCounter, requestMethod) {
	const doc = await requestMethod(section.url);
	return parseWords(doc, section, wordCount, startCfi, wordCounter);
}
/**
 * Count whitespace-separated words
 * @param {string} s
 * @return {number} number of words, 0 for an empty or whitespace-only string
 */
function countWords(s) {
	const trimmed = s.trim();
	if (trimmed.length === 0) {
		return 0;
	}
	// Any run of whitespace (spaces, tabs, newlines) separates two words
	return trimmed.split(/\s+/).length;
}
/**
 * Walk the text nodes of a document body and emit a CFI roughly every `wordCount` words
 * @param {Document} doc document of the section
 * @param {object} section needs `cfiBase` and `cfiPos`
 * @param {int} wordCount how many words to split on
 * @param {EpubCFI} [startCfi] start position; earlier nodes of its section are skipped
 * @param {number} wordCounter words already counted before this section
 * @return {Array<object>} objects with `cfi` and `wordCount`, empty when the document has no body
 */
function parseWords(doc, section, wordCount, startCfi, wordCounter) {
	const cfiBase = section.cfiBase;
	const locations = [];
	const body = qs(doc, "body");
	const _break = wordCount;
	// cfiPos is the position the caller filters sections by, so it is used for both checks
	const inStartSection = Boolean(startCfi) && section.cfiPos === startCfi.spinePos;
	let foundStartNode = !inStartSection;
	let startNode;

	if (!body) {
		return locations;
	}

	if (inStartSection) {
		startNode = startCfi.findNode(startCfi.range ? startCfi.path.steps.concat(startCfi.start.steps) : startCfi.path.steps, doc);
	}

	const parser = function(node) {
		if (!foundStartNode) {
			if (node === startNode) {
				foundStartNode = true;
			} else {
				return false;
			}
		}

		// Only short nodes are checked for being whitespace-only
		if (node.textContent.length < 10) {
			if (node.textContent.trim().length === 0) {
				return false;
			}
		}

		const len = countWords(node.textContent);
		let dist;
		let pos = 0;

		if (len === 0) {
			return false; // continue
		}

		dist = _break - wordCounter;

		// Node is smaller than a break,
		// skip over it
		if (dist > len) {
			wordCounter += len;
			pos = len;
		}

		while (pos < len) {
			dist = _break - wordCounter;

			// Gone over
			if (pos + dist >= len) {
				// Continue counter for next node
				wordCounter += len - pos;
				// break
				pos = len;
			// At End
			} else {
				// Advance pos
				pos += dist;

				let cfi = new EpubCFI(node, cfiBase);
				locations.push({ cfi: cfi.toString(), wordCount: wordCounter });
				wordCounter = 0;
			}
		}
	};

	sprint(body, parser);

	return locations;
}