mirror of
https://github.com/Yetangitu/owncloud-apps.git
synced 2025-10-02 14:49:17 +02:00
files_reader: working towards working search, W.I.P.
This commit is contained in:
parent
2dd54dd028
commit
4b31f04330
14 changed files with 580 additions and 173 deletions
|
@ -1,11 +1,36 @@
|
|||
// Result codes that the find logic hands to the UI-state callback.
var FindStates = {
  // A match was selected without wrapping (also reported for an empty query).
  FIND_FOUND: 0,
  // The search finished without selecting a match.
  FIND_NOTFOUND: 1,
  // A match was selected after the search wrapped around the document.
  FIND_WRAPPED: 2,
  // A command was received and the search has not completed yet.
  FIND_PENDING: 3
};
|
||||
|
||||
// Offsets intended for scrolling a selected match into view.
// NOTE(review): in this file they are only mentioned inside commented-out
// code in updateMatchPosition — confirm whether they are still needed.
var FIND_SCROLL_OFFSET_TOP = -50,
    FIND_SCROLL_OFFSET_LEFT = -400;
|
||||
|
||||
// Typographic characters and their plain-ASCII replacements. Both the page
// text and the query are passed through this table before being compared.
var CHARACTERS_TO_NORMALIZE = {
  // Single quotation marks -> apostrophe
  '\u2018': "'",   // left
  '\u2019': "'",   // right
  '\u201A': "'",   // low-9
  '\u201B': "'",   // high-reversed-9

  // Double quotation marks -> straight double quote
  '\u201C': '"',   // left
  '\u201D': '"',   // right
  '\u201E': '"',   // low-9
  '\u201F': '"',   // high-reversed-9

  // Vulgar fractions -> digit/slash/digit
  '\u00BC': '1/4', // one quarter
  '\u00BD': '1/2', // one half
  '\u00BE': '3/4', // three quarters
};
|
||||
|
||||
PDFJS.reader.SearchController = function () {
|
||||
|
||||
var reader = this,
|
||||
book = this.book,
|
||||
query = "";
|
||||
|
||||
var $searchBox = $("#searchBox"),
|
||||
$clearBtn = $("#searchBox").next(),
|
||||
$clear_search = $("#clear_search"),
|
||||
$clear_search = $("#clear_search"),
|
||||
$searchResults = $("#searchResults"),
|
||||
$searchView = $("#searchView"),
|
||||
$body = $("#viewer iframe").contents().find('body'),
|
||||
|
@ -21,6 +46,456 @@ PDFJS.reader.SearchController = function () {
|
|||
$searchView.removeClass("open");
|
||||
};
|
||||
|
||||
// UI callback slots; both start out empty (null).
this.onUpdateResultsCount = this.onUpdateState = null;

// Build the normalization pattern a single time: a global character class
// made of every key in CHARACTERS_TO_NORMALIZE.
var replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
this.normalizationRegex = new RegExp('[' + replace + ']', 'g');
|
||||
|
||||
// Returns every piece of find bookkeeping to its idle value.
// NOTE(review): all state is written to `this`, so the receiver depends on
// how the function is invoked — confirm the intended binding at call sites.
var reset = function () {
  var ctl = this;

  // Text extraction bookkeeping.
  ctl.startedTextExtraction = false;
  ctl.extractTextPromises = [];
  ctl.pendingFindMatches = Object.create(null);

  // While active, find results are meant to be highlighted.
  ctl.active = false;

  // Per-page text and per-page match data.
  ctl.pageContents = [];
  ctl.pageMatches = [];
  ctl.pageMatchesLength = null;
  ctl.matchCount = 0;

  // The match that is currently selected (-1 means none).
  ctl.selected = { pageIdx: -1, matchIdx: -1 };

  // Position of the find algorithm within the document.
  ctl.offset = { pageIdx: null, matchIdx: null };

  ctl.pagesToSearch = null;
  ctl.resumePageIdx = null;
  ctl.state = null;
  ctl.dirtyMatch = false;
  ctl.findTimeout = null;
};
|
||||
|
||||
reset();
|
||||
|
||||
|
||||
/**
 * Replaces every character listed in CHARACTERS_TO_NORMALIZE with its plain
 * replacement so that page text and queries compare equal.
 *
 * The compiled pattern is stored on the controller (`reader`) as
 * `normalizationRegex`. This helper is invoked as a bare function, so `this`
 * is not the controller at the call sites; the pattern is therefore taken
 * from `this` only when it is actually present there and from `reader`
 * otherwise.
 *
 * @param {string} text - text to normalize.
 * @returns {string} the text with all listed characters replaced.
 */
var normalize = function (text) {
  var pattern = (this && this.normalizationRegex) || reader.normalizationRegex;
  return text.replace(pattern, function (ch) {
    return CHARACTERS_TO_NORMALIZE[ch];
  });
};
|
||||
|
||||
/**
 * Helper for multi-term search. Takes the raw hits of all search terms,
 * drops hits that are covered by another hit (e.g. "tamed tame" or
 * "this is", where one term sits inside another) and appends the surviving
 * hits to the two output arrays.
 *
 * Side effects: `matchesWithLength` is sorted in place and dropped entries
 * get `skipped = true`.
 *
 * @param {Array} matchesWithLength - objects { match, matchLength, skipped }.
 * @param {Array} matches - receives the start offsets of the kept hits.
 * @param {Array} matchesLength - receives the lengths of the kept hits.
 */
var _prepareMatches = function (matchesWithLength, matches, matchesLength) {

  // Ascending start offset; equal offsets are ordered shortest first.
  function byOffsetThenLength(left, right) {
    if (left.match !== right.match) {
      return left.match - right.match;
    }
    return left.matchLength - right.matchLength;
  }

  // Decides whether the hit at `position` is covered by another hit and, if
  // so, flags it as skipped.
  function isSubTerm(candidates, position) {
    var candidate = candidates[position];
    var hasSuccessor = position < candidates.length - 1;

    // Cases like "TAMEd TAME": the following hit starts at the same offset
    // and, because of the sort order, is at least as long.
    if (hasSuccessor && candidates[position + 1].match === candidate.match) {
      candidate.skipped = true;
      return true;
    }

    // Cases like "thIS IS": an earlier, non-skipped hit reaches at least as
    // far as this one.
    var candidateEnd = candidate.match + candidate.matchLength;
    for (var k = position - 1; k >= 0; k--) {
      var earlier = candidates[k];
      if (earlier.skipped) {
        continue;
      }
      var earlierEnd = earlier.match + earlier.matchLength;
      if (earlierEnd < candidate.match) {
        // Ends before this hit begins; stop scanning backwards.
        break;
      }
      if (earlierEnd >= candidateEnd) {
        candidate.skipped = true;
        return true;
      }
    }
    return false;
  }

  matchesWithLength.sort(byOffsetThenLength);

  matchesWithLength.forEach(function (entry, position) {
    if (isSubTerm(matchesWithLength, position)) {
      return;
    }
    matches.push(entry.match);
    matchesLength.push(entry.matchLength);
  });
};
|
||||
|
||||
/**
 * Finds every non-overlapping occurrence of the whole query in a page's text
 * and stores the start offsets in `this.pageMatches[pageIndex]`.
 *
 * @param {string} query - text to look for; the caller guards against ''.
 * @param {number} pageIndex - index of the page being searched.
 * @param {string} pageContent - text of that page.
 */
var calcFindPhraseMatch = function (query, pageIndex, pageContent) {
  var hits = [];
  var step = query.length;

  // Each search resumes right after the previous hit, so hits never overlap.
  for (var at = pageContent.indexOf(query, 0);
       at !== -1;
       at = pageContent.indexOf(query, at + step)) {
    hits.push(at);
  }

  this.pageMatches[pageIndex] = hits;
};
|
||||
|
||||
/**
 * Word search: splits the query on whitespace, finds every occurrence of
 * each word in the page text and stores the cleaned-up result in
 * `this.pageMatches[pageIndex]` (start offsets) and
 * `this.pageMatchesLength[pageIndex]` (lengths).
 *
 * A query that contains only whitespace yields no words; the page then gets
 * empty match arrays instead of raising a TypeError.
 *
 * @param {string} query - one or more whitespace-separated words.
 * @param {number} pageIndex - index of the page being searched.
 * @param {string} pageContent - text of that page.
 */
var calcFindWordMatch = function (query, pageIndex, pageContent) {
  var matchesWithLength = [];

  // String.prototype.match returns null (not an empty array) when the query
  // has no non-whitespace run, so fall back to an empty word list.
  var queryArray = query.match(/\S+/g) || [];

  for (var i = 0, len = queryArray.length; i < len; i++) {
    var subquery = queryArray[i];
    var subqueryLen = subquery.length;
    var matchIdx = pageContent.indexOf(subquery, 0);

    while (matchIdx !== -1) {
      // Words differ in length, so the length is recorded with every hit.
      matchesWithLength.push({
        match: matchIdx,
        matchLength: subqueryLen,
        skipped: false
      });
      matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
    }
  }

  // Prepare the arrays that receive the matches.
  if (!this.pageMatchesLength) {
    this.pageMatchesLength = [];
  }
  this.pageMatchesLength[pageIndex] = [];
  this.pageMatches[pageIndex] = [];

  // Sort the hits, drop intersecting terms and fill the two arrays.
  _prepareMatches(matchesWithLength, this.pageMatches[pageIndex],
                  this.pageMatchesLength[pageIndex]);
};
|
||||
|
||||
/**
 * Computes the matches for one page from the current find state, refreshes
 * the page, resumes a search that was waiting for this page and adds the
 * page's hits to the running match count.
 *
 * @param {number} pageIndex - index of the page whose text is searched.
 */
var calcFindMatch = function (pageIndex) {
  var settings = this.state;
  var haystack = normalize(this.pageContents[pageIndex]);
  var needle = normalize(settings.query);

  // Nothing to do for an empty query: the matches should be wiped already.
  if (needle.length === 0) {
    return;
  }

  if (!settings.caseSensitive) {
    haystack = haystack.toLowerCase();
    needle = needle.toLowerCase();
  }

  var finder = settings.phraseSearch ? calcFindPhraseMatch : calcFindWordMatch;
  finder(needle, pageIndex, haystack);

  updatePage(pageIndex);

  // A search was parked on this page; let it continue now.
  if (this.resumePageIdx === pageIndex) {
    this.resumePageIdx = null;
    nextPageMatch();
  }

  // Update the total number of matches.
  var hitsOnPage = this.pageMatches[pageIndex].length;
  if (hitsOnPage > 0) {
    this.matchCount += hitsOnPage;
    updateUIResultsCount();
  }
};
|
||||
|
||||
/**
 * Starts extracting the text of every page, one page after the other. Runs
 * at most once: later calls return immediately while
 * `this.startedTextExtraction` is set.
 *
 * For each page a promise is appended to `this.extractTextPromises`; it
 * resolves with the page index once that page's text is available in
 * `this.pageContents[pageIndex]`.
 *
 * If the text of a page cannot be obtained, the page is stored as an empty
 * string and extraction continues, so a single bad page can no longer leave
 * all following page promises (and with them the search) pending forever.
 */
var extractText = function () {
  if (this.startedTextExtraction) {
    return;
  }
  this.startedTextExtraction = true;
  this.pageContents = [];

  var self = this;
  var numPages = reader.settings.numPages;
  var resolvers = [];

  for (var i = 0; i < numPages; i++) {
    this.extractTextPromises.push(new Promise(function (resolve) {
      resolvers.push(resolve);
    }));
  }

  // No pages: there is nothing to fetch and no promise to resolve.
  if (!(numPages > 0)) {
    return;
  }

  // Records the text of a page, releases that page's promise and moves on.
  function pageDone(pageIndex, text) {
    // Store by index so the text always lines up with its page.
    self.pageContents[pageIndex] = text;
    resolvers[pageIndex](pageIndex);

    if (pageIndex + 1 < numPages) {
      console.log("extracting text from page " + (pageIndex + 1));
      extractPageText(pageIndex + 1);
    } else {
      console.log("finished extracting text");
    }
  }

  // Fetches one page's text content and joins its items into one string.
  function extractPageText(pageIndex) {
    reader.getPageTextContent(pageIndex).then(function (textContent) {
      var textItems = textContent.items;
      var parts = [];
      for (var j = 0, len = textItems.length; j < len; j++) {
        parts.push(textItems[j].str);
      }
      return parts.join('');
    }).then(function (text) {
      pageDone(pageIndex, text);
    }, function (reason) {
      console.error("unable to extract text from page " + (pageIndex + 1), reason);
      pageDone(pageIndex, '');
    });
  }

  extractPageText(0);
};
|
||||
|
||||
/**
 * Entry point for find commands.
 *
 * Stores the new find state, reports FIND_PENDING to the UI and, once the
 * reader's first page promise has resolved, starts text extraction and
 * triggers the search: after 250ms without further input for 'find',
 * immediately for any other command.
 *
 * NOTE(review): the delayed path runs nextMatch bound to this object while
 * the immediate path, extractText and updateUIState are called unbound —
 * confirm which receiver the helpers are supposed to see.
 *
 * @param {string} cmd - command name, e.g. 'find' or 'findagain'.
 * @param {Object} state - find state (query and options).
 */
var executeCommand = function (cmd, state) {
  var controller = this;
  var isRepeat = (cmd === 'findagain');

  // Anything but a repeat of an existing search invalidates the matches.
  if (!isRepeat || controller.state === null) {
    controller.dirtyMatch = true;
  }
  controller.state = state;
  updateUIState(FindStates.FIND_PENDING);

  console.log("execute command ", cmd, " with state ", state);

  reader.firstPagePromise.then(function () {
    extractText();

    clearTimeout(controller.findTimeout);
    if (cmd !== 'find') {
      nextMatch();
      return;
    }
    // Only trigger the find action after 250ms of silence.
    controller.findTimeout = setTimeout(nextMatch.bind(controller), 250);
  });
};
|
||||
|
||||
/**
 * Refreshes the presentation of one page after its find data changed.
 * Currently this only makes the page the reader's current page when it
 * holds the selected match.
 *
 * @param {number} index - zero-based page index.
 */
var updatePage = function (index) {
  // Text-layer refresh from the original implementation, not wired up here:
  //var page = this.pdfViewer.getPageView(index);
  //if (page.textLayer) {
  //  page.textLayer.updateMatches();
  //}
  if (this.selected.pageIdx !== index) {
    return;
  }

  // The page holds the selected match: show it. The reader's currentPage is
  // one-based, hence the + 1.
  reader.settings.currentPage = index + 1;
};
|
||||
|
||||
/**
 * Moves the selection to the next (or previous) match.
 *
 * When the matches are dirty, all find data is reset and a match calculation
 * is scheduled for every page as soon as its text has been extracted. After
 * that the function either steps within the current page's matches or hands
 * over to nextPageMatch to continue on another page.
 *
 * While a search is parked on a page whose matches are not available yet
 * (`resumePageIdx` is set), nothing else is done. Page index 0 is a valid
 * pending page, so the check is made against null/undefined rather than
 * truthiness.
 */
var nextMatch = function () {
  var previous = this.state.findPrevious;
  var currentPageIndex = reader.settings.currentPage - 1;
  var numPages = reader.settings.numPages;

  this.active = true;

  if (this.dirtyMatch) {
    // Need to recalculate the matches, reset everything.
    this.dirtyMatch = false;
    this.selected.pageIdx = this.selected.matchIdx = -1;
    this.offset.pageIdx = currentPageIndex;
    this.offset.matchIdx = null;
    this.hadMatch = false;
    this.resumePageIdx = null;
    this.pageMatches = [];
    this.matchCount = 0;
    this.pageMatchesLength = null;
    var self = this;

    for (var i = 0; i < numPages; i++) {
      // Wipe out any previous highlighted matches.
      updatePage(i);

      // As soon as the text is extracted start finding the matches.
      if (!(i in this.pendingFindMatches)) {
        this.pendingFindMatches[i] = true;
        this.extractTextPromises[i].then(function (pageIdx) {
          delete self.pendingFindMatches[pageIdx];
          calcFindMatch(pageIdx);
        });
      }
    }
  }

  // If there's no query there's no point in searching.
  if (this.state.query === '') {
    updateUIState(FindStates.FIND_FOUND);
    return;
  }

  // If we're waiting on a page, we return since we can't do anything else.
  // (Explicit check: a pending page 0 is falsy but still pending.)
  if (this.resumePageIdx !== null && this.resumePageIdx !== undefined) {
    return;
  }

  var offset = this.offset;
  // Keep track of how many pages we should maximally iterate through.
  this.pagesToSearch = numPages;

  // If there's already a matchIdx that means we are iterating through a
  // page's matches.
  if (offset.matchIdx !== null) {
    var numPageMatches = this.pageMatches[offset.pageIdx].length;
    if ((!previous && offset.matchIdx + 1 < numPageMatches) ||
        (previous && offset.matchIdx > 0)) {
      // The simple case; we just have to advance the matchIdx to select
      // the next match on the page.
      this.hadMatch = true;
      offset.matchIdx = (previous ? offset.matchIdx - 1 : offset.matchIdx + 1);
      updateMatch(true);
      return;
    }
    // We went beyond the current page's matches, so we advance to
    // the next page.
    advanceOffsetPage(previous);
  }

  // Start searching through the page.
  nextPageMatch();
};
|
||||
|
||||
/**
 * Processes the matches of the page the find offset currently points at.
 *
 * @param {Array} matches - match offsets of the current page.
 * @returns {boolean} true when the page search should stop (a match was
 *   selected, or the whole document was searched without a hit); false when
 *   the search should continue on the page the offset was advanced to.
 */
var matchesReady = function (matches) {
  var cursor = this.offset;
  var total = matches.length;
  var backwards = this.state.findPrevious;

  if (total) {
    // The page has matches: select the first one, or the last one when
    // searching backwards.
    this.hadMatch = true;
    cursor.matchIdx = backwards ? total - 1 : 0;
    updateMatch(true);
    return true;
  }

  // No matches on this page, move on to the neighbouring page.
  advanceOffsetPage(backwards);

  if (!cursor.wrapped) {
    // Matches were not found (and searching is not done).
    return false;
  }

  cursor.matchIdx = null;
  if (!(this.pagesToSearch < 0)) {
    return false;
  }

  // Wrapped around with no pages left to try: report "not found". Nothing
  // matched, but the hunt for a page with matches must stop all the same.
  updateMatch(false);
  return true;
};
|
||||
|
||||
/**
 * Callback for a text layer to invoke once the presentation of a match has
 * been updated. Scrolling to the selected match is disabled in this port;
 * the function only writes a console message.
 *
 * @param {number} pageIndex - page index.
 * @param {number} index - match index.
 * @param {Array} elements - text layer div elements array.
 * @param {number} beginIdx - start index of the div array for the match.
 */
var updateMatchPosition = function (pageIndex, index, elements, beginIdx) {
  var isSelectedMatch = this.selected.matchIdx === index &&
                        this.selected.pageIdx === pageIndex;

  if (isSelectedMatch) {
    // Intentionally empty. The original scroll logic is kept for reference:
    //var spot = {
    //  top: FIND_SCROLL_OFFSET_TOP,
    //  left: FIND_SCROLL_OFFSET_LEFT
    //};
    //scrollIntoView(elements[beginIdx], spot,
    //               /* skipOverflowHiddenElements = */ true);
  }

  console.log("would scroll into view here except for the fact that Reader is a non-scrolling reader...");
};
|
||||
|
||||
/**
 * Walks from page to page, starting at the current find offset, until
 * matchesReady reports that the search can stop. If the matches of a page
 * have not been calculated yet, the page index is remembered in
 * `this.resumePageIdx` and the walk ends there.
 */
var nextPageMatch = function () {
  if (this.resumePageIdx !== null) {
    console.error('There can only be one pending page.');
  }

  for (;;) {
    var pageIdx = this.offset.pageIdx;
    var matches = this.pageMatches[pageIdx];

    if (!matches) {
      // No matches exist yet for "matchesReady" to work on; leave a resume
      // point for when they do.
      this.resumePageIdx = pageIdx;
      return;
    }

    if (matchesReady(matches)) {
      return;
    }
  }
};
|
||||
|
||||
/**
 * Moves the find offset one page forward or backward, clears its match
 * index and uses up one unit of the page budget (`this.pagesToSearch`).
 * Stepping past either end of the document continues at the opposite end
 * and marks the offset as wrapped.
 *
 * @param {boolean} previous - true to step backwards.
 */
var advanceOffsetPage = function (previous) {
  var cursor = this.offset;
  var pageCount = this.extractTextPromises.length;

  cursor.pageIdx += previous ? -1 : 1;
  cursor.matchIdx = null;
  this.pagesToSearch--;

  var ranOffTheEnd = cursor.pageIdx >= pageCount || cursor.pageIdx < 0;
  if (ranOffTheEnd) {
    cursor.pageIdx = previous ? pageCount - 1 : 0;
    cursor.wrapped = true;
  }
};
|
||||
|
||||
/**
 * Publishes the outcome of a find step: copies the find offset into the
 * selection when a match was found, clears the wrapped flag, notifies the
 * UI and refreshes the affected pages.
 *
 * @param {boolean} found - whether the step ended on a match.
 */
var updateMatch = function (found) {
  var uiState = FindStates.FIND_NOTFOUND;
  var didWrap = this.offset.wrapped;
  this.offset.wrapped = false;

  if (found) {
    var formerPage = this.selected.pageIdx;
    this.selected.pageIdx = this.offset.pageIdx;
    this.selected.matchIdx = this.offset.matchIdx;
    uiState = didWrap ? FindStates.FIND_WRAPPED : FindStates.FIND_FOUND;

    // The selection left its former page: refresh that page so it no longer
    // shows a selected match.
    var leftFormerPage = formerPage !== this.selected.pageIdx;
    if (formerPage !== -1 && leftFormerPage) {
      updatePage(formerPage);
    }
  }

  updateUIState(uiState, this.state.findPrevious);

  if (this.selected.pageIdx === -1) {
    return;
  }
  updatePage(this.selected.pageIdx);
};
|
||||
|
||||
/**
 * Reports the current total number of matches to the results-count
 * callback, if one is installed on `this`.
 *
 * The callback is invoked as a property of `this` — the same place the
 * guard looks it up — instead of through a free variable that does not
 * exist.
 */
var updateUIResultsCount = function () {
  if (typeof this.onUpdateResultsCount === 'function') {
    this.onUpdateResultsCount(this.matchCount);
  }
};
|
||||
|
||||
/**
 * Reports a find state to the UI-state callback, if one is installed on
 * `this`.
 *
 * The callback is invoked as a property of `this` — the same place the
 * guard looks it up — instead of through a free variable that does not
 * exist.
 *
 * @param {number} state - one of the FindStates values.
 * @param {boolean} [previous] - whether the search ran backwards.
 */
var updateUIState = function (state, previous) {
  if (typeof this.onUpdateState === 'function') {
    this.onUpdateState(state, previous, this.matchCount);
  }
};
|
||||
|
||||
|
||||
var search = function(q) {
|
||||
if (q === undefined) {
|
||||
q = $searchBox.val();
|
||||
|
@ -38,7 +513,9 @@ PDFJS.reader.SearchController = function () {
|
|||
|
||||
reader.SearchController.query = q;
|
||||
|
||||
runQuery(q, $searchResults[0]);
|
||||
//runQuery(q, $searchResults[0]);
|
||||
|
||||
executeCommand('find', {query: q});
|
||||
|
||||
};
|
||||
|
||||
|
@ -84,72 +561,11 @@ PDFJS.reader.SearchController = function () {
|
|||
book.off("renderer:chapterDisplayed", highlightQuery);
|
||||
};
|
||||
|
||||
/**
 * Searches every spine item of the book for `query` and rebuilds the result
 * list inside `element`.
 *
 * The returned promise settles: it resolves with the per-chapter result
 * arrays once the list has been rendered. A chapter that cannot be created,
 * loaded or searched is logged with console.warn and contributes no results
 * instead of blocking the whole search.
 *
 * @param {string} query - text to search for.
 * @param {Element} element - list element that receives the result items.
 * @returns {Promise<Array>} per-chapter results, in spine order.
 */
var runQuery = function (query, element) {

  // Creates, loads and searches the chapter of one spine item.
  function searchSpineItem(spineItem) {
    return new Promise(function (resolve) {
      resolve(new PDFJS.Chapter(spineItem, book.store, book.credentials));
    }).then(function (chapter) {
      return Promise.resolve(chapter.load()).then(function () {
        return chapter.find(query);
      });
    }).catch(function (err) {
      console.warn(err);
      return [];
    });
  }

  // Builds one clickable list entry for a single search hit.
  function buildListItem(hit, position) {
    var listitem = document.createElement("li");
    var link = document.createElement("a");
    listitem.classList.add("list_item");
    listitem.id = "search-" + position;
    link.href = hit.cfi;
    link.textContent = hit.excerpt;
    link.classList.add("toc_link");
    link.addEventListener("click", function (e) {
      e.preventDefault();
      book.gotoCfi(this.getAttribute("href"));
      $searchResults.find(".list_item")
        .removeClass("currentChapter");
      $(this).parent("li").addClass("currentChapter");
      $(this).data('query', query);
      book.on("renderer:chapterDisplayed", highlightQuery);
    });
    listitem.appendChild(link);
    return listitem;
  }

  // Replaces the content of `element` with one entry per hit.
  function renderResults(perChapter) {
    var mergedResults = [].concat.apply([], perChapter);
    element.innerHTML = "";
    for (var i = 0; i < mergedResults.length; i++) {
      try {
        element.appendChild(buildListItem(mergedResults[i], i));
      } catch (e) {
        console.warn(e);
      }
    }
    return perChapter;
  }

  var pending = [];
  for (var i = 0; i < book.spine.length; i++) {
    pending.push(searchSpineItem(book.spine[i]));
  }
  return Promise.all(pending).then(renderResults);
};
|
||||
|
||||
|
||||
return {
|
||||
"show" : onShow,
|
||||
"hide" : onHide,
|
||||
"show": onShow,
|
||||
"hide": onHide,
|
||||
"search": search,
|
||||
"query" : query,
|
||||
"clear" : clear,
|
||||
"unhighlight" : unhighlight
|
||||
"executeCommand": executeCommand
|
||||
};
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue