This question is specific to pdf.js, a JavaScript-based PDF renderer. I'm building a custom version in which I need to extract the text that I select inside the PDF.
There are other posts that show how to fetch the text from one page or from the whole PDF document, such as the one here, but I'm looking to grab the specific text that the user selects and then, for example, alert it or print it in the console.
Advertisement
Answer
What you are looking for is the window.getSelection() method. It returns a Selection object that describes the range of text currently selected on the web page.
Here is how you can use getSelection() together with pdf.js:
JavaScript
x
29
29
1
/**
 * Captures the rectangles of the current browser text selection and converts
 * them from screen coordinates into coordinates of the current pdf.js page.
 *
 * Reads the globals `PDFViewerApplication` (pdf.js viewer) and `window`.
 *
 * @returns {{page: number, coords: Array<Array<number>>}} `page` is the
 *   zero-based index of the viewer's current page. `coords` holds one entry
 *   per selection rectangle: the converted top-left point concatenated with
 *   the converted bottom-right point (presumably [x1, y1, x2, y2], assuming
 *   `convertToPdfPoint` returns a two-element array). `coords` is empty when
 *   nothing is selected.
 */
function getHightlightCoords() {
  var pageIndex = PDFViewerApplication.pdfViewer.currentPageNumber - 1;
  var selection = window.getSelection();

  // getRangeAt(0) throws when the selection holds no range, so report an
  // empty result instead of failing.
  if (!selection || selection.rangeCount === 0) {
    return {page: pageIndex, coords: []};
  }

  var page = PDFViewerApplication.pdfViewer.getPageView(pageIndex);
  var pageRect = page.canvas.getClientRects()[0];
  var viewport = page.viewport;
  var selectionRects = selection.getRangeAt(0).getClientRects();

  // getClientRects() yields an array-like list without a map() method of its
  // own, so borrow Array.prototype.map.
  var selected = Array.prototype.map.call(selectionRects, function (r) {
    // Make the rectangle relative to the page canvas before converting.
    return viewport.convertToPdfPoint(r.left - pageRect.left, r.top - pageRect.top).concat(
      viewport.convertToPdfPoint(r.right - pageRect.left, r.bottom - pageRect.top));
  });

  return {page: pageIndex, coords: selected};
}
13
14
15
/**
 * Draws one pink overlay element per stored rectangle on a pdf.js page.
 *
 * Reads the globals `PDFViewerApplication` (pdf.js viewer) and `document`.
 *
 * @param {{page: number, coords: Array<Array<number>>}} selected - Page index
 *   and rectangles, in the shape produced by `getHightlightCoords`.
 * @returns {Array<Element>} The overlay elements appended to the page, in
 *   the order of `selected.coords`, so a caller can remove them later.
 */
function showHighlight(selected) {
  var pageIndex = selected.page;
  var page = PDFViewerApplication.pdfViewer.getPageView(pageIndex);
  // Overlays are appended next to the canvas, inside its parent element.
  var pageElement = page.canvas.parentElement;
  var viewport = page.viewport;
  var created = [];

  selected.coords.forEach(function (rect) {
    var bounds = viewport.convertToViewportRectangle(rect);
    var el = document.createElement('div');
    // The converted corners may come back in either order, so min()/abs()
    // derive the top-left origin and a positive size.
    el.setAttribute('style', 'position: absolute; background-color: pink;' +
      'left:' + Math.min(bounds[0], bounds[2]) + 'px; top:' + Math.min(bounds[1], bounds[3]) + 'px;' +
      'width:' + Math.abs(bounds[0] - bounds[2]) + 'px; height:' + Math.abs(bounds[1] - bounds[3]) + 'px;');
    pageElement.appendChild(el);
    created.push(el);
  });

  return created;
}
29