Skip to content
Advertisement

How can I get the selected text in a PDF in JavaScript?

I’m writing a Chrome extension to manipulate PDF files, so I want to get the selected text in the PDF. How can I do that?

Something like this:

enter image description here

Advertisement

Answer

You can use the internal undocumented commands of the built-in PDF viewer.

Here’s an example of a content script:

/**
 * Asks the embedded PDF plugin for the currently selected text.
 *
 * Registers a one-shot `message` listener on `window`, then injects a
 * `<script>` element so that page-context code posts a `getSelectedText`
 * request to the first `<embed>` element. The listener removes itself once a
 * matching reply has been received.
 *
 * Note: there is no timeout or rejection path, so the promise stays pending
 * (and the listener stays registered) if no matching reply ever arrives.
 *
 * @returns {Promise<*>} Resolves with the `selectedText` field of the reply.
 */
function getPdfSelectedText() {
  // Presumably the origin of Chrome's built-in PDF viewer extension — verify
  // against the target browser version.
  const viewerOrigin = 'chrome-extension://mhjfbmdgcfjbbpaeojofohoefgiehjai';
  const replyType = 'getSelectedTextReply';

  return new Promise(resolve => {
    const handleReply = event => {
      // Ignore anything that did not come from the expected origin.
      if (event.origin !== viewerOrigin) {
        return;
      }
      const payload = event.data;
      if (!payload || payload.type !== replyType) {
        return;
      }
      window.removeEventListener('message', handleReply);
      resolve(payload.selectedText);
    };
    window.addEventListener('message', handleReply);

    // The request has to run in page context to reach postMessage of the
    // embedded plugin, hence the injected <script> element.
    const injector = document.createElement('script');
    const {manifest_version: manifestVersion} = chrome.runtime.getManifest();
    if (manifestVersion > 2) {
      // Newer manifests load the request from the packaged query-pdf.js file.
      injector.src = chrome.runtime.getURL('query-pdf.js');
    } else {
      // Manifest version 2 and below: inline the stringified function.
      injector.textContent = `(${() => {
        document.querySelector('embed').postMessage({type: 'getSelectedText'}, '*');
      }})()`;
    }
    document.documentElement.appendChild(injector);
    // The element is only a vehicle for the code; detach it right away.
    injector.remove();
  });
}

// Answers 'getPdfSelection' requests from other parts of the extension with
// the text currently selected in the PDF; every other message is left alone.
chrome.runtime.onMessage.addListener(function handlePdfSelectionRequest(request, sender, respond) {
  if (request !== 'getPdfSelection') {
    return;
  }
  getPdfSelectedText().then(respond);
  // The reply is sent asynchronously; returning true tells the runtime to
  // keep the response channel open until respond() is called.
  return true;
});

ManifestV3 extensions also need this:

  • manifest.json should expose query-pdf.js

      "web_accessible_resources": [{
        "resources": ["query-pdf.js"],
        "matches": ["<all_urls>"],
        "use_dynamic_url": true
      }]
    
  • query-pdf.js

    // Asks the first <embed> element on the page for its current selection.
    // Wrapped in a block so that re-injecting this script never redeclares a
    // global binding.
    {
      const pdfEmbed = document.querySelector('embed');
      pdfEmbed.postMessage({type: 'getSelectedText'}, '*');
    }
    
Advertisement