-
-
Save andreicrnd/a09d1b848623bd8b85823cb05e66dbaa to your computer and use it in GitHub Desktop.
Render the text of a PDF with PDF.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype html> | |
<meta charset="utf-8"> | |
<title>Render the text of a PDF with PDF.js</title> | |
<style> | |
/* One box per PDF page. `position: relative` makes the page the containing
   block for the absolutely positioned text spans placed inside it. The 1px
   font-size / unit line-height are only a baseline: each span gets its own
   inline font-size from the script. */
.page-container {
  position: relative;
  box-shadow: 0 1px 3px #444;
  font-size: 1px;
  line-height: 1;
}

/* One span per PDF text item, positioned via inline left/top.
   `white-space: pre` keeps the item's spacing intact, and the left/bottom
   transform origin makes the script's horizontal scaleX() stretch the run
   from its left edge. */
span {
  position: absolute;
  transform-origin: left bottom;
  white-space: pre;
  cursor: text;
}
</style> | |
<body> | |
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script> | |
<script> | |
// Global PDF.js settings; this script runs before the loader script below,
// so they are in place when the document is requested.

// Active settings (names as documented by the legacy PDF.js global API:
// no web worker, no HTTP range requests).
PDFJS.disableWorker = true;
PDFJS.disableRange = true;

// Alternatives left disabled by the author:
// PDFJS.workerSrc = 'bower_components/pdfjs-dist/build/pdf.worker.js'
// PDFJS.cMapUrl = 'bower_components/pdfjs-dist/cmaps'
// PDFJS.cMapPacked = true
// PDFJS.disableStream = true
</script> | |
<script> | |
// Loads a PDF with PDF.js and renders the text of every page as absolutely
// positioned <span> elements inside one `.page-container` <div> per page.
//
// NOTE(review): this relies on the legacy `PDFJS` global and the positional
// `page.getViewport(scale)` signature — confirm that the pdf.js build loaded
// by the <script> tag above still exposes both.
(function () {
  'use strict';

  var PDF_URL = 'https://peerj.com/articles/2548.pdf';
  var PAGE_SCALE = 1.5;

  // Off-screen canvas context, used only to measure rendered text widths.
  var measureCtx = document.createElement('canvas').getContext('2d', { alpha: false });

  /**
   * Builds the <span> for a single text item.
   *
   * @param {Object} textItem - Entry of `textContent.items` (reads `str`,
   *   `transform`, `width`).
   * @param {Object} style - Entry of `textContent.styles` for the item's font
   *   (reads `fontFamily`, `ascent`, `descent`).
   * @param {Object} viewport - Page viewport (reads `transform`, `scale`).
   * @returns {HTMLSpanElement} The positioned, horizontally scaled span.
   */
  function createTextSpan(textItem, style, viewport) {
    // Compose viewport and item transforms, then apply a vertical flip matrix.
    var tx = PDFJS.Util.transform(
      PDFJS.Util.transform(viewport.transform, textItem.transform),
      [1, 0, 0, -1, 0, 0]
    );
    var fontSize = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3]));

    // Adjust the vertical position for font ascent/descent.
    var top = tx[5];
    if (style.ascent) {
      top -= fontSize * style.ascent;
    } else if (style.descent) {
      top -= fontSize * (1 + style.descent);
    } else {
      top -= fontSize / 2;
    }

    // Stretch the span horizontally so its rendered width matches the width
    // reported by PDF.js. The measurement uses the same font size the span is
    // rendered with; when nothing can be measured the span is left unscaled.
    var scaleX = 1;
    if (textItem.width > 0) {
      measureCtx.font = fontSize + 'px ' + style.fontFamily;
      var measuredWidth = measureCtx.measureText(textItem.str).width;
      if (measuredWidth > 0) {
        scaleX = (textItem.width * viewport.scale) / measuredWidth;
      }
    }

    var span = document.createElement('span');
    span.textContent = textItem.str;
    span.style.fontFamily = style.fontFamily;
    span.style.fontSize = fontSize + 'px';
    span.style.transform = 'scaleX(' + scaleX + ')';
    span.style.left = tx[4] + 'px';
    span.style.top = top + 'px';
    return span;
  }

  /**
   * Appends a container for `page` to the document body and fills it with the
   * page's text spans once the text content is available.
   *
   * @param {Object} page - PDF.js page object.
   * @returns {Promise} Settles when the page's text has been added (or the
   *   text content request failed).
   */
  function renderPage(page) {
    var viewport = page.getViewport(PAGE_SCALE);

    var pageContainer = document.createElement('div');
    pageContainer.classList.add('page-container');
    pageContainer.style.width = viewport.width + 'px';
    pageContainer.style.height = viewport.height + 'px';
    // Appended synchronously so containers keep the order renderPage is called in.
    document.body.appendChild(pageContainer);

    return page.getTextContent({ normalizeWhitespace: true }).then(function (textContent) {
      // Build all spans off-DOM and attach them in a single operation.
      var fragment = document.createDocumentFragment();
      textContent.items.forEach(function (textItem) {
        var style = textContent.styles[textItem.fontName];
        fragment.appendChild(createTextSpan(textItem, style, viewport));
      });
      pageContainer.appendChild(fragment);
    });
  }

  PDFJS.getDocument(PDF_URL).then(function (pdf) {
    var pagePromises = [];
    for (var pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      pagePromises.push(pdf.getPage(pageNumber));
    }
    // Promise.all yields the pages in request order, so the containers are
    // appended in page order regardless of which getPage() settles first.
    return Promise.all(pagePromises).then(function (pages) {
      return Promise.all(pages.map(renderPage));
    });
  }).catch(function (error) {
    // Surface load/render failures instead of leaving the rejection unhandled.
    console.error('Failed to render PDF text:', error);
  });
})();
</script> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment