-
-
Save Lokawn/95b868238ae8fb24f2b83123d05467c0 to your computer and use it in GitHub Desktop.
Merge all attributes of multiple PDF files using MuPDF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Merge all attributes of multiple PDF files (pdf-full-merge.js)
// Extended from MuPDF's docs/examples/pdf-merge.js
// Ever had problems with tools that don't copy certain attributes of a PDF?
// This script uses MuPDF to merge/join/concatenate as much as possible, including:
// - bookmarks / outlines / table of contents
// - link attributes of said outlines, such as viewrect and zoom
// - whether outlines appear open or closed by default
// - annotations
// - 3D objects / PDF3D
// - author / title metadata
// - first page / zoom settings metadata
// Notable exceptions/omissions:
// - JavaScript scripts and actions
// This script can also serve as an example of how to change any of these
// settings manually, or copy them differently.
// Append a copy of one source page to the end of dstDoc.
//
//   dstDoc     - destination document; the new page dictionary is created
//                in it and inserted at position -1 (the end)
//   srcDoc     - source document the page is looked up in
//   pageNumber - page index handed to srcDoc.findPage()
//   dstFromSrc - graft map used to copy every attribute value
//
// Only the attributes listed in pageKeys are carried over, and only when
// they are present (truthy) directly on the source page object.
function copyPage(dstDoc, srcDoc, pageNumber, dstFromSrc) {
    // Besides MediaBox, the remaining page boxes and UserUnit are copied too,
    // so that cropped or scaled pages keep their visible area and size.
    var pageKeys = [
        "MediaBox", "CropBox", "BleedBox", "TrimBox", "ArtBox",
        "Rotate", "UserUnit",
        "Resources", "Contents", "Group", "Annots"
    ]
    var srcPage = srcDoc.findPage(pageNumber)
    var dstPage = dstDoc.newDictionary()
    dstPage.Type = dstDoc.newName("Page")
    pageKeys.forEach(function (key) {
        var value = srcPage[key]
        if (value) dstPage[key] = dstFromSrc.graftObject(value)
    })
    dstDoc.insertPage(-1, dstDoc.addObject(dstPage))
}
// Append every page of srcDoc to dstDoc, in order.
// One graft map is created per call and passed to each copyPage() call.
function copyAllPages(dstDoc, srcDoc) {
    var graftMap = dstDoc.newGraftMap()
    var pageCount = srcDoc.countPages()
    var pageIndex = 0
    while (pageIndex < pageCount) {
        copyPage(dstDoc, srcDoc, pageIndex, graftMap)
        pageIndex += 1
    }
}
// Recursively insert a list of outline nodes at the cursor position.
//
//   cursor    - outline iterator of the destination document; on return it
//               has been advanced past the inserted items
//   srcDoc    - source document (only passed along to recursive calls)
//   startPage - number added to the page of every "#page=N..." link
//   list      - array of nodes with title / uri / open / down properties;
//               a missing (null/undefined) list is treated as empty
function copyOutlineRec(cursor, srcDoc, startPage, list) {
    if (!list) return
    list.forEach(function (node) {
        // Rewrite link destination URI, to increment the "page" parameter.
        // Nodes without a uri, and uris of any other form, are left as-is.
        var uri = node.uri
        if (typeof uri === "string" && uri.slice(0, 6) === "#page=") {
            var amp = uri.indexOf("&")
            if (amp === -1) amp = uri.length
            var page = Number(uri.slice(6, amp))
            // Keep the original uri if the page number cannot be parsed,
            // rather than emitting "#page=NaN"
            if (isFinite(page))
                uri = "#page=" + (startPage + page) + uri.slice(amp)
        }
        // Insert empty item, to insert its children first
        cursor.insert({})
        cursor.prev()
        if (node.down) {
            cursor.down()
            copyOutlineRec(cursor, srcDoc, startPage, node.down)
            cursor.up()
        }
        // Add information to item afterwards, so we can set its open status
        var item = { title: node.title, open: node.open }
        if (uri != null) item.uri = uri
        cursor.update(item)
        cursor.next()
    })
}
// Append the outline (bookmarks) of srcDoc to the end of dstDoc's outline.
//
//   dstDoc    - destination document; its outline iterator is advanced past
//               all existing top-level items before anything is inserted
//   srcDoc    - source document whose outline is loaded
//   startPage - page offset forwarded to copyOutlineRec() for link rewriting
//
// Does nothing when srcDoc.loadOutline() yields no outline (a falsy value),
// so sources without bookmarks can still be merged.
function copyOutline(dstDoc, srcDoc, startPage) {
    var outline = srcDoc.loadOutline()
    if (!outline) return
    var cursor = dstDoc.outlineIterator()
    while (cursor.item())
        cursor.next()
    copyOutlineRec(cursor, srcDoc, startPage, outline)
}
// Copy selected document-level metadata from srcDoc into dstDoc.
//
//   dstDoc - destination document; its trailer Root and Info are updated
//   srcDoc - source document; its trailer Root and Info are read
//
// Because each present source value overwrites the destination, calling this
// once per input leaves the values of the most recent file that has them.
// Values are copied through a graft map rather than assigned across
// documents. A dictionary missing on either side is skipped, so a source
// without an Info dictionary does not abort the merge.
function copyMeta(dstDoc, srcDoc) {
    var dstTrailer = dstDoc.getTrailer()
    var srcTrailer = srcDoc.getTrailer()
    var graftMap = dstDoc.newGraftMap()

    // Copy each present (truthy) key from src to dst
    function copyKeys(dst, src, keys) {
        if (!dst || !src) return
        keys.forEach(function (key) {
            var value = src[key]
            if (value) dst[key] = graftMap.graftObject(value)
        })
    }

    // Use the metadata items found in the most recent pdf file
    copyKeys(dstTrailer.Root, srcTrailer.Root, ["PageLayout", "PageMode"])
    copyKeys(dstTrailer.Info, srcTrailer.Info,
        ["Title", "Author", "Subject", "Keywords"])
}
// Merge the input files named in scriptArgs[1..] into a new document and
// save it to the path in scriptArgs[0].
//
// For each input, in order: all pages are appended, its outline is appended
// with page links shifted by the number of pages already merged, and its
// metadata is copied over the metadata gathered so far.
function pdfmerge() {
    var srcDoc, dstDoc, i
    // Initialize PDF and set version
    dstDoc = new PDFDocument()
    dstDoc.getTrailer().Root.Version = "1.4"
    // copyMeta() and the static metadata below write into this Info dictionary
    dstDoc.getTrailer().Info = dstDoc.addObject({})
    // Pages merged so far; used as the page offset for outline links
    var pages = 0
    for (i = 1; i < scriptArgs.length; ++i) {
        srcDoc = new PDFDocument(scriptArgs[i])
        copyAllPages(dstDoc, srcDoc)
        copyOutline(dstDoc, srcDoc, pages)
        copyMeta(dstDoc, srcDoc)
        pages += srcDoc.countPages()
    }
    // Set static metadata
    var dstInfo = dstDoc.getTrailer().Info
    dstInfo.Creator = dstDoc.newString("pdf-full-merge.js")
    dstInfo.Producer = dstDoc.newString("MuPDF")
    // toISOString() reports UTC, so turn "YYYY-MM-DDTHH:mm:ss.sssZ" into
    // "D:YYYYMMDDHHmmss" and append "Z" to mark the timestamp as UTC
    dstInfo.CreationDate = dstDoc.newString(
        "D:" + new Date().toISOString().replace(/[-:T]/g, "").split(".")[0] + "Z")
    dstDoc.save(scriptArgs[0], "compress,garbage")
}
// Script entry point: merge when an output path and at least one input are
// given, otherwise print the usage line.
if (scriptArgs.length >= 2) {
    pdfmerge()
} else {
    print("usage: mutool run pdf-full-merge.js output.pdf input1.pdf input2.pdf ...")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment