Skip to content

Instantly share code, notes, and snippets.

@ahnl
Created September 12, 2023 17:33
Show Gist options
  • Save ahnl/fb69760bccccb11b16275941a1817522 to your computer and use it in GitHub Desktop.
Save ahnl/fb69760bccccb11b16275941a1817522 to your computer and use it in GitHub Desktop.
Transfer PDF annotations from broken.pdf to fixed.pdf
import PyPDF2
def transfer_annotations(src_pdf, dest_pdf, output_pdf):
    """Copy page annotations from ``src_pdf`` onto the pages of ``dest_pdf``.

    Page *i* of the output is page *i* of ``dest_pdf`` with the entries of
    page *i*'s ``/Annots`` array from ``src_pdf`` appended to its own
    ``/Annots`` array (created if the destination page has none).

    Args:
        src_pdf: Path of the PDF whose annotations are copied.
        dest_pdf: Path of the PDF whose page content is kept.
        output_pdf: Path the merged PDF is written to.

    Returns:
        None. If the two inputs have different page counts, a warning is
        printed and nothing is written.

    Raises:
        OSError: If an input cannot be opened or the output cannot be written
            (e.g. ``FileNotFoundError`` for a missing input).
    """
    # Local import keeps this function self-contained; the merged document is
    # serialized into memory before the output path is touched.
    import io

    merged = io.BytesIO()

    with open(src_pdf, 'rb') as src, open(dest_pdf, 'rb') as dest:
        generic = PyPDF2.generic
        src_reader = PyPDF2.PdfFileReader(src)
        dest_reader = PyPDF2.PdfFileReader(dest)

        # Pages are paired by index, so both documents must be the same length.
        if src_reader.numPages != dest_reader.numPages:
            print("Warning: The number of pages in the source and destination PDFs do not match.")
            return

        pdf_writer = PyPDF2.PdfFileWriter()
        annots_key = generic.NameObject("/Annots")

        for page_num in range(src_reader.numPages):
            # The destination page supplies the content that is kept.
            page = dest_reader.getPage(page_num)

            # The source page only contributes its annotation array.
            annotations = src_reader.getPage(page_num).get("/Annots")

            # Resolve an indirect reference *before* testing for emptiness so
            # an empty source array does not add an empty /Annots entry.
            if isinstance(annotations, generic.IndirectObject):
                annotations = annotations.getObject()

            if annotations:
                if page.get("/Annots") is None:
                    page[annots_key] = generic.ArrayObject()
                # NOTE(review): annotation entries are appended as-is, so any
                # references they hold back into the source document (for
                # example a /P page reference) are left untouched -- confirm
                # this is acceptable for the PDFs being processed.
                page["/Annots"].extend(annotations)

            pdf_writer.addPage(page)

        # Serialize while both inputs are still open: the pages handed to the
        # writer come from the readers above.
        pdf_writer.write(merged)

    # Only now open (and truncate) the output. This way output_pdf may name
    # one of the inputs, and a failure during serialization cannot leave a
    # partially written file behind.
    with open(output_pdf, 'wb') as out:
        out.write(merged.getvalue())

    print(f"Annotations transferred. Output saved as {output_pdf}")
# Example run: copy the annotations found in broken.pdf onto the pages of
# original.pdf and save the merged document as output.pdf.
transfer_annotations(
    src_pdf="broken.pdf",
    dest_pdf="original.pdf",
    output_pdf="output.pdf",
)
@ahnl
Copy link
Author

ahnl commented Sep 12, 2023

pip install PyPDF2==1.26.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment