Created
September 12, 2023 17:33
-
-
Save ahnl/fb69760bccccb11b16275941a1817522 to your computer and use it in GitHub Desktop.
Transfer PDF annotations from broken.pdf to fixed.pdf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import PyPDF2 | |
def transfer_annotations(src_pdf, dest_pdf, output_pdf):
    """Copy page annotations from one PDF onto the pages of another.

    Each page written to ``output_pdf`` is the page at the same index in
    ``dest_pdf``, with the entries of the matching ``src_pdf`` page's
    ``/Annots`` array appended to its own ``/Annots`` array (an empty array
    is created first when the destination page has none).

    Args:
        src_pdf: Path of the PDF whose annotations are read.
        dest_pdf: Path of the PDF whose pages are kept.
        output_pdf: Path the combined PDF is written to.

    Returns:
        None. When the two inputs have different page counts a warning is
        printed and nothing is written.
    """
    # NOTE(review): PdfFileReader / PdfFileWriter / getPage / addPage /
    # getObject are the legacy camelCase PyPDF2 names; newer PyPDF2 releases
    # are reported to reject them -- confirm against the installed version.
    with open(src_pdf, 'rb') as src_file, open(dest_pdf, 'rb') as dest_file:
        annotated_reader = PyPDF2.PdfFileReader(src_file)
        target_reader = PyPDF2.PdfFileReader(dest_file)
        writer = PyPDF2.PdfFileWriter()

        # Pages are paired by index, so both documents must be the same length.
        page_total = annotated_reader.numPages
        if page_total != target_reader.numPages:
            print("Warning: The number of pages in the source and destination PDFs do not match.")
            return

        for index in range(page_total):
            # The destination page supplies the content that is kept ...
            target_page = target_reader.getPage(index)
            # ... and the source page supplies the annotations to add.
            source_page = annotated_reader.getPage(index)

            found = source_page.get("/Annots")
            if found:
                # The entry may be an indirect reference; resolve it to the
                # underlying object before iterating over it.
                if isinstance(found, PyPDF2.generic.IndirectObject):
                    found = found.getObject()

                # Make sure the destination page has an array to extend.
                if target_page.get("/Annots") is None:
                    target_page[PyPDF2.generic.NameObject("/Annots")] = (
                        PyPDF2.generic.ArrayObject()
                    )

                target_page["/Annots"].extend(found)

            # Every destination page is emitted, annotated or not.
            writer.addPage(target_page)

        # Written while both inputs are still open -- presumably the readers
        # still need their file handles at write time; verify before moving
        # this outside the ``with`` block.
        with open(output_pdf, 'wb') as out_file:
            writer.write(out_file)

        print(f"Annotations transferred. Output saved as {output_pdf}")
# Transfer annotations from broken.pdf to original.pdf.
# Guarded so that importing this module to reuse transfer_annotations() does
# not immediately try to open the hard-coded input files.
if __name__ == "__main__":
    transfer_annotations("broken.pdf", "original.pdf", "output.pdf")
Author
ahnl
commented
Sep 12, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment