Skip to content

Instantly share code, notes, and snippets.

@urjeetpatel
Created February 26, 2020 17:40
Show Gist options
  • Save urjeetpatel/9d5f4cc75192b64e60994a1a3d51cb33 to your computer and use it in GitHub Desktop.
Save urjeetpatel/9d5f4cc75192b64e60994a1a3d51cb33 to your computer and use it in GitHub Desktop.
Use Python to remove pages from a PDF
#! /usr/bin/python3
import click
import PyPDF2
def parseIntSet(inputString):
    """Parse a comma-separated list of integers and ranges into a set.

    Each comma-separated token is either a single integer (``"4"``) or a
    dash-separated range (``"5-7"``).  Ranges are inclusive and their bounds
    may be given in either order; a token with more than two dash-separated
    numbers (``"1-3-5"``) covers everything from its smallest to its largest
    value.  Whitespace around tokens and numbers is ignored.

    Args:
        inputString: The selection text, e.g. ``"1,3,5-7"``.

    Returns:
        A set of every integer selected by the valid tokens.

    Warns:
        UserWarning: If one or more non-empty tokens could not be parsed.
            Those tokens are skipped; the valid ones are still returned.
    """
    import warnings

    selection = set()
    invalid = []
    for token in (part.strip() for part in inputString.split(",")):
        if not token:
            # Empty input or a stray/trailing comma: nothing to select.
            continue
        try:
            # Most tokens are plain integers (this also accepts "-3").
            selection.add(int(token))
            continue
        except ValueError:
            # Not a plain integer; fall through and try it as a range.
            pass
        try:
            bounds = sorted(int(piece) for piece in token.split("-"))
        except ValueError:
            invalid.append(token)
        else:
            selection.update(range(bounds[0], bounds[-1] + 1))
    if invalid:
        # Surface typos instead of silently selecting fewer values than the
        # caller asked for.
        warnings.warn(
            "Ignoring invalid selection token(s): "
            + ", ".join(repr(token) for token in invalid),
            stacklevel=2,
        )
    return selection
@click.command()
@click.argument("src", nargs=1)
@click.argument("dst", nargs=1)
@click.argument("skip", nargs=1)
def merge(skip, src, dst):
    """Copy the PDF at SRC to DST, leaving out the pages listed in SKIP.

    SKIP is a comma-separated list of 1-based page numbers and inclusive
    ranges, for example "1,3,5-7".
    """
    pdf_writer = PyPDF2.PdfFileWriter()
    print(f"Processing {src}")
    # Context managers guarantee both files are closed even if reading or
    # writing fails part-way through.
    with open(src, "rb") as pdf_file_object:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file_object)
        skipped = parseIntSet(skip)
        total_pages = pdf_reader.numPages
        for pagenum in range(total_pages):
            # PyPDF2 indexes pages from 0; SKIP uses 1-based page numbers.
            page_number = pagenum + 1
            if page_number in skipped:
                continue
            print(f" Adding page:{page_number} of {total_pages}")
            pdf_writer.addPage(pdf_reader.getPage(pagenum))
        # Write while the source is still open, as the original did.
        # NOTE(review): presumably the writer still reads page data from the
        # source stream at this point, so SRC and DST should be different
        # files (DST is truncated on open) -- confirm against PyPDF2 docs.
        with open(dst, "wb") as pdf_out_file:
            pdf_writer.write(pdf_out_file)
    print("Done")
# Script entry point: run the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    merge()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment