Skip to content

Instantly share code, notes, and snippets.

@tigrouind
Last active July 20, 2025 13:34
Show Gist options
  • Save tigrouind/123f2bc6512e0f560cc857f4f46c4b30 to your computer and use it in GitHub Desktop.
Save tigrouind/123f2bc6512e0f560cc857f4f46c4b30 to your computer and use it in GitHub Desktop.
Extract the JPEG images embedded in a PDF and pack them into a ZIP archive. Image file names are derived from the PDF's file name.
import fitz # PyMuPDF
import sys
import os
import zipfile
def extract_jpg_images_from_pdf(pdf_path):
    """Collect the JPEG images referenced by the pages of a PDF.

    Pages are visited in document order and every image entry listed for a
    page is extracted. No de-duplication is performed: an xref listed by
    more than one page is extracted each time it is listed.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        A list of ``(image_bytes, extension)`` tuples, one per extracted
        image whose reported extension is ``jpg`` or ``jpeg`` (compared
        case-insensitively; the extension is returned as reported).
    """
    images = []
    # The context manager closes the document even if extraction raises,
    # so the underlying file handle is never leaked.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            for img in page.get_images(full=True):
                xref = img[0]  # first field of each entry is used as the xref
                base_image = doc.extract_image(xref)
                img_ext = base_image["ext"]
                if img_ext.lower() in ("jpg", "jpeg"):
                    images.append((base_image["image"], img_ext))
    return images
def save_images_and_zip(images, pdf_path):
    """Pack extracted images into ``<base>.zip`` in the current directory.

    ``<base>`` is the PDF's file name without its directory or extension.
    Archive entries are named ``<base>_<n>.<ext>`` with ``n`` starting at 1,
    in the order the images are supplied.

    The image bytes are written straight from memory into the archive, so
    no scratch files or directories are created on disk. This avoids
    clobbering (or failing on) a pre-existing ``<base>_extracted_jpgs``
    directory and leaves nothing to clean up if writing is interrupted.

    Args:
        images: Iterable of ``(image_bytes, extension)`` pairs.
        pdf_path: Path of the source PDF; only its base name is used.
    """
    base = os.path.splitext(os.path.basename(pdf_path))[0]
    zip_filename = f"{base}.zip"
    with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
        for index, (img_bytes, img_ext) in enumerate(images, start=1):
            # With a plain name, writestr uses the archive's own compression
            # setting (ZIP_DEFLATED here).
            zipf.writestr(f"{base}_{index}.{img_ext}", img_bytes)
    print(f"ZIP created: {zip_filename}")
def main():
    """Command-line entry point: extract JPEGs from one PDF and zip them.

    Expects exactly one command-line argument, the PDF path. Prints a usage
    message and exits with status 1 otherwise. Exits with status 0 after a
    notice when the PDF yields no JPEG images.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python extract_jpgs_from_pdf.py <file.pdf>")
        sys.exit(1)

    (source_pdf,) = cli_args
    found = extract_jpg_images_from_pdf(source_pdf)
    if found:
        save_images_and_zip(found, source_pdf)
    else:
        print("No JPG images found in PDF.")
        sys.exit(0)
# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment