mara004 · April 24, 2025 20:57 · mara004 · Nov 30, 2023 · mara004 · Apr 3, 2025
diff --git a/pypdfjs.py b/pypdfjs.py
 # Four lines intentionally left blank




 # SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
 # SPDX-License-Identifier: Apache-2.0 OR MPL-2.0

 # See also https://github.com/extremeheat/JSPyBridge/blob/master/examples/python/pdfjs.py

 # Py-Depends: pillow, javascript >= 1.1.0 (jspybridge)
 # Js-Depends: pdfjs-dist, canvas
 # Use `python -m pip install` and `python -m javascript --install`
 # NOTE This currently assumes you have a custom pdf.js build in the same directory as this file, because require("pdfjs-dist") appears broken on the author's nodejs 20. See upstream build instructions. Commit 8b50836d is confirmed to work. Patch the require() calls if you want otherwise.

 import time
 import argparse
 from pathlib import Path
 import PIL.Image
 import javascript

 THIS_DIR = str(Path(__file__).resolve().parent)

 # NOTE canvas must be the build pdfjs is linked against, otherwise it'll fail with type error
 pdfjs = javascript.require( str(THIS_DIR / Path("pdf.js/build/generic/build/pdf.js")) )
 libcanvas = javascript.require( str(THIS_DIR / Path("pdf.js/node_modules/canvas")) )


 def render_pdf(input, outdir, scale):
     """
     Render each page of a PDF with pdf.js (through JSPyBridge) and save it as a JPEG.

     Parameters:
         input: Document source, handed unchanged to pdfjs.getDocument().
         outdir: Output directory; joined with the file name via `/` (main() passes a pathlib.Path).
         scale: Scale factor handed to page.getViewport().

     Pages are written as out_<page number>.jpg, with the 1-based page number zero-padded
     to the digit count of the page total.
     """
     document = pdfjs.getDocument(input).promise
     page_count = document.numPages
     pad = len(str(page_count))

     for page_no in range(1, page_count + 1):
         page = document.getPage(page_no)
         viewport = page.getViewport({"scale": scale})
         width = int(viewport.width)
         height = int(viewport.height)

         # Rasterise the page onto a node-canvas of matching pixel size.
         surface = libcanvas.createCanvas(width, height)
         page.render({"canvasContext": surface.getContext("2d"), "viewport": viewport}).promise

         # Pull the raw pixels across the bridge. Note that blobValueOf() is much faster
         # than valueOf()["data"] for large byte buffers.
         raw_js = surface.toBuffer("raw")
         t0 = time.time()
         raw_py = raw_js.blobValueOf()
         elapsed = time.time() - t0
         print(f"Data transfer took {elapsed}s")

         # The raw canvas bytes are decoded as BGRX into an RGBX image.
         image = PIL.Image.frombuffer("RGBX", (width, height), raw_py, "raw", "BGRX", 0, 1)
         target = outdir / f"out_{page_no:0{pad}d}.jpg"
         image.save(target)

     document.destroy()


 def main():
     """Parse the command line, create the output directory if it is missing, and render the PDF."""

     # Absolute, user-expanded filesystem path.
     to_abs_path = lambda raw: Path(raw).expanduser().resolve()
     # Anything starting with "http" is passed through untouched; everything else is
     # normalised as a local path and handed on as a string.
     to_input = lambda raw: raw if raw.startswith("http") else str(to_abs_path(raw))

     cli = argparse.ArgumentParser(
         description=(
             "Render a PDF file with Mozilla pdf.js via JsPyBridge.\n"
             "Known issues: - URL support is buggy; - certain PDFs may hit memory limits."
         ),
     )
     cli.add_argument("input", type=to_input, help="Input file path or URL.")
     cli.add_argument("--outdir", "-o", type=to_abs_path, required=True)
     cli.add_argument("--scale", type=float, default=4)
     opts = cli.parse_args()

     target_dir = opts.outdir
     if not target_dir.exists():
         target_dir.mkdir(parents=True, exist_ok=True)

     render_pdf(opts.input, target_dir, scale=opts.scale)


 # Only run the CLI when executed as a script, so importing this module does not start a render.
 if __name__ == "__main__":
     main()
diff --git a/pypdfjs_shmem.py b/pypdfjs_shmem.py
 # SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
 # SPDX-License-Identifier: Apache-2.0 OR MPL-2.0

 # This is an experimental pdf.js interface using shared memory.
 # Unfortunately, shm-typed-array does not support Windows (except through Cygwin/WSL), so this is not exactly portable.
 # For another version by the same author that uses pipe-based data transfer via JSPyBridge's .blobValueOf(), see above.

 # Py-Depends: pillow, javascript (JSPyBridge), posix_ipc
 # Js-Depends: pdfjs-dist, canvas, shm-typed-array
 # You can use `python -m pip install`, and `python -m javascript --install`
 # NOTE This currently assumes you have a custom pdf.js build in the same directory as this file, because require("pdfjs-dist") appears broken on the author's nodejs 20. See upstream build instructions. Commit 8b50836d is confirmed to work. Patch the require() calls if you want otherwise.

 import time

 starttm = time.time()
 import mmap
 import argparse
 from pathlib import Path

 # third-party
 import PIL.Image
 import javascript
 import posix_ipc

 THIS_DIR = str(Path(__file__).resolve().parent)

 # NOTE canvas must be the build pdfjs is linked against, otherwise it'll fail with type error
 pdfjs = javascript.require( str(THIS_DIR / Path("pdf.js/build/generic/build/pdf.js")) )
 libcanvas = javascript.require( str(THIS_DIR / Path("pdf.js/node_modules/canvas")) )
 libshm = javascript.require("shm-typed-array")

 print(f"Imports took {time.time() - starttm}s"); del starttm


 def render_pdf(input, outdir, scale):
     """
     Render each page of a PDF with pdf.js and save it as a JPEG, moving pixels via shared memory.

     A single shared memory segment, sized for the largest page (width * height * 4 bytes),
     is created on the JS side with shm-typed-array and opened on the Python side with
     posix_ipc. Each rendered canvas buffer is copied into it and read back by PIL.

     Parameters:
         input: Document source, handed unchanged to pdfjs.getDocument().
         outdir: Output directory; joined with the file name via `/` (main() passes a pathlib.Path).
         scale: Scale factor handed to page.getViewport().

     Raises:
         FileExistsError: If libshm.create() returns no segment, i.e. the name is already taken.

     Output files are numbered from 1 (out_<page number>.jpg), matching the page numbers
     and the naming used by the sibling scripts.
     """

     pdf = pdfjs.getDocument(input).promise
     n_pages = pdf.numPages
     n_digits = len(str(n_pages))

     # First pass: collect the pixel size of every page so the segment can fit the largest one.
     starttm = time.time()
     sizes = []
     for i in range(1, n_pages+1):
         page = pdf.getPage(i)
         viewport = page.getViewport({"scale": scale})
         w, h = int(viewport.width), int(viewport.height)
         sizes.append( (w, h) )

     # 4 bytes per pixel (the raw canvas buffer is decoded as BGRX below)
     max_alloc = max(w*h for w, h in sizes) * 4
     print(f"Shared memory size in bytes: {max_alloc} (took {time.time() - starttm}s to determine)")

     memkey = "/pypdfjs_render_shm"
     js_shm = libshm.create(max_alloc, "Buffer", memkey)
     # An explicit raise instead of `assert`, so the check survives `python -O`.
     if js_shm is None:
         raise FileExistsError("Shared memory of this name already exists, go to /dev/shm and remove it.")
     py_shm_handle = posix_ipc.SharedMemory(memkey)

     try:
         # The context manager closes the Python-side mapping even if rendering fails.
         with mmap.mmap(py_shm_handle.fd, py_shm_handle.size) as py_shm:
             for i, (w, h) in enumerate(sizes, start=1):
                 page = pdf.getPage(i)
                 viewport = page.getViewport({"scale": scale})

                 canvas = libcanvas.createCanvas(w, h)
                 context = canvas.getContext("2d")
                 page.render({"canvasContext": context, "viewport": viewport}).promise

                 # the author is not aware of a way to create a canvas backed by an external buffer, so this copies
                 js_buffer = canvas.toBuffer("raw")
                 starttm = time.time()
                 js_buffer.copy(js_shm)
                 py_shm.seek(0)
                 print(f"Data transfer took {time.time() - starttm}s")

                 pil_image = PIL.Image.frombuffer("RGBX", (w, h), py_shm, "raw", "BGRX", 0, 1)
                 pil_image.save(outdir / f"out_{i:0{n_digits}d}.jpg")
     finally:
         # Need to use native (non-js) functions to reliably destroy shared memory. Bridge seems to break in case of KeyboardInterrupt.
         py_shm_handle.close_fd()
         py_shm_handle.unlink()
         assert not Path("/dev/shm" + memkey).exists()

     pdf.destroy()


 def main():
     """Parse the command line, create the output directory if it is missing, and render the PDF."""

     # Absolute, user-expanded filesystem path.
     to_abs_path = lambda raw: Path(raw).expanduser().resolve()
     # Anything starting with "http" is passed through untouched; everything else is
     # normalised as a local path and handed on as a string.
     to_input = lambda raw: raw if raw.startswith("http") else str(to_abs_path(raw))

     cli = argparse.ArgumentParser(
         description=(
             "Render a PDF file with Mozilla pdf.js via JsPyBridge.\n"
             "Known issues: - URL support is buggy; - certain PDFs may hit memory limits."
         ),
     )
     cli.add_argument("input", type=to_input, help="Input file path or URL.")
     cli.add_argument("--outdir", "-o", type=to_abs_path, required=True)
     cli.add_argument("--scale", type=float, default=4)
     opts = cli.parse_args()

     target_dir = opts.outdir
     if not target_dir.exists():
         target_dir.mkdir(parents=True, exist_ok=True)

     render_pdf(opts.input, target_dir, scale=opts.scale)


 # Only run the CLI when executed as a script, so importing this module does not start a render.
 if __name__ == "__main__":
     main()
diff --git a/pypdfjs_shmem_direct.py b/pypdfjs_shmem_direct.py
 # SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
 # SPDX-License-Identifier: Apache-2.0 OR MPL-2.0

 def import_js():
     """Import the `javascript` package and bind it to the module-level name `javascript`."""
     # Used as a Thread target, so this import overlaps with the remaining module-level imports.
     global javascript
     javascript = __import__("javascript")

 from threading import Thread
 import_thread = Thread(target=import_js)
 print("Starting import thread")
 import_thread.start()

 import mmap
 import ctypes as ct
 import argparse
 from pathlib import Path
 from ctypes.util import find_library
 import PIL.Image

 LIBC_PATH = find_library("c")
 py_libc = ct.CDLL(LIBC_PATH)

 # flags, see /usr/include/bits/fcntl-linux.h
 O_CREAT    = 0o100   # 64
 O_EXCL     = 0o200   # 128
 O_NONBLOCK = 0o4000  # 2048
 O_RDONLY   = 0o0     # 0
 O_WRONLY   = 0o1     # 1
 O_RDWR     = 0o2     # 2
 O_TRUNC    = 0o1000  # 512

 # <unistd.h>
 py_ftruncate = py_libc.ftruncate
 py_ftruncate.argtypes = [ct.c_int, ct.c_long]
 py_ftruncate.restype = ct.c_int

 # <sys/mman.h>
 py_shm_open = py_libc.shm_open
 py_shm_open.argtypes = [ct.c_char_p, ct.c_int, ct.c_uint]
 py_shm_open.restype = ct.c_int

 py_shm_unlink = py_libc.shm_unlink
 py_shm_unlink.argtypes = [ct.c_char_p]
 py_shm_unlink.restype = ct.c_int

 THIS_DIR = str(Path(__file__).resolve().parent)
 PDFJS_PATH = THIS_DIR / Path("pdf.js/build/generic/build/pdf.js")
 CANVAS_PATH = THIS_DIR / Path("pdf.js/node_modules/canvas")

 print("Waiting for import thread to finish")
 import_thread.join()

 print("Importing JS libraries")
 globalThis = javascript.globalThis
 pdfjs = javascript.require(str(PDFJS_PATH))
 libcanvas = javascript.require(str(CANVAS_PATH))
 koffi = javascript.require("koffi")

 js_libc = koffi.load(LIBC_PATH)
 js_shm_open = js_libc.func("int shm_open(char* name, int oflag, unsigned int mode)")
 js_mmap = js_libc.func("void* mmap(void* addr, size_t len, int prot, int flags, int fd, long offset)")
 print("Done")


 def render_pdf(input, outdir, scale):
     """
     Render each page of a PDF with pdf.js and save it as a JPEG, moving pixels via POSIX shared memory.

     One shared memory object, sized for the largest page (width * height * 4 bytes), is
     opened from Python through libc (ctypes) and from JS through libc (koffi). Each page
     is rendered onto a node-canvas, the raw canvas buffer is copied into the JS-side
     mapping, and PIL reads the pixels through the Python-side mapping.

     Parameters:
         input: Document source, handed unchanged to pdfjs.getDocument().
         outdir: Output directory; joined with the file name via `/` (main() passes a pathlib.Path).
         scale: Scale factor handed to page.getViewport().

     Raises:
         OSError: If the shared memory object cannot be opened or resized from Python.
     """
     import os  # function-scope import: only needed to close the raw shm descriptor

     pdf = pdfjs.getDocument(input).promise
     n_pages = pdf.numPages
     n_digits = len(str(n_pages))

     print("Determine shared memory size ...")
     sizes = []
     for i in range(1, n_pages+1):
         page = pdf.getPage(i)
         viewport = page.getViewport({"scale": scale})
         w, h = int(viewport.width), int(viewport.height)
         sizes.append( (w, h) )

     # 4 bytes per pixel (the raw canvas buffer is decoded as BGRX below)
     shm_size = max(w*h for w, h in sizes) * 4
     print(f"Shared memory size in bytes: {shm_size}")

     shm_key_s = "pdfjs_render_shm"
     shm_key_b = shm_key_s.encode("ascii")

     # Pre-initialise everything the cleanup code touches, so a failure half-way through setup
     # surfaces as the real error instead of an UnboundLocalError raised from the finally block.
     shm_fd_py = -1
     memmap_py = None
     memview_py = None

     try:
         print("Creating shared memory ...")
         shm_fd_py = py_shm_open(shm_key_b, O_CREAT|O_RDWR, 0o666)
         if shm_fd_py < 0:
             raise OSError(f"shm_open() failed for shared memory object {shm_key_s!r}")
         if py_ftruncate(shm_fd_py, shm_size) != 0:
             raise OSError(f"ftruncate() failed to resize shared memory object {shm_key_s!r} to {shm_size} bytes")

         print("Setting up cross-language handles for shared memory ...")
         shm_fd_js = js_shm_open(shm_key_s, O_RDWR, 0o666)
         print(shm_fd_py, shm_fd_js)
         memmap_py = mmap.mmap(shm_fd_py, shm_size, flags=mmap.MAP_SHARED, prot=mmap.PROT_READ)
         memview_py = memoryview(memmap_py)
         memmap_ptr_js = js_mmap(0, shm_size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, shm_fd_js, 0)
         array_buffer_js = koffi.view(memmap_ptr_js, shm_size)
         array_js = globalThis.Uint8Array(array_buffer_js, 0, shm_size)
         print(memmap_py, memmap_ptr_js, array_buffer_js, array_js)

         # Page sizes were already collected above; reuse them rather than converting again.
         for i, (w, h) in enumerate(sizes, start=1):

             print(f"Page {i}")

             page = pdf.getPage(i)
             viewport = page.getViewport({"scale": scale})

             canvas = libcanvas.createCanvas(w, h)
             context = canvas.getContext("2d")
             page.render({"canvasContext": context, "viewport": viewport}).promise

             # TODO file a feature request with the node-canvas library to create a canvas in shared memory directly
             js_buffer = canvas.toBuffer("raw")
             js_buffer.copy(array_js)

             # We assume that PIL doesn't mind if the buffer is longer
             # NOTE passing a memoryview requires PIL >= 9.5
             pil_image = PIL.Image.frombuffer("RGBX", (w, h), memview_py, "raw", "BGRX", 0, 1)
             pil_image.save(outdir / f"out_{i:0{n_digits}d}.jpg")

     finally:
         try:
             # The memoryview must be released before the mmap can be closed, otherwise close() fails
             # with "BufferError: cannot close exported pointers exist".
             if memview_py is not None:
                 memview_py.release()
             if memmap_py is not None:
                 memmap_py.close()
             if shm_fd_py >= 0:
                 os.close(shm_fd_py)
             # NOTE the JS-side descriptor and mapping are not released here.
         finally:
             py_shm_unlink(shm_key_b)

     pdf.destroy()


 def main():
     """Parse the command line, create the output directory if it is missing, and render the PDF."""

     # Absolute, user-expanded filesystem path.
     to_abs_path = lambda raw: Path(raw).expanduser().resolve()
     # Anything starting with "http" is passed through untouched; everything else is
     # normalised as a local path and handed on as a string.
     to_input = lambda raw: raw if raw.startswith("http") else str(to_abs_path(raw))

     cli = argparse.ArgumentParser(description="Render a PDF file with Mozilla pdf.js via JsPyBridge.")
     cli.add_argument("input", type=to_input, help="Input file path or URL.")
     cli.add_argument("--outdir", "-o", type=to_abs_path, required=True)
     cli.add_argument("--scale", type=float, default=4)
     opts = cli.parse_args()

     target_dir = opts.outdir
     if not target_dir.exists():
         target_dir.mkdir(parents=True, exist_ok=True)

     render_pdf(opts.input, target_dir, scale=opts.scale)


 # Only run the CLI when executed as a script, so importing this module does not start a render.
 if __name__ == "__main__":
     main()
	# Four lines intentionally left blank




	# SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
	# SPDX-License-Identifier: Apache-2.0 OR MPL-2.0

	# See also https://github.com/extremeheat/JSPyBridge/blob/master/examples/python/pdfjs.py

	# Py-Depends: pillow, javascript >= 1.1.0 (jspybridge)
	# Js-Depends: pdfjs-dist, canvas
	# Use `python -m pip install` and `python -m javascript --install`
	# NOTE This currently assumes you have a custom pdf.js build in the same directory as this file, because require("pdfjs-dist") appears broken on the author's nodejs 20. See upstream build instructions. Commit 8b50836d is confirmed to work. Patch the require() calls if you want otherwise.

	import time
	import argparse
	from pathlib import Path
	import PIL.Image
	import javascript

	THIS_DIR = str(Path(__file__).resolve().parent)

	# NOTE canvas must be the build pdfjs is linked against, otherwise it'll fail with type error
	pdfjs = javascript.require( str(THIS_DIR / Path("pdf.js/build/generic/build/pdf.js")) )
	libcanvas = javascript.require( str(THIS_DIR / Path("pdf.js/node_modules/canvas")) )


	def render_pdf(input, outdir, scale):
	    """
	    Render each page of a PDF with pdf.js (through JSPyBridge) and save it as a JPEG.

	    Parameters:
	        input: Document source, handed unchanged to pdfjs.getDocument().
	        outdir: Output directory; joined with the file name via `/` (main() passes a pathlib.Path).
	        scale: Scale factor handed to page.getViewport().

	    Pages are written as out_<page number>.jpg, with the 1-based page number zero-padded
	    to the digit count of the page total.
	    """
	    pdf = pdfjs.getDocument(input).promise
	    n_pages = pdf.numPages
	    n_digits = len(str(n_pages))

	    for i in range(1, n_pages+1):

	        page = pdf.getPage(i)
	        viewport = page.getViewport({"scale": scale})
	        w, h = int(viewport.width), int(viewport.height)

	        # Rasterise the page onto a node-canvas of matching pixel size.
	        canvas = libcanvas.createCanvas(w, h)
	        context = canvas.getContext("2d")
	        page.render({"canvasContext": context, "viewport": viewport}).promise

	        # Note that blobValueOf() is much faster than valueOf()["data"] for large byte buffers.
	        js_buffer = canvas.toBuffer("raw")
	        starttm = time.time()
	        py_buffer = js_buffer.blobValueOf()
	        print(f"Data transfer took {time.time() - starttm}s")

	        # The raw canvas bytes are decoded as BGRX into an RGBX image.
	        pil_image = PIL.Image.frombuffer("RGBX", (w, h), py_buffer, "raw", "BGRX", 0, 1)
	        pil_image.save(outdir / f"out_{i:0{n_digits}d}.jpg")

	    pdf.destroy()


	def main():
	    """Parse the command line, create the output directory if it is missing, and render the PDF."""

	    # Absolute, user-expanded filesystem path.
	    path_type = lambda p: Path(p).expanduser().resolve()
	    # Anything starting with "http" is passed through untouched; everything else is
	    # normalised as a local path and handed on as a string.
	    input_type = lambda p: p if p.startswith("http") else str(path_type(p))

	    parser = argparse.ArgumentParser(
	        description="Render a PDF file with Mozilla pdf.js via JsPyBridge.\n" +
	        "Known issues: - URL support is buggy; - certain PDFs may hit memory limits.",
	    )
	    parser.add_argument(
	        "input", type=input_type,
	        help="Input file path or URL.",
	    )
	    parser.add_argument("--outdir", "-o", type=path_type, required=True)
	    parser.add_argument("--scale", type=float, default=4)

	    args = parser.parse_args()
	    if not args.outdir.exists():
	        args.outdir.mkdir(parents=True, exist_ok=True)

	    render_pdf(args.input, args.outdir, scale=args.scale)


	# Only run the CLI when executed as a script, so importing this module does not start a render.
	if __name__ == "__main__":
	    main()
	# SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
	# SPDX-License-Identifier: Apache-2.0 OR MPL-2.0

	# This is an experimental pdf.js interface using shared memory.
	# Unfortunately, shm-typed-array does not support Windows (except through Cygwin/WSL), so this is not exactly portable.
	# For another version by the same author that uses pipe-based data transfer via JSPyBridge's .blobValueOf(), see above.

	# Py-Depends: pillow, javascript (JSPyBridge), posix_ipc
	# Js-Depends: pdfjs-dist, canvas, shm-typed-array
	# You can use `python -m pip install`, and `python -m javascript --install`
	# NOTE This currently assumes you have a custom pdf.js build in the same directory as this file, because require("pdfjs-dist") appears broken on the author's nodejs 20. See upstream build instructions. Commit 8b50836d is confirmed to work. Patch the require() calls if you want otherwise.

	import time

	starttm = time.time()
	import mmap
	import argparse
	from pathlib import Path

	# third-party
	import PIL.Image
	import javascript
	import posix_ipc

	THIS_DIR = str(Path(__file__).resolve().parent)

	# NOTE canvas must be the build pdfjs is linked against, otherwise it'll fail with type error
	pdfjs = javascript.require( str(THIS_DIR / Path("pdf.js/build/generic/build/pdf.js")) )
	libcanvas = javascript.require( str(THIS_DIR / Path("pdf.js/node_modules/canvas")) )
	libshm = javascript.require("shm-typed-array")

	print(f"Imports took {time.time() - starttm}s"); del starttm


	def render_pdf(input, outdir, scale):
	    """
	    Render each page of a PDF with pdf.js and save it as a JPEG, moving pixels via shared memory.

	    A single shared memory segment, sized for the largest page (width * height * 4 bytes),
	    is created on the JS side with shm-typed-array and opened on the Python side with
	    posix_ipc. Each rendered canvas buffer is copied into it and read back by PIL.

	    Parameters:
	        input: Document source, handed unchanged to pdfjs.getDocument().
	        outdir: Output directory; joined with the file name via `/` (main() passes a pathlib.Path).
	        scale: Scale factor handed to page.getViewport().

	    Raises:
	        FileExistsError: If libshm.create() returns no segment, i.e. the name is already taken.

	    Output files are numbered from 1 (out_<page number>.jpg), matching the page numbers.
	    """

	    pdf = pdfjs.getDocument(input).promise
	    n_pages = pdf.numPages
	    n_digits = len(str(n_pages))

	    # First pass: collect the pixel size of every page so the segment can fit the largest one.
	    starttm = time.time()
	    sizes = []
	    for i in range(1, n_pages+1):
	        page = pdf.getPage(i)
	        viewport = page.getViewport({"scale": scale})
	        w, h = int(viewport.width), int(viewport.height)
	        sizes.append( (w, h) )

	    # 4 bytes per pixel (the raw canvas buffer is decoded as BGRX below)
	    max_alloc = max(w*h for w, h in sizes) * 4
	    print(f"Shared memory size in bytes: {max_alloc} (took {time.time() - starttm}s to determine)")

	    memkey = "/pypdfjs_render_shm"
	    js_shm = libshm.create(max_alloc, "Buffer", memkey)
	    # An explicit raise instead of `assert`, so the check survives `python -O`.
	    if js_shm is None:
	        raise FileExistsError("Shared memory of this name already exists, go to /dev/shm and remove it.")
	    py_shm_handle = posix_ipc.SharedMemory(memkey)

	    try:
	        # The context manager closes the Python-side mapping even if rendering fails.
	        with mmap.mmap(py_shm_handle.fd, py_shm_handle.size) as py_shm:
	            for i, (w, h) in enumerate(sizes, start=1):
	                page = pdf.getPage(i)
	                viewport = page.getViewport({"scale": scale})

	                canvas = libcanvas.createCanvas(w, h)
	                context = canvas.getContext("2d")
	                page.render({"canvasContext": context, "viewport": viewport}).promise

	                # the author is not aware of a way to create a canvas backed by an external buffer, so this copies
	                js_buffer = canvas.toBuffer("raw")
	                starttm = time.time()
	                js_buffer.copy(js_shm)
	                py_shm.seek(0)
	                print(f"Data transfer took {time.time() - starttm}s")

	                pil_image = PIL.Image.frombuffer("RGBX", (w, h), py_shm, "raw", "BGRX", 0, 1)
	                pil_image.save(outdir / f"out_{i:0{n_digits}d}.jpg")
	    finally:
	        # Need to use native (non-js) functions to reliably destroy shared memory. Bridge seems to break in case of KeyboardInterrupt.
	        py_shm_handle.close_fd()
	        py_shm_handle.unlink()
	        assert not Path("/dev/shm" + memkey).exists()

	    pdf.destroy()


	def main():
	    """Parse the command line, create the output directory if it is missing, and render the PDF."""

	    # Absolute, user-expanded filesystem path.
	    path_type = lambda p: Path(p).expanduser().resolve()
	    # Anything starting with "http" is passed through untouched; everything else is
	    # normalised as a local path and handed on as a string.
	    input_type = lambda p: p if p.startswith("http") else str(path_type(p))

	    parser = argparse.ArgumentParser(
	        description="Render a PDF file with Mozilla pdf.js via JsPyBridge.\n" +
	        "Known issues: - URL support is buggy; - certain PDFs may hit memory limits.",
	    )
	    parser.add_argument(
	        "input", type=input_type,
	        help="Input file path or URL.",
	    )
	    parser.add_argument("--outdir", "-o", type=path_type, required=True)
	    parser.add_argument("--scale", type=float, default=4)

	    args = parser.parse_args()
	    if not args.outdir.exists():
	        args.outdir.mkdir(parents=True, exist_ok=True)

	    render_pdf(args.input, args.outdir, scale=args.scale)


	# Only run the CLI when executed as a script, so importing this module does not start a render.
	if __name__ == "__main__":
	    main()
	# SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
	# SPDX-License-Identifier: Apache-2.0 OR MPL-2.0

	def import_js():
	    """Import the `javascript` package and bind it to the module-level name `javascript`."""
	    # Used as a Thread target, so this import overlaps with the remaining module-level imports.
	    global javascript
	    import javascript

	from threading import Thread
	import_thread = Thread(target=import_js)
	print("Starting import thread")
	import_thread.start()

	import mmap
	import ctypes as ct
	import argparse
	from pathlib import Path
	from ctypes.util import find_library
	import PIL.Image

	LIBC_PATH = find_library("c")
	py_libc = ct.CDLL(LIBC_PATH)

	# flags, see /usr/include/bits/fcntl-linux.h
	O_CREAT = 0o100 # 64
	O_EXCL = 0o200 # 128
	O_NONBLOCK = 0o4000 # 2048
	O_RDONLY = 0o0 # 0
	O_WRONLY = 0o1 # 1
	O_RDWR = 0o2 # 2
	O_TRUNC = 0o1000 # 512

	# <unistd.h>
	py_ftruncate = py_libc.ftruncate
	py_ftruncate.argtypes = [ct.c_int, ct.c_long]
	py_ftruncate.restype = ct.c_int

	# <sys/mman.h>
	py_shm_open = py_libc.shm_open
	py_shm_open.argtypes = [ct.c_char_p, ct.c_int, ct.c_uint]
	py_shm_open.restype = ct.c_int

	py_shm_unlink = py_libc.shm_unlink
	py_shm_unlink.argtypes = [ct.c_char_p]
	py_shm_unlink.restype = ct.c_int

	THIS_DIR = str(Path(__file__).resolve().parent)
	PDFJS_PATH = THIS_DIR / Path("pdf.js/build/generic/build/pdf.js")
	CANVAS_PATH = THIS_DIR / Path("pdf.js/node_modules/canvas")

	print("Waiting for import thread to finish")
	import_thread.join()

	print("Importing JS libraries")
	globalThis = javascript.globalThis
	pdfjs = javascript.require(str(PDFJS_PATH))
	libcanvas = javascript.require(str(CANVAS_PATH))
	koffi = javascript.require("koffi")

	js_libc = koffi.load(LIBC_PATH)
	js_shm_open = js_libc.func("int shm_open(char* name, int oflag, unsigned int mode)")
	js_mmap = js_libc.func("void* mmap(void* addr, size_t len, int prot, int flags, int fd, long offset)")
	print("Done")


	def render_pdf(input, outdir, scale):
	    """
	    Render each page of a PDF with pdf.js and save it as a JPEG, moving pixels via POSIX shared memory.

	    One shared memory object, sized for the largest page (width * height * 4 bytes), is
	    opened from Python through libc (ctypes) and from JS through libc (koffi). Each page
	    is rendered onto a node-canvas, the raw canvas buffer is copied into the JS-side
	    mapping, and PIL reads the pixels through the Python-side mapping.

	    Parameters:
	        input: Document source, handed unchanged to pdfjs.getDocument().
	        outdir: Output directory; joined with the file name via `/` (main() passes a pathlib.Path).
	        scale: Scale factor handed to page.getViewport().

	    Raises:
	        OSError: If the shared memory object cannot be opened or resized from Python.
	    """
	    import os  # function-scope import: only needed to close the raw shm descriptor

	    pdf = pdfjs.getDocument(input).promise
	    n_pages = pdf.numPages
	    n_digits = len(str(n_pages))

	    print("Determine shared memory size ...")
	    sizes = []
	    for i in range(1, n_pages+1):
	        page = pdf.getPage(i)
	        viewport = page.getViewport({"scale": scale})
	        w, h = int(viewport.width), int(viewport.height)
	        sizes.append( (w, h) )

	    # 4 bytes per pixel (the raw canvas buffer is decoded as BGRX below)
	    shm_size = max(w*h for w, h in sizes) * 4
	    print(f"Shared memory size in bytes: {shm_size}")

	    shm_key_s = "pdfjs_render_shm"
	    shm_key_b = shm_key_s.encode("ascii")

	    # Pre-initialise everything the cleanup code touches, so a failure half-way through setup
	    # surfaces as the real error instead of an UnboundLocalError raised from the finally block.
	    shm_fd_py = -1
	    memmap_py = None
	    memview_py = None

	    try:
	        print("Creating shared memory ...")
	        shm_fd_py = py_shm_open(shm_key_b, O_CREAT|O_RDWR, 0o666)
	        if shm_fd_py < 0:
	            raise OSError(f"shm_open() failed for shared memory object {shm_key_s!r}")
	        if py_ftruncate(shm_fd_py, shm_size) != 0:
	            raise OSError(f"ftruncate() failed to resize shared memory object {shm_key_s!r} to {shm_size} bytes")

	        print("Setting up cross-language handles for shared memory ...")
	        shm_fd_js = js_shm_open(shm_key_s, O_RDWR, 0o666)
	        print(shm_fd_py, shm_fd_js)
	        memmap_py = mmap.mmap(shm_fd_py, shm_size, flags=mmap.MAP_SHARED, prot=mmap.PROT_READ)
	        memview_py = memoryview(memmap_py)
	        memmap_ptr_js = js_mmap(0, shm_size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, shm_fd_js, 0)
	        array_buffer_js = koffi.view(memmap_ptr_js, shm_size)
	        array_js = globalThis.Uint8Array(array_buffer_js, 0, shm_size)
	        print(memmap_py, memmap_ptr_js, array_buffer_js, array_js)

	        # Page sizes were already collected above; reuse them rather than converting again.
	        for i, (w, h) in enumerate(sizes, start=1):

	            print(f"Page {i}")

	            page = pdf.getPage(i)
	            viewport = page.getViewport({"scale": scale})

	            canvas = libcanvas.createCanvas(w, h)
	            context = canvas.getContext("2d")
	            page.render({"canvasContext": context, "viewport": viewport}).promise

	            # TODO file a feature request with the node-canvas library to create a canvas in shared memory directly
	            js_buffer = canvas.toBuffer("raw")
	            js_buffer.copy(array_js)

	            # We assume that PIL doesn't mind if the buffer is longer
	            # NOTE passing a memoryview requires PIL >= 9.5
	            pil_image = PIL.Image.frombuffer("RGBX", (w, h), memview_py, "raw", "BGRX", 0, 1)
	            pil_image.save(outdir / f"out_{i:0{n_digits}d}.jpg")

	    finally:
	        try:
	            # The memoryview must be released before the mmap can be closed, otherwise close() fails
	            # with "BufferError: cannot close exported pointers exist".
	            if memview_py is not None:
	                memview_py.release()
	            if memmap_py is not None:
	                memmap_py.close()
	            if shm_fd_py >= 0:
	                os.close(shm_fd_py)
	            # NOTE the JS-side descriptor and mapping are not released here.
	        finally:
	            py_shm_unlink(shm_key_b)

	    pdf.destroy()


	def main():
	    """Parse the command line, create the output directory if it is missing, and render the PDF."""

	    # Absolute, user-expanded filesystem path.
	    path_type = lambda p: Path(p).expanduser().resolve()
	    # Anything starting with "http" is passed through untouched; everything else is
	    # normalised as a local path and handed on as a string.
	    input_type = lambda p: p if p.startswith("http") else str(path_type(p))

	    parser = argparse.ArgumentParser(
	        description="Render a PDF file with Mozilla pdf.js via JsPyBridge."
	    )
	    parser.add_argument(
	        "input", type=input_type,
	        help="Input file path or URL.",
	    )
	    parser.add_argument("--outdir", "-o", type=path_type, required=True)
	    parser.add_argument("--scale", type=float, default=4)

	    args = parser.parse_args()
	    if not args.outdir.exists():
	        args.outdir.mkdir(parents=True, exist_ok=True)

	    render_pdf(args.input, args.outdir, scale=args.scale)


	# Only run the CLI when executed as a script, so importing this module does not start a render.
	if __name__ == "__main__":
	    main()