Skip to content

Instantly share code, notes, and snippets.

@rcsmit
Last active October 9, 2024 23:30
Show Gist options
  • Save rcsmit/42cf7bedc73d8741725aa9b815772d5a to your computer and use it in GitHub Desktop.
Save rcsmit/42cf7bedc73d8741725aa9b815772d5a to your computer and use it in GitHub Desktop.
jpg to pdf - streamlit
import os
import time
import streamlit as st
from PIL import Image, ExifTags
from io import BytesIO
import pytesseract
import re
import os
# based on https://www.youtube.com/watch?v=RPN-HxvAQnQ
def clean_filename(filename, replace_with='_'):
    """Replace characters that are unsafe in a filename.

    https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

    The set of replaced characters depends on ``os.name``: a broad set on
    Windows (``'nt'``) and a narrower one elsewhere. ASCII control
    characters (``\\x00``-``\\x1f``, which includes newline and tab) are
    replaced on every platform, because the text handed to this function may
    span several lines and a line break inside a filename is never wanted.

    Args:
        filename (str): proposed filename (without directory part).
        replace_with (str): replacement for every forbidden character. It is
            passed to ``re.sub`` as the replacement template, so backslash
            escapes in it are interpreted by ``re``.

    Returns:
        str: ``filename`` with each forbidden character replaced.
    """
    if os.name == 'nt':
        # Windows: reserved characters plus assorted punctuation.
        forbidden_chars = r'[\x00-\x1f<>:"/\\|?*\&\.\,\%\^!@#$(){}\[\]\']'
    else:
        # Linux/Mac: a narrower set of characters.
        forbidden_chars = r'[\x00-\x1f/?<>\\:*|"]'

    return re.sub(forbidden_chars, replace_with, filename)
def correct_image_rotation(image):
    """Return ``image`` rotated/flipped according to its EXIF orientation.

    Many modern cameras and smartphones store the orientation in the image
    file as EXIF metadata rather than physically rotating the pixels. Viewers
    that honour the tag show the picture upright, but ``PIL.Image.open()``
    does not apply it by default, so it is applied here.

    https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

    The work is delegated to ``PIL.ImageOps.exif_transpose``, which covers
    every EXIF orientation value (including the mirrored ones) instead of
    only the three pure rotations (3, 6 and 8).

    Args:
        image (PIL.Image.Image): image as opened by ``Image.open``.

    Returns:
        PIL.Image.Image: the upright image. If the orientation cannot be read
        or applied, the input image is returned unchanged.
    """
    # Function-scope import so this block does not depend on edits to the
    # file-level import list; Pillow itself is already a dependency.
    from PIL import ImageOps

    try:
        return ImageOps.exif_transpose(image)
    except (AttributeError, KeyError, IndexError, TypeError, ValueError):
        # Best effort: missing or unusable EXIF data must not stop the
        # conversion, so fall back to the image as it was loaded.
        return image
def main_streamlit_upload_files():
    """Streamlit page: upload up to five JPEG images and download one PDF.

    Shows five single-file uploaders and a "Convert to PDF" button. On click,
    every supplied image is opened, rotation-corrected, converted to RGB and
    written as one page of a PDF held in memory. The download filename is
    derived from the OCR text of the first supplied image (first 50
    characters, cleaned with ``clean_filename``); if that yields nothing
    usable, the name ``converted`` is used instead.
    """
    # Title of the app
    st.title("JPEG/JPG to PDF Converter")

    # One uploader per slot; a slot the user leaves empty yields None.
    uploaded_images = [
        st.file_uploader(
            f"Upload JPEG or JPG Image {slot}",
            type=["jpeg", "jpg"],
            accept_multiple_files=False,
        )
        for slot in range(1, 6)
    ]

    if not st.button("Convert to PDF"):
        st.info(".")
        return

    placeholder = st.empty()
    placeholder.info("Converting")

    image_list = []
    # Initialised up front so it is always bound, whichever slots are filled.
    filename_proposed = ""
    for uploaded_image in uploaded_images:
        if uploaded_image is None:
            continue

        image = correct_image_rotation(Image.open(uploaded_image))

        if not image_list:
            # OCR the first image actually supplied (not necessarily slot 1)
            # to propose a filename.
            filename_proposed = str(pytesseract.image_to_string(image))

        # Convert image to RGB if it's not in RGB format (for PDF compatibility)
        if image.mode != "RGB":
            image = image.convert("RGB")
        image_list.append(image)

    if not image_list:
        st.error("No valid images found.")
        return

    # Write all pages into an in-memory buffer and rewind it for the download.
    pdf_buffer = BytesIO()
    first_page, *other_pages = image_list
    first_page.save(
        pdf_buffer,
        format="PDF",
        save_all=True,
        append_images=other_pages,
    )
    pdf_buffer.seek(0)

    # Strip surrounding whitespace before truncating so the 50 characters are
    # spent on actual text.
    filename_given = clean_filename(filename_proposed.strip()[:50], replace_with='_')
    if not filename_given.strip(" _"):
        # OCR found no usable text; avoid offering a file called ".pdf".
        filename_given = "converted"
    placeholder.info(filename_given)

    # Provide the download button for the PDF
    st.download_button(
        "Download PDF",
        data=pdf_buffer,
        file_name=f"{filename_given}.pdf",
        mime="application/pdf",
    )
if __name__ == "__main__":
    # Build the page only when this file is run as the main script, not when
    # it is imported as a module.
    main_streamlit_upload_files()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment