Last active
October 9, 2024 23:30
-
-
Save rcsmit/42cf7bedc73d8741725aa9b815772d5a to your computer and use it in GitHub Desktop.
jpg to pdf - streamlit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import time
from io import BytesIO

import pytesseract
import streamlit as st
from PIL import ExifTags, Image, ImageOps
# based on https://www.youtube.com/watch?v=RPN-HxvAQnQ | |
def clean_filename(filename, replace_with='_'):
    """Replace characters that are not allowed in a file name.

    The set of replaced characters depends on the operating system this
    code runs on (``os.name``): an extended set on Windows and a smaller
    one everywhere else. ASCII control characters (newline, tab, NUL, ...)
    are replaced on every platform, because text coming from OCR usually
    contains line breaks that must not end up in a file name.

    https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

    Args:
        filename (str): proposed file name (no directory part).
        replace_with (str): replacement for each forbidden character.

    Returns:
        str: the file name with every forbidden character replaced.
    """
    if os.name == 'nt':
        # Windows: reserved characters plus punctuation the original author
        # chose to avoid, plus all control characters.
        forbidden_chars = r'[<>:"/\\|?*\&\.\,\n\%\^!@#$(){}\[\]\'\"\x00-\x1f]'
    else:
        # Linux/Mac: path separators and shell-hostile characters, plus all
        # control characters.
        forbidden_chars = r'[/?<>\\:*|"\x00-\x1f]'
    return re.sub(forbidden_chars, replace_with, filename)
def correct_image_rotation(image):
    """Return *image* turned upright according to its EXIF orientation.

    Many cameras and smartphones do not rotate the pixel data; they store
    the orientation as EXIF metadata instead. ``PIL.Image.open()`` does not
    apply that metadata, so an image that looks upright in a viewer can end
    up sideways in the generated PDF unless it is corrected here.

    https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

    Args:
        image (PIL.Image.Image): image as returned by ``Image.open``.

    Returns:
        PIL.Image.Image: the result of ``ImageOps.exif_transpose``, or the
        input object itself when its EXIF data could not be processed.
    """
    try:
        # exif_transpose is Pillow's public helper for this job; it handles
        # the mirrored orientations as well as the plain rotations.
        return ImageOps.exif_transpose(image)
    except (AttributeError, KeyError, IndexError):
        # Best effort: missing or malformed EXIF data must not block the
        # conversion, so the image is used as uploaded.
        return image
def main_streamlit_upload_files():
    """Render the Streamlit page that merges up to five JPEGs into one PDF.

    The page shows five single-file uploaders and a "Convert to PDF"
    button. On click, every uploaded image is opened, rotated upright,
    converted to RGB and appended to an in-memory PDF that is offered
    through a download button. The download name is proposed by running
    OCR on the first image that was actually uploaded; if OCR yields no
    usable text the name falls back to "converted".
    """
    # Title of the app
    st.title("JPEG/JPG to PDF Converter")

    # One single-file uploader per slot; a slot left empty yields None.
    uploaded_images = [
        st.file_uploader(
            f"Upload JPEG or JPG Image {slot}",
            type=["jpeg", "jpg"],
            accept_multiple_files=False,
        )
        for slot in range(1, 6)
    ]

    if not st.button("Convert to PDF"):
        st.info(".")
        return

    placeholder = st.empty()
    placeholder.info("Converting")

    image_list = []
    filename_proposed = ""
    for uploaded_image in uploaded_images:
        if uploaded_image is None:
            continue
        image = correct_image_rotation(Image.open(uploaded_image))
        if not image_list:
            # OCR the first image that is present, not strictly slot 1;
            # otherwise the name is undefined when slot 1 is left empty.
            filename_proposed = str(pytesseract.image_to_string(image)).strip()
        # Convert image to RGB if it's not in RGB format (for PDF compatibility)
        if image.mode != "RGB":
            image = image.convert("RGB")
        image_list.append(image)

    if not image_list:
        st.error("No valid images found.")
        return

    # Write all pages into an in-memory PDF and rewind it for the download.
    pdf_buffer = BytesIO()
    image_list[0].save(
        pdf_buffer,
        format="PDF",
        save_all=True,
        append_images=image_list[1:],
    )
    pdf_buffer.seek(0)

    # Empty OCR output would otherwise produce a file called ".pdf".
    filename_given = (
        clean_filename(filename_proposed[:50], replace_with='_').strip()
        or "converted"
    )
    placeholder.info(filename_given)

    # Provide the download button for the PDF
    st.download_button(
        "Download PDF",
        data=pdf_buffer,
        file_name=f"{filename_given}.pdf",
        mime="application/pdf",
    )
if __name__ == "__main__":
    # Build the page only when this file is executed as a script,
    # not when it is imported as a module.
    main_streamlit_upload_files()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment