rcsmit · October 9, 2024 23:30
diff --git a/gistfile1.txt b/gistfile1.txt
 import os
 import time
 import streamlit as st
 from PIL import Image, ExifTags
 from io import BytesIO
 import pytesseract
 import re
 import os

 # based on https://www.youtube.com/watch?v=RPN-HxvAQnQ

 def clean_filename(filename, replace_with='_'):
     """Replace characters that are unsafe in a file name.

     Which punctuation is replaced depends on ``os.name`` of the machine
     running this code: a broad set on Windows (``'nt'``), a narrower set
     everywhere else. ASCII control characters (line breaks, tabs, form
     feeds, ...) are replaced on every platform, so a name built from
     multi-line text never keeps its line breaks.

     https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

     Args:
         filename (str): proposed file name (no directory part)
         replace_with (str): text substituted for every forbidden character;
             it is inserted literally, so a backslash in it is not treated
             as a regex escape

     Returns:
         str: the file name with every forbidden character replaced
     """
     # Control characters are never acceptable, whatever the platform.
     control_chars = r'\x00-\x1f\x7f'

     if os.name == 'nt':
         # Windows: reserved characters plus assorted punctuation.
         forbidden_chars = r'[<>:"/\\|?*\&\.\,\n\%\^!@#$(){}\[\]\'\"' + control_chars + ']'
     else:
         # Linux/Mac: the smaller reserved set.
         forbidden_chars = r'[/?<>\\:*|"' + control_chars + ']'

     # A callable replacement keeps `replace_with` literal; passing the string
     # directly would make re.sub parse it as a replacement template.
     return re.sub(forbidden_chars, lambda _match: replace_with, filename)

 def correct_image_rotation(image):
     """Rotate an image upright according to its EXIF orientation tag.

     Many cameras and smartphones record the orientation as EXIF metadata
     instead of rotating the pixel data. Viewers that honor the tag show the
     picture upright, but the pixel data opened here is not rotated
     automatically, so the rotation is applied explicitly.

     Only the three pure rotations are handled (orientation values 3, 6
     and 8). Any other value, including the mirrored orientations, leaves
     the image untouched.

     https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

     Args:
         image: object exposing ``getexif()`` (returning a mapping of EXIF
             tag id to value, or None) and ``rotate(angle, expand=...)``,
             as a PIL image does.

     Returns:
         The result of ``image.rotate(...)`` when the orientation is 3, 6
         or 8; otherwise the very same ``image`` object.
     """
     # Tag id of "Orientation" as fixed by the EXIF standard; using it
     # directly avoids scanning the whole tag table on every call.
     orientation_tag = 0x0112
     # EXIF orientation value -> angle handed to image.rotate().
     angle_by_orientation = {3: 180, 6: 270, 8: 90}

     try:
         exif = image.getexif()
         orientation = exif.get(orientation_tag) if exif is not None else None
     except (AttributeError, KeyError, IndexError):
         # No readable EXIF data: best effort, keep the image as it is.
         return image

     angle = angle_by_orientation.get(orientation)
     if angle is None:
         return image
     return image.rotate(angle, expand=True)


 def main_streamlit_upload_files():
     """Streamlit page: upload up to five JPEG images and get them as one PDF.

     The page shows five single-file uploaders and a "Convert to PDF"
     button. On click, every filled slot is opened, rotated upright via
     ``correct_image_rotation``, converted to RGB if needed, and all images
     are written into one in-memory PDF in slot order. The download name is
     taken from the OCR text of the first uploaded image (first 50
     characters, cleaned with ``clean_filename``).
     """
     # Title of the app
     st.title("JPEG/JPG to PDF Converter")

     # One single-file uploader per slot; an unused slot yields None.
     uploaded_images = [
         st.file_uploader(
             f"Upload JPEG or JPG Image {slot}",
             type=["jpeg", "jpg"],
             accept_multiple_files=False,
         )
         for slot in range(1, 6)
     ]

     if not st.button("Convert to PDF"):
         st.info(".")
         return

     placeholder = st.empty()
     placeholder.info("Converting")

     image_list = []
     filename_proposed = ""
     for uploaded_image in uploaded_images:
         if uploaded_image is None:
             continue
         image = correct_image_rotation(Image.open(uploaded_image))
         if not image_list:
             # OCR the first image that was actually uploaded. Keying this
             # on slot 1 left the name undefined whenever slot 1 was empty.
             filename_proposed = str(pytesseract.image_to_string(image))
         # Convert image to RGB if it's not in RGB format (for PDF compatibility)
         if image.mode != "RGB":
             image = image.convert("RGB")
         image_list.append(image)

     if not image_list:
         st.error("No valid images found.")
         return

     # Write all pages into an in-memory PDF and rewind it for download.
     pdf_buffer = BytesIO()
     image_list[0].save(pdf_buffer, format="PDF", save_all=True, append_images=image_list[1:])
     pdf_buffer.seek(0)

     # Trim surrounding whitespace before cutting to 50 characters so the
     # name is not spent on blank space around the recognized text.
     filename_given = clean_filename(filename_proposed.strip()[:50], replace_with='_')
     if not filename_given.strip('_ '):
         # OCR found nothing usable; avoid a download named just ".pdf".
         filename_given = "converted"
     placeholder.info(filename_given)

     # Provide the download button for the PDF
     st.download_button(
         "Download PDF",
         data=pdf_buffer,
         file_name=f"{filename_given}.pdf",
         mime="application/pdf"
     )


 # Script entry point: build the page only when this file is run directly,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main_streamlit_upload_files()
	import os
	import time
	import streamlit as st
	from PIL import Image, ExifTags
	from io import BytesIO
	import pytesseract
	import re
	import os

	# based on https://www.youtube.com/watch?v=RPN-HxvAQnQ

	def clean_filename(filename, replace_with='_'):
	    """Replace characters that are unsafe in a file name.

	    Which punctuation is replaced depends on ``os.name`` of the machine
	    running this code: a broad set on Windows (``'nt'``), a narrower set
	    everywhere else. ASCII control characters (line breaks, tabs, form
	    feeds, ...) are replaced on every platform, so a name built from
	    multi-line text never keeps its line breaks.

	    https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

	    Args:
	        filename (str): proposed file name (no directory part)
	        replace_with (str): text substituted for every forbidden character;
	            it is inserted literally, so a backslash in it is not treated
	            as a regex escape

	    Returns:
	        str: the file name with every forbidden character replaced
	    """
	    # Control characters are never acceptable, whatever the platform.
	    control_chars = r'\x00-\x1f\x7f'

	    if os.name == 'nt':
	        # Windows: reserved characters plus assorted punctuation.
	        forbidden_chars = r'[<>:"/\\|?*\&\.\,\n\%\^!@#$(){}\[\]\'\"' + control_chars + ']'
	    else:
	        # Linux/Mac: the smaller reserved set.
	        forbidden_chars = r'[/?<>\\:*|"' + control_chars + ']'

	    # A callable replacement keeps `replace_with` literal; passing the string
	    # directly would make re.sub parse it as a replacement template.
	    return re.sub(forbidden_chars, lambda _match: replace_with, filename)

	def correct_image_rotation(image):
	    """Rotate an image upright according to its EXIF orientation tag.

	    Many cameras and smartphones record the orientation as EXIF metadata
	    instead of rotating the pixel data. Viewers that honor the tag show the
	    picture upright, but the pixel data opened here is not rotated
	    automatically, so the rotation is applied explicitly.

	    Only the three pure rotations are handled (orientation values 3, 6
	    and 8). Any other value, including the mirrored orientations, leaves
	    the image untouched.

	    https://chatgpt.com/c/6706ff08-4a8c-8004-9d62-c1f28cdd7de1

	    Args:
	        image: object exposing ``getexif()`` (returning a mapping of EXIF
	            tag id to value, or None) and ``rotate(angle, expand=...)``,
	            as a PIL image does.

	    Returns:
	        The result of ``image.rotate(...)`` when the orientation is 3, 6
	        or 8; otherwise the very same ``image`` object.
	    """
	    # Tag id of "Orientation" as fixed by the EXIF standard; using it
	    # directly avoids scanning the whole tag table on every call.
	    orientation_tag = 0x0112
	    # EXIF orientation value -> angle handed to image.rotate().
	    angle_by_orientation = {3: 180, 6: 270, 8: 90}

	    try:
	        exif = image.getexif()
	        orientation = exif.get(orientation_tag) if exif is not None else None
	    except (AttributeError, KeyError, IndexError):
	        # No readable EXIF data: best effort, keep the image as it is.
	        return image

	    angle = angle_by_orientation.get(orientation)
	    if angle is None:
	        return image
	    return image.rotate(angle, expand=True)


	def main_streamlit_upload_files():
	    """Streamlit page: upload up to five JPEG images and get them as one PDF.

	    The page shows five single-file uploaders and a "Convert to PDF"
	    button. On click, every filled slot is opened, rotated upright via
	    ``correct_image_rotation``, converted to RGB if needed, and all images
	    are written into one in-memory PDF in slot order. The download name is
	    taken from the OCR text of the first uploaded image (first 50
	    characters, cleaned with ``clean_filename``).
	    """
	    # Title of the app
	    st.title("JPEG/JPG to PDF Converter")

	    # One single-file uploader per slot; an unused slot yields None.
	    uploaded_images = [
	        st.file_uploader(
	            f"Upload JPEG or JPG Image {slot}",
	            type=["jpeg", "jpg"],
	            accept_multiple_files=False,
	        )
	        for slot in range(1, 6)
	    ]

	    if not st.button("Convert to PDF"):
	        st.info(".")
	        return

	    placeholder = st.empty()
	    placeholder.info("Converting")

	    image_list = []
	    filename_proposed = ""
	    for uploaded_image in uploaded_images:
	        if uploaded_image is None:
	            continue
	        image = correct_image_rotation(Image.open(uploaded_image))
	        if not image_list:
	            # OCR the first image that was actually uploaded. Keying this
	            # on slot 1 left the name undefined whenever slot 1 was empty.
	            filename_proposed = str(pytesseract.image_to_string(image))
	        # Convert image to RGB if it's not in RGB format (for PDF compatibility)
	        if image.mode != "RGB":
	            image = image.convert("RGB")
	        image_list.append(image)

	    if not image_list:
	        st.error("No valid images found.")
	        return

	    # Write all pages into an in-memory PDF and rewind it for download.
	    pdf_buffer = BytesIO()
	    image_list[0].save(pdf_buffer, format="PDF", save_all=True, append_images=image_list[1:])
	    pdf_buffer.seek(0)

	    # Trim surrounding whitespace before cutting to 50 characters so the
	    # name is not spent on blank space around the recognized text.
	    filename_given = clean_filename(filename_proposed.strip()[:50], replace_with='_')
	    if not filename_given.strip('_ '):
	        # OCR found nothing usable; avoid a download named just ".pdf".
	        filename_given = "converted"
	    placeholder.info(filename_given)

	    # Provide the download button for the PDF
	    st.download_button(
	        "Download PDF",
	        data=pdf_buffer,
	        file_name=f"{filename_given}.pdf",
	        mime="application/pdf"
	    )


	# Script entry point: build the page only when this file is run directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main_streamlit_upload_files()