Skip to content

Instantly share code, notes, and snippets.

@filipeandre
Created April 21, 2025 14:52
Show Gist options
  • Save filipeandre/5fb414528036f8ed0549f5b22d3e3ee3 to your computer and use it in GitHub Desktop.
Save filipeandre/5fb414528036f8ed0549f5b22d3e3ee3 to your computer and use it in GitHub Desktop.
PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools that help users train better models and apply them into practice.
# Helper functions that run PaddleOCR on an image, annotate the image with
# the detected bounding boxes, consolidate the results into a DataFrame,
# and iterate over a folder of images.
from PIL import Image, ImageDraw
import pandas as pd
from paddleocr import PaddleOCR
import os
import traceback
# Initialized PaddleOCR engines keyed by language. Building an engine loads
# the models, so it is done once per language rather than once per image.
_OCR_ENGINES = {}


def _get_ocr_engine(lang):
    """Return the PaddleOCR engine for ``lang``, creating it on first use."""
    if lang not in _OCR_ENGINES:
        _OCR_ENGINES[lang] = PaddleOCR(
            use_gpu=True, use_angle_cls=True, lang=lang, min_subgraph_size=30
        )
    return _OCR_ENGINES[lang]


def paddle_inference(img_path, lang='en'):
    """Run OCR on one image, save an annotated copy, and tabulate the text.

    Args:
        img_path: Path of the image to process.
        lang: Language code passed to PaddleOCR.

    Returns:
        A ``(result_path, df)`` tuple. ``result_path`` is the path of the
        annotated JPEG written under ``results/``. ``df`` is a DataFrame with
        one row per detection and the columns ``coordinates``, ``text`` and
        ``confidence``. Returns ``(None, None)`` when nothing was detected.
    """
    ocr = _get_ocr_engine(lang)
    ocr_result = ocr.ocr(img_path, cls=True)

    # Each detection is unpacked as (box, (text, confidence)).
    results = []
    if ocr_result is not None:
        for line in ocr_result:
            # NOTE: PaddleOCR can report a page without detections as None
            # (see its result-format docs); iterating it would raise TypeError.
            if line is None:
                continue
            for boxes, txt_info in line:
                if len(txt_info) == 2:  # Ensure there are 2 elements (text and confidence)
                    text, confidence = txt_info
                    results.append({
                        'coordinates': boxes,
                        'text': text,
                        'confidence': confidence
                    })

    if not results:
        print("No text detected.")
        return None, None

    # Load the pixels into an RGB copy and release the file handle right away.
    with Image.open(img_path) as source_image:
        image = source_image.convert('RGB')

    draw = ImageDraw.Draw(image)
    for res in results:
        # Flatten the list of corner points into [x0, y0, x1, y1, ...] for PIL.
        coordinates = [pt for box in res['coordinates'] for pt in box]
        draw.polygon(coordinates, outline='red')

    # Name the output after the input file, without its extension.
    base_filename = os.path.splitext(os.path.basename(img_path))[0]
    result_path = f'results/{base_filename}_result.jpg'

    # The output directory may not exist yet on a fresh checkout.
    os.makedirs('results', exist_ok=True)
    image.save(result_path)

    df = pd.DataFrame(results)
    return result_path, df
# Define the consolidate_ocr_results function
def consolidate_ocr_results(filename, ocr_df):
    """Collapse the per-detection OCR rows of one image into a single row.

    Only detections whose confidence is greater than 0.25 are kept. Confidence
    values that cannot be parsed as numbers are treated as missing and are
    therefore dropped. The input DataFrame is not modified.

    Args:
        filename: Name stored in the ``Filename`` column of the result.
        ocr_df: DataFrame with at least the columns ``text`` and
            ``confidence``.

    Returns:
        A one-row DataFrame with the columns ``Filename``, ``OCR_Text`` (the
        kept texts joined by spaces) and ``Scores`` (the kept confidences
        joined by spaces).
    """
    # Work on a separate numeric Series instead of overwriting the caller's
    # 'confidence' column.
    scores = pd.to_numeric(ocr_df['confidence'], errors='coerce')
    keep = scores > 0.25

    all_text = ' '.join(ocr_df.loc[keep, 'text'])
    all_scores = ' '.join(str(score) for score in scores[keep])

    return pd.DataFrame({
        'Filename': [filename],
        'OCR_Text': [all_text],
        'Scores': [all_scores]
    })
# Define the process_folder function
def process_folder(folder_path, lang='en'):
    """Run OCR on every supported image in a folder and collect the results.

    Images are processed in sorted filename order. A failure on one image is
    printed and appended to ``errors.txt`` inside ``folder_path``; processing
    then continues with the next image.

    Args:
        folder_path: Directory whose image files are processed.
        lang: Language code forwarded to ``paddle_inference``.

    Returns:
        A DataFrame with one consolidated row per image that produced OCR
        results, or an empty DataFrame when there are none.
    """
    supported_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    # os.listdir returns entries in arbitrary order; sort so the output row
    # order is reproducible between runs.
    files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(supported_extensions)
    )

    results_dfs = []
    for file in files:
        file_path = os.path.join(folder_path, file)
        try:
            _, ocr_dataframe = paddle_inference(file_path, lang)
            if ocr_dataframe is not None:
                results_dfs.append(consolidate_ocr_results(file, ocr_dataframe))
        except Exception as e:
            # Batch boundary: record the failure and keep going with the
            # remaining images.
            error_message = f"An error occurred while processing {file}: {e}\n{traceback.format_exc()}\n"
            print(error_message)
            # Explicit UTF-8 so a non-ASCII filename or message cannot raise
            # an encoding error inside the handler.
            error_log = os.path.join(folder_path, 'errors.txt')
            with open(error_log, 'a', encoding='utf-8') as error_file:
                error_file.write(error_message)

    if not results_dfs:
        return pd.DataFrame()
    return pd.concat(results_dfs, ignore_index=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment