Skip to content

Instantly share code, notes, and snippets.

@gabonator
Created August 4, 2024 22:07
Show Gist options
  • Save gabonator/dd9192c3cfa884fbed59a2420f98c85e to your computer and use it in GitHub Desktop.
Align a photo of a table for OCR and trim it to its contents.
import sys
import cv2
import numpy as np
def custom_processing_function(b, g, r):
    """Map one BGR pixel to a contrast-enhanced grayscale value.

    Near-gray pixels (largest channel deviation from the mean < 30) are
    stretched with a gain of 3; clearly colored pixels get a stronger
    gain of 4. In both cases the result is y * gain - 200, clamped to
    [0, 255].

    Args:
        b, g, r: channel values (numeric; coerced to int).

    Returns:
        (y, y, y): the enhanced luminance repeated for all three channels.
    """
    r = int(r)
    g = int(g)
    b = int(b)
    y = (r + g + b) / 3  # simple average luminance
    # Largest deviation of any channel from the mean: 0 for pure gray.
    mdif = max(abs(y - r), abs(y - g), abs(y - b))
    gain = 3 if mdif < 30 else 4
    y = max(0, min(y * gain - 200, 255))
    return y, y, y
# Load the image and enhance its contrast for OCR.
# The original version looped over every pixel in interpreted Python
# (O(width*height) iterations); the same transform is done here vectorized
# in numpy — identical results, orders of magnitude faster.
image = cv2.imread(sys.argv[1])
height, width, channels = image.shape
# Per-pixel transform, mirroring custom_processing_function():
#   y = mean(B, G, R); near-gray pixels (max channel deviation < 30) get
#   gain 3, colored pixels gain 4; result is y*gain - 200 clamped to [0, 255].
chan = image.astype(np.float64)
y = chan.mean(axis=2)
mdif = np.max(np.abs(chan - y[..., None]), axis=2)
gain = np.where(mdif < 30, 3.0, 4.0)
out = np.clip(y * gain - 200, 0, 255)
# Broadcasting the (H, W) result into all three channels; the float->uint8
# assignment truncates exactly like the original per-pixel store did.
image[:] = out[:, :, None]
# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Apply GaussianBlur to reduce noise and improve contour detection
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# Apply thresholding to get a binary image
# (inverted: dark ink / table lines become white so findContours detects them)
_, thresh = cv2.threshold(blurred, 128, 255, cv2.THRESH_BINARY_INV)
# Find contours
# RETR_EXTERNAL keeps only outermost contours; CHAIN_APPROX_SIMPLE compresses
# straight segments down to their endpoints.
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Sort contours by area (largest first)
contours = sorted(contours, key=cv2.contourArea, reverse=True)
# Assume the table is the largest roughly rectangular contour: walk the
# contours from largest to smallest and warp the first quadrilateral found.
TARGET_WIDTH = 2400  # output width in pixels; height follows the aspect ratio
for contour in contours:
    # Approximate the contour to a polygon
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    # We expect a rectangle, so it should have 4 points
    if len(approx) != 4:
        continue
    # Get the points of the contour
    pts = np.array(approx).reshape(4, 2)
    # Order the points consistently: sort by y to split the top and bottom
    # pairs, then put the left point first within each pair.
    pts = sorted(pts, key=lambda p: p[1])
    if pts[0][0] > pts[1][0]:
        pts[0], pts[1] = pts[1], pts[0]
    if pts[2][0] > pts[3][0]:
        pts[2], pts[3] = pts[3], pts[2]
    # Clockwise from top-left: TL, TR, BR, BL
    ordered_pts = np.array([pts[0], pts[1], pts[3], pts[2]], dtype='float32')
    rw = np.linalg.norm(ordered_pts[0] - ordered_pts[1])  # top edge length
    rh = np.linalg.norm(ordered_pts[1] - ordered_pts[2])  # right edge length
    # Destination rectangle preserving the detected aspect ratio
    # (top-left, top-right, bottom-right, bottom-left)
    height = int(TARGET_WIDTH * rh / rw)
    width = TARGET_WIDTH
    dst_pts = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype='float32')
    # Compute the perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_pts, dst_pts)
    # Apply the perspective transform and save the result
    aligned_table = cv2.warpPerspective(image, matrix, (width, height))
    cv2.imwrite(sys.argv[2], aligned_table)
    break
else:
    # Previously the script exited 0 without writing anything when no
    # quadrilateral was found; fail loudly instead.
    sys.exit("error: no quadrilateral (table outline) found in the input image")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment