Created
August 4, 2024 22:06
-
-
Save gabonator/f8eb7706053d89f1da10ca13b8ee9707 to your computer and use it in GitHub Desktop.
OCR with OSX Vision
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// OCR for photos of data laid out in tables; needs no extra packages to be installed.
// Usage: swift recognize.swift photo.jpeg
// It should print the recognized 2D table structure as JSON.
import Cocoa | |
import Vision | |
/// One piece of recognized text together with its bounding box.
///
/// In this file the geometry fields are filled from Vision's normalized
/// bounding box multiplied by 1000, with the vertical axis flipped so that
/// `y` grows downward from the top of the image.
struct TableCell {
    /// Horizontal position and vertical position of the box.
    var x, y: Int
    /// Horizontal and vertical size of the box.
    var width, height: Int
    /// The recognized string.
    var text: String
}
/// Runs Vision text recognition on `image` and returns one `TableCell` per
/// recognized text observation.
///
/// Coordinates are Vision's normalized bounding boxes scaled by 1000 and
/// truncated to `Int`. Vision reports boxes relative to the lower-left corner
/// of the image, so the vertical axis is flipped here: `y` is the distance of
/// the box's *top* edge from the top of the image, which makes
/// `(y, y + height)` the vertical extent of the text.
///
/// - Parameters:
///   - image: The picture to scan.
///   - languages: Recognition languages handed to Vision. Defaults to Slovak,
///     the value that used to be hard-coded.
/// - Returns: The recognized cells, or an empty array when no `CGImage` can be
///   obtained from `image` or performing the request throws (a message is
///   printed in both cases).
func recognizeText(in image: NSImage, languages: [String] = ["sk-SK"]) -> [TableCell] {
    guard let cgImage = image.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
        print("Could not get CGImage")
        return []
    }

    var items: [TableCell] = []

    // Create a request handler with the CGImage
    let requestHandler = VNImageRequestHandler(cgImage: cgImage, options: [:])

    // Create a text recognition request; its completion handler fills `items`.
    let request = VNRecognizeTextRequest { request, error in
        if let error = error {
            print("Text recognition error: \(error)")
            return
        }

        guard let results = request.results as? [VNRecognizedTextObservation] else {
            print("No text detected")
            return
        }

        for observation in results {
            guard let topCandidate = observation.topCandidates(1).first else { continue }
            let box = observation.boundingBox
            // Flip the top edge (origin.y + height), not the bottom edge, so
            // that y...y+height really covers the text in top-down coordinates.
            let top = box.origin.y + box.size.height
            items.append(TableCell(
                x: Int(box.origin.x * 1000),
                y: Int(1000 - top * 1000),
                width: Int(box.size.width * 1000),
                height: Int(box.size.height * 1000),
                text: topCandidate.string))
        }
    }

    // Specify recognition level and language
    request.recognitionLevel = .accurate
    request.recognitionLanguages = languages

    do {
        // `items` is returned right after this call, so the function relies on
        // the completion handler having run by the time perform(_:) returns.
        try requestHandler.perform([request])
    } catch {
        print("Failed to perform request: \(error)")
        return []
    }

    return items
}
/// Tells whether two integer ranges, each given as a `(start, end)` tuple,
/// overlap. Ranges that merely touch at an endpoint count as overlapping.
func intersects(a: (Int, Int), b: (Int, Int)) -> Bool {
    let (aStart, aEnd) = a
    let (bStart, bEnd) = b
    // Overlap means neither range lies entirely before the other.
    return aEnd >= bStart && aStart <= bEnd
}
/// Merges overlapping `(start, end)` ranges into bins.
///
/// The ranges are processed in ascending order of `start`. Each range is
/// folded into the first bin it overlaps (touching endpoints count), widening
/// that bin to cover it; a range that overlaps no bin starts a new one.
///
/// - Parameter coordinates: Ranges as `(start, end)` tuples, in any order.
/// - Returns: The merged bins, in the order they were created (ascending
///   `start`). Empty input yields an empty array.
func groupCoordinatesByProximity(coordinates: [(Int, Int)]) -> [(Int, Int)] {
    let sortedCoordinates = coordinates.sorted { $0.0 < $1.0 }
    var bins: [(Int, Int)] = []

    for coordinate in sortedCoordinates {
        let hit = bins.firstIndex { bin in
            // Same closed-range overlap rule as `intersects(a:b:)`.
            bin.0 <= coordinate.1 && coordinate.0 <= bin.1
        }
        if let index = hit {
            // Update the stored bin through its index: a `for var bin in bins`
            // loop variable is a copy, so widening it would be lost.
            bins[index].0 = min(bins[index].0, coordinate.0)
            bins[index].1 = max(bins[index].1, coordinate.1)
        } else {
            bins.append(coordinate)
        }
    }

    return bins
}
/// Looks up which entry of `arr` the range `rng` falls into.
///
/// - Parameters:
///   - rng: The `(start, end)` range to locate.
///   - arr: Candidate `(start, end)` ranges, searched front to back.
/// - Returns: The index of the first entry that `intersects` `rng`, or `-1`
///   when none does.
func findCoord(rng: (Int, Int), arr: [(Int, Int)]) -> Int {
    return arr.firstIndex(where: { candidate in intersects(a: rng, b: candidate) }) ?? -1
}
// Entry point: OCR the image named by the first command-line argument,
// arrange the recognized texts into a row/column grid and print the grid.
guard CommandLine.arguments.count > 1 else {
    // Without this check, indexing arguments[1] traps when no path is given.
    print("Usage: swift recognize.swift photo.jpeg")
    exit(1)
}

// Load an image
if let image = NSImage(contentsOfFile: CommandLine.arguments[1]) {
    var cells = recognizeText(in: image)
    cells.sort(by: { $0.y < $1.y })

    // Keep only cells lying at least 10 units below the first cell whose text
    // is exactly "objektu". When no such cell exists the threshold becomes 0
    // (-10 + 10), so every cell with y >= 0 is kept.
    let trimy = cells.first(where: { $0.text == "objektu" })?.y ?? -10
    cells = cells.filter { $0.y >= trimy + 10 }

    // Overlapping horizontal extents form the columns, vertical ones the rows.
    let xs = cells.map { ($0.x, $0.x + $0.width) }
    let xsg = groupCoordinatesByProximity(coordinates: xs)
    let ys = cells.map { ($0.y, $0.y + $0.height) }
    let ysg = groupCoordinatesByProximity(coordinates: ys)

    let columns = xsg.count
    let rows = ysg.count
    var table: [[String]] = Array(repeating: Array(repeating: "", count: columns), count: rows)

    for cell in cells {
        let row = findCoord(rng: (cell.y, cell.y + cell.height), arr: ysg)
        let column = findCoord(rng: (cell.x, cell.x + cell.width), arr: xsg)
        // findCoord signals "no match" with -1; never use that as a subscript.
        guard row >= 0, column >= 0 else { continue }
        // Several texts landing in the same grid slot are concatenated.
        table[row][column] += cell.text
    }

    print(table)
} else {
    print("Could not load image")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment