Skip to content

Instantly share code, notes, and snippets.

@Deepayan137
Created January 3, 2019 06:07

Revisions

  1. Deepayan137 created this gist Jan 3, 2019.
    58 changes: 58 additions & 0 deletions evaluate.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,58 @@
    import re
    import sys
    import os
    import tempfile
    import subprocess
    import pdb
    import pandas as pd
    import numpy as np
    from collections import defaultdict
    from ocr.baselines.base_config import *



    def calculate_word_accuracy(**kwargs):
        """Score every ground-truth file in a directory against its OCR output.

        Each regular file in ``path`` whose *file name* does not contain
        ``_ocr`` is treated as ground truth and paired with ``<stem>_ocr.txt``
        in the same directory, where ``<stem>`` is the file name up to its
        first dot.  The evaluation tool is run once per pair as
        ``<tool> <accuracy> <ground truth> <ocr output>`` and the percentage is
        read from the first token of the fifth line of its standard output.
        The per-file results are written to a CSV with ``file`` and
        ``accuracy`` columns.

        Keyword Args:
            path: Directory holding the ground-truth and ``*_ocr.txt`` files.
            accuracy: First argument handed to the evaluation tool
                (presumably the report type to produce -- confirm against the
                tool's usage text).
            tool: Optional path of the evaluation executable.  Defaults to
                ``ocr-evaluation-tools/dist/bin/ocrevalutf8.fix``.
            output: Optional destination of the per-file CSV.  Defaults to
                ``ocr/stats/rahul.csv``.  Missing parent directories are
                created.

        Returns:
            The mean accuracy over all successfully scored files, or ``nan``
            when no file could be scored.  The mean is also printed.

        Raises:
            KeyError: If ``path`` or ``accuracy`` is not supplied.
            OSError: If ``path`` cannot be listed or the CSV cannot be written.
        """
        path = kwargs['path']
        acc = kwargs['accuracy']
        tool = kwargs.get('tool', 'ocr-evaluation-tools/dist/bin/ocrevalutf8.fix')
        output = kwargs.get('output', 'ocr/stats/{}.csv'.format('rahul'))

        scores = {}
        count = 0
        # Sorted so the CSV row order does not depend on filesystem order.
        for name in sorted(os.listdir(path)):
            gt_file = os.path.join(path, name)
            # Decide on the file name alone: a dot or "_ocr" in a parent
            # directory must not influence which files are paired or skipped.
            if '_ocr' in name or not os.path.isfile(gt_file):
                continue
            pr_file = os.path.join(path, name.split('.')[0] + '_ocr.txt')
            count += 1
            print(count)
            try:
                cmd = [tool, '{}'.format(acc), gt_file, pr_file]
                process = subprocess.run(cmd, stdout=subprocess.PIPE)
                report = process.stdout.decode().splitlines()
                # Fifth report line carries the percentage as its first token.
                percent = report[4].strip().split()[0].replace('%', '')
                scores[gt_file] = float(percent)
            except (OSError, IndexError, ValueError) as e:
                # Best effort: a missing tool, a truncated report or an
                # unparsable number skips this file but keeps the run going.
                print('skipping {}: {}'.format(gt_file, e), file=sys.stderr)

        df = pd.DataFrame(list(scores.items()), columns=['file', 'accuracy'])
        out_dir = os.path.dirname(output)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        df.to_csv(output)

        # np.mean([]) would emit a RuntimeWarning; report nan quietly instead.
        mean = float(np.mean(list(scores.values()))) if scores else float('nan')
        print(mean)
        return mean


    def main(**kwargs):
        """Command-line entry point: parse options and run the evaluation.

        The keyword arguments are handed to ``Config._parse``; the resulting
        ``path`` and ``accuracy`` attributes are then forwarded to
        ``calculate_word_accuracy``.  ``Config`` is presumably supplied by the
        wildcard import from ``ocr.baselines.base_config`` -- confirm there.
        """
        config = Config()
        config._parse(kwargs)
        calculate_word_accuracy(path=config.path, accuracy=config.accuracy)

    # Expose ``main`` as a command-line interface when run as a script;
    # ``fire`` is imported lazily so importing this module does not need it.
    if __name__ == "__main__":
        from fire import Fire
        Fire(main)

    # python -m ocr.baselines.evaluate --path=<path>