Deepayan137 · January 3, 2019 06:07 · Jan 3, 2019
diff --git a/evaluate.py b/evaluate.py
@@ -0,0 +1,58 @@
+import re
+import sys
+import os
+import tempfile
+import subprocess
+import pdb
+import pandas as pd
+import numpy as np
+from collections import defaultdict
+from ocr.baselines.base_config import *
+
+
+
+def calculate_word_accuracy(**kwargs):
+    """ Calculates the word level accuracy of the OCR result using corrected result as ground truth. """
+
+    path = kwargs['path']
+    acc = kwargs['accuracy']
+    files = list(map(lambda f: path+'/' + f, os.listdir(path)))
+    def clean(base_name):
+        base_name = base_name.split('.')[0]
+        return base_name + '_ocr.txt'
+    count = 0
+    ch_acc = defaultdict(float)
+    for file_ in files:
+        # pdb.set_trace()
+        if '_ocr' not in file_:
+            gt_file = file_
+            pr_file = clean(file_)
+            count+=1
+            print(count)
+            try:
+                cmd = ['ocr-evaluation-tools/dist/bin/ocrevalutf8.fix', '{}'.format(acc), '{}'.format(gt_file), '{}'.format(pr_file)]
+                process = subprocess.run(cmd, stdout=subprocess.PIPE)
+                accuracy = process.stdout.decode().splitlines()[4].strip().split()[0].replace('%', '')
+                ch_acc[file_] = float(accuracy)
+            except Exception as e:
+                print(e)
+                pass
+    df = pd.DataFrame(list(ch_acc.items()), columns=['file', 'accuracy'])
+    df.to_csv('ocr/stats/{}.csv'.format('rahul'))
+    print(np.mean(list(ch_acc.values())))        
+
+
+def main(**kwargs):
+    opt = Config()
+    opt._parse(kwargs)
+    dir_ = opt.path
+    accuracy = opt.accuracy
+
+    calculate_word_accuracy(path=dir_,
+                            accuracy=accuracy)
+
+if __name__=='__main__':
+    import fire
+    fire.Fire(main)    
+
+# Usage example: python -m ocr.baselines.evaluate --path=<path>