#!/usr/bin/env python
"""Recover the base64 text of a scanned page, one character cell at a time.

Usage: <this script> PAGE_IMAGE

The page image is rotated and cropped to its dark content, cut into a fixed
grid of ``rows`` x ``cols`` character cells, and every cell is classified with
a k-nearest-neighbour model trained on the images found under
``training_set/NN/`` (``NN`` is the index of the character in ``tobase64``).
A Tkinter window then lets the user step through the cells and correct them;
pressing Escape writes the result to ``PAGE_IMAGE.txt`` and quits.

If ``PAGE_IMAGE.txt`` already exists it is loaded first; ``?`` marks a cell
that is still unknown.  When the loaded page has no unknown cell left, every
cell image is additionally stored as a new training sample.
"""
from __future__ import print_function

from PIL import Image, ImageTk
import numpy as np
import cv2
import math
import Tkinter
import subprocess
import tempfile
import os
import sys
from itertools import izip_longest


def grouper(n, iterable, padvalue=None):
    """Return an iterator over n-tuples of iterable, padding the last one.

    grouper(3, 'abcdefg', 'x') --> ('a','b','c'), ('d','e','f'), ('g','x','x')
    """
    return izip_longest(*[iter(iterable)]*n, fillvalue=padvalue)


letters_as_img = []

if len(sys.argv) < 2:
    sys.exit("usage: %s PAGE_IMAGE" % sys.argv[0])
page_fname = sys.argv[1]

image = cv2.imread(page_fname)
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None
    sys.exit("cannot read image: %s" % page_fname)

# ---------------------------------------------------------------------------
# Deskew: fit a minimum-area rectangle around all pixels that are not brighter
# than the mean gray value and rotate the page by the rectangle's angle.
# ---------------------------------------------------------------------------
img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
mean = img_gray.mean()
# np.where on a 2-D array yields (row indices, column indices), so the points
# handed to minAreaRect -- and therefore center/size -- are in (row, column)
# order.
dark_rows, dark_cols = np.where(img_gray <= mean)
points = np.column_stack((dark_rows, dark_cols))
center, size, angle = cv2.minAreaRect(points)

# bring the angle into [-45, 45], swapping the rectangle's sides accordingly
rot = False
if angle > 45:
    angle -= 90
    size = (size[1], size[0])
    rot = True
elif angle < -45:
    angle += 90
    size = (size[1], size[0])
    rot = True

rotation_mat = cv2.getRotationMatrix2D((0, 0), -angle, 1.)
radians = math.radians(angle)
sin = math.sin(radians)
cos = math.cos(radians)
height, width = image.shape[:2]
# output canvas: twice the image diagonal in both directions
bound_w = bound_h = int(math.sqrt(height**2+width**2)*2)
# NOTE(review): the translation term presumably keeps the rotated page inside
# the output canvas -- confirm the geometry before changing it.
if not rot:
    rotation_mat[1, 2] = -sin*width
else:
    rotation_mat[0, 2] = cos*height
print("rotating image %f degrees" % (-angle))
image = cv2.warpAffine(image, rotation_mat, (bound_w, bound_h), flags=cv2.INTER_CUBIC)

# ---------------------------------------------------------------------------
# Crop to the rotated rectangle, keeping `bordersize` extra pixels per side.
# ---------------------------------------------------------------------------
bordersize = 2
radians = math.radians(-angle)
sin = math.sin(radians)
cos = math.cos(radians)
# rectangle center after the rotation, still in (row, column) order
new_center = (cos*center[0]-sin*center[1]+rotation_mat[1, 2],
              sin*center[0]+cos*center[1]+rotation_mat[0, 2])
#Image.fromarray(image).save("out.png")
image = image[int(new_center[0]-size[0]/2)-bordersize:int(new_center[0]+size[0]/2)+bordersize,
              int(new_center[1]-size[1]/2)-bordersize:int(new_center[1]+size[1]/2)+bordersize]
#Image.fromarray(image).show()
#exit()

# ---------------------------------------------------------------------------
# Cut the cropped page into a fixed grid of character cells.
# ---------------------------------------------------------------------------
height, width = image.shape[:2]
rows = 57
#rows = 30
cols = 76
letter_height = (height-bordersize*2)/float(rows)
#letter_height = (height-bordersize*2)/30.0
letter_width = (width-bordersize*2)/float(cols)
print("letter size: %f x %f"%(letter_width, letter_height))
#for row in range(30):
for row in range(rows):
    for col in range(cols):
        # each cell is extended by the border on the far side and clamped to
        # the image
        miny = int(row*letter_height)
        maxy = min(int((row+1)*letter_height)+2*bordersize, height)
        minx = int(col*letter_width)
        maxx = min(int((col+1)*letter_width)+2*bordersize, width)
        letter = image[miny:maxy, minx:maxx]
        letters_as_img.append(Image.fromarray(letter))

# recognised character per cell; None means "not known yet"
letters = [None]*len(letters_as_img)

# class index -> character (the standard base64 alphabet)
tobase64 = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                "abcdefghijklmnopqrstuvwxyz"
                "0123456789+/")

# ---------------------------------------------------------------------------
# Train the k-nearest-neighbour model from training_set/00 .. training_set/63.
# ---------------------------------------------------------------------------
print("reading from training_set")
samples = []
responses = []
for i in range(64):
    directory = "training_set/%02d" % i
    for f in os.listdir(directory):
        path = os.path.join(directory, f)
        if not os.path.isfile(path):
            continue
        # do not train with data collected from the current page
        if "real-"+page_fname in f:
            continue
        im = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        im = cv2.resize(im, (50, 100))
        im = im.reshape((1, 5000))
        samples.append(im)
        responses.append(i)
samples = np.reshape(samples, newshape=(len(responses), 5000))
samples = samples.astype(np.float32)
responses = np.array(responses, np.float32)
responses = responses.reshape((responses.size, 1))

print("training model")
model = cv2.KNearest()
model.train(samples, responses)


def _classify(img):
    """Return the base64 character the trained model assigns to cell img.

    The PIL image is converted to grayscale, resized to 50x100 and flattened
    to a 1x5000 float32 row, the same layout the training samples use.
    """
    im = np.asarray(img.convert('L'))
    im = cv2.resize(im, (50, 100))
    im = im.reshape((1, 5000))
    im = np.float32(im)
    return tobase64[int(model.find_nearest(im, k=1)[0])]


# ---------------------------------------------------------------------------
# Load earlier results for this page, if any.
# ---------------------------------------------------------------------------
if os.path.exists(page_fname+".txt"):
    print("loading existing results from file")
    decoded = []
    with open(page_fname+".txt") as f:
        for line in f:
            line = line.strip()
            line = line.ljust(cols)
            decoded.extend(list(line))
    if len(decoded) == len(letters):
        letters = [l if l != '?' else None for l in decoded]
    else:
        raise Exception("diff. lens: ", len(decoded), len(letters))

if None not in letters:
    # Every cell is known: report where the model disagrees with the stored
    # text, then keep the cell images as additional training samples.
    print("page is fully recovered")
    print("printing sloppy OCR characters")
    for i, img in enumerate(letters_as_img):
        if i % 100 == 0:
            print((i*100)//len(letters_as_img))
        letter_knearest = _classify(img)
        if letter_knearest != letters[i]:
            print("wrongly detected a %s as a %s" % (letters[i], letter_knearest))
    print("storing it as training samples")
    for i, (letter, im) in enumerate(zip(letters, letters_as_img)):
        if letter not in tobase64:
            # e.g. the blanks ljust() pads short lines with: there is no
            # training class for them, so there is nowhere to store the cell
            continue
        im.save("training_set/%02d/real-%s-%04d.png" % (tobase64.index(letter), page_fname, i))
else:
    # Fill every still-unknown cell with the model's guess.
    for i, img in enumerate(letters_as_img):
        if i % 100 == 0:
            print((i*100)//len(letters_as_img))
        if letters[i] is not None:
            continue
        #fh = tempfile.NamedTemporaryFile(mode='w', suffix=".png", delete=False)
        #fname = fh.name
        #fh.close()
        #img.save(fname)
        #letter_tess = subprocess.check_output(['tesseract', fname, 'stdout', 'base64.conf'])
        #letter_tess = letter_tess.strip()
        #os.unlink(fname)
        letter_knearest = _classify(img)
        #if letter_tess == letter_knearest:
        #    letters[i] = letter_tess
        letters[i] = letter_knearest

# ---------------------------------------------------------------------------
# Review GUI: three rows showing the previous, current and next cell, each
# with its index, its picture and the recognised character.
# ---------------------------------------------------------------------------
root = Tkinter.Tk()

# PhotoImage objects must stay referenced for as long as they are displayed
letters_as_tk = [ImageTk.PhotoImage(i.resize((100, 200), Image.ANTIALIAS)) for i in letters_as_img]


def _make_row(row):
    """Create and grid the (index, picture, letter) labels of one window row."""
    index_label = Tkinter.Label(root)
    index_label.grid(row=row, column=0)
    picture_display = Tkinter.Label(root)
    picture_display.grid(row=row, column=1)
    letter_label = Tkinter.Label(root, font=("Liberation Mono", 132))
    letter_label.grid(row=row, column=2)
    return index_label, picture_display, letter_label


index_label0, picture_display0, letter_label0 = _make_row(0)
index_label1, picture_display1, letter_label1 = _make_row(1)
index_label2, picture_display2, letter_label2 = _make_row(2)

current_index = 0


def _show_cell(index_label, picture_display, letter_label, idx):
    """Show cell idx in the given row, or blank the row if idx is out of range."""
    if 0 <= idx < len(letters_as_img):
        picture_display.config(image=letters_as_tk[idx])
        letter_label.config(text=letters[idx] if letters[idx] is not None else "")
        index_label.config(text="%d" % idx)
    else:
        # Tkinter silently drops options whose value is None, so image=None
        # would leave the old picture in place; the empty string clears it.
        picture_display.config(image="")
        letter_label.config(text="")
        index_label.config(text="")


def display_index(idx):
    """Show cell idx in the middle row and its neighbours above and below."""
    _show_cell(index_label0, picture_display0, letter_label0, idx-1)
    _show_cell(index_label1, picture_display1, letter_label1, idx)
    _show_cell(index_label2, picture_display2, letter_label2, idx+1)


display_index(current_index)


def _goto(idx):
    """Make idx, clamped to the valid cell range, the current cell and redraw."""
    global current_index
    current_index = max(0, min(idx, len(letters_as_img) - 1))
    display_index(current_index)


def printKey(e):
    """Handle one key press of the review window.

    Tab              jump forward to the next unknown cell (or the last cell)
    Escape           write the page to PAGE_IMAGE.txt and leave the main loop
    space/Right/Enter  next cell
    BackSpace/Left   previous cell
    base64 character store it for the current cell and move to the next one
    """
    if e.keysym == 'Tab':
        # jump to next unknown entry
        idx = current_index
        while idx < len(letters)-1 and letters[idx] is not None:
            idx += 1
        _goto(idx)
        return
    if e.keysym == 'Escape':
        lines = grouper(cols, letters, padvalue='_')
        print("saving status as " + page_fname + ".txt")
        with open(page_fname+".txt", 'w') as f:
            for line in lines:
                print(''.join([c if c is not None else "?" for c in line]), file=f)
        root.quit()
        return
    # Tk reports the Enter key as 'Return' (keypad: 'KP_Enter'); the older
    # spellings are kept so nothing that matched before stops matching.
    if e.keysym in ['Space', 'Right', 'space', "Enter", 'Return', 'KP_Enter']:
        _goto(current_index + 1)
        return
    if e.keysym in ['BackSpace', 'Left']:
        _goto(current_index - 1)
        return
    if e.char in tobase64:
        letters[current_index] = e.char
        _goto(current_index + 1)


# "<Key>" delivers every key press; the handler relies on e.keysym / e.char.
root.bind("<Key>", printKey)
root.mainloop()