You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

266 lines
8.8 KiB
Python

#!/usr/bin/env python
from __future__ import print_function
from PIL import Image, ImageTk
import numpy as np
import cv2
import math
import Tkinter
import subprocess
import tempfile
import os
import sys
from itertools import izip_longest
def grouper(n, iterable, padvalue=None):
    """Collect *iterable* into consecutive tuples of length *n*.

    The final tuple is padded with *padvalue* when the input does not
    divide evenly.  The result is a lazy iterator.

    >>> list(grouper(3, 'abcdefg', 'x'))
    [('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'x', 'x')]
    """
    # izip_longest was renamed to zip_longest in Python 3; resolve the name
    # locally so the helper works under either interpreter.
    try:
        from itertools import izip_longest as zip_longest
    except ImportError:
        from itertools import zip_longest
    # n references to the *same* iterator: every output tuple draws its n
    # members from the shared iterator one after another.
    return zip_longest(*[iter(iterable)] * n, fillvalue=padvalue)
# Filled further down with one PIL image per letter cell.
letters_as_img = []

# --- load the scanned page named on the command line ----------------------
if len(sys.argv) < 2:
    sys.exit("usage: %s PAGE_IMAGE" % sys.argv[0])
page_fname = sys.argv[1]
image = cv2.imread(page_fname)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with a readable message instead of inside
    # cvtColor.
    sys.exit("cannot read image file: %s" % page_fname)
img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# --- find the rotated bounding box of the dark pixels ---------------------
# Every pixel no brighter than the mean grey level contributes a point.
mean = img_gray.mean()
# NOTE: np.where on a 2-D array yields (row indices, column indices), so
# despite the names the points fed to minAreaRect are in (row, col) order.
# The crop at the end of this section indexes the image's first (row) axis
# with new_center[0]/size[0], which matches that ordering.
cols, rows = np.where(img_gray <= mean)
a = np.column_stack((cols, rows))
center, size, angle = cv2.minAreaRect(a)

# Bring the angle into [-45, 45] by a quarter turn, swapping the box sides
# to match; `rot` remembers that a swap happened.
rot = False
if angle > 45:
    angle -= 90
    size = (size[1], size[0])
    rot = True
elif angle < -45:
    angle += 90
    size = (size[1], size[0])
    rot = True

# --- rotate the page -------------------------------------------------------
# Rotation about the origin by -angle; one translation entry is then set
# depending on `rot` (presumably to keep the rotated page inside the output
# canvas -- TODO confirm).
rotation_mat = cv2.getRotationMatrix2D((0,0), -angle, 1.)
radians = math.radians(angle)
sin = math.sin(radians)
cos = math.cos(radians)
height, width = image.shape[:2]
# Square output canvas with a side of twice the image diagonal.
bound_w = bound_h = int(math.sqrt(height**2+width**2)*2)
if not rot:
    rotation_mat[1,2] = -sin*width
else:
    rotation_mat[0,2] = cos*height
print("rotating image %f degrees" % (-angle))
image = cv2.warpAffine(image, rotation_mat, (bound_w, bound_h), flags=cv2.INTER_CUBIC)

# --- crop to the text box --------------------------------------------------
# Extra pixels kept around the box on every side.
bordersize = 2
# Rotate the box centre by -angle and add the translation entries of the
# matrix to locate it in the warped image.
radians = math.radians(-angle)
sin = math.sin(radians)
cos = math.cos(radians)
new_center = (cos*center[0]-sin*center[1]+rotation_mat[1,2],sin*center[0]+cos*center[1]+rotation_mat[0,2])
#Image.fromarray(image).save("out.png")
image = image[int(new_center[0]-size[0]/2)-bordersize:int(new_center[0]+size[0]/2)+bordersize,
              int(new_center[1]-size[1]/2)-bordersize:int(new_center[1]+size[1]/2)+bordersize]
#Image.fromarray(image).show()
#exit()
# Cut the cropped page into a fixed grid of letter cells.
height, width = image.shape[:2]
rows = 57
#rows = 30
cols = 76

# Cell pitch in pixels, measured on the page without its border.
letter_height = (height - bordersize*2) / float(rows)
letter_width = (width - bordersize*2) / float(cols)
print("letter size: %f x %f"%(letter_width, letter_height))

for row in range(rows):
    # Vertical extent is the same for every cell of this row; each cell is
    # widened by two border widths and clamped to the image.
    top = int(row * letter_height)
    bottom = min(int((row + 1) * letter_height) + 2*bordersize, height)
    for col in range(cols):
        left = int(col * letter_width)
        right = min(int((col + 1) * letter_width) + 2*bordersize, width)
        cell = image[top:bottom, left:right]
        letters_as_img.append(Image.fromarray(cell))

# One decoded character per cell; None marks "not yet known".
letters = [None] * len(letters_as_img)
# Base64 alphabet in value order: list position == 6-bit value of the symbol.
tobase64 = list(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/"
)
# Build the k-nearest-neighbour classifier from the images stored under
# training_set/00 .. training_set/63 (one directory per base64 value).
print("reading from training_set")
samples = []
responses = []
# do not train with data collected from the current page
own_page_tag = "real-" + page_fname
for label in range(64):
    directory = "training_set/%02d" % label
    for entry in os.listdir(directory):
        path = os.path.join(directory, entry)
        if os.path.isfile(path) and own_page_tag not in entry:
            glyph = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            # Every sample is scaled to 50x100 and flattened to 5000 values.
            glyph = cv2.resize(glyph, (50, 100))
            samples.append(glyph.reshape((1, 5000)))
            responses.append(label)

# One float32 row per sample, one float32 label per row.
samples = np.reshape(samples, (len(responses), 5000)).astype(np.float32)
responses = np.array(responses, np.float32)
responses = responses.reshape((responses.size, 1))

print("training model")
model = cv2.KNearest()
model.train(samples, responses)
def _classify(letter_img):
    """Return the base64 character the k-NN model picks for a PIL letter image."""
    im = np.asarray(letter_img.convert('L'))
    im = cv2.resize(im, (50,100))
    im = im.reshape((1,5000))
    im = np.float32(im)
    return tobase64[int(model.find_nearest(im, k=1)[0])]


# Resume from a previously saved transcript, if there is one.  In that file
# '?' stands for a character that is still unknown.
if os.path.exists(page_fname+".txt"):
    print("loading existing results from file")
    decoded = []
    with open(page_fname+".txt") as f:
        for line in f:
            # Pad every stripped line back to the full row width.
            decoded.extend(line.strip().ljust(cols))
    if len(decoded) != len(letters):
        raise Exception("diff. lens: ", len(decoded), len(letters))
    letters = [None if ch == '?' else ch for ch in decoded]

# NOTE(review): this branch is taken to be at top level (not nested in the
# block above), so a page without a saved transcript is OCRed completely.
if None in letters:
    # Fill every still-unknown cell with the classifier's guess.
    for i, img in enumerate(letters_as_img):
        if i%100 == 0:
            print((i*100)/len(letters_as_img))
        if letters[i] is None:
            letters[i] = _classify(img)
else:
    print("page is fully recovered")
    # Report where the classifier disagrees with the confirmed transcript.
    print("printing sloppy OCR characters")
    for i, img in enumerate(letters_as_img):
        if i%100 == 0:
            print((i*100)/len(letters_as_img))
        letter_knearest = _classify(img)
        if letter_knearest != letters[i]:
            print("wrongly detected a %s as a %s" % (letters[i], letter_knearest))
    # Save every confirmed cell as a training sample for its character.
    print("storing it as training samples")
    for i, (letter, im) in enumerate(zip(letters, letters_as_img)):
        im.save("training_set/%02d/real-%s-%04d.png"%(tobase64.index(letter), page_fname, i))
# Review window: three stacked rows (previous / current / next letter), each
# showing the cell index, the enlarged cell picture and the decoded character.
root = Tkinter.Tk()
letters_as_tk = [
    ImageTk.PhotoImage(cell.resize((100, 200), Image.ANTIALIAS))
    for cell in letters_as_img
]


def _make_row(grid_row):
    """Create and place the index, picture and letter labels of one row."""
    index_label = Tkinter.Label(root)
    index_label.grid(row=grid_row, column=0)
    picture = Tkinter.Label(root)
    picture.grid(row=grid_row, column=1)
    letter_label = Tkinter.Label(root, font=("Liberation Mono", 132))
    letter_label.grid(row=grid_row, column=2)
    return index_label, picture, letter_label


index_label0, picture_display0, letter_label0 = _make_row(0)
index_label1, picture_display1, letter_label1 = _make_row(1)
index_label2, picture_display2, letter_label2 = _make_row(2)

# Index of the letter shown in the middle row.
current_index = 0
def display_index(idx):
    """Show letter *idx* in the middle row with its neighbours above and below.

    Each row displays the cell index, the cell picture and the decoded
    character (blank while the character is still None).  A row whose index
    falls outside the page is blanked.
    """
    slots = (
        (idx - 1, index_label0, picture_display0, letter_label0),
        (idx, index_label1, picture_display1, letter_label1),
        (idx + 1, index_label2, picture_display2, letter_label2),
    )
    for pos, index_label, picture, letter_label in slots:
        if 0 <= pos < len(letters_as_img):
            picture.config(image=letters_as_tk[pos])
            letter_label.config(text=letters[pos] if letters[pos] is not None else "")
            index_label.config(text="%d" % pos)
        else:
            # Tkinter silently skips options whose value is None, so
            # image=None would leave the previous picture on screen; the
            # empty string is what actually removes the image.
            picture.config(image="")
            letter_label.config(text="")
            index_label.config(text="")


display_index(current_index)
def printKey(e):
    """Handle one key press in the review window.

    Tab            -- advance to the next letter that is still unknown
    Escape         -- write the transcript to <page>.txt and quit
    space/Right/Enter -- move one letter forward
    BackSpace/Left -- move one letter back
    base64 char    -- store it for the current letter and move forward

    *e* is the Tk key event; only its ``keysym`` and ``char`` are read.
    """
    global current_index
    last_index = len(letters_as_img) - 1

    if e.keysym == 'Tab':
        # jump to next unknown entry
        while current_index < len(letters)-1 and letters[current_index] is not None:
            current_index += 1
        display_index(current_index)
        return

    if e.keysym == 'Escape':
        # One text line per row of the page; unknown letters become '?'.
        lines = grouper(cols, letters, padvalue='_')
        print("saving status as " + page_fname + ".txt")
        with open(page_fname+".txt", 'w') as f:
            for line in lines:
                print(''.join([c if c is not None else "?" for c in line]), file=f)
        root.quit()
        return

    # Tk reports the Enter key as 'Return' (keypad: 'KP_Enter'); 'Enter' is
    # not a key symbol, so it alone never matched.  The original spellings
    # are kept alongside the working ones.
    if e.keysym in ('Space', 'Right', 'space', 'Enter', 'Return', 'KP_Enter'):
        current_index = min(current_index + 1, last_index)
        display_index(current_index)
        return

    if e.keysym in ('BackSpace', 'Left'):
        current_index = max(current_index - 1, 0)
        display_index(current_index)
        return

    if e.char in tobase64:
        letters[current_index] = e.char
        current_index = min(current_index + 1, last_index)
        display_index(current_index)


root.bind("<KeyPress>", printKey)
root.mainloop()