6756 lines
289 KiB
Text
6756 lines
289 KiB
Text
/*
rule based OCR engine, partly rewritten for edges (old=pixel)
*/
/*
This is an Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 see README for email address

  >>> DO NOT EDIT THIS FILE IF YOU DO NOT REALLY KNOW WHAT YOU ARE DOING! <<<

  I have invested a lot of time to write this part of the program.
  This engine should recognize chars always right or return UNKNOWN.
  If you change something, test all other example files too,
  to be sure that all things work better.  (JoergS)

  This engine was pixel based until 0.40, which was not successful enough.
  Also code changes always had side effects. The vectorisation of the code
  starts from version 0.41 with the chars XNz and seems to be much better
  to handle. Vectorization means we frame each character by a chain of
  vectors and don't care about pixels anymore. Unfortunately I have to
  replace all the pixel codes, which is a long process. Old code will be lost.
  (JoergS)


  ToDo:
   - if box1->p and b differ, reduce probability
   - probability makes life much easier here
   - use only one box!?, may be bits have useful infos
   - divide this file, suggestion: classify chars:
       high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz
     or
       often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,:
     or
       every char (large overhead)
   - two-pass version (first pass without tolerance)
     2nd pass with tolerance (ex: one tiny more in sdata->holes)

  general feature extraction:
   - white holes at middle, upper, lower position (cost much time)
   - test lines and triangles instead of rectangles

  char is removed, wchar_t is used (better code)

   making a static global variable-set x.x0,x.x1, and call test_a,
   test_b ... (faster compilation, but not reentrant!)

   - adding slant-angle (if detected) to distinguish between l and / ?
   - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/";
     for better context correction or output: "Ha[lI][lI]o!"

*/
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
// #include "pgm2asc.h"
|
|
#include "ocr0.h"
|
|
// #include "ocr1.h"
|
|
#include "amiga.h"
|
|
#include "pnm.h"
|
|
#include "gocr.h"
|
|
|
|
/* IFV prefixes a single statement so that it only runs when bit 4 of the
 * verbosity setting is set. It expands to a bare `if(...)`, so never put an
 * `else` directly behind a statement guarded by IFV. */
#define IFV if(JOB->cfg.verbose&4)
/* MM prints the prefix of a debug line: the char currently being tested, the
 * source line and the upper left corner of the box. It expects the names
 * `c_ask` and `box1` to be visible where it is expanded. */
#define MM {IFV fprintf(stderr,"\nDBG %c L%04d (%d,%d): ",(char)c_ask,__LINE__,box1->x0,box1->y0);}

// the old debug mode (0.40) was only for a special char, for another char
//   code must be recompiled with C_ASK='char'
// new debug mode (0.41) explains why char is declined or accepted as ABC...
//   the output can be filtered by external scripts
//   ToDo: we could reduce output to filter string
#ifndef DO_DEBUG       /* can be defined outside (configure --with-debug) */
#define DO_DEBUG 0     /* 0 is the default */
#endif

/* this macro is for debugging output: "if char is declined, why?" */
#if DO_DEBUG   /* 0=Work mode, 1=debugging mode */
// Setac: output, that char is chosen with a probability
// Break: output, why the char is not chosen
// MSG:   debugging functions for char C_ASK, mostly messages
// DBG:   definitions useful only for debugging
//
// Break has to stay a plain { ... } block: its `break` must leave the
// enclosing test loop of the caller, which a do{}while(0) wrapper would
// swallow.
#define Setac(box1,ac,ad) { MM;IFV fprintf(stderr,"setac %d",ad);setac(box1,ac,ad); }
#define Break             { MM;IFV fprintf(stderr,"break"); break; }
#define MSG(x)            { MM;IFV x }
#define DBG(x) x
#else
/* work mode: no messages, Setac/Break are the plain operations and the
 * MSG()/DBG() arguments are dropped completely */
#define Setac(box1,ac,ad) setac(box1,ac,ad)
#define Break break
#define MSG(x)
#define DBG(x)
#endif
|
|
|
|
/* extern "C"{ */
|
|
|
|
// static inline int sq(int x) { return x*x; } /* square */
|
|
|
|
/*
|
|
* go from vector j1 to vector j2 and measure maximum deviation of
|
|
* the steps from the line connecting j1 and j2
|
|
* return the squared maximum distance
|
|
* in units of the box size times 1024
|
|
* ToDo: 1) better give back max-dx and max-dy ???
|
|
* errors if j1 and j2 are in different frames or belong to
|
|
* more then one frame?
|
|
* 2) Better get deviation from a complete vector graphic?
|
|
* The vectorgraphic is the ideal test char adapted to the
|
|
* extrem vertices of the real char.
|
|
*/
|
|
int line_deviation( struct box *box1, int j1, int j2 ) {
|
|
int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2;
|
|
r1x=box1->frame_vector[j1][0];
|
|
r1y=box1->frame_vector[j1][1];
|
|
r2x=box1->frame_vector[j2][0];
|
|
r2y=box1->frame_vector[j2][1];
|
|
if (!box1->num_frames) return(-1);
|
|
if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
|
|
j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
|
|
fprintf(stderr,"Error in "__FILE__" L%d: idx out of range",__LINE__);
|
|
return(-1);
|
|
}
|
|
/* get the frame the endvector belongs to */
|
|
for (i=0;i<box1->num_frames;i++)
|
|
if (j2<box1->num_frame_vectors[i]) break;
|
|
frame=i;
|
|
/* frame(j1)<=frame(j2) possible */
|
|
for (i=j1;;i++) { // do it for each vector between j1 and j2
|
|
if (i >= box1->num_frame_vectors[frame])
|
|
i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
|
|
if (i==j2) break;
|
|
// for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~}
|
|
r3x=box1->frame_vector[i][0];
|
|
r3y=box1->frame_vector[i][1];
|
|
// Language=german
|
|
// german: Abstand Punkt von Strecke, Laenge Lotrechte
|
|
// germ.Strecke : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1
|
|
// germ.Lotrechte: l2=r3+b*[-(r2-r1).y,(r2-r1).x]
|
|
// Schnittpunkt : l1=l2,
|
|
// eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0
|
|
// eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0
|
|
// eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x)
|
|
// eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y)
|
|
// eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly)
|
|
l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1
|
|
if (l2==0) {
|
|
// fprintf(stderr,"ocr0 L%d: r1==r2 r1= %d %d",__LINE__, r1x, r1y); // debugging
|
|
d=-1024;
|
|
} else
|
|
d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x)
|
|
+((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024..
|
|
if (d<=-1024) { x=r1x; y=r1y; } // starting point
|
|
else {
|
|
if (d>=1024) { x=r2x; y=r2y; } // end point
|
|
else {
|
|
x=((r1x+r2x)+1)/2+(d*(r2x-r1x))/2048;
|
|
y=((r1y+r2y)+1)/2+(d*(r2y-r1y))/2048;
|
|
/* we have the crossing point x,y now */
|
|
}
|
|
}
|
|
dist=sq((x-r3x)*1024/(box1->x1-box1->x0+1))
|
|
+sq((y-r3y)*1024/(box1->y1-box1->y0+1)); // 0..2*sq(1024)
|
|
if (dist>maxdist) maxdist=dist;
|
|
// for debugging:
|
|
// fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d"
|
|
// " vector= %d %d crosspoint= %d %d ",
|
|
// j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y);
|
|
}
|
|
return maxdist;
|
|
}
|
|
|
|
/*
|
|
* search vectors between j1 and j2 for nearest point a to point r
|
|
* example:
|
|
*
|
|
* r-> $$...$$ $ - mark vectors
|
|
* @@$..@@ @ - black pixels
|
|
* @@$..@@ . - white pixels
|
|
* @@@@.$@
|
|
* a-> @@$@$@@
|
|
* @$.@@@@
|
|
* @@..$@@
|
|
* @@..$@@
|
|
* j1 --> $$...$$ <-- j2
|
|
*
|
|
* ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry?
|
|
* j1 and j2 must be in the same frame
|
|
* return aa?
|
|
* 2009-07:
|
|
* - change from normalized (dx=128,dy=128) to absolute distance
|
|
* - simpler and no squeeze effect (problem getting right i2 for "3")
|
|
*/
|
|
int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) {
|
|
int x,y,d,i,aa[4]; /* x,y,normalized_distance^2,vector_index */
|
|
int frame=0;
|
|
// int x0=box1->x0, y0=box1->y0,
|
|
// x1=box1->x1, y1=box1->y1;
|
|
// int dx=box1->x1-x0+1, dy=box1->y1-y0+1;
|
|
if (!box1->num_frames) return(-1);
|
|
if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
|
|
j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
|
|
fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2);
|
|
out_x(box1);
|
|
return(-1);
|
|
}
|
|
aa[0]=x=box1->frame_vector[j2][0]; /* x */
|
|
aa[1]=y=box1->frame_vector[j2][1]; /* y */
|
|
/* maximum is (distance*128)^2 if r is inside the box */
|
|
// aa[2]=d=2*sq(128)+sq((rx-(x0+x1)/2)*128/dx)+sq((ry-(y0+y1)/2)*128/dy);
|
|
aa[2]=d=2*(sq(x-rx)+sq(y-ry)); /* must be greater than min. dist, Jul09 */
|
|
aa[3]=j2; /* vector index */
|
|
/* get the frame the endvector belongs to */
|
|
for (i=0;i<box1->num_frames;i++)
|
|
if (j2<box1->num_frame_vectors[i]) break;
|
|
frame=i;
|
|
/* frame(j1)<=frame(j2) possible */
|
|
for (i=j1;;i++) {
|
|
if (i >= box1->num_frame_vectors[frame])
|
|
i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
|
|
x=box1->frame_vector[i][0]; /* take a vector */
|
|
y=box1->frame_vector[i][1];
|
|
/* distance to upper left end, normalized to 128 */
|
|
// d=sq((x-rx)*128/dx)+sq((y-ry)*128/dy); // old 2009-07
|
|
d=sq(x-rx)+sq(y-ry);
|
|
if (d<aa[2]) { aa[0]=x; aa[1]=y; aa[2]=d; aa[3]=i; }
|
|
if (i==j2) break;
|
|
}
|
|
return aa[3];
|
|
}
|
|
|
|
// test for umlauts, if ret>0 and m==1 box1 is changed
|
|
// m>0 modify box1->dots
|
|
// m==2 modify box1->y0
|
|
// called by pgm2asc + ocr0(?)
|
|
int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){
|
|
// pix p=*(box1->p);
|
|
int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3,
|
|
xl,xr,yu,yl; // left, right, upper and lower border of dots
|
|
wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */
|
|
DBG( wchar_t c_ask='"'; )
|
|
r=0;
|
|
x0=box1->x0; x1=box1->x1; dx=x1-x0+1;
|
|
y0=box1->y0; y1=box1->y1; dy=y1-y0+1;
|
|
m1=box1->m1; m2=box1->m2; m3=box1->m3;
|
|
xl=x0; xr=x1; yu=yl=y0;
|
|
if( dy < 5 || 4*y0 > 3*m2+m3 ) return 0; // no low chars: .,-=
|
|
/* modifier in box included? */
|
|
if( 2*y1 > m1+m2 ){
|
|
/* modifier in box included? */
|
|
for(y=y0;2*y<y0+y1;y++)if( get_bw(xl,xr,y,y,box1->p,cs,1)==0 ) break;
|
|
if( 2*y<y0+y1 ){ /* yes => extract */
|
|
yl=y;
|
|
while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++;
|
|
if( m&2 ) box1->y0=y; /* set new upper bond */
|
|
}
|
|
}
|
|
if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */
|
|
if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap?
|
|
while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xl<x0+x1) xl++;
|
|
for(;xl<x1;xl++)if( get_bw(xl,xl,yu,yl,box1->p,cs,1)==1 ) break;
|
|
for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break;
|
|
|
|
if ( yl-1>yu ) { // tall box ij"a"o"u
|
|
#if 0
|
|
x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x;
|
|
fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
|
|
fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
|
|
#define DEBUG 1
|
|
#endif
|
|
{
|
|
|
|
x=xl;y=yu;
|
|
if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap?
|
|
else
|
|
if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0
|
|
|| get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gap to neighbours
|
|
if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0
|
|
|| get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 )
|
|
{ int i,j,x;
|
|
r=1;
|
|
// ...@@@.... RING_ABOVE // ..@@@..@@. TILDE
|
|
// ..@...@... // @@.@@@@@..
|
|
// ..@...@... // @.........
|
|
// ..@..@@...
|
|
// ...@@@....
|
|
for (i=yu;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==1) break;
|
|
for ( ;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==0) break;
|
|
for (j=xl;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==1) break;
|
|
for ( ;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==0) break;
|
|
for ( x=j;x<xr;x++) if (get_bw(x,x,yu,i,box1->p,cs,1)==1) break;
|
|
// vert. gap detected
|
|
if( j<xr && x<xr && j<x && xr-xl>2
|
|
&& num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!!
|
|
&& num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2
|
|
&& num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2
|
|
){ // may be the following lines are not quite ok
|
|
while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yl<y0+y1) yl++;
|
|
r=2;
|
|
// out_x(box1);printf(" x,y=%d,%d i=%d xl=%d xr=%d yu=%d yl=%d",x0,y0,i-x0,xl-x0,xr-x0,yu-y0,yl-y0);
|
|
mod = DIAERESIS;
|
|
}
|
|
if( m&2 ) box1->y0=yl;
|
|
/* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */
|
|
// out_x(box1);
|
|
}
|
|
if(r==0){ // divided fr != fi
|
|
while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yu<y0+y1) yu++;
|
|
if(m)box1->y0=yu;
|
|
}
|
|
if( r==1 ){ yl--;
|
|
// .@@@. ..@@.
|
|
// .@@.. .@@..
|
|
// .@... .@@..
|
|
//
|
|
// if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
|
|
// > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
|
|
// && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
|
|
// < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // é Nov03
|
|
if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
|
|
- loop(box1->p,xr,yu,xr-xl,cs,0,LE)
|
|
> loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
|
|
- loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // é Nov03
|
|
mod = ACUTE_ACCENT; // '
|
|
|
|
if( xr-xl+1 > 3*(yl-yu+1)
|
|
&& get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 )
|
|
mod = MACRON; // "-" above
|
|
|
|
// .@@@. .@@..
|
|
// ..@@. ..@@.
|
|
// ...@. ..@@.
|
|
//
|
|
// if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
|
|
// < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
|
|
// && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
|
|
// > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) à Nov03
|
|
if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
|
|
- loop(box1->p,xr,yu,xr-xl,cs,0,LE)
|
|
< loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
|
|
- loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) à Nov03
|
|
mod = GRAVE_ACCENT; // `
|
|
|
|
#ifdef DEBUG
|
|
fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
|
|
fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
|
|
#endif
|
|
if( (xr-xl+1) < 2*(yl-yu+1)+2
|
|
&& 2*(xr-xl+1)+2 > (yl-yu+1) ) {
|
|
int i,i1,i2,i3,i4;
|
|
i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI);
|
|
i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI);
|
|
i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO);
|
|
i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO);
|
|
for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
|
|
if (getpixel(box1->p,xl+i,yu+i)< cs) break; i3=i;
|
|
for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
|
|
if (getpixel(box1->p,xl+i,yu+i)>=cs) break; i3=i-i3;
|
|
for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
|
|
if (getpixel(box1->p,xr-i,yu+i)< cs) break; i4=i;
|
|
for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
|
|
if (getpixel(box1->p,xr-i,yu+i)>=cs) break; i4=i-i4;
|
|
#ifdef DEBUG
|
|
fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4);
|
|
#endif
|
|
if ( (xr-xl<5 && yl-yu<8) /* to small */
|
|
|| (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */
|
|
&& abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2
|
|
&& abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4)
|
|
)
|
|
mod = DOT_ABOVE; // "." above, ToDo: improve it!
|
|
}
|
|
|
|
if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
|
|
> loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8
|
|
|| loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
|
|
> loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 )
|
|
&& ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
|
|
> loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8
|
|
|| loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
|
|
> loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 )
|
|
&& num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1
|
|
&& ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2
|
|
|| num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 ))
|
|
mod = CIRCUMFLEX_ACCENT; // "^"
|
|
|
|
if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
|
|
< loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
|
|
|| loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
|
|
< loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
|
|
&& ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
|
|
< loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
|
|
|| loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
|
|
< loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
|
|
&& ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
|
|
|| num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
|
|
&& num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
|
|
mod = CARON; // "v" above
|
|
|
|
if( /* test for bow (new0.3.6) */
|
|
loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
|
|
+ loop(box1->p,xl,yl ,xr-xl,cs,0,RI)
|
|
- 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1
|
|
&& xr-xl>10)
|
|
if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
|
|
< loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
|
|
|| loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
|
|
< loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
|
|
&& ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
|
|
< loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
|
|
|| loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
|
|
< loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
|
|
&& ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
|
|
|| num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
|
|
&& num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
|
|
mod = BREVE; // round "u" above
|
|
|
|
if( xr-xl>3 && yl-yu>1 )
|
|
if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
|
|
> loop(box1->p,xl,yl,xr-xl,cs,0,RI)
|
|
&& loop(box1->p,xr,yu,xr-xl,cs,0,LE)
|
|
< loop(box1->p,xr,yl,xr-xl,cs,0,LE)
|
|
&& num_cross(xl,xr,yu,yu,box1->p,cs) == 2
|
|
&& num_cross(xl,xr,yl,yl,box1->p,cs) == 2 )
|
|
mod = TILDE;
|
|
|
|
if( xr-xl>2 && yl-yu>2)
|
|
if( num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 )
|
|
if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 )
|
|
if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 )
|
|
mod = RING_ABOVE;
|
|
|
|
#ifdef DEBUG
|
|
printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s",
|
|
(int)mod,yu-box1->y0,yl-box1->y0,
|
|
xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON":
|
|
((mod==ACUTE_ACCENT)?"ACUTE":
|
|
((mod==TILDE)?"TILDE":"?"))));
|
|
out_x(box1);
|
|
#endif
|
|
|
|
}
|
|
}
|
|
if (m) box1->dots=r; // set to 0 also possible after division
|
|
if (m) box1->modifier=mod; /* should be resetted after compose ??? */
|
|
MSG(fprintf(stderr,"umlaut mod=%s dots=%d y0o=%d",decode(mod,ASCII),r,y0);)
|
|
}
|
|
// printf(" modifier=%c",mod);
|
|
if (modifier) *modifier=mod; /* set modifier */
|
|
return r;
|
|
}
|
|
|
|
|
|
/*
 * ocr0_eE - rule based test of one box for the letters 'e' and 'E'.
 *
 * Each letter has its own one-pass "for" block that is left via Break as
 * soon as a rule rejects the shape. Rules that only make the letter less
 * likely scale the probability `ad` (percent) down. At the end of a block
 * the candidate is registered with Setac(box1, char, ad).
 *
 *   sdata  shared recognition data: box1 (the box, image in box1->p),
 *          bp (used with box-relative coordinates 0..dx-1 / 0..dy-1),
 *          cs (threshold), holes, hchar/gchar flags and the four corner
 *          points aa[0..3] = (x, y, dist^2, vector_idx)
 *
 * Returns 'e' or 'E' directly if that letter reached ad>=100, otherwise
 * box1->c.
 *
 * NOTE(review): the semantics of loop(), num_cross(), get_bw() and
 * turmite() are assumed from their use here - confirm before changing any
 * rule. The rules depend on their exact order; do not reorder.
 */
static wchar_t ocr0_eE(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  int  i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       ad;                     /* tmp-vars */
  int (*aa)[4]=sdata->aa;      /* corner-points, (x,y,dist^2,vector_idx) */

  // --- most frequent letter e first!!!
  // --- test e ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4 (smallest seen is 5x6)
    DBG( wchar_t c_ask='e'; )
    if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
    if (sdata->holes.num != 1) ad=97*ad/100;
    /* ToDo: may be a two pass version intolerant/tolerant is better */
    if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test
    if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break;
    if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break;
    if( num_cross(x0,x1,y0+dy/4  ,y0+dy/4  ,box1->p,cs) > 2
     && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt
    /* NOTE(review): the crossing count i collected by the next four lines
     * is overwritten below without being tested - a check may be missing */
    x=(x0+x1)/2;i=      num_cross(x,x,y0,y1,box1->p,cs); // v0.40
    if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); }
    if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); }
    if (i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); }
    /* the middle of each side must reach further out than its corners */
    i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break;
    j=loop(box1->p,x0,y0     ,x1-x0,cs,0,RI); if( j<i ) Break;
    j=loop(box1->p,x0,y1     ,x1-x0,cs,0,RI); if( j<i ) Break;
    i=loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break;
    j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( j<i ) i=j;
    j=loop(box1->p,x0     ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
    j=loop(box1->p,x1     ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
    i=loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break;
    j=loop(box1->p,x0     ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
    j=loop(box1->p,x1     ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
    j=2*loop(box1->p,x0,  (y0+y1)/2,x1-x0,cs,0,RI)
       -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI)
       -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI);
    if (dx>3 && j>=dx/4) Break; // ~g 4x6font
    for(y=1;y<dy/2;y++) if( num_cross(x0,x1,y0+y,y0+y,box1->p,cs) == 2 ) break;
    if( y==dy/2 ) Break; // v0.2.5 ~ bad_t
    for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++)
      if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++;
    if( dx>4 && dy>5 && (i<dx/4-1 || i==0) ) Break; // ~g but 4x6-e
    // look for horizontal white line (right gap) => set x,y
    for(x=0,y=i=y0+dy/3;i<y1-dy/6;i++){
      j=loop(box1->p,x1,i,y1-y0,cs,0,LE);
      if(j>=x) { x=j;y=i; }
    }
    if (x<dx/2){  // no gap found, fat font???
      // check smallest thickness left > 2* smallest thickness right
      for(i1=dx,i=y0+dy/3;i<y1-dy/6;i++){
        j =loop(box1->p,x0  ,i,y1-y0,cs,0,RI); if (j>dx/2) break;
        j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI);
        if (j<i1) i1=j; // smallest thickness on left bow
      }
      for(i2=dx,y=i=y0+dy/3;i<y1-dy/6;i++){
        j =loop(box1->p,x1  ,i,y1-y0,cs,0,LE);
        j =loop(box1->p,x1-j,i,y1-y0,cs,1,LE);
        if(j<i2) { i2=j;y=i; }
      }
      if (3*i2>2*i1) Break; // not accepted, if right line is not very thin
      x =loop(box1->p,x1  ,y,y1-y0,cs,0,LE);
      x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE);
      x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE);
      if (3*i2>i1) ad=99*ad/100;
      if (2*i2>i1) ad=99*ad/100;
      bad_e=60; // used later?
    }
    if (x<dx/2) Break;
    for(i=1,j=x0+dx/6;j<x1-dx/6 && i;j++)
      if( num_cross(j,j,y0,y,box1->p,cs) > 1 ) i=0;
    if( i ) Break;
    //  ..@@@@...<-
    //  .@@@@@@;.
    //  @@,...@@.
    //  @@.....@,
    //  @@@@@@@@@
    //  @@.,;.@,. <- problem (y) == bad_e>50
    //  @@.....@.
    //  @@,...@@.
    //  .@@@,@@@.
    //  ..@@@@;..<-
    if (dy>11 && bad_e<50)
    if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except curved ("geschwungenes") e
    if ( num_cross(x0,x1-dx/3,y  ,y  ,box1->p,cs) != 1
      && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break;
    // if( num_hole(x0, x1, y0  , y   ,box1->p,cs,NULL) < 1 ){
    if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){
      if( sdata->hchar ) Break; // ~ \it t
      // look if thin font (may be h-line is broken) Mai00
      for(j=0,i=x0+dx/8;i<x1-1;i++)
        if( get_bw(i,i,y0+dy/4,y,box1->p,cs,1) == 1 ) j++;
      if(j<2*dx/4) Break;
    }
    if( sdata->holes.num>0 && sdata->holes.hole[0].y0 >  y-y0) Break;
    if( sdata->holes.num>1 && sdata->holes.hole[1].y0 >  y-y0) Break;
    if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) {
      ad=95*ad/100; } /* 8*10 @ (=at) is not an e */
    // look for horizontal gap
    for(x=0,y=i=y0+dy/4;i<y1-dy/4;i++){
      j=loop(box1->p,x0,i,x1-x0,cs,0,RI);
      if(j>=x) { x=j;y=i; }
    }
    if (y>y0+dy/4 && y<y1-dy/4 && x>dx/2) Break; // s
    if (x>dx/4) ad=99*ad/100;

    if( num_cross(x0+dx/2,x1  ,y1-dy/4,y1  ,box1->p,cs) == 0
     && num_cross(x0+dx/2,x1-1,y1-dy/4,y1  ,box1->p,cs) == 0
     && num_cross(x0+dx/2,x1  ,y1-dy/4,y1-1,box1->p,cs) == 0 ) {
      if (sdata->gchar) Break; // ~p
      ad=99*ad/100;
    }
    /* upper case is for 5x6 box */
    if( sdata->hchar  // broken B ? should also work when linedetection fails
     && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) {
      x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
      if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x
       && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break;
      if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x
       && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break;
    }
    x = loop(sdata->bp,0,dy-2     ,dx,cs,0,RI);
    if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q
    if (box1->m2) {
      if (sdata->gchar) ad=99*ad/100;
      if (sdata->hchar) ad=99*ad/100;
    } else ad=99*ad/100;

    Setac(box1,(wchar_t)'e',ad);
    if (ad>=100) return 'e';
    break;
  }
  // --- test E ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>4 ;){     // min 3x5 (dy>4)
    // rewritten for vectors 0.43
    int i1, i2, i3, i4, i5;  // line derivation + corners
                             // (i2..i4 are only used by the MSG below)
    DBG( wchar_t c_ask='E'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    /* half distance to the center */
    d=2*sq(128/4);
    /* all four corners of the E must be near the box corners */
    if (aa[3][2]>d/2) Break;  /* upper right end, [2] = distance, ~dj... */
    if (aa[0][2]>d/2) Break;  /* upper left end */
    if (aa[1][2]>d/2) Break;  /* lower left end */
    if (aa[2][2]>d/2) Break;  /* lowerright end */
/*
           E         f near E

         OOOOOOOO       OOOO
         O5     O      O
         O4            O
         OOOO3        OOOOOO
         O2            O
         O             O
         O1     O      O
         OOOOOOOO     OOOOOO
*/
    // check the bow from below
    for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
      if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal!
    }
    if (i!=aa[2][3]) Break; // ~AHKMNRX
    // search most left+down between bottom right and top right
    i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1);
    i5=nearest_frame_vector(box1,       i1,aa[3][3], x0, y0);
    i3=nearest_frame_vector(box1,       i1,      i5, x1, (y0+y1)/2);
    i2=nearest_frame_vector(box1,       i1,      i3, x0, (2*y0+y1)/3);
    i4=nearest_frame_vector(box1,       i3,      i5, x0, (y0+2*y1)/3);
    i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2);
    if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break;
    if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f

    MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);)
    // holes right open?
    for( i=1,y=y0; y<y0+dy/4 && i; y++ ) // long black line
      if( get_bw(x0+dx/3,x1-dx/6,y,y,box1->p,cs,2) == 0 ) i=0;
    if( i ) Break;
    for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line
      if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0;
    if( i ) Break;
    for( i=1,y=y0+dy/3; y<y1-dy/3 && i; y++ ){ // black line
      j=loop(box1->p,x0  ,y,dx,cs,0,RI);
      j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0;
    }
    if( i ) Break;
    x=x1-dx/3; y=y0; // drill through from above!
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break;
    x=x1-dx/3; y=y1; // drill through from below!
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( y<y1-dy/4 ) Break;
    /* NOTE(review): by symmetry with the test from above this probably
     * should read y<y1-dy/3; as written it compares against y0 - confirm
     * with the test images before changing */
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( y<y0-dy/3 ) Break;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || y<y0+dy/2 ) Break;
    x=x1-dx/3; y=y0; // drill through from above!
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
    y+=dy/15;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x0 ) Break;
    if (dx>15 && x==x0) ad=99*ad/100; // to thin
    x+=dx/15+1;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break;
    // if( num_hole(x0, x1, y0  , y1  ,box1->p,cs,NULL) > 0 ) Break;
    if (sdata->holes.num > 0) Break;
    i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break;
    j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break;
    i=j;
    j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break;
    j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE);
    for( x=dx,y=y0+dy/6; y<y1-dy/9; y++ ) // left border straight
    { i=loop(box1->p,x0,y,dx,cs,0,RI);
      if (i>j/2 && ad>98) ad=99*ad/100;
      if (i>dx/4) break;
      if(i<x) x=i;
    }
    if( y<y1-dy/9 ) Break; // t
    if(dy>3*dx) // ~[
    if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break;

    if (box1->m2) {
      if (!hchar) ad=ad*99/100;
      if ( gchar) ad=ad*99/100;
    }
    Setac(box1,(wchar_t)'E',ad);
    if (ad>=100) return 'E';
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_n - rule based test of one box for the letter 'n'
 * (printed and handwritten form).
 *
 * The single "for" block is a one-pass loop that is left via Break as soon
 * as a rule rejects the shape; softer rules scale the probability `ad`
 * (percent) down. A surviving candidate is registered with
 * Setac(box1,'n',ad); for very small boxes 'm' is registered as well.
 *
 *   sdata  shared recognition data: box1 (the box), bp (used with
 *          box-relative coordinates 0..dx-1 / 0..dy-1), cs (threshold),
 *          holes and the hchar/gchar flags
 *
 * Returns box1->c.
 *
 * Within this function `d`, `i3` and `handwritten` are only written,
 * never read.
 *
 * NOTE(review): the semantics of loop(), num_cross() and get_bw() are
 * assumed from their use here - confirm before changing any rule. The
 * rules depend on their exact order; do not reorder.
 */
static wchar_t ocr0_n(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  int  i,j,d,x,y,i1,i2,i3,handwritten=0,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       ad;                     /* tmp-vars */

  // --- test n ---------------------------------------------------
  // glued rm is very similar to glued nn -> thickness of h-line should grow
  // may02: tested for 8x12 font
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='n'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    i= num_cross(   0,dx-1,dy/4,dy/4,sdata->bp,cs);
    j= num_cross(   0,dx-1,dy/2,dy/2,sdata->bp,cs);
    if( (i<2 || i>3) && j!=2 ) Break;
    if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */
    y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */
    if( num_cross(   0,dx/2,y  ,y  ,sdata->bp,cs) != 1
     && num_cross(   0,dx/2,y-1,y-1,sdata->bp,cs) != 1
     && num_cross(dx/2,dx-1,y  ,y  ,sdata->bp,cs) <  1 ) Break; // n rr
    // ~thick_w
    y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO); if(y>dy/2) Break;
    if(y>1)if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break;

    y=3*dy/4;
    if( num_cross(0,   dx/2,y  ,y  ,sdata->bp,cs) == 1
     && num_cross(dx/2,dx-1,y  ,y  ,sdata->bp,cs) == 0 ) Break; // ~p
    y=dy/2;
    if( num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2
     && num_cross(0,dx-1,dy/2,     dy/2     ,sdata->bp,cs) == 2 ) {  // n rr
      /* printed n */
      /* walk along the middle row: i1 = end of 1st v-line, i2 = end of
       * the gap, i3 = end of the 2nd v-line */
      x =loop(sdata->bp,0,y,dx  ,cs,0,RI); if(x>  dx/4) Break; // search 1st v-line
      x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x>  dx/2) Break; i1=x; // 1st gap
      x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x<  dx/2) Break; i2=x; // 2nd v-line
      x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap
      i=dy/4; y=13*dy/16;
      if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n
      if (i<2 && i<dy/2) i++; // correct for small fonts like 8x12
      // the same game for the lower part =>l1 l2 l3 l4 ???
      for(x=i1;x<i2;x++) if( loop(sdata->bp,x,   0,dy,cs,0,DO)>=i ) break;
      if(x <i2) Break; // gap detected
      for(x=i1;x<i2;x++) if( loop(sdata->bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break;
      if(x==i2) Break; // no gap detected (glued serifs ??? )
      // glued rm as nn ???
      for(y=0,x=(i1+i2)/2;x<i2;x++){
        i=loop(sdata->bp,x,0,dy,cs,0,DO);
        i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness
        if( i>y ) y=i;
        if( i<y/2 ) break;
      }
      if(x <i2) Break; // unusual property for n
      if( dy>7 )
      if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE)
         +loop(sdata->bp,   0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1
        > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE)
         +loop(sdata->bp,   0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o
      /* NOTE(review): the second loop() starts at the right border and
       * goes RIght - looks like LE was intended, confirm */
      if( dy>7 && dx>7 )
      if( loop(sdata->bp,dx-1,     dy/2,dx,cs,0,LE)==0
       && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o
    } else {  /* check handwritten n */
      if( num_cross(0,dx-1,dy/2,     dy/2     ,sdata->bp,cs) != 3
       && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break;
      i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break;
      i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break;
      i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI);
      if( num_cross(i,i,     0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break;
      i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI);
      if( num_cross(i,i,dy/2+1,       dy-1,sdata->bp,cs) != 0 ) Break;
      handwritten=80;
    }

    i= loop(sdata->bp,dx-1  ,dy/2,dx,cs,0,LE); if(i>5)
    if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr
    i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE);
    if( get_bw(dx-1-i  ,dx-1-i  ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv

    if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0
     && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0
     && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P

    // glued ri ???
    if( box1->dots>0 && box1->m1 )
    if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 )
    if( num_cross(   0,dx-1,0  ,0  ,sdata->bp,cs) >2
     || num_cross(   0,dx-1,1  ,1  ,sdata->bp,cs) >2 ) Break;


    i=loop(sdata->bp,dx-1,     dy-1,dx,cs,0,LE); if (i>dx/2)
    i=loop(sdata->bp,dx-1,     dy-2,dx,cs,0,LE);
    x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
    if (sdata->hchar && i-x>1) Break; // ß
    /* NOTE(review): the four serif probes start at x=0 and go LEft -
     * looks like RI was intended, confirm */
    x=loop(sdata->bp,   0,dy-1,dx,cs,0,LE); // check for serifs
    i=loop(sdata->bp,   0,dy-2,dx,cs,0,LE); if (i<x) x=i;
    i=loop(sdata->bp,   0,   1,dx,cs,0,LE); if (i<x) x=i;
    i=loop(sdata->bp,   0,   2,dx,cs,0,LE); if (i<x) x=i;
    if (sdata->hchar && x>0) Break; // fl

    if (num_cross(   0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M
    if (sdata->hchar || 2*y0<box1->m1+box1->m2) ad=96*ad/100;
    if (sdata->gchar) ad=96*ad/100; // ß fl
    if (dx<5) { // for small fonts no middle line is possible for m
      ad=99*ad/100; // 4x6 m
      if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) {
        ad=97*ad/100; // ~m
        if (dy<=4) Setac(box1,'m',97); // only for 4x6 font!
      }
    }
    Setac(box1,'n',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_M - rule based test whether the glyph in sdata->box1 is a capital 'M'.
 *
 * The glyph is probed through sdata->bp with box-relative coordinates
 * (0..dx-1, 0..dy-1).  A chain of heuristics either rejects the candidate
 * (Break leaves the single-pass loop, nothing is registered) or scales the
 * certainty `ad` (percent, starting at 100) down.  A candidate surviving
 * all tests is registered with Setac(box1,'M',ad).
 *
 * Returns box1->c in every case.
 *
 * NOTE(review): loop(), num_cross(), Break, Setac and DBG are defined
 * outside this block.  The comments below assume that
 * loop(p,x,y,len,cs,col,dir) yields the length of the run of pixels of
 * colour `col` (0 = background, 1 = ink) starting at (x,y) in direction
 * `dir`, and that num_cross() counts the strokes crossed along a line -
 * confirm against their definitions.
 */
static wchar_t ocr0_M(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1-x0+1, dy = y1-y0+1;   /* size of the box */
  int d, x, y, i0, i1, i2, i3, t1;  /* scratch; d, i0 and i3 are written but never read */
  int ad;                           /* certainty in percent */

  // ------------------ test M ---------------------------
  // (original remark: "dy<=dx" is not perfect, better would be a search
  //  for the minimum in the middle, as for m)
  ad = d = 100;
  while (dx > 3 && dy > 3) {
    DBG( wchar_t c_ask='M'; )

    if (sdata->holes.num > 1) { Break; }  /* tolerant against a tiny hole */

    /* search the middle half for a row crossing at least 3 strokes */
    y = dy/4;
    while (y <= 3*dy/4) {
      if (num_cross(0,dx-1,y,y,bp,cs) >= 3) { break; }
      y++;
    }
    if (y > 3*dy/4 && dx > 4) { Break; }  /* no such row (only required for dx>4) */

    if (num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2
     && num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2) { Break; }  /* fat M */
    if (num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) < 2) { Break; }

    /* ~ melted kl: the first ink run met from the right on the bottom row
       must not be wider than half the box */
    x = loop(bp,dx-1  ,dy-1,dx,cs,0,LE);
    x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE);
    if (x > dx/2) { Break; }

    /* ~K: left + right indentation at 7/16 height */
    if (loop(bp,   0,7*dy/16,dx,cs,0,RI)
      + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2) { Break; }

    /* ~it_u (these tests should be extended to a range check) */
    if (dy > 8
     && loop(bp,  dx/4,dy-1,dy,cs,0,UP) < dy/4
     && loop(bp,3*dx/8,dy-1,dy,cs,0,UP) < dy/4
     && loop(bp,0,dy-1-dy/ 8,dx,cs,0,RI)
      < loop(bp,0,dy-1-dy/16,dx,cs,0,RI)-dx/32) { Break; }
    if (num_cross(0,dx-1,  dy/2,  dy/2,bp,cs) == 2
     && num_cross(0,dx-1,  dy/4,  dy/4,bp,cs) >  2
     && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) >  2) { Break; }  // ~it_u
    if (num_cross(0   ,dx-1,3*dy/4,3*dy/4,bp,cs) == 2
     && num_cross(dx/2,dx/2,3*dy/4,  dy-1,bp,cs) >  0) { Break; }  // ~it_v

    /* ~N: upper gap deeper at 3/4 width than in the middle while the
       lower gap behaves the other way round */
    if (loop(bp,3*dx/4,   0,dy,cs,0,DO)
      > loop(bp,2*dx/4,   0,dy,cs,0,DO)
     && loop(bp,3*dx/4,dy-1,dy,cs,0,UP)
      < loop(bp,2*dx/4,dy-1,dy,cs,0,UP)) { Break; }
    /* ~serif_N: same comparison, started dy/8 inside the box */
    if (loop(bp,3*dx/4,     dy/8,dy,cs,0,DO)
      > loop(bp,2*dx/4,     dy/8,dy,cs,0,DO)
     && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP)
      < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP)) { Break; }

    // i0 is lower end of upper serifs (widest gap?) - currently unused
    i0 = 0;

    /* Is it an N?  Only examined if the middle row crosses exactly 3 strokes */
    if (num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 4
     && num_cross(0,dx-1,dy/2,dy/2,bp,cs) == 3) {
      /* go down to the first row with fewer than 3 crossings */
      for (y = dy/2+1; y < dy; y++) {
        if (num_cross(0,dx-1,y,y,bp,cs) < 3) { break; }
      }
      if (num_cross(0,dx-1,y,y,bp,cs) == 2) {
        /* on the row above: skip gap, stroke and gap coming from the right */
        x  = loop(bp,dx-1  ,y-1,dx,cs,0,LE);
        x += loop(bp,dx-1-x,y-1,dx,cs,1,LE);
        x += loop(bp,dx-1-x,y-1,dx,cs,0,LE);
        if (loop(bp,dx-x,y-1,dy,cs,0,UP) > y-2) { Break; }  // ~N
      }
    }

    // MNWK: follow the upper gap to the right while it keeps getting deeper
    i2 = 0;
    i1 = x = dx/2;
    while (x < dx-dx/4) {
      y = loop(bp,x,0,dy,cs,0,DO);
      if (y <= i2) { break; }
      i2 = y;   /* depth of the gap  */
      i1 = x;   /* column of the gap */
      x++;
    }
    i3 = i2+loop(bp,i1,i2,dy-i2,cs,1,DO);
    if (i2 < dy/4) {
      if (!sdata->hchar) { Break; }  // rm
      ad = 99*ad/100;
    }
    if (i2 == 0 && dx > 8 && dy > 12) { Break; }  // glued and badly split serif MN

    // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) != 0 ) Break; // small A
    if (sdata->holes.num != 0) { Break; }

    t1 = loop(bp,0 ,3*dy/4,dx,cs,0,RI);
    t1 = loop(bp,t1,3*dy/4,dx,cs,1,RI);  // thickness of line?
    /* for thin strokes both halves just above the gap end must cross 2 strokes */
    if (7*(t1+1) < dx
     && (num_cross(i1,dx-1,i2-1,i2-1,bp,cs) != 2
      || num_cross(0 ,i1  ,i2-1,i2-1,bp,cs) != 2)) { Break; }  // too hard ???

    // ~u_n-pair: one of the three top rows must cross exactly 2 strokes
    if (num_cross(0,dx-1,0,0,bp,cs) != 2
     && num_cross(0,dx-1,1,1,bp,cs) != 2
     && num_cross(0,dx-1,2,2,bp,cs) != 2) { Break; }

    // ~nn v0.2.4a3: compare the stroke/gap borders at 1/4 and 3/4 height
    if (num_cross(0,dx-1,  dy/4,  dy/4,bp,cs) == 4
     && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) == 4) {
      i1  = loop(bp, 0,  dy/4,dx,cs,0,RI);
      i1 += loop(bp,i1,  dy/4,dx,cs,1,RI);
      i1 += loop(bp,i1,  dy/4,dx,cs,0,RI);
      i2  = loop(bp, 0,3*dy/4,dx,cs,0,RI);
      i2 += loop(bp,i2,3*dy/4,dx,cs,1,RI);
      i2 += loop(bp,i2,3*dy/4,dx,cs,0,RI);
      if (i1 >= i2) { Break; }  // no good M
      i1 += loop(bp,i1,  dy/4,dx,cs,1,RI);
      i2 += loop(bp,i2,3*dy/4,dx,cs,1,RI);
      if (i1 >= i2) { Break; }  // no good M
      i1 += loop(bp,i1,  dy/4,dx,cs,0,RI);
      i2 += loop(bp,i2,3*dy/4,dx,cs,0,RI);
      if (i1 <= i2) { Break; }  // no good M
    }
    if (num_cross(0,dx-1,dy/2,dy/2,bp,cs) == 2
     && num_cross(0,dx-1,dy/4,dy/4,bp,cs) == 2 && !hchar) { Break; }  // ~ \it u

    /* soft criteria: only reduce the certainty */
    if (dy < 17
     && num_cross(0,dx-1,0,0,bp,cs) < 2) { ad = 99*ad/100; }
    if (dx > 5  /* 4x6 font has only 1 cross at y=1 */
     && num_cross(0,dx-1,1,1,bp,cs) < 2) { ad = 96*ad/100; }  // kt
    if (num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 1) { ad = 98*ad/100; }  // kt
    if (dx < 5 && loop(bp,dx/2,0,dy,cs,0,DO) >= 3*dy/8) { ad = 96*ad/100; }  // 4x6 H

    if (num_cross(0,dx-1,  dy/4,  dy/4,bp,cs) <= 2
     && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) <= 2
     && dx > 8 && dy > 12) {
      ad = 98*ad/100;
      /* look for an H-line: a middle row made of a single stroke */
      for (y = 5*dy/16; y < 5*dy/8; y++) {
        if (num_cross(0,dx-1,y,y,bp,cs) == 1) { break; }
      }
      if (y < 5*dy/8) {
        ad = 95*ad/100;
        if (num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs) == 0
         || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs) == 0) { Break; }  // ~H bad!
      }
    }

    if (loop(bp,3*dx/8,   0,dy,cs,0,DO) > dy/2
     && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) > dy/2) { ad = 95*ad/100; }

    if (!hchar) {
      ad = 98*ad/100;  /* not sure */
      if (loop(bp,0,     dy/4,dx,cs,0,RI)
        < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16) { Break; }  // ~wi glued
    }
    if (gchar) { ad = 98*ad/100; }
    if (ad > 99 && dx < 8) { ad = 99*ad/100; }  /* give 5x8 N a chance */
    Setac(box1,'M',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_N - rule based, vector based test whether the glyph in sdata->box1
 * is a capital 'N'.
 *
 * Works on the four corner points sdata->aa[0..3] (each entry is
 * x, y, dist^2, vector_idx; the size tests below treat [0]/[3] as the upper
 * and [1]/[2] as the lower pair, [0]/[1] left and [2]/[3] right) and on the
 * frame vectors of box1.  Failed tests leave the single-pass loop through
 * Break; soft tests scale the certainty `ad` (percent, starting at 100).
 * A surviving candidate is registered with Setac(box1,'N',ad).
 *
 * Returns box1->c in every case.
 *
 * Fixed here: dbg[1] is now the sum of the deviations of both partial lines
 * (lower left serif + vertical line).  A stray ';' used to end the statement
 * after the first term, so the second line_deviation() result was computed
 * and thrown away, unlike the parallel dbg[2] / dbg[3] computations.
 *
 * NOTE(review): sq(), line_deviation(), nearest_frame_vector(), MSG, DBG,
 * Break and Setac are defined outside this block; their exact semantics
 * are assumed from their names and usage - confirm before changing limits.
 */
static wchar_t ocr0_N(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      (*aa)[4]=sdata->aa,    /* corner-points, (x,y,dist^2,vector_idx) */
      dbg[9],                /* line deviations, kept for the debug output */
      ad;                    /* certainty in percent */

  // --- test N ------- +hchar -gchar
  for(ad=d=100;dx>3 && dy>3;){ // 4x6font
    int j;
    DBG( wchar_t c_ask='N'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (sdata->holes.num > 0) ad=98*ad/100; /* # */
    if (dx<6) ad=99*ad/100;
    if (dx<5) ad=99*ad/100;

    /* half distance to the center */
    d=2*sq(128/4);
    /* all 4 ends of the N must lie near the corners of the box */
    if (aa[0][2]>d) Break;
    if (aa[1][2]>d) Break;
    if (aa[2][2]>d) Break;
    if (aa[3][2]>d) Break;
    /* the corner points must span at least half of the box ... */
    if (aa[3][0]-aa[0][0]<dx/2) Break;
    if (aa[2][0]-aa[1][0]<dx/2) Break;
    if (aa[1][1]-aa[0][1]<dy/2) Break;
    if (aa[2][1]-aa[3][1]<dy/2) Break;
    /* ... and at least 3 pixels horizontally */
    if (aa[3][0]-aa[0][0]<4-1) Break; /* too small to hold an N */
    if (aa[2][0]-aa[1][0]<4-1) Break; /* too small */
    /* both upper ends should be at nearly the same height */
    if (abs(aa[3][1]-aa[0][1])>(dy+2)/5) Break; /* glued tu */
    if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */

    /* left and right vertical line */
    d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
    ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
    d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;

    /* i1: uppermost left ^ from bottom (near 0,0) */
    i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0);
    x=box1->frame_vector[i1][0];
    y=box1->frame_vector[i1][1];
    MSG( fprintf(stderr,"i1= %d (%d,%d) left ^ from below", i1,x-x0,y-y0);)
    if (y-y0 > 5*dy/8) Break;
    if (x-x0 > 5*dx/8) Break;
    /* i3: uppermost right ^ ~H */
    i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
    MSG( fprintf(stderr,"i3= %d (%d,%d) right ^ (ad=%d)",\
      i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);)

    /* check lower border of diagonal line, may fail on fonts where
     * line ends on middle of right vertical line (screen font) */
    dbg[0]=d=line_deviation(box1,i1, aa[2][3]);
    /* check right border of left vertical line */
    /* but split to possible lower left serif + vert. line */
    j=nearest_frame_vector(box1,aa[1][3],i1, x0+dx/2, y1+dy/2);
    dbg[1]=d=line_deviation(box1, aa[1][3],j )
            +line_deviation(box1, j,i1); /* sum of both parts (see header) */
    MSG(fprintf(stderr," i1-a2 %d a1_serif-i1 %d ad=%d",dbg[0],dbg[1],ad);)
    if (dbg[0] > sq(1024/4)) Break;
    if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024)
    if (dx>4 && dbg[1] > sq(1024/3)) Break;         // d=0..2*sq(1024)
    // serif N has d=sq(1024/3)=116508
    MSG( fprintf(stderr,"ad %d", ad); )

    /* i2: lowest right v from top, same frame? N-tilde etc.? */
    i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8);
    x=box1->frame_vector[i2][0];
    y=box1->frame_vector[i2][1];
    MSG( fprintf(stderr,"i2= %d (%d,%d) lowest right v from top",\
      i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);)
    if (y-y0 < 3*dy/8) Break;
    if (x-x0 < 3*dx/8) Break;
    // test H: both ^ points far apart, at about the same height, and the
    // v point not below the left ^ point
    if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4
      && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8
      && y<=box1->frame_vector[i1][1]) Break;
    /* check if upper left and lower right point are joined directly */
    /* but split to possible upper right serif + down line */
    j=nearest_frame_vector(box1,i2,aa[0][3], x0+dx/2, y0-dy/2);
    dbg[2]=d=line_deviation(box1,i2, j)
            +line_deviation(box1, j, aa[0][3]);
    /* check if upper right and lower right point are joined directly */
    /* but split to possible upper right serif + vert. line */
    j=nearest_frame_vector(box1,aa[3][3],i2, x0+dx/2, y0-dy/2);
    dbg[3]=d=line_deviation(box1, aa[3][3],j)
            +line_deviation(box1, j,i2); // ToDo: split once more?
    MSG( fprintf(stderr," i2-a0 %d a3-i2 %d ad %d",dbg[2],dbg[3], ad); )
    if (dbg[2] > sq(1024/4)) Break;
    // serif N, ToDo: do it better
    if (dbg[3] > sq(1024/4)) ad=97*ad/100;
    if (dbg[3] > sq(1024/3)) Break;
    MSG( fprintf(stderr,"ad %d", ad); )

    MSG( fprintf(stderr,"check against melted tu"); )
    // i1 = left ^ from below, i2 = lowest right v from top
    // sample gocr_Device*: 3-8,dy=27
    if ( (box1->frame_vector[i1][1]-y0)
        -(y1-box1->frame_vector[i2][1])>dy/8) ad=99*ad/100; /* ~ tu */
    MSG( fprintf(stderr,"tu ad %d", ad); )
    if (box1->frame_vector[i2][0]
       -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */
    MSG( fprintf(stderr,"i2-i1<=dx/8 ad %d", ad); )
    /* i1: uppermost left ^ from bottom (near 0,0) */
    /* i2: lowest right v from top, same frame? N-tilde etc.? */
    if (box1->frame_vector[i2][1]
       -box1->frame_vector[i1][1]<=dy/8) {
      // may happen on screen fonts 7x10
      if (dx>8) ad=97*ad/100; /* too flat (ff,H) */
    }
    MSG( fprintf(stderr,"i2-i1<=dy/8 ad %d", ad); )
    if (box1->frame_vector[i2][1]
       -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100;
    MSG( \
      fprintf(stderr,"^v %d %d %d %d line dev %d %d %d %d max %d %d ad %d",\
       box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
       box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
       dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024),ad);)
    /* fold the four line deviations into the certainty */
    ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )
    ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )
    ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )
    ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )

    if (!hchar) ad=99*ad/100;
    if ( gchar) ad=98*ad/100; // \sc N
    Setac(box1,'N',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_h - rule based test whether the glyph in sdata->box1 is a small 'h'.
 *
 * First part: vector based checks on the corner points sdata->aa[0..3]
 * (x, y, dist^2, vector_idx; [0] upper left, [1] lower left, [2] lower
 * right, [3] upper right) and on the frame vectors of box1.  Second part:
 * older pixel based checks, using sdata->bp with box-relative coordinates
 * and box1->p with absolute coordinates.  Failed tests leave the
 * single-pass loop (Break / break) without registering anything; soft
 * tests scale the certainty `ad` (percent, starting at 100).  A survivor
 * is registered with Setac(box1,'h',ad).
 *
 * Returns box1->c in every case.
 *
 * Fixed here: the "fi fu" penalty used 1<<i with an unbounded pixel run
 * length i; for large glyphs this overflowed int or shifted by more than
 * its width (undefined behaviour).  The shift is now capped at 7; every
 * i>=7 already produced a negative certainty and dropped the candidate,
 * so well-defined results do not change.
 *
 * NOTE(review): sq(), loop(), get_bw(), num_cross(),
 * nearest_frame_vector(), MSG, DBG, Break and Setac are defined outside
 * this block; the comments assume loop() returns the run length of pixels
 * of the given colour (0 = background, 1 = ink) - confirm.
 */
static wchar_t ocr0_h(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      ad;                    /* certainty in percent */
  int (*aa)[4]=sdata->aa;    /* corner-points, (x,y,dist^2,vector_idx) */

  // --- test h ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){ // min 3x4
    // rewritten for vectors 0.42
    int i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners
    DBG( wchar_t c_ask='h'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    /* half distance to the center */
    d=2*sq(128/4);
    /* the upper right corner must be empty, the other three occupied */
    if (aa[3][2]<d/4) Break; /* [2] = distance, ~BCDEF... */
    if (aa[0][2]>d/2) Break; /* upper left end */
    if (aa[1][2]>d/2) Break; /* lower left end */
    if (aa[2][2]>d/2) Break; /* lower right end */
    /*
      type A         B=italic ???
        18             OOO
        O              O  O
        O                O
        O7OOO            OOOO
        O4  O            O   O
        O   O           O    O
        O   O           O   O  O
       2O3 5O6          O   OOO
    */
    i1=i8=aa[0][3];
    i2=i3=aa[1][3];
    i5=i6=aa[2][3];
    // check the bow from below (fails on melted serifs):
    // i4 = frame vector with the smallest y between lower left and lower right
    for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
      if (box1->frame_vector[ i][1]
         <box1->frame_vector[i4][1]) i4=i; // get next maximum
      if (box1->frame_vector[ i][1]<=y0) break; // fatal!
    }
    if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
    if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
    // two steps for i7 to go around pitfalls on italic h
    i7=nearest_frame_vector(box1, i6, i8, (x0+x1)/2, (y0+y1)/2);
    i7=nearest_frame_vector(box1, i6, i7,      x0  , (y0+y1)/2);
    i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
    i5=nearest_frame_vector(box1, i4, i6, (x0+x1)/2, y1);

    MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)

    /* ... new part above /// old pixel based part below ... */
    if( get_bw(0        ,dx/2,dy/8     ,dy/8     ,bp,cs,1) != 1 ) Break;
    if( get_bw(0        ,dx/2,dy/2     ,dy/2     ,bp,cs,1) != 1 ) Break;
    if( get_bw(dx/2     ,dx-1,dy-1-dy/3,dy-1-dy/3,bp,cs,1) != 1 ) Break;
    if( get_bw(dx/2     ,dx/2,dy/5     ,dy-1-dy/3,bp,cs,1) != 1 ) Break;
    /* the upper right region must be empty */
    if( get_bw(dx-1-dx/3,dx-1,0        ,1        ,bp,cs,1) == 1 ) Break;
    if( get_bw(dx-1-dx/3,dx-1,1        ,dy/6     ,bp,cs,1) == 1 ) Break;
    if( dy>18 )
    if( get_bw(dx-1-dx/3,dx-1,dy/6     ,dy/5     ,bp,cs,1) == 1 ) Break;
    if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1     ,bp,cs,1) == 0 ) Break; // s-
    /* there must be an empty column in the lower quarter of the middle third */
    for( x=x0+dx/3;x<x1-dx/3;x++)
      if( get_bw(x, x,y1-dy/4, y1, box1->p,cs,1) == 0 ) break;
    if( x>=x1-dx/3 ) Break;
    /* at least dy/4 rows below 1/3 height must cross exactly 2 strokes */
    for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){
      if( num_cross(x0,x1     ,y,y, box1->p,cs) == 2 ) i--;
    } if( i ) Break;
    /* at least dy/4 rows of the upper half must cross 1 stroke on the left */
    for(i=dy/4,y=y0;y<=y0+dy/2 && i;y++){
      if( num_cross(x0,x0+dx/2,y,y, box1->p,cs) == 1 ) i--;
    } if( i ) Break;
    // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) // could happen
    if (sdata->holes.num > 0)
    if (sdata->holes.hole[0].y0 >      dy/3
     && sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
    // if( num_hole(x0, x1, y0+dy/3 , y1-dy/3 ,box1->p,cs,NULL) != 1 ) Break; // mini
    if( loop(bp,dx-1,dy/3,dx,cs,0,LE)+dx/8
      < loop(bp,dx-1,dy/2,dx,cs,0,LE)
     && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8
      < loop(bp,dx-1,dy/2,dx,cs,0,LE)) Break; // ~k Okt00
    i=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
    if (i>1 && num_cross(x0,x0,y0+dy/8+2,y0+dy/2, box1->p,cs) == 1 ){ // fi fu
      /* penalty grows exponentially with the left indentation i; the
         shift is capped because i>=7 already yields a negative factor
         (candidate dropped below) and larger shifts would overflow int */
      ad=(99-(1<<(i<7?i:7)))*ad/100;
      if (num_cross(x0,x0,y0,y0+dy/8+2, box1->p,cs) == 0 ) ad=97*ad/100;
      if (num_cross(x0+dx/2,x0+dx/2,y0,y0+dy/8+2, box1->p,cs) == 1 ) ad=97*ad/100;
      if (ad<1) break;
    }
    /* columns right of the left stroke (up to 2/3 width) with a deep gap
       from the top each cost 2% */
    i =loop(bp,0,dy/4,dx,cs,0,RI);
    i+=loop(bp,i,dy/4,dx,cs,1,RI)+1;
    for ( ; i<dx-dx/3; i++ )
      if( loop(bp,i,0,dy,cs,0,DO)>5*dy/8 ) {
        ad=98*ad/100; // melted hi, li, but handwritten h
        MSG(fprintf(stderr,"ad=%d",ad);) }
    if( num_cross(x0,x0,y0+(dy+3)/8,y1,box1->p,cs) > 1 ) {
      ad=98*ad/100; // melted fr
      MSG(fprintf(stderr,"ad=%d",ad);) }

    i=loop(bp,dx-1,3*dy/4,dx,cs,0,LE); // melted "fr" for vertical letters
    if (i>dx/4 && loop(bp,dx-1-i,dy-1,dy,cs,1,UP)>dy/2) {
      ad=94*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }

    i=loop(bp,dx-1,1+dy/16,dx,cs,0,LE); if (i<dx/4) {
      ad=98*ad/100;
      MSG(fprintf(stderr,"ad=%d",ad);) }
    if( num_cross(dx-i+1+dx/8,dx-i+1+dx/8,0,1+dy/16,bp,cs) > 0 ) {
      ad=95*ad/100; // melted fi
      MSG(fprintf(stderr,"ad=%d",ad);) }
    if (loop(box1->p,x1,y0+1+dy/16,dx,cs,0,LE)<dx/4) {
      ad=98*ad/100; // fi
      MSG(fprintf(stderr,"ad=%d",ad);) }
    if (loop(box1->p,x1,y0  ,dx,cs,0,LE)<dx/4
     || loop(box1->p,x1,y0+1,dx,cs,0,LE)<dx/4) {
      ad=98*ad/100; // li
      MSG(fprintf(stderr,"ad=%d",ad);) }

    if (sdata->holes.num > 0) ad=97*ad/100;
    if (box1->m2) {
      if ( gchar) ad=98*ad/100;
      if (!hchar) ad=97*ad/100;
    } else ad=99*ad/100;
    Setac(box1,'h',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_H - rule based (pixel based) test whether the glyph in sdata->box1
 * is a capital 'H'.
 *
 * Uses sdata->bp with box-relative coordinates (0..dx-1, 0..dy-1) and
 * box1->p with absolute coordinates (x0..x1, y0..y1).  Failed tests leave
 * the single-pass loop through Break; soft tests scale the certainty `ad`
 * (percent, starting at 100).  A surviving candidate is registered with
 * Setac(box1,'H',ad).
 *
 * Returns box1->c in every case.
 *
 * NOTE(review): loop(), num_cross(), get_bw(), MSG, DBG, Break and Setac
 * are defined outside this block.  The comments assume loop() returns the
 * run length of pixels of the given colour (0 = background, 1 = ink) in
 * the given direction and num_cross() counts crossed strokes - confirm.
 */
static wchar_t ocr0_H(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1-x0+1, dy = y1-y0+1;       /* size of the box */
  int i, j, j1, d, x, y, ya, i1, i2, i3; /* scratch; d is written but never read */
  int ad;                                /* certainty in percent */

  // --- test H ---------------------------------------------------
  ad = d = 100;
  while (dx > 2 && dy > 3) {  // min 3x4
    DBG( wchar_t c_ask='H'; )

    if (sdata->holes.num > 1) { Break; }  /* tolerant against a tiny hole */

    /* two strokes at about 1/4 and at about 3/4 of the height */
    if (num_cross(0,dx-1,dy/4  ,dy/4  ,bp,cs) != 2
     && num_cross(0,dx-1,dy/4-1,dy/4-1,bp,cs) != 2) { Break; }
    if (num_cross(0,dx-1,3*dy/4  ,3*dy/4  ,bp,cs) != 2
     && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2) { Break; }
    if (loop(bp,0   ,dy/8,dx,cs,0,RI)
      + loop(bp,dx-1,dy/8,dx,cs,0,LE) > dx/2) { Break; }  // ~A

    /* 2 vertical lines: left + right indentation never above dx/2;
       j1 collects the largest sum seen */
    j1 = 0;
    i = 1;
    y = y0+dy/10;
    while (y < y1-dy/10 && i) {
      j = loop(box1->p,x0,y,dx,cs,0,RI)
        + loop(box1->p,x1,y,dx,cs,0,LE);
      if (j > dx/2) { i = 0; }
      if (j > j1) { j1 = j; }
      y++;
    }
    if (!i) { Break; }

    /* max - min width: the indentation sum may not drop more than dx/5 below j1 */
    i = 1;
    y = dy/4;
    while (y < dy-1-dy/4 && i) {
      j = loop(bp,0   ,y,dx,cs,0,RI)
        + loop(bp,dx-1,y,dx,cs,0,LE);
      if (j1-j > dx/5) { i = 0; }
      y++;
    }
    if (!i) { Break; }  // ~K Jul00

    /* horizontal line: widest first ink run in the middle third, found at row ya */
    i = 0;
    ya = y = y0+dy/3;
    while (y < y1-dy/3) {
      j = loop(box1->p,x0  ,y,dx,cs,0,RI);
      j = loop(box1->p,x0+j,y,dx,cs,1,RI);
      if (j > i) { i = j; ya = y; }
      y++;
    }
    if (i <= dx/2) { Break; }
    ya -= y0;  /* make the row box-relative */
    if (num_cross(0,dx-1,ya  ,ya  ,bp,cs) != 1
     && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1) { Break; }  /* Dec00 */

    /* ~M Dec00: no two successive rows below the bar with more than 2 strokes */
    for (y = ya; y < dy-dy/4; y++) {
      if (num_cross(0,dx-1,y  ,y  ,bp,cs) > 2
       && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2) { break; }
    }
    if (y < dy-dy/4) { Break; }

    /* some middle column must be empty in the upper quarter ... */
    i = 1;
    x = x0+dx/4;
    while (x <= x1-dx/4 && i) {
      if (get_bw(x,x,y0,y0+dy/4,box1->p,cs,1) == 0) { i = 0; }
      x++;
    }
    if (i) { Break; }
    /* ... and some middle column in the lower quarter */
    i = 1;
    x = x0+dx/4;
    while (x <= x1-dx/4 && i) {
      if (get_bw(x,x,y1-dy/4,y1,box1->p,cs,1) == 0) { i = 0; }
      x++;
    }
    if (i) { Break; }
    /* some middle column must cross exactly one stroke (the bar) */
    i = 1;
    x = x0+dx/4;
    while (x <= x1-dx/4 && i) {
      if (num_cross(x,x,y0+dy/8,y1-dy/8,box1->p,cs) == 1) { i = 0; }
      x++;
    }
    if (i) { Break; }
    /* some row of the upper quarter must cross exactly two strokes ... */
    i = 1;
    y = y0;
    while (y <= y0+dy/4 && i) {
      if (num_cross(x0,x1,y,y,box1->p,cs) == 2) { i = 0; }
      y++;
    }
    if (i) { Break; }
    /* ... and some row of the lower quarter as well */
    i = 1;
    y = y1-dy/4;
    while (y <= y1 && i) {
      if (num_cross(x0,x1,y,y,box1->p,cs) == 2) { i = 0; }
      y++;
    }
    if (i) { Break; }

    /* ink in the upper right and in the lower left corner */
    if (get_bw(x1-dx/8,x1     ,y0     ,y0+dy/8,box1->p,cs,1) != 1) { Break; }
    if (get_bw(x0     ,x0+dx/8,y1-dy/8,y1     ,box1->p,cs,1) != 1) { Break; }

    /* the right border must be nearly straight */
    i1 = loop(bp,dx-1,     dy/4,dx,cs,0,LE);
    if (i1 > dx/2) { Break; }
    i2 = loop(bp,dx-1,     dy/2,dx,cs,0,LE);
    if (i2 < i1-dx/4 || i2 > i1+dx/8) { Break; }
    i3 = loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
    if (i3 < i2-dx/4 || i3 > i2+dx/8) { Break; }
    if (abs(i1+i3-2*i2) > dx/16+1) { Break; }

    // test for thick tall N looking like a H
    if (num_cross(x0,x1,y0,y1       ,box1->p,cs) < 2) { Break; }  // sure N
    if (num_cross(x0,x1,y0,(y0+y1)/2,box1->p,cs) < 2) { Break; }  // sure N
    /* each pair of calls: skip the gap, then measure the ink run behind it */
    i1 = loop(bp,0 ,dy/4,dx,cs,0,RI);
    i1 = loop(bp,i1,dy/4,dx,cs,1,RI);            // left vert. line, upper part
    i2 = loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI);
    i2 = loop(bp,i2,dy-1-dy/4,dx,cs,1,RI);       // left vert. line, lower part
    i3 = loop(bp,dx-1   ,dy-1-dy/4,dx,cs,0,LE);
    i3 = loop(bp,dx-1-i3,dy-1-dy/4,dx,cs,1,LE);  // right vert. line, lower part
    if (dx < 10 && i1-i2 > dx/4) { Break; }
    if (dx < 10 && i1-i2 > dx/8) { ad = 99*ad/100; }  // 7x10 ~N
    /* i = gap behind the first stroke, a bit below the middle */
    i  = loop(bp,0,dy/2+1+dy/8,dx,cs,0,RI);
    i += loop(bp,i,dy/2+1+dy/8,dx,cs,1,RI);
    i  = loop(bp,i,dy/2+1+dy/8,dx,cs,0,RI);
    if (i < dx/2-1 && 5*i1 > 6*i2 && 5*i3 > 6*i2 && i1 > i2 && i3 > i2) { Break; }  // ???

    /* ~K: right border clearly indented at 3/8 height against top and bottom */
    if (dx > 8
     && loop(bp,dx-1,   3*dy/8,dx,cs,0,LE)
       -loop(bp,dx-1,     dy/8,dx,cs,0,LE) > dx/4
     && loop(bp,dx-1,   3*dy/8,dx,cs,0,LE)
       -loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/4) { Break; }

    // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) Break;
    if (sdata->holes.num != 0) { Break; }
    MSG( fprintf(stderr,"i123 %d %d %d",i1,i2,i3); )
    if ( gchar) { ad = 99*ad/100; }
    if (!hchar) { ad = 98*ad/100; }
    Setac(box1,'H',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_k - rule based test whether the glyph in sdata->box1 is a small 'k'.
 *
 * First part: vector based checks on the corner points sdata->aa[0..3]
 * (x, y, dist^2, vector_idx; [0] upper left, [1] lower left, [2] lower
 * right, [3] upper right) and the frame vectors of box1.  Second part:
 * pixel based checks using sdata->bp (box-relative coordinates) and
 * box1->p (absolute coordinates).  Failed tests leave the single-pass
 * loop through Break; soft tests scale the certainty `ad` (percent,
 * starting at 100).  A survivor is registered with Setac(box1,'k',ad).
 *
 * Returns box1->c in every case.
 *
 * NOTE(review): sq(), loop(), num_cross(), get_bw(), get_line(),
 * nearest_frame_vector(), MSG, DBG, Break and Setac are defined outside
 * this block; the comments assume loop() returns the run length of pixels
 * of the given colour (0 = background, 1 = ink) - confirm.
 */
static wchar_t ocr0_k(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1-x0+1, dy = y1-y0+1;  /* size of the box */
  int i, j, x, y;                  /* scratch */
  int ad;                          /* certainty in percent */
  int (*aa)[4] = sdata->aa;        /* corner-points, (x,y,dist^2,vector_idx) */

  // --- test k ---------------------------------------------------
  ad = 100;
  while (dx > 2 && dy > 3) {  // min 3x4
    // rewritten for vectors 0.43
    int d, i1, i2, i3, i4, i5, i6, i7, i8;  // line derivation + corners
    DBG( wchar_t c_ask='k'; )

    if (sdata->holes.num > 1) { Break; }  /* tolerant against a tiny hole */

    /* half distance to the center */
    d = 2*sq(128/4);
    /* the upper right corner must be empty, the other three occupied */
    if (aa[3][2] < d/4) { Break; }  /* [2] = distance, ~BCDEF... */
    if (aa[0][2] > d/2) { Break; }  /* upper left end */
    if (aa[1][2] > d/2) { Break; }  /* lower left end */
    if (aa[2][2] > d/2) { Break; }  /* lower right end */
    /*
      type A         B=italic ???
        18             OOO
        O              O  O
        O    O6          O
        O7 OO            O  OO
        O4OO             OO  OO
        O  OO           O   O
        O    OO         O   O  O
       2O3    O5        O    OOO
    */
    i1 = i8 = aa[0][3];
    i2 = i3 = aa[1][3];
    i5 = aa[2][3];

    // check the bow from below:
    // i4 = frame vector with the smallest y between lower left and lower right
    i4 = i = i2;
    while (i != i5) {
      if (box1->frame_vector[i][1] < box1->frame_vector[i4][1]) { i4 = i; }
      if (box1->frame_vector[i][1] <= y0) { break; }  // fatal!
      i = (i+1)%box1->num_frame_vectors[0];
    }
    if (box1->frame_vector[i4][1]-y0 < dy/4) { Break; }  // ~MN
    if (y1-box1->frame_vector[i4][1] < dy/4) { Break; }  // ~BCDEGIJLOQSUYZ

    i6 = nearest_frame_vector(box1, i5, i8, x1, (2*y0+y1)/3);
    i7 = nearest_frame_vector(box1, i6, i8, x0, y1);
    i3 = nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
    /* between the lower right end and i6 the outline must come back to the left */
    i  = nearest_frame_vector(box1, i5, i6, x0, (y0+2*y1)/3);
    if (x1-box1->frame_vector[i][0] < dx/4) { Break; }  // h
    if (x1-box1->frame_vector[i][0] < dx/2) { ad = 98*ad/100; }

    MSG(fprintf(stderr,"i1-7 ad %d %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7,ad);)

    /* pixel based part: 1 stroke at the top, 2 strokes at the bottom */
    if (num_cross(0,dx-1,0,0,bp,cs) != 1
     && num_cross(0,dx-1,1,1,bp,cs) != 1) { Break; }
    if (num_cross(0,3*dx/4,  dy/8 ,  dy/8 ,bp,cs) != 1
     || num_cross(0,3*dx/4,3*dy/16,3*dy/16,bp,cs) != 1) { Break; }
    if (num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
     && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2) { Break; }
    if (dx < 8
     && num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) != 2
     && num_cross(dx-2,dx-2,dy/4,dy-1,bp,cs) != 2) { Break; }

    /* v-line on left side?  The left border must be nearly straight */
    i1 = loop(bp,0,dy/2-dy/4,dx,cs,0,RI);
    i2 = loop(bp,0,dy/2     ,dx,cs,0,RI);
    if (i2 > dx/2) { Break; }
    i3 = loop(bp,0,dy/2+dy/4,dx,cs,0,RI);
    if (abs(i1+i3-2*i2) > (dx+8)/16+1 || i1 < i3-1) { Break; }

    /* ink / no ink in selected regions */
    if (get_bw(x0     ,x0+dx/2,y0     ,y0+dy/4   ,box1->p,cs,1) != 1) { Break; }
    if (get_bw(x0+dx/2,x1     ,y1-dy/3,y1        ,box1->p,cs,1) != 1) { Break; }
    if (get_bw(x1-dx/4,x1     ,y0     ,y0+3*dy/16,box1->p,cs,1) == 1) { Break; }
    if (get_bw(x1-dx/4,x1     ,y0+dy/4,y1-dy/4   ,box1->p,cs,1) != 1) { Break; }  //~1
    if (get_bw(x1-dx/4,x1     ,y1-dy/8,y1        ,box1->p,cs,1) != 1) { Break; }
    if (sdata->holes.num > 0
     && sdata->holes.hole[0].y0 > dy/4) { Break; }
    // if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) Break;

    /* gap ???: no empty row in the upper half */
    for (y = y0+1; y < y0+dy/2; y++) {
      if (get_bw(x0,x1,y,y,box1->p,cs,1) == 0) { break; }
    }
    if (y < y0+dy/2) { Break; }

    /* some column of the left half must be a vertical line */
    i = 1;
    x = x0;
    while (x <= x0+dx/2 && i) {
      if (get_line(x,y0,x,y1,box1->p,cs,100) > 50) { i = 0; }
      x++;
    }
    if (i) { Break; }  // no vertical line!

    /* check for falling line in the lower left corner:
       x = deepest indentation from the right in rows 5/8..7/8, j = its row */
    j = x = 0;
    for (y = 5*dy/8; y < 7*dy/8; y++) {
      i = loop(bp,dx-1,y,dx,cs,0,LE);
      if (i > x) { x = i; j = y; }
    }  // x=dx/6 on fat k
    /* NOTE(review): y still holds the loop's end value here, not the row j
       of the maximum - looks unintended, but kept as is; confirm */
    if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 < dx/4) { Break; }
    if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 < dx/2) { ad = 98*ad/100; }
    x = dx-1-x;
    y = j;
    /* lower right end of the falling line (second last row as fallback) */
    i = loop(bp,dx-1,dy-1,dx,cs,0,LE);
    if (i > dx/2) {
      i = loop(bp,dx-1,dy-2,dx,cs,0,LE);
    }
    if (i > dx/2) { Break; }
    i += loop(bp,dx-1-i,dy-1,dx,cs,1,LE)/2;
    if (get_line(x,y,dx-1-i,dy-1,bp,cs,100) < 60) { Break; }

    /* some row below 1/3 height must cross exactly 2 strokes */
    for (y = y0+dy/3; y < y1; y++) {
      if (num_cross(x0,x1,y,y,box1->p,cs) == 2) { break; }
    }
    if (y == y1) { Break; }

    // num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL)>0 // ~A happens!
    if (sdata->holes.num > 0
     && (sdata->holes.hole[0].x1 > dx-1-dx/4
      || sdata->holes.hole[0].y1 > dy-1-dy/4
      || sdata->holes.hole[0].y0 < dy/4)) { Break; }
    // if ( num_hole(x0,x1-dx/4,y0+dy/4,y1-dy/4,box1->p,cs,NULL)==0 ) Break;

    /* width of the first ink run on the bottom row */
    i = loop(bp,0,dy-1,dx,cs,0,RI);
    i = loop(bp,i,dy-1,dx,cs,1,RI);
    if (dx > 8 && 4*i > 3*dx) { Break; }  // ~glued_tz

    i = loop(bp,0,dy/4,dx,cs,0,RI);
    if (i > dx/4
     && i+loop(bp,i,dy/4,dx,cs,1,RI) > dx/2
     && loop(bp,   0,0,dx,cs,0,RI) <= dx/4
     && loop(bp,dx-1,0,dx,cs,0,LE) >= dx/2) { ad = 90*ad/100; }  // divided Q

    if (2*y0 > (box1->m1+box1->m2)) { ad = 99*ad/100; }

    if ( gchar) { ad = 99*ad/100; }
    if (!hchar) { ad = 99*ad/100; }
    Setac(box1,'k',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_K - rule based (pixel based) test whether the glyph in sdata->box1
 * is a capital 'K'.
 *
 * Uses sdata->bp with box-relative coordinates (0..dx-1, 0..dy-1) and
 * box1->p with absolute coordinates (x0..x1, y0..y1).  Failed tests leave
 * the single-pass loop through Break; soft tests scale the certainty `ad`
 * (percent, starting at 100).  A surviving candidate is registered with
 * Setac(box1,'K',ad).
 *
 * Returns box1->c in every case.
 *
 * NOTE(review): loop(), num_cross(), get_bw(), get_line2(), MSG, DBG,
 * Break and Setac are defined outside this block.  The comments assume
 * loop() returns the run length of pixels of the given colour
 * (0 = background, 1 = ink) and get_line2() rates in percent how well a
 * line between two points is covered - confirm.
 */
static wchar_t ocr0_K(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1-x0+1, dy = y1-y0+1;  /* size of the box */
  int i, j, i1, i2, d, x, y;       /* scratch; d and i1 are written but never read */
  int ad;                          /* certainty in percent */
  int ya, xa, yb, xb, yc, xc, yd, xd, ye, xe, yf, xf;  /* main points a..f of the K */

  // --- test K ---------------------------------------------------
  ad = d = 100;
  while (dx > 2 && dy > 3) {  // Mar2000 perfect??? no ocr-a X Jul09
    DBG( wchar_t c_ask='K'; )

    if (sdata->holes.num > 1) { Break; }  /* tolerant against a tiny hole */

    /* every row (except the outer eighths) needs ink in its left half */
    for (y = dy/8; y < dy-dy/8; y++) {
      if (!get_bw(0,dx/2,y,y,bp,cs,1)) { break; }
    }
    if (y < dy-dy/8) { Break; }

    /* upper gap: a middle column must be empty from the top for at least
       dy/4, but for no more than 3*dy/4 */
    j = 0;
    i = 1;
    x = x0+dx/4;
    while (x <= x1-dx/4 && i) {
      y = loop(box1->p,x,y0,y1-y0,cs,0,DO);
      if (y > 3*dy/4) {
        i = 1;
        break;
      }
      /* NOTE(review): j is 0 on entry and only changed inside this branch,
         so with dy>15 the condition j>dy/8 can never hold - kept as is */
      if (dy > 15 && j > dy/8) {
        j  = loop(box1->p,x-1,y0+y-1,x1-x0,cs,0,LE)/2;
        y += loop(box1->p,x-j,y0+y-1,y1-y0,cs,0,DO)-1;
      }
      if (y >= dy/4) { i = 0; }  /* ok, found gap */
      x++;
    }
    if (i) { Break; }

    /* lower h-gap: y = deepest gap from the bottom within the middle columns */
    y = 0;
    for (x = x0+dx/4; x <= x1-dx/4; x++) {
      i = loop(box1->p,x,y1,dy,cs,0,UP);
      /* on small chars bypass possible low left serifs */
      if (i > 0) {
        i2 = loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
        if (i2 > 1) { i += i2-1; }
      }
      if (i > y) { y = i; i1 = x; }
    }
    if (y <= dy/8) { Break; }
    if (y < dy/4) { ad = 80*ad/100; }

    /* some column right of 1/3 width must cross exactly 2 strokes */
    i = 1;
    x = x0+dx/3;
    while (x <= x1-dx/8 && i) {
      if (num_cross(x,x,y0,y1,box1->p,cs) == 2) { i = 0; }
      x++;
    }
    if (i) { Break; }
    /* some row of the upper quarter must cross exactly 2 strokes */
    i = 1;
    y = y0;
    while (y <= y0+dy/4 && i) {
      if (num_cross(x0,x1,y,y,box1->p,cs) == 2) { i = 0; }
      y++;
    }
    if (i) { Break; }
    /* narrow glyphs: some row of the middle third must be a single stroke */
    if (dx < 10) {
      i = 1;
      y = y0+dy/3;
      while (y <= y1-dy/3 && i) {
        if (num_cross(x0,x1,y,y,box1->p,cs) == 1) { i = 0; }
        y++;
      }
      if (i) { Break; }
    }
    /* some row of the lower quarter must cross exactly 2 strokes */
    i = 1;
    y = y1-dy/4;
    while (y <= y1 && i) {
      if (num_cross(x0,x1,y,y,box1->p,cs) == 2) { i = 0; }
      y++;
    }
    if (i) { Break; }

    if (get_bw(x1-dx/3,x1,y0,y0+dy/8,box1->p,cs,1) != 1) { Break; }  // ~k

    /* ~X Jul09: left border clearly indented in the middle */
    if (loop(bp,0,  dy/4,dx,cs,0,RI)
       +loop(bp,0,3*dy/4,dx,cs,0,RI)
      < 2*loop(bp,0,dy/2,dx,cs,0,RI)-2-dx/32) {
      if (dy >= 16) { Break; } else { ad = 98*ad/100; }
    }

    /* right border: indented in the middle, less so at 1/4 and 3/4 height */
    i = loop(box1->p,x1,y0+  dy/4,x1-x0+1,cs,0,LE);
    if (i > dx/2) { Break; }
    j = loop(box1->p,x1,y0+  dy/2,x1-x0+1,cs,0,LE);
    x = loop(box1->p,x1,y0+3*dy/8,x1-x0+1,cs,0,LE);
    if (x > j) { j = x; }
    if (j <= i) { Break; }
    i = j;
    j = loop(box1->p,x1,y1-dy/4,x1-x0+1,cs,0,LE);
    if (j >= i) { Break; }

    // out_x(box1);  // detailed analysis
    //
    //   a   d    <= these are the main points of K
    //   |  /
    //   b/e
    //   |  \ .
    //   c   f
    ya = dy/4;
    xa  = loop(bp,0 ,ya,dx,cs,0,RI);
    xa += loop(bp,xa,ya,dx,cs,1,RI)/2;
    yc = dy-dy/4;
    xc  = loop(bp,0 ,yc,dx,cs,0,RI);
    xc += loop(bp,xc,yc,dx,cs,1,RI)/2;
    yb = dy/2;
    xb = dx-1-loop(bp,dx-1,dy/2,dx,cs,0,LE);

    /* scan a range of dy/4 rows for d (upper right), f (lower right),
       e (deepest right indentation) and b (joint of the arms) */
    yd = ye = yf = xe = y = i = 0;
    xf = xd = dx;
    while (y < dy/4) {  // range 0..1/4
      x = loop(bp,dx-1,     y,dx,cs,0,LE); if (x < xd) { xd = x; yd =      y; }
      x = loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if (x < xf) { xf = x; yf = dy-1-y; }
      x = loop(bp,dx-1,dy/2+y,dx,cs,0,LE); if (x > xe) { xe = x; ye = dy/2+y; }
      x = loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if (x > xe) { xe = x; ye = dy/2-y; }
#if 0 // removed v0.2.4a2
      x =loop(bp,0 ,dy/2+y,dx,cs,0,RI); // middle left border
      x+=loop(bp,x ,dy/2+y,dx,cs,1,RI); // test 2nd cross
      x+=loop(bp,x ,dy/2+y,dx,cs,0,RI); if(x<xb){ xb=x;yb=dy/2+y; }
#endif
      x  = loop(bp,0,dy/2-y,dx,cs,0,RI);
      x += loop(bp,x,dy/2-y,dx,cs,1,RI);  // test 2nd cross
      x += loop(bp,x,dy/2-y,dx,cs,0,RI);
      if (x < xb) { xb = x; yb = dy/2-y; }
      x = dx-1-loop(bp,dx-1,dy/2-y,dx,cs,0,LE);
      if (x < xb) { xb = x; yb = dy/2-y; }
      y++;
    }
    /* convert the right indentations into x positions */
    xd = dx-1-xd;
    xe = dx-1-xe;
    xf = dx-1-xf;
    /* detect center of line: move the points a quarter stroke inwards */
    xb += loop(bp,xb,yb,dx,cs,1,RI)/4;
    xe -= loop(bp,xe,ye,dx,cs,1,LE)/4;
    xd -= loop(bp,xd,yd,dx,cs,1,LE)/4;
    xf -= loop(bp,xf,yf,dx,cs,1,LE)/4;
#if 0
    MSG( \
    printf("a=%d %d b=%d %d c=%d %d d=%d %d e=%d %d f=%d %d dxdy %d %d",\
     xa,ya,xb,yb,xc,yc,xd,yd,xe,ye,xf,yf,dx,dy);\
    )
#endif
    /* the vertical stroke a-c */
    if (get_line2(xa,ya,xc,yc,bp,cs,100) < 95) { Break; }
    if (dx > 8) {  // example szaka0103
      if (xe > 5*dx/8 || xb > 5*dx/8) { Break; }  // ~{\it n}
      i = loop(bp,xb,yb,xb,cs,1,LE);  // thick center? see font22
      /* right up: arm b-d, second try starting half a stroke further left */
      if (get_line2(xb    ,yb,xd,yd,bp,cs,100) < 95
       && get_line2(xb-i/2,yb,xd,yd,bp,cs,100) < 95) { Break; }
      /* right down: arm e-f */
      if (get_line2(xe,ye,xf,yf,bp,cs,100) < 95) { Break; }
      xe += loop(bp,xe,ye,dx,cs,1,RI);
      if (xe >= xf) { Break; }  // ~{\it n}
    } else {
      if (dy < 16 && !hchar) { Break; }
      if (loop(bp,0,1,dy,cs,1,DO) <= 3*dx/4
       && loop(bp,1,1,dy,cs,1,DO) <= 3*dx/4
       && loop(bp,2,1,dy,cs,1,DO) <= 3*dx/4) { Break; }  // ~x
    }

    if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE) <= dx/8) {
      ad = 99*ad/100;  /* broken B ? */
      if (sdata->holes.num > 0
       && sdata->holes.hole[0].y1 < dy-1-dy/3) { Break; }
      // if( num_hole(x0,x1,y0,(y0+2*y1)/3,box1->p,cs,NULL)>0) Break; // broken B
    }
    if (box1->m3 && !hchar) { ad = 99*ad/100; }
    if (box1->m3 &&  gchar) { ad = 99*ad/100; }
    // printf(" ok xe=%d",xe);
    Setac(box1,'K',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_f - rule-based test whether the glyph in sdata->box1 is an 'f'.
 *
 * Reads the box geometry (x0,x1,y0,y1), the baseline values m2..m4, the
 * frame vectors of the box, the hole list in sdata->holes and the four
 * corner-nearest vector points in sdata->aa.  Pixel probes with `bp` use
 * box-relative coordinates (0..dx-1, 0..dy-1), probes with `box1->p` use
 * x0..x1 / y0..y1.
 *
 * The body is a single-pass `for` used as a breakable block: every failed
 * rule leaves it through Break (project macro), a glyph surviving all rules
 * is reported via Setac(box1,'f',ad), where `ad` starts at 100 and is
 * scaled down by every "looks a bit like another char" rule.
 *
 * Returns box1->c (whatever character is stored in the box afterwards).
 */
static wchar_t ocr0_f(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      (*aa)[4]=sdata->aa,      /* the four line ends, (x,y,dist^2,vector_idx) */
      ab[8][4],                /* special points (x,y,dist^2,vector_idx) */
      ad;                      /* tmp-vars */
  /*    x=mindist_to_a  y=0 "t"
      0>..$$.  0>..$$  0>..$$ end right bow  a--..$$  a--.$7.  y>0 "f"
      1>.$..$  1>.$..  1>.$$$ start right bow   .$7.     .$..
        .@...    .@..  2>.@@. start upper end   .@..     .@..
      2>.$...  2>.$..  3>$$$$ crossing bar      .$..     $$$.
      3>$@$$.  3>$@$.    $@@$                   $@$.     .@..
      4>.$...  4>.$..  4>.$$. lower end         .$..     .$..
        .@...    .@..    .@@.                   .@..     .@..
        .@...    .@..    .@@.                   .@..     .@..
      5>.$...  5>.$..  5>.$$. lower start       .$..     .$..
      6>.....  6>$...  6>.... optional left bow
   */
  // --- test f like t ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>5;){     // sometimes no hchar!
    // rewritten for vectors 0.43
    // (the former inner `int d` shadowed the outer d; the outer one is used now)
    int i1, i2, i3, i4, i5, i6, i7, i8, i9;  // corners on the frame
    DBG( wchar_t c_ask='f'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    /* half distance to the center */
    d=2*sq(128/4);
    /* now we check for the upper right end of the f */
    if (aa[3][2]>d/2) Break;  /*  [2] = distance, ~BCDEF... */
    if (aa[0][2]>d  ) Break;  /* upper left end */
    /*
             9
            OOO
           O 7 O8
           O6
         1OOOO5
           O4
           O
          2O3
         OOOOO
    */
    i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0-dx/2,(5*y0+3*y1)/8);
    /* we need i for 4x6 font, where left side of h-bar is near (x0,y1) */
    i =aa[1][3]; if (box1->frame_vector[i][1]<y1-dy/8)
    i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y1+dy/4);
    i2=nearest_frame_vector(box1,      i1,       i, x1, y1);
    i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y1+dy/4);
    i3=nearest_frame_vector(box1,       i,aa[3][3], x0, y1);
    i7=nearest_frame_vector(box1,      i3,aa[3][3],(x0+x1)/2, y0);
    i8=nearest_frame_vector(box1,      i7,aa[0][3], x1, (3*y0+y1)/4);
    i9=nearest_frame_vector(box1,aa[3][3],aa[0][3],(x0+2*x1)/3,y0-dy/4);
    i5=nearest_frame_vector(box1,      i3,      i7, x1+dx/4, (5*y0+3*y1)/8);
    i4=nearest_frame_vector(box1,      i3,      i5, x0, (3*y0+y1)/4);
    i6=nearest_frame_vector(box1,      i5,      i7, x0, (y0+3*y1)/4);

    MSG(fprintf(stderr,"i1-9 %d %d %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7,i8,i9);)

    // check if vertical line is near to the left side
    if (box1->frame_vector[i2][0]-x0>dx/2) Break; // ~3
    i =nearest_frame_vector(box1, aa[0][3], i2, x1+2*dx, (y0+y1)/2);
    // MSG(fprintf(stderr,"i %d",i);)
    if (box1->frame_vector[i ][0]
       -box1->frame_vector[i9][0]>dx/8) Break; // ~3

    if( (box1->dots) ) Break; // Bold-face is gchar
    if (dy<=box1->m3-box1->m2+1) Break;
    // search the row j (below the top rows) with the widest black run x
    for(x=0,j=y=2+(3*dy+4)/32;y<=5*dy/8;y++){  // upper cross line min=2
      i=loop(bp,0,y,dx,cs,0,RI); if( y>dy/4 && i>5*dx/8 ) break;
      i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;j=y; }
      if( y<3*dy/4 && y>dy/4
       && num_cross(0,dx-1,y  ,y  ,bp,cs) != 1
       && num_cross(0,dx-1,y+1,y+1,bp,cs) != 1 // against noise
       ) break;
    }
    if( y<=5*dy/8 ) Break;
    y=j; // if( y>dy/2 || y<dy/8 ) Break;
    // x is the widest black run found above (the crossing bar), y its row
    i=loop(bp,(dx+1)/2,0,dy,cs,0,DO)/2;
    if( i>dy/8
     && num_cross(       0, (dx+1)/2,i,i,bp,cs) > 0
     && num_cross((dx+1)/2,dx-1,i,i,bp,cs) > 0 ) Break; // ~Y

    if (loop(bp,3*dx/4,  0,dy,cs,0,DO)>dy/8
     && loop(bp,3*dx/4-1,0,dy,cs,0,DO)>dy/8) Break; // upper bow
    i=3*dy/4; if (box1->m3 && i>=box1->m3) i=box1->m3-1;
    if (num_cross(0,dx-1,i,i,bp,cs)!=1) Break;

    // the middle bar appear in a wide vertical range, get part below
    for (i1=dx,i2=y,j=y+1;j<dy-dy/4;j++){
      i=loop(bp,0,j,dx,cs,0,RI);
      i=loop(bp,i,j,dx,cs,1,RI); // thickness vert. line
      if (i<i1) { i1=i; i2=j; if (2*i<=x) break; }
    }
    i=i1; j=i2; /* i=thinnest stem width found, j=its row below horiz-bar */
    MSG(fprintf(stderr,"j=%d i=%d y=%d x=%d",j,i,y,x);)
    // bar should have twice of the thickness of v-line
    if (x<2*i && x<dx) Break;
    if (x<i+2+dx/8) ad=97*ad/100; // fat f

    // check for the upper bow to the right top side
    i3=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0);
    MSG(fprintf(stderr,"xy= %d %d %d %d",x0,y0,\
     box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);)
    ab[7][0]=box1->frame_vector[i3][0];
    ab[7][1]=box1->frame_vector[i3][1];
    ab[7][3]=i3;
    if (ab[7][1]-y0<=dy/16) ad=95*ad/100; // ~t
    // because of the dx,dy scaling the horiz. bar could be nearer to (x1,y0)
    //  as the upper right end of the "t"
    if (aa[3][0]-x0>3*dx/4 && aa[3][1]-y0>3*dy/16) ad=99*ad/100; // ~t

    j=loop(bp,0,dy/8,dx,cs,0,RI); // if j>dx/2 we have italic f
    if ((2*x<dx && j<=dx/2) || 3*x<dx) Break; // bar should be not to small
    for(i=dy/8;i<dy;i++)
      if (loop(bp,0,i,dx,cs,0,RI)>(j+dx/4)) break;
    if (i<dy) Break; // check for v-line

    if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<dx/2 )
    if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-1
      <=loop(bp,dx-1, y  ,dx,cs,0,LE) )
    if( loop(bp,dx-1, y-1,dx,cs,0,LE)
      <=loop(bp,dx-1, y  ,dx,cs,0,LE) ) Break; // ~1

    if( loop(bp,0,dy/2,dx,cs,0,RI)-1
       >loop(bp,0,   1,dx,cs,0,RI) ) Break; // ~X

    i=y;j=1; // j used as flag
    if( num_cross(0,dx-1,0,0,bp,cs)==1 && hchar) //~r
    if( num_cross(0,dx-1,dy-1,dy-1,bp,cs)!=1
     && num_cross(0,dx-1,dy-2,dy-2,bp,cs)!=1 ) Break; // ~* etc.
    // check for upper bow to right
    for(y=1;j && y<i; y++) // no @@ pattern
      if( num_cross(0,dx-1,y  ,y  ,bp,cs) ==2 ) j=0;
    if (j==0) { ad=(ad+101)/2; }
    for(y=1;j && y<i; y++) // no @@ pattern, try to detect it
      for(x=0;j && x<dx  ;x++){ // ..
        if( (getpixel(bp,x  ,y  )>=cs || dx<7) && getpixel(bp,x+1,y  )>=cs
          && getpixel(bp,x  ,y-1)< cs && getpixel(bp,x+1,y-1)< cs )
          { j=0;break; }
      }
    if(j) ad=98*ad/100; // not detected

    // if( num_hole (x0  , x1  , y0,  y1,box1->p,cs,NULL) != 0 ) Break; // ~e
    if (sdata->holes.num != 0) Break; // ~e
    // smallest left (i1) and right (i2) white margins in the lowest 1/8
    for(i1=i2=dx,y=7*dy/8;y<dy;y++){
      x=loop(bp,0   ,y,dx,cs,0,RI);if(x<i1)i1=x;
      x=loop(bp,dx-1,y,dx,cs,0,LE);if(x<i2)i2=x;
    }
    if(i1>i2+dx/4) Break; // ~t ~e
    if(i1>i2+1) ad=96*ad/100; // ~t ~e
    if( loop(bp,0,3*dy/4,dx,cs,0,RI)<i1-dx/4 ) Break;
    if( dx>5 && !hchar)
    if( loop(bp,dx-1,dy/2,dx,cs,0,LE)>3*dx/4 )
    if( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dx/2 ) Break; // ~c
    if( dx>8 )
    if( loop(bp,   0,2*dy/3  ,dx,cs,0,RI)>2*dx/3
     || loop(bp,   0,2*dy/3-1,dx,cs,0,RI)>2*dx/3 )
    if( loop(bp,dx-1,  dy/4  ,dx,cs,0,LE)>2*dx/3 ) Break; // ~5 ~S

    if (!hchar)
    if ( get_bw(x0+dx/8,x0+dx/8,y0+dy/4,y1-dy/16,box1->p,cs,2) == 0
      && num_cross(x1-dx/4,x1-dx/4,y0,y1,box1->p,cs)!=2
      && num_cross(x1-dx/8,x1-dx/8,y0,y1,box1->p,cs)!=2 ) Break; // ~r

    if (dy>15)
    if( num_cross(x0,x1,y1-dy/4,y1-dy/4,box1->p,cs)>1
     && num_cross(x0,x1,y0+dy/4,y0+dy/4,box1->p,cs)>1 ) Break; // ~H

    if( dx>4 )
    if( loop(bp,dx-1     ,3*dy/4,dx,cs,0,LE)-
        loop(bp,0        ,3*dy/4,dx,cs,0,RI)>dx/5+1
     && loop(bp,dx-1-dx/8,dy-1  ,dy,cs,0,UP)<dy/4 ) {
      if( loop(bp,dx-1     ,5*dy/16,dx,cs,0,LE)-
          loop(bp,0        ,5*dy/16,dx,cs,0,RI)>=dx/5+1) ad=98*ad/100; // ~E
      i=loop(bp,dx/8,0,dy,cs,0,DO);
      if (i<dy/8 || i>dy/2) {
        ad=98*ad/100; // ~E, could also be a "f" with big serifs
        MSG(fprintf(stderr,"ad=%d",ad);) }
      if (!gchar) { ad=98*ad/100;
        MSG(fprintf(stderr,"ad=%d",ad);) }
    }
    i = loop(bp,dx-1  ,3*dy/4,dx  ,cs,0,LE)/2;
    if (loop(bp,dx-1-i,  dy-1,dy/2,cs,0,UP)<dy/4)
    if (loop(bp,0     ,3*dy/4,dx  ,cs,0,RI)<dx/4) {
      ad=98*ad/100; // ~E but serif-f
      MSG(fprintf(stderr,"ad=%d",ad);) }

    if( loop(bp,0,dy/4,dx  ,cs,0,RI)>1
     && loop(bp,0,   0,dy/4,cs,0,DO)<dy/4 ) {
      ad=95*ad/100; // ~I
      MSG(fprintf(stderr,"ad=%d",ad);) }

    if (get_bw(x0+dx/16,x1-dx/16,y0,y0,box1->p,cs,2) == 0) { // white pixels?
      ad=98*ad/100; // F
      MSG(fprintf(stderr,"ad=%d",ad);) }

    if (!hchar) ad=ad*98/100; // d*=100;d/=128 // not 100% !
    if (box1->m4>0 && gchar && ad<99 &&
        8*box1->y1 >= box1->m4*7+box1->m3) ad++;
    Setac(box1,'f',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_bB - rule-based tests whether the glyph in sdata->box1 is 'B' or 'b'.
 *
 * Two independent single-pass `for` blocks are run one after the other
 * (first 'B', then 'b').  Each starts with ad=100, leaves through Break
 * (project macro) as soon as a rule fails, scales `ad` down for shapes that
 * resemble other characters and finally reports through Setac(box1,c,ad).
 * Pixel probes with `bp` use box-relative coordinates, probes with
 * `box1->p` use x0..x1 / y0..y1.
 *
 * Returns 'b' directly when the 'b' test ends with ad>=100, otherwise
 * box1->c.
 */
static wchar_t ocr0_bB(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      ad;                      /* tmp-vars */

  // --- test B ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>4;){     // min 3x5
    DBG( wchar_t c_ask='B'; )
    if (sdata->holes.num < 2) Break; /* B needs (at least) two holes */
    // upper half: every row must have black in the left half
    for(i=1,y=y0;y<y1-dy/2 && i;y++)
      if( get_bw(x0,x0+dx/2, y , y ,box1->p,cs,1) != 1 ) i=0;
    if( !i ) Break;
    // lower half: every row must have black in the left third
    for(i=1,y=y1-dy/2;y<y1 && i;y++)
      if( get_bw(x0,x0+dx/3, y , y ,box1->p,cs,1) != 1 ) i=0;
    if( !i ) Break;
    if( get_bw(x1,x1     , y0 , y0 ,box1->p,cs,1) == 1 ) Break;
    if( num_cross(x0+dx/2, x0+dx/2,y0,y1  ,box1->p,cs) != 3 )
    if( num_cross(x1-dx/3, x1-dx/3,y0,y1  ,box1->p,cs) != 3 ) Break;
    /* --- detect center of lower hole --- */
    y = loop(box1->p,x0+dx/2,y1  ,dy,cs,0,UP); if (y>1+dy/8) Break;
    y+= loop(box1->p,x0+dx/2,y1-y,dy,cs,1,UP); if (y>dy/3) Break;
    y=y1-y-loop(box1->p,x0+dx/2,y1-y,dy,cs,0,UP)/2; if (y<y0+3*dy/8) Break;
    if (y<y0+dy/2) ad=96*ad/100;
    if( num_cross(0,dx-1,y-y0  ,y-y0  ,bp,cs) != 2 )
    if( num_cross(0,dx-1,y-y0+1,y-y0+1,bp,cs) != 2 ) Break;
    if( num_cross(0,dx-1,  dy/4  ,  dy/4  ,bp,cs) != 2 )
    if( num_cross(0,dx-1,  dy/4+1,  dy/4+1,bp,cs) != 2 )
    if( num_cross(0,dx-1,  dy/4-1,  dy/4-1,bp,cs) != 2 ) Break;
    // find the row y (box-relative) of the middle bar: a single crossing
    for( y=dy/4;y<3*dy/4;y++ ) if( num_cross(0,dx-1,y,y,bp,cs)==1 ) break;
    if( y==3*dy/4 ) Break;

    if( loop(box1->p,x0,y0+ y   ,dx,cs,0,RI)
      > loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)+dx/32 )
    if( get_bw(x0,x0,y0,y0,box1->p,cs,1) == 0 )
    if( get_bw(x0,x0,y1,y1,box1->p,cs,1) == 0 ) Break; // ~8
    // left margins at 1/4 (i1), around the middle (i2, max of 3 rows), 3/4 (i3)
    i1=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI);
    i2=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
    i =loop(box1->p,x0,y0+dy/2-dy/ 8,dx,cs,0,RI); if(i>i2) i2=i;
    i =loop(box1->p,x0,y0+dy/2-dy/16,dx,cs,0,RI); if(i>i2) i2=i;
    i3=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI);
    if(dy>16 && i3<i2 && i1+i3<2*i2){
      if (i3+i1<2*i2-dx/16) ad=98*ad/100; // ~8
      if (i3+i1<2*i2-dx/8 ) ad=96*ad/100;
      if( loop(box1->p,x0,y0+ 1 ,dx,cs,0,RI)
       >= loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
      if( loop(box1->p,x0,y0+ 0 ,dx,cs,0,RI)
       >  loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
      if( loop(box1->p,x0,y1- 0 ,dx,cs,0,RI)
       >  loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 )
      if( loop(box1->p,x0,y1- 1 ,dx,cs,0,RI)
       >  loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) Break; // ~8 Aug00
    }

    if (sdata->holes.num != 2) Break;
    // one hole has to lie above and one below the middle bar row y
    if (sdata->holes.hole[0].y0 < y-1
     && sdata->holes.hole[1].y0 < y-1 ) Break;
    if (sdata->holes.hole[0].y1 > y+1
     && sdata->holes.hole[1].y1 > y+1 ) Break;
    // if( num_hole(0,dx-1,0  ,y+1 ,bp,cs,NULL) != 1 ) Break;
    // if( num_hole(0,dx-1,y-1,dy-1,bp,cs,NULL) != 1 ) Break;
    // out_x(box1);

    for( x=dx,y=dy/6; y<dy-dy/8; y++ ) // left border straight
    { i=loop(box1->p,x0,y0+y,dx,cs,0,RI); if( i>x+dx/9 ) break;
      if(i<x) x=i;
    }
    if( y<dy-dy/8 ) Break; // ~8 bad_a

    for( x=dx,y=1;y<dy/4;y++ ) // right border straight
    { i=loop(bp,dx-1,dy-y,dx,cs,0,LE);
      if( i<x ) x=i; else if( i>x )break;
    }
    if( y<dy/4 ) Break; // ~ff (serifs?)

    x=loop(bp,0,dy/2  ,dx,cs,0,RI);
    i=loop(bp,0,dy/2-1,dx,cs,0,RI); if (i>x) x=i; // allow dust
    i=loop(bp,0,dy/2+1,dx,cs,0,RI); if (i>x) x=i;
    if ( loop(bp,0,  dy/8,dx,cs,0,RI)
        +loop(bp,0,7*dy/8,dx,cs,0,RI) > 2*x+1 ) Break; // not convex!

    if(!hchar){ // ~ fat_a
      ad=99*ad/100;
      x =loop(bp,0,dy/4,dx,cs,0,RI);
      if(loop(bp,0,dy/2,dx,cs,0,RI)>x+dx/8) ad=97*ad/100;
    }

    if ( (!hchar) && (dx<=10 || dy<=10) ) ad=97*ad/100; // hchar or good_quality
    if (gchar) ad=99*ad/100;
    Setac(box1,'B',ad);
    break;
  }
  // --- test b ---------------------------------------------------
  for(ad=d=100;dx>3 && dy>4;){     // min 4x5
    DBG( wchar_t c_ask='b'; )
    if (sdata->holes.num < 1) Break; /* b needs a hole */
    // Break here only leaves this inner row loop; y is evaluated below
    for(y=y0;y<y1;y++)
      if( get_bw(x0     , x0+dx/2, y      , y      ,box1->p,cs,1) != 1 ) Break;
    if(y<y1-dy/32-1) Break;
    if( get_bw(x0+  dx/2, x0+dx/2, y1-dy/3, y1     ,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1-  dx/2, x1     , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1-  dx/3, x1     , y0     , y0+dy/5,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x1-4*dx/9, x1     , y0+dy/5, y0+dy/5,box1->p,cs,1) == 1 ) Break;
    if( num_cross(x0,x1,y0+dy/4  ,y0+dy/4  ,box1->p,cs) > 1 ) // &
    if( num_cross(x0,x1,y0+dy/4-1,y0+dy/4-1,box1->p,cs) > 1 )
    if( dy<16 ||
        num_cross(x0,x1,y0+dy/5  ,y0+dy/5  ,box1->p,cs) > 1 ) Break; // fat b
    // lower part: rows with exactly 2 crossings (i) must dominate the rest (j)
    for(i=j=0,y=dy/2;y<dy-dy/8;y++)
      if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i++; else j++;
    if( i<2*j ) Break; // v024a4
    if (sdata->holes.num != 1) Break;
    if (sdata->holes.hole[0].y0 < dy/4) Break;
    if ((sdata->holes.hole[0].y1-sdata->holes.hole[0].y0+1)
       *(sdata->holes.hole[0].x1-sdata->holes.hole[0].x0+1)*16
        < dx*dy) ad=90*ad/100; // hole too small
    if( num_hole( x0, x1   , y0+dy/4, y1,box1->p,cs,NULL) != 1 ) Break;
    i=loop(bp,dx-1,dy-1     ,dx,cs,0,LE);
    j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(j>i) Break;
    if (!hchar) ad=99*ad/100;
    if ( gchar) ad=99*ad/100;
    Setac(box1,'b',ad);
    if (ad>=100) return 'b';
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_dD - rule-based tests whether the glyph in sdata->box1 is 'D' or 'd'.
 *
 * Two independent single-pass `for` blocks are run one after the other
 * (first 'D', then 'd').  Each starts with ad=100, leaves through Break
 * (project macro) as soon as a rule fails, scales `ad` down for shapes that
 * resemble other characters and finally reports through Setac(box1,c,ad).
 * Pixel probes with `bp` use box-relative coordinates, probes with
 * `box1->p` use x0..x1 / y0..y1.
 *
 * Returns box1->c.
 */
static wchar_t ocr0_dD(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,d,x,y,ya,yb,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      ad;                      /* tmp-vars */

  // --- test D ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='D'; )
    if (sdata->holes.num < 1) Break; /* D needs a hole */
    if( get_bw(x0     ,x0+dx/3,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1-dx/3,x1     ,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1     ,x1     ,y0     ,y0+dy/16,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x1-dx/2,x1     ,y0+dy/4,y0+dy/4 ,box1->p,cs,1) != 1 ) Break;
    if( num_cross(x0+dx/2,x0+dx/2,y0     ,y1     ,box1->p,cs) != 2 )
    if( num_cross(x1-dx/3,x1-dx/3,y0     ,y1     ,box1->p,cs) != 2 ) Break;
    if( num_cross(x0     ,x1     ,y0+dy/3,y0+dy/3,box1->p,cs) != 2 ) Break;
    if( num_cross(x0     ,x1     ,y1-dy/3,y1-dy/3,box1->p,cs) != 2 ) Break;
    if (sdata->holes.num != 1) Break;
    // the hole must span at least the middle third vertically
    if (sdata->holes.hole[0].y0 >      dy/3) Break;
    if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
    // if( num_hole (x0     ,x1     ,y0     ,y1     ,box1->p,cs,NULL) != 1 ) Break;
    // test if left edge is straight
    for(x=0,y=bp->y-1-dy/8;y>=dy/5;y--){
      i=loop(bp,0,y,x1-x0,cs,0,RI);
      if( i+2+dx/16<=x ) break;
      if( i>x ) x=i;
    }
    if (y>=dy/5 ) Break;
    /* test if right edge is falling */
    for(x=dx,y=0;y<dy/3;y++){
      i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
      if( i>x+dx/16 ) break;
      if( i<x ) x=i;
    }
    if (y<dy/3 ) Break;
    /* test if right edge is raising */
    for(x=dx,y=bp->y-1;y>2*dy/3;y--){
      i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
      if( i>x+dx/16 ) break;
      if( i<x ) x=i;
    }
    if (y>2*dy/3 ) Break;
    if( loop(bp,dx-1,dy-1     ,dx,cs,0,LE) <=
        loop(bp,dx-1,dy-2-dy/16,dx,cs,0,LE) ) Break; // P

    y=loop(bp,dx/2,dy-1,dy,cs,0,UP)-1; if (dy>16) y/=2;
    if ( y>=dy/16 ) { y-=dy/16;
      if (get_bw(dx/2,dx-1,dy-1-y,dy-1-y,bp,cs,1)==1) Break; // ~A
    }

    ya=loop(bp,      0,dy-1,dy,cs,0,UP);
    yb=loop(bp,dx/16+1,dy-1,dy,cs,0,UP);
    if (ya<dy/2 && ya>dy/16 && ya>yb) Break; // ~O
    if (loop(bp,   0,dy-1-dy/16,dx,cs,0,RI)>dx/16) ad=99*ad/100; // O? 0907
    if (loop(bp,   0,dy-1-dy/16,dx,cs,0,RI)>=
        loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) ad=97*ad/100; // ~O 0907

    if ( loop(bp, dx/2,   0,dy,cs,0,DO)
        -loop(bp, dx/2,dy-1,dy,cs,0,UP) > dy/8 ) ad=97*ad/100; // ~b

    if (loop(bp,   0,   0,dx,cs,0,RI)>=dx/2
     && loop(bp,dx-1,dy-1,dx,cs,0,LE)>=dx/2
     && loop(bp,   0,dy/2,dx,cs,0,RI)< 2    ) ad=96*ad/100; // thin O

    if(box1->dots) ad=ad*94/100;
    if ( gchar) ad=99*ad/100;
    if (!hchar) ad=99*ad/100;
    Setac(box1,'D',ad);
    break;
  }
  // --- test d ---------------------------------------------------
  for(d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='d'; )
    ad=100;
    if (sdata->holes.num < 1) Break; /* d needs a hole */
    if( get_bw(x0     , x0+dx/2, y1-dy/6, y1-dy/9,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0     , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2, x1     , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1-dx/4, x1     , y0+dy/8, y0+dy/8,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2, x0+dx/2, y1-dy/4, y1     ,box1->p,cs,1) != 1 ) Break;
    if(dy>19)  // 0..dx/3 dont work on serif d, 0..dx/4 is more safe
    if( get_bw(x0     , x0+dx/4, y0     , y0+dy/5,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x0     , x0+dx/4, y0     , y0+dy/6,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x0     , x0+dx/4, y1-dy/8, y1     ,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2-1,x0+dx/2,y1-dy/8, y1     ,box1->p,cs,1) != 1 ) Break; // ~"A
    if( loop(bp,bp->x-1,  bp->y/4,x1-x0,cs,0,LE) >
        loop(bp,bp->x-1,3*bp->y/4,x1-x0,cs,0,LE)+1 ) Break;
    // more than dx/8 of width should show two horizontal lines (o-like)
    for (i=dx/8+1,x=0;x<dx && i;x++) {
      if (num_cross(x, x, 0, dy-1, bp, cs) == 2) i--; // fails for serif?
    }
    if (i>1) Break;           // "d as a" patch
    if (i==1) ad=99*ad/100;   // "d as a" patch, serif?
    // i counts down for every row with exactly 2 crossings, up for >3
    for(i=dy/6+1,y=dy/4;y<dy && i;y++){
      if( num_cross(0   ,dx-1,y   ,y   , bp,cs) == 2 ) i--;
      if( num_cross(0   ,dx-1,y   ,y   , bp,cs) >  3 ) i++; // ~al
    }
    if( i ) ad=98*ad/100;
    // upper half: need dy/8+1 rows whose only crossing is in the right half
    for(i=dy/8+1,y=0;y<dy/2 && i;y++){
      if( num_cross(0   ,dx-1,y   ,y   , bp,cs) == 1 )
      if( num_cross(dx/2,dx-1,y   ,y   , bp,cs) == 1 ) i--;
    }
    if( i ) Break;
    if (sdata->holes.num<1) Break;
    if (sdata->holes.num>1) {
      if (dx<6) Break; ad=95*ad/100; } // glued j above 8  (4x6 sample)
    MSG(fprintf(stderr,"hole[0].y0,y1= %d %d",sdata->holes.hole[0].y0,sdata->holes.hole[0].y1););
    if ( sdata->holes.hole[0].y0 < dy/4  ) Break;
    if (dy-sdata->holes.hole[0].y1 > dy/4+1) Break; // glued et
    // if( num_hole(x0     , x1     , y0+dy/4  , y1     ,box1->p,cs,NULL) !=1 ) Break;
    if( num_cross(0   ,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) != 2 ) { // glued al
      if (dy>15) { Break; } else ad=96*ad/100;
    }
    if (!hchar) ad=98*ad/100;
    if ( gchar) ad=99*ad/100;
    Setac(box1,'d',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_F - rule-based test whether the glyph in sdata->box1 is an 'F'.
 *
 * The body is a single-pass `for` used as a breakable block: a failed rule
 * leaves it (mostly through the project macro Break), a glyph surviving all
 * rules is reported via Setac(box1,'F',ad), where `ad` starts at 100 and is
 * scaled down for shapes resembling other characters.  Pixel probes with
 * `bp` use box-relative coordinates, probes with `box1->p` use x0..x1 /
 * y0..y1.
 *
 * Returns box1->c.
 */
static wchar_t ocr0_F(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      ad;                      /* tmp-vars */

  // --- test F ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>4;){     // dx>1 dy>2*dx
    DBG( wchar_t c_ask='F'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if( get_bw(x0+dx/2,x0+dx/2,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0,x0+dx/4,y1-dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0,x0+dx/2,y0+dy/4,y0+dy/4,box1->p,cs,1) != 1 ) Break;

    // lowest quarter: wide white margin on the right in every row
    for (x=0,y=0;y<dy/4;y++) {
      j=loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if(j<3 || 3*j<dx) break; // ~f Jun00
      if (j>x) x=j;
    }
    if (y<dy/4 || x<dx/2) Break;

    for( i=1,y=0; y<dy/4 && i; y++ ){ // long black line
      j=loop(bp,0,y,dx,cs,0,RI);
      j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/2 ) i=0; }
    if( i ) Break;

    x=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
    x=loop(bp,x,dy-1-dy/4,dx,cs,1,RI); // stroke thickness
    for( i=1,y=dy/3; y<dy-1-dy/3 && i; y++ ) // black line
    { j=loop(bp,0,y,dx,cs,0,RI);
      j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/3 && ((j>2*x && dx>8) || j>x+1)) i=0; }
    if( i ) Break;

    y=dy/8; if (y<1) y=1;
    for( i=1; y<dy-1-dy/2; y++ ){ // search horizontal white gap
      x =loop(bp,dx-1,y,dx,cs,0,LE); if(x<2) continue; // skip serifs
      j =loop(bp,dx-x,y,dy/4,cs,0,UP);
      x+=loop(bp,dx-x,y-j+1,dx,cs,0,LE); if (x>=dx/3) { i=0; break; }
    }
    if( i ) Break;

    // check for vertical line on left side
    for(i=1,y=1;y<=dy/2 && i;y++)
      if( get_bw(0,dx/2,y,y,bp,cs,1) != 1 ) i=0;
    if( !i ) Break;

    for(i=1,y=dy/2;y<dy && i;y++)
      if( get_bw(0,dx/3,y,y,bp,cs,1) != 1 ) i=0;
    if( !i ) Break;

    i=loop(bp,dx-1,dy-1,dx,cs,0,LE); // serif or E ?
    if (i<=dx/3) {
      if (loop(bp,dx-1,(dy+4)/8,dx,cs,0,LE)>dx/8  // no serif
       || loop(bp,   0,    dy-3,dx,cs,0,RI)<1) break;
      ad=99*ad/100;
    }
    if( get_bw(dx-1-dx/4,dx-1,dy-1-dy/4,dy-1,bp,cs,1) == 1 ) Break; // ~E
    if( get_bw(dx-1     ,dx-1,0        ,dy/3,bp,cs,1) != 1 ) Break;

    if( loop(bp,0,  bp->y/4,dx,cs,0,RI) <
        loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1 ) Break;
    // if( num_hole(x0     , x1     , y0     , y1     ,box1->p,cs,NULL) >0 ) Break;
    if (sdata->holes.num > 0) Break;
    for(i=0,x=dx/4;x<dx-1;x++)
      if( num_cross(x,x,0,dy-2,bp,cs) == 2 ) i++;
    if ( i<1 ) Break; // 0.2.4a4

    if(dy<20) /* special case of small fi, not very elegant */
    if( get_bw(   1,   1,1,1,bp,cs,1) == 1
     && get_bw(   0,   0,2,2,bp,cs,1) == 1
     && get_bw(dx-2,dx-1,0,0,bp,cs,1) == 0
     && get_bw(   0,   1,0,0,bp,cs,1) == 0
     && get_bw(   0,   0,0,1,bp,cs,1) == 0 ) Break;

    // check for screen font f
    // i is the white column just left of the stem at 3/4 height; scan it
    // upwards from the bottom row.  The former call passed (dy-1,i) as (x,y),
    // i.e. the height as column and the column as row -- every other loop()
    // call in this file uses (x,y) order.
    i= loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1;
    if (i>=0 && loop(bp,i,dy-1,dy,cs,0,UP)<=3*dy/4 ) ad=ad*98/100;

    // check for screen font P
    i=  loop(bp,bp->x-1,bp->y/4,dx,cs,0,LE);
    if (i<1) {
      j=i+loop(bp,bp->x-1-i,bp->y/4,    dx  ,cs,1,LE);
      j=  loop(bp,bp->x-1-j,bp->y/4,3*dy/4,cs,0,DO);
      if (j<=dy/2) {
        i=loop(bp,bp->x-1,0,dx,cs,0,LE);
        ad=ad*98/100;
        if (i>dx/8) Break;
        if (i) ad=98*ad/100;
      }
    }

    if (!hchar) if ((box1->m2-box1->y0)*8>=dy) { // ignore bad m1..4
      if ( num_cross(2*dx/3,2*dx/3,0,dy-1,bp,cs) < 2 ) ad=90*ad/100; // ~r
    }
    if (gchar) ad=99*ad/100;
    Setac(box1,'F',ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_uU - rule-based test whether the glyph in sdata->box1 is 'u' or 'U'.
 *
 * The body is a single-pass `for` used as a breakable block: every failed
 * rule leaves it through Break (project macro).  A surviving glyph is
 * reported via Setac(box1,bc,ad) with bc='U' if sdata->hchar is set and
 * bc='u' otherwise; `ad` starts at 100, is scaled down for shapes that
 * resemble v/w/ll and is capped at 99.  Pixel probes with `bp` use
 * box-relative coordinates, probes with `box1->p` use x0..x1 / y0..y1.
 *
 * Returns box1->c.
 */
static wchar_t ocr0_uU(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      ad;                      /* tmp-vars */
  wchar_t bc=UNKNOWN;

  // --- test uU ---------------------------------------------------
  // as wide in the middle as at the top (a V gets continuously narrower)
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='u'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    for(y=y0+dy/4;y<y1-dy/4;y++)   /* also handwritten u */
      if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
    if( y<y1-dy/4 ) Break;
    if( get_bw(dx/2,dx/2,dy/2,dy-1,bp,cs,1)==0 ) Break;
    if( get_bw(dx/2,dx-1,dy/2,dy/2,bp,cs,1)==0 ) Break;
    // deepest white run from the top (i) within the middle columns
    for(i=0,x=3*dx/8;x<dx-dx/4;x++){
      y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
    }
    if( i<dy/4 ) Break;
    x--;
    if( get_bw(0,x   ,i-1,i-1,bp,cs,1)==0 ) Break;
    if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) Break;

    // left half: i is an error budget, the test fails when it is used up
    for(i=dy/8+2,y=dy/8;y<dy-(dy+2)/4 && i;y++){ // 12%+1 errors tolerated
      j=num_cross(0,dx/2-((y>dy/2)?dx/8:0),y,y,bp,cs);
      if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
      // was (j<1 && j>2), which can never be true
      if( y<dy/2 && (j<1 || j>2) ) { i--; ad=90*ad/100; }
      if( y>dy/2 &&  j!=1        ) { i--; ad=95*ad/100; }
    }
    if( !i ) Break;
    // right half, same scheme
    for(i=dy/16+1,y=dy/8;y<dy-dy/4 && i;y++){ // 12%+1 errors tolerated
      j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
      // was (j<1 && j>2), which can never be true
      if( y>dy/2 && (j<1 || j>2) ) i--;
      if( y<dy/2 &&  j!=1        ) i--;
    }
    if( !i ) Break;
    // at least one middle column must be white in the upper third
    for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
      if( get_bw( x, x, y0, y0+dy/3,box1->p,cs,1) != 1 ) i=0;
    }
    if( i ) Break;
    for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
      if( get_bw( x, x,y0+dy/3,y1-dy/3,box1->p,cs,3) != 2 ) i--;
    }
    if( !i ) Break;
    for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
      if( get_bw( x, x,y1-dy/2,y1,box1->p,cs,3) == 2 ) i=0;
      if( get_bw( x, x,y1-dy/3,y1,box1->p,cs,3) == 2 ) ad=98*ad/100;
    }
    if( !i ) Break;
    if( num_cross(0      ,dx/2,   dy/4,   dy/4,bp,cs)==2
     && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==1 ) Break; // ~{\it v}

    i=loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
    j=loop(bp,0,dy-1-dy/8 ,dx,cs,0,RI);
    if( i<j ) Break; // ~ll v0.2.4a3
    if(dy>15)
    if( loop(bp,dx-1,dy/16,dx,cs,0,LE)
      > loop(bp,dx-1,dy/8 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad 0 (thin)
    if( hchar && dy>7)
    if( loop(bp,   0,  dy-1,dx,cs,1,RI)==dx
     && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/16
     && loop(bp,   0,3*dy/4,dx,cs,0,RI)>dx/16
     && loop(bp,dx-1,  dy/2,dx,cs,0,LE)>dx/16
     && loop(bp,   0,  dy/2,dx,cs,0,RI)>dx/16
     ) Break; // melted ll

    i=loop(bp,   0,dy-2-dy/8,dx,cs,0,RI);
    j=loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE);
    if ( i>dx/4 && j>dx/4 && i+j>=dx/2) Break; // v
    if (i+j>=dx/2) ad=97*ad/100;

    if ( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=2 ) ad=96*ad/100; // w
    if ( loop(bp,dx/2,dy-1,dy,cs,0,UP)>0 ) ad=98*ad/100; // w

    if (ad==100) ad=99; // ToDo: only if lines.wt<100
    bc='u';
    if (gchar) ad=98*ad/100;
    if (hchar) bc='U';
    if (box1->dots>0) ad=99*ad/100;
    Setac(box1,bc,ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_micro - rule-based test whether the glyph in sdata->box1 is the
 * micro sign (MICRO_SIGN).
 *
 * The test only runs when sdata->gchar is set and sdata->hchar is not.
 * The body is a single-pass `for` used as a breakable block: every failed
 * rule leaves it with a plain `break`; a surviving glyph is reported via
 * Setac(box1,MICRO_SIGN,ad) with ad still at its initial value 100.
 * Pixel probes with `bp` use box-relative coordinates, probes with
 * `box1->p` use x0..x1 / y0..y1.
 *
 * Returns box1->c.
 */
static wchar_t ocr0_micro(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,i2,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      ad;                      /* tmp-vars */

  // --- test \mu µ MICRO_SIGN --------------------------------------
  // as wide in the middle as at the top (a V gets continuously narrower)
  if( gchar && !hchar )
  for(ad=d=100;dx>2 && dy>4;){     // min 3x5
    DBG( wchar_t c_ask='u'; )
    if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
    for(y=y0+dy/8;y<box1->m3-dy/4;y++)
      if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
    if( y<box1->m3-dy/4 ) break;
    if( get_bw(dx/2,dx/2,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
    if( get_bw(dx/2,dx-1,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
    // first row in the lower half whose right white margin exceeds 5/8 dx
    for(y=dy/2;y<dy;y++){
      x=loop(bp,dx-1,y,dx,cs,0,LE); if(8*x>5*dx) break;
    }
    if( y>=dy || 2*y>box1->m3+box1->m4) break;
    i2=y;
    // deepest white run from the top (i) within the middle columns
    for(i=0,x=2*dx/8;x<dx-1-dx/4;x++){
      y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
    }
    if( i<dy/4 ) break;
    x--;
    if( get_bw(0,x   ,i-1,i-1,bp,cs,1)==0 ) break;
    if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) break;
    // left half: i is an error budget, the test fails when it is used up
    for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 errors tolerated
      j=num_cross(0,dx/2,y,y,bp,cs);
      if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
      // was (j<1 && j>2), which can never be true
      if( y<dy/2 && (j<1 || j>2) ) i--;
      if( y>dy/2 &&  j!=1        ) i--;
    }
    if( !i ) break;
    // right half, same scheme
    for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 errors tolerated
      j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
      // was (j<1 && j>2), which can never be true
      if( y>dy/2 && (j<1 || j>2) ) i--;
      if( y<dy/2 &&  j!=1        ) i--;
    }
    if( !i ) break;
    // at least one middle column must be white in the upper quarter
    for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
      if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
    }
    if( i ) break;
    for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
      if( get_bw( x, x,y0+dy/4,y1-dy/2,box1->p,cs,3) != 2 ) i--;
    }
    if( !i ) break;
    if( num_cross(0      ,dx/2,   dy/4,   dy/4,bp,cs)!=1 ) break;
    if( num_cross(dx-dx/2,dx-1,dy-dy/2,dy-dy/2,bp,cs)!=1 ) break;
    if( get_bw( (dx+2)/4,dx-1,dy-2-3*dy/16,dy-1,bp,cs,1) == 1 ) break;
    if( num_cross(0,dx/4,dy-1,dy-1,bp,cs)!=1 ) break;

    Setac(box1,MICRO_SIGN,ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_vV - rule-based test whether the glyph in sdata->box1 is 'v' or 'V'.
 *
 * The body is a single-pass `for` used as a breakable block: every failed
 * rule leaves it through Break (project macro).  A surviving glyph is
 * reported via Setac(box1,bc,ad) with bc='V' if sdata->hchar is set and
 * bc='v' otherwise; `ad` starts at 100 and is scaled down for shapes that
 * resemble U/Y.  Pixel probes with `bp` use box-relative coordinates,
 * probes with `box1->p` use x0..x1 / y0..y1.
 *
 * Returns box1->c.
 */
static wchar_t ocr0_vV(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      ad;                      /* tmp-vars */
  wchar_t bc=UNKNOWN;

  // --- test v -------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='v'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    x=loop(bp,dx/2,0,dx,cs,1,RI)+dx/2; // be sure in the upper gap
    y=loop(bp,   x,0,(dy+1)/2,cs,0,DO)-1; // (x,y) should be in the gap
    if (x>3*dx/4 || y<dy/4) Break;
    if( get_bw(x0,x0+x,y0+y,y0+y,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+x,x1,y0+y,y0+y,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+x,x0+x,y1-dy/2,y1,   box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+x,  x0+x  ,y0, y0+dy/3,box1->p,cs,1) == 1 ) // it v?
    if( get_bw(x0+x+1,x0+x+1,y0, y0+dy/3,box1->p,cs,1) == 1 ) Break;

    // UVW
    if(((num_cross(     0,dx/2+1,dy/ 8,dy/ 8,bp,cs)!=1)
     && (num_cross(     0,dx/2+1,dy/16,dy/16,bp,cs)!=1) // it v
     && (num_cross(dx/2+1,dx  -1,dy/ 8,dy/ 8,bp,cs)!=1)) /* () added on Sep00 */
     || ((num_cross(    0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs)> 1)
      && (num_cross(    0,dx-1,dy-1     ,dy-1     ,bp,cs)> 1)) ) Break;
    // UV
    if( get_bw(0        ,dx/8,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break;
    if( get_bw(dx-1-dx/8,dx-1,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break;
    if( loop(bp,0   ,dy/6     ,dx,cs,0,RI)
      >=loop(bp,0   ,dy-1-dy/3,dx,cs,0,RI) && dy>6 ) Break;
    if( loop(bp,0   ,dy-1-dy/3,dx,cs,0,RI)
       >loop(bp,0   ,dy-1-dy/8,dx,cs,0,RI)
     && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
       >loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) Break; // better OR ?
    if( loop(bp,0   ,dy-1-dy/3,dx,cs,0,RI)
      >=loop(bp,0   ,dy-1-dy/8,dx,cs,0,RI)
     && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
      >=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) ad=99*ad/100; // font21
    if( loop(bp,dx-1,dy/6     ,dx,cs,0,LE)
      >=loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) && dy>6 ) Break;
    x=loop(bp,0,dy-1,dx,cs,0,RI); // 3*x>dx changed to 2*x>dx May2001 JS
    x=loop(bp,x,dy-1,dx,cs,1,RI); if ( dx>14 && 2*x>dx ) Break; // U
    if( num_cross(0      ,dx/2,   dy/4,   dy/4,bp,cs)==2
     && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==2 ) Break; // ~{\it u}

#if 0
    // measure thickness of lower v
    i=loop(bp,   0,dy-1-dy/16,dx,cs,0,RI)
     +loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
    j=loop(bp,   0,dy-1-dy/4 ,dx,cs,0,RI)
     +loop(bp,dx-1,dy-1-dy/4 ,dx,cs,0,LE);
    if( box1->m1 && hchar && dy>15 && j>=i-dx/32 ) Break; // ~Y
#endif
    /* V has serifs only on upper site! Y also on bottom, check it. Okt00 */
    // i1 = widest first black run of the two top rows
    i=loop(bp,   0,   0,dx,cs,0,RI);
    i=loop(bp,   i,   0,dx,cs,1,RI); i1=i; // thickness
    i=loop(bp,   0,   1,dx,cs,0,RI);
    i=loop(bp,   i,   1,dx,cs,1,RI); if(i>i1) i1=i; // thickest
    // i2 = width of the first black run at 1/4 height
    i=loop(bp,   0,dy/4,dx,cs,0,RI);
    i=loop(bp,   i,dy/4,dx,cs,1,RI); i2=i;
    // i3 = widest first black run at the bottom.
    // NOTE(review): the first probe uses row dy, one past the last row dy-1
    // of the box; by symmetry with the top rows 0 and 1 this was probably
    // meant to be dy-1 and dy-2 -- left unchanged, confirm how loop() treats
    // out-of-range start points before touching it.
    i=loop(bp,   0,dy  ,dx,cs,0,RI);
    i=loop(bp,   i,dy  ,dx,cs,1,RI); i3=i; // thickness
    i=loop(bp,   0,dy-1,dx,cs,0,RI);
    i=loop(bp,   i,dy-1,dx,cs,1,RI); if(i>i3) i3=i; // thickest
    if( y0 < box1->m2 )
    if( i1-i2 > dx/32+2
     && i3-i2 > dx/32+2 ) Break; // ~serif_Y

    if( y0 < box1->m2 )   // uppercase V ?
    if( i1-i2 < dx/32+2 ) /* no serif detected */
    if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs)==1 ){
      j=loop(bp,   0,dy-1-dy/4 ,dx,cs,0,RI);
      j=loop(bp,   j,dy-1-dy/4 ,dx,cs,1,RI);
      if (j<i2+1) Break; // ~Y
      if (j<=i2+1) ad=99*ad/100; // ~Y
    }

    ad=99*ad/100; // be careful (remove later)

    if( loop(bp,0   ,dy-1-dy/4,dx,cs,0,RI)
       >loop(bp,0   ,dy-1     ,dx,cs,0,RI) ) ad=96*ad/100;

    if (num_cross(0,dx-1,  dy/2,  dy/2,bp,cs)==1
     && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 1) Break; // 5x8 * Jul09

    if (gchar) ad=99*ad/100;
    bc='v';
    if( hchar ) bc='V';
    Setac(box1, bc, ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_rR - rule based test whether the box in sdata looks like 'r' or 'R'.
 *
 * Input:  sdata->box1 (box with frame x0..x1,y0..y1 and base lines m1..m4),
 *         sdata->bp (pixel map used with box relative coordinates
 *         0..dx-1 / 0..dy-1 in the calls below), sdata->holes,
 *         sdata->hchar, sdata->gchar and the threshold sdata->cs.
 * Output: calls Setac(box1,'r',ad) and/or Setac(box1,'R',ad) for every
 *         test that runs to its end; returns box1->c.
 *
 * Each test is a for-block entered with ad=100 whose body ends with
 * `break`, so it is executed at most once.  A check either leaves the
 * block early (Break, the trailing "~X" remark names the look-alike that
 * is excluded) or scales the probability ad down by a few percent.
 *
 * NOTE(review): loop(), num_cross(), get_bw(), num_hole(), Setac() and the
 * Break/DBG/MSG macros are defined outside this block.  From their use
 * here loop(p,x,y,len,cs,col,dir) presumably returns the length of the
 * run of colour col (0=white, 1=black) starting at (x,y) in direction
 * dir, and num_cross() presumably counts the strokes crossed along a
 * line -- confirm against their definitions.
 */
static wchar_t ocr0_rR(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad; /* tmp-vars */
|
|
|
|
// --- test r -------
|
|
for(ad=d=100;dy>3 && dx>1;){ // dy>dx, 4x6 font, dx=2 smallest prop-font
|
|
DBG( wchar_t c_ask='r'; )
|
|
// only hole[0] is inspected: reject if it reaches below dy/2 or right of dx/2
if (sdata->holes.num > 0
|
|
&& ( sdata->holes.hole[0].y1 > dy/2 // tiny hole in upper left
|
|
|| sdata->holes.hole[0].x1 > dx/2 ) // is tolerated, ~Pp
|
|
) Break; /* tolerant against a tiny hole */
|
|
// box must span at least half of the distance m1..m3
if( 2*dy<box1->m3-box1->m1) Break;
|
|
|
|
if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8 ) Break;
|
|
x= loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x<=dx/2) ad=99*ad/100; // ~t
|
|
if (loop(bp,dx-1-x/2,0,dy,cs,0,DO)>dy/8) ad=99*ad/100; // ~t
|
|
if( dx>4 )
|
|
if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8+2 ) Break; // ~v Jun00
|
|
|
|
i=dy-(dy+20)/32; // ignore dust on the ground
|
|
|
|
// scan rows dy/2..i-1; leaving the loop early means a stem check failed
for( y=4*dy/8; y<i; y++ ){ // center down v-line
|
|
if( y<dy-2*dy/8 && num_cross(0,dx-1,y,y,bp,cs) !=1 ) break;
|
|
i1= loop(bp,0 ,y,dx,cs,0,RI); if(i1>3*dx/8) break;
|
|
i2= loop(bp,dx-1,y,dx,cs,0,LE); if(i1>i2) break;
|
|
if( (i1+(dx-i2
|
|
-1))/2 >= 4*dx/8 ) break; // mass middle should be left
|
|
}
|
|
if (y<i) Break;
|
|
|
|
// every column in 4*dx/8 .. dx-dx/8-1 must pass get_bw over rows 0..(dy+2)/4
for( x=4*dx/8; x<dx-dx/8; x++ ){ // right upper h-line
|
|
if( get_bw(x,x,0,(dy+2)/4,bp,cs,1) !=1 ) break; }
|
|
if (x<dx-dx/8) Break;
|
|
|
|
if( loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)>5*dx/8 // not a C
|
|
&& get_bw(dx-1-dx/8,dx-1,dy-1-dy/4,dy-1,bp,cs,1) ==1 ) Break;
|
|
|
|
// NOTE(review): the second operand compares a horizontal (LE) run with
// 5*dy/8; the neighbouring checks use dx fractions -- confirm intent
if( loop(bp, 0,5*dy/8,dx,cs,0,RI)<=dx/8
|
|
&& loop(bp,dx-1,5*dy/8,dx,cs,0,LE)>=5*dy/8
|
|
&& loop(bp,dx/2, dy-1,dy,cs,0,UP)<=dy/8 ) Break; // ~c
|
|
|
|
if( loop(bp, 0,3*dy/8,dx,cs,0,RI)
|
|
> loop(bp,dx-1,3*dy/8,dx,cs,0,LE)+dx/8 ) {
|
|
if( loop(bp, 0, dy/8,dx,cs,0,RI)<dx/8 ) Break; // ~z (broken)
|
|
ad=98*ad/100;
|
|
}
|
|
|
|
if( loop(bp,0,dy/3,dx,cs,0,RI)>3*dx/4 ) Break; // ~i
|
|
if( loop(bp,0,dy/4,dx,cs,0,RI)>3*dx/8 // ~I
|
|
&& get_bw(0,dx/8,0,dy/4,bp,cs,1) ==1 ) Break;
|
|
// rejected only if both rows dy/2 and dy/2+1 differ from one crossing
if( num_cross(0,dx-1,dy/2, dy/2 ,bp,cs)!=1
|
|
&& num_cross(0,dx-1,dy/2+1,dy/2+1,bp,cs)!=1 ) Break; // ~n 024a3
|
|
|
|
// italic t is sometimes not high enough, look for v-like shape
|
|
for(y=3*dy/4;y<dy-1;y++)
|
|
if( num_cross(0,dx-1,y, y ,bp,cs)==2
|
|
&& num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) break; // ~t
|
|
if(y<dy-1) Break;
|
|
if (loop(bp,dx-1-dx/4,dy-1,dx,cs,0,UP)<dy/4) ad=98*ad/100; // ~f (serif)
|
|
if( num_cross(dx-1,dx-1,0,3*dy/4,bp,cs)>1 ) ad=95*ad/100; // ~f
|
|
if( num_cross(dx/2 ,dx/2 ,0,dy-1,bp,cs)>2
|
|
&& num_cross(dx/2+1,dx/2+1,0,dy-1,bp,cs)>2 ) Break; // ~f
|
|
|
|
if (box1->dots) ad=98*ad/100; /* could be modified latin2-r */
|
|
if (hchar) ad=96*ad/100;
|
|
if (gchar) ad=97*ad/100;
|
|
Setac(box1,'r',ad);
|
|
break; // not 100% sure!
|
|
}
|
|
// --- test R ---------------------------------------------------
|
|
// the calls on box1->p below use the absolute frame x0..x1,y0..y1,
// the calls on bp use 0..dx-1,0..dy-1
for(ad=d=100;dx>2 && dy>3;){ // min 3x4
|
|
DBG( wchar_t c_ask='R'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if( num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 2 ) Break; // ~P
|
|
if (loop(bp, dx/2, dy/4,dy,cs,0,DO)>dy/2) Break; // ~C
|
|
if (loop(bp, dx/2, 0,dy,cs,0,DO)>dy/8
|
|
&& loop(bp, dx/2,dy/16,dx,cs,0,RI)<dx/2
|
|
&& dy>=16 ) Break;
|
|
// i is used as a flag in the following loops: it is cleared (or kept)
// when a row/column with the wanted property is found
for(i=1,y=y0+dy/8;y<=y1-dy/8 && i;y++){ // left v-line
|
|
if( get_bw(x0 , x0+dx/2,y, y,box1->p,cs,1) != 1 ) i=0;
|
|
} if( !i ) Break;
|
|
for(i=1,x=x0+3*dx/8;x<=x1-dx/4 && i;x++){ // upper h-line
|
|
if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
|
|
} if( !i ) Break;
|
|
// y collects the largest value of i over the columns x0+dx/4..x1-dx/4;
// the i1 stored here is reassigned further below before it is read
for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap
|
|
i=loop(box1->p,x,y1,dy,cs,0,UP);
|
|
/* on small chars bypass possible low left serifs */
|
|
if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
|
|
if (i2>1) i+=i2-1; }
|
|
if (i>y) { y=i; i1=x; }
|
|
} if( y<=dy/8 ) Break; if (y<dy/4) ad=80*ad/100;
|
|
for(i=1,x=x0+dx/3;x<=x1-dx/8 && i;x++){ // vert crossed 2 ???
|
|
if( num_cross(x,x,y0,y1, box1->p,cs) == 2 ) i=0;
|
|
} if( i ) Break;
|
|
for(i=1,y=y0;y<=y0+3*dy/8 && i;y++){ // upper 2 vert lines
|
|
if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
|
|
} if( i ) Break;
|
|
for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ // middle h line
|
|
if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0;
|
|
} if( i ) ad=95*ad/100; /* sometimes there is a small gap */
|
|
for(i=1,y=y1-dy/4;y<=y1 && i;y++){ // lower 2 vert lines
|
|
if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
|
|
} if( i ) Break;
|
|
if( get_bw(x1-dx/3,x1,y0,y0+dy/4,box1->p,cs,1) != 1 ) Break; // pixel ru
|
|
// right outline sampled from the right border at several rows:
// i keeps the largest value seen so far, the last row must be below it
x=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(x>dx/2) Break; i=x; // ru
|
|
x=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(x<=i ) Break; i=x; // rc
|
|
x=loop(bp,dx-1, 5*dy/8,dx,cs,0,LE); if(x>i ) i=x;
|
|
x=loop(bp,dx-1, 6*dy/8,dx,cs,0,LE); if(x>i ) i=x;
|
|
x=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(x>=i ) Break; // rd
|
|
|
|
// left border sampled at three rows; the middle value must lie close to
// the mean of the outer two
i1=loop(bp,0, dy/4,dx,cs,0,RI); // straight
|
|
i2=loop(bp,0, dy/2,dx,cs,0,RI);
|
|
i3=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if( abs(i1+i3-2*i2)>1+dx/16 ) Break;
|
|
if (dy>15)
|
|
if (loop(bp,dx-1, dy/2,dx,cs,0,LE)>=loop(bp,dx-1, dy-1,dx,cs,0,LE)
|
|
&& loop(bp,dx-1,3*dy/16,dx,cs,0,LE)>=loop(bp,dx-1,dy/16,dx,cs,0,LE)+dx/8 ) Break; // ~ff
|
|
if (dy>7)
|
|
if (loop(bp,dx-1,dy-2 ,dx,cs,0,LE)
|
|
>loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)) {
|
|
ad=98*ad/100;
|
|
if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)==0
|
|
&& loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)>0 ) Break; // broken B ??
|
|
}
|
|
// hole count: exactly one hole passes untouched; otherwise i counts the
// holes found by num_hole() in the upper two thirds of the box
j=sdata->holes.num;
|
|
if (j != 1) {
|
|
i=num_hole (x0,x1,y0,y1-dy/3,box1->p,cs,NULL);
|
|
// j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL);
|
|
if (i==0) ad=90*ad/100; /* some times there is a small gap */
|
|
if (j>1 || j>i) Break;
|
|
}
|
|
if (sdata->holes.num < 1) ad=90*ad/100;
|
|
if (sdata->holes.num==1)
|
|
if (sdata->holes.hole[0].y1 > 3*dy/4) ad=95*ad/100; // alpha
|
|
|
|
if (!hchar) ad=98*ad/100;
|
|
if ( gchar) ad=98*ad/100;
|
|
Setac(box1,'R',ad);
|
|
break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
/*
 * ocr0_m - rule based test whether the box in sdata looks like 'm'.
 *
 * Input:  sdata->box1, sdata->bp (used with box relative coordinates
 *         0..dx-1 / 0..dy-1), sdata->holes, sdata->hchar, sdata->gchar
 *         and the threshold sdata->cs.
 * Output: calls Setac(box1,'m',...) if the test runs to its end.
 *         Returns 'm' directly if ad is still >=100 after the final
 *         Setac(), otherwise box1->c.
 *
 * The test is a for-block entered with ad=100 whose body ends with
 * `break`, so it is executed at most once.  A check either leaves the
 * block early (Break, the trailing remark names the excluded look-alike)
 * or scales the probability ad down.
 *
 * NOTE(review): loop(), num_cross(), Setac() and the Break/DBG/MSG macros
 * are defined outside this block.  From their use here
 * loop(p,x,y,len,cs,col,dir) presumably returns the length of the run of
 * colour col (0=white, 1=black) starting at (x,y) in direction dir, and
 * num_cross() presumably counts the strokes crossed along a line --
 * confirm against their definitions.
 */
static wchar_t ocr0_m(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
handwritten=0,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad; /* tmp-vars */
|
|
|
|
// --- test m -------
|
|
for(ad=d=100;dx>4 && dy>3;){
|
|
DBG( wchar_t c_ask='m'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if (sdata->holes.num > 0) ad=96*ad/100;
|
|
x =loop(bp,dx-1,dy/2,dx,cs,0,LE); if(3*x>dx) Break; // ~K
|
|
y=dy/2;
|
|
// i = crossings in row dy/2, or in row dy/2+1 if row dy/2 does not have 3
i=num_cross(0,dx-1,y ,y ,bp,cs); if (i!=3)
|
|
i=num_cross(0,dx-1,y+1,y+1,bp,cs);
|
|
if (num_cross(0,dx-1,dy/2,dy/2,bp,cs)==1) Break; // 5x8 * Jul09
|
|
// NOTE(review): (i<3 && i>5) can never be true, so this check never
// fires.  Left unchanged on purpose: the handwritten path below accepts
// shapes with fewer than 3 crossings in the middle row -- confirm the
// intended range before changing the operator.
if (i<3 && i>5) Break; // m ru rn, handwritten m
|
|
// im or glued.mm cut to nm
|
|
if (i>3) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
|
|
// walk along row y from the left: i1 and i2 end up as the widths of the
// second and the third run after the first gap
if (i>=5) { // melted rw ? (Oct08 JS)
|
|
x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // 1st v-line
|
|
x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap
|
|
x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i1=x-i1; // 2nd v-line
|
|
x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i2=x;// 2nd gap
|
|
x+=loop(bp,x,y,dx-x,cs,0,RI); i2=x-i2; // 3rd v-line
|
|
// printf("\nDBG i1,i2 %d %d", i1, i2);
|
|
if (i1>2*i2) Break; // rw
|
|
}
|
|
// search upwards from row dy-1-dy/8 for the first row with more than 2
// crossings, then continue upwards while the rows have exactly 3
for (i=0,y=dy-1-dy/8;y>dy/2;y--) {
|
|
i=num_cross(0,dx-1,y,y,bp,cs); if (i>2) break;
|
|
} if (i>3) Break;
|
|
for ( ;y>dy/2;y--) {
|
|
i=num_cross(0,dx-1,y,y,bp,cs); if (i!=3) break;
|
|
} if (i>5) Break; y++; i5=y;
|
|
// the lower the row i5 where the 3-crossing zone ends, the higher the
// handwritten score (10 below the middle, 60 below 3/4 of the height)
if (y> dy/2) handwritten=10;
|
|
if (y>3*dy/4) handwritten=60;
|
|
/* @@...............
|
|
@@......,........
|
|
@@,...@@@....@@@.
|
|
@@,,.@@@@..@@@@@,
|
|
@@@.@@@@@.@@@@@@,
|
|
@@;@@@@@@@@@;,@@,
|
|
@@@@@,.@@@@,,,@@@ <- i5
|
|
,@@@...;@@....@@@
|
|
.@;...........,@@
|
|
...............@@
|
|
i1 i2 i3 i4
|
|
*/
|
|
// walk along row y=i5 from the left and record the x positions where
// the colour changes (see the sketch above)
x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line
|
|
x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // first gap
|
|
x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i2=x; // 2nd v-line
|
|
x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i3=x; // 2nd gap
|
|
x+=loop(bp,x,y,dx-x,cs,0,RI); if(x<5*dx/8) Break; i4=x; // 3rd v-line
|
|
if (x>=dx) Break; // missing 3rd v-line, ~W
|
|
MSG(fprintf(stderr,"y=%d x=%d %d %d %d",y,i1,i2,i3,i4);)
|
|
// (i2-i1) and (i4-i3) are the widths of the two gaps; a large difference
// rejects, a moderate one only reduces ad
if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/4 ) Break; // same gap width? rn
|
|
if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/8 ) ad=98*ad/100; // same gap width? rn
|
|
// the same game for the lower part =>l1 l2 l3 l4 ???
|
|
i =loop(bp,0,5*dy/8,dx,cs,0,RI);
|
|
i =loop(bp,i,5*dy/8,dx,cs,1,RI);
|
|
x =loop(bp,0,dy-dy/32-1,dx,cs,0,RI);
|
|
x =loop(bp,x,dy-dy/32-1,dx,cs,1,RI);
|
|
// NOTE(review): the flag stored in i here is not read again before i is
// reassigned in the loops below, so the serif result is currently unused
if( x > i+1 ) i=1; else i=0; /* looks like serif m, Okt00 */
|
|
// y = largest upward run from the bottom row within the first gap i1..i2-1
for(y=0,x=i1;x<i2;x++) {
|
|
i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
|
|
}
|
|
if(y<dy/4 || y<y1-y0-i5-1-dy/16) Break; // no gap detected
|
|
// the same measurement for the second gap i3..i4-1
for(y=0,x=i3;x<i4;x++) {
|
|
i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
|
|
}
|
|
if(y<dy/4) Break; // no gap detected
|
|
// look for a column in i1..i4-1 whose downward run from the top row
// reaches dy/2 or more; tolerated only if handwritten>=10
for(x=i1;x<i4;x++) if( loop(bp,x,0,dy,cs,0,DO)>=dy/2 ) break;
|
|
if(x<i4 && handwritten<10) Break; // gap detected
|
|
// glued rn as m ??? hmm seems a balance act
|
|
if(i2-i1>i4-i3+dx/16){
|
|
// y tracks the largest thickness seen so far; the inner Break leaves
// this x loop early, which the check after the loop turns into a reject
for(y=0,x=(i1+i2)/2;x<i2;x++){
|
|
i=loop(bp,x,0,dy,cs,0,DO);
|
|
i=loop(bp,x,i,dy,cs,1,DO); // measure thickness
|
|
if( i>y ) y=i; if( 2*i<y ) Break;
|
|
}
|
|
if(x <i2) Break; // unusual property for m (see n)
|
|
}
|
|
if(gchar) ad=99*ad/100;
|
|
if(hchar) ad=99*ad/100;
|
|
|
|
if( loop(bp,dx-1,dy/16,dx,cs,0,LE)<2
|
|
&& loop(bp,dx-1,dy/4 ,dx,cs,0,LE)>3 ) Break; // melted WT
|
|
|
|
x=loop(bp,dx-1,dy/2,dx,cs,0,LE);
|
|
if (x>2 && loop(bp,dx-1-x/2,0,dy,cs,0,DO)<dy/2) Break; // melt toc
|
|
if (loop(bp,(i3+i4)/2,0,dy,cs,0,DO)>dy/2) Break; // N
|
|
|
|
// {\it m}
|
|
// early Setac with a reduced value; the remaining checks may still
// leave the block before the final Setac below
if( loop(bp,1, dy/4,dx,cs,0,RI)
|
|
>loop(bp,0,7*dy/8,dx,cs,0,RI) )
|
|
Setac(box1,'m',98*ad/100);
|
|
|
|
if (handwritten<10){
|
|
x =loop(bp,0,dy/4,dx,cs,0,RI);
|
|
x+=loop(bp,x,dy/4,dx,cs,1,RI);
|
|
// ad is reduced once per column that matches, so the reductions accumulate
for( ;x<i4;x++){ // x=i1 ?
|
|
i=loop(bp,x,0,dy,cs,0,DO);
|
|
if (i>=dy/4) ad=99*ad/100;
|
|
if (i>(dy+2)/4) ad=95*ad/100;
|
|
if (3*i>dy) Break;
|
|
}
|
|
if(x<i4) Break; // gap detected
|
|
}
|
|
|
|
if (box1->dots) ad=99*ad/100;
|
|
Setac(box1,'m',ad);
|
|
if (ad>=100) return 'm';
|
|
break;
|
|
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
/*
 * ocr0_tT - rule based test whether the box in sdata looks like 'T' or 't'.
 *
 * Input:  sdata->box1 (frame x0..x1,y0..y1, base lines m1..m4, dots),
 *         sdata->bp (used with box relative coordinates 0..dx-1 /
 *         0..dy-1), sdata->holes, sdata->hchar, sdata->gchar and the
 *         threshold sdata->cs.
 * Output: calls Setac(box1,'T',ad) and/or Setac(box1,'t',ad) for every
 *         test that runs to its end.  Returns 'T' directly if the T test
 *         ends with ad>=100, otherwise box1->c.
 *
 * Each test is a for-block entered with ad=100 whose body ends with
 * `break`, so it is executed at most once.  A check either leaves the
 * block early (Break, the trailing "~X" remark names the excluded
 * look-alike) or scales the probability ad down.
 *
 * NOTE(review): loop(), num_cross(), get_bw(), getpixel(), Setac() and
 * the Break/DBG/MSG macros are defined outside this block.  From their
 * use here loop(p,x,y,len,cs,col,dir) presumably returns the length of
 * the run of colour col (0=white, 1=black) starting at (x,y) in
 * direction dir -- confirm against their definitions.
 */
static wchar_t ocr0_tT(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,i1,i2,i3,i4,j,d,x,y,yb,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad; /* tmp-vars */
|
|
|
|
// --- test T ---------------------------------------------------
|
|
for(ad=d=100;dx>2 && dy>3;){ // dx>1 dy>2*dx
|
|
DBG( wchar_t c_ask='T'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
// upper horizontal line
|
|
// i1/i2: downward run from the top row near the left/right border;
// NOTE(review): i3 and i4 are computed but not read anywhere in this function
i1= loop (bp, dx/8,0,dy,cs,0,DO); // left side
|
|
i2= loop (bp,dx-1-dx/8,0,dy,cs,0,DO); // right side
|
|
i3= loop (bp, dx/8,i1,dy,cs,1,DO); // left side
|
|
i4= loop (bp,dx-1-dx/8,i2,dy,cs,1,DO); // right side
|
|
if (i1>dy/4 || i2>dy/4) Break;
|
|
// every column between the two samples must stay within dy/8 of at
// least one of i1 and i2
for (x=dx/8;x<dx-1-dx/8;x++) {
|
|
i= loop (bp,x,0,dy,cs,0,DO);
|
|
if (i>i1+dy/8 && i>i2+dy/8) break;
|
|
if (i<i1-dy/8 && i<i2-dy/8) break;
|
|
} if (x<dx-1-dx/8) Break;
|
|
if( get_bw( 0,dx-1, dy/2, dy/2,bp,cs,1) != 1 ) Break;
|
|
if( get_bw( 0,(dx-1)/8, dy/2,dy-1-dy/8,bp,cs,1) == 1 ) Break;
|
|
if( get_bw( 0,3*dx/16, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break;
|
|
if( get_bw(dx-1-dx/4,dx-1, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break;
|
|
// center width
|
|
for( y=dy/4;y<3*dy/4;y++){ // between top and bottom-serif
|
|
i=dx/4+loop(bp,dx/4,y,dx,cs,0,RI); // left side of vertical line
|
|
j= loop(bp, i,y,dx,cs,1,RI); // width of vertical line
|
|
if (2*j>dx+1 || i+j>=dx || i<dx/4-1) break; // ~r?7 Jan08,
|
|
// but allow serif T Jul09
|
|
} if (y<3*dy/4) Break; // Jan07
|
|
// down width
|
|
// x still holds the final value of the top-line scan loop above
for( y=3*dy/4;y<dy;y++){
|
|
i= loop(bp,dx/4,y,dx,cs,0,RI);
|
|
i= loop(bp, i,y,dx,cs,1,RI);if(4*i>3*x) break; //~I
|
|
} if( y<dy ) Break;
|
|
|
|
i =dx/4+loop(bp,dx/4,dy/4,dx,cs,0,RI);if(i>3*dx/4) Break; // ~7
|
|
i+= loop(bp,i ,dy/4,dx,cs,1,RI);if(i>3*dx/4) Break;
|
|
|
|
if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1
|
|
&& num_cross(0,dx-1, dy-2, dy-2,bp,cs) != 1 ) Break;
|
|
// NOTE(review): both operands test the same row 2*dy/3; the pair above
// tests two neighbouring rows, so a second row may have been intended
if( num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1
|
|
&& num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 ) Break;
|
|
if (box1->m3 && 2*y1>box1->m3+box1->m4
|
|
&& loop(bp,0, 0,dy/2,cs,0,DO)>=dy/4
|
|
&& loop(bp,0,dy-1,dy ,cs,0,UP)<=dy/2) ad=96*ad/100; // ~J
|
|
if (gchar) ad=98*ad/100;
|
|
if( loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8) ad=99*ad/100; // ~J
|
|
// i = run in front of the stem in row dy/2, j = stem width;
// dx-j-i is what remains to the right of the stem
i = loop(bp,0,dy/2,dx,cs,0,RI);
|
|
j = loop(bp,i,dy/2,dx,cs,1,RI);
|
|
if( 2*i>=dx || 2*(dx-j-i)<i) ad=95*ad/100; // ~J
|
|
|
|
Setac(box1,'T',ad);
|
|
if (ad>=100) return 'T';
|
|
break;
|
|
}
|
|
// --- test t ---------------------------------------------------
|
|
// written t can look like a + or even with missing right side
|
|
// smallest t found in win-screenshot (prop-font) dx=2
|
|
for(ad=d=100;dx>1 && dy>=box1->m3-box1->m2-1;){ // sometimes no hchar!
|
|
DBG( wchar_t c_ask='t'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if (dy<=box1->m3-box1->m2+1) ad=96*ad/100; // bad line detection?
|
|
if (num_cross(0,dx-1,0 ,0 ,bp,cs) != 1) { // font4x5 *=x+-
|
|
if (dy<10) Break; ad=98*ad/100;
|
|
}
|
|
if (num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1) { // *=x+-
|
|
if (dy<10) Break; ad=98*ad/100;
|
|
}
|
|
// x = longest run found in the rows scanned, yb = the row where it was
// found; the scan stops early at rows with too many crossings
for(x=0,yb=j=y=dy/32+3*dy/16;y<5*dy/8;y++)if(y>0){ // upper cross line
|
|
i=loop(bp,0,y,dx,cs,0,RI);
|
|
i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;yb=j=y; } // hor. line
|
|
i=num_cross(0,dx-1,y ,y ,bp,cs);
|
|
j=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i>2 && j>2) break;
|
|
if( y<11*dy/16
|
|
&& num_cross(0,dx-1,y ,y ,bp,cs) != 1
|
|
&& ( num_cross(0,dx-1,y+dy/8,y+dy/8,bp,cs) != 1 || dy<13) // against noise
|
|
) break;
|
|
} if( y<4*dy/8 ) Break;
|
|
if (dy>12 && x>4 && x>dx/2 && yb<=(dy+4)/8)
|
|
if ( loop(bp,dx-1-3*x/4,yb,dy,cs,1,UP)
|
|
<=loop(bp,dx-1-1*x/4,yb,dy,cs,1,UP)+1 )
|
|
if ( loop(bp,0 ,dy/2,dy,cs,1,UP)>dx/8 ) Break; // ~C
|
|
|
|
if (x<dx/2) ad=95*ad/100; // unusual small ?
|
|
if (x>=dx && 9*dx>=8*dy) { ad=99*ad/100; } // +
|
|
|
|
// upper quarter: reject on two neighbouring rows with 2 crossings or on
// a jump of more than 1 in the right border distance between rows
i=loop(bp,dx-1,0,dx,cs,0,LE);
|
|
for(y=0;y<dy/4;y++){
|
|
if( num_cross(0,dx-1,y ,y ,bp,cs) == 2
|
|
&& num_cross(0,dx-1,y+1,y+1,bp,cs) == 2 ) break;
|
|
j=loop(bp,dx-1,y,dx,cs,0,LE); if(j-i>1) break; i=j;
|
|
}
|
|
if( y<dy/4 ) Break; // ~f
|
|
|
|
i=loop(bp,dx-1,yb,dx,cs,0,LE);
|
|
for(y=dy/8;y<yb;y++)
|
|
if( loop(bp,dx-1,y,dx,cs,0,LE)>i ) break;
|
|
// NOTE(review): plain `break` here, unlike the Break used by the other
// rejecting checks; the block is left without Setac either way -- confirm
if( y==yb ) break;
|
|
|
|
// i = smallest stem thickness measured in the rows dy/2, dy/4 and 3*dy/4;
// the horizontal line x must be at least 1.5 times as long
j=loop(bp,0, dy/2,dx,cs,0,RI);
|
|
j=loop(bp,j, dy/2,dx,cs,1,RI); i=j; // thickness
|
|
j=loop(bp,0, dy/4,dx,cs,0,RI);
|
|
j=loop(bp,j, dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
|
|
j=loop(bp,0,3*dy/4,dx,cs,0,RI);
|
|
j=loop(bp,j,3*dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
|
|
if( 2*x<3*i ) Break;
|
|
|
|
if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-dx/8
|
|
<=loop(bp,dx-1, yb ,dx,cs,0,LE) )
|
|
if( loop(bp,dx-1, yb ,dx,cs,0,LE)-dx/8
|
|
>=loop(bp,dx-1,yb/2,dx,cs,0,LE) ) Break; // ~1 ???
|
|
|
|
// j is a flag: cleared when the 2x2 pixel pattern below is found in
// the rows 1..yb-1
j=1;
|
|
for(y=1;j && y<yb; y++) // no @@ pattern
|
|
for(x=0;j && x<dx-2;x++){ // ..
|
|
if( getpixel(bp,x ,y )>=cs && getpixel(bp,x+1,y )>=cs
|
|
&& getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; }
|
|
} if(!j) Break;
|
|
|
|
if( num_cross(0,dx-1,dy-2,dy-2,bp,cs) == 2
|
|
&& num_cross(0,dx-1,dy-1,dy-1,bp,cs) == 2 ) Break; // ~* (5er)
|
|
|
|
if( dy>= 16
|
|
&& loop(bp, 0, 3*dy/4,dx,cs,0,RI)
|
|
>=loop(bp, 0, dy-2,dx,cs,0,RI)
|
|
&& loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
|
|
<=loop(bp,dx-1, dy-2,dx,cs,0,LE)
|
|
&& loop(bp,dx-1, 1,dx,cs,0,LE)+dx/16
|
|
<loop(bp,dx-1,3*dy/16,dx,cs,0,LE)
|
|
&& ( loop(bp, 0, 1,dx,cs,0,RI)
|
|
>loop(bp, 0,3*dy/16,dx,cs,0,RI)+dx/16
|
|
|| loop(bp,dx-1, 0,dx,cs,0,LE)==0
|
|
|| loop(bp,dx-1, 1,dx,cs,0,LE)==0) ) ad=96*ad/100; // ~f Jan02
|
|
if(dx<8 && dy>12){ // thin f's could easily confound with t
|
|
x=loop(bp,dx-1,3*dy/16,dx,cs,0,LE);
|
|
if (x)
|
|
if (loop(bp,dx-x,0,dy,cs,0,DO)<3*dy/16
|
|
&& loop(bp, 0, 3*dy/4,dx,cs,0,RI)+1
|
|
>=loop(bp, 0, dy-2,dx,cs,0,RI)
|
|
&& loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
|
|
<=loop(bp,dx-1, dy-2,dx,cs,0,LE) ) Break;
|
|
}
|
|
// wide boxes only: row 2*dy/3 crossed more than once with strokes in
// both halves, combined with a first hole starting below dy/4
if (dx>7)
|
|
if( num_cross( 0,dx-1,2*dy/3,2*dy/3,bp,cs) > 1
|
|
&& num_cross( 0,dx/2,2*dy/3,2*dy/3,bp,cs) > 0
|
|
&& num_cross(dx/2,dx-1,2*dy/3,2*dy/3,bp,cs) > 0 )
|
|
if (sdata->holes.num > 0)
|
|
if (sdata->holes.hole[0].y0 > dy/4) Break; // ~6
|
|
// if ( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; // ~6
|
|
|
|
if( num_cross(0,dx-1,3*dy/4, 3*dy/4, bp,cs) >= 2
|
|
&& num_cross(0,dx-1,3*dy/4-1,3*dy/4-1,bp,cs) >= 2 ){
|
|
ad=99*ad/100; /* italic t ? */
|
|
if (loop(bp,dx/2 ,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
|
|
if (loop(bp,dx/2+1,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
|
|
}
|
|
|
|
x= loop(bp,dx-1,dy/2,dx,cs,0,LE);
|
|
i= loop(bp,dx-1,dy/8,dx,cs,0,LE);
|
|
if (i>x && loop(bp,dx-x,0,dy,cs,0,DO)>=dy/2) ad=90*ad/100; /* ~\ */
|
|
|
|
// x = smaller left border distance of the rows 0 and 1, compared with
// the distance in row dy/4
x= loop(bp,0, 0,dx,cs,0,RI);
|
|
i= loop(bp,0, 1,dx,cs,0,RI); if (i<x) x=i;
|
|
i= loop(bp,0,dy/4,dx,cs,0,RI);
|
|
if (i-x>1) Break; // l
|
|
|
|
// this happens quite often, do not be too strong
|
|
if (!box1->m2) ad=99*ad/100;
|
|
if (box1->m2) {
|
|
if (!hchar) ad=99*ad/100; /* some times t is not long enough */
|
|
if( y0>=box1->m2-(box1->m2-box1->m1)/4 ) ad=99*ad/100; /* too short */
|
|
if( y0>=box1->m2 ) ad=99*ad/100; /* too short */
|
|
}
|
|
|
|
if (sdata->holes.num > 0) ad=95*ad/100;
|
|
if (gchar) ad=99*ad/100;
|
|
if (box1->dots) ad=90*ad/100;
|
|
Setac(box1,'t',ad);
|
|
break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
/*
 * ocr0_sS - rule based test whether the box in sdata looks like 's' or 'S'.
 *
 * Input:  sdata->box1, sdata->bp (used with box relative coordinates
 *         0..dx-1 / 0..dy-1), sdata->holes, sdata->hchar, sdata->gchar
 *         and the threshold sdata->cs.
 * Output: if the test runs to its end, calls Setac(box1,ac,ad) where ac
 *         is 'S' when hchar is set and 's' otherwise.  Returns box1->c.
 *
 * The test is a for-block entered with ad=100 whose body ends with
 * `break`, so it is executed at most once.  A check either leaves the
 * block early (Break, the trailing "~X" remark names the excluded
 * look-alike, mostly 5) or scales the probability ad down.
 *
 * NOTE(review): loop(), num_cross(), joined(), Setac() and the
 * Break/DBG/MSG macros are defined outside this block.  From their use
 * here loop(p,x,y,len,cs,col,dir) presumably returns the length of the
 * run of colour col (0=white, 1=black) starting at (x,y) in direction
 * dir -- confirm against their definitions.
 */
static wchar_t ocr0_sS(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad; /* tmp-vars */
|
|
wchar_t ac;
|
|
|
|
// --- test sS near 5 ---------------------------------------------------
|
|
for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (4x6 font)
|
|
DBG( wchar_t c_ask='s'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
// the second scan line runs from x=5*dx/8 at the top to x=3*dx/8 at the
// bottom (presumably to follow a slanted glyph -- confirm)
if( num_cross( dx/2, dx/2,0,dy-1,bp,cs)!=3
|
|
&& num_cross(5*dx/8,3*dx/8,0,dy-1,bp,cs)!=3
|
|
&& dy>4 ) Break;
|
|
if( num_cross(0,dx-1,dy/2 ,dy/2 ,bp,cs)!=1
|
|
&& num_cross(0,dx-1,dy/2-1,dy/2-1,bp,cs)!=1 ) Break;
|
|
// get the upper and lower hole coords
|
|
y=dy/4;
|
|
x =loop(bp,0,dy/4,dx,cs,0,RI); if(x>3*dx/8) Break; /* slanted too */
|
|
x +=loop(bp,x,dy/4,dx,cs,1,RI); if(x>5*dx/8) Break; /* fat too */
|
|
// i1 is only used by the MSG output and the disabled joined() check below
i1 =loop(bp,x,dy/4,dx,cs,0,RI); i1=(i1+2*x)/2; // upper center x
|
|
y=11*dy/16;
|
|
x =loop(bp,dx-1 ,y,dx,cs,0,LE); if(x>dx/4) Break;
|
|
x +=loop(bp,dx-1-x,y,dx,cs,1,LE); if(dx>5 && dy>7 && x>dx/2) Break;
|
|
if (x>3*dx/4) Break; if(x>dx/2) { ad=98*ad/100; MSG({})}
|
|
i2 =loop(bp,dx-1-x,y,dx,cs,0,LE); i2=dx-1-(i2+2*x)/2; // center x in row 11*dy/16 (lower part)
|
|
MSG(fprintf(stderr,"~3 upper right gap (i1=%d,dy/4)",i1);)
|
|
// use i3 temporary for x of left upper border of slanted S
|
|
i3 =loop(bp,0,dy/4,dx,cs,0,RI);
|
|
for( y=dy/4;y<dy/2;y++ ) { // Jul09 ~3 font4x5
|
|
x = loop(bp,0,y,dx,cs,0,RI);
|
|
if (x>i3+dx/8
|
|
&& loop(bp,x-1,y,dx,cs,0,UP)>dy/8+1) break; // +1 for s font 5x8 Jul09
|
|
}
|
|
if( y<dy/2 ) Break;
|
|
// NOTE(review): the y computed here is only used by the disabled
// joined() check; the following loop reassigns it before it is read
y=dy/2-loop(bp,dx-1,dy/2,dy/2,cs,1,UP);
|
|
// if( !joined(bp,i1,dy/4,dx-1,y,cs) ){
|
|
// break; // sometimes thick small fonts have no gap
|
|
// }
|
|
// some row in dy/4..dy/2-1 must have a right border distance above dx/8
for(y=dy/4;y<dy/2;y++){
|
|
x=loop(bp,dx-1,y,dx,cs,0,LE);if(x>dx/8) break;
|
|
}
|
|
if(y==dy/2) Break; // Mai00
|
|
|
|
y=dy/2+loop(bp,0,dy/2,dy/2,cs,1,DO);
|
|
if( !joined(bp,0,y,i2,11*dy/16,cs) ) Break;
|
|
|
|
if (sdata->holes.num > 0)
|
|
if (sdata->holes.hole[0].y0 > dy/4) Break; // ???
|
|
// if( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break;
|
|
|
|
// right border distance of the last two rows must not differ by dx/4 or more
i1=loop(bp,dx-1,dy-1,dx,cs,0,LE);
|
|
i2=loop(bp,dx-1,dy-2,dx,cs,0,LE);
|
|
if (i2-i1 >= dx/4) Break; // ~{ 5x7font
|
|
|
|
// the same for the left border distance of the first two rows
i1=loop(bp, 0, 0,dx,cs,0,RI);
|
|
i2=loop(bp, 0, 1,dx,cs,0,RI);
|
|
if (i2-i1 >= dx/4) Break; // ~} 5x7font
|
|
|
|
// sS5 \sl z left upper v-bow ?
|
|
|
|
i1=loop(bp, 0,dy/2,dx,cs,0,RI);
|
|
i1=loop(bp, i1,dy/2,dx,cs,1,RI);
|
|
if (4*i1>=3*dx) ad=97*ad/100; // ~5 7-segment
|
|
|
|
// left border distance in the rows dy/16, 4*dy/16 and 7*dy/16: the middle
// row must lie clearly left of the mean of the outer two
i1=loop(bp,0, dy/16,dx,cs,0,RI);
|
|
i2=loop(bp,0,4*dy/16,dx,cs,0,RI);
|
|
i3=loop(bp,0,7*dy/16,dx,cs,0,RI);
|
|
if( 2*i2+dx/32 >= i1+i3 ){
|
|
if( 2*i2+dx/32 > i1+i3 || dx>9 ) Break;
|
|
// very small s?
|
|
// retry with the stroke width added to each of the three samples
i1+=loop(bp,i1, dy/16,dx,cs,1,RI);
|
|
i2+=loop(bp,i2,4*dy/16,dx,cs,1,RI);
|
|
i3+=loop(bp,i3,7*dy/16,dx,cs,1,RI);
|
|
if( 2*i2+dx/32 >= i1+i3 ) Break;
|
|
}
|
|
|
|
for(y=7*dy/16;y<5*dy/8;y++){
|
|
if( num_cross( 0,dx-1,y ,y ,bp,cs)==2 )
|
|
if( num_cross( 0,dx-1,y+1,y+1,bp,cs)==1 )
|
|
if( num_cross( 0,dx/4,y,y,bp,cs)==1 ) break; // ~5
|
|
} if(y<5*dy/8) Break; // v0.2.4a5
|
|
if ( loop(bp, dx-1,dy-2-dy/32,dx,cs,0,LE)
|
|
> loop(bp, 0, 1+dy/32,dx,cs,0,RI) + dx/4 ) Break; // ~5 Dec00
|
|
ac='s';
|
|
if (gchar) { ad=98*ad/100; MSG(fprintf(stderr,"gchar=bad");) }
|
|
if ( loop(bp, dx-1, 0,dx,cs,1,LE) // ToDo: improve
|
|
> loop(bp, 0,dy-1,dx,cs,1,RI)+dx/8 ) ad=98*ad/100; // ~5 4x5 font
|
|
if( hchar ){ // (slanted) S but 5 is very similar! check it
|
|
ac='S';
|
|
if ( loop(bp,3*dx/4, 0,dy,cs,1,DO) // ToDo: improve
|
|
> loop(bp, dx/4,dy-1,dy,cs,1,UP) ) ad=99*ad/100; // ~5
|
|
if ( loop(bp, dx-1,dy-1-dy/32,dx,cs,0,LE)
|
|
> loop(bp, 0, 0+dy/32,dx,cs,0,RI) ) ad=99*ad/100; // ~5
|
|
if ( loop(bp, 0,dy-1-dy/32,dx,cs,0,RI)
|
|
> loop(bp, dx-1, 0+dy/32,dx,cs,0,LE) ) ad=99*ad/100; // ~5
|
|
}
|
|
Setac(box1,ac,ad);
|
|
break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
/*
 * ocr0_gG - rule based test whether the box in sdata looks like 'g' or 'G'.
 *
 * Three independent tests are run one after the other:
 *   1. g with an upper hole,
 *   2. round G,
 *   3. italic g that looks like a 9 (ad is capped at 99 there).
 *
 * Input:  sdata->box1 (frame x0..x1,y0..y1, base lines m3/m4),
 *         sdata->bp (used with box relative coordinates 0..dx-1 /
 *         0..dy-1; the calls on box1->p use the absolute frame instead),
 *         sdata->holes, sdata->hchar, sdata->gchar and the threshold
 *         sdata->cs.
 * Output: calls Setac(box1,'g',ad) or Setac(box1,'G',ad) for every test
 *         that runs to its end; returns box1->c.
 *
 * Each test is a for-block entered with ad=100 whose body ends with
 * `break`, so it is executed at most once.  A check either leaves the
 * block early (Break, the trailing "~X" remark names the excluded
 * look-alike) or scales the probability ad down.
 *
 * NOTE(review): loop(), num_cross(), get_bw(), num_hole(), turmite(),
 * getpixel(), Setac() and the Break/DBG/MSG macros are defined outside
 * this block.  From their use here loop(p,x,y,len,cs,col,dir) presumably
 * returns the length of the run of colour col (0=white, 1=black)
 * starting at (x,y) in direction dir, and turmite() presumably moves
 * (x,y) along the glyph until it changes colour or leaves the given
 * frame -- confirm against their definitions.
 */
static wchar_t ocr0_gG(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad; /* tmp-vars */
|
|
|
|
// --- test g ---------------------------------------------------
|
|
/* some g's have crotchet at upper right end, so hchar can be set */
|
|
// ~italic g
|
|
for(ad=d=100;dx>2 && dy>4;){ // min 3x5
|
|
DBG( wchar_t c_ask='g'; )
|
|
if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
|
|
if( get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x1-dx/4, x1 , y1-dy/4, y1,box1->p,cs,1) != 1 ) Break; // ~p
|
|
if( get_bw(x0+dx/2, x0+dx/2, y0, y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
|
|
// rejected only if both vertical scan lines have fewer than 3 crossings
if( num_cross(x0+dx/2, x0+dx/2, y0, y1, box1->p,cs) < 3 )
|
|
if( num_cross(x1-dx/2, x1-dx/2, y0, y1, box1->p,cs) < 3 ) Break;
|
|
if (sdata->holes.num < 1) Break;
|
|
// at least one hole must end above row 5*dy/8+1
for (i=0;i<sdata->holes.num;i++){
|
|
if (sdata->holes.hole[i].y1 < 5*dy/8+1) break;
|
|
} if (i==sdata->holes.num) Break; // no upper hole found
|
|
// if( num_hole ( x0, x1, y0, y0+5*dy/8, box1->p,cs,NULL) != 1 ) Break;
|
|
for(y=dy/4;y<dy;y++) if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
|
|
if( y==dy ) Break; // ~q
|
|
if( get_bw(0,dx/2,7*dy/8,7*dy/8,bp,cs,1) != 1 ) Break; // ~q
|
|
// the second loop() is added only if the first run ends within dy/8
y =loop(bp,dx/16,0,dy,cs,0,DO); if(y<=dy/8)
|
|
y+=loop(bp,dx/16,y,dy,cs,1,DO); if(16*y>=15*dy) Break; // ~B
|
|
|
|
if (num_cross(x1, x1, (y0+y1)/2, y1, box1->p,cs)>1) {
|
|
ad=98*ad/100; // ~&
|
|
if (num_cross(x1 , x1 , y0, (y0+y1)/2, box1->p,cs)<1 ) ad=96*ad/100;
|
|
if (num_cross(x1-1, x1-1, y0, (y0+y1)/2, box1->p,cs)<1 ) ad=95*ad/100;
|
|
}
|
|
// looking for a gap
|
|
// NOTE(review): the 4th argument is dy although the scan direction is
// LE; the other LE/RI calls on bp pass dx -- confirm
for (x=0,y=dy/4;y<dy-dy/4;y++){
|
|
i=loop(bp,dx-1,y,dy,cs,0,LE); if (i>x) x=i;
|
|
} // in a good font x is greater dx/2
|
|
|
|
// the lower pair of scan lines extends to x1+dx/4, beyond the box frame
if (x<dx/2) { // bad font? or %
|
|
if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) > 2
|
|
|| num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) > 2) ad=90*ad/100;
|
|
if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
|
|
|| num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100;
|
|
}
|
|
if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) >2 ) ad=99*ad/100; // ~/o
|
|
|
|
/* test for horizontal symmetry ~8 */
|
|
// on the first mismatch y is set to dy+1, which also ends the outer
// loop; y==dy afterwards therefore means every row was mirror symmetric
for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
|
|
if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
|
|
if (y==dy) Break; /* ~8 */
|
|
|
|
if (box1->m4==0) ad=98*ad/100;
|
|
if ( hchar) ad=96*ad/100;
|
|
if (!gchar) ad=96*ad/100;
|
|
ad=98*ad/100;
|
|
Setac(box1,'g',ad);
|
|
break;
|
|
}
|
|
// --- test round G ---------------------------------------------
|
|
for(ad=d=100;dx>3 && dy>4;){ // min 4x5
|
|
DBG( wchar_t c_ask='G'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if( get_bw(x0 ,x0+dx/2,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2,x1-dx/4,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2,x0+dx/2,y1-dy/4,y1 ,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0 ,x0+dx/2,y1-dy/3,y1-dy/3,box1->p,cs,1) != 1 ) Break; // ~S
|
|
// search the first row whose right half gives get_bw()==0; y keeps that
// row and is used as the lower end of the two scan lines below
for( y=y0+dy/4;y<y1-dy/3;y++ )
|
|
if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
|
|
if( y==y1-dy/3 ) Break; // no gap
|
|
|
|
if( num_cross(x0+dx/2 , x0+dx/2 , y0, y, box1->p,cs) != 1
|
|
|| num_cross(x0+dx/2+1, x0+dx/2+1, y0, y, box1->p,cs) != 1 ) Break; // ~e
|
|
|
|
x=x0; y=y1;
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); // left bow?
|
|
if( y<y0+dy/4 ) Break; // filter W
|
|
|
|
x=x1; y=y1-dy/3; // upper right open bow
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
|
|
if( x<x1-3*dx/8 ) Break;
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
|
|
if( x<x0+dx/2 ){ // not sure, try again (not best)
|
|
x=x1; y=y1-dy/4;
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
|
|
if( x<x0+dx/2 ) Break;
|
|
}
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); // upper end right middle
|
|
// the walk must have left the frame on the right side (x>x1), at a row
// between y0+3*dy/8 and y1-dy/4
if( x<=x1 ) Break;
|
|
if( y<y0+3*dy/8 ) Break;
|
|
if( y>y1-dy/4 ) Break;
|
|
|
|
x=x1-dx/3;y=y1; // follow left C-bow, filter S
|
|
turmite(box1->p,&x,&y,x0,x1,y0+dy/4,y1,cs,LE,UP); // w=LE b=UP
|
|
if( y>y0+dy/4+1 ) Break; /* leave box below for S or on top for CG */
|
|
MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
|
|
/* if (y<y0) y++; else x++; */ /* enter the box again */
|
|
turmite(box1->p,&x,&y,x0,x1,y0 ,y1,cs,RI,UP);
|
|
MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
|
|
if( y>y0 ) Break;
|
|
if (sdata->holes.num > 0) Break;
|
|
// if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) > 0 ) Break;
|
|
// i is used as a flag in the following loops: it is cleared when a row
// with 2 crossings is found
if( dx>4 && dy>6){ // no (<[
|
|
for(i=1,y=0;i && y<dy/3;y++)
|
|
if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
|
|
if( i ) ad=98*ad/100;
|
|
for(i=1,y=0;i && y<dy/3;y++)
|
|
if( num_cross(0,dx-1,dy-1-y,dy-1-y,bp,cs) == 2 ) i=0;
|
|
if( i ) Break;
|
|
}
|
|
for(i=1,y=dy/2;i && y<dy;y++)
|
|
if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
|
|
if( i ) Break;
|
|
// i tracks the largest left border distance seen so far in the lower
// quarter; a row more than dx/20 below that maximum rejects
for(i=0,y=3*dy/4;y<dy;y++){
|
|
x=loop(bp,0,y,dx,cs,0,RI); // falling edge <=> Z
|
|
if( x<i-dx/20 ) break;
|
|
if( x>i ) i=x;
|
|
} if( y<dy ) Break;
|
|
|
|
// only check the middle!
|
|
// i = longest run found, i1 = its row; i1 keeps its start value dy/4 if
// no run was found, which the check after the loop rejects
for(i=0,i1=y=dy/4;y<dy-dy/4;y++){ // look for horizontal line
|
|
x=loop(bp,dx-1 ,y,dx/4,cs,0,LE);
|
|
x=loop(bp,dx-1-x,y,dx/2,cs,1,LE); if(x>i){ i=x;i1=y; }
|
|
} if( i1<=dy/4 || i1>=dy-dy/4 ) Break; // around the middle ?
|
|
// check from above for gap and left vertical line (~S)
|
|
x =loop(bp,0,i1,dx ,cs,0,RI);
|
|
x+=loop(bp,x,i1,dx-x,cs,1,RI); // left vertical bow
|
|
x+=loop(bp,x,i1,dx-x,cs,0,RI); if (x>=dx) ad=90*ad/100;
|
|
MSG(fprintf(stderr,"h-bar y dx %d %d ad= %d",i1,i,ad);)
|
|
|
|
i=1; // Mar06: adapted to 4x6 font
|
|
for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
|
|
for(y=dy/2;y<dy-1 && i;y++){ // .@
|
|
if( getpixel(bp,x ,y )>=cs
|
|
&& getpixel(bp,x+1,y )< cs
|
|
&& getpixel(bp,x+1,y-1)< cs
|
|
&& getpixel(bp,x ,y-1)< cs ) { i=0;break; }
|
|
}
|
|
if(i) ad=95*ad/100; // ~C
|
|
if(!hchar) ad=98*ad/100;
|
|
if( gchar) ad=98*ad/100;
|
|
|
|
Setac(box1,'G',ad);
|
|
break;
|
|
}
|
|
// --- test \it g like 9 ----------------------------------------------
|
|
for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx
|
|
DBG( wchar_t c_ask='g'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if( num_cross(x0+dx/2,x0+dx/2,y0,y1,box1->p,cs) != 3 // pre select
|
|
&& num_cross(x0+dx/4,x1-dx/4,y0,y1,box1->p,cs) != 3 ) Break;
|
|
// x = deepest notch found from the left border, i = its row
for( x=0,i=y=y0+dy/2;y<=y1-3*dy/16;y++){ // search notch
|
|
j=loop(box1->p,x0,y,dx,cs,0,RI);
|
|
if( j>2 && j>dx/4 && y<y1-3 && j<dx/2 ) // long bow
|
|
j+=loop(box1->p,x0+j-2,y+1,dx,cs,0,RI)-2;
|
|
if( j>x ) { x=j; i=y; }
|
|
}
|
|
if( x<4*dx/8 ) Break;
|
|
if( num_cross(x0+dx/2,x1,i ,y1,box1->p,cs) != 1
|
|
&& num_cross(x0+dx/2,x1,i+1,y1,box1->p,cs) != 1 ) Break;
|
|
// exactly one hole above the notch row and none below it
if( num_hole(x0,x1,y0,i+1,box1->p,cs,NULL)!=1 ) Break;
|
|
if( num_hole(x0,x1,i-1,y1,box1->p,cs,NULL)!=0 ) Break;
|
|
// NOTE(review): the 4th argument is dy although the scan direction is
// RI; the other RI/LE calls pass dx -- confirm
if( loop(box1->p,x0,y1 ,dy,cs,0,RI)>dx/3 &&
|
|
loop(box1->p,x0,y1-1,dy,cs,0,RI)>dx/3) Break; // no q
|
|
for( x=0,i=y=y0+dy/3;y<=y1-dy/3;y++){ // search notch
|
|
j=loop(box1->p,x1,y,dx,cs,0,LE);
|
|
if( j>x ) { x=j; i=y; }
|
|
} if( x>dx/2 ) Break; // no g
|
|
i1=loop(bp,dx-1,dy/8 ,dx,cs,0,LE); if(i1>dx/2) Break;
|
|
i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
|
|
i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if(i1+i3<2*i2-dx/8) Break; // convex
|
|
i1=loop(bp,dx-1,dy/4 ,dx,cs,0,LE); if(i1>dx/2) Break;
|
|
// i3 is recomputed with the same arguments as three lines above
i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
|
|
for(y=dy/4;y<dy-1-dy/4;y++){
|
|
i2=loop(bp,dx-1,y,dx,cs,0,LE);
|
|
if(i1+i3-2*i2<-1-dx/16) break; // convex from right ~g ~3
|
|
} if(y<dy-1-dy/4) Break;
|
|
x=loop(bp,dx -1,6*dy/8,dx,cs,0,LE); if(x>0){ x--; // robust
|
|
y=loop(bp,dx-x-1, dy-1,dy,cs,0,UP);
|
|
if(y<dy/8) Break; // ~q (serif!)
|
|
}
|
|
// %
|
|
// the lower pair of scan lines extends to x1+dx/4, beyond the box frame
if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) > 2) ad=90*ad/100;
|
|
if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
|
|
|| num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100;
|
|
|
|
if (box1->m4==0) ad=98*ad/100;
|
|
if ( hchar) ad=96*ad/100;
|
|
if (!gchar) {
|
|
if (box1->m4 - box1->m3 > 2) ad=96*ad/100;
|
|
else {
|
|
if (y1 > box1->m3) ad=99*ad/100;
|
|
else ad=97*ad/100;
|
|
}
|
|
}
|
|
if (ad>99) ad=99; // never be sure to have a 9
|
|
Setac(box1,'g',ad);
|
|
break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
// rewritten for vector usage v0.41
|
|
static wchar_t ocr0_xX(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
// pix *bp=sdata->bp; // obsolete
|
|
int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0, x1=box1->x1, y0=box1->y0, y1=box1->y1; // ,cs=sdata->cs;
|
|
int dx=x1-x0+1, dy=y1-y0+1, /* size */
|
|
(*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */
|
|
ad; /* tmp-vars */
|
|
wchar_t bc=UNKNOWN;
|
|
|
|
// --- test xX ---------------------------------------------------
|
|
// rewritten for vectors 0.41
|
|
for(ad=d=100;dx>2 && dy>3;){ // min 3x4
|
|
int ld, i1, i2, i3, i4; // lien derivation, 4 inner edges
|
|
DBG( wchar_t c_ask='x'; )
|
|
if (sdata->holes.num > 0) Break; /* # */
|
|
/* half distance to the center */
|
|
d=2*sq(128/4);
|
|
/* now we check for the 4 ends of the x */
|
|
if (aa[0][2]>d) Break;
|
|
if (aa[1][2]>d) Break;
|
|
if (aa[2][2]>d) Break;
|
|
if (aa[3][2]>d) Break;
|
|
if (aa[3][0]-aa[0][0]<dx/2) Break;
|
|
if (aa[2][0]-aa[1][0]<dx/2) Break;
|
|
if (aa[1][1]-aa[0][1]<dy/2) Break;
|
|
if (aa[2][1]-aa[3][1]<dy/2) Break;
|
|
if (aa[0][1]-y0 > dy/16
|
|
&& aa[3][1]-y0 > dy/16
|
|
&& y1-aa[1][1] > dy/16
|
|
&& y1-aa[2][1] > dy/16) Break; // 10x10 ~* (X + vert. line)
|
|
/* searching for 4 notches between neighbouring ends */
|
|
|
|
/* 0907: false positive on font4x5 '*'='-' | 'x' */
|
|
i1=nearest_frame_vector(box1, aa[0][3], aa[1][3], x0+dx, y0+dy/4);
|
|
i3=nearest_frame_vector(box1, aa[0][3], aa[1][3], x0+dx, y1-dy/4);
|
|
i2=nearest_frame_vector(box1, i1, i3, x0, y0+dy/2);
|
|
if (box1->frame_vector[i2][0]<=x0+ dx/8) Break;
|
|
if (box1->frame_vector[i2][0]<=x0+ dx/4) ad=98*ad/100;
|
|
i1=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0 , y0+dy/4);
|
|
i3=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0 , y1-dy/4);
|
|
i2=nearest_frame_vector(box1, i1, i3, x1, y0+dy/2);
|
|
if (box1->frame_vector[i2][0]>=x1- dx/8) Break;
|
|
if (box1->frame_vector[i2][0]>=x1- dx/4) ad=98*ad/100;
|
|
|
|
/* only left side */
|
|
for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[i][0]
|
|
>=box1->frame_vector[j][0]) j=i; /* notice most right vector */
|
|
} if (j==i) Break;
|
|
/* calculate the distance to the center */
|
|
x=box1->frame_vector[j][0];
|
|
y=box1->frame_vector[j][1]; i1=j;
|
|
if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
|
|
if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
|
|
if ( aa[0][0]+aa[1][0]-2*x>=0) Break;
|
|
if ( aa[1][0] >= x ) Break;
|
|
if ( aa[0][0] > x ) Break;
|
|
if ( aa[0][0] >= x ) ad=99*ad/100;
|
|
if (x-x0<dx/8) Break;
|
|
if (x-x0<dx/4) ad=99*ad/100;
|
|
/* check if upper left and center point are joined directly */
|
|
ld=line_deviation(box1, aa[0][3], j);
|
|
MSG(fprintf(stderr," 0-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
/* check if lower left and center point are joined directly */
|
|
ld=line_deviation(box1, j, aa[1][3]);
|
|
MSG(fprintf(stderr," X-1 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
|
|
/* only lower side */
|
|
for (j=i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[i][1]
|
|
<=box1->frame_vector[j][1]) j=i; /* notice most upper vector */
|
|
} if (j==i) Break;
|
|
/* calculate the distance to the center */
|
|
x=box1->frame_vector[j][0];
|
|
y=box1->frame_vector[j][1]; i2=j;
|
|
if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
|
|
if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
|
|
if ( aa[1][1]+aa[2][1]-2*y<=0) Break;
|
|
/* check if lower left and center point are joined directly */
|
|
ld=line_deviation(box1, aa[1][3], j);
|
|
MSG(fprintf(stderr," 1-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
/* check if lower right and center point are joined directly */
|
|
ld=line_deviation(box1, j, aa[2][3]);
|
|
MSG(fprintf(stderr," X-2 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
|
|
/* only right side */
|
|
for (j=i=aa[2][3];i!=aa[3][3];i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[i][0]
|
|
<=box1->frame_vector[j][0]) j=i; /* notice most left vector */
|
|
} if (j==i) Break;
|
|
/* calculate the distance to the center */
|
|
x=box1->frame_vector[j][0];
|
|
y=box1->frame_vector[j][1]; i3=j;
|
|
if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
|
|
if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
|
|
if ( aa[2][0]+aa[3][0]-2*x<=0) Break;
|
|
if ( aa[3][0] <= x ) Break;
|
|
if ( aa[2][0] < x ) Break;
|
|
if ( aa[2][0] <= x ) ad=99*ad/100;
|
|
if (dx-(x-x0)<dx/8) Break;
|
|
if (dx-(x-x0)<dx/4) ad=99*ad/100;
|
|
/* check if lower right and center point are joined directly */
|
|
ld=line_deviation(box1, aa[2][3], j);
|
|
MSG(fprintf(stderr," 2-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
/* check if upper right and center point are joined directly */
|
|
ld=line_deviation(box1, j, aa[3][3]);
|
|
MSG(fprintf(stderr," X-3 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
|
|
/* only upper side */
|
|
for (j=i=aa[3][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[i][1]
|
|
>=box1->frame_vector[j][1]) j=i; /* notice lowest vector */
|
|
} if (j==i) Break;
|
|
/* calculate the distance to the center */
|
|
x=box1->frame_vector[j][0];
|
|
y=box1->frame_vector[j][1]; i4=j;
|
|
if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
|
|
if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
|
|
if ( aa[3][1]+aa[0][1]-2*y>=0) Break;
|
|
/* check if upper left and center point are joined directly */
|
|
ld=line_deviation(box1, aa[3][3], j);
|
|
MSG(fprintf(stderr," 3-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
/* check if lower left and center point are joined directly */
|
|
ld=line_deviation(box1, j, aa[0][3]);
|
|
MSG(fprintf(stderr," X-0 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
|
|
// center crossing of diagonal lines is small?
|
|
if (box1->frame_vector[i3][0] - box1->frame_vector[i1][0] > dx/2) Break;
|
|
|
|
if (gchar) ad=99*ad/100;
|
|
bc='x'; if(hchar) bc='X';
|
|
Setac(box1,bc,ad);
|
|
break;
|
|
}
|
|
// --- test \it x ---------------------------------------------------
|
|
#if 0
|
|
for(ad=d=99;dx>4 && dy>4;){ // min 3x4
|
|
DBG( wchar_t c_ask='x'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if( get_bw(x0,x0+dx/4,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
|
|
if( get_bw(x1-dx/4,x1,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
|
|
if( num_cross(x0+dx/4,x1-dx/4,y0+dy/2,y0+dy/2, box1->p,cs) != 1 ) Break;
|
|
if( num_cross(x0,x1,y0+dy/4,y0+dy/4, box1->p,cs) != 3
|
|
&& num_cross(x0,x1,y0+dy/8,y0+dy/8, box1->p,cs) < 3 ) Break;
|
|
if( num_cross(x0,x1,y1-dy/4,y1-dy/4, box1->p,cs) != 3
|
|
&& num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 3 ) Break;
|
|
if( gchar ) ad=97*ad/100;
|
|
if( hchar ) ad=96*ad/100;
|
|
bc='x';
|
|
Setac(box1,bc,ad);
|
|
break;
|
|
}
|
|
#endif
|
|
return box1->c;
|
|
}
|
|
|
|
/*
 * ocr0_yY - rule based test for the characters 'y' and 'Y' (pixel based).
 *
 * Two independent tests are run one after the other:
 *   1. an italic y/Y,
 *   2. a normal y/Y built from two upper strokes a-c and b-c and
 *      a lower stroke c-d.
 * Each test starts with a probability ad of 100 (percent), reduces it on
 * weak hints and leaves via Break as soon as a rule fails. If a test
 * passes, Setac() is called with 'y' or 'Y' and ad.
 *
 * It reads from sdata: box1, bp, hchar, gchar, cs and holes.num.
 * Coordinates used with bp are relative to the box (0..dx-1, 0..dy-1),
 * coordinates used with box1->p are absolute (x0..x1, y0..y1).
 *
 * returns: box1->c
 */
static wchar_t ocr0_yY(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,      /* size */
      ad,xa,ya,xb,yb,xc,yc,xd,yd; /* tmp-vars */
  wchar_t bc=UNKNOWN;

  // --- test italic yY --------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){ // min 3x4
    DBG( wchar_t c_ask='y'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (sdata->holes.num > 0) ad=97*ad/100;
    if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2
     && num_cross(0,dx-1,   1,   1,bp,cs) < 2 ) Break;
    if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
     && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break;
    if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
     && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 ) Break;
    if( num_cross(dx/3,dx/3,dy/4,dy-1,bp,cs) != 2
     && num_cross(dx/2,dx/2,dy/4,dy-1,bp,cs) != 2 ) Break;
    for(yc=y=0,xc=x=dx/4;x<dx-dx/4;x++){ // search deepest point
      i=loop(bp,x,0,dy,cs,0,DO); if(i>y){ yc=y=i;xc=x; }
    }
    if( y>12*dy/16 || y<3*dy/8 ) Break;
    ya=dy/8; xa=xc-loop(bp,xc,ya,dx,cs,0,LE); if(xa<  0) Break;
    yb=dy/8; xb=xc+loop(bp,xc,yb,dx,cs,0,RI); if(xb>=dx) Break;
    for(y=dy/8;y<yc-dy/8;y++){
      if( num_cross(xc,dx-1,y,y,bp,cs) != 1 ) break;
      if( num_cross(0 ,xc  ,y,y,bp,cs) <  1 ) break;
    }
    if(y<yc-dy/8) Break;
    yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE);
    /* the backslash of the sketch is escaped, "\e" is no ISO C escape */
    g_debug(fprintf(stderr," debug_yY: \n"
      " /a b \n"
      " | | \n"
      " -c/ \n"
      " \\e-d \n");)
    g_debug(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
      xa,ya,xb,yb,xc,yc,xd,yd);)
    if(xd>6*dx/8) ad=99*ad/100; // why this???
    if (loop(bp,dx-1,dy-1,dx,cs,0,LE)<1) Break;
    if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) Break;
    // if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break;
    bc='y';
    if(gchar && !hchar) bc='y'; else
    if(hchar && (!gchar || dy<14)) bc='Y'; else ad=98*ad/100; // SMALL-CAPS ???
    Setac(box1,bc,ad);
    break;
  }
  // --- test yY ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){ // min 3x4
    DBG( wchar_t c_ask='y'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if( get_bw(x0,x0,y1-dy/8,y1,box1->p,cs,1) == 1 ) {
      if( get_bw(x0,x0+4*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
    } else {
      if( get_bw(x0,x0+3*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
    }
    if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) != 2
     && num_cross(0,dx-1,   1,   1,bp,cs) != 2 ) Break;
    if( num_cross(dx/2,dx/2,0,   1,bp,cs) != 0 ) Break;
    if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
     && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break;
    if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
     && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1
     && num_cross(dx-dx/8-1,dx-dx/8-1,0,dy-1,bp,cs) != 1 ) Break;
    if( loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8+1 // Jul00
      < loop(bp,   0,dy-1-dy/8,dx,cs,0,RI) ) Break;
    for(y=0,x=dx/4;x<dx-dx/4;x++){ // search lowest point
      i=loop(bp,x,0,dy,cs,0,DO); if(i>y) y=i;
    }
    if( y>10*dy/16 || y<2*dy/8 ) Break;
    /* a = left upper end, b = right upper end (within the upper quarter) */
    for(xc=xb=xa=dx,yc=yb=ya=y=0;y<dy/4;y++){
      x =loop(bp,   0 , y,dx,cs,0,RI); if(x<xa){ xa=x;ya=y; }
      x =loop(bp,dx-1 , y,dx,cs,0,LE); if(x<xb){ xb=x;yb=y; }
    }
    if(yb>dy/8) Break;
    /* c = narrowest gap between the two upper strokes */
    for(i=dx,yc=y=dy/4;y<3*dy/4;y++){
      if( num_cross(0,dx-1,y,y,bp,cs) < 2 ) break;
      x =loop(bp,dx-1  ,y,dx,cs,0,LE);
      x+=loop(bp,dx-1-x,y,dx,cs,1,LE);
      j =loop(bp,dx-1-x,y,dx,cs,0,LE); if(j<=i){ i=j;yc=y;xc=dx-1-x-j/2; }
    }
    yc+=dy/16+1;
    yc+=loop(bp,xc,yc,i,cs,1,DO)/2;
    xa+=     loop(bp,xa  ,ya,dx,cs,1,RI)/2;
    xb=dx-1-loop(bp,dx-1,yb,dx,cs,1,LE)/2;
    yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); if(xd>6*dx/8) Break;
    /* check for serif at lower end */
    for (i=0,x=dx-1;i<dy/4;i++) {
      j=loop(bp,dx-1,dy-1-i,dx,cs,0,LE);
      if (j>x+dx/16+1) break; /* detect serif */
      if (j<x) x=j;
    }
    if (i<dy/4) xd-=loop(bp,xd,yd,dx,cs,1,LE)/2;
    MSG( fprintf(stderr," debug_yY: \n"
      " a b \n"
      " \\ / \n"
      " c \n"
      " ed ");)
    MSG(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
      xa,ya,xb,yb,xc,yc,xd,yd);)
    // check upper left line
    if( get_line2(xa,ya,xc  ,yc,bp,cs,100)<95
     && get_line2(xa,ya,xc-1,yc,bp,cs,100)<95 ) Break;
    // check upper right line
    if( get_line2(xb,yb,xc  ,yc,bp,cs,100)<95
     && get_line2(xb,yb,xc-1,yc,bp,cs,100)<95 ) {
      // Times-Italic y ???
      xb+=loop(bp,xb,yb,dx/4,cs,1,RI)-1;
      yb+=loop(bp,xb,yb,dy/8,cs,1,DO)-1;
      if( get_line2(xb,yb,xc  ,yc,bp,cs,100)<95 ) Break;
    }
    if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break;

    // decision between V and Y is sometimes very difficult
    // hope that the following code is the ultimate solution
    if( yc>=5*dy/8 && !gchar)
    if( get_line2(xa,ya,xd  ,yd,bp,cs,100)>95 )
    if( get_line2(xb,yb,xd  ,yd,bp,cs,100)>95 )
      { if (dx>4) { Break; } else ad=ad*98/100; } // ~V
    /* xa,xb,xc are reused: left distance at top, middle and bottom */
    xa=loop(bp,0,dy/8,dx,cs,0,RI);
    xb=loop(bp,0,dy/2,dx,cs,0,RI);
    xc=loop(bp,0,dy-1,dx,cs,0,RI);
    /* the three reductions are cumulative */
    if( 2*xb< xa+xc   ) ad=98*ad/100; // ~V
    if( 2*xb<=xa+xc   ) ad=98*ad/100;
    if( 2*xb<=xa+xc+1 ) ad=98*ad/100;

    bc='y';
    if ((!gchar) && (!hchar)) ad=98*ad/100;
    if(y0<box1->m2-(box1->m2-box1->m1)/4)
      { bc='Y'; if(gchar) ad=98*ad/100; }
    // SMALL-CAPS ???
    Setac(box1,bc,ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_zZ - rule based test for the characters 'z' and 'Z'.
 *
 * The test works on the vector frame of the box. It reads from sdata:
 * box1, hchar, gchar, holes.num and aa (the four line ends of the frame;
 * as used below: aa[0] upper left, aa[1] lower left, aa[2] lower right,
 * aa[3] upper right).
 *
 * The probability ad starts at 100 (percent) and is reduced on weak
 * hints; Break leaves the test as soon as a rule fails. If all rules
 * pass, Setac() is called with 'z' (or 'Z' if hchar is set) and ad.
 *
 * returns: box1->c
 */
static wchar_t ocr0_zZ(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  int i1,i2,i3,i4,i5,dbg[9],
      d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      (*aa)[4]=sdata->aa, /* the four line ends, (x,y,dist^2,vector_idx) */
      ad;                 /* tmp-vars */
  wchar_t bc=UNKNOWN;

  // --- test zZ -------
  for(ad=d=100;dx>3 && dy>3;){ // dy>dx
    DBG( wchar_t c_ask='z'; ) /* for debugging purpose */
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (sdata->holes.num > 0) ad=98*ad/100; /* # */
    /* half distance to the center */
    d=2*sq(128/4);
    /* now we check for the 4 edges of the z */
    if (aa[0][2]>d) Break;
    if (aa[1][2]>d) Break;
    if (aa[2][2]>d) Break;
    if (aa[3][2]>d) Break;
    if (aa[3][0]-aa[0][0]<dx/2) Break;
    if (aa[2][0]-aa[1][0]<dx/2) Break;
    if (aa[1][1]-aa[0][1]<dy/2) Break;
    if (aa[2][1]-aa[3][1]<dy/2) Break;
    if (aa[3][0]-aa[0][0]<4-1) Break; /* to small to hold a z */
    if (aa[2][0]-aa[1][0]<4-1) Break; /* to small */
    if (aa[3][1]-y0>dy/8) ad=99*ad/100;
    if (aa[0][1]-y0>dy/8) ad=99*ad/100;
    if (2*dx<dy) ad=99*ad/100;
    MSG(
      fprintf(stderr,"aa.xy %d %d %d %d %d %d %d %d ad %d",
        aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,
        aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,ad);)
    /* upper and lower horizontal line */
    d=line_deviation(box1, aa[3][3], aa[0][3]); if (d>2*sq(1024/4)) Break;
    /* NOTE(review): if sq(x) is x*x and 0<=d<=2*sq(1024/4), the integer
       term below truncates to 0 and ad stays unchanged - confirm intent */
    ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
    d=line_deviation(box1, aa[1][3], aa[2][3]); if (d>2*sq(1024/4)) Break;

    /* search uppermost right > from left side */
    i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0);
    x=box1->frame_vector[i1][0];
    y=box1->frame_vector[i1][1];
    if (y-y0 > 5*dy/8) Break;
    if (x-x0 < 3*dx/8) Break;
    if (x-aa[0][0]<=dx/4) Break; // ~lI
    if (x-aa[0][0]<=dx/3) ad=98*ad/100; // ~lI
    if (x-aa[0][0]< dx/2) ad=99*ad/100; // ~lI, dx/2 for fat 8x10 font
    /* search most right > ~2 from left side */
    i3=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1+2*dx, (y0+y1)/2);
    MSG(fprintf(stderr,"i23xy= %d %d %d %d ad %d",x-x0,y-y0,box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);)
    /* dy=7 i3.y=2 for fat screen font (Apr09) */
    if ( box1->frame_vector[i3][1]-y0> (dy+2)/4
      && box1->frame_vector[i3][0]-x>= 0) Break;
    if ( box1->frame_vector[i3][1]-y> dy/8+1 // +1 needed for 5x7 Z Jul09
      && box1->frame_vector[i3][0]-x>=-dx/8) ad=98*ad/100;
    if ( box1->frame_vector[i3][1]-y> dy/8+1
      && box1->frame_vector[i3][0]-x>= 0) ad=97*ad/100;
    if ( box1->frame_vector[i3][1]-y> dy/16+1) ad=99*ad/100; // 9x10 ~2
    if (box1->frame_vector[i3][0]-aa[0][0]
      < aa[3][0]-box1->frame_vector[i3][0]) Break; // ~lI
    if (box1->frame_vector[i3][0]-aa[0][0]
      <(aa[3][0]-box1->frame_vector[i3][0]-1)*2) ad=98*ad/100; // ~lI
    /* better test for a bow or peaked angle */
    /* upper part of a 2, on a Z a and b should be at c
       .....$@@@@@@a...c. o1 (o1-a)=(dx+5)^2 =dx^2+10*dx+25
       ...$$@@@@@@@@@.... (o1-b)=(dx+1)^2+4^2=dx^2+ 2*dx+18
       ..$@@$@@@$@@@@@...
       ..@@@.....$$@@@@..
       ..@@.......@$@@@b.
       ..$.........$@@@@.
       .$$..........$@@@.
       .$...........@@@@.
       .............@@@@.<
       .............$@@$.
       ............$@@@..
       ............@@$...
       ............$@$...
       --- snip ----
    */
    i4=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1+dx, y0);
    i5=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1, y0-dx);
    /* squared distance between the most right (i4) and the uppermost (i5)
       candidate of the upper right corner, large for a bow */
    d=sq(box1->frame_vector[i5][0]-box1->frame_vector[i4][0])
     +sq(box1->frame_vector[i5][1]-box1->frame_vector[i4][1]);
    if (d>2*sq(dx/8+1)) Break;
    if (d>0 && dx<8 ) ad=99*ad/100; // 7x10 2 ~Z
    if (d>1 && dx<16) ad=98*ad/100; // 9x10 2 ~Z d=2
    MSG( fprintf(stderr,"i45xy %d %d d %d ad %d upper right bow?",i4,i5,d,ad); )

    /* check if upper left end and upper right nick are joined directly */
    dbg[0]=d=line_deviation(box1, aa[0][3], i1); if (d >2*sq(1024/4)) Break;
    /* check if upper right nick and lower left end are joined directly */
    dbg[1]=d=line_deviation(box1, i1, aa[1][3]); if (d >2*sq(1024/4)) Break;

    /* search lowest left < from right side */
    i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y1);
    x=box1->frame_vector[i2][0];
    y=box1->frame_vector[i2][1];
    if (y-y0 < 3*dy/8) Break;
    if (x-x0 > 5*dx/8) Break;
    if (aa[2][0]-x<=dx/4) Break; // ~lI
    if (aa[2][0]-x<=dx/3) ad=98*ad/100; // ~lI
    if (aa[2][0]-x< dx/2) ad=99*ad/100; // ~lI, dx/2 for 9x7 fat3 z
    /* check if upper right and lower left point are joined directly */
    dbg[2]=d=line_deviation(box1,i2, aa[3][3]);
    MSG(fprintf(stderr,"left diag line? d= %d krit= 2..3*%d",d,sq(1024/4));)
    if (         d >3*sq(1024/4)) Break;
    if (dx>9  && d >2*sq(1024/4)) Break; // to strong for 5x7 Z
    /* check if lower left and lower right point are joined directly */
    dbg[3]=d=line_deviation(box1, aa[2][3],i2); if (d >2*sq(1024/4)) Break;

    MSG( fprintf(stderr," i12 %d %d ad %d", i1, i2, ad); )
    /* upper right nick - lower left nick, 0 for fat screen fonts 8x10 */
    if (box1->frame_vector[i1][0]
       -box1->frame_vector[i2][0]<dx/8-1) Break; /* 0 for 9x7 fat3 z */
    /* 16384, 26788 for 8x10 screen font (Apr09) */
    MSG(
      fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d ad %d",
        box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,
        box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,
        dbg[0], dbg[1], dbg[2], dbg[3], sq(1024)/16, ad);)
    /* reduce the probability by one percent per sq(1024/4) of deviation */
    ad=(100-dbg[0]/sq(1024/4))*ad/100;
    ad=(100-dbg[1]/sq(1024/4))*ad/100;
    ad=(100-dbg[2]/sq(1024/4))*ad/100;
    ad=(100-dbg[3]/sq(1024/4))*ad/100;

    if ( gchar) ad=98*ad/100;
    bc='z';
    if( hchar ) bc='Z';
    Setac(box1,bc,ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_wW - rule based test for the characters 'w' and 'W' (pixel based).
 *
 * Two independent tests are run one after the other:
 *   1. a w made of four straight strokes xa-xb-xc-xd-xe,
 *   2. an italic / handwritten w (omega like, three legs).
 * Each test starts with a probability ad of 100 (percent), reduces it on
 * weak hints and leaves via Break as soon as a rule fails. If a test
 * passes, Setac() is called with 'W' (hchar set) or 'w' and ad.
 *
 * It reads from sdata: box1, bp, hchar, gchar, cs and holes.num.
 * All coordinates used with bp are relative to the box.
 *
 * returns: box1->c
 */
static wchar_t ocr0_wW(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,handwritten=0,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,      /* size */
      ad,ya,yb,xa,xb,xc,xd,xe,t1; /* tmp-vars */
  wchar_t ac;

  // ------- test w ~{\it w} ---------------
  for(ad=d=100;dx>3 && dy>3;){ // dy<=dx
    DBG( wchar_t c_ask='w'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    //  xa        xe
    //   \   xc   /   <=ya connected xa-xb-xc-xd-xe
    //    xb    xd    <=yb
    // get two lowest points i3,i4,ya
    // ~ul ~uf
    for(y=dy/8;y< dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs)< 2 ) break;
    if(y<dy/2) Break;
    /* NOTE(review): this value is always overwritten by yb=y below; for
       dx<=4 y still holds the end value of the loop above - confirm */
    yb=dy-1;
    if (dx>4) { /* 4x6 is to small */
      for(y=dy-1-dy/16;y>3*dy/4;y--)
        if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
      if(y==3*dy/4) Break;
    }
    yb=y;
    t1=loop(bp,0 ,dy/4,dx,cs,0,RI);
    t1=loop(bp,t1,dy/4,dx,cs,1,RI); // thickness of line?
    /* i = rows with 4 crossings, j = rows with 3 or more than 4 */
    for(i=j=0 ;y> dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==4 ) i++;
                       else if( num_cross(0,dx-1,y,y,bp,cs)>=3 ) j++;
    if(i+5<dy/4 && 7*t1<dy) Break; // only for large letters
    if(i+j==0 && (dy>6 || dx>4)) Break;
    if(i+j==0 && dx<=4){
      if (abs(loop(bp,   1,dy-1,dy,cs,0,UP)
             -loop(bp,dx-2,dy-1,dy,cs,0,UP))>dy/8+1) Break; // 4x6 N
      if ( (  loop(bp,   1,   0,dy,cs,0,DO)>=dy-2
           && loop(bp,   0,dy-1,dy,cs,0,UP)>0)
        || (  loop(bp,dx-2,   0,dy,cs,0,DO)>=dy-2
           && loop(bp,dx-1,dy-1,dy,cs,0,UP)>0)) Break; // 4x6 UV
      ad=ad*99/100; // 4x6 font
      MSG(fprintf(stderr,"ad=%d",ad);)
    }
    if( num_cross(0,dx-1,    1,    1,bp,cs)< 2
     && num_cross(0,dx-1,dy/16,dy/16,bp,cs)< 2 ) Break;
    /* xb, xd = middle of the left and the right stroke in row yb */
    x =loop(bp,0     ,yb,dx,cs,0,RI);
    xb=loop(bp,x     ,yb,dx,cs,1,RI);xb=x+xb/2; if(xb>dx/2) Break;
    x =loop(bp,dx-1  ,yb,dx,cs,0,LE);
    xd=loop(bp,dx-1-x,yb,dx,cs,1,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break;
    /* xc = column between xb and xd with the largest distance from bottom */
    for(y=0,xc=x=xb+1;x<xd;x++)
      if((i=loop(bp,x,dy-1,dy,cs,0,UP))>y){xc=x;y=i;}
    if(dx>4 && !y) Break;
    ya=dy-1-y; // flat
    y=loop(bp,xc,ya,dy,cs,1,UP);if(y)y--;
    if (dy>6 || dx>4) { // ~4x6 font
      if( num_cross(0 ,xc  ,ya-y  ,ya-y  ,bp,cs)!= 2
       && num_cross(0 ,xc  ,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
      if( num_cross(xc,dx-1,ya-y  ,ya-y  ,bp,cs)!= 2
       && num_cross(xc,dx-1,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
    }
    ya-=y/2;
    /* xa, xe = upper ends of the left and the right stroke in row 1 */
    x =loop(bp,0     ,1 ,dx,cs,0,RI);
    xa=loop(bp,x     ,1 ,dx,cs,1,RI);
    if( x+xa>xb ){ // may be, here is a small but thick letter
      // later add some proofs
      xa=x+xa/4;
    } else {
      xa=x+xa/2;
    }
    x =loop(bp,dx-1  ,1 ,dx,cs,0,LE);
    xe=loop(bp,dx-1-x,1 ,dx,cs,1,LE);xe=dx-1-x-xe/2;
    MSG( fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d %d %d",
      xa,1,xb,yb,xc,ya,xd,yb,xe,1);)
    if (ya<dy/2 && xc<dx/2) ad=95*ad/100; /* ~N */
    i= loop(bp,xa    ,1 ,dx,cs,1,RI);
    for (x=xa;x<xa+i;x++)
      if( get_line2(x,1,xb,yb,bp,cs,100)>94 ) break;
    if (x==xa+i) Break; // no vert. line found
    if( get_line2(xb,yb-1,xc,ya      ,bp,cs,100)<95
     && get_line2(xb,yb-1,xc,ya+dy/32,bp,cs,100)<95
     && get_line2(xb,yb-1,xc,ya+dy/16,bp,cs,100)<95 ) Break;
    if( get_line2(xc,  ya,xd,  yb,bp,cs,100)<95
     && get_line2(xc+1,ya,xd,  yb,bp,cs,100)<95 ) Break;
    if( get_line2(xd,yb,xe      ,1+dy/16,bp,cs,100)<95
     && get_line2(xd,yb,dx-1    ,1+dy/8 ,bp,cs,100)<95 // round w
     && get_line2(xd,yb,xe+dx/20,1+dy/16,bp,cs,100)<95 ) Break;
    // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
    // ~ur
    MSG(fprintf(stderr,"ad=%d",ad);)
    for(i=0,y=5*dy/8;y<dy;y++){
      x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
      if (x<i) ad=98*ad/100;
    }
    if( y<dy ) Break;
    MSG(fprintf(stderr,"ad=%d",ad);)
    ac=((hchar)?'W':'w');
    if (gchar) ad=98*ad/100;
    Setac(box1,ac,ad);
    break;
  }
  // --- test ~w {\it w} ohmega? also handwritten -------
  // italic
  for(ad=d=100;dx>3 && dy>3;){ // dy<=dx 4x6font (like a H with fat bar)
    DBG( wchar_t c_ask='w'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    // ~ul ~uf
    if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)<2 ) Break;
    /* NOTE(review): handwritten is set here but never read in this
       function */
    if( num_cross(0,dx-1,dy/8,dy/8,bp,cs)<2 ) handwritten=40;
    if( num_cross(0,dx-1,dy/4,dy/4,bp,cs)<2 ) handwritten=80;
    for(i=0,y=0;y<dy-1;y++)
      if( num_cross(0,dx-1,y,y,bp,cs)==3 ) i++;
    if(i<=dy/8) Break; // three legs
    //  xa        xe
    //   \   xc   /   <=yb connected xa-xb-xc-xd-xe
    //    xb    xd
    for(y=dy/2;y<dy-1-dy/8;y++)
      if( num_cross(0,dx-1,y,y,bp,cs)==3 ) break;
    yb=y;
    /* xb, xd = middle of the left and the right gap in row yb */
    x =loop(bp,0     ,yb,dx,cs,0,RI);
    x+=loop(bp,x     ,yb,dx,cs,1,RI); if(x>dx/2) Break;
    xb=loop(bp,x     ,yb,dx,cs,0,RI);xb=x+xb/2; if(xb>dx/2) Break;
    x =loop(bp,dx-1  ,yb,dx,cs,0,LE);
    x+=loop(bp,dx-1-x,yb,dx,cs,1,LE);
    xd=loop(bp,dx-1-x,yb,dx,cs,0,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break;
    if( num_cross(xb,xd,yb,yb  ,bp,cs)!= 1 ) Break;
    if( num_cross(xb,xb,yb,dy-1,bp,cs)!= 1 ) Break;
    if( num_cross(xd,xd,yb,dy-1,bp,cs)!= 1 ) Break;
    if( num_cross(xb,xb, 0,yb  ,bp,cs)!= 0 ) Break;
    if( num_cross(xd,xd, 0,yb  ,bp,cs)!= 0 ) Break;
    // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
    if (sdata->holes.num != 0) Break;
    // ~ur
    for(i=0,y=3*dy/4;y<dy;y++){
      x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
    }
    if( y<dy ) Break; // fail for overlapping neighbouring slanted chars?
    ac=((hchar)?'W':'w');
    if (gchar) ad=98*ad/100;
    Setac(box1,ac,ad);
    break; /* accepted: plain break like all other tests, not Break */
  }
  return box1->c;
}
|
|
|
|
static wchar_t ocr0_aA(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad,ya; /* tmp-vars */
|
|
|
|
// --- test A ---------------------------------------------------
|
|
for(ad=d=100;dx>2 && dy>3;){ // min 3x4
|
|
DBG( wchar_t c_ask='A'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
// first selection (rough sieve)
|
|
if( get_bw(dx/2 ,dx/2 ,dy-1-dy/8,dy-1,bp,cs,1) == 1
|
|
&& get_bw(dx/2-1,dx/2-1,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) Break; // ~B
|
|
ya=0; /* upper end, not 0 for modified A etc. */
|
|
if (box1->modifier)
|
|
for (ya=0;ya<dy/2;ya++)
|
|
if (num_cross(0,dx-1,ya,ya,bp,cs)==0) break;
|
|
if (ya>=dy/2) ya=0; // already subtracted?
|
|
if( num_cross(0,dx-1,ya+ 1 ,ya+ 1 ,bp,cs)!=1 // 600dpi
|
|
&& num_cross(0,dx-1,ya+ dy/8 ,ya+ dy/8 ,bp,cs)!=1
|
|
&& num_cross(0,dx-1,ya+ dy/16 ,ya+ dy/16 ,bp,cs)!=1
|
|
&& num_cross(0,dx-1,ya+ dy/8+1,ya+ dy/8+1,bp,cs)!=1 ) Break;
|
|
if( num_cross(0,dx-1, 7*dy/8 , 7*dy/8 ,bp,cs)!=2
|
|
&& num_cross(0,dx-1, 7*dy/8-1, 7*dy/8-1,bp,cs)!=2 ) Break;
|
|
if ( num_cross( 0,dx/8,ya+dy/8,ya+0,bp,cs)>0 ) Break; // ~R
|
|
for(y=ya+dy/8;y<ya+dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
|
|
if( y==ya+dy/2 ) Break; i1=y;
|
|
if (dy>20) i1++; /* get arround some noise fat font */
|
|
|
|
x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) Break;
|
|
x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) Break; i2=x;
|
|
x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) Break; i2=(x+i2)/2;
|
|
// hole (i2,i1)
|
|
y+=loop(bp,i2,y,dy,cs,1,DO);
|
|
y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
|
|
if (y>5*dy/6) { MSG(fprintf(stderr,"x,y,i1,i2= %d %d %d %d",x,y,i1,i2);) }
|
|
if (y>5*dy/6) Break;
|
|
|
|
if( sdata->holes.num != ((box1->modifier==RING_ABOVE)?2:1)
|
|
|| sdata->holes.hole[0].y1-ya >= dy-1-dy/4) Break;
|
|
// if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) Break;
|
|
// out_x(box1);
|
|
i3=0;i4=0;
|
|
for(x=dx/3;x<2*dx/3;x++){
|
|
i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
|
|
i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
|
|
if(i4==1) i3=x;
|
|
} if(i4<1 || i4>2 || i3==0){
|
|
// ToDo: MSG(fprintf(stderr,"x,y,i4,i3= %d %d %d %d",x,y,i4,i3);)
|
|
Break;
|
|
}
|
|
if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) Break;
|
|
|
|
i1=loop(bp,dx-1,ya+ (dy-ya)/4,dx,cs,0,LE);
|
|
i2=loop(bp,dx-1,ya+ (dy-ya)/2,dx,cs,0,LE);
|
|
i3=loop(bp,dx-1,dy-1-(dy-ya)/4,dx,cs,0,LE);
|
|
if( 2*i2-dx/8>i1+i3 ) ad=99*ad/100; /* 6*8 font */
|
|
if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 ) Break;
|
|
|
|
i1=loop(bp,0 ,ya+ (dy-ya)/4,dx,cs,0,RI); // linke senkr. linie
|
|
i2=loop(bp,0 ,ya+ (dy-ya)/2,dx,cs,0,RI);
|
|
i3=loop(bp,0 ,dy-1-(dy-ya)/4,dx,cs,0,RI);
|
|
if( 2*i2-dx/8>i1+i3 ) ad=98*ad/100; /* 6*8 font */
|
|
if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 || i1<i3) Break;
|
|
|
|
// lower ends could be round on thick fonts
|
|
for(i3=dx,y=ya+(dy-ya)/4;y<7*dy/8;y++){ // increasing width
|
|
i1=loop(bp, 0, y,dx,cs,0,RI);
|
|
i2=loop(bp,dx-1, y,dx,cs,0,LE);
|
|
if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
|
|
} if(y<7*dy/8) Break;
|
|
if ( loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)
|
|
-loop(bp, 0,dy/2 ,dx,cs,0,RI)>0) ad=97*ad/100; // italic-a
|
|
|
|
if (!hchar) ad=99*ad/100; // italic-a
|
|
Setac(box1,'A',ad);
|
|
break;
|
|
}
|
|
// --- test a -------------------------------------------
|
|
// with a open bow above the circle starting
|
|
// on the right side of the circle
|
|
for(ad=d=100;dx>2 && dy>3;){ // min 3x4
|
|
DBG( wchar_t c_ask='a'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x1-dx/3, x1 , y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x1-dx/3, x1 , y0+dy/4, y0+dy/4,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1, box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2, x0+dx/2, y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/3, x1-dx/3, y0 , y0 ,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/4, x1-dx/2, y1 , y1 ,box1->p,cs,1) != 1 )
|
|
if( get_bw(x0+dx/4, x1-dx/3, y1-1 , y1-1 ,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0 , x0 , y0+dy/2, y1 ,box1->p,cs,1) != 1 )
|
|
if( get_bw(x0+dx/8, x0+dx/8, y0+dy/2, y1 ,box1->p,cs,1) != 1 ) Break;
|
|
if( loop(bp,3*dx/8,0,dy,cs,0,DO) > 3*dy/16 ) Break; // ~d
|
|
if( num_cross(0,dx-1,dy/4 ,dy/4 , bp,cs) >2 // ~glued am != an
|
|
&& num_cross(0,dx-1,dy/4+1,dy/4+1, bp,cs) >2 ) Break;
|
|
|
|
for( x=dx/4;x<dx-dx/4;x++ ){ // ar
|
|
i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
|
|
i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
|
|
} if( x<dx-dx/4 ) Break;
|
|
|
|
for(i=dx/8+1,x=dx/4;x<=dx-1-dx/4 && i;x++){
|
|
if( num_cross(x,x,0,bp->y-1, bp,cs) == 3 ) i--;
|
|
} if( i ) Break;
|
|
|
|
i1=loop(bp,0, dy/8,dx,cs,0,RI);
|
|
i3=loop(bp,0,3*dy/4,dx,cs,0,RI);
|
|
for(y=dy/8+1;y<3*dy/4;y++){
|
|
i2=loop(bp,0,y,dx,cs,0,RI);if(2*i2>i1+i3+1) break;
|
|
} if(y==3*dy/4) Break; // ~6
|
|
// ~ s (small thick s), look for vertikal line piece
|
|
for(x=3*dx/4;x<dx;x++)
|
|
if( loop(bp,x,dy/4,dy/2,cs,1,DO)>dy/4 ) break;
|
|
if( x==dx ) Break;
|
|
|
|
if (sdata->holes.num != 1) ad=96*ad/100; else
|
|
if (sdata->holes.num == 1)
|
|
if( num_hole ( x0, x1, y0+dy/3, y1 ,box1->p,cs,NULL) != 1 ) Break;
|
|
// if( num_hole ( x0, x1, y0, y1, box1->p,cs,NULL) != 1 ) Break;
|
|
if( num_hole ( x0, x1, y0, y1-dy/3 ,box1->p,cs,NULL) != 0 ){
|
|
i =loop(bp,0,dy/4,dx,cs,0,RI);
|
|
i =loop(bp,i,dy/4,dx,cs,1,RI);
|
|
if(i<dx/4+1) Break; // fat a
|
|
i =loop(bp,0,dy/4,dx,cs,0,RI);
|
|
i+=loop(bp,i,dy/4,dx,cs,1,RI);
|
|
for(y=dy/4;y<dy/2;y++)
|
|
if( num_cross(0,dx-1,y,y, bp,cs) !=2 ) break;
|
|
x =loop(bp,0,y-1,dx,cs,0,RI);
|
|
x+=loop(bp,x,y-1,dx,cs,1,RI);
|
|
if(x>i) Break; // ~ 8
|
|
}
|
|
/* test for horizontal symmetry ~8 */
|
|
for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
|
|
if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
|
|
if (y==dy) Break; /* ~8 */
|
|
if (hchar
|
|
&& loop(bp,dx/4,0,dy,cs,0,DO)>dy/4) Break; // ~ serif d
|
|
|
|
if (hchar) ad=96*ad/100;
|
|
if (gchar) ad=96*ad/100;
|
|
Setac(box1,'a',ad);
|
|
break;
|
|
}
|
|
// --- test hand written a ---------------------------------------------------
|
|
// rarely char, without bow above the circle
|
|
for(ad=d=100;dx>3 && dy>3;){ // min 4x4
|
|
DBG( wchar_t c_ask='a'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/3 , x0+dx/3,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
i = loop(bp,dx/2, 0 ,dy,cs,0,DO); if (i>dy/4) Break;
|
|
i+= loop(bp,dx/2, i ,dy,cs,1,DO); if (i>dy/2) Break;
|
|
i = loop(bp,dx/2, i ,dy,cs,0,DO); if (i<dy/4) Break;
|
|
if( get_bw(x0 , x0 ,y1 , y1 ,box1->p,cs,1) == 1 ) Break;
|
|
|
|
if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) != 2 ) Break;
|
|
if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
|
|
if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
|
|
i = loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (i>dy/3) Break;
|
|
y = i+loop(bp,dx/2,dy-1-i,dy,cs,1,UP); if (i>dy/2) Break;
|
|
// normal 'a' has a well separated vertical line right from the circle
|
|
// but fat 'a' is like a 'o', only bigger on the right side
|
|
if( num_cross(x0+dx/2-1,x1,y1 ,y1 ,box1->p,cs) < 2 /* 4x6font */
|
|
&& num_cross(x0+dx/2-1,x1,y1-i,y1-i ,box1->p,cs) < 2 /* 2 or 3 */
|
|
&& num_cross(x0+dx/2-1,x1,y1-y,y1-y ,box1->p,cs) < 2 )
|
|
{ if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)
|
|
<4*loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) { Break;}
|
|
else ad=98*ad/100;
|
|
}
|
|
if( num_cross(x0,x1,y0+dy/2 , y0+dy/2,box1->p,cs) < 2
|
|
|| num_cross(x0,x1,y0+dy/3 , y0+dy/3,box1->p,cs) < 2 ) Break; // Jun00
|
|
|
|
if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 )
|
|
if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) Break;
|
|
if (sdata->holes.num != 1)
|
|
if( num_hole(x0,x1-2,y0 ,y1 ,box1->p,cs,NULL) != 1 )
|
|
// if( num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 )
|
|
Break;
|
|
if( num_hole(x0,x1 ,y0+dy/3,y1-1 ,box1->p,cs,NULL) != 0 ) Break;
|
|
|
|
if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
|
|
loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
|
|
|
|
if( loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)> dx/4
|
|
&& loop(bp,dx-1,dy-2,x1-x0,cs,0,LE)> (dx+4)/8 ) ad=97*ad/100;
|
|
|
|
x=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
|
|
i=loop(bp,dx-1, dy/4,dx,cs,0,LE); if (abs(x-i)>dx/4) Break;
|
|
|
|
for( x=dx/4;x<dx-dx/4;x++ ){ // ar
|
|
i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
|
|
i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
|
|
} if( x<dx-dx/4 ) Break;
|
|
|
|
if( num_cross(x0 , x1, y1, y1,box1->p,cs) == 1 )
|
|
if( num_cross(x0 , x1, y0, y0,box1->p,cs) == 1 )
|
|
if( loop(bp,dx-1, 0,y1-y0,cs,0,DO)> dy/4
|
|
&& loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~o
|
|
if( loop(bp,dx/2,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~q
|
|
|
|
if (hchar) ad=98*ad/100;
|
|
if (gchar) ad=98*ad/100;
|
|
// handwritten-a (alpha)
|
|
Setac(box1,'a',ad);
|
|
break;
|
|
}
|
|
// --- test A_A_WITH_OGONEK 0x0104 Centr.Eur.Font -------------------------
|
|
/* not sure if we should move this to a get_CentralEuropean-function */
|
|
for(ad=d=100;dx>2 && dy>4;){ // min 3x4
|
|
DBG( wchar_t c_ask='A'; )
|
|
if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
|
|
// first selection (grobes Sieb)
|
|
if( get_bw(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) break; // ~B
|
|
if( num_cross(0,dx-1, 1 , 1 ,bp,cs)!=1 // 600dpi
|
|
&& num_cross(0,dx-1, dy/8 , dy/8 ,bp,cs)!=1
|
|
&& num_cross(0,dx-1, dy/16 , dy/16 ,bp,cs)!=1
|
|
&& num_cross(0,dx-1, dy/8+1, dy/8+1,bp,cs)!=1 ) break;
|
|
if( num_cross(0,dx-1, dy-1 , dy-1 ,bp,cs)!=1 ) break;
|
|
if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs)!=2
|
|
&& num_cross(0,dx-1, dy/3 , dy/3 ,bp,cs)!=2 ) break;
|
|
if ( num_cross( 0,dx/8,dy/8, 0,bp,cs)>0 ) break; // ~R
|
|
for(y=dy/8;y<dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
|
|
if( y==dy/2 ) break; i1=y;
|
|
if (dy>20) i1++; /* get arround some noise fat font */
|
|
|
|
x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) break;
|
|
x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) break; i2=x;
|
|
x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) break; i2=(x+i2)/2;
|
|
// hole (i2,i1)
|
|
y+=loop(bp,i2,y,dy,cs,1,DO);
|
|
y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
|
|
if (y>5*dy/6) break;
|
|
|
|
if( sdata->holes.num != 1 || sdata->holes.hole[0].y1 >= dy-1-dy/4) break;
|
|
// if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) break;
|
|
// out_x(box1);
|
|
i3=0;i4=0;
|
|
for(x=dx/3;x<2*dx/3;x++){
|
|
i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
|
|
i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
|
|
if(i4==1) i3=x;
|
|
} if(i4<1 || i4>2 || i3==0){
|
|
// ToDo: g_debug_A(printf(" A: x,y,i4,i3= %d %d %d %d\n",x,y,i4,i3);)
|
|
break;
|
|
}
|
|
if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) break;
|
|
/* dy/4 changed to dy/6 because of screenfonts */
|
|
/* there are strange fonts, one has a serif on the upper end of A */
|
|
if ( num_cross( 0,dx/8,dy/6, 0,bp,cs)>0 ) break;
|
|
if ( num_cross(dx-1-dx/4,dx-1, 0,dy/6,bp,cs)>0 ) break;
|
|
|
|
i1=loop(bp,dx-1, dy/4,dx,cs,0,LE);
|
|
i2=loop(bp,dx-1, dy/2,dx,cs,0,LE);
|
|
i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
|
|
if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 ) break;
|
|
|
|
i1=loop(bp,0 , dy/4,dx,cs,0,RI); // linke senkr. linie
|
|
i2=loop(bp,0 , dy/2,dx,cs,0,RI);
|
|
i3=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI);
|
|
if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 || i1<i3) break;
|
|
|
|
// lower ends could be round on thick fonts
|
|
for(i3=dx,y=dy/4;y<6*dy/8;y++){ // increasing width
|
|
i1=loop(bp, 0, y,dx,cs,0,RI);
|
|
i2=loop(bp,dx-1, y,dx,cs,0,LE);
|
|
if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
|
|
} if(y<6*dy/8) break;
|
|
|
|
if (!hchar) ad=96*ad/100;
|
|
if (!gchar) ad=98*ad/100;
|
|
Setac(box1,(wchar_t)LATIN_CAPITAL_LETTER_A_WITH_OGONEK,ad);
|
|
break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
/*
 * ocr0_cC - rule based test whether the box looks like 'c' or 'C'.
 *
 * Reads the box geometry (x0,x1,y0,y1), the box picture bp, the threshold cs,
 * the hchar/gchar flags and the hole count from sdata.  Every rule that fails
 * leaves the run-once loop through Break; rules that only raise doubt lower
 * the weight ad (it starts at 100).  If no rule rejects, the candidate ('C'
 * when hchar is set, otherwise 'c') is registered via Setac(box1,bc,ad).
 * One branch registers 'G' with weight 90 instead and then leaves.
 *
 * Calls on bp use box-relative coordinates (0..dx-1, 0..dy-1), calls on
 * box1->p use x0/y0-based coordinates.  A comment of the form "~X" follows
 * the file's notation for a rule aimed at the look-alike X.
 *
 * NOTE(review): the comments below assume loop() returns the run length of
 * pixels of the requested colour (0 or 1) in the given direction, get_bw()
 * tests an area for white/black pixels and num_cross() counts crossings of a
 * scan line -- these helpers are defined outside this block, confirm there.
 *
 * Returns box1->c as it stands after the tests.
 */
static wchar_t ocr0_cC(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      ad,t1; /* tmp-vars */
  wchar_t bc=UNKNOWN;

  // --- test c,C ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>2;){ // run-once block, needs at least 3x3
    DBG( wchar_t c_ask='c'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    // first sieve: left stroke at half height, lower and upper bow at mid column
    if( get_bw(x0     , x0+dx/3,y0+dy/2, y0+dy/2,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2, x0+dx/2,y1-dy/3, y1,     box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2, x0+dx/2,y0     , y0+dy/3,box1->p,cs,1) != 1 ) Break;
    if( num_cross(x0,(x0+x1)/2,(y0+y1)/2,(y0+y1)/2,box1->p,cs) > 1 ) Break; // ~ocr-a-[

    // find the first row between 1/4 and 3/4 height whose right half is empty
    for(y=y0+dy/4;y<y0+3*dy/4;y++)
      if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
    if( y==y0+3*dy/4 ) Break;
    i1=y; // i1: upper end of right gap

    // measure thickness of the left stroke at half height
    t1=loop(bp, 0,dy/2,dx,cs,0,RI);
    t1=loop(bp,t1,dy/2,dx,cs,1,RI);
    if (t1>dx/2) Break;

    // NOTE(review): the loop() call below does not depend on x, so every
    // iteration yields the same value; possibly x was meant as the column.
    // Left unchanged because it alters which shapes are accepted.
    for(y=i1,i2=0,x=x0+dx/2;x<x0+6*dx/8;x++){
      i=y-1+loop(box1->p,x0+dx/2,i1,dy,cs,0,DO);
      if( i>i2 ) { i2=i; }
    }
    if(i2<y0+5*dy/8-t1/2) Break; // i2: lowest white point above lower bow

    i3=i1+1-loop(box1->p,x0+5*dx/8,i1,dy,cs,0,UP);
    i =i1+1-loop(box1->p,x0+4*dx/8,i1,dy,cs,0,UP);
    if(i<i3) i3=i;
    if(i3>y0+  dy/4+t1/2) Break; // i3: highest point below top-line

    // walk down the gap until the right half contains black again
    for(y=i1;y<y1-dy/8;y++)
      if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 1 ) break;
    if( y-i1<dy/6 ) Break;
    i2=y-1; // i2: lower end of right gap
    // pixelbased num_cross for straight lines could fail on small fonts
    if( num_cross(x1-dx/4,x1-dx/4,i2,y0,box1->p,cs) < 1 ) Break; // ~L
    if (loop(box1->p,x0,y0+3*dy/4,dx,cs,0,RI)>dx/16)
    if( num_cross(x0+dx/2,x1,i3     ,y1,box1->p,cs) < 1
     && num_cross(x0+dx/2,x1,y1-dy/4,y1,box1->p,cs) < 1 // may fail
     && num_cross(x1     ,x1,y1-dy/4,y1,box1->p,cs) < 1 ) Break; // ~r

    // look for a 2x2 pattern in the lower right part:   . .
    //   (instead +1 use +delta?)                        @ .
    i=1;
    for(x=dx/2;x<dx-1 && i;x++)
    for(y=dy/2;y<dy-1-dy/8 && i;y++){
      if( getpixel(bp,x  ,y  )>=cs
       && getpixel(bp,x+1,y  )< cs
       && getpixel(bp,x+1,y-1)< cs
       && getpixel(bp,x  ,y-1)< cs ) { i=0;break; }
    }
    if(!i) ad=95*ad/100; // ~G

    // no row may start noticeably further left than the row at half height
    i=loop(bp,0,dy/2,dx,cs,0,RI);
    for(y=0;y<dy;y++)if( loop(bp,0,y,dx,cs,0,RI)<i-1-dx/32 ) break;
    if( y<dy ) Break; // ~r

    // scan upward from half height to quarter height:
    //   i5 = largest right end of the left stroke, i4 = smallest gap behind it
    // NOTE(review): i5 is raised to x before the comparison, so i5<x-dx/32
    // looks unsatisfiable and the loop appears to always run to y<dy/4.
    for(i5=0,i4=dx,y=dy/2;y>=dy/4;y--){
      x =loop(bp,0,y,dx,cs,0,RI);
      x+=loop(bp,x,y,dx,cs,1,RI); if(x>i5) i5=x;
      i =loop(bp,x,y,dx,cs,0,RI); if(i<i4) i4=i;
      if( i5<x-dx/32 && i>i4+dx/32 ) break; // unusual for c, more a bad e?
    }
    if( y>=dy/4 ) Break;

    if( !hchar ){ // test for e where the middle line is partly removed
      x=   loop(bp,0,dy/2,dx,cs,0,RI);
      x=x +loop(bp,x,dy/2,dx,cs,1,RI);
      y=dy/2-loop(bp,x,dy/2,dy,cs,0,UP)-1;
      i=x +loop(bp,x,y,dx,cs,1,RI);
      i=i +loop(bp,i,y,dx,cs,0,RI);
      if( num_cross(x  ,x  ,1,dy/2,bp,cs) > 1
       || num_cross(x+1,x+1,1,dy/2,bp,cs) > 1 )
      if( num_cross(i-1,i-1,1,dy/2,bp,cs) > 1
       || num_cross(i  ,i  ,1,dy/2,bp,cs) > 1 ) Break; // ~bad e
    }
    if( dy>16 && dy>3*dx && hchar ){ // ~[
      // x: sum of the left gaps near the top and near the bottom,
      // i: twice the left gap at half height
      x = loop(bp,0,     dy/16,dx,cs,0,RI);
      // fixed: was "x=+loop(...)", i.e. "x = +loop(...)", which discarded
      // the measurement of the upper row taken just above
      x+= loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
      i = loop(bp,0,     dy/2 ,dx,cs,0,RI)*2;
      if( i>=x ) // left edge does not bulge to the left in the middle
      if( num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2 ) Break;
    }
    if( get_bw(x0,x0,y0  ,y1  ,box1->p,cs,2) != 2
     && get_bw(x0,x1,y0  ,y0  ,box1->p,cs,2) != 2
     && get_bw(x0,x1,y1  ,y1  ,box1->p,cs,2) != 2
     && get_bw(x1,x1,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~[ */

    x =loop(bp,   0,dy/2,dx,cs,0,RI);
    i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
    if( (i<dx/2 || i<3) && hchar && dy>7 )
    if( loop(bp,   0,7*dy/8,dx,cs,0,RI) > x+dx/8
     && loop(bp,   0,  dy/8,dx,cs,0,RI) > x+dx/8
     && loop(bp,dx-1,dy-1-dy/ 8,dx,cs,0,LE)
      > loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)
     && loop(bp,dx-1,     dy/ 8,dx,cs,0,LE)
      > loop(bp,dx-1,     dy/16,dx,cs,0,LE) ) Break; // ~(

    // ~G without characteristic crotchet
    if (hchar && dy>15 && dx>7 && i2-y0<9*dy/16 && i1-y0<=dy/4)
    if ( loop(bp,5*dx/8,i2-y0,dy,cs,0,DO) > 2*dy/8 ){
      Setac(box1,'G',90);
      Break;
    }

    if (hchar){
      // same 2x2 pattern search as above, now in the upper right part
      i=1;
      for(x=dx/2;x<dx-1 && i;x++)
      for(y=   1;y<dy/4 && i;y++){
        if( getpixel(bp,x  ,y  )>=cs
         && getpixel(bp,x+1,y  )< cs
         && getpixel(bp,x+1,y-1)< cs
         && getpixel(bp,x  ,y-1)< cs ) { i=0;break; }
      }
      if (i) ad=98*ad/100; // ~(
      if (dy>2*dx) ad=99*ad/100;
    }
    if( loop(bp,dx-1,dy/2,dx,cs,0,LE) < 6*dx/8 ) ad=98*ad/100;

    i= loop(bp,dx-1,dy/16,dx,cs,0,LE);
    j= loop(bp,dx/2,0    ,dy,cs,0,DO);
    if (i>=dx/2 && j>dy/8 && j>2 && j<dy/2) Break; // t

    if (dy>=3*dx && dy>12) ad=99*ad/100; // (
    i= loop(bp,dx-1,dy-1,dy,cs,0,UP);
    j= loop(bp,dx/2,dy-1,dy,cs,0,UP);
    if (i==0 && j>dy/8) ad=95*ad/100; // <
    i= loop(bp,dx-1,   0,dy,cs,0,DO);
    j= loop(bp,dx/2,   0,dy,cs,0,DO);
    if (i==0 && j>dy/8) ad=95*ad/100; // <
    if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>= 3*dx/4)  ad=98*ad/100; // <
    if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>=(dx+1)/2) ad=98*ad/100; // <
    if (loop(bp,0,     dy/8,dx,cs,0,RI)>=dx/2)     ad=98*ad/100; // <

    if (gchar) ad=98*ad/100; // could happen for 5x7 font
    bc=((hchar)?'C':'c');
    Setac(box1,bc,ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_lL - rule based tests for 'L' and 'l'.
 *
 * Two independent run-once blocks follow each other.  Each starts with the
 * weight ad at 100, leaves through Break/break as soon as a rule rejects the
 * shape, lowers ad where a rule only raises doubt, and finally registers its
 * candidate with Setac().  Calls on bp use box-relative coordinates, calls on
 * box1->p use x0/y0-based coordinates.  "~X" follows the file's notation for
 * a rule aimed at the look-alike X.
 *
 * NOTE(review): comments assume loop() returns a run length of pixels of the
 * requested colour, get_bw() tests an area for white/black pixels and
 * num_cross() counts crossings of a scan line -- the helpers are defined
 * outside this block, confirm there.
 *
 * Returns box1->c as it stands after both tests.
 */
static wchar_t ocr0_lL(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int i, j, k, d, x, y, i0, i1, i2, i3, i4;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1 - x0 + 1, dy = y1 - y0 + 1;   /* box size */
  int ad;                                   /* weight of the candidate */

  // --- test L ---------------------------------------------------
  for (ad = d = 100; dx > 2 && dy > 4; ) {
    DBG( wchar_t c_ask='L'; )
    if (sdata->holes.num > 1) Break;        /* tolerant against a tiny hole */

    i = loop(bp, dx-1, dy/2, dx, cs, 0, LE);
    if (i < 3 && dy > 8) { Break; }
    if (i < dx/2) ad = 98*ad/100;           // ~G

    if (dx < 8 && 3*loop(bp, dx-1, 0, dy, cs, 0, DO) <= dy) break;  // ~G

    /* bottom line: longest black run within the lowest quarter */
    i = 0;
    i1 = 0;
    for (y = y1 - dy/4; y <= y1; y++) {
      j = loop(box1->p, x0,     y, dx, cs, 0, RI);
      j = loop(box1->p, x0 + j, y, dx, cs, 1, RI);
      if (j > i) {
        i  = j;
        i1 = y;
      }
    }
    if (i < 3*dx/4) Break;
    i1 = i;                                 // length of horizontal line

    /* thickness of the vertical stroke at half height (i2) */
    i = loop(box1->p, x0, y0 + dy/2, dx, cs, 0, RI);
    if (i > dx/2) Break;
    j = loop(box1->p, x0 + i, y0 + dy/2, dx, cs, 1, RI);
    if (i + j > dx/2) Break;
    i2 = j;

    if (   loop(bp, dx-1,      0, dx, cs, 0, LE) < dx/8
        && loop(bp, dx-1,   dy/4, dx, cs, 0, LE) > dx/2
        && loop(bp,    0, 5*dy/8, dx, cs, 0, RI) < dx/4
        && loop(bp, dx-1, 3*dy/4, dx, cs, 0, LE) < dx/4) Break;     // ~G

    /* vertical line: left gap and stroke width must stay within limits */
    i = 1;
    for (y = y0; y <= y1 - dy/4 && i; y++) {
      j = loop(box1->p, x0, y, dx, cs, 0, RI);
      if (j > (dx+2)/4 + (y1 - dy/4 - y)*dx/2/dy) {
        i = 0;
        break;
      }
      x = loop(box1->p, x0 + j, y, dx, cs, 1, RI);
      if (((x > i2+1 || 4*x < 3*i2) && y > y0 + dy/8) || 4*x > 3*i1) {
        i = 0;
      }
    }
    if (!i) Break;

    if (num_cross(0, dx-1-dx/8, dy-1-dy/2, dy-1-dy/2, bp, cs) != 1) Break;
    if (num_cross(0, dx-1,      dy/3,      dy/3,      bp, cs) != 1) Break;
    if (num_cross(0, dx-1,      dy/8,      dy/8,      bp, cs) != 1) Break;

    if (loop(bp, 0, dy-1, dx, cs, 0, RI)
        - loop(bp, 0, dy-3, dx, cs, 0, RI) > 1 + dx/16) {
      ad = 96*ad/100;                       // ~c
    }
    if (loop(box1->p, x0 + dx/4, y1, dy, cs, 0, UP) > 1 + dy/16) {
      ad = 99*ad/100;                       // ~4
    }

    if (gchar)  ad = 98*ad/100;
    if (!hchar) ad = 99*ad/100;
    if (5*dx < 2*dy && loop(box1->p, x0, y1, dx, cs, 0, RI) > dx/4) {
      ad = 99*ad/100;                       // ~l
    }
    Setac(box1, 'L', ad);
    break;
  }

  // --- test l ---------------------------------------------------
  // Recognizing an "l" is a never ending problem: there are many variants
  // and the shape is not very distinctive (under construction).
  // If unsure, the weight should be multiplied by 80..90%.
  for (ad = d = 100; dy > dx && dy > 5; ) {
    DBG( wchar_t c_ask='l'; )
    if (box1->dots > 0) Break;
    if (   num_cross(0, dx-1, dy/2, dy/2, bp, cs) != 1
        || num_cross(0, dx-1, dy/4, dy/4, bp, cs) != 1) Break;

    /* stroke thickness in the middle half: i1 thickest, i2 thinnest */
    i1 = 0;
    i2 = dx;
    for (y = dy/4; y < dy - dy/4; y++) {
      j = loop(bp, 0, y, dx, cs, 0, RI);
      j = loop(bp, j, y, dx, cs, 1, RI);
      if (j > i1) i1 = j;
      if (j < i2) i2 = j;
    }
    if (i1 > 2*i2) Break;

    if (box1->m3 && dy <= box1->m3 - box1->m2)       ad = 94*ad/100;
    if (box1->m2 - box1->m1 > 1 && y0 >= box1->m2)   ad = 94*ad/100;

    /* upper quarter: i0 thickest stroke, i3 widest left space seen since */
    i0 = 0;
    i3 = 0;
    for (y = 0; y < dy/4; y++) {
      j = loop(bp, 0, y, dx, cs, 0, RI);
      if (j > i3) i3 = j;
      j = loop(bp, j, y, dx, cs, 1, RI);
      if (j > i0) {
        i0 = j;
        i3 = 0;
      }
    }
    if (   (i0 > 4*i2 || 3*i3 > 2*dx)
        && (   loop(bp, dx-1, dy-1, dx, cs, 0, LE) > 3*dx/8
            || loop(bp,    0, dy-1, dx, cs, 0, RI) > 3*dx/8)) Break;  // ~7

    /* detect serifs on the two top rows (i3) and two bottom rows (i4) */
    // NOTE(review): the second loop() of every pair again asks for colour 0
    // although it starts where the first white run ended; colour 1 may have
    // been intended -- kept as found, confirm against loop().
    x  = loop(bp, 0, 0, dx, cs, 0, RI);
    i3 = loop(bp, x, 0, dx, cs, 0, RI);
    x  = loop(bp, 0, 1, dx, cs, 0, RI);
    x  = loop(bp, x, 1, dx, cs, 0, RI);
    if (x > i3) i3 = x;
    x  = loop(bp, 0, dy-1, dx, cs, 0, RI);
    i4 = loop(bp, x, dy-1, dx, cs, 0, RI);
    x  = loop(bp, 0, dy-2, dx, cs, 0, RI);
    x  = loop(bp, x, dy-2, dx, cs, 0, RI);
    if (x > i4) i4 = x;
    if (i3 > i1 + dx/8 + 1 && i4 > i1 + dx/8 + 1) Break;              // ~I

    /* upper quarter: right gap must not grow by more than one per row;
       j is set when two nearby rows are crossed twice */
    i = dx;
    j = 0;
    for (y = 1; y < dy/4; y++) {
      x = loop(bp, dx-1, y, dx, cs, 0, LE);
      if (x > i + 1) break;
      i = x;
      if (   num_cross(0, dx-1, y,           y,           bp, cs) == 2
          && num_cross(0, dx-1, y+1+dy/32,   y+1+dy/32,   bp, cs) == 2) {
        j = 1;
      }
    }
    if (y < dy/4) Break;
    if (j) {  // if there is a loop at the upper end, look also at the bottom
      for (y = 3*dy/4; y < dy; y++) {
        if (   num_cross(0, dx-1, y,         y,         bp, cs) == 2
            && num_cross(0, dx-1, y-1-dy/32, y-1-dy/32, bp, cs) == 2) break;
      }
      if (y == dy) Break;
    }

    if (   dx > 3
        && get_bw(dx-1-dx/8, dx-1, 0, dy/6, bp, cs, 1) != 1
        && get_bw(dx-1-dx/8, dx-1, 0, dy/2, bp, cs, 1) == 1) Break;

    if (   get_bw(dx-1-dx/8, dx-1, dy/4, dy/3, bp, cs, 1) != 1   // large I ???
        && get_bw(0,         dx/8, dy/4, dy/3, bp, cs, 1) != 1
        && get_bw(dx-1-dx/8, dx-1, 0,    dy/8, bp, cs, 1) == 1
        && get_bw(0,         dx/8, 0,    dy/8, bp, cs, 1) == 1) {
      ad = ad*97/100;
    }
    if (   get_bw(dx-1-dx/8, dx-1, dy/2, dy-1, bp, cs, 1) != 1   // r ???
        && get_bw(0,         dx/8, dy/2, dy-1, bp, cs, 1) == 1
        && get_bw(dx-1-dx/8, dx-1, 0,    dy/3, bp, cs, 1) == 1
        && get_bw(0,         dx/8, 0,    dy/3, bp, cs, 1) == 1) Break;

    /* upper 3/4: of two neighbouring rows at least one is crossed once */
    for (y = 1; y < 12*dy/16; y++) {
      if (   num_cross(0, dx-1, y,   y,   bp, cs) != 1             // sure ?
          && num_cross(0, dx-1, y-1, y-1, bp, cs) != 1) break;
    }
    if (y < 12*dy/16) Break;

    if (dx > 3) {
      /* lower half: the middle columns must contain black in every row */
      for (y = dy/2; y < dy-1; y++) {
        if (get_bw(dx/4, dx-1-dx/4, y, y, bp, cs, 1) != 1) break;
      }
      if (y < dy-1) Break;
    }

    /* is the right edge straight?  (edge sloping off to the right / kink?) */
    x = dx;
    for (y = bp->y - 1 - 5*dy/16; y >= dy/5; y--) {
      i = loop(bp, bp->x - 1, y, x1 - x0, cs, 0, LE);
      if (i - 2 - dx/16 >= x) break;
      if (i < x) x = i;
    }
    if (y >= dy/5) Break;

    /* is the left edge straight? */
    x = 0;
    for (y = bp->y - 1 - dy/5; y >= dy/5; y--) {
      i = loop(bp, 0, y, x1 - x0, cs, 0, RI);
      if (i + 2 + dx/16 < x) break;
      if (i > x) x = i;
    }
    if (y >= dy/5) Break;

    if (   box1->m4 && y1 < box1->m4
        && get_bw(x0, x1, y1+1, box1->m4 + dy/8, box1->p, cs, 1) == 1) {
      ad = ad*97/100;                       // unsure !l|
    }

    i = loop(bp, dx-1, dy/16, dx, cs, 0, LE);
    j = loop(bp, dx-1, dy/2,  dx, cs, 0, LE);
    if (   i > 3 && j > 3
        && get_bw(dx-1-i/2, dx-1-i/2, 0, dy/2, bp, cs, 1) == 1) Break;  // ~t

    /* first row in the lower part that is crossed twice */
    for (y = 5*dy/8; y < dy; y++) {
      if (num_cross(0, dx-1, y, y, bp, cs) == 2) break;
    }
    if (y < dy) {
      i  = loop(bp, 0, y, dx, cs, 0, RI);
      i += loop(bp, i, y, dx, cs, 1, RI);
      i += loop(bp, i, y, dx, cs, 0, RI)/2;       // middle of v-gap
      if (   num_cross(0, i, 5*dy/8, 5*dy/8, bp, cs) == 0
          && num_cross(i, i, 5*dy/8, y,      bp, cs) == 0) Break;       // ~J
    }
    if (   dx > 8
        && loop(bp,    0, 3*dy/4, dx, cs, 0, RI) >= dx/4
        && loop(bp,    0, 7*dy/8, dx, cs, 0, RI) <= dx/8
        && loop(bp, dx-1, 3*dy/4, dx, cs, 0, LE) <= dx/8
        && loop(bp, dx-1, 7*dy/8, dx, cs, 0, LE) <= dx/8) Break;        // ~J

    /* a typewriter-style l can look very similar to 7 */
    if (   2*i3 > 5*i1
        && loop(bp, 0, dy/4, dx, cs, 0, RI) > dx/2
        && get_bw(0, dx/8, 0, dy/4, bp, cs, 1) == 1) Break;             // ~7

    if (   loop(bp, dx-1, dy/2, dx, cs, 0, LE) > dx/2
        && get_bw(3*dx/4, dx-1, 3*dy/4, dy-1, bp, cs, 1) == 1) {
      if (loop(bp, 0, dy-1, dx, cs, 0, RI) < dx/8) ad = 99*ad/100;      // ~L
      if (5*dx > 2*dy) ad = 99*ad/100;                                  // ~L
      if (5*dx > 3*dy) ad = 99*ad/100;                                  // ~L
    }
    if (!hchar) {  // right part (bow) of h is never a l
      if (   get_bw(dx/4, dx/4, 0,    dy/4, bp, cs, 1) == 1
          && get_bw(dx/4, dx/4, dy/2, dy-1, bp, cs, 1) == 0) Break;
    }
    if (   dx > 3 && dy > 3*dx
        && loop(bp, dx/4, dy-1,      dy, cs, 0, UP) <  dy/4
        && loop(bp,    0, dy-1-dy/8, dx, cs, 0, RI) >= dx/2
        && loop(bp, dx-1, dy-1-dy/8, dx, cs, 0, LE) <= dx/4) {
      ad = 98*ad/100;                                                   // ~]
      if (loop(bp, dx-1, dy/2, dx, cs, 0, LE) == 0) Break;
    }

    /* first column of the left half with black in the upper quarter */
    for (x = 0; x < dx/2; x++) {
      if (get_bw(x, x, 0, dy/4, bp, cs, 1) == 1) break;
    }
    // works only for perpendicular chars
    if (   get_bw(x, x + dx/16, 0,     dy/16, bp, cs, 1) == 0
        && get_bw(x, x + dx/16, dy/4,  dy/2,  bp, cs, 1) == 0
        && get_bw(x, x + dx/16, dy/16, dy/4,  bp, cs, 1) == 1) {
      // NOTE(review): i stays at dx inside this loop, so x>i looks
      // unreachable if loop() is bounded by its length argument -- confirm.
      i = dx;
      for (y = 0; y < dy/4; y++) {
        x = loop(bp, 0, y, dx, cs, 0, RI);
        if (x > i) break;
      }
      if (   x >= loop(bp, 0, y+1, dx, cs, 0, RI)
          && loop(bp, 0, 0, dy, cs, 0, DO) > 1
          && loop(bp, 0, 0, dy, cs, 0, DO)
             - loop(bp, dx/16 + 1, 0, dy, cs, 0, DO) < dx/16 + 1) Break; // ~1 Jul00,Nov00
      if (num_cross(0, dx/2, y-1, y-1, bp, cs) == 2) Break;             // ~1
    }
    if (dx < 8 && dy < 12) {  // screen font
      i = loop(bp, 0, 0, dy, cs, 0, DO);
      if (   loop(bp, dx/2, 1,    dy, cs, 1, DO) >= dy-2
          && loop(bp, 0,    dy/2, dx, cs, 0, RI) >= 2
          && i > 1 && i < dy/2) Break;                                  // ~1
    }

    if (   get_bw(x1, x1, y0, y1, box1->p, cs, 2) != 2
        && get_bw(x0, x1, y0, y0, box1->p, cs, 2) != 2
        && get_bw(x0, x1, y1, y1, box1->p, cs, 2) != 2
        && get_bw(x0, x0 + dx/4, y0+1+dy/16, y1-1-dy/16, box1->p, cs, 1) != 1) Break; /* ~] */
    i = loop(bp, dx-1, dy/2, dx, cs, 0, LE);
    if (   loop(bp, 0, dy/2, dx, cs, 0, RI) >= dx/2
        && (i < dx/2 || i == 0)) {
      ad = 98*ad/100;                                                   // ~]
    }
    if (   get_bw(x0, x0, y0, y1, box1->p, cs, 2) != 2
        && get_bw(x0, x1, y0, y0, box1->p, cs, 2) != 2
        && get_bw(x0, x1, y1, y1, box1->p, cs, 2) != 2
        && get_bw(x1 - dx/4, x1, y0+1+dy/16, y1-1-dy/16, box1->p, cs, 1) != 1) Break; /* ~[ */

    /* convex or concave?  ~() */
    x = loop(bp,    0, dy/2, dx, cs, 0, RI);
    i = loop(bp, dx-1, dy/2, dx, cs, 0, LE);
    if (   loop(bp,    0, 7*dy/8, dx, cs, 0, RI) > x + dx/8
        && loop(bp,    0,   dy/8, dx, cs, 0, RI) > x + dx/8
        && loop(bp, dx-1, 7*dy/8, dx, cs, 0, LE) < i - dx/8
        && loop(bp, dx-1,   dy/8, dx, cs, 0, LE) < i - dx/8) Break;     // ~(
    if (   loop(bp,    0, 7*dy/8, dx, cs, 0, RI) < x - dx/8
        && loop(bp,    0,   dy/8, dx, cs, 0, RI) < x - dx/8
        && loop(bp, dx-1, 7*dy/8, dx, cs, 0, LE) > i + dx/8
        && loop(bp, dx-1,   dy/8, dx, cs, 0, LE) > i + dx/8) Break;     // ~)

    /* horizontal line (cross-bar) in the upper half? */
    i = loop(bp, 0, 0, dy, cs, 0, DO);
    if (dy >= 12 && i > dy/8 && i < dy/2) {
      if (   (   loop(bp, dx-1, 3*dy/16, dx, cs, 0, LE) - dx/8
                 > loop(bp, dx-1, i,   dx, cs, 0, LE)
              || loop(bp, dx-1, 3*dy/16, dx, cs, 0, LE) - dx/8
                 > loop(bp, dx-1, i+1, dx, cs, 0, LE))
          && (   loop(bp, dx-1, 8*dy/16, dx, cs, 0, LE) - dx/8
                 > loop(bp, dx-1, i,   dx, cs, 0, LE)
              || loop(bp, dx-1, 8*dy/16, dx, cs, 0, LE) - dx/8
                 > loop(bp, dx-1, i+1, dx, cs, 0, LE))
          && (   loop(bp,    0, 3*dy/16, dx, cs, 0, RI) - dx/8
                 > loop(bp,    0, i,   dx, cs, 0, RI)
              || loop(bp,    0, 3*dy/16, dx, cs, 0, RI) - dx/8
                 > loop(bp,    0, i+1, dx, cs, 0, RI))
          && (   loop(bp,    0, 8*dy/16, dx, cs, 0, RI) - dx/8
                 > loop(bp,    0, i,   dx, cs, 0, RI)
              || loop(bp,    0, 8*dy/16, dx, cs, 0, RI) - dx/8
                 > loop(bp,    0, i+1, dx, cs, 0, RI))) Break;          // ~t
      if (loop(bp, 0, i-1, dx, cs, 0, RI) > 1 && dx < 6) Break;         // ~t
      if (   loop(bp,    0, 8*dy/16, dx, cs, 0, RI) >  dx/8
          && loop(bp,    0, i,       dx, cs, 1, RI) >= dx-1
          && loop(bp, dx-1, 8*dy/16, dx, cs, 0, LE) >  dx/8
          && loop(bp, dx-1, i-1,     dx, cs, 0, LE) >  dx/8) Break;     // ~t
    }

    if (   loop(bp, 0, 1, dx, cs, 0, RI) >= dx/2
        && (   loop(bp, 0, dy-2, dx, cs, 0, RI) <= dx/8
            || loop(bp, 0, dy-1, dx, cs, 0, RI) <= dx/8)
        && (   loop(bp, dx-1, 0, dx, cs, 0, LE) <= dx/8
            || loop(bp, dx-1, 1, dx, cs, 0, LE) <= dx/8)
        && loop(bp, dx-1, dy-2, dx, cs, 0, LE) >= dx/2) {
      ad = 98*ad/100;                                                   // ~/
    }

    if (get_bw(x0, x1, y0, y1, box1->p, cs, 2) == 0) ad = 99*ad/100;

    if (!hchar || loop(bp, 0, dy/4, dx, cs, 0, RI) > dx/2) {            // ~z
      /* j: widest first black run over the rows dy/16 .. dy/16+2 */
      for (k = 0; k < 3; k++) {
        i = loop(bp, 0, dy/16 + k, dx, cs, 0, RI);
        i = loop(bp, i, dy/16 + k, dx, cs, 1, RI);
        if (k == 0 || i > j) j = i;
      }
      if (j*4 >= dx*3) ad = 98*ad/100;                                  // ~z
      if (j*8 >= dx*7) ad = 96*ad/100;                                  // ~z
    }

    if (get_bw(x0, x0, y1, y1, box1->p, cs, 2) == 0) ad = 99*ad/100;
    if (get_bw(x1, x1, y1, y1, box1->p, cs, 2) == 0) ad = 99*ad/100;
    if (ad == 100) ad--;  /* the author's note: "I have to fix that:"
                             .@@@@.<-
                             @@..@@
                             ....@@
                             ....@@<
                             ...@@.
                             ..@@@.
                             ..@@..
                             .@@...
                             @@....
                             @@@@@@<-
                           */
    if (!hchar) ad = ad*99/100;
    if (gchar)  ad = ad*99/100;
    Setac(box1, 'l', ad);
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_oO - rule based test for 'o' / 'O' (and the digit '0' as alternative).
 *
 * A run-once block starts with the weight ad at 100.  Rules that reject the
 * shape leave through Break, rules that only raise doubt lower ad.  At the
 * end 'o' or 'O' is registered with Setac(), together with '0' (and in one
 * case 'O') as lower or equal weighted alternatives.  Calls on bp use
 * box-relative coordinates, calls on box1->p use x0/y0-based coordinates.
 * "~X" follows the file's notation for a rule aimed at the look-alike X.
 *
 * NOTE(review): comments assume loop() returns a run length of pixels of the
 * requested colour, get_bw() tests an area for white/black pixels and
 * num_cross() counts crossings of a scan line -- the helpers are defined
 * outside this block, confirm there.
 *
 * Returns box1->c as it stands after the test.
 */
static wchar_t ocr0_oO(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int i, j, d, x, y;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1 - x0 + 1, dy = y1 - y0 + 1;   /* box size */
  int ad;                                   /* weight of the candidate */
  wchar_t bc = UNKNOWN;

  // --- test o,O ---------------------------------------------------
  for (ad = d = 100; dx > 2 && dy > 3; ) {
    DBG( wchar_t c_ask='o'; )
    if (sdata->holes.num != 1) Break;

    /* black on the left, right, bottom and top arm; white in the centre */
    if (get_bw(x0,        x0 + dx/2, y0 + dy/2, y0 + dy/2, box1->p, cs, 1) != 1) Break;
    if (get_bw(x1 - dx/2, x1,        y0 + dy/2, y0 + dy/2, box1->p, cs, 1) != 1) Break;
    if (get_bw(x0 + dx/2, x0 + dx/2, y1 - dy/2, y1,        box1->p, cs, 1) != 1) Break;
    if (get_bw(x0 + dx/2, x0 + dx/2, y0,        y0 + dy/2, box1->p, cs, 1) != 1) Break;
    if (get_bw(x0 + dx/2, x0 + dx/2, y0 + dy/2, y1 - dy/3, box1->p, cs, 1) != 0) Break;

    /* the hole must span at least the middle third vertically */
    if (   sdata->holes.hole[0].y0 > dy/3
        || sdata->holes.hole[0].y1 < dy - 1 - dy/3) Break;

    if (   num_cross(x0 + dx/2,     x0 + dx/2,     y0, y1, box1->p, cs) != 2
        && num_cross(x0 + dx/2 + 1, x0 + dx/2 + 1, y0, y1, box1->p, cs) != 2) Break;
    /* each border needs exactly one crossing on its first or second line */
    if (   num_cross(x0 + dx/3, x1 - dx/4, y0,     y0,     box1->p, cs) != 1
        && num_cross(x0 + dx/3, x1 - dx/4, y0 + 1, y0 + 1, box1->p, cs) != 1) Break;
    if (   num_cross(x0 + dx/4, x1 - dx/3, y1,     y1,     box1->p, cs) != 1  // against noise
        && num_cross(x0 + dx/4, x1 - dx/3, y1 - 1, y1 - 1, box1->p, cs) != 1) Break;
    if (   num_cross(x0,     x0,     y0 + dy/3, y1 - dy/3, box1->p, cs) != 1
        && num_cross(x0 + 1, x0 + 1, y0 + dy/3, y1 - dy/3, box1->p, cs) != 1) Break;
    if (   num_cross(x1,     x1,     y0 + dy/3, y1 - dy/3, box1->p, cs) != 1
        && num_cross(x1 - 1, x1 - 1, y0 + dy/3, y1 - dy/3, box1->p, cs) != 1) Break;

    /* the left gap of the top row must exceed the one two rows below */
    if (loop(bp, 0, 0, x1 - x0, cs, 0, RI)
        <= loop(bp, 0, 2, x1 - x0, cs, 0, RI)) Break;

    /* lower third: the right gap must not shrink on the way down */
    x = loop(bp, dx-1, dy - 1 - dy/3, x1 - x0, cs, 0, LE);  // should be minimum
    for (y = dy - 1 - dy/3; y < dy; y++) {
      i = loop(bp, dx-1, y, x1 - x0, cs, 0, LE);
      if (i < x) break;
      x = i;
    }
    if (y < dy) Break;

    // ~D
    if (  loop(bp, 0, dy/16,          dx, cs, 0, RI)
        + loop(bp, 0, dy - 1 - dy/16, dx, cs, 0, RI)
        <= 2*loop(bp, 0, dy/2, dx, cs, 0, RI) + dx/8) Break;  // not convex
    if (loop(bp, 0, 1 + dy/16, dx, cs, 0, RI) + dx/4
        <= loop(bp, dx-1, 1 + dy/16, dx, cs, 0, LE)) Break;   // Dec00

    if (   loop(bp, dx-1, dy/16, dx, cs, 0, LE) > dx/8
        && loop(bp, 0,    dy/16, dx, cs, 0, RI) < dx/16) Break;
    if (   loop(bp, dx-1, dy - 1 - dy/16, dx, cs, 0, LE) > dx/8
        && loop(bp, 0,    dy - 1 - dy/16, dx, cs, 0, RI) < dx/16) Break;
    /* both right corners empty while a left corner is filled
       (the upper-left corner is tested on bp with relative coordinates) */
    if (   get_bw(x1 - dx/32, x1, y0,         y0 + dy/32, box1->p, cs, 1) == 0
        && get_bw(x1 - dx/32, x1, y1 - dy/32, y1,         box1->p, cs, 1) == 0
        && (   get_bw(0,  dx/32,      0,          dy/32, bp,      cs, 1) == 1
            || get_bw(x0, x0 + dx/32, y1 - dy/32, y1,    box1->p, cs, 1) == 1)) Break;  // ~D

    /* search lowest inner white point: smallest (white + black) run from
       the bottom, remembered as distance i at column j */
    y = dy;
    j = 0;
    for (x = 0; x < dx; x++) {
      i  = loop(bp, x, dy - 1,     y1 - y0, cs, 0, UP);
      i += loop(bp, x, dy - 1 - i, y1 - y0, cs, 1, UP);
      if (i <= y) {
        y = i;
        j = x;
      }
    }
    i = y;

    /* italic a: every offending row lowers the weight again */
    for (y = dy - 1 - i; y < dy - 1; y++) {
      if (num_cross(j, dx-1, y, y, bp, cs) > 1) ad = 99*ad/100;   // ~a \it a
    }
    for (y = 0; y < dy - 1 - i; y++) {
      if (num_cross(0, dx-1, y, y, bp, cs) > 2) ad = 98*ad/100;   // ~a \it a
    }
    if (loop(bp, dx-1, dy-1, x1 - x0, cs, 0, LE) < dx/8) ad = 98*ad/100;  // \it a
    if (loop(bp, dx-1,    0, x1 - x0, cs, 0, LE) < dx/8) ad = 98*ad/100;  // \it a
    if (loop(bp, dx-1, dy - 1 - dy/8, x1 - x0, cs, 0, LE) + 1 + dx/16
        < loop(bp, 0, dy - 1 - dy/8, x1 - x0, cs, 0, RI)) {
      ad = 99*ad/100;                                             // \it a
      MSG(fprintf(stderr,"ad=%d",ad);)
    }
    if (loop(bp, dx-1, dy-1, y1 - y0, cs, 0, UP) + 1 + (dy+3)/8
        < loop(bp, 0, dy-1, y1 - y0, cs, 0, UP)) {
      ad = 98*ad/100;                                             // \it a
      MSG(fprintf(stderr,"ad=%d",ad);)
    }

    if (   abs(  loop(bp, dx/2,    0, dy, cs, 0, DO)
               - loop(bp, dx/2, dy-1, dy, cs, 0, UP)) > dy/8
        || num_cross(0, dx-1,    0,    0, bp, cs) > 1
        || num_cross(0, dx-1, dy-1, dy-1, bp, cs) > 1) {
      ad = 98*ad/100;                                             // ~bq
    }

    /* correction for wrongly recognized m1,m2 (all chars of same height):
       upper case only if the box starts above the middle of m1 and m2 */
    i = (hchar && 2*y0 < box1->m1 + box1->m2) ? 1 : 0;
    if (gchar) ad = 99*ad/100;
    bc = i ? 'O' : 'o';
    if (bc == 'O' && ad > 99) ad = 99;      /* never 100% sure, 0 vs. O */
    if (bc == 'o' && !hchar && y0 < box1->m2) {
      Setac(box1, 'O', 98*ad/100);
      Setac(box1, '0', 98*ad/100);
    }
    Setac(box1, bc, ad);
    if (bc == 'O') {
      Setac(box1, '0', ad);
    } else {  /* bc == 'o' */
      Setac(box1, '0', 98*ad/100);
    }
    break;
  }
  return box1->c;
}
|
|
|
|
/*
 * ocr0_pP - rule based test for 'p' / 'P'.
 *
 * A run-once block starts with the weight ad at 100.  Rules that reject the
 * shape leave through Break, rules that only raise doubt lower ad.  At the
 * end 'P' (hchar set and either no gchar or dy<14) or 'p' is registered with
 * Setac().  Calls on bp use box-relative coordinates, calls on box1->p use
 * x0/y0-based coordinates.
 *
 * NOTE(review): comments assume loop() returns a run length of pixels of the
 * requested colour, get_bw() tests an area for white/black pixels,
 * num_cross() counts crossings of a scan line and num_hole() counts holes in
 * an area -- the helpers are defined outside this block, confirm there.
 *
 * Returns box1->c as it stands after the test.
 */
static wchar_t ocr0_pP(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int i, j, d, x, y, i1, i2, i3, i4;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1 - x0 + 1, dy = y1 - y0 + 1;   /* box size */
  int ad;                                   /* weight of the candidate */
  wchar_t bc = UNKNOWN;

  // --- test pP ---------------------------------------------------
  for (ad = d = 100; dx > 2 && dy > 3; ) {
    DBG( wchar_t c_ask='p'; )
    if (sdata->holes.num > 2) Break;        /* tolerant against a tiny hole */

    /* stem on the left at 3/4 and 1/2 height, bow at 1/4 height */
    if (get_bw(0,    dx/2, 3*dy/4, 3*dy/4, bp, cs, 1) != 1) Break;
    if (get_bw(0,    dx/2,   dy/2,   dy/2, bp, cs, 1) <  1) Break;
    if (get_bw(dx/4, dx-1,   dy/4,   dy/4, bp, cs, 1) != 1) Break;

    /* free space to the right of the stem at 3/4 height */
    i = loop(bp, dx-1, 3*dy/4, dx, cs, 0, LE);
    if (i < dx/4) Break;

    /* one of three columns must cross the bow exactly twice */
    if (   num_cross(x1 - 3*i/4,    x1 - 3*i/4,    y0, y1 - 3*dy/16, box1->p, cs) != 2
        && num_cross(x0 + dx/2,     x0 + dx/2,     y0, y1 - 3*dy/16, box1->p, cs) != 2
        && num_cross(x0 + dx/2 + 1, x0 + dx/2 + 1, y0, y1 - 3*dy/16, box1->p, cs) != 2) Break;
    if (   num_cross(0, dx-1, 7*dy/8,     7*dy/8,     bp, cs) != 1
        && num_cross(0, dx-1, 7*dy/8 - 1, 7*dy/8 - 1, bp, cs) != 1) Break;
    /* the row at dy/4 is tested twice, as in the original rule chain */
    if (   num_cross(0, dx-1, dy/4,     dy/4,     bp, cs) != 2
        && num_cross(0, dx-1, dy/4 - 1, dy/4 - 1, bp, cs) != 3   // \it p with nice curve
        && num_cross(0, dx-1, dy/4,     dy/4,     bp, cs) != 2
        && num_cross(0, dx-1, dy/4 + 1, dy/4 + 1, bp, cs) != 2) Break;

    i = loop(bp, 0, dy/2, dx, cs, 0, RI);
    if (i < 1) i++;
    if (   num_cross(i-1, dx-1, dy/4,     dy/4,     bp, cs) != 2
        && num_cross(i-1, dx-1, dy/4 + 1, dy/4 + 1, bp, cs) != 2) Break;

    i1 = loop(bp, 0, 3*dy/8, dx, cs, 0, RI);
    if (i1 >= dx/2) ad = 90*ad/100;
    i2 = i1 + loop(bp, i1, 3*dy/8, dx, cs, 1, RI);  // upper x-position of v line
    i3 = loop(bp, 0, 7*dy/8, dx, cs, 0, RI);
    i4 = i3 + loop(bp, i3, 7*dy/8, dx, cs, 1, RI);  // lower x-position of v line

    /* the left gap must stay left of the line through (i2,3dy/8),(i4,7dy/8) */
    for (y = dy/8; y < 7*dy/8; y++) {
      x = i2 + (8*y - 3*dy)*(i4 - i2)/(4*dy);       // right limit of line
      i = loop(bp, 0, y, dx, cs, 0, RI);
      if (i > x + dx/16) break;
    }
    if (y < 7*dy/8) Break;

    /* search the lower edge of the bow (also for 4x6 fonts): stop at the
       first row whose right gap exceeds dx/2 */
    x = 0;
    j = y = dy/3;
    for (; y < dy - dy/8; y++) {
      i = loop(bp, dx-1, y, dx, cs, 0, LE);
      if (i > x) {
        x = i;
        j = y;
      }
      if (x > dx/2) break;
    }
    if (x < dx/2 || x >= dx) Break;
    if (get_bw(3*dx/4, dx-1, y, dy-1, bp, cs, 1) == 1) Break;

    i = num_hole(x0, x1, y0, y1 - dy/5, box1->p, cs, NULL);
    j = sdata->holes.num;

    if (j != 1) ad = (dx < 8 ? 96 : 98)*ad/100;
    if (i == 0 && j == 0) ad = 90*ad/100;   /* sometimes there is a small gap */
    if (i > 1 || j > 1 || j > i) Break;

    /* check for serif F */
    i = loop(bp, bp->x - 1, bp->y/4, dx, cs, 0, LE);
    i = i + loop(bp, bp->x - 1 - i, bp->y/4, dx, cs, 1, LE);
    j = loop(bp, bp->x - 1 - i, bp->y/4, 3*dy/4, cs, 0, DO);
    if (j > dy/2) ad = 80*ad/100;           // it is a serif-F

    if ((!hchar && !gchar) || (hchar && gchar)) ad = 95*ad/100;
    bc = (hchar && (!gchar || dy < 14)) ? 'P' : 'p';
    if (hchar && gchar)   ad = 98*ad/100;   // \ss sz
    if (!hchar && !gchar) ad = 98*ad/100;

    Setac(box1, bc, ad);
    break;
  }
  return box1->c;
}
|
|
|
|
static wchar_t ocr0_qQ(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad; /* tmp-vars */
|
|
|
|
// --- test Q ---------------------------------------------------
|
|
for(ad=d=100;dx>2 && dy>4;){ // min 3x4
|
|
DBG( wchar_t c_ask='Q'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if( get_bw(x0 ,x0+dx/3,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x1-dx/3,x1 ,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2,x0+dx/2,y1-dy/3,y1, box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2,x0+dx/2,y0+dy/3,y1-dy/2,box1->p,cs,1) == 1 ) Break;
|
|
if( get_bw(x1 ,x1 ,y0 ,y0 ,box1->p,cs,1) == 1 ) Break; //alpha
|
|
if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) < 2 ) Break;
|
|
if( num_cross(x0+dx/5,x1-dx/5,y0 , y0 ,box1->p,cs) != 1 ) // AND
|
|
if( num_cross(x0+dx/5,x1-dx/5,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
|
|
if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
|
|
if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
|
|
if( get_bw(x1 ,x1 ,y1-dy/8 , y1 ,box1->p,cs,1) == 0 )
|
|
if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
|
|
if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
|
|
// i=num_hole(x0,x1,y0,y1,box1->p,cs,NULL);
|
|
i=sdata->holes.num;
|
|
if(!i) Break;
|
|
if( i!=1 && (i!=2 || num_hole(x0,x1,y0+dy/2,y1,box1->p,cs,NULL)!=1) ) Break;
|
|
x=x1;y=y1;
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x1-dx/2 ) Break;
|
|
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
|
|
if( x<x1-dx/2 ) { if (gchar) ad=98*ad/100; else ad=90*ad/100; }
|
|
if( loop(bp,0 ,0 ,dx,cs,0,RI)
|
|
< loop(bp,0 ,2 ,dx,cs,0,RI) ) Break;
|
|
if( loop(bp,0 ,dy/8+2,dx,cs,0,RI)
|
|
+loop(bp,dx-1,dy/8+2,dx,cs,0,LE) > 5*dx/8 ) Break; // ~4 Okt00
|
|
|
|
x= loop(bp,dx-1,3*dy/8,dy,cs,0,LE); if( x>dx/4 ) Break;
|
|
if( loop(bp,dx-1-x,0 ,dy,cs,0,DO)
|
|
<= loop(bp,dx-2-x,0 ,dy,cs,0,DO) ) Break; // 4
|
|
|
|
if( loop(bp,dx-1,dy-2,dx,cs,0,LE)
|
|
<= loop(bp,dx-1,dy/2,dx,cs,0,LE) )
|
|
if( loop(bp, 1,dy-1,dy,cs,0,UP)
|
|
<= loop(bp,dx/2,dy-1,dy,cs,0,UP) )
|
|
if( loop(bp, 0,dy-2,dx,cs,0,RI)>dx/2 )
|
|
if( loop(bp, 0, 0,dx,cs,0,RI)>dx/2 ) Break; // 4
|
|
|
|
if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)
|
|
+ loop(bp, 0,3*dy/4,dx,cs,0,RI)
|
|
< loop(bp,dx-1,2*dy/4,dx,cs,0,LE)
|
|
+ loop(bp, 0,2*dy/4,dx,cs,0,RI) ) ad=94*ad/100; // 4
|
|
if( loop(bp,0 ,3*dy/4,dx,cs,1,RI) >= dx ) ad=94*ad/100; // 4
|
|
|
|
|
|
if( loop(bp,dx-1,dy/3,dx,cs,0,LE)> dx/4 ) Break;
|
|
j=loop(bp,dx/2,dy-1,dy,cs,0,UP);
|
|
if (j>1 && j>dy/8) {
|
|
if( get_bw(0,dx/2,dy-1-j/2,dy-1-j/2,bp,cs,1) == 1 ) { // ~RA
|
|
if (j<5) ad=95*ad/100;
|
|
else Break;
|
|
}
|
|
}
|
|
|
|
// italic a
|
|
for(i=0,y=0;y<dy/2;y++)
|
|
if( num_cross(0,dx-1,y,y,bp,cs) > 2 ) i++; if(i>dy/8) Break; // ~a \it a
|
|
if (i>0) ad=99*ad/100;
|
|
|
|
// ~o look at the lower right side for falling line
|
|
for(j=x=0,y=dy/2;y<dy;y++){
|
|
i=loop(bp,dx-1,y,dx,cs,0,LE);if(i>x){ x=i; }
|
|
if (x-i>j) j=x-i;
|
|
if( j>dx/16 ) Break; // falling line detected
|
|
}
|
|
if (j==0) Break; // no falling line => no Q
|
|
if (j<=dx/16) ad=98*ad/100;
|
|
if(y1<=box1->m3) ad=98*ad/100; // ~q no underlength! rare
|
|
if(!hchar) ad=96*ad/100;
|
|
Setac(box1,'Q',ad);
|
|
break;
|
|
}
|
|
// --- test q ---------------------------------------------------
|
|
for(ad=d=100;dx>2 && dy>3;){ // min 3x4
|
|
DBG( wchar_t c_ask='q'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
for ( y=y0; 2*y<=y0+y1; y++ ){ // detect ring
|
|
if( num_cross(x0,x1, y, y,box1->p,cs) == 2 ) Break;
|
|
} if (2*y>y0+y1) Break; /* < */
|
|
for ( y=(y0+y1)/2; y<=y1; y++ ){ // detect vert line
|
|
if( num_cross(x0, x1, y, y,box1->p,cs) == 1
|
|
&& num_cross(x0,x0+dx/2, y, y,box1->p,cs) == 0 ) Break;
|
|
} if (y>y1) Break; /* O (y==y1 for 4x6font-q) */
|
|
for ( x=0,j=y=y0+dy/3; y<=y1-dy/8; y++ ){ // detect baseline
|
|
i=loop(box1->p,x0,y,dx,cs,0,RI);
|
|
if ( i>x ) { x=i; j=y; }
|
|
if ( x>dx/2 ) break;
|
|
} if ( x<dx/2 || x>=dx) Break;
|
|
if (y1-j+1<dy/4) ad=96*ad/100; // ~\it{a}
|
|
if( num_cross(x0+x/2,x0+x/2, j, y1,box1->p,cs) != 0 ) ad=96*ad/100; // ~g
|
|
if( loop(box1->p,x0+dx/16,j,dy,cs,0,UP)<1+dy/16 ){
|
|
ad=97*ad/100;
|
|
if (hchar || !gchar) Break; // 4
|
|
}
|
|
if( loop(box1->p,x0+dx/16,j-dy/32-1,dy,cs,1,RI)>=dx-dx/8
|
|
|| loop(box1->p,x0+dx/16,j-dy/16-1,dy,cs,1,RI)>=dx-dx/8 ){
|
|
ad=96*ad/100; // 4
|
|
}
|
|
if( get_bw(x1-dx/3, x1, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0, x0+dx/3, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0, x0+dx/4, y1-dy/8, y1-dy/9,box1->p,cs,1) == 1 ) Break;
|
|
if( get_bw(x0, x0+dx/4, y1-dy/5, y1-dy/9,box1->p,cs,1) == 1 ) ad=99*ad/100;
|
|
if( num_cross(x0+dx/2,x0+dx/2, y0, j ,box1->p,cs) != 2 ) Break;
|
|
// if( num_hole (x0 ,x1 , y0, y1 ,box1->p,cs,NULL) != 1 )
|
|
if (sdata->holes.num != 1)
|
|
{ if (dx<16) ad=98*ad/100; else Break; }
|
|
if( num_hole (x0 ,x1 , y0, j ,box1->p,cs,NULL) != 1 )
|
|
{ if (dx<16) ad=98*ad/100; else Break; }
|
|
// ~\it g
|
|
if( loop(bp,0,dy-1-dy/4,dx,cs,0,RI)>5*dx/8
|
|
&& get_bw(dx/4,dx/4,dy-1-dy/4,dy-1,bp,cs,1)==1 ) Break; // ~\it g
|
|
// what about unsure m1-m4?
|
|
if(!gchar){ ad=ad*99/100; } // ~4
|
|
if( hchar){ ad=ad*99/100; } // ~49
|
|
Setac(box1,'q',ad);
|
|
break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
/*
 * ocr0_iIjJ: rule-based test of one character box against 'i', 'j', 'I', 'J'.
 *
 * sdata supplies the box (box1), the box-local pixmap bp, the value cs that is
 * handed through to every pixel helper, the hchar/gchar flags, the hole count
 * (holes.num) and aa, an array of four points (x,y,dist^2,vector_idx).
 *
 * Each candidate char is tested in its own single-pass for(...;){ ... break; }
 * block. ad starts at 100 and is scaled down (ad=NN*ad/100) by every weak
 * indication; a candidate that survives all rejections is registered with
 * Setac(box1,<char>,ad). The four tests run one after another, so more than
 * one Setac() call can happen for the same box.
 *
 * Coordinates: calls using bp take box-relative coordinates (0..dx-1,0..dy-1),
 * calls using box1->p take page coordinates (x0..x1,y0..y1).
 *
 * NOTE(review): Break, DBG, MSG, Setac, loop, get_bw, num_cross,
 * nearest_frame_vector, line_deviation and sq are defined outside this block.
 * Break is used below as "give up on this candidate" - confirm its definition.
 * hchar/gchar presumably flag tall chars / chars with descender - confirm.
 *
 * Returns box1->c at the end; the no-dot shortcut in the 'i' test returns 'i'
 * directly.
 */
static wchar_t ocr0_iIjJ(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
ax,ay,bx,by,cx,cy,ex,ey,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ad,ya,yb,yc,yd,ye,yf,xa,xb, /* tmp-vars */
|
|
(*aa)[4]=sdata->aa; /* the four line ends, (x,y,dist^2,vector_idx) */
|
|
|
|
// --- test i ---------------------------------------------------
|
|
// if(box1->dots==1) // what about \it neighbouring ij
|
|
for(ad=d=100;dy>3 && dx>0;){ // min 3x4 without dot
|
|
DBG( wchar_t c_ask='i'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
// ToDo: ':' check that height of dot is smaller than the vert. line!
|
|
/*
|
|
* o <== ya
|
|
* o
|
|
*
|
|
* ooo <== yb
|
|
* o
|
|
* o
|
|
* o
|
|
* ooo
|
|
*/
|
|
ya=y0;
|
|
if (box1->dots!=1) ad=98*ad/100;
|
|
// one-shot block (every path ends in break or return), only entered for
// a box more than 3 times taller than wide with nonzero m2
while(dy>3*dx && box1->m2){ // test for vertical i without detected dot
|
|
i= loop(bp,dx/2,dy-1 ,dy,cs,0,UP);
|
|
if (dy-1-i<box1->m3-2) break;
|
|
i+=loop(bp,dx/2,dy-1-i,dy,cs,1,UP);
|
|
// distance upper end to m2 > (m2-m1)/3
|
|
if (3*abs(dy-1-i-box1->m2)>box1->m2-box1->m1) break;
|
|
if( get_bw(x0,x1,y0,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
|
|
if( get_bw(x0,x1,y1-i ,y1-i ,box1->p,cs,1) == 0
|
|
|| get_bw(x0,x1,y1-i-1,y1-i-1,box1->p,cs,1) == 0
|
|
|| get_bw(x0,x1,y1-i-2,y1-i-2,box1->p,cs,1) == 0 )
|
|
{
|
|
Setac(box1,'i',ad);
|
|
return 'i'; /* believe me, that's an "i"! */
|
|
} break;
|
|
}
|
|
// if( box1->dots!=1 ) Break;
|
|
// box top lies at or below the middle of m1..m2: start the dot search at m1
if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1;
|
|
|
|
// out_x(box1);
|
|
// search downward (upper half only) for the first row where get_bw()==1
for (y=ya;2*y<ya+y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
|
|
if (2*y>=ya+y1) Break; // hmm, gap only, no dot?
|
|
ya=y;
|
|
if (box1->m2 && ya>box1->m2+2) Break;
|
|
// skip the rows of the dot, then the rows of the gap; yb = row after the gap
for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
|
|
if (2*y>=ya+y1) Break; // hmm no gap
|
|
for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
|
|
yb=y;
|
|
if (5*yb>=3*ya+2*y1) ad=99*ad/100; // large gap
|
|
if (2*yb>= ya+ y1) ad=97*ad/100; // very large gap, ~:
|
|
if (5*yb>=2*ya+3*y1) Break; // huge gap, ~:
|
|
if (loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2) // unusual (right part of ouml)
|
|
ad=95*ad/100;
|
|
|
|
// printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
|
|
// printf(" dots=%d\n",box1->dots); out_x(box1);
|
|
// \sl ~f. !
|
|
// scan upward from the bottom for the first row where get_bw()!=1;
// reject if that row lies in the lowest quarter of ya..y1
for (y=y1;y>ya;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
|
|
if (y>(ya+3*y1)/4) Break;
|
|
if (y>(ya+2*y1)/3) ad=96*ad/100;
|
|
|
|
// y = box-relative middle row of the part below the gap (yb..y1)
y=(y1-yb+1)/2+yb-y0; /* only one vertical line, an italic i looks more like a tall S */
|
|
if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) Break;
|
|
// going down: yc = first row with crossings!=1, yd = first following row with crossings!=2
for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y;
|
|
for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
|
|
if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
|
|
y=(y1-yb+1)/2+yb-y0;
|
|
// going up from the same middle row: ye and yf are found the same way
for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y;
|
|
for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
|
|
if( yf>(y1-yb+1)/4+yb-y0 ) Break;
|
|
// more than 2 rows with two crossings at the lower end: extra checks around row yc
if(yd>yc+2){
|
|
xa=loop(bp, 0,yc-1,dx,cs,0,RI);
|
|
xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
|
|
if(
|
|
xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
|
|
> xa-loop(bp, 0,yc,dx,cs,0,RI) ){
|
|
y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
|
|
if(y>0){
|
|
i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
|
|
if( i>0 ) y+=i-1;
|
|
}
|
|
if( yc-1+y < yd-1 ) Break;
|
|
} else {
|
|
y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
|
|
if( yc-1+y < yd-2 ) Break;
|
|
}
|
|
}
|
|
// same idea for the upper end, around row ye
if(yf<ye-2){
|
|
x=loop(bp,0 ,ye+1,dx,cs,0,RI);
|
|
y=loop(bp,x-1,ye+1,dy,cs,0,UP);
|
|
i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
|
|
if( i>0 ) y+=i-1;
|
|
if( ye+1-y > yf+1 ) Break;
|
|
}
|
|
// NOTE(review): the second loop() call passes dx/2 in the row position - confirm this is intended
if( 2*y0 <= box1->m1+box1->m2
|
|
&& loop(bp,0, 0,dx,cs,0,RI)+1
|
|
< loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100;
|
|
|
|
if( gchar ) // i is more frequent than j, be really sure it is correct (May00)
|
|
if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
|
|
-loop(bp,dx-1,2*dy/4,dx,cs,0,LE)>dx/8 ) Break;
|
|
|
|
// could be a broken + or similar thing?
|
|
if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=90*ad/100;
|
|
|
|
if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/2
|
|
&& loop(bp,dx-1, dy-1,dx,cs,0,LE)<dx/4 ) Break; // ~d=cl
|
|
|
|
// test for é
|
|
if( dx>5 && num_cross(x0+dx/2,x0+dx/2, ya, y1 ,box1->p,cs) >= 3 )
|
|
ad=95*ad/100;
|
|
|
|
Setac(box1,'i',ad);
|
|
break;
|
|
}
|
|
// --- test j ---------------------------------------------------
|
|
// if(box1->dots==1) // what about \it neighbouring ij
|
|
// the first part repeats the dot/gap/body search of the 'i' test
for(ad=d=100;dy>4 && dx>0;){ // min 3x4
|
|
DBG( wchar_t c_ask='j'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
ya=y0;
|
|
if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1;
|
|
|
|
for(y=ya;2*y<ya+y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
|
|
if(2*y>=ya+y1) Break; // hmm only gap
|
|
ya=y;
|
|
if( box1->m2 && ya>box1->m2+2 ) Break;
|
|
for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
|
|
if(2*y>=ya+y1) Break; // hmm no gap
|
|
for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
|
|
if(2*y>=ya+y1) Break; // hmm very large gap
|
|
yb=y;
|
|
if( loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2 ) Break; // unusual (right part of ouml)
|
|
|
|
// printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
|
|
// printf(" dots=%d\n",box1->dots); out_x(box1);
|
|
// \sl ~f. !
|
|
// reject if any row from the middle of ya..y1 down to y1 has get_bw()!=1
for(y=(ya+y1)/2;y<=y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
|
|
if(y<=y1) Break;
|
|
|
|
y=(y1-yb+1)/2+yb-y0; /* only one vertical line, an italic i looks more like a tall S */
|
|
// unlike the 'i' test, up to 2 crossings are accepted on the middle row
if( num_cross(0,dx-1,y,y,bp,cs) >2 ) Break;
|
|
for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y;
|
|
for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
|
|
if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
|
|
y=(y1-yb+1)/2+yb-y0;
|
|
for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y;
|
|
for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
|
|
if( yf>(y1-yb+1)/4+yb-y0 ) Break;
|
|
if(yd>yc+2){
|
|
xa=loop(bp, 0,yc-1,dx,cs,0,RI);
|
|
xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
|
|
if(
|
|
xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
|
|
> xa-loop(bp, 0,yc,dx,cs,0,RI) ){
|
|
y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
|
|
if(y>0){
|
|
i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
|
|
if( i>0 ) y+=i-1;
|
|
}
|
|
if( yc-1+y < yd-1 ) Break;
|
|
} else {
|
|
y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
|
|
if( yc-1+y < yd-2 ) Break;
|
|
}
|
|
}
|
|
if(yf<ye-2){
|
|
x=loop(bp,0 ,ye+1,dx,cs,0,RI);
|
|
y=loop(bp,x-1,ye+1,dy,cs,0,UP);
|
|
i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
|
|
if( i>0 ) y+=i-1;
|
|
if( ye+1-y > yf+1 ) Break;
|
|
}
|
|
if( 2*y0 <= box1->m1+box1->m2
|
|
&& loop(bp,0, 0,dx,cs,0,RI)+1
|
|
< loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100;
|
|
if (loop(bp,0,dy-1,dx,cs,0,RI)
|
|
-loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c
|
|
|
|
// complementary condition to the gchar check in the 'i' test (<= instead of >)
if( gchar ) // i is more frequent than j, be really sure it is correct (May00)
|
|
if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
|
|
-loop(bp,dx-1,2*dy/4,dx,cs,0,LE)<=dx/8 ) Break;
|
|
// could be a broken + or similar thing?
|
|
if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=80*ad/100;
|
|
if (!gchar) ad=96*ad/100;
|
|
if( box1->dots!=1 ) ad=98*ad/100;
|
|
|
|
Setac(box1,'j',ad);
|
|
|
|
break;
|
|
}
|
|
// --- test I ---------------------------------------------------
|
|
for(ad=d=100;dy>4 && dy>dx && 5*dy>4*(box1->m3-box1->m2);){ // min 3x4
|
|
DBG( wchar_t c_ask='I'; )
|
|
if( box1->dots==1 ) Break;
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
|
|
x =loop(bp,0, dy/2,dx,cs,0,RI); // convex? divided Q
|
|
if(loop(bp,0,7*dy/8,dx,cs,0,RI) > x+dx/8) Break;
|
|
// reject if a row y and the row dy/16 below it both have crossings!=1
for( y=dy/16;y<dy-1-dy/16;y++ )
|
|
if( num_cross(0, dx-1, y , y ,bp,cs) != 1 )
|
|
if( num_cross(0, dx-1, y+dy/16 , y+dy/16 ,bp,cs) != 1 ) break;
|
|
if( y<dy-1-dy/16 ) Break;
|
|
x =loop(bp,0, dy/2,dx,cs,0,RI);
|
|
i5=loop(bp,x, dy/2,dx,cs,1,RI); // center width
|
|
for(y=dy/4;y<3*dy/4;y++ ){ // same width ?
|
|
x =loop(bp,0, y,dx,cs,0,RI);
|
|
x =loop(bp,x, y,dx,cs,1,RI); // width
|
|
if( abs(x-i5)>1+dx/8 ) break;
|
|
} if( y<3*dy/4 ) Break;
|
|
// out_x(box1);
|
|
|
|
// upper max width
|
|
for(i2=i1=0,y=0;y<dy/4;y++ ){
|
|
x =loop(bp,0, y,dx,cs,0,RI);
|
|
x =loop(bp,x, y,dx,cs,1,RI); if(x>i1){ i1=x;i2=y; }
|
|
}
|
|
// lower max width (i3) and its row (i4)
for(i4=i3=0,y=3*dy/4;y<dy;y++ ){
|
|
x =loop(bp,0, y,dx,cs,0,RI);
|
|
x =loop(bp,x, y,dx,cs,1,RI); if(x>i3){ i3=x;i4=y; }
|
|
}
|
|
if( abs(i3-i1)>1+dx/8 ) Break; // if i3>>i5 more sure!
|
|
if( i1>i5 ){ // look for edges else *80%
|
|
}
|
|
if(i1+1<i5 && !hchar) Break; // Jun00
|
|
|
|
// calculate upper and lower mass center
|
|
// (i1 and i2 are reused from here on as the upper and lower center x)
x =loop(bp,0, dy/8,dx,cs,0,RI); i1=x;
|
|
x+=loop(bp,x, dy/8,dx,cs,1,RI); i1=(i1+x-1)/2;
|
|
|
|
x =loop(bp,0,dy-1-dy/8,dx,cs,0,RI); i2=x;
|
|
x+=loop(bp,x,dy-1-dy/8,dx,cs,1,RI); i2=(i2+x-1)/2;
|
|
x =loop(bp,0,dy-2-dy/8,dx,cs,0,RI); i=x;
|
|
x+=loop(bp,x,dy-2-dy/8,dx,cs,1,RI); i=(i+x-1)/2; if( i>i2 ) i2=i;
|
|
|
|
// printf(" get_line(%d,%d) %d\n",i1,i2,
|
|
// get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100));
|
|
if( get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)<95 ) Break;
|
|
x =(i1-i2+4)/8; i1+=x; i2-=x;
|
|
|
|
// upper and lower width (what about serifs?)
|
|
// i = upper extent to the left of i1; every other extent is compared against it
y=dy/8;
|
|
x =loop(bp,i1, y+0,dx,cs,1,LE); i=x;
|
|
x =loop(bp,i1, y+1,dx,cs,1,LE); if(x>i)i=x;
|
|
x =loop(bp,i1, y+0,dx,cs,1,RI); j=x;
|
|
x =loop(bp,i1, y+1,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
|
|
x =loop(bp,i2,dy-y-1,dx,cs,1,LE); j=x;
|
|
x =loop(bp,i2,dy-y-2,dx,cs,1,LE); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
|
|
x =loop(bp,i2,dy-y-1,dx,cs,1,RI); j=x;
|
|
x =loop(bp,i2,dy-y-2,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
|
|
|
|
if(dy>15) // v024a4
|
|
if( loop(bp,dx-1,dy/16 ,dx,cs,0,LE)
|
|
> loop(bp,dx-1,dy/4 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad ) (thin)
|
|
|
|
// at most one row between dy/16 and 15*dy/16 may have crossings!=1
for(i=0,y=dy/16;y<15*dy/16 && i<2;y++)
|
|
if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
|
|
if( i>1 ) Break;
|
|
|
|
if(!hchar){ // right part (bow) of h is never a l
|
|
if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1
|
|
&& get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break;
|
|
if( loop(bp, 0,dy/4,dx,cs,0,RI)> dx/4
|
|
&& loop(bp,dx-1,dy/4,dx,cs,0,LE)<=dx/4
|
|
&& loop(bp, 1, 0,dy,cs,0,DO)<=dy/4 ) Break; // ~z
|
|
}
|
|
|
|
if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2
|
|
&& get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
|
|
&& get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
|
|
&& get_bw(x0,x0,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~] */
|
|
|
|
if ( loop(bp,dx-1, dy/4,dx,cs,0,LE) > dx/2
|
|
&& loop(bp,dx-1,3*dy/4,dx,cs,0,LE) > dx/2
|
|
&& loop(bp, 0, dy/2,dx,cs,0,RI) < dx/4 ) Break; /* ~[ */
|
|
|
|
x =loop(bp, 0,dy/2,dx,cs,0,RI); // convex/concave? ~()
|
|
i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
|
|
if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
|
|
&& loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
|
|
&& loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8
|
|
&& loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~(
|
|
if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8
|
|
&& loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8
|
|
&& loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8
|
|
&& loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~)
|
|
if( loop(bp, 0, dy/8,dx,cs,0,RI)
|
|
-(dx-loop(bp,dx-1,7*dy/8,dx,cs,0,LE)) > dx/4 ) Break; // ~/
|
|
if( loop(bp, 0, 0,dx,cs,0,RI) > dx/2 // ToDo: check for serifs
|
|
&& loop(bp, 0, dy/8,dx,cs,0,RI) > dx/2
|
|
&& loop(bp,dx-1,dy-1 ,dx,cs,0,LE) > dx/2
|
|
&& loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/2 ) ad=99*ad/100; // ~/
|
|
|
|
if (box1->m2 && 3*y0>box1->m1+2*box1->m2)
|
|
if( get_bw(x0+dx/8,x1-dx/8,box1->m1,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
|
|
Break; // ~i
|
|
|
|
// NOTE(review): i1 now holds the adjusted upper center x, not the upper max width
// used by the identical-looking test further up - confirm which one is meant
if(i1+1<i5 && !hchar){ ad=65*ad/100; MSG({}) } // ~ slanted I
|
|
|
|
// be sure only for serif
|
|
i3=loop(bp,dx-1, dy/4,dx,cs,0,LE);
|
|
i4=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI);
|
|
if (i3<2 || i4<2
|
|
|| get_bw(x1-i3/4,x1-i3/4,y0,y0+dy/4,box1->p,cs,1) != 1
|
|
|| get_bw(x0+i4/4,x0+i4/4,y1-dy/4,y1,box1->p,cs,1) != 1 )
|
|
{ ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ToDo: improve it
|
|
if(!hchar){ ad=96*ad/100; MSG({}) } // ~bad_small_r
|
|
if (box1->m4 && y1<box1->m4) { // probably lower dot?
|
|
if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
|
|
|| (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1)) {
|
|
ad=96*ad/100;
|
|
}
|
|
} // ~!
|
|
// a---b
|
|
// I
|
|
// I
|
|
// c---e
|
|
// check against Z
|
|
// a,b are searched in the upper quarter, c,e in the lower quarter
// NOTE(review): the test uses dx-i-1 but stores ax=dx-i - confirm the offset is intended
for(bx=0,ax=dx,ay=by=y=0;y<dy/4;y++){
|
|
i =loop(bp,dx-1 ,y,dx,cs,0,LE); if (dx-i-1>bx) { bx=dx-1-i; by=y; }
|
|
i+=loop(bp,dx-1-i,y,dx,cs,1,LE); if (dx-i-1<ax) { ax=dx-i; ay=y; }
|
|
}
|
|
for(cx=dx,ex=0,ey=cy=y=dy-1;y>dy-1-dy/4;y--){
|
|
i =loop(bp,0,y,dx,cs,0,RI); if (i<cx) { cx=i; cy=y; }
|
|
i+=loop(bp,i,y,dx,cs,1,RI); if (i>ex) { ex=i; ey=y; }
|
|
}
|
|
// measure from a point 1/4 of the way from a to c (going right) and from b to e (going left)
x=(3*ax+cx)/4; y=(3*ay+cy)/4; i= loop(bp,x,y,dx,cs,0,RI);
|
|
x=(3*bx+ex)/4; y=(3*by+ey)/4; j= loop(bp,x,y,dx,cs,0,LE);
|
|
if (j>0 && (2*i>3*j || 3*i<2*j )) ad=99*ad/100;
|
|
if (j>0 && ( i>2*j || 2*i< j )) ad=97*ad/100;
|
|
i=loop(bp,0,0,dy,cs,0,DO);
|
|
if (i>dy/8 && i<dy/2) ad=99*ad/100; // ~1
|
|
if (loop(bp,dx-1,0,dx,cs,0,LE)
|
|
-loop(bp, 0,0,dx,cs,0,RI)>dx/4) ad=96*ad/100; // ~l 5x7
|
|
|
|
if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
|
|
if (gchar) ad=98*ad/100; // J
|
|
if (box1->m3 && 2*y1<=box1->m2+box1->m3) ad=96*ad/100; // '
|
|
|
|
Setac(box1,'I',ad);
|
|
break;
|
|
}
|
|
// --- test J --------------------------------------------------- 22Nov06
|
|
for(ad=d=100;dy>4 && dy>=dx && dx>2;){ // min 3x4 ~Y)]d',
|
|
// rewritten for vectors 0.42
|
|
// these i1..i5 shadow the function-level variables of the same name
int ld, i1, i2, i3, i4, i5, i6, i7; // line deviation + corners
|
|
DBG( wchar_t c_ask='J'; )
|
|
if (sdata->holes.num > 0) Break; /* no hole */
|
|
/* half distance to the center */
|
|
d=2*sq(128/4);
|
|
/* now we check for the upper right end of the J */
|
|
if (aa[3][2]>d) Break; /* [2] = distance */
|
|
/* searching for 4 notches between neighbouring ends */
|
|
|
|
/*
|
|
type A B
|
|
|
|
6OOOO 6O5
|
|
7O5 7O
|
|
O O
|
|
O O
|
|
2O 1O4 1O4
|
|
OO 2OO
|
|
3 3
|
|
*/
|
|
|
|
/* Warning: aa0 can be left upper or left lower point for type B */
|
|
/* get a point on the inner low left side of the J */
|
|
i =nearest_frame_vector(box1,aa[3][3],aa[1][3],(x0+x1)/2,y0);
|
|
/* failed for slanted J before Jun09 */
|
|
i1=nearest_frame_vector(box1,i ,aa[1][3], x1+dx/8,y1-dy/8);
|
|
/* get the most left point on the lower part of the J */
|
|
i2=nearest_frame_vector(box1,i1,aa[3][3], x0-2*dx, y1-dy/8);
|
|
/* get a point on the middle of the bottom of the J */
|
|
i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], (x0+x1)/2, y1);
|
|
/* get a point on the outer low right side of the J */
|
|
i4=nearest_frame_vector(box1,aa[1][3],aa[3][3], x1, (y0+2*y1)/3);
|
|
/* get a point on the outer right side below top serif */
|
|
i5=nearest_frame_vector(box1,aa[2][3],aa[3][3], (x0+2*x1)/3,y0);
|
|
/* get a point on the left side of upper serif */
|
|
i6=nearest_frame_vector(box1,aa[3][3],i1, x0, y0);
|
|
/* get a point on the most right left side of upper serif */
|
|
i7=nearest_frame_vector(box1,i6,i1, x1, y0);
|
|
MSG(fprintf(stderr," i1-i7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
|
|
|
|
/* check the highest point on lower left area */
|
|
i =nearest_frame_vector(box1,i1,i3,x0,y0);
|
|
if (box1->frame_vector[i ][1]-y0<dy/4) Break; // U
|
|
if (box1->frame_vector[i ][1]-y0<=dy/2) ad=97*ad/100; // imperfect a
|
|
/* check the lowest point on upper left area, serif? */
|
|
j =nearest_frame_vector(box1,i6,i7,x0,y1);
|
|
if (box1->frame_vector[i ][1]
|
|
-box1->frame_vector[j ][1]<=dy/4) Break; // imperfect a
|
|
if (box1->frame_vector[i7][1]>y0+dy/4) Break; // not too low
|
|
if (box1->frame_vector[i1][1]
|
|
-box1->frame_vector[i7][1]<dy/2) Break;
|
|
if (box1->frame_vector[i4][1]
|
|
-box1->frame_vector[i5][1]<dy/2) Break;
|
|
if (box1->frame_vector[i7][0]<x0+dx/2) Break;
|
|
if (box1->frame_vector[i1][0]
|
|
-box1->frame_vector[i2][0]<=dx/8) Break; // ~1
|
|
if (box1->frame_vector[i1][0]
|
|
-box1->frame_vector[i2][0]<=dx/4) ad=ad*99/100; // ~1
|
|
if (box1->frame_vector[i6][1]>y0+dy/8) ad=99*ad/100; // ~1
|
|
if (aa[0][2]==0) { // ]?
|
|
ad=99*ad/100;
|
|
if (aa[1][2]==0) ad=98*ad/100;
|
|
if (aa[2][2]<=aa[3][2]) ad=97*ad/100;
|
|
}
|
|
|
|
/* check for left bow */
|
|
/* (some frame vector between i2 and i4 must lie left of the x of i1) */
for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[ i][0] /* [0]=x */
|
|
<box1->frame_vector[i1][0]) break; /* curve? */
|
|
} if (i==i4) Break; // ~I
|
|
/* check for no right bow */
|
|
for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[ i][0] /* [0]=x */
|
|
>box1->frame_vector[i4][0]) break;
|
|
} if (i!=i4) Break; // ~I
|
|
/* check that no frame vector between i5 and i6 lies below y0+dy/4 */
|
|
for (j=i=i5;i!=i6;i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[ i][1] > y0+dy/4) break;
|
|
} if (i!=i6) Break; // ~Y
|
|
/* check if upper left and lower left points are joined directly */
|
|
ld=line_deviation(box1, i7, i1);
|
|
MSG(fprintf(stderr," i7,i1 %d %d linedist= %d/%d",i7,i1,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
/* each further threshold that ld exceeds costs another 1% */
if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
|
|
if (6*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
|
|
if (7*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
|
|
if (8*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
|
|
/* check if lower right and upper right points are joined directly */
|
|
ld=line_deviation(box1, i4, i5);
|
|
MSG(fprintf(stderr," i4,i5 %d %d linedist= %d/%d",i4,i5,ld,2*sq(1024/4));)
|
|
if (ld >2*sq(1024/4)) Break;
|
|
if (5*ld >4*2*sq(1024/4)) ad=99*ad/100;
|
|
|
|
// J exists as gchar and ~gchar
|
|
if (!hchar){ ad=99*ad/100; }
|
|
if (box1->num_frames>1) {
|
|
ad=98*ad/100; // j
|
|
}
|
|
Setac(box1,'J',ad);
|
|
break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
static wchar_t ocr0_brackets(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,i1,i2,i3,i4,i5,i6,hchar=sdata->hchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
(*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
|
|
ad,r1,r2; /* tmp-vars */
|
|
wchar_t bc=UNKNOWN;
|
|
|
|
// --- test > derived from xX ---------------------------------------------------
|
|
// rewritten for vectors v0.41
|
|
for(ad=d=100;dx>1 && dy>2;){ // min 3x2
|
|
// 0 - indizes 0,1,i1,i2 pointing to edges of the char
|
|
// \ .
|
|
// \ .
|
|
// i1,i2
|
|
// /
|
|
// /
|
|
// 1
|
|
DBG( wchar_t c_ask='>'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if (sdata->holes.num > 0 && (dx<6 || dy<6)) Break; /* # */
|
|
/* calculate the half distance to the center */
|
|
d=2*sq(128/4);
|
|
/* now we check for the 2 left ends of the > */
|
|
if (aa[0][2]>d) Break; /* upper left end */
|
|
if (aa[1][2]>d) Break; /* lower left end */
|
|
if (aa[1][1]-aa[0][1]<dy/2) Break;
|
|
/* searching for 4 notches between neighbouring ends */
|
|
|
|
/* run along left side from top to bottom */
|
|
for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[i][0]
|
|
>=box1->frame_vector[j][0]) j=i; /* notice most right vector */
|
|
} if (j==i || j==aa[0][3]) Break;
|
|
/* calculate the distance to the center */
|
|
x=box1->frame_vector[j][0];
|
|
y=box1->frame_vector[j][1];
|
|
if (2*x-aa[0][0]-aa[1][0]<dx) ad=99*ad/100;
|
|
if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)) Break;
|
|
if ( aa[0][0]+aa[1][0]-2*x>=0) Break;
|
|
i1=j;
|
|
d=line_deviation(box1, aa[0][3], j) >sq(1024/4);
|
|
/* check if upper left and center point are joined directly */
|
|
MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
|
|
if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
|
|
MSG(fprintf(stderr,"ad=%d", ad);)
|
|
d=line_deviation(box1, j, aa[1][3]);
|
|
/* check if lower left and center point are joined directly */
|
|
MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
|
|
if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
|
|
MSG(fprintf(stderr,"ad=%d", ad);)
|
|
|
|
/* run along right side from bottom to top */
|
|
for (j=i=aa[1][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
|
|
if (box1->frame_vector[i][0]
|
|
>=box1->frame_vector[j][0]) j=i; /* notice most right vector */
|
|
// MSG(fprintf(stderr,"search right: %d %d %d %d",i,j,aa[1][3],aa[0][3]);)
|
|
} if (j==i || j==aa[1][3]) Break;
|
|
/* calculate the distance to the center */
|
|
x=box1->frame_vector[j][0];
|
|
y=box1->frame_vector[j][1];
|
|
if ( (aa[0][0]+aa[1][0]-2*x)>= 0 ) Break;
|
|
if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)/4) Break;
|
|
if (aa[0][0]>=x || aa[1][0]>=x) Break;
|
|
i2=j;
|
|
d=line_deviation(box1, j, aa[0][3]);
|
|
/* check if upper left and center point are directly joined directly */
|
|
MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
|
|
if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
|
|
MSG(fprintf(stderr,"ad=%d", ad);)
|
|
d=line_deviation(box1, aa[1][3], j);
|
|
/* check if lower left and center point are directly joined */
|
|
MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
|
|
if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
|
|
MSG(fprintf(stderr,"ad=%d", ad);)
|
|
|
|
/*
|
|
ToDo: calculate momentums or max derivations
|
|
along lines to distinguish )]}>
|
|
i1,i2
|
|
*/
|
|
|
|
if (sdata->gchar) ad=98*ad/100;
|
|
if (sdata->hchar) ad=99*ad/100;
|
|
bc='>';
|
|
Setac(box1,bc,ad);
|
|
break;
|
|
}
|
|
// --- test /\\ ------------------------------------------------
|
|
// if(bc==UNKNOWN)
|
|
// if(!box1->dots)
|
|
for(ad=d=100;dx>3 && dy>3;){ // min 4x4 for 4x6 font
|
|
DBG( wchar_t c_ask='/'; )
|
|
if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
|
|
#if 1
|
|
for(i=y=0;y<dy;y++){
|
|
if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
|
|
if( loop(bp, 0,y,dx,cs,0,RI)
|
|
+ loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
|
|
}
|
|
if( y<dy ) Break;
|
|
if ( i>2 || (i>0 && dy<16)) Break;
|
|
#endif
|
|
/* get the center as exact as possible */
|
|
i2=dx-1-loop(bp,dx-1,dy/2 ,dx,cs,0,LE) // be exact for small fonts
|
|
+dx-1-loop(bp,dx-1,dy/2+dy%2-1,dx,cs,0,LE)
|
|
+ loop(bp, 0,dy/2 ,dx,cs,0,RI)
|
|
+ loop(bp, 0,dy/2+dy%2-1,dx,cs,0,RI);
|
|
if (abs(i2-2*dx)>1+dx/2) Break;
|
|
if (abs(i2-2*dx)> dx/2) ad=99*ad/100;
|
|
|
|
i1=loop(bp,dx-1,dy/16,dx,cs,0,LE); // right side
|
|
i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
|
|
i4=loop(bp, 0,0 ,dx,cs,0,RI); // left side
|
|
i6=loop(bp, 0,dy-1 ,dx,cs,0,RI);
|
|
i=(box1->m4+box1->m3)/2-box1->m2;
|
|
//
|
|
// out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
|
|
|
|
// ~lI
|
|
for(i=i4,y=0;y<dy;y++){
|
|
x=loop(bp,0 ,y,dx,cs,0,RI);if(abs(x-i)>dx/6+1 ) break; i=x;
|
|
} if( y<dy ) Break;
|
|
for(i=i1,y=0;y<dy;y++){
|
|
x=loop(bp,dx-1,y,dx,cs,0,LE);if(abs(x-i)>dx/6+1 ) break; i=x;
|
|
} if( y<dy ) Break;
|
|
if(i1<=dx/8 && i6<=dx/8 && i4-(dx-i3)>dx/4 ) { Setac(box1,(bc='/'),ad);break; }
|
|
if(i4<=dx/8 && i3<=dx/8 && i6-(dx-i1)>dx/4 ) { Setac(box1,(bc='\\'),ad);break; }
|
|
Break;
|
|
}
|
|
// --- test ()<> ------------------------------------------------
|
|
// if(bc==UNKNOWN)
|
|
// if(!box1->dots)
|
|
for(ad=d=100;dx>1 && dy>4;){ // min 3x4
|
|
DBG( wchar_t c_ask='('; )
|
|
if (sdata->holes.num > 1) {Break;}; /* tolerant against a tiny hole */
|
|
#if 1
|
|
for(i=y=0;y<dy;y++){
|
|
if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
|
|
if( loop(bp, 0,y,dx,cs,0,RI)
|
|
+ loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
|
|
}
|
|
if( y<dy ) {Break;};
|
|
if ( i>2 || (i>0 && dy<16)) {Break;};
|
|
#endif
|
|
/* look for the extrema => r1..r2 */
|
|
for(i=dx,r1=r2=y=dy/2-dy/8;y<=dy/2+dy/8;y++){
|
|
j=loop(bp, 0,y,dx,cs,0,RI); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
|
|
j=loop(bp,dx-1,y,dx,cs,0,LE); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
|
|
} y=(r1+r2)/2;
|
|
i1=loop(bp,dx-1, dy/16,dx,cs,0,LE);
|
|
i2=loop(bp,dx-1,y ,dx,cs,0,LE);
|
|
i3=loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
|
|
i4=loop(bp, 0,dy/16 ,dx,cs,0,RI);
|
|
i5=loop(bp, 0,y ,dx,cs,0,RI);
|
|
i6=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI);
|
|
if(dx>dy){
|
|
// from Aug06 vector-version of greater is used
|
|
// if(i2==0 && 3*i5>dx && i4<=dx/8 && i6<=dx/8) { Setac(box1,(bc='>'),98);{Break;}; }
|
|
if(i5==0 && 3*i2>dx && i1<=dx/8 && i3<=dx/8) { Setac(box1,(bc='<'),98);{Break;}; }
|
|
}
|
|
if( dx > 2 && 9*dx>=5*dy ){ // 4x6 screen-font (3*5)
|
|
ad=98;
|
|
if (dx<8) ad=99*ad/100;
|
|
if (dx<6) ad=96*ad/100;
|
|
if( 2*dx > JOB->res.avX && 4*dx>dy ) ad=98;
|
|
// printf(" %d %d %d %d %d %d\n",i5,i1,i3,i2,i4,i6);
|
|
if( i5==0 && i1<=dx/8+1 && i3<=dx/8+1 && i1+i3<=dx/8+1
|
|
&& i2>=dx/2 && i4>=3*dx/4 && i6>=3*dx/4 ) {
|
|
if (2*loop(bp, 0, y/2,dx,cs,0,RI)+1+dx/16<i4+i5) ad=95*ad/100;
|
|
if (2*loop(bp, 0,dy-1-y/2,dx,cs,0,RI)+1+dx/16<i6+i5) ad=95*ad/100;
|
|
Setac(box1,(bc='<'),ad);{Break;};
|
|
}
|
|
/* obsolete code Aug06, will be removed if new code is stable
|
|
if( i2==0 && i4<=dx/8 && i6<=dx/8
|
|
&& i5>=dx/2 && i1>=3*dx/4 && i3>=3*dx/4 ) {
|
|
if (2*loop(bp,dx-1, y/2,dx,cs,0,LE)+1+dx/16<i1+i2) ad=95*ad/100;
|
|
if (2*loop(bp,dx-1,dy-1-y/2,dx,cs,0,LE)+1+dx/16<i3+i2) ad=95*ad/100;
|
|
Setac(box1,(bc='>'),ad);{Break;};
|
|
}
|
|
*/
|
|
}
|
|
|
|
i1=loop(bp,dx-1,dy/16,dx,cs,0,LE);
|
|
i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE);
|
|
i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
|
|
i4=loop(bp, 0,0 ,dx,cs,0,RI);
|
|
i5=loop(bp, 0,dy/2,dx,cs,0,RI);
|
|
i6=loop(bp, 0,dy-1,dx,cs,0,RI);
|
|
i=(box1->m4+box1->m3)/2-box1->m2;
|
|
//
|
|
// out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
|
|
if(2*i2<i1+i3 && 2*i5>i4+i6 && 2*dx<dy && dy>=i){
|
|
Setac(box1,(bc=')'),98);break; }
|
|
if(2*i2>i1+i3 && 2*i5<i4+i6 && 2*dx<dy && dy>=i){
|
|
if(2*i2<=i1+i3+1 || 2*i5>=i4+i6-1) ad=98*ad/100;
|
|
if(2*i2<=i1+i3+2 || 2*i5>=i4+i6-2) ad=98*ad/100;
|
|
for(x=y=0;y<dy/4;y++){
|
|
i=loop(bp,0,y,dx,cs,0,RI);if( i>x ) x=i;
|
|
}
|
|
for(y=0;y<(dy+2)/4;y++){
|
|
i=loop(bp,0,y+dy/8,dx,cs,0,RI);if( i<x ) break;
|
|
}
|
|
if( y==(dy+2)/4 ) {Break;}; // ~l (left upper side must be convex) Jul00
|
|
if (loop(bp,0,dy/2+dy/8,dx,cs,0,RI)-i5>=dx/8+1) ad=99*ad/100; // ~{ Jul09
|
|
if (loop(bp,0,dy/2-dy/8,dx,cs,0,RI)-i5>=dx/8+1) ad=99*ad/100; // ~{ Jul09
|
|
Setac(box1,(bc='('),ad); break;
|
|
}
|
|
Break;
|
|
}
|
|
// --------- test [] --------------------------------
|
|
for(ad=d=98;dx>2 && dy>4 && dy>=2*dx;){ // (3,6) on 4x6 font
|
|
DBG( wchar_t c_ask=']'; )
|
|
if (sdata->holes.num > 1) { Break;} /* tolerant against a tiny hole */
|
|
if (!hchar) ad=97*ad/100;
|
|
for(y=0;y<dy;y++){
|
|
if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
|
|
} if (y<dy) {Break;};
|
|
if( get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) == 2
|
|
&& get_bw(x0,x1,y0+1,y0+1,box1->p,cs,2) == 2 ) {Break;};
|
|
if( get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) == 2
|
|
&& get_bw(x0,x1,y1-1,y1-1,box1->p,cs,2) == 2 ) {Break;};
|
|
if( get_bw(x0 ,x0,y0 ,y1 ,box1->p,cs,2) == 0
|
|
|| get_bw(x0+1 ,x0+1,y0 ,y1 ,box1->p,cs,2) == 0 )
|
|
if( get_bw(x0+dx/2,x1,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
|
|
{ Setac(box1,(bc='['),ad);break; }
|
|
if( get_bw(x1 ,x1,y0 ,y1 ,box1->p,cs,2) == 0
|
|
|| get_bw(x1-1 ,x1-1,y0 ,y1 ,box1->p,cs,2) == 0 )
|
|
if( get_bw(x0,x1-dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
|
|
{ Setac(box1,(bc=']'),ad);break; }
|
|
break;
|
|
}
|
|
|
|
#if CODE_NOT_COMPLETED
|
|
// --- test ] -------
|
|
for(ad=d=100;dx>2 && dy>3;){
|
|
DBG( wchar_t c_ask=']'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if (sdata->holes.num > 0) ad=98*ad/100; /* # */
|
|
/* 1/8 distance to the center */
|
|
d=2*sq(128/16);
|
|
/* now we check for the 4 ends of the x */
|
|
if (aa[0][2]>d) Break;
|
|
if (aa[1][2]>d) Break;
|
|
if (aa[2][2]>d) Break;
|
|
if (aa[3][2]>d) Break;
|
|
if (aa[3][0]-aa[0][0]<7*dx/8) Break;
|
|
if (aa[2][0]-aa[1][0]<7*dx/8) Break;
|
|
if (aa[1][1]-aa[0][1]<7*dy/8) Break;
|
|
if (aa[2][1]-aa[3][1]<7*dy/8) Break;
|
|
if (aa[3][0]-aa[0][0]<2) Break; /* to small */
|
|
if (aa[2][0]-aa[1][0]<2) Break; /* to small */
|
|
MSG( fprintf(stderr," aa %d %d %d %d %d %d %d %d d %d %d %d %d",\
|
|
aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\
|
|
aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,\
|
|
aa[0][2],aa[1][2],aa[2][2],aa[3][2]);)
|
|
/* left and right vertical line */
|
|
d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
|
|
ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
|
|
d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;
|
|
|
|
/* search uppermost left ^ */
|
|
i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y0);
|
|
x=box1->frame_vector[i1][0];
|
|
y=box1->frame_vector[i1][1];
|
|
if (y-y0 > 5*dy/8) Break;
|
|
if (x-x0 > 5*dx/8) Break;
|
|
/* search uppermost right ^ ~H */
|
|
i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
|
|
if ( box1->frame_vector[i3][0]-x> dx/4
|
|
&& box1->frame_vector[i3][1]-y<=dy/8) Break;
|
|
|
|
/* check if upper left and lower right point are joined directly */
|
|
dbg[0]=d=line_deviation(box1,i1, aa[2][3]); if (d >2*sq(1024/4)) Break;
|
|
/* check if lower left and lower left point are joined directly */
|
|
dbg[1]=d=line_deviation(box1, aa[1][3],i1); if (d >2*sq(1024/4)) Break;
|
|
|
|
if (!hchar) ad=99*ad/100;
|
|
if ( gchar) ad=98*ad/100; // \sc N
|
|
ac=(wchar_t) ']';
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
#endif
|
|
// --------- test ocr-a-[] --------------------------------
|
|
if(bc==UNKNOWN)
|
|
for(ad=d=98;dx>5 && dy>7 && 2*dy>3*dx;){ // only for accurate font at the moment
|
|
DBG( wchar_t c_ask='['; )
|
|
if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
|
|
if (!hchar) ad=97*ad/100;
|
|
if( num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) break;
|
|
if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
|
|
if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)
|
|
+loop(bp, 0,dy/2,dx,cs,0,RI) <= dx/4 ) break; // O
|
|
for(y=dy/8;y<dy-dy/8;y++){
|
|
if( num_cross(0,dx,y,y,bp,cs) != 2 ) break;
|
|
} if (y<dy-dy/8) break;
|
|
if( get_bw((3*x0+5*x1)/8,x1,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
|
|
{ Setac(box1,(bc='['),ad);break; }
|
|
if( get_bw(x0,(5*x0+3*x1)/8,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
|
|
{ Setac(box1,(bc=']'),ad);break; }
|
|
break;
|
|
}
|
|
// --------- test {} --------------------------------
|
|
for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
|
|
DBG( wchar_t c_ask='{'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if (!hchar) ad=97*ad/100;
|
|
for(y=0;y<dy;y++){
|
|
if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
|
|
} if (y<dy) Break;
|
|
for(x=0;x<dx/2;x++){
|
|
if( num_cross(dx-1-x,dx-1-x,0,dy-1,bp,cs) != 2 ) break;
|
|
} if (y<dx/2) Break;
|
|
if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
|
|
if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
|
|
if ( loop(bp,0,dy-1,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100;
|
|
if ( loop(bp,0, 0,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100; // <
|
|
if ( loop(bp,0, 0,dy,cs,0,DO)<dy/2-1 ) ad=98*ad/100;
|
|
if ( loop(bp,0,dy-1,dy,cs,0,UP)<dy/2-2 ) ad=98*ad/100; // (
|
|
if ( loop(bp,dx-1,0,dx,cs,0,LE)
|
|
+ loop(bp,dx-1,2,dx,cs,0,LE)
|
|
- 2*loop(bp,dx-1,1,dx,cs,0,LE) >=dx/8 ) ad=98*ad/100; // <
|
|
if ( loop(bp,dx-2,dy-1,dy,cs,0,UP)>dy/4 ) Break; // f
|
|
if ( get_bw(x0,x0,y0,y0+dy/4,box1->p,cs,1) == 1
|
|
|| get_bw(x0,x0,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
|
|
Setac(box1,(bc='{'),ad);Break;
|
|
}
|
|
for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
|
|
DBG( wchar_t c_ask='}'; )
|
|
if (!hchar) ad=97*ad/100;
|
|
for(y=0;y<dy;y++){
|
|
if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
|
|
} if (y<dy) Break;
|
|
for(x=0;x<dx/2;x++){
|
|
if( num_cross(x,x,0,dy-1,bp,cs) != 2 ) break;
|
|
} if (y<dx/2) Break;
|
|
if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
|
|
if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
|
|
if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;}
|
|
if ( loop(bp,dx-1, 0,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;} // >
|
|
if ( loop(bp,dx-1, 0,dy,cs,0,DO)<dy/2-1 ) {ad=98*ad/100;}
|
|
if ( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dy/2-2 ) {ad=98*ad/100;} // )
|
|
if ( loop(bp,0,0,dx,cs,0,RI)
|
|
+ loop(bp,0,2,dx,cs,0,RI)
|
|
- 2*loop(bp,0,1,dx,cs,0,RI) >=dx/8 ) ad=98*ad/100; // <
|
|
if ( loop(bp,1,dy-1,dy,cs,0,UP)>dy/4 ) Break; // ???
|
|
if ( get_bw(x1,x1,y0,y0+dy/4,box1->p,cs,1) == 1
|
|
|| get_bw(x1,x1,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
|
|
Setac(box1,(bc='}'),ad);Break;
|
|
}
|
|
return box1->c;
|
|
}
|
|
|
|
#if 0
|
|
/* ---------- empty prototype function for copy and expand ---------- */
|
|
static wchar_t ocr0_XXX(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,i0,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
|
|
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
ac,ad; /* tmp-vars */
|
|
|
|
// --- test XXX ---------------------------------------------------
|
|
return box1->c;
|
|
}
|
|
#endif
|
|
|
|
|
|
/* ----------------------- part9 -------------------------------- */
|
|
/*
 * ocr0p9 - part 9 of the rule based engine.
 *
 * Runs, in this order, the tests for: sharp s, '+', '$', '&', an italic
 * '&' variant, '?', '!', three '*' variants, '@' and the section sign
 * on the box sdata->box1.
 *
 * Parameter: sdata - shared data; this function reads sdata->box1,
 *            sdata->bp, sdata->cs and sdata->holes.
 * Return:    bc, the "best letter". It starts as UNKNOWN and is only
 *            assigned by the sharp s test, the first '&' test and the
 *            '?' test. Several tests return early with their character
 *            when the probability ad is >= 100; the '?' test always
 *            returns on success.
 *
 * Structure: every test is a for(init;size-gate;){...} loop used as a
 * single-pass block. The loop condition is the minimum box size, and
 * every path through a body ends in break, Break or return.
 * Break (capital B), Setac, DBG and MSG are macros defined outside this
 * chunk; presumably Break is a break with optional debug output and
 * Setac registers a candidate char with probability for the box - verify
 * at their definitions.
 *
 * Coordinates: calls that pass box1->p use x0..x1 / y0..y1, calls that
 * pass bp use 0..dx-1 / 0..dy-1 (presumably bp is the box-relative copy
 * of the same pixels - TODO confirm).
 */
static wchar_t ocr0p9(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
i1,i2,i3,i4; /* tmp-vars */
|
|
int xa,xb, /* used to store significant points of the char */
|
|
dbg[9]={0,0,0,0,0,0,0,0,0}, /* debugging space */
|
|
ya,yb,ad,cs=sdata->cs;
|
|
wchar_t ac,bc=UNKNOWN; // bestletter
|
|
int hchar; // char is higher than e
|
|
int gchar; // char has ink lower than m3
|
|
// --- hchar --- gchar -------------------------
|
|
/* hchar: y0 is at or above the midpoint of m1 and m2 (2*y0 <= m1+m2) */
hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
|
|
/* gchar: y1 is at or below the midpoint of m3 and m4 (2*y1 >= m3+m4) */
gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
|
|
// if the char is slightly moved down correction can be done
|
|
if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
|
|
if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
|
|

|
/* reserved for the future */
|
|
// --- test beta,\3,sz,"s ---------------------------------------------
|
|
/* only tried for high chars; on success sets bc and registers sharp s with 98 */
if(bc==UNKNOWN && hchar)
|
|
for(ad=d=100;dx>3 && dy>6;){ // min 4x7
|
|
DBG( wchar_t c_ask='S'; )
|
|
if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
|
|
/* this part is provisorium, should be changed!
|
|
a-\
|
|
| d
|
|
b| /
|
|
| \
|
|
-c /
|
|
*/
|
|
if( num_cross(x0 ,x1 ,y0+dy/4 ,y0+dy/4 ,box1->p,cs) != 2
|
|
&& num_cross(x0 ,x1 ,y0+dy/4+1,y0+dy/4+1,box1->p,cs) != 2 ) break;
|
|
/* i is an error budget (1+dy/16): each failed row check below decrements it */
for(i=1+dy/16,y=y0+dy/8;y<y1-dy/4 && i>0;y++){
|
|
if( y<y1-6*dy/16 ){ if( num_cross(x0 ,x1 ,y,y,box1->p,cs) != 2 ) i--;}
|
|
else { if( num_cross(x0 ,x1 ,y,y,box1->p,cs) < 2 ) i--;}
|
|
if( get_bw(x0,x0+dx/2,y,y,box1->p,cs,1) == 0 ) i--;
|
|
if( y<y1-5*dy/16 )
|
|
if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) i--;
|
|
} if( i<=0 ) break;
|
|
// out_x(box1);
|
|

|
for(y=y0+dy/3;y<y1-dy/3;y++){
|
|
i =loop(box1->p,x1,y,dx,cs,0,LE);
|
|
if( i>=dx/8 ) break;
|
|
i+=loop(box1->p,x1-i,y,dx,cs,1,LE);
|
|
if( i>=dx/2 ) break;
|
|
} if( y>=y1-dy/3 ) break;
|
|

|
for(y=y0+dy/5;y<y0+dy/3;y++)
|
|
if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
|
|
if( y>=y0+dy/3 ) break;
|
|

|
for(y=y0+dy/2;y<y1;y++)
|
|
if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
|
|
if( y>=y1 ) break;
|
|

|
for(y=y1-dy/3;y<y1-dy/8;y++){
|
|
i=loop(box1->p,x1,y,dx,cs,0,LE);
|
|
if( i>dx/4
|
|
&& get_bw(x1-dx/8,x1-dx/8,y,y1,box1->p,cs,1) == 1 ) break;
|
|
} if( y<y1-dy/8 ) break; // ~Q
|
|

|
if( box1->m3==0 || 2*y1<box1->m3+box1->m4 )
|
|
if( loop(box1->p,x1,y1, dx,cs,0,LE)==0
|
|
&& loop(box1->p,x1,y1-dy/4,dx,cs,0,LE)>dx/8 ) break; // ~R
|
|

|

|
for(x=x0+dx/4;x<x1-dx/4;x++)
|
|
if( num_cross(x,x,y0,y1,box1->p,cs) == 3 ) break;
|
|
if( x>=x1-dx/4 ) break;
|
|

|
i=loop(bp,dx/2,dy-1,dy,cs,0,UP)+dy/64; // Jul00
|
|
for(x=dx/5;x<dx/2;x++)
|
|
if( loop(bp,x,dy-1,dy,cs,0,UP) > i ) break;
|
|
if( x==dx/2 ) break;
|
|

|
x=x0+loop(bp,0,dy/4,dx,cs,0,RI);
|
|
for(;x<x1-dx/3;x++)
|
|
if( get_bw(x,x,y0,y0+dy/4,box1->p,cs,1) == 0 ) break;
|
|
if( x<x1-dx/3 ) break;
|
|

|
if( !gchar )
|
|
// if( num_hole( x0, x1, y0, y1,box1->p,cs,NULL) != 0 ) break;
|
|
if (sdata->holes.num != 0) break;
|
|

|
bc=LATIN_SMALL_LETTER_SHARP_S;
|
|
Setac(box1,(wchar_t)bc,98);
|
|
break;
|
|
}
|
|
// --- test + ------------------------------------------------
|
|
/* the four corner regions must be free of ink, and a horizontal bar must exist */
for(ad=d=100;dx>2 && dy>2;){ // min 3x3
|
|
DBG( wchar_t c_ask='+'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
xa=(dx+1)/3-1; ya=(dy+1)/3-1; /* size of the 4 gaps = 1/3 * 1/3 */
|
|
xb=(dx+1)/4; yb=(dy+2)/4; /* smaller gap */
|
|
if( get_bw(x0,x0+xa,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // left upper
|
|
if( get_bw(x0,x0+xa,y1-ya,y1,box1->p,cs,1) == 1 ) Break; // left lower
|
|
/* NOTE(review): right upper uses xb where the other three corners use xa - confirm intended */
if( get_bw(x1-xb,x1,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // right upper
|
|
if( get_bw(x1-xa,x1,y1-ya,y1,box1->p,cs,1) == 1 ) { // right lower
|
|
if( get_bw(x1-xa,x1,y1-yb,y1,box1->p,cs,1) == 1 ) Break;
|
|
ad=99*ad/100; // smoothed inner corner? 0907
|
|
}
|
|
/* i keeps the first row y for which get_bw(...,2) returns 0; i stays 0 if none */
for(i=0,y=y0+ya;y<=y1-ya;y++){ // horizontal line
|
|
if( get_bw(x0+dx/9,x1-dx/9,y,y,box1->p,cs,2) == 0 ) { i=y; break; }
|
|
}
|
|
if (3*dx<2*dy) ad=99*ad/100; // ~t
|
|
if( !i ) Break;
|
|
ac=(wchar_t) '+';
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
// --- test $ ------------------------------------------------
|
|
for(ad=d=99;dx>3 && dy>5;){ // min 4x6
|
|
DBG( wchar_t c_ask='$'; )
|
|
/* exactly two holes are required, so hole[0] and hole[1] are both read below */
if (sdata->holes.num != 2) Break;
|
|

|
if( get_bw(x0,x0+dx/5,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
|
|
if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
|
|
if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
|
|
if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
|
|
if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break;
|
|
i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1<x0+dx/3 || i1>x1-dx/5 ) Break;
|
|
i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2<x0+dx/5 || i2>i1 ) Break;
|
|
/* scale ad by the result of get_line2 for the stroke from (i1,y0) to (i2,y1) */
ad= get_line2(i1,y0,i2,y1,box1->p,cs,100)*ad/100;
|
|
// check upper left and lower right half circle, $ (Oct08: removed)
|
|
// Oct08 JS: check the position of holes (better for tiny fonts)
|
|
// upper hole must be the first!? (hole[].x0 = relative coordinates)
|
|
if ( sdata->holes.hole[0].y0 <
|
|
sdata->holes.hole[1].y0 ) i4=0; else i4=1; /* sort to [idx^i4] */
|
|
if ( sdata->holes.hole[0^i4].y1 >
|
|
sdata->holes.hole[1^i4].y0 ) Break; /* no y-overlap allowed */
|
|
// upper left hole
|
|
// fprintf(stderr,"\nDBG hole[0]=x0=%d %d", sdata->holes.hole[0^i4].x0, sdata->holes.hole[0^i4].x1);
|
|
if ( sdata->holes.hole[0^i4].x0 > (dx+1)/3 ) Break;
|
|
if ( sdata->holes.hole[0^i4].x1 > dx/2+dx/4 ) Break;
|
|
if ( sdata->holes.hole[0^i4].y1 >= dy/2+dy/8 ) Break;
|
|
if ( sdata->holes.hole[0^i4].y0 > dy/2-dy/8 ) Break;
|
|
// lower right hole
|
|
// fprintf(stderr,"\nDBG hole[1]=x0=%d %d", sdata->holes.hole[1^i4].x0, sdata->holes.hole[1^i4].x1);
|
|
if ( sdata->holes.hole[1^i4].x0 <= dx/2-dx/4 ) Break;
|
|
if ( sdata->holes.hole[1^i4].x1 < dx/2+dx/4 ) Break;
|
|
if ( sdata->holes.hole[1^i4].y1 < dy/2+dy/8 ) Break;
|
|
if ( sdata->holes.hole[1^i4].y0 <= dy/2-dy/8 ) Break;
|
|
if (ad<95) Break;
|
|
ac=(wchar_t) '$';
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
// --- test & ------------------------------------------------
|
|
for(ad=d=99;dx>3 && dy>4;){ /* 4x6 font */
|
|
DBG( wchar_t c_ask='&'; )
|
|
if (sdata->holes.num != 2) Break;
|
|
if( get_bw(x1-dx/9,x1,y0,y0+dy/4,box1->p,cs,1) == 1 ) Break; // g
|
|
if( loop(bp,dx/2,0,dy,cs,0,DO)>dy/2) Break;
|
|
i1=loop(bp,0,dy/8 ,dx,cs,0,RI); if (i1>dx/2) Break;
|
|
/* NOTE(review): the next line re-tests i1 (already tested above), not the new i;
   as written the second test can never fire - confirm whether i>dx/2 was meant.
   The same pattern repeats for i3 two lines further down. */
i =loop(bp,0,dy/4 ,dx,cs,0,RI); if (i1>dx/2) Break; if (i<i1) i1=i;
|
|
i3=loop(bp,0,dy-dy/4 ,dx,cs,0,RI); if (i3>dx/2) Break;
|
|
i =loop(bp,0,dy-dy/4-1,dx,cs,0,RI); if (i3>dx/2) Break; if (i<i3) i3=i;
|
|
if (i3>i1) Break;
|
|
/* i2 = largest loop(...,RI) result from the left edge over rows dy/4 .. dy/2+1 */
for( i2=0, y=dy/4; y<=dy/2+1; y++ ){
|
|
i =loop(bp,0,y,dx,cs,0,RI); if( i>i2 ) i2=i;
|
|
}
|
|
if(2*i2-i1-i3<1) Break;
|
|
// if( num_hole(x0,x1 ,y0,y1,box1->p,cs,NULL)!=2 ) Break;
|
|
if( num_hole(x0,x1-dx/4,y0,y1,box1->p,cs,NULL)!=2 ) Break;
|
|
if( num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) < 1 ) Break;
|
|
for( x=dx-1; x>=dx/2; x-- ){
|
|
if( num_cross(x,x,dy/4,dy-1,bp,cs) > 1 ) break;
|
|
} if( x<=3*dx/4 && x<dx-2) Break;
|
|
if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) > 3 ) { // glued ah
|
|
if (dy>15) { Break; } else ad=96*ad/100;
|
|
}
|
|
if (!hchar) ad=98*ad/100;
|
|
bc=(wchar_t) '&';
|
|
Setac(box1,bc,ad);
|
|
if (ad>=100) return bc;
|
|
break;
|
|
}
|
|
// --- test \it & like \epsilon\tau ------------------------------
|
|
/* only tried when no best letter was found so far; does not assign bc */
if(bc==UNKNOWN)
|
|
for(ad=d=100;dx>7 && dy>7;){
|
|
DBG( wchar_t c_ask='&'; )
|
|
if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
|
|
if( num_cross(0,dx-1, dy/4, dy/4,bp,cs) != 3 ) break;
|
|
if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 4 ) break;
|
|
if( num_cross(dx/2,dx-1,dy/2, dy/2,bp,cs) != 2 ) break;
|
|
if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) != 2 ) break;
|
|
if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 ) break;
|
|
if( num_cross( 0, 0,0,dy-1,bp,cs) != 1 ) break;
|
|
if( num_cross( dx/3, dx/3,0,dy-1,bp,cs) != 4 ) break;
|
|
if( num_cross(13*dx/16,13*dx/16,0,dy/8,bp,cs) != 0 ) break;
|
|
if( num_cross(4*dx/8,4*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
|
|
if( num_cross(3*dx/8,3*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
|
|
if( num_cross(5*dx/8,5*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
|
|
if( num_hole(x0 ,(x0+x1)/2,y0, y1,box1->p,cs,NULL) != 1 ) break;
|
|
if( num_hole(x0+dx/8,x1-dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
|
|
ac=(wchar_t) '&';
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
// --- test ? ---------------------------------------------------
|
|
/* on success this test registers '?' with 98 and returns immediately */
for(ad=d=98;dx>2 && dy>5;){ // min 3x(4+2)
|
|
DBG( wchar_t c_ask='?'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
if ( num_cross(x0, x1, y0, y0, box1->p, cs) !=1 ) Break; // ~?
|
|
if ( num_cross(x0, x1, y1, y1, box1->p, cs) > 1 ) Break; // ~?
|
|
for(y=y0;y<y1;y++) // new y1
|
|
if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
|
|
if (2*y<y0+y1) Break;
|
|
i1=y1;
|
|
if (y==y1 && box1->m4) { // probably lower dot not caught in box?
|
|
if (get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) != 1 ) Break;
|
|
i1=box1->m4;
|
|
for(;i1>y1;i1--) // new y1
|
|
if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
|
|
}
|
|
/* NOTE(review): i set on the next line is not read again inside this test */
y--; i=y-y0+1; // new dy
|
|
for (y=0;y<dy/2;y++)
|
|
if( num_cross(x0, x1, y0+y, y0+y, box1->p, cs) == 2 ) break;
|
|
if (y==dy/2) Break;
|
|
// if( num_hole( x0, x1, y0, y1, box1->p,cs,NULL) > 0 ) Break;
|
|
if (sdata->holes.num > 0) Break;
|
|
/* search downwards from the middle for a gap row, then for the next inked row (the dot) */
for(y=y0+dy/2;y<=i1;y++)
|
|
if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
|
|
if( y==i1 ) Break;
|
|
for( ;y<=i1;y++)
|
|
if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
|
|
if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+7*dx/8,x1,y,i1,box1->p,cs,1) == 1 ) Break; // broken thin 2
|
|
bc='?';
|
|
Setac(box1,(wchar_t)bc,98);
|
|
return bc;
|
|
}
|
|
// --- test !| ---------------------------------------------------
|
|
/* registers '!' only; bc is not changed by this test */
for(ad=d=99; dy>4 && dy>2*dx;){ // min 3x4
|
|
DBG( wchar_t c_ask='!'; )
|
|
if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
|
|
// measure thickness
|
|
if (num_cross(x0,x1,y0 ,y0 ,box1->p,cs)!=1) Break;
|
|
if (num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs)!=1) Break;
|
|
for(y=y0;y<y1;y++) // new y1
|
|
if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
|
|
if (2*y<y0+y1) Break;
|
|
if (y==y1 && y>box1->m3-dy/8) ad=ad*97/100; /* missing dot? */
|
|
i1=y1;
|
|
if (y==y1 && box1->m4) { // probably lower dot not caught in box?
|
|
if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
|
|
|| (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1 )) {
|
|
i1=box1->m4;
|
|
for(;i1>y1;i1--) // new y1
|
|
if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
|
|
}
|
|
} i2=i1;
|
|
/* i1 = first row with zero crossings (0 if none); any row with >1 crossing aborts the scan */
for( i1=0,y=y0;y<=i2;y++){
|
|
i=num_cross(x0,x1,y,y,box1->p,cs); if(i>1) break;
|
|
if(i==0 && i1==0) i1=y;
|
|
} if(y<=i2 || i1==0 || i1<y0+dy/2) Break;
|
|

|
if( loop(bp,dx-1,dy/8,dx,cs,0,LE)
|
|
-loop(bp,dx-1, 0,dx,cs,0,LE)>dx/4+1 ) Break; // f
|
|

|
if (!hchar) ad=96*ad/100;
|
|
Setac(box1,(wchar_t)'!',ad);
|
|
break;
|
|
}
|
|
// --- test * with five arms (spokes) ----
|
|
for(ad=d=99;dx>2 && dy>4;){
|
|
DBG( wchar_t c_ask='*'; )
|
|
if (sdata->holes.num > 0) Break; /* no holes accepted here */
|
|
/* NOTE(review): in the next two scans the y arguments differ (0..dy-1 and 1..dy-2),
   unlike the single-row scans that follow - confirm this is intended. */
if( num_cross(0,dx-1, 0,dy-1,bp,cs) != 1
|
|
&& num_cross(0,dx-1, 1,dy-2,bp,cs) != 1 ) Break;
|
|
if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
|
|
&& num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
|
|
x=dx/2;y=(6*dy+8)/16; // center point 6/8=6/2^3 rounded
|
|
/* upwards from center */
|
|
dbg[0]=i=get_line2(x,y,x ,0,bp,cs,100); if(i<95) Break;
|
|
if (dx<8) /* be exact on small fonts, where get_line2 returns 100 (ToDo change) */
|
|
if (get_bw(x,x,0,y,bp,cs,2)==2) Break;
|
|
/* horizontal */
|
|
dbg[1]=i=get_line2(0,y,dx-1,y,bp,cs,100); if(i<95) Break;
|
|
if (dy<8)
|
|
if (get_bw(0,dx-1,y ,y ,bp,cs,2)==2
|
|
&& get_bw(0,dx-1,y+1,y+1,bp,cs,2)==2) Break;
|
|
/* down (right) */
|
|
/* take the better of two candidate end points; dbg[2] is only written when the second one wins */
i=get_line2(x,y,(5*dx+4)/8,dy-1,bp,cs,100);
|
|
j=get_line2(x,y,(6*dx+4)/8,dy-1,bp,cs,100); if(j>i) dbg[2]=i=j;
|
|
if(i<95) Break;
|
|
/* down (left) */
|
|
dbg[3]=i=get_line2(x, y,(2*dx+4)/8,dy-1,bp,cs,100); if(i<95) Break; // straight up
|
|
/* check for lower gap at bottom */
|
|
dbg[4]=i=get_bw( x, x,dy-1-dy/8,dy-1,bp,cs,1); if(i==1) Break;
|
|
dbg[5]=i=get_line2( dx/4,dy/4, 0,0,bp,cs,101); if(i<95) Break; // upper left gap
|
|
dbg[6]=i=get_line2(dx-1-dx/4,dy/4,dx-1,0,bp,cs,101); if(i<95) Break; // upper right gap
|
|
MSG(fprintf(stderr,"%d %d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5],dbg[6]);)
|
|
Setac(box1,(wchar_t)'*',ad);
|
|
break;
|
|
}
|
|
// --- test * with six arms incl. vertical line (|+X) ----
|
|
for(ad=d=100;dx>4 && dy>4;){
|
|
DBG( wchar_t c_ask='*'; )
|
|
if (sdata->holes.num > 0) Break; /* no holes accepted here */
|
|
if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 3
|
|
&& num_cross(0,dx-1, 1+dy/8, 1+dy/8,bp,cs) != 3) Break;
|
|
if( num_cross(0,dx-1,dy-2-dy/8,dy-2-dy/8,bp,cs) != 3) Break;
|
|
if( num_cross(0 , 0, 0,dy-1,bp,cs) != 2) Break;
|
|
if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) != 2) Break;
|
|
if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 1) Break;
|
|
if( num_cross( 0 ,dx/8,dy/2,dy/2,bp,cs) != 0) Break;
|
|
if( num_cross(dx-1-dx/8,dx-1,dy/2,dy/2,bp,cs) != 0) Break;
|
|
/* the line checks are skipped for dx<=5 */
if (dx>5) {
|
|
dbg[0]=i=get_line2(0,dy-2-dy/8,dx-1,dy/8,bp,cs,100); if(i<95) Break; // black upwards beam
|
|
dbg[1]=i=get_line2(0,dy/8,dx-1,dy-2-dy/8,bp,cs,100); if(i<95) Break; // black downwards beam
|
|
/* check vertical line */
|
|
dbg[2]=i=get_line2(dx/2,0,dx/2, dy-1,bp,cs,100); if(i<95) Break;
|
|
}
|
|
MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
|
|
/* registered with a fixed 99, not with ad */
Setac(box1,(wchar_t)'*',99);
|
|
break;
|
|
}
|
|
// --- test * with six arms '*'='x'+'-' incl. horizontal line ----
|
|
for(ad=d=100;dx>3 && dy>4;){
|
|
DBG( wchar_t c_ask='*'; )
|
|
if (sdata->holes.num > 0) Break; /* no holes accepted here */
|
|
if( num_cross( dx/8, dx/8, 0, dy-1,bp,cs) != 3
|
|
&& num_cross(1+dx/8,1+dx/8, 0, dy-1,bp,cs) != 3) Break;
|
|
if( num_cross(dx-1-dx/8,dx-1-dx/8,0,dy-1,bp,cs) != 3
|
|
&& num_cross(dx-2-dx/8,dx-2-dx/8,0,dy-1,bp,cs) != 3) Break;
|
|
if( num_cross( 0,dx-1, 0, 0,bp,cs) != 2) Break;
|
|
if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 2) Break;
|
|
if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 1) Break;
|
|
// check upper and lower gap
|
|
if( num_cross(dx/2,dx/2,0,dy/8,bp,cs) != 0) Break;
|
|
if( num_cross(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs) != 0) Break;
|
|
/* the line checks are skipped for dx<=5 */
if (dx>5) {
|
|
dbg[0]=i=get_line2(dx-2-dx/8,0,dx/8,dy-1,bp,cs,100); if(i<95) Break; // black upwards beam
|
|
dbg[1]=i=get_line2(dx/8,0,dx-2-dx/8,dy-1,bp,cs,100); if(i<95) Break; // black downwards beam
|
|
/* check horizontal line */
|
|
dbg[2]=i=get_line2(0,dy/2,dx-1,dy/2,bp,cs,100); if(i<95) Break;
|
|
}
|
|
MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
|
|
/* registered with a fixed 98, not with ad */
Setac(box1,(wchar_t)'*',98);
|
|
break;
|
|
}
|
|
// --- test @ - a popular char should be detectable! added in version v0.2.4a5
|
|
if(bc==UNKNOWN)
|
|
for(ad=d=99;dx>5 && dy>7;){
|
|
DBG( wchar_t c_ask='@'; )
|
|
if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
|
|
// check ~ 7x10 0 with dot in it
|
|
// num_holes==1 + hole.y0<=dy/8,>=y1-dy/8
|
|
/* NOTE(review): the '$' test above states that hole[] coordinates are relative to
   the box, but here they are compared against y0+dy/8 and y1-dy/8 (box1->y0 and
   box1->y1 added). Confirm whether dy/8 and dy-1-dy/8 were meant. */
if (sdata->holes.num==1
|
|
&& sdata->holes.hole[0].y0<=y0+dy/8
|
|
&& sdata->holes.hole[0].y1>=y1-dy/8) Break;
|
|
if (loop(bp, 0,dy/2,dx,cs,0,RI)>dx/4) Break;
|
|
if (loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/4) Break;
|
|
/* NOTE(review): the next two checks scan vertically (UP/DO) but compare with dx/8 - confirm */
if (loop(bp,dx/2,dy-1,dy,cs,0,UP)>dx/8) Break;
|
|
if (loop(bp,dx/2, 0,dy,cs,0,DO)>dx/8) Break;
|
|
/* ..@@@@..<- 8x10 example
|
|
.@@..@@.
|
|
@@....@@
|
|
@@..@@@@<
|
|
@@.@@.@@
|
|
@@.@@.@@
|
|
@@..@@@.
|
|
@@......
|
|
.@@...@@
|
|
..@@@@@.<- */
|
|
x=6*dx/16;
|
|
y=dy/2;
|
|
i=num_cross(0,dx-1,y,y,bp,cs);
|
|
if (i<3 || i>4) Break;
|
|
if( i != 4 && dx>8 ) ad=98*ad/100;
|
|

|
/* vertical crossings at column x; if not 4, try x+1 and x+2 and keep the count closest to 4 */
i=num_cross(x,x,0,dy-1,bp,cs); if (i<2) Break;
|
|
if (i!=4) { j=num_cross(x+1,x+1,0,dy-1,bp,cs);
|
|
if (abs(4-j)<abs(i-4)) i=j; }
|
|
if (i!=4) { j=num_cross(x+2,x+2,0,dy-1,bp,cs);
|
|
if (abs(4-j)<abs(i-4)) i=j; }
|
|
if (i<3 || i>4) Break;
|
|
if (i!=4) ad=97*ad/100;
|
|
if( num_cross(0, x,y,y,bp,cs) != 2 ) Break;
|
|
if( num_cross(x,dx-1,y,y,bp,cs) != 2 ) Break;
|
|
if( num_cross(x,x,0, y,bp,cs) != 2 ) Break;
|
|
if( num_cross(x,x,y,dy-1,bp,cs) != 2 ) Break;
|
|
/* the hole checks are only applied for dx>7 */
if (dx>7) {
|
|
// if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
|
|
if (sdata->holes.num != 1) Break;
|
|
if( num_hole(x0+dx/8,x1-3*dx/16,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
|
|
}
|
|
Setac(box1,(wchar_t)'@',ad);
|
|
break;
|
|
}
|
|
// --- test paragraph v0.2.6
|
|
if(bc==UNKNOWN && hchar)
|
|
for(ad=d=100;dx>4 && dy>15;){
|
|
/* NOTE(review): c_ask is '$' here although the test registers SECTION_SIGN;
   c_ask only appears inside DBG(), its use is not visible in this chunk */
DBG( wchar_t c_ask='$'; )
|
|
if (sdata->holes.num > 3) break; /* tolerant against a tiny hole */
|
|
if( get_bw( 0,dx/2,3*dy/4,3*dy/4,bp,cs,1) == 1 ) break;
|
|
if( get_bw(3*dx/4,dx-1,3*dy/4,3*dy/4,bp,cs,1) == 0 ) break;
|
|
if( get_bw( 0,dx/4, dy/4, dy/4,bp,cs,1) == 0 ) break;
|
|
if( get_bw( dx/2,dx-1, dy/4, dy/4,bp,cs,1) == 1 ) break;
|
|
if( get_bw(dx/2,dx/2, 0, dy/4,bp,cs,1) == 0 ) break;
|
|
if( get_bw(dx/2,dx/2,dy-1-dy/4, dy-1,bp,cs,1) == 0 ) break;
|
|
if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 4 ) break;
|
|
if( num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs) != 2 ) break;
|
|
if( num_hole( x0,x1,y0+dy/4,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
|
|
/* registered with a fixed 96; bc stays unchanged */
Setac(box1,SECTION_SIGN,96);
|
|
break; // paragraph=0xA7=167
|
|
}
|
|

|
/* bc is UNKNOWN here unless the sharp s test or the first '&' test set it */
return bc;
|
|
}
|
|
|
|
/* ----------------------- partx -------------------------------- */
|
|
static wchar_t ocr0px(ocr0_shared_t *sdata){
|
|
struct box *box1=sdata->box1;
|
|
pix *bp=sdata->bp;
|
|
int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
|
|
int dx=x1-x0+1,dy=y1-y0+1, /* size */
|
|
i1,i2,i3,i4,j1,cs=sdata->cs; /* tmp-vars */
|
|
int ya,ad; /* used for store significant points of char */
|
|
wchar_t ac,bc=UNKNOWN; // bestletter
|
|
int hchar; // char is higher than e
|
|
int gchar; // char has ink lower than m3
|
|
// --- hchar --- gchar -------------------------
|
|
hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
|
|
gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
|
|
// if the char is slightly moved down correction can be done
|
|
if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
|
|
if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
|
|
|
|
/* reserved for special chars, to test at the end */
|
|
// --- test 'ff' ---------------------------------------------------
|
|
// ToDo: better check and call test 'f' and 'f' with subboxes
|
|
if( bc==UNKNOWN )
|
|
for(ad=98;dx>4 && dy>6;){ // Dec00 body copied from H
|
|
DBG( wchar_t c_ask='f'; )
|
|
if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
|
|
if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2
|
|
&& num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 ) break;
|
|
if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2
|
|
&& num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) break;
|
|
if( loop(bp,0 ,dy/8,dx,cs,0,RI)
|
|
+ loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) break; // ~A
|
|
for( j1=0,i=1,y=y0+dy/10; y<y1-dy/10 && i; y++ ) // 2 vertikal lines
|
|
{ j=loop(box1->p,x0 ,y,dx,cs,0,RI)
|
|
+loop(box1->p,x1 ,y,dx,cs,0,LE);
|
|
if( j>10*dx/16 ) i=0; if ( j>j1 ) j1=j; }
|
|
if( !i ) break;
|
|
for( x=dx/4; x<dx/2; x++ ){ // lower gap
|
|
y=loop(bp,x ,dy-1,dy,cs,0,UP);
|
|
if ( y > 3*dy/8 ) break;
|
|
if ( 10*y > dy ){ /* italic */
|
|
i=loop(bp,x ,dy-y,dx,cs,0,RI);
|
|
if( i>1 && y+loop(bp,x+i-1,dy-y,dy,cs,0,UP)>3*dy/8 ) break;
|
|
}
|
|
} if( x>=dx/2 ) break;
|
|
x=loop(box1->p,x0 ,y1-dy/8,dx,cs,0,RI)
|
|
+loop(box1->p,x1 ,y1-dy/8,dx,cs,0,LE);
|
|
for( i=1,y=dy/4; y<dy-1-dy/4 && i; y++ ) // max - min width
|
|
{ j=loop(bp,0 ,y,dx,cs,0,RI)
|
|
+loop(bp,dx-1,y,dx,cs,0,LE); if( j-x>dx/5 ) i=0; }
|
|
if( !i ) break; // ~K Jul00
|
|
for( i=0,ya=y=y0+dy/4; y<y1-dy/3; y++ ) // horizontal line
|
|
{ j=loop(box1->p,x0 ,y,dx,cs,0,RI);
|
|
j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } }
|
|
if( i<=dx/2 ) break; ya-=y0;
|
|
if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1
|
|
&& num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) break; /* Dec00 */
|
|
for( y=ya; y<dy-dy/4; y++ ) // ~M Dec00
|
|
if( num_cross(0,dx-1,y ,y ,bp,cs) > 2
|
|
&& num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break;
|
|
if ( y<dy-dy/4 ) break;
|
|
for(i=1,x=x0+dx/2;x<=x1-dx/4 && i;x++){
|
|
if( get_bw( x, x,y0 ,y0+dy/4,box1->p,cs,1) == 0 ) i=0;
|
|
} if( !i ) break;
|
|
for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
|
|
if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0;
|
|
} if( i ) break;
|
|
for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
|
|
if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0;
|
|
} if( i ) break;
|
|
for(i=1,y=y0;y<=y0+dy/4 && i;y++){
|
|
if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
|
|
} if( i ) break;
|
|
for(i=1,y=y1-dy/4;y<=y1 && i;y++){
|
|
if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
|
|
} if( i ) break;
|
|
if( num_cross(x0 ,x0+dx/8 ,y0+dy/8 ,y0 ,box1->p,cs) != 0 ) ad=96*ad/100;
|
|
if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) break;
|
|
if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) break;
|
|
i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) break;
|
|
i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2<i1-dx/4 || i2>i1+dx/8) break;
|
|
i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3<i2-dx/4 || i3>i2+dx/8) break;
|
|
if(abs(i1+i3-2*i2)>dx/16+1) break;
|
|
if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) break;
|
|
if (!hchar) ad=96*ad/100;
|
|
if (!gchar) ad=99*ad/100;
|
|
ac=LATIN_SMALL_LIGATURE_FF;
|
|
Setac(box1,ac,ad);
|
|
break;
|
|
}
|
|
// --- test ae ---------------------------------------------------
|
|
if( bc==UNKNOWN )
|
|
for(ad=98;dx>4 && dy>6;){ // provisorium
|
|
DBG( wchar_t c_ask=LATIN_SMALL_LETTER_AE; )
|
|
if (sdata->holes.num > 4) Break; /* tolerant against a tiny hole */
|
|
if( num_cross( dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 2
|
|
&& num_cross(dx-1-dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 1 ) Break;
|
|
if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
|
|
if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
|
|
if( num_cross(dx-1,0, 0, dy-1,bp,cs) < 3 ) Break;
|
|
if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) < 2 )
|
|
if( num_cross(0,dx-1,1+dy/16,1+dy/16,bp,cs) < 2 ) Break;
|
|
if( num_cross(0,dx-1,dy-1-dy/16,dy-1-dy/16,bp,cs) < 2 ) Break;
|
|
for( x=0,i2=y=dy/4; y<3*dy/4; y++ ){
|
|
j=loop(bp,0,y,dx,cs,0,RI); if(j>x) { i2=y; x=j; }
|
|
} if( x<dx/4 || x>3*dx/4 ) Break;
|
|
for( x=0,i4=y=dy/4; y<3*dy/4; y++ ){
|
|
j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
|
|
} if( x<dx/4 || x>3*dx/4 ) Break;
|
|
for( x=0,i4=y=dy/8; y<3*dy/4; y++ ){
|
|
j=loop(bp,dx-1 ,y,dx,cs,0,LE);
|
|
j=loop(bp,dx-1-j,y,dx,cs,1,LE);
|
|
if(j>x) { i4=y; x=j; }
|
|
} if( x<dx/4 ) Break;
|
|
if( num_hole(x0,x0+3*dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
|
|
if( num_hole(x0+dx/2-1,x1,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
|
|
ac=LATIN_SMALL_LETTER_AE;
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
|
|
}
|
|
// --- test AE ---------------------------------------------------
|
|
if( bc==UNKNOWN )
|
|
for(ad=98;dx>5 && dy>6;){ // provisorium
|
|
DBG( wchar_t c_ask=LATIN_CAPITAL_LETTER_AE; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) < 2 ) Break;
|
|
if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
|
|
if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
|
|
if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) != 1
|
|
&& num_cross(0,dx-1, dy/32, dy/32,bp,cs) != 1
|
|
&& num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) Break;
|
|
// check for upper horizontal line
|
|
j=loop(bp,dx-1 ,0,dx,cs,0,LE); x=j;
|
|
j=loop(bp,dx-1-j,0,dx,cs,1,LE);
|
|
i=loop(bp,dx-1 ,1,dx,cs,0,LE); if (i<x) x=i;
|
|
i=loop(bp,dx-1-i,1,dx,cs,1,LE);
|
|
if (i>j) j=i;
|
|
if (x>dx/8) Break;
|
|
if (j<dx/4) Break;
|
|
for( x=dx,i1=i3=0,i2=y=dy/4; y<3*dy/4; y++ ){
|
|
j=loop(bp, 0,y,dx,cs,0,RI); if(j>x) break; x=j;
|
|
j=loop(bp, j,y,dx,cs,1,RI); if(j>i1) { i1=j; i2=y; }
|
|
j=loop(bp,dx-1 ,y,dx,cs,0,LE);
|
|
j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>i3) { i3=j; i4=y; }
|
|
} if( y<3*dy/4 || i1<dx/4-1 || i3<dx/4-1) Break;
|
|
for( i1=i3=0,y=0; y<dy/8; y++ ){
|
|
j=loop(bp,dx-1 , y,dx,cs,0,LE);
|
|
j=loop(bp,dx-1-j, y,dx,cs,1,LE); if(j>i1) { i1=j; }
|
|
j=loop(bp,dx-1 ,dy-1-y,dx,cs,0,LE);
|
|
j=loop(bp,dx-1-j,dy-1-y,dx,cs,1,LE); if(j>i3) { i3=j; }
|
|
} if( i1<=dx/4 || i3<=dx/4 ) Break;
|
|
for( x=dx-1-dx/8; x>dx/2; x-- ){ // look for right the E
|
|
if( num_cross(x,x, 0,dy-1,bp,cs) == 3 )
|
|
if( num_cross(x,x, 0,dy/4,bp,cs) == 1 )
|
|
if( num_cross(x-1,dx-1-dx/8,3*dy/4,3*dy/4,bp,cs) == 0 )
|
|
if( num_cross(x,x,3*dy/4,dy-1,bp,cs) == 1 ) break;
|
|
} if (x<=dx/2) Break; // not found
|
|
if (sdata->holes.num != 1) Break;
|
|
if( num_hole(x0,x0+3*dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
|
|
// if( num_hole(x0, x1,y0,y1 ,box1->p,cs,NULL) != 1 ) Break;
|
|
ac=LATIN_CAPITAL_LETTER_AE;
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
|
|
}
|
|
// --- test /0 /o /O O_WITH_STROKE -----------------------------------------
|
|
for(ad=99;dx>4 && dy>4;){ // provisorium
|
|
DBG( wchar_t c_ask=LATIN_SMALL_LETTER_O_WITH_STROKE; )
|
|
if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
|
|
if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 3 ) Break;
|
|
if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
|
|
if (loop(bp,dx-1,3*dy/8,dx,cs,0,RI)>dx/8) Break;
|
|
if (loop(bp, 0,5*dy/8,dx,cs,0,RI)>dx/8) Break;
|
|
if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
|
|
if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
|
|
if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 2 ) Break;
|
|
i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/8 ) Break;
|
|
i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/3 ) Break; i1=dx-1-i1;
|
|
i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/8 ) Break;
|
|
for(y=1;y<dy-1;y++){
|
|
x=i1+y*(i2-i1)/dy-dx/8; if(x<0)x=0;
|
|
j=loop(bp,x,y,dx,cs,0,RI); if( j>3*dx/16 ) break;
|
|
} if( y<dy-1 ) Break;
|
|
if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
|
|
if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 1 ) Break;
|
|
if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
|
|
if (sdata->holes.num != 2) Break;
|
|
// if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 2 ) Break;
|
|
|
|
if ( hchar && 2*y0<box1->m1+box1->m2 )
|
|
ac=LATIN_CAPITAL_LETTER_O_WITH_STROKE;
|
|
else ac=LATIN_SMALL_LETTER_O_WITH_STROKE;
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
|
|
}
|
|
// --- test /c /C C_WITH_STROKE CENT_SIGN --------------------------
|
|
// here only the version with a continuously vertical line (not broken variant)
|
|
if( bc==UNKNOWN )
|
|
for(ad=98;dx>4 && dy>4;){ // provisorium
|
|
DBG( wchar_t c_ask=CENT_SIGN; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 2 ) Break;
|
|
if( num_cross(0,dx-1-dx/4,dy/2,dy/2,bp,cs) != 2 ) Break;
|
|
if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
|
|
if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
|
|
if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
|
|
if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 3 ) Break;
|
|
if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
|
|
if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 3 ) Break;
|
|
i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/4 ) Break;
|
|
i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/4 ) Break; i1=dx-1-i1;
|
|
i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/4 ) Break;
|
|
for(y=0;y<dy;y++){
|
|
x=i1+y*(i2-i1)/dy; if(x>dx/16+1) x-=dx/16+1;
|
|
j=loop(bp,x,y,dx,cs,0,RI); // fprintf(stderr,"\n x=%d j=%d",x,j);
|
|
if( j>(dx+4)/8 ) ad=96*ad/100;
|
|
if( j>(dx+2)/4 ) break;
|
|
} if( y<dy ) Break;
|
|
if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
|
|
if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 0 ) Break;
|
|
if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
|
|
// if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
|
|
if (sdata->holes.num != 1) Break;
|
|
|
|
ac=CENT_SIGN;
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
|
|
}
|
|
// --- test EURO_CURRENCY_SIGN -----------------------------------------
|
|
if( bc==UNKNOWN )
|
|
for(ad=98;dx>4 && dy>6;){ // provisorium
|
|
DBG( wchar_t c_ask='&'; )
|
|
if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
|
|
if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 4 ) break;
|
|
if( num_cross( 0,dx-1, 0, 0,bp,cs) != 1 ) break;
|
|
if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
|
|
if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 1 ) break;
|
|
for(i=0,y=dy/4;y<dy-dy/4-1;y++){ // check if no gap on left side
|
|
x=loop(bp,0,y,dx,cs,0,RI); if( x>dx/4 ) break;
|
|
j=loop(bp,x,y,dx,cs,1,RI); if( j>i ) i=j;
|
|
} if( y<dy-dy/4-1 || i<dx/2 ) break;
|
|
for(y=dy/4;y<dy-dy/4-1;y++){ // check for right horizontal gap
|
|
x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>dx/2 ) break;
|
|
} if( y>=dy-dy/4-1 ) break;
|
|
// if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
|
|
if (sdata->holes.num != 0) break;
|
|
ac=EURO_CURRENCY_SIGN;
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
// --- test LETTER_C_WITH_CEDILLA ---------------------------------------------------
|
|
if (bc==UNKNOWN)
|
|
if (gchar)
|
|
for(ad=98;dx>3 && dy>6;){ // provisorium
|
|
DBG( wchar_t c_ask='c'; )
|
|
if (sdata->holes.num > 0) break; /* no tolerant against tiny holes */
|
|
j=loop(bp,dx-1,dy/16 ,dy,cs,0,LE);
|
|
x=loop(bp,dx-1,dy/16+1,dy,cs,0,LE); if (x<j) j=x;
|
|
if (3*x>dx) Break; // ~4 ocr-b
|
|
if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) > 2 ) break;
|
|
if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 2 ) break;
|
|
if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) > 2 ) break;
|
|
for( x=dx,i2=y=dy/4; y<3*dy/4; y++ ){
|
|
j=loop(bp,0,y,dx,cs,0,RI); if(j<x) { i2=y; x=j; }
|
|
} if( x>0 ) break; i1=x;
|
|
for( x=0,i4=y=dy/4; y<5*dy/8; y++ ){
|
|
j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
|
|
} if( x<dx/2 ) break; i3=x;
|
|
j =loop(bp,dx/2,0,dy,cs,0,DO);
|
|
j+=loop(bp,dx/2,j,dy,cs,1,DO); if(j>dy/4) break;
|
|
j =loop(bp,dx/2,j,dy,cs,0,DO); if(j<dy/2) break;
|
|
j =loop(bp,dx-1 ,dy-1-dy/8,dx,cs,0,LE); if(j<dx/4 || 4*j>3*dx) break;
|
|
j =loop(bp,dx-1-j/2,dy-1-dy/8,dy,cs,0,UP); if(j>dy/2) break; // ~()
|
|
// if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
|
|
if (sdata->holes.num) break;
|
|
if( hchar ) ac= LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
|
|
else ac= LATIN_SMALL_LETTER_C_WITH_CEDILLA;
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
|
|
}
|
|
// --- test # ---------------------------------------------------
|
|
for(ad=99;dx>4 && dy>4;){ // never sure?
|
|
DBG( wchar_t c_ask='#'; )
|
|
if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
|
|
if (sdata->holes.num < 1) Break;
|
|
if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 2 ) Break;
|
|
if( num_cross(0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs) != 2 ) Break;
|
|
if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 2 ) Break;
|
|
if( num_cross(0,dx/2, dy/2, dy/2,bp,cs) != 1 ) Break;
|
|
/* fat "#" have only small ends on left and right side, we tolerate this */
|
|
j=loop(bp, 0,dy/8,dx,cs,0,RI); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
|
|
j=loop(bp, 0,dy/2,dx,cs,0,RI); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
|
|
j=loop(bp,dx-1,dy/2,dx,cs,0,LE); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
|
|
j=loop(bp,dx-1,dy-1,dx,cs,0,LE); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
|
|
for( i1=i3=0,y=dy/4; y<dy/2; y++ ){
|
|
j=loop(bp,0, y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
|
|
j=loop(bp,j, y,dx,cs,1,RI); if(j>i1) { i1=j; }
|
|
j=loop(bp,0,dy-1-y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
|
|
j=loop(bp,j,dy-1-y,dx,cs,1,RI); if(j>i3) { i3=j; }
|
|
}
|
|
if (i1<dx-dx/4 || i3<dx-dx/4) Break;
|
|
if (i1<dx-dx/8) ad=97*ad/100;
|
|
if (i3<dx-dx/8) ad=97*ad/100;
|
|
if (sdata->holes.num != 1) {ad=95*ad/100;}
|
|
if( num_hole(x0+dx/8,x1-dx/8,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
|
|
// if( num_hole(x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break;
|
|
|
|
ac=(wchar_t) '#';
|
|
if( gchar ) {ad=99*ad/100;}
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
// --- test bullet, full_box, grabbed cursor, ZapfDingBats_156
|
|
if (bc==UNKNOWN)
|
|
for(ad=96;dx>4 && dy>4 && 2*dx>dy;){ // provisorium
|
|
DBG( wchar_t c_ask='#'; )
|
|
if( get_bw(x0,x1,y0,y1,box1->p,cs,2) != 0 ) break;
|
|
ac=BULLET;
|
|
if (gchar && !hchar) ad=80*ad/100;
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
/* --- test | (vertical line, could be a I or l) --- */
|
|
for(ad=99;dy>4 && 2*dx<dy;){ /* v0.44 */
|
|
DBG( wchar_t c_ask='|'; )
|
|
/* test if everything is filled black */
|
|
if( get_bw(x0+dx/8,x1-dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) break;
|
|
/* more unsure if the borders are not exact */
|
|
if( get_bw(x0 ,x0+dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
|
|
if( get_bw(x1-dx/8,x1 ,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
|
|
if( get_bw(x0+dx/8,x1-dx/8,y0 ,y0+dy/8,box1->p,cs,2) != 0 ) ad=99*ad/100;
|
|
if( get_bw(x0+dx/8,x1-dx/8,y1-dy/8,y1 ,box1->p,cs,2) != 0 ) ad=99*ad/100;
|
|
if (3*dx<dy) ad=98*ad/100;
|
|
if (4*dx<dy) ad=99*ad/100;
|
|
if (box1->m2 && 2*y1> box1->m2+box1->m3) Break;
|
|
if (box1->m2 && 3*y1>2*box1->m2+box1->m3) ad=95*ad/100;
|
|
ac='|';
|
|
if (!hchar) ad=98*ad/100;
|
|
Setac(box1,ac,ad);
|
|
break;
|
|
}
|
|
// --- test % ---------------------------------------------------
|
|
for(ad=100;dx>5 && dy>7;){ // provisorium
|
|
DBG( wchar_t c_ask='%'; )
|
|
if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
|
|
if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) != 3
|
|
&& num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) != 3 ) Break;
|
|
if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) != 3
|
|
&& num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) != 3 ) Break;
|
|
if( num_cross(x0,x1, y0, y1,box1->p,cs) < 4
|
|
&& num_cross(x0+dx/8,x1, y0, y1,box1->p,cs) < 4
|
|
&& num_cross(x0,x1+dx/4, y0, y1,box1->p,cs) < 4
|
|
&& dx>7 && dy>15) Break;
|
|
if( num_cross(x0,x1, y0, y1,box1->p,cs) !=5 ) ad=99*ad/100;
|
|
|
|
if (dx>7 && dy>12) {
|
|
if( num_hole(x0 ,x1 ,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
|
|
if( num_hole(x0+dx/4,x1+dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
|
|
if( num_hole(x0 ,x1+dx/4,y0,y1 ,box1->p,cs,NULL) != 2 ) Break;
|
|
} else ad=98*ad/100;
|
|
// use box1->p instead of b, because % is a sum of 3 objects
|
|
if ( loop(box1->p,x0,y0 ,dx,cs,0,RI)
|
|
<= loop(box1->p,x0,y0+dy/16+1,dx,cs,0,RI) ) ad=96*ad/100; // X
|
|
if ( loop(box1->p,x1,y1 ,dx,cs,0,LE)
|
|
<= loop(box1->p,x1,y1-1-dy/16,dx,cs,0,LE) ) ad=96*ad/100; // X
|
|
for (x=0;x<dx;x++) { /* look for a vertical line and break if found */
|
|
if ( get_bw(x0+x,x0+x,y0+dy/8,y1-dy/8,box1->p,cs,2) != 2 ) break;
|
|
} if (x<dx) Break; // ~gluedVI
|
|
if (gchar) ad=98*ad/100;
|
|
ac=(wchar_t) '%';
|
|
Setac(box1,ac,ad);
|
|
if (ad>=100) return ac;
|
|
break;
|
|
}
|
|
// --- test Omega ---------------------------------------------------
|
|
for(ad=d=99;dx>7 && dy>7;){ // min 3x4
|
|
DBG( wchar_t c_ask=GREEK_CAPITAL_LETTER_OMEGA; )
|
|
if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
|
|
if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/3 , y1-dy/3,box1->p,cs,1) != 0 ) Break;
|
|
|
|
if( num_cross(x0+dx/2,x0+dx/2,y0 , y1-dy/3,box1->p,cs) != 1 ) Break;
|
|
if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
|
|
if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
|
|
if( num_cross(x0+dx/3,x1-dx/3,y1 , y1 ,box1->p,cs) != 2 ) // against "rauschen"
|
|
if( num_cross(x0+dx/3,x1-dx/3,y1-1 , y1-1 ,box1->p,cs) != 2 ) Break;
|
|
if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
|
|
if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
|
|
if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
|
|
if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
|
|
if (sdata->holes.num) Break;
|
|
// if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
|
|
|
|
if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
|
|
loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
|
|
if( loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,RI)>dx/4
|
|
|| loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,LE)>dx/4 ) Break;
|
|
if( loop(bp,dx/2,3*dy/8,x1-x0,cs,0,RI)<dx/4
|
|
|| loop(bp,dx/2,3*dy/8,x1-x0,cs,0,LE)<dx/4 ) Break;
|
|
|
|
i=loop(bp,0,dy-1-dy/16,x1-x0,cs,0,RI); if(i>dx/8) Break;
|
|
x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<3*dx/8 || i>dx/2) Break;
|
|
x=loop(bp,i,dy-1-dy/16,x1-x0,cs,0,RI); i+=x; if(i<dx/2 || i>5*dx/8) Break;
|
|
x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<7*dx/8) Break;
|
|
|
|
/* look for a vertikal gap at lower end */
|
|
for( x=dx/4;x<3*dx/4;x++ ){
|
|
i=loop(bp,x,dy-1,y1-y0,cs,0,UP);
|
|
if( i>3*dy/4 ) break;
|
|
}
|
|
if( x>=3*dx/4 ) Break;
|
|
|
|
if( !hchar ) ad=60*ad/100;
|
|
bc=GREEK_CAPITAL_LETTER_OMEGA;
|
|
Setac(box1,bc,ad);
|
|
break;
|
|
}
|
|
|
|
return bc;
|
|
}
|
|
|
|
// -------------------- OCR engine ;) ----------------------------
|
|
/*
 * ocr0 - rule based recognition of a single character box.
 *
 * Parameters:
 *   box1 - the box to classify. Read here: x0,x1,y0,y1 (box limits),
 *          m1..m4 (text line reference rows), p (pixmap addressed with
 *          absolute coordinates), frame_vector[], num_frame_vectors[0],
 *          num_frames, dots, modifier, num_ac, tac[], wac[].
 *          Written here: y1 (may be reduced for an "I" cut out of a glued
 *          "Ij"/"Ig") and c (set when the best candidate weight is > 95).
 *   bp   - pixmap addressed with box relative coordinates (0..dx-1,0..dy-1);
 *          also handed to all sub-tests through sdata.
 *   cs   - gray threshold; the dot test below treats pixels < cs as ink.
 *
 * Returns the best character found, or UNKNOWN. Candidates are registered
 * together with a weight "ad" (percent) through Setac(); weights are lowered
 * multiplicatively (e.g. ad=98*ad/100) for every deviation from the ideal
 * shape. A few simple shapes return immediately when their weight is 100.
 *
 * "Break" (capitalised) is a macro defined elsewhere in this file -
 * presumably a break with debug tracing; plain "break" and "Break" are kept
 * exactly where the original code had them.
 */
wchar_t ocr0(struct box *box1, pix *bp, int cs){
  int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  int dx=x1-x0+1,dy=y1-y0+1,   /* size */
      rx,ry,r1,r2,i1,i2,ad;    /* tmp-vars */
  // ad,ac will be used in future
  wchar_t bc = UNKNOWN;        // bestletter
  wchar_t um = SPACE;          // modifier '"
  int hchar;                   // char is higher than e
  int gchar;                   // char has ink lower than m3
  int aa[4][4];  /* corner points, see xX, (x,y,dist^2,vector_idx) v0.41 */
  ocr0_shared_t sdata;         // data used in all subfunctions

  sdata.box1=box1;
  sdata.bp=bp;
  sdata.cs=cs;
  // --- hchar --- gchar -------------------------
  // hchar: top of box lies above the middle between m1 and m2
  // gchar: bottom of box lies below the middle between m3 and m4
  hchar=0;if( y0 < box1->m2-(box1->m2-box1->m1)/2 ) hchar=1;
  gchar=0;if( y1 > box1->m3+(box1->m4-box1->m3)/2 ) gchar=1;
  // if the char is slightly moved down correction can be done
  if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
  if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;

  sdata.hchar=hchar;
  sdata.gchar=gchar;

  /* search for nearest points to the 4 corners, typical for xX */
  /* this is faster than calling nearest_frame_vector 4 times */
  aa[0][0]=aa[1][0]=aa[2][0]=aa[3][0]=(x0+x1)/2; /* set to center */
  aa[0][1]=aa[1][1]=aa[2][1]=aa[3][1]=(y0+y1)/2; /* set to center */
  aa[0][2]=aa[1][2]=aa[2][2]=aa[3][2]=2*sq(128); /* distance to box edges */
  aa[0][3]=aa[1][3]=aa[2][3]=aa[3][3]=0;         /* vector index */
  /* searching for 4 diagonal line ends */
  for (i=0;i<box1->num_frame_vectors[0];i++) {
    x=box1->frame_vector[i][0]; /* take a vector */
    y=box1->frame_vector[i][1];
    /* distance to upper left end, normalized to 128 */
    j=0; d=sq((x-x0)*128/dx)+sq((y-y0)*128/dy);
    if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
    /* distance to lower left end */
    j=1; d=sq((x-x0)*128/dx)+sq((y-y1)*128/dy);
    if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
    /* distance to lower right end */
    j=2; d=sq((x-x1)*128/dx)+sq((y-y1)*128/dy);
    if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
    /* distance to upper right end */
    j=3; d=sq((x-x1)*128/dx)+sq((y-y0)*128/dy);
    if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
  }
  /* copy the 4x4 corner table into the shared data */
  for (i=0;i<16;i++) sdata.aa[i/4][i%4]=aa[i/4][i%4];

  /* extract number position and size of holes and store in a table
   *  - hole coordinates are relative to box (x-x0,y-y0)
   */
  sdata.holes.num=0;
  if (box1->num_frames>0) // speedup v0.42
    num_hole(x0,x1,y0,y1,box1->p,cs,&sdata.holes); // call once

  /*
     after division of two glued chars, boundaries could be wrong,
     check this first (ToDo: only if a flag set?)
   */
  if (2*y0 < box1->m2+box1->m3)
  if (box1->m4>box1->m3 && 2*box1->y1>box1->m4+box1->m3){
    /* could be a "I" from divided "Ij" or "Ig" */
    /* scan downwards from the middle of m2..m3 for the first row
       for which get_bw(..,1) reports 0 (presumably: no ink in that row) */
    for(y=(box1->m3+box1->m2)/2;2*y<box1->m3+box1->m4;y++)
      if( get_bw(x0,x1,y,y,box1->p,cs,1)==0 ) break;
    if(2*y<box1->m3+box1->m4)
    if( get_bw((x0+x1)/2,(x0+x1)/2,y,box1->m4,box1->p,cs,1)==0 ){
      /* be sure, ~_ */
      /* NOTE(review): y1 is reduced here but dy (and the hole/corner data
         computed above) is not refreshed - confirm that this is intended */
      if (y>y0) y1=box1->y1=y;
    }
  }

  DBG( IFV fprintf(stderr,"\nDBG L%d (%d,%d): ",__LINE__,box1->x0,box1->y0); )
  DBG( IFV out_b(box1,sdata.bp,0,0,dx,dy,160); )
  DBG( IFV fprintf(stderr,"# aa[] %d %d %d %d %d %d %d %d (4 corners)"
                          " d= %d %d %d %d",
       aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,
       aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,
       aa[0][2], aa[1][2], aa[2][2], aa[3][2]);)
  DBG( IFV fprintf(stderr,"\n# holes %d gchar=%d hchar=%d",sdata.holes.num, gchar, hchar);)

  // --- test thin lines - ---------------------------------
  // flat and wide box: '-' in general, '_' if it reaches down to m3,
  // '=' if the box carries a dots flag
  for( ad=100; 2*dy<box1->m3-box1->m2 && 3*dx>=4*dy && dx>2; ){ // min 3x3 (small font)
    DBG( wchar_t c_ask='-'; )
    if( get_bw(x0+dx/8+1,x1-dx/8-1,y0+dy/8+((dy>2)?1:0),
                                   y1-dy/8-((dy>2)?1:0),box1->p,cs,2)==2 ) break;
    if( box1->dots ) { Setac(box1,'=',97);break; }
    if (dx<=2*dy) ad=98*ad/100;
    if (dx<=3*dy) ad=99*ad/100;
    if (!box1->m4) ad=96*ad/100;
    else {
      if (y1>=box1->m3) {
        if (  dx<2*dy) ad=98*ad/100;
        if (2*dx<3*dy) ad=98*ad/100;
        Setac(box1,'_',ad);
        break;
      }
    }
    Setac(box1,'-',ad); if (ad>=100) return '-';
    break;
  }
  // --- test thin lines = ---------------------------------
  for( ; dy>2 && dx>2; ){ // min 3x3 (small font)
    DBG( wchar_t c_ask='='; )
    for( y=y0;y<y1;y++) // remove upper empty space
      if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,1)==1 ) break;
    // upper bar, gap in the middle, lower bar
    if( get_bw(x0+dx/10,x1-dx/10,y       ,y       ,box1->p,cs,2)==2 ) break;
    if( get_bw(x0      ,x1      ,(y+y1)/2,(y+y1)/2,box1->p,cs,1)==1 ) break;
    if( get_bw(x0+dx/10,x1-dx/10,y1      ,y1      ,box1->p,cs,2)==2 ) break;
    Setac(box1,'=',100);
    return '=';
  }
  // --- test dots : ---------------------------------
  for( ad=100; dy>2 && dy>=2*dx; ){ // max 3x3 (small font)

    DBG( wchar_t c_ask=':'; )
    // check the gap height
    // i1 = first row without ink seen from the top (height of upper dot)
    for( i1=dy/16;i1<dy/2;i1++)
      if( get_bw(x0+dx/8,x1-dx/8,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
    if (i1>=dy/2) Break;
    // i2 = first row without ink seen from the bottom (height of lower dot)
    for( i2=dy/16;i2<dy/2;i2++)
      if( get_bw(x0+dx/8,x1-dx/8,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
    if (i2>=dy/2) Break;
    MSG(fprintf(stderr,"gap y12 %d %d",i1,i2);)

    if (box1->m3 && y1>box1->m3) ad=98*ad/100; // ~;
    if (box1->m3 && 2*y0> box1->m2+box1->m1) ad=98*ad/100; // ~i
    if (gchar) ad=99*ad/100;
    /* penalty for dots of different height.
       i1 and i2 are both < dy/2 here, therefore abs(i1-i2) < dy and the
       former expression abs(i1-i2)/dy*20 was always truncated to 0;
       multiply before dividing so that the penalty really applies */
    ad=ad-20*abs(i1-i2)/dy;
    if (abs(i1-dx)>dy/4) Break; // round or quadratic dots?
    if (abs(i1-dx)>dy/8) ad=98*ad/100;
    if (abs(i2-dx)>dy/4) Break; // round or quadratic dots?
    if (abs(i2-dx)>dy/8) ad=98*ad/100;
    if (box1->dots!=1) ad=96*ad/100;
    Setac(box1,':',ad); // dx<=3 ad--
    if (ad>=100) return ':';
    break;
  }
  // --- test dots ; ---------------------------------
  if( 2*y0>   box1->m2+box1->m1 ) // ~i
  if( 4*y1>=3*box1->m3+box1->m2 ) // ~:
  for( ad=100; dy>5 && dx>1 && dy>2*dx; ){ // max 3x3 (small font)
    DBG( wchar_t c_ask=';'; )
    // better would it be to detect round pixelcluster on top
    // check height of upper and lower dot
    for( i1=0;i1<dy/2;i1++)
      if( get_bw(x0,x1,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
    if (i1>=dy/2) break;
    for( i2=0;i2<dy/2;i2++)
      if( get_bw(x0,x1,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
    if (i2<i1) break;

    /* test for horizontal symmetry ~i */
    /* y ends as dy if every row is mirror symmetric, as dy+2 otherwise */
    for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
      if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
    if (y==dy) ad=96*ad/100; /* ~i */

    if (i2==i1 && y1<=box1->m3) ad=97*ad/100;
    if (i2-i1<dy/8) ad=99*ad/100;
    Setac(box1,';',ad); // dx<=3 ad--
    if (ad>=100) return ';';
    break;
  }
  // --- first test small dots . ---------------------------------
  if( 3*dy<box1->m4-box1->m1 && abs(dx-dy)<(dx+dy)/4+2
   && 3*y1>=(2*box1->m3+  box1->m2)     // dot near baseline?
   && 5*y0>=(3*box1->m3+2*box1->m2) ){  // Jul00
    DBG( wchar_t c_ask='.'; )
    d=0; r1=60;r2=140; ad=99;
    /* d counts violations: a pixel >= cs inside radius r1 or a pixel < cs
       outside radius r2; the scan is aborted on the first violation */
    for(x=x0;x<=x1;x++)for(y=y0;y<=y1;y++){ /* circle equation */
      rx=100*(2*x-(x0+x1))/dx;  // normalize to 15bit number
      ry=100*(2*y-(y0+y1))/dy;
      if( rx*rx + ry*ry < r1*r1 ) if( getpixel(box1->p,x,y)>=cs ){ d++;x=x1+1;y=y1+1; }
      if( rx*rx + ry*ry > r2*r2 ) if( getpixel(box1->p,x,y)< cs ){ d++;x=x1+1;y=y1+1; }
    }
    if (loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)> dx/8) { ad=98*ad/100; // ,
      if (loop(box1->p,x0,y1   ,x1-x0,cs,0,RI)<=dx/8) ad=98*ad/100; } // ,
    MSG( fprintf(stderr,"d= %3d ad= %3d", d, ad); )
    if(d==0)
    if( loop(box1->p,x0,y0,x1-x0,cs,0,RI)
     <= loop(box1->p,x0,y1,x1-x0,cs,0,RI)
     || loop(box1->p,x1,y0,x1-x0,cs,0,LE)
     >= loop(box1->p,x1,y1,x1-x0,cs,0,LE) )
    {
      bc='.';
      if (box1->dots) { Setac(box1,':',ad); ad=98*ad/100; }
      Setac(box1,bc,ad);
    }
  }
  // --- first test small dots , ---------------------------------
  if( 3*dy<2*(box1->m4-box1->m1)
   && 2*y0>   box1->m2+box1->m3
   && (2*dx<3*dy
    || get_bw(0,dx/2,dy/2,dy-1,bp,cs,1)==0) ){ // ocr-a-,
    DBG( wchar_t c_ask=','; )
    ad=100; bc=',';
    if (dy==1 && dx==1) ad=98*ad/100;
    if (dy==2 && dx==1) ad=99*ad/100; // this is a problem case
    if (dx>=dy) ad=99*ad/100;
    if( 2*dy >= box1->m4-box1->m1) ad=98*ad/100;
    if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) /* simple line */
      > loop(box1->p,x0,y1,x1-x0,cs,0,RI)
     && loop(box1->p,x1,y0,x1-x0,cs,0,LE)
      < loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { ad=99*ad/100; }
    else { /* with upper circle */
      if( loop(box1->p,x0,(y0+y1+1)/2,x1-x0,cs,0,RI)<dx/2 ) ad=98*ad/100;
      if( loop(box1->p,x1, y1        ,x1-x0,cs,0,LE)<dx/2 ) ad=98*ad/100;
      /* NOTE(review): the next two scans run to the left (LE) but start at
         x0, the left box border - looks like x1 was meant; left unchanged,
         confirm against the definition of loop() */
      if( loop(box1->p,x0,y1-((dy>5)?1:0),x1-x0,cs,0,LE)>(dx+1)/2 )
      if( loop(box1->p,x0, y1            ,x1-x0,cs,0,LE)>(dx+1)/2 ) ad=96*ad/100;
    }
    if(box1->dots==1) { Setac(box1,';',ad); ad=99*ad/100; }
    Setac(box1,bc,ad);
  }
  // --- first test small dots '" ---------------------------------
  if( 2*dy < box1->m4 -box1->m1+1
   && 2*y0 < box1->m2 +box1->m3
   && 3*y1 < box1->m2+2*box1->m3+2 ){
    DBG( wchar_t c_ask='\''; )
    ad=100; bc='\'';
    if (2*y1 >= box1->m2+box1->m3) { ad=96*ad/100; MSG({}) } // ~!
    if (3*y1>=2*box1->m2+box1->m3) { ad=96*ad/100; MSG({}) }
    if (get_bw(x0,x1,(box1->m2+box1->m3)/2,box1->m4,box1->p,cs,1)!=0)
      { ad=98*ad/100; MSG({}) }
    if (dx>4
     && num_cross(x0,x1,y1,y1,box1->p,cs) == 2) { // " "
      bc='"';
      // ocr-a-" has no gap!
      if ( get_bw((x0+x1)/2,(x0+x1)/2,y0,y1,box1->p,cs,1)!=0 ) ad=96*ad/100;
    } else {
      if ( num_cross(x0,x1, y0      , y0      ,box1->p,cs)!=1) ad=96*ad/100;
      if ( num_cross(x0,x1,(y0+y1)/2,(y0+y1)/2,box1->p,cs)!=1) ad=98*ad/100;
      if (dx>dy) { ad=96*ad/100; MSG({}) }
    }
    if ( num_cross(x1,x1, y0      , y1      ,box1->p,cs)!=1) ad=99*ad/100;
    if ( num_cross(x0,x1, y0+dy/4 , y0+dy/4 ,box1->p,cs)>2) ad=97*ad/100;
    if ( num_cross(x0,x1, y1-dy/4 , y1-dy/4 ,box1->p,cs)>2) ad=97*ad/100; // * 5x8font
    if (2*y0 > box1->m1+box1->m2) ad=99*ad/100;
    Setac(box1,bc,ad);
    if (ad>=100) return bc;
  }
  // --- TILDE ~ ---------------------------------
  if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>3 && dy>1
   && 2*y0<  box1->m1+box1->m2
   && 3*y1<2*box1->m2+box1->m3 ){
    if( loop(box1->p,x0,y0,dx,cs,0,RI)
      > loop(box1->p,x0,y1,dx,cs,0,RI)
     && loop(box1->p,x1,y0,dx,cs,0,LE)
      < loop(box1->p,x1,y1,dx,cs,0,LE)
     && num_cross(x0,x1,y0,y0,box1->p,cs) == 2
     && num_cross(x0,x1,y1,y1,box1->p,cs) == 2 ) {
      DBG( wchar_t c_ask='~'; )
      bc=TILDE;
      Setac(box1,bc,99);
    }
  }
  // --- CIRCUMFLEX, hat ^ ---------------------------------
  if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>2 && dy>1
   && 2*y0<  box1->m1+box1->m2
   && 3*y1<2*box1->m2+box1->m3 ){
    DBG( wchar_t c_ask='^'; )
    if( ( loop(box1->p,x0,y0  ,dx,cs,0,RI)
        > loop(box1->p,x0,y1  ,dx,cs,0,RI)-dx/8
       || loop(box1->p,x0,y0  ,dx,cs,0,RI)
        > loop(box1->p,x0,y1-1,dx,cs,0,RI)-dx/8 )
     && ( loop(box1->p,x1,y0  ,dx,cs,0,LE)
        > loop(box1->p,x1,y1  ,dx,cs,0,LE)-dx/8
       || loop(box1->p,x1,y0  ,dx,cs,0,LE)
        > loop(box1->p,x1,y1-1,dx,cs,0,LE)-dx/8 )
     && num_cross(x0,x1,y0  ,y0  ,box1->p,cs) == 1
     && ( num_cross(x0,x1,y1  ,y1  ,box1->p,cs) == 2
       || num_cross(x0,x1,y1-1,y1-1,box1->p,cs) == 2 )) {
      bc='^';
      Setac(box1,bc,99);
    }
  }
  // ------------------------------------------------------
  // if( dots==1 ){ um='\''; }
#if 0 /* ToDo: change to vectors, call here or in whatletter */
  if (box1->dots==0) { // i-dots ??? (if dots==0 is wrong)
    y=box1->m1;
    for(;y<y0+dy/2;y++)if( get_bw(x0+dx/4,x1,y,y,box1->p,cs,1)==1) break;
    { i1=y;
      if( y<y0+dy/4 )
      for(;y<y0+dy/2;y++)if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) break;
      if( y<y0+dy/2 && 5*(y-i1+1)>box1->m2-box1->m1){
        testumlaut(box1,cs,2,&um); // set modifier + new y0 ???

      }
    }
  }
#else
  um = box1->modifier;
#endif
  if ( /* um==ACUTE_ACCENT || */ um==DIAERESIS){
    // move the local upper limit below the accent; only the local y0/dy
    // change here, box1 itself is not modified
    for(y=y1;y>y0;y--)
      if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) { y0=y; dy=y1-y0+1; break; } // scan "a "o "u
  }

  // --- test numbers 0..9 --- separated for faster compilation
  if( JOB->cfg.only_numbers ) return ocr0n(&sdata);

  // bc=ocr1(box1,bp,cs);
  if(bc!=UNKNOWN && box1->num_ac>0 && box1->wac[0]==100)
    return bc; // for fast compilable tests

  // ------ separated for faster compilation
  // ToDo: insert ocr0_shared_t here and split into a,b,cC,d,e,f,g9,...
  // every sub-test below is only called while no candidate of weight 100
  // is stored at the first position of the candidate list
#define IF_NOT_SURE if(bc==UNKNOWN || box1->num_ac==0 || box1->wac[0]<100)

  IF_NOT_SURE bc=ocr0_eE(&sdata);
  IF_NOT_SURE bc=ocr0_f(&sdata);
  IF_NOT_SURE bc=ocr0_bB(&sdata);
  IF_NOT_SURE bc=ocr0_dD(&sdata);
  IF_NOT_SURE bc=ocr0_F(&sdata);
  IF_NOT_SURE bc=ocr0_uU(&sdata);
  IF_NOT_SURE bc=ocr0_micro(&sdata);
  IF_NOT_SURE bc=ocr0_vV(&sdata);
  IF_NOT_SURE bc=ocr0_rR(&sdata);
  IF_NOT_SURE bc=ocr0_m(&sdata);
  IF_NOT_SURE bc=ocr0_tT(&sdata);
  IF_NOT_SURE bc=ocr0_sS(&sdata);
  IF_NOT_SURE bc=ocr0_gG(&sdata);
  IF_NOT_SURE bc=ocr0_xX(&sdata);
  IF_NOT_SURE bc=ocr0_yY(&sdata);
  IF_NOT_SURE bc=ocr0_zZ(&sdata);
  IF_NOT_SURE bc=ocr0_wW(&sdata);
  IF_NOT_SURE bc=ocr0_aA(&sdata);
  IF_NOT_SURE bc=ocr0_cC(&sdata);
  IF_NOT_SURE bc=ocr0_lL(&sdata);
  IF_NOT_SURE bc=ocr0_oO(&sdata);
  IF_NOT_SURE bc=ocr0_pP(&sdata);
  IF_NOT_SURE bc=ocr0_qQ(&sdata);
  IF_NOT_SURE bc=ocr0_iIjJ(&sdata);
  IF_NOT_SURE bc=ocr0_n(&sdata);
  IF_NOT_SURE bc=ocr0_M(&sdata);
  IF_NOT_SURE bc=ocr0_N(&sdata);
  IF_NOT_SURE bc=ocr0_h(&sdata);
  IF_NOT_SURE bc=ocr0_H(&sdata);
  IF_NOT_SURE bc=ocr0_k(&sdata);
  IF_NOT_SURE bc=ocr0_K(&sdata);
  IF_NOT_SURE bc=ocr0n(&sdata);
  IF_NOT_SURE bc=ocr0_brackets(&sdata);
  IF_NOT_SURE bc=ocr0p9(&sdata);
  IF_NOT_SURE bc=ocr0px(&sdata);


  // a result without any registered candidate indicates an internal error
  if(box1->num_ac==0 && bc!=UNKNOWN) fprintf(stderr,"<!--ERROR 576-->");
  // accept the first candidate as result if its weight is above 95
  if(box1->num_ac>0 && box1->wac[0]>95) box1->c=bc=box1->tac[0];
  /* will be removed later, only fix old things */
  for (i=0;i<box1->num_ac;i++) if (box1->tac[i]==bc) { bc=box1->tac[0]; }

  return bc;
}
|
|
|
|
|