From 8becbb1118d400826423f625390ffa5235c2cb1b Mon Sep 17 00:00:00 2001 From: Olly Date: Mon, 23 Dec 2024 19:30:32 +0000 Subject: [PATCH] Add pixel font ocr --- Examples/image_drawtext.simba | 2 +- Examples/mouse_teleport_event.simba | 12 +- Examples/randomleft.simba | 2 + .../imports/simba.import_bitmapfontocr.pas | 99 ++++++ Source/script/imports/simba.pixelocr.pas | 323 ++++++++++++++++++ Source/script/simba.script_imports.pas | 3 +- Source/simba.image.pas | 13 +- Tests/pixelocr.simba | 84 +++++ 8 files changed, 528 insertions(+), 10 deletions(-) create mode 100644 Source/script/imports/simba.import_bitmapfontocr.pas create mode 100644 Source/script/imports/simba.pixelocr.pas create mode 100644 Tests/pixelocr.simba diff --git a/Examples/image_drawtext.simba b/Examples/image_drawtext.simba index 3c8c10753..b635d08cf 100644 --- a/Examples/image_drawtext.simba +++ b/Examples/image_drawtext.simba @@ -15,7 +15,7 @@ begin myImage.DrawColor := Colors.RED; myImage.DrawBox(myBox); myImage.DrawColor := Colors.BLACK; - myImage.DrawText(Now.ToString('c'), myBox, [EDrawTextAlign.CENTER, EDrawTextAlign.VERTICAL_CENTER]); + myImage.DrawText(Now.ToString('c'), myBox, [EImageTextAlign.CENTER, EImageTextAlign.VERTICAL_CENTER]); myImage.FontBold := True; myImage.FontSize := 50; diff --git a/Examples/mouse_teleport_event.simba b/Examples/mouse_teleport_event.simba index a45dbe8cc..a4bebfffc 100644 --- a/Examples/mouse_teleport_event.simba +++ b/Examples/mouse_teleport_event.simba @@ -1,19 +1,19 @@ var TPA: TPointArray; // stores the teleport events so we can view at the end -procedure MouseTeleportEvent(var Sender: TTarget; P: TPoint); +procedure MouseTeleportEvent(var Target: TTarget; Data: TTargetEventData); begin - WriteLn('Mouse teleported to: ', P); - TPA += P; + WriteLn('Mouse teleported to: ', Data.MouseTeleport); + TPA += [Data.MouseTeleport.X, Data.MouseTeleport.Y]; end; var - Event: TMouseTeleportEvent; + Event: TTargetEvent; begin - Event := Target.AddMouseEvent(@MouseTeleportEvent); + Event := Target.AddEvent(ETargetEventType.MOUSE_TELEPORT, @MouseTeleportEvent); Target.MouseTeleport([200,200]); Target.MouseMove([600,600]); - Target.RemoveMouseEvent(Event); // Remove the event + Target.RemoveEvent(ETargetEventType.MOUSE_TELEPORT, Event); // Remove the event Target.MouseMove([200, 600]); // The event has been removed so this wont be "recorded" when we show the path diff --git a/Examples/randomleft.simba b/Examples/randomleft.simba index c07ea8ae9..69094760a 100644 --- a/Examples/randomleft.simba +++ b/Examples/randomleft.simba @@ -1,3 +1,5 @@ +// Random left returns numbers weighted to the left number + const SampleCount = 1000000; Range = 10; diff --git a/Source/script/imports/simba.import_bitmapfontocr.pas b/Source/script/imports/simba.import_bitmapfontocr.pas new file mode 100644 index 000000000..528c9c97a --- /dev/null +++ b/Source/script/imports/simba.import_bitmapfontocr.pas @@ -0,0 +1,99 @@ +unit simba.import_bitmapfontocr; + +{$i simba.inc} + +interface + +uses + Classes, SysUtils, + simba.base, simba.script; + +procedure ImportBitmapFontOCR(Script: TSimbaScript); + +implementation + +uses + lptypes, lpvartypes, + simba.pixelocr, + simba.image; + +type + PPixelFont = ^TPixelFont; + PPixelOCR = ^TPixelOCR; + +procedure _LapePixelOCR_LoadFont(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV +begin + PPixelFont(Result)^ := PPixelOCR(Params^[0])^.LoadFont(PString(Params^[1])^, PInteger(Params^[2])^); +end; + +procedure _LapePixelOCR_Recognize1(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV +begin + PString(Result)^ := PPixelOCR(Params^[0])^.Recognize(PSimbaImage(Params^[1])^, PPixelFont(Params^[2])^, PPoint(Params^[3])^); +end; + +procedure _LapePixelOCR_Recognize2(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV +begin + PString(Result)^ := PPixelOCR(Params^[0])^.Recognize(PSimbaImage(Params^[1])^, PPixelFont(Params^[2])^, PBox(Params^[3])^); +end; + +procedure _LapePixelOCR_RecognizeLines(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV +begin + PStringArray(Result)^ := PPixelOCR(Params^[0])^.RecognizeLines(PSimbaImage(Params^[1])^, PPixelFont(Params^[2])^, PBox(Params^[3])^); +end; + +procedure ImportBitmapFontOCR(Script: TSimbaScript); +begin + with Script.Compiler do + begin + addGlobalType([ + 'record', + ' Glyphs: array of record', + ' Value: Char;', + ' Width: Integer;', + ' Height: Integer;', + '', + ' points: TPointArray;', + ' shadow: TPointArray;', + ' background: TPointArray;', + '', + ' croppedWidth: Integer;', + '', + ' BestMatch: Integer;', + ' end;', + '', + ' Height: Integer;', + ' SpaceWidth: Integer;', + 'end;'], + 'TPixelFont' + ); + + addGlobalType([ + 'record', + ' Text: String;', + ' Hits: Integer;', + ' Bounds: TBox;', + 'end;'], + 'TPixelOCRMatch' + ); + + addGlobalType([ + 'record', + ' Tolerance: Single;', + ' ShadowTolerance: Single;', + ' Blacklist: set of Char;', + ' MaxWalk: Integer;', + ' Matches: array of TPixelOCRMatch;', + 'end'], + 'TPixelOCR' + ); + + addGlobalFunc('function TPixelOCR.LoadFont(dir: String; SpaceWidth: Integer): TPixelFont;', @_LapePixelOCR_LoadFont); + + addGlobalFunc('function TPixelOCR.Recognize(img: TImage; font: TPixelFont; p: TPoint): String; overload;', @_LapePixelOCR_Recognize1); + addGlobalFunc('function TPixelOCR.Recognize(img: TImage; font: TPixelFont; bounds: TBox): String; overload;', @_LapePixelOCR_Recognize2); + addGlobalFunc('function TPixelOCR.RecognizeLines(img: TImage; font: TPixelFont; bounds: TBox): TStringArray;', @_LapePixelOCR_RecognizeLines); + end; +end; + +end. + diff --git a/Source/script/imports/simba.pixelocr.pas b/Source/script/imports/simba.pixelocr.pas new file mode 100644 index 000000000..1a28a5cf0 --- /dev/null +++ b/Source/script/imports/simba.pixelocr.pas @@ -0,0 +1,323 @@ +unit simba.pixelocr; + +{$i simba.inc} + +interface + +uses + Classes, SysUtils, + simba.base, + simba.image, + simba.colormath; + +type + TPixelFontGlyph = record + Value: Char; + Width: Integer; + Height: Integer; + + points: TPointArray; + shadow: TPointArray; + background: TPointArray; + + croppedWidth: Integer; + + BestMatch: Integer; + end; + PPixelFontGlyph = ^TPixelFontGlyph; + + TPixelFont = record + Glyphs: array of TPixelFontGlyph; + + Height: Integer; + SpaceWidth: Integer; + end; + PPixelFont = ^TPixelFont; + + TPixelOCRMatch = record + Text: String; + Hits: Integer; + Bounds: TBox; + end; + + TPixelOCR = record + tolerance: Single; + shadowTolerance: Single; + + Blacklist: set of Char; + maxWalk: Integer; // todo + + matches: array of TPixelOCRMatch; + {$IFDEF FPC} + private type + TImage = TSimbaImage; + private + function _RecognizeX(const img: TImage; const font: PPixelFont; x,y: Integer): TPixelOCRMatch; + function _RecognizeXY(const img: TImage; const font: PPixelFont; x,y, height: Integer): TPixelOCRMatch; + public + function LoadFont(dir: String; SpaceWidth: Integer): TPixelFont; + + function Recognize(img: TImage; font: TPixelFont; p: TPoint): String; overload; + function Recognize(img: TImage; font: TPixelFont; bounds: TBox): String; overload; + function RecognizeLines(img: TImage; font: TPixelFont; bounds: TBox): TStringArray; + {$ENDIF} + end; + +implementation + +uses + simba.vartype_string, + simba.vartype_box, + simba.vartype_pointarray, + simba.fs, + simba.colormath_distance; + +function TPixelOCR._RecognizeX(const img: TImage; const font: PPixelFont; x,y: Integer): TPixelOCRMatch; + + function IsShadow(const Color: TColor; const tol: Single): Boolean; + begin + Result := (Color.R <= tol) and (Color.G <= tol) and (Color.B <= tol + 5); // allow a little more in the blue channel only + end; + + function Match(const Glyph: PPixelFontGlyph; const X, Y: Integer): Integer; + var + First: TColor; + i: Integer; + Hits: Integer; + Bad: Integer; + begin + Result := -1; + // Range check + if (Glyph^.Width + X >= img.Width) or (Glyph^.Height + Y >= img.Height) or (Length(Glyph^.points) = 0) then + Exit; + + // Always use first pixel to compare against + First := img.Pixel[Glyph^.points[0].X + X, Glyph^.points[0].Y + Y]; + + if (Glyph^.shadow <> nil) then + begin + // exit early if: + // - first pixel is a dark'ish color its a non starter + // - if first shadow isn't one it's not a match + if IsShadow(First, Self.shadowTolerance * 2) or (not IsShadow(img.Pixel[Glyph^.shadow[0].X + X, Glyph^.shadow[0].Y + Y], Self.shadowTolerance)) then + Exit; + + // check all points match the first pixel + for i:=0 to High(Glyph^.points) do + if not SimilarColors(img.Pixel[Glyph^.points[i].X + X, Glyph^.points[i].Y + Y], First, self.tolerance) then + Exit; + + // check all shadows are shadows + for i:=0 to High(Glyph^.shadow) do + if not IsShadow(img.Pixel[Glyph^.shadow[i].X + X, Glyph^.shadow[i].Y + Y], self.shadowTolerance) then + Exit; + + Result := Glyph^.BestMatch; + end else + begin + // check all points match the first pixel + for i:=0 to High(Glyph^.points) do + if not SimilarColors(img.Pixel[Glyph^.points[i].X + X, Glyph^.points[i].Y + Y], First, self.tolerance) then + Exit; + + Result := Glyph^.BestMatch; + + Bad := 0; + for i:=0 to High(Glyph^.background) do + if SimilarColors(img.Pixel[Glyph^.background[i].X + X, Glyph^.background[i].Y + Y], First, self.tolerance) then + Inc(Bad); + + if Bad >= (High(Glyph^.background) div 2) then + Result := -1 + else + Result += Length(Glyph^.background) - Bad; + end; + end; + +var + Space: Integer; + Hits, BestHits: Integer; + I: Integer; + + Lo, Hi: PPixelFontGlyph; + Glyph, BestGlyph: PPixelFontGlyph; +begin + Result := Default(TPixelOCRMatch); + Result.Bounds.X1 := $FFFFFF; + Result.Bounds.Y1 := $FFFFFF; + Space := 0; + + if (x < 0) then x := 0; + if (y < 0) then y := 0; + + Lo := @Font^.Glyphs[0]; + Hi := @Font^.Glyphs[High(Font^.Glyphs)]; + + while (x < img.Width) and (Space < 9999) do + begin + BestHits := 0; + + Glyph := Lo; + while (PtrUInt(Glyph) <= PtrUInt(Hi)) do + begin + Hits := Match(Glyph, x, y); + if (Hits > BestHits) then + begin + BestGlyph := Glyph; + BestHits := Hits; + end; + + Inc(Glyph); + end; + + if (BestHits > 0) then + begin + if (not (BestGlyph^.Value in Self.Blacklist)) then + begin + if (Result.Text <> '') and (Space >= Font^.spaceWidth) then + Result.Text += ' '; + + Result.Bounds.X1 := Min(Result.Bounds.X1, X); + Result.Bounds.Y1 := Min(Result.Bounds.Y1, Y); + Result.Bounds.X2 := Max(Result.Bounds.X2, X + BestGlyph^.Width); + Result.Bounds.Y2 := Max(Result.Bounds.Y2, Y + BestGlyph^.Height); + Result.Text += BestGlyph^.Value; + Result.Hits += BestHits; + end; + + Space := 0; + x += BestGlyph^.croppedWidth; + end else + begin + Space += 1; + x += 1; + end; + end; +end; + +function TPixelOCR._RecognizeXY(const img: TImage; const font: PPixelFont; x,y, height: Integer): TPixelOCRMatch; +var + stop: Integer; + Match: TPixelOCRMatch; +begin + Result := Default(TPixelOCRMatch); + + stop := y+height; + while (y < stop) do + begin + Match := Self._RecognizeX(img, font, x, y); + if (Match.Hits > Result.Hits) then + Result := Match; + + y += 1; + end; +end; + +function TPixelOCR.LoadFont(dir: String; SpaceWidth: Integer): TPixelFont; +var + img: TImage; + files: TStringArray; + i, Count: Integer; + Character: String; +begin + Result := Default(TPixelFont); + Result.SpaceWidth := SpaceWidth; + + Files := TSimbaDir.DirList(dir); + if (Length(Files) = 0) then + Exit; + + SetLength(Result.Glyphs, Length(Files)); + Count := 0; + + img := TImage.Create(); + for i:= 0 to High(Files) do + begin + Character := TSimbaPath.PathExtractNameWithoutExt(Files[I]); + if character.IsNumeric and (Character.ToInteger in [32..126]) then + begin + img.Load(Files[I]); + if img.FindColor($FFFFFF) = nil then + Continue; + + with Result.Glyphs[Count] do + begin + Value := Char(Character.ToInteger); + Points := img.FindColor($FFFFFF); + Background := img.FindColor($000000); + Shadow := img.FindColor($0000FF); + Width := img.Width; + Height := img.Height; + Result.height := Height; + + croppedWidth := max(points.Bounds.X2, shadow.Bounds.X2); + + if (Length(Shadow) > 0) then + BestMatch := Length(Points) + Length(Shadow) + else + BestMatch := Length(Points); + end; + + Inc(Count); + end; + end; + img.Free(); + + SetLength(Result.Glyphs, Count); +end; + +function TPixelOCR.Recognize(img: TImage; font: TPixelFont; p: TPoint): String; overload; +var + m: TPixelOCRMatch; +begin + m := Self._RecognizeX(img, @font, p.x, p.y); + Self.matches := [m]; + Result := m.Text; +end; + +function TPixelOCR.Recognize(img: TImage; font: TPixelFont; bounds: TBox): String; overload; +var + m: TPixelOCRMatch; +begin + m := Self._RecognizeXY(img, @font, bounds.X1, bounds.Y1, bounds.Height); + Self.matches := [m]; + Result := m.Text; +end; + +function TPixelOCR.RecognizeLines(img: TImage; font: TPixelFont; bounds: TBox): TStringArray; + + function MaybeRecognize(X, Y: Integer; out Match: TPixelOCRMatch): Boolean; + var + i: Integer; + begin + Match := Self._RecognizeXY(Img, @Font, X, Y, font.height div 4); + + // Verify that actual text was extracted, not just a symbol mess of short or small character symbols. + for i := 1 to Length(Match.Text) do + if (Match.Text[i] in ['a'..'z', 'A'..'Z', '0'..'9', '%', '&', '#', '$', '[', ']', '{', '}', '@', '!', '?']) then + Exit(True); + end; + +var + Match: TPixelOCRMatch; +begin + Result := []; + Self.matches := []; + + while (Bounds.Y1 < Bounds.Y2) do + begin + if MaybeRecognize(Bounds.X1, Bounds.Y1, Match) then + begin + Result += [Match.Text]; + SetLength(Self.matches, Length(Self.matches) + 1); + Self.matches[High(self.matches)] := Match; + + Bounds.Y1 := Max(Bounds.Y1, Match.Bounds.Y2 - (Font.height div 2)); + end; + + Bounds.Y1 += 1; + end; +end; + +end. + diff --git a/Source/script/simba.script_imports.pas b/Source/script/simba.script_imports.pas index 10feae07a..4e57a8834 100644 --- a/Source/script/simba.script_imports.pas +++ b/Source/script/simba.script_imports.pas @@ -39,7 +39,7 @@ implementation // Simba classes simba.import_image, simba.import_externalcanvas, simba.import_dtm, simba.import_matchtemplate, - simba.import_json, simba.import_imagebox, simba.import_shapebox, + simba.import_json, simba.import_imagebox, simba.import_shapebox, simba.import_bitmapfontocr, // LCL simba.import_lcl_system, simba.import_lcl_graphics, simba.import_lcl_controls, @@ -83,6 +83,7 @@ procedure AddSimbaImports(Script: TSimbaScript); ImportExternalCanvas(Script); ImportMatchTemplate(Script); ImportJSON(Script); + ImportBitmapFontOCR(Script); ImportDateTime(Script); ImportEncoding(Script); diff --git a/Source/simba.image.pas b/Source/simba.image.pas index 687fb7ce2..8749ba31d 100644 --- a/Source/simba.image.pas +++ b/Source/simba.image.pas @@ -258,8 +258,9 @@ TSimbaImage = class(TSimbaBaseClass) procedure FromLazBitmap(LazBitmap: TBitmap); // Basic finders, use Target.SetTarget(img) for all - function FindColor(Color: TColor; Tolerance: Single): TPointArray; - function FindImage(Image: TSimbaImage; Tolerance: Single): TPoint; + function FindColor(Color: TColor; Tolerance: Single = 0): TPointArray; overload; + function FindColor(Color: TColor; Bounds: TBox; Tolerance: Single = 0): TPointArray; overload; + function FindImage(Image: TSimbaImage; Tolerance: Single = 0): TPoint; end; PSimbaImage = ^TSimbaImage; @@ -844,6 +845,14 @@ function TSimbaImage.FindColor(Color: TColor; Tolerance: Single): TPointArray; Result := Target.FindColor(Color, Tolerance, Target.Bounds); end; +function TSimbaImage.FindColor(Color: TColor; Bounds: TBox; Tolerance: Single): TPointArray; +var + Target: TSimbaTarget; +begin + Target.SetImage(Self); + Result := Target.FindColor(Color, Tolerance, Bounds); +end; + function TSimbaImage.FindImage(Image: TSimbaImage; Tolerance: Single): TPoint; var Target: TSimbaTarget; diff --git a/Tests/pixelocr.simba b/Tests/pixelocr.simba new file mode 100644 index 000000000..f46738225 --- /dev/null +++ b/Tests/pixelocr.simba @@ -0,0 +1,84 @@ +{$assertions on} + +procedure GenerateFont(dir: String; FontName: String; FontSize: Integer); +var + img: TImage; + s: String; + i,height: integer; +begin + DirCreate(dir); + + img := TImage.Create(50,50); + + img.FontName := FontName; + img.FontSize := FontSize; + img.DrawColor := $FFFFFF; + + height := 0; + for i := Ord('a') to Ord('z') do + begin + img.Clear(); + img.DrawText(Char(i), [0,0]); + height := Max(height, img.FindColor($FFFFFF).Bounds.Height); + end; + + for i := Ord('a') to Ord('z') do + begin + img.SetSize(30,height); + img.Clear(); + img.DrawText(Char(i), [0,0]); + img.SetSize(img.FindColor($FFFFFF).Bounds.X2+2, img.Height); + img.Save(dir + ToStr(i) + '.bmp', True); + end; + + img.Free(); +end; + +function GenerateTestImage(FontName: String; FontSize: Integer): TImage; +var + tpa: TPointArray; + atpa: T2DPointArray; + i,x,y: Integer; +begin + Result := TImage.Create(250,50); + Result.FontName := 'Arial'; + Result.FontSize := 25; + Result.DrawColor := $FFFFFF; + Result.DrawText('hello world', [5,5]); + + tpa := Result.FindColor($FFFFFF); + atpa := tpa.Cluster(1, 10).SortByX(True); + tpa := []; + for i := 0 to High(atpa) do + begin + tpa += atpa[i]; + atpa := atpa.Offset([Random(4),0]); + end; + + for x := 0 to Result.Width-1 do + for y := 0 to Result.Height-1 do + begin + Result.Pixel[x,y] := x*10+y*5; + end; + + for i := 0 to High(tpa) do + begin + Result.DrawAlpha := Random(150,175); + Result.DrawTPA([tpa[i]]); + end; +end; + +var + ocr: TPixelOCR; + testImage: TImage; + testFont: TPixelFont; +begin + GenerateFont(SimbaEnv.TempPath + 'bitmapfont/', 'Arial', 25); + testFont := ocr.LoadFont(SimbaEnv.TempPath + 'bitmapfont/', 8); + testImage := GenerateTestImage('Arial', 25); + + ocr.Tolerance := 30; + Assert(ocr.Recognize(testImage, testFont, [5,5]) = 'hello world'); + + testImage.Free(); +end.