Skip to content

Commit

Permalink
Merge branch 'main' into emsdk-backward-order
Browse files Browse the repository at this point in the history
  • Loading branch information
wydengyre committed Jun 15, 2024
2 parents 58e5981 + 251a4e7 commit 46a72d0
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 32 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,9 @@ jobs:
run: npm ci
- name: Build library
run: make lib
- name: post-lib node info
run: which node && node --version && npm --version
- name: Typecheck
run: make typecheck
- name: Check formatting
run: make checkformat
- name: Mode node info
run: which node && node --version && npm --version
- name: Run tests
run: make test
2 changes: 1 addition & 1 deletion .node-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
22.2.0
22.3.0
15 changes: 4 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,11 @@ checkformat:
node_modules/.bin/prettier --check {src,test}/**/*.js

.PHONY: typecheck
typecheck:
typecheck: build/tesseract-core.d.ts
node_modules/.bin/tsc

.PHONY: test
test: third_party/tessdata_fast
SHELL=/bin/bash
echo "Current Shell: $$SHELL"
echo "PATH: $$PATH"
echo "Checking user permissions:"
id
which node
type node
alias
node --version
node --test test/ocr-engine-test.js

.PHONY: release
Expand Down Expand Up @@ -161,7 +152,6 @@ build/tesseract-core.js build/tesseract-core.wasm: src/lib.cpp src/tesseract-ini
emcc src/lib.cpp $(EMCC_FLAGS) \
-I$(INSTALL_DIR)/include/ -L$(INSTALL_DIR)/lib/ -ltesseract -lleptonica -lembind \
-o build/tesseract-core.js
cp src/tesseract-core.d.ts build/

# Build fallback WASM binary for browsers that don't support WASM SIMD. The JS
# output from this build is not used.
Expand All @@ -170,6 +160,9 @@ build/tesseract-core-fallback.js build/tesseract-core-fallback.wasm: src/lib.cpp
-I$(INSTALL_DIR)/include/ -L$(FALLBACK_INSTALL_DIR)/lib/ -L$(INSTALL_DIR)/lib -ltesseract -lleptonica -lembind \
-o build/tesseract-core-fallback.js

build/tesseract-core.d.ts: src/tesseract-core.d.ts build
cp $< $@

dist/tesseract-core.wasm: build/tesseract-core.wasm
mkdir -p dist/
cp $< $@
Expand Down
20 changes: 9 additions & 11 deletions src/ocr-engine.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// @ts-ignore - Don't error if library hasn't been built yet.
import initTesseractCore from "../build/tesseract-core";
import initTesseractCore, {MainModule, OCREngine as WASMOCREngine} from "../build/tesseract-core";

import { imageDataFromBitmap } from "./utils";

Expand Down Expand Up @@ -89,8 +88,8 @@ export type ProgressListener = (progress: number) => void;
* Instances are constructed using {@link createOCREngine}.
*/
export class OCREngine {
private _tesseractLib: any;
private _engine: any;
private _tesseractLib: MainModule;
private _engine: WASMOCREngine;
private _modelLoaded: boolean;
private _imageLoaded: boolean;
private _progressChannel?: MessagePort;
Expand All @@ -104,7 +103,7 @@ export class OCREngine {
* @param progressChannel - Channel used to report progress
* updates when OCREngine is run on a background thread
*/
constructor(tessLib: any, progressChannel?: MessagePort) {
constructor(tessLib: MainModule, progressChannel?: MessagePort) {
this._tesseractLib = tessLib;
this._engine = new tessLib.OCREngine();
this._modelLoaded = false;
Expand All @@ -116,8 +115,7 @@ export class OCREngine {
* Shut down the OCR engine and free up resources.
*/
destroy() {
this._engine.delete();
this._engine = null;
this._engine?.delete();
}

/**
Expand All @@ -130,7 +128,7 @@ export class OCREngine {
if (!result.success) {
throw new Error(`Unable to get variable ${name}`);
}
return result.value;
return result.value.toString();
}

/**
Expand Down Expand Up @@ -192,7 +190,7 @@ export class OCREngine {
// Tesseract
const engineImage = new this._tesseractLib.Image(
imageData.width,
imageData.height
imageData.height,
);
const engineImageBuf = engineImage.data();
engineImageBuf.set(new Uint32Array(imageData.data.buffer));
Expand Down Expand Up @@ -262,7 +260,7 @@ export class OCREngine {
this._engine.getTextBoxes(textUnit, (progress: number) => {
onProgress?.(progress);
this._progressChannel?.postMessage({ progress });
})
}),
);
}

Expand Down Expand Up @@ -380,7 +378,7 @@ export type CreateOCREngineOptions = {
* used to create the tesseract module. Possible options are documented here:
* https://github.com/emscripten-core/emscripten/blob/1e7472362a7f5844c5bd23214d725b7a3fd18775/src/settings.js#L876
*/
emscriptenModuleOptions?: {wasmBinary: ArrayBuffer};
emscriptenModuleOptions?: { wasmBinary: ArrayBuffer };
};

/**
Expand Down
107 changes: 102 additions & 5 deletions src/tesseract-core.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,102 @@
// Type definitions for the Emscripten-generated JS entry point for the
// WASM file.
export default function initTesseractCore(
options: { wasmBinary?: ArrayBuffer } = {}
): Promise<unknown>;
// The file below was generated with emcc --emit-tsd
// and edited by hand where noted
// TypeScript bindings for emscripten-generated code. Automatically generated at compile time.
// Runtime exports of the Emscripten module, as emitted by the generator.
// Every entry is typed `any` in the generated output.
// NOTE(review): judging by the names these are presumably typed-array views
// over the WASM heap (e.g. HEAPU8 as a Uint8Array, HEAP_DATA_VIEW as a
// DataView) — confirm against the Emscripten runtime before narrowing them.
declare namespace RuntimeExports {
let HEAPF32: any;
let HEAPF64: any;
let HEAP_DATA_VIEW: any;
let HEAP8: any;
let HEAPU8: any;
let HEAP16: any;
let HEAPU16: any;
let HEAP32: any;
let HEAPU32: any;
let HEAP64: any;
let HEAPU64: any;
}
// Declared without members in the generated output. It is one of the three
// types intersected into `MainModule` at the bottom of this file.
interface WasmModule {
}

// Union used for the string-valued parameters and fields in the declarations
// below: either a JS `string` or one of several byte buffer / byte view types.
// NOTE(review): values read back from the bindings are typed with this same
// union, so callers must convert explicitly when they need a `string`.
type EmbindString = ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string;
// Instance type of the objects created with `new module.Image(_0, _1)`
// (see `EmbindModule.Image`).
export interface Image {
// Typed `any` by the generator. ocr-engine.ts calls
// `.set(new Uint32Array(...))` on the returned value, so it is presumably a
// Uint32Array view of the image's pixel storage — TODO confirm.
data(): any;
// NOTE(review): presumably releases the underlying native object — confirm.
delete(): void;
}

// Instance type of the engine object created with `new module.OCREngine()`
// (see `EmbindModule.OCREngine`). The parameter names `_0`, `_1` come from the
// generator and carry no meaning.
export interface OCREngine {
getBoundingBoxes(_0: TextUnit): vector<TextRect>;
clearImage(): void;
getOrientation(): Orientation;
// The result carries a `success` flag next to the value; see
// `GetVariableResult`.
getVariable(_0: EmbindString): GetVariableResult;
loadImage(_0: Image): OCRResult;
loadModel(_0: EmbindString): OCRResult;
setVariable(_0: EmbindString, _1: EmbindString): OCRResult;
// `_0` is typed `any` by the generator for the next two methods.
// NOTE(review): presumably a progress callback like the one passed to
// `getTextBoxes` — confirm against the C++ bindings.
getHOCR(_0: any): string;
getText(_0: any): string;
// `_1` is typed `any` by the generator; the wrapper in ocr-engine.ts passes a
// `(progress: number) => void` callback in this position.
getTextBoxes(_0: TextUnit, _1: any): vector<TextRect>;
// Called by the wrapper's `destroy()`, which is documented there as shutting
// the engine down and freeing its resources.
delete(): void;
}

// Shape of one `TextUnit` constant: an object whose `value` is the constant's
// numeric literal.
export interface TextUnitValue<T extends number> {
value: T;
}
// The two text units declared on `EmbindModule.TextUnit`: `Word` (value 0) and
// `Line` (value 1).
export type TextUnit = TextUnitValue<1>|TextUnitValue<0>;

/**
 * Handle for a vector container exposed by the Emscripten bindings.
 *
 * Edited by hand: the generator emitted two declarations, `vector<IntRect>`
 * and `vector<TextRect>`. TypeScript reads those as one generic interface
 * declared twice with differently named type parameters, which it rejects
 * (all declarations of a merged interface must have identical type
 * parameters). Inside each body the parameter name also shadowed the real
 * `IntRect` / `TextRect` type. Both declarations listed the same six members,
 * so they are folded into a single generic declaration; references such as
 * `vector<TextRect>` now instantiate it with the real element type.
 *
 * @typeParam T - Element type stored in the vector.
 */
export interface vector<T> {
  /** Appends `_0` to the vector. */
  push_back(_0: T): void;
  /** Resizes the vector to `_0` elements; `_1` is the value given for new slots. */
  resize(_0: number, _1: T): void;
  /** Returns the number of elements. */
  size(): number;
  /**
   * Returns the element at index `_0`.
   * NOTE(review): `undefined` is presumably returned for an out-of-range
   * index — confirm against the embind documentation.
   */
  get(_0: number): T | undefined;
  /** Stores `_1` at index `_0`; the boolean result reports whether it was stored. */
  set(_0: number, _1: T): boolean;
  /** NOTE(review): presumably releases the underlying native vector — confirm. */
  delete(): void;
}

// Rectangle described by four numeric edges. Used as the `rect` field of
// `TextRect`.
// NOTE(review): presumably image pixel coordinates — confirm the unit and
// whether `right` / `bottom` are inclusive.
export type IntRect = {
left: number,
top: number,
right: number,
bottom: number
};

// Result type of `OCREngine.getOrientation()`.
// NOTE(review): the unit of `rotation` and the range of `confidence` are not
// visible in this file — confirm against the C++ bindings.
export type Orientation = {
rotation: number,
confidence: number
};

// Element type of the vectors returned by `OCREngine.getBoundingBoxes()` and
// `OCREngine.getTextBoxes()`: a rectangle plus flags, a confidence value and
// the associated text.
// NOTE(review): the meaning of the `flags` bits is not visible in this file.
export type TextRect = {
rect: IntRect,
flags: number,
confidence: number,
text: EmbindString
};

// Result type of `OCREngine.getVariable()`. The wrapper in ocr-engine.ts
// throws when `success` is false and otherwise returns `value`.
export type GetVariableResult = {
success: boolean,
value: EmbindString
};

// Result type of `OCREngine.loadImage()`, `loadModel()` and `setVariable()`.
// It carries only an `error` field.
// NOTE(review): presumably an empty `error` means the call succeeded —
// confirm against the C++ bindings.
export type OCRResult = {
error: EmbindString
};

// Constructors and constants that the bindings expose on the module object.
interface EmbindModule {
// ocr-engine.ts constructs images as `new module.Image(width, height)`.
Image: {new(_0: number, _1: number): Image};
// ocr-engine.ts constructs the engine with `new module.OCREngine()`.
OCREngine: {new(): OCREngine};
// The two `TextUnit` constants: `Line` has value 1 and `Word` has value 0.
TextUnit: {Line: TextUnitValue<1>, Word: TextUnitValue<0>};
// The following two generated lines are not valid TypeScript (a property name
// cannot contain `<...>`), so they were commented out by hand. They appear to
// be the result of a bug in the Emscripten bindings generator.
// vector<IntRect>: {new(): vector<IntRect>};
// vector<TextRect>: {new(): vector<TextRect>};
}

// Type of the module object the factory resolves to: the (empty) WASM exports,
// the runtime exports and the embind-declared constructors and constants.
export type MainModule = WasmModule & typeof RuntimeExports & EmbindModule;
// Default export: the factory that resolves to the module. ocr-engine.ts
// imports it under the name `initTesseractCore`.
// NOTE(review): `options` is typed `unknown` by the generator. The previous
// hand-written declaration accepted `{ wasmBinary?: ArrayBuffer }`, so call
// sites are no longer checked against that shape.
export default function MainModuleFactory (options?: unknown): Promise<MainModule>;

0 comments on commit 46a72d0

Please sign in to comment.