Skip to content

Commit

Permalink
Merge pull request #12263 from dlorenc/tess
Browse files Browse the repository at this point in the history
Add tesseract and leptonica packages.
  • Loading branch information
dlorenc authored Feb 1, 2024
2 parents b430fd9 + 9b96475 commit 729947f
Show file tree
Hide file tree
Showing 2 changed files with 196 additions and 0 deletions.
42 changes: 42 additions & 0 deletions leptonica.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package:
name: leptonica
version: 1.84.1
epoch: 0
description: Leptonica is an open source library containing software that is broadly useful for image processing and image analysis applications.
copyright:
- license: BSD-2-Clause

environment:
contents:
packages:
- autoconf
- automake
- build-base
- busybox
- ca-certificates-bundle
- cmake

pipeline:
- uses: git-checkout
with:
repository: https://github.com/danbloomberg/leptonica
tag: ${{package.version}}
expected-commit: 7e803e73511fbd320f01314c141d35d2b8491dde

- uses: cmake/configure

- uses: cmake/build

- uses: cmake/install

- uses: strip

subpackages:
- name: leptonica-dev
pipeline:
- uses: split/dev

update:
enabled: true
github:
identifier: danbloomberg/leptonica
154 changes: 154 additions & 0 deletions tesseract.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package:
name: tesseract
version: 5.3.4
epoch: 0
description: Tesseract Open Source OCR Engine
copyright:
- license: Apache-2.0

environment:
contents:
packages:
- autoconf
- automake
- build-base
- busybox
- ca-certificates-bundle
- cairo-dev
- cmake
- expat-dev
- fontconfig-config
- fontconfig-dev
- fribidi-dev
- glib-dev
- harfbuzz-dev
- icu-dev
- leptonica-dev
- libfontconfig1
- libjpeg-turbo-dev
- libxft-dev
- opencl-dev
- pango
- pango-dev
- pkgconf
- pkgconf-dev
- zlib-dev

data:
- name: langs
items:
afr:
ara:
aze:
bel:
ben:
bul:
cat:
ces:
chi_sim:
chi_tra:
chr:
dan:
deu:
eng:
enm:
epo:
equ:
est:
eus:
fin:
fra:
frk:
frm:
glg:
grc:
heb:
hin:
hrv:
hun:
ind:
isl:
ita:
ita_old:
jpn:
kan:
kat:
khm:
kor:
lav:
lit:
mal:
mkd:
mlt:
msa:
nld:
nor:
osd:
pol:
por:
ron:
rus:
slk:
slv:
spa:
spa_old:
sqi:
srp:
swa:
swe:
tam:
tel:
tgl:
tha:
tur:
ukr:
vie:

vars:
tessdata-version: 4.1.0

pipeline:
- uses: git-checkout
with:
repository: https://github.com/tesseract-ocr/tesseract
tag: ${{package.version}}
expected-commit: 8ee020e14cf5be4e3f0e9beb09b6b050a1871854

# Training data is stored in a separate repository
- uses: git-checkout
with:
repository: https://github.com/tesseract-ocr/tessdata
destination: tessdata-${{vars.tessdata-version}}
tag: ${{vars.tessdata-version}}
expected-commit: 4767ea922bcc460e70b87b1d303ebdfed0897da8

- runs: |
# They have some hardcoded include paths
ln -s /usr/include/pango-1.0 /usr/include/pango
- uses: cmake/configure
with:
opts: \ --enable-opencl

- uses: cmake/build

- uses: cmake/install

- uses: strip

subpackages:
- name: tesseract-dev
pipeline:
- uses: split/dev

- range: langs
name: tesseract-${{range.key}}
pipeline:
- runs: |
mkdir -p ${{targets.subpkgdir}}/usr/share/tessdata
mv tessdata-${{vars.tessdata-version}}/${{range.key}}.traineddata ${{targets.subpkgdir}}/usr/share/tessdata/
update:
enabled: true
github:
identifier: tesseract-ocr/tesseract

0 comments on commit 729947f

Please sign in to comment.