diff --git a/dataScripts/tessTrain/example_truth/1039279885.gt.txt b/dataScripts/tessTrain/example_truth/1039279885.gt.txt new file mode 100644 index 0000000..1280031 --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1039279885.gt.txt @@ -0,0 +1 @@ +1039279885 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1039279885.png b/dataScripts/tessTrain/example_truth/1039279885.png new file mode 100644 index 0000000..1bdfe03 Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1039279885.png differ diff --git a/dataScripts/tessTrain/example_truth/104069277.gt.txt b/dataScripts/tessTrain/example_truth/104069277.gt.txt new file mode 100644 index 0000000..fec8a38 --- /dev/null +++ b/dataScripts/tessTrain/example_truth/104069277.gt.txt @@ -0,0 +1 @@ +104069277 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/104069277.png b/dataScripts/tessTrain/example_truth/104069277.png new file mode 100644 index 0000000..a49f3fe Binary files /dev/null and b/dataScripts/tessTrain/example_truth/104069277.png differ diff --git a/dataScripts/tessTrain/example_truth/1120865009.gt.txt b/dataScripts/tessTrain/example_truth/1120865009.gt.txt new file mode 100644 index 0000000..e1ba13b --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1120865009.gt.txt @@ -0,0 +1 @@ +1120865009 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1120865009.png b/dataScripts/tessTrain/example_truth/1120865009.png new file mode 100644 index 0000000..57d432c Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1120865009.png differ diff --git a/dataScripts/tessTrain/example_truth/1157374083.gt.txt b/dataScripts/tessTrain/example_truth/1157374083.gt.txt new file mode 100644 index 0000000..8b6d7cc --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1157374083.gt.txt @@ -0,0 +1 @@ +1157374083 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1157374083.png 
b/dataScripts/tessTrain/example_truth/1157374083.png new file mode 100644 index 0000000..c13399c Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1157374083.png differ diff --git a/dataScripts/tessTrain/example_truth/1242187802.gt.txt b/dataScripts/tessTrain/example_truth/1242187802.gt.txt new file mode 100644 index 0000000..69ff56a --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1242187802.gt.txt @@ -0,0 +1 @@ +1242187802 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1242187802.png b/dataScripts/tessTrain/example_truth/1242187802.png new file mode 100644 index 0000000..fd1535b Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1242187802.png differ diff --git a/dataScripts/tessTrain/example_truth/1293982005.gt.txt b/dataScripts/tessTrain/example_truth/1293982005.gt.txt new file mode 100644 index 0000000..e3b06af --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1293982005.gt.txt @@ -0,0 +1 @@ +1293982005 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1293982005.png b/dataScripts/tessTrain/example_truth/1293982005.png new file mode 100644 index 0000000..17aac7a Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1293982005.png differ diff --git a/dataScripts/tessTrain/example_truth/1307855064.gt.txt b/dataScripts/tessTrain/example_truth/1307855064.gt.txt new file mode 100644 index 0000000..921682b --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1307855064.gt.txt @@ -0,0 +1 @@ +1307855064 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1307855064.png b/dataScripts/tessTrain/example_truth/1307855064.png new file mode 100644 index 0000000..906ffe7 Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1307855064.png differ diff --git a/dataScripts/tessTrain/example_truth/1335408497.gt.txt b/dataScripts/tessTrain/example_truth/1335408497.gt.txt new file mode 100644 index 0000000..395043c --- /dev/null +++ 
b/dataScripts/tessTrain/example_truth/1335408497.gt.txt @@ -0,0 +1 @@ +1335408497 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1335408497.png b/dataScripts/tessTrain/example_truth/1335408497.png new file mode 100644 index 0000000..99956be Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1335408497.png differ diff --git a/dataScripts/tessTrain/example_truth/1401381498.gt.txt b/dataScripts/tessTrain/example_truth/1401381498.gt.txt new file mode 100644 index 0000000..25e5cca --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1401381498.gt.txt @@ -0,0 +1 @@ +1401381498 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1401381498.png b/dataScripts/tessTrain/example_truth/1401381498.png new file mode 100644 index 0000000..75a966d Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1401381498.png differ diff --git a/dataScripts/tessTrain/example_truth/1494819970.gt.txt b/dataScripts/tessTrain/example_truth/1494819970.gt.txt new file mode 100644 index 0000000..c1af13d --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1494819970.gt.txt @@ -0,0 +1 @@ +1494819970 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1494819970.png b/dataScripts/tessTrain/example_truth/1494819970.png new file mode 100644 index 0000000..781043e Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1494819970.png differ diff --git a/dataScripts/tessTrain/example_truth/1588056367.gt.txt b/dataScripts/tessTrain/example_truth/1588056367.gt.txt new file mode 100644 index 0000000..211a11d --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1588056367.gt.txt @@ -0,0 +1 @@ +1588056367 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1588056367.png b/dataScripts/tessTrain/example_truth/1588056367.png new file mode 100644 index 0000000..f6a14ce Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1588056367.png differ diff --git 
a/dataScripts/tessTrain/example_truth/1668624399.gt.txt b/dataScripts/tessTrain/example_truth/1668624399.gt.txt new file mode 100644 index 0000000..f3c3a60 --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1668624399.gt.txt @@ -0,0 +1 @@ +1668624399 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1668624399.png b/dataScripts/tessTrain/example_truth/1668624399.png new file mode 100644 index 0000000..1dd373d Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1668624399.png differ diff --git a/dataScripts/tessTrain/example_truth/1713389101.gt.txt b/dataScripts/tessTrain/example_truth/1713389101.gt.txt new file mode 100644 index 0000000..1453de3 --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1713389101.gt.txt @@ -0,0 +1 @@ +1713389101 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1713389101.png b/dataScripts/tessTrain/example_truth/1713389101.png new file mode 100644 index 0000000..5df908a Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1713389101.png differ diff --git a/dataScripts/tessTrain/example_truth/1763858340.gt.txt b/dataScripts/tessTrain/example_truth/1763858340.gt.txt new file mode 100644 index 0000000..3250c18 --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1763858340.gt.txt @@ -0,0 +1 @@ +1763858340 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1763858340.png b/dataScripts/tessTrain/example_truth/1763858340.png new file mode 100644 index 0000000..270b148 Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1763858340.png differ diff --git a/dataScripts/tessTrain/example_truth/1996779286.gt.txt b/dataScripts/tessTrain/example_truth/1996779286.gt.txt new file mode 100644 index 0000000..96f58ae --- /dev/null +++ b/dataScripts/tessTrain/example_truth/1996779286.gt.txt @@ -0,0 +1 @@ +1996779286 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/1996779286.png 
b/dataScripts/tessTrain/example_truth/1996779286.png new file mode 100644 index 0000000..a2af4e5 Binary files /dev/null and b/dataScripts/tessTrain/example_truth/1996779286.png differ diff --git a/dataScripts/tessTrain/example_truth/2014646864.gt.txt b/dataScripts/tessTrain/example_truth/2014646864.gt.txt new file mode 100644 index 0000000..014b445 --- /dev/null +++ b/dataScripts/tessTrain/example_truth/2014646864.gt.txt @@ -0,0 +1 @@ +2014646864 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/2014646864.png b/dataScripts/tessTrain/example_truth/2014646864.png new file mode 100644 index 0000000..4f1bea8 Binary files /dev/null and b/dataScripts/tessTrain/example_truth/2014646864.png differ diff --git a/dataScripts/tessTrain/example_truth/404341978.gt.txt b/dataScripts/tessTrain/example_truth/404341978.gt.txt new file mode 100644 index 0000000..32bbebd --- /dev/null +++ b/dataScripts/tessTrain/example_truth/404341978.gt.txt @@ -0,0 +1 @@ +404341978 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/404341978.png b/dataScripts/tessTrain/example_truth/404341978.png new file mode 100644 index 0000000..0da30f1 Binary files /dev/null and b/dataScripts/tessTrain/example_truth/404341978.png differ diff --git a/dataScripts/tessTrain/example_truth/428256746.gt.txt b/dataScripts/tessTrain/example_truth/428256746.gt.txt new file mode 100644 index 0000000..bd3af44 --- /dev/null +++ b/dataScripts/tessTrain/example_truth/428256746.gt.txt @@ -0,0 +1 @@ +428256746 \ No newline at end of file diff --git a/dataScripts/tessTrain/example_truth/428256746.png b/dataScripts/tessTrain/example_truth/428256746.png new file mode 100644 index 0000000..0069c57 Binary files /dev/null and b/dataScripts/tessTrain/example_truth/428256746.png differ diff --git a/dataScripts/tessTrain/example_truth/97984949.gt.txt b/dataScripts/tessTrain/example_truth/97984949.gt.txt new file mode 100644 index 0000000..1bd3b0f --- /dev/null +++ 
#!/bin/bash
# Author: Charles Bock
# Email: Charles@CharlesBock.com
# Date: 2024-02-27
# Tested on a bare install of Ubuntu 22.04.3 LTS
#
# Builds Tesseract (including its training tools) from source, sets up
# tesstrain, and runs a small example training job to prove the setup works.
#
# Usage: ./tessTrain.sh
#   Everything is cloned and built inside ./tess, relative to the directory
#   the script is started from. Requires sudo rights and network access.
#
# Exit status: 0 when the example model was trained, non-zero otherwise.

set -euo pipefail

# Directory containing this script; the example ground truth ships next to it.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR

# tesstrain reads ./data/<MODEL_NAME>-ground-truth and this script expects
# ./data/<MODEL_NAME>.traineddata to exist after a successful run.
readonly MODEL_NAME='noita'
readonly TESSDATA_DIR='/usr/local/share/tessdata'

# Color Stuff
readonly BGreen='\033[1;32m'
readonly NC='\033[0m'

#######################################
# Print a highlighted progress banner.
# Arguments: $1 - message text
# Outputs:   blank line, the message in bold green, blank line (stdout)
#######################################
greentext() {
  printf '\n%b### %s %b\n\n' "${BGreen}" "${1-}" "${NC}"
}

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Clone a repository, or update it when a checkout already exists.
# Arguments: $1 - repository URL
#            $2 - target directory
# Returns:   status of the git command that was run
#######################################
clone_or_update() {
  local url=$1
  local dir=$2

  if [[ -d "${dir}/.git" ]]; then
    git -C "${dir}" pull
  else
    # Errors are left visible on purpose: a failed clone must not be mistaken
    # for "the checkout already exists".
    git clone "${url}" "${dir}"
  fi
}

#######################################
# Install every package needed to build Tesseract and its training tools.
# Upstream Docs: https://tesseract-ocr.github.io/tessdoc/Compiling-%E2%80%93-GitInstallation#installing-with-autoconf-tools
#######################################
install_dependencies() {
  local -a packages=(
    # Build toolchain
    automake ca-certificates g++ git libtool make pkg-config
    # Used further down to download eng.traineddata
    wget
    # Tesseract itself
    libleptonica-dev
    # Training tools
    libicu-dev libpango1.0-dev libcairo2-dev
  )

  # Refresh the package lists first; a fresh install may have stale ones.
  sudo apt-get update
  sudo apt-get install "${packages[@]}"
}

#######################################
# Build and install Tesseract plus its training tools from ./tesseract.
# Upstream Docs: https://tesseract-ocr.github.io/tessdoc/Compiling-%E2%80%93-GitInstallation#build-with-training-tools
#######################################
build_tesseract() {
  # Subshell keeps the caller's working directory untouched.
  (
    cd tesseract
    ./autogen.sh
    ./configure
    make
    sudo make install
    sudo ldconfig
    make training
    sudo make training-install
  )
}

#######################################
# Locate the example ground-truth directory.
# Must be called from inside the tesstrain checkout so the legacy relative
# path resolves the same way it did in earlier versions of this script.
# Outputs: path of the first existing candidate (stdout)
# Returns: 0 if a directory was found, 1 otherwise
#######################################
find_example_truth() {
  local candidate
  for candidate in "${SCRIPT_DIR}/example_truth" '../../truth/example_truth'; do
    if [[ -d "${candidate}" ]]; then
      printf '%s\n' "${candidate}"
      return 0
    fi
  done
  return 1
}

main() {
  local truth_dir
  local ground_truth_dir="./data/${MODEL_NAME}-ground-truth"
  local -a samples=()

  # Build tesseract
  greentext "Installing Deps and Creating File Structure"

  # Don't pollute the current directory
  mkdir -p ./tess
  cd tess
  pwd

  # Get Deps
  install_dependencies

  greentext "Cloning Tesseract"
  clone_or_update https://github.com/tesseract-ocr/tesseract.git tesseract

  # Build the training tools
  greentext "Building Tesseract WITH Training Tools - This can take a long time"
  build_tesseract
  greentext "Finished Building Teseract and Training Tools"
  pwd

  # Install and configure tesstrain
  # Upstream Docs: https://github.com/tesseract-ocr/tesstrain?tab=readme-ov-file#choose-model-name
  greentext "Pulling the required ENG traineddata from github"
  # -O overwrites a leftover file from an aborted run instead of creating
  # eng.traineddata.1 and installing the stale copy.
  wget -O eng.traineddata \
    https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata
  sudo mkdir -p "${TESSDATA_DIR}"
  sudo mv eng.traineddata "${TESSDATA_DIR}/"

  greentext "Cloning Tesstrain"
  clone_or_update https://github.com/tesseract-ocr/tesstrain.git tesstrain
  cd tesstrain
  pwd

  greentext "Generating Tesstrain Langdata"
  make tesseract-langdata

  greentext "Creating and populating tesstrain ground truth Directory Structure"
  mkdir -p "${ground_truth_dir}"

  # Copy our example data in
  truth_dir=$(find_example_truth) \
    || die "example ground truth not found (expected ${SCRIPT_DIR}/example_truth)"

  shopt -s nullglob
  samples=("${truth_dir}"/*.gt.txt)
  shopt -u nullglob
  (( ${#samples[@]} > 0 )) || die "no *.gt.txt files found in ${truth_dir}"

  cp -a -- "${truth_dir}/." "${ground_truth_dir}/"

  greentext "Running Example Training - This can take some time"
  # Run training against our example data
  make training MODEL_NAME="${MODEL_NAME}"

  if [[ -f "./data/${MODEL_NAME}.traineddata" ]]; then
    greentext "Example Model was trained successfully"
    greentext "Setup is complete and tested"
    greentext "You are now ready for Training"
  else
    die "training finished but ./data/${MODEL_NAME}.traineddata was not created"
  fi
}

main "$@"