From fa5b48dba6c627ee12b095fbba90087ddda95100 Mon Sep 17 00:00:00 2001 From: Stefano Cossu <4306733+scossu@users.noreply.github.com> Date: Thu, 9 May 2024 09:28:40 -0400 Subject: [PATCH] Test (#101) * Yiddish transliteration via submodules. * Update checkout workflow. * Change refs for Yiddish submodules. * Fix WORKDIR in Dockerfile * Do not remove yiddish module. * Manually add yiddish submodules. * Use git clone instead of submodule. * Move ext checkout to github actions. * Chinese numerals (#97) * WIP Parse Chinese numerals. * WIP complete number parsing. * Complete Chinese numerals: * Use standard table override instead of pre-config hooks. * Add few test strings. * Complete numerals: * Transliterate all numeric examples correctly * Modify hook return logic for consistency * WIP partial spacing fix. * Some cleanup; upgrade docker OS. * Add dependency for uwsgi. * Squashed commit of the following: (#98) commit 30859a52b9cc325c323b414133856d0af3ffc2a6 Author: scossu Date: Wed Feb 28 22:17:36 2024 -0500 Move ext checkout to github actions. commit 6d8da6df68ac764f90deb15861089095043fd4ba Author: scossu Date: Wed Feb 28 21:45:01 2024 -0500 Use git clone instead of submodule. commit ade9da589179870d331b703ff526d7fff33e88bb Author: scossu Date: Wed Feb 28 21:42:45 2024 -0500 Manually add yiddish submodules. commit 77cb9ef2959f611d0220cc405e0b584ece71147c Author: scossu Date: Wed Feb 28 21:23:37 2024 -0500 Do not remove yiddish module. commit e405b3605dd2629ed5557ccc5fdd5fe8812799ed Author: scossu Date: Wed Feb 28 09:11:41 2024 -0500 Fix WORKDIR in Dockerfile commit 95445ba642163e28b94df6736ad6946ad7dc76c0 Author: scossu Date: Wed Feb 28 09:07:50 2024 -0500 Change refs for Yiddish submodules. commit 208ea095e792195981f644497ccd5fcd55e15c1b Author: scossu Date: Wed Feb 28 08:45:58 2024 -0500 Update checkout workflow. * Add debug output to /trans response. * Split docker files and requirements. * Add bad request debug handler. * Add bad request debug handler. * Adjust CI workflows. * Fix image name typo. * Refine triggers. * Fix typo on test workflow trigger. * Use JSON in POST body. * Also use JSON in feedback request; update docs. * Return json data in 400 debug. --- ...sh-docker-image.yml => push-app-image.yml} | 17 +++++-- .github/workflows/push-base-image.yml | 46 +++++++++++++++++ .github/workflows/push-test-image.yml | 14 +++-- .gitmodules | 4 ++ Dockerfile | 28 +++------- deps.txt | 7 +++ doc/rest_api.md | 23 +++++++++ ext/yiddish | 1 + requirements.txt | 6 +-- scriptshifter/hooks/yiddish_/__init__.py | 51 +++++++++++++++++++ scriptshifter/rest_api.py | 48 ++++++++++------- scriptshifter/static/ss.js | 51 ++++++++++--------- scriptshifter/tables/data/index.yml | 2 + scriptshifter/tables/data/yiddish.yml | 21 ++++++++ scriptshifter_base.Dockerfile | 21 ++++++++ test.Dockerfile | 7 +++ 16 files changed, 268 insertions(+), 79 deletions(-) rename .github/workflows/{push-docker-image.yml => push-app-image.yml} (59%) create mode 100644 .github/workflows/push-base-image.yml create mode 100644 deps.txt create mode 160000 ext/yiddish create mode 100644 scriptshifter/hooks/yiddish_/__init__.py create mode 100644 scriptshifter/tables/data/yiddish.yml create mode 100644 scriptshifter_base.Dockerfile create mode 100644 test.Dockerfile diff --git a/.github/workflows/push-docker-image.yml b/.github/workflows/push-app-image.yml similarity index 59% rename from .github/workflows/push-docker-image.yml rename to .github/workflows/push-app-image.yml index 193f205..610703a 100644 --- a/.github/workflows/push-docker-image.yml +++ b/.github/workflows/push-app-image.yml @@ -1,8 +1,15 @@ -name: Push image to Docker Hub. +name: Push app image on: + # This runs on v *.*.0 after the base image has been + # built and pushed, or on patch version tag. push: tags: - - "v*.*.*" + - "v*.*.[1-9]*" + workflow_run: + workflows: + - "Push base image" + types: + - "completed" env: DOCKER_USER: lcnetdev @@ -13,13 +20,15 @@ jobs: push-image-to-docker-hub: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: checkout repo + uses: actions/checkout@v4 with: submodules: recursive - name: Build the Docker image run: > - docker build . --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }} + docker build -f Dockerfile . + --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }} --tag $DOCKER_USER/$REPO_NAME:latest - name: Login to Docker Hub diff --git a/.github/workflows/push-base-image.yml b/.github/workflows/push-base-image.yml new file mode 100644 index 0000000..db26243 --- /dev/null +++ b/.github/workflows/push-base-image.yml @@ -0,0 +1,46 @@ +name: Push base image +on: + push: + tags: + - "v*.*.0" + +env: + DOCKER_USER: lcnetdev + DOCKER_PASSWORD: ${{secrets.DOCKER_HUB}} + REPO_NAME: scriptshifter-base + +jobs: + push-image-to-docker-hub: + runs-on: ubuntu-latest + steps: + - name: checkout repo + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: checkout yiddish submodules (1/2) + uses: actions/checkout@v4 + with: + repository: ibleaman/loshn-koydesh-pronunciation + path: ext/yiddish/yiddish/submodules/loshn-koydesh-pronunciation + + - name: checkout yiddish submodules (2/2) + uses: actions/checkout@v4 + with: + repository: ibleaman/hasidify_lexicon + path: ext/yiddish/yiddish/submodules/hasidify_lexicon + + - name: Build the Docker image + run: > + docker build -f scriptshifter_base.Dockerfile . + --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }} + --tag $DOCKER_USER/$REPO_NAME:latest + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: lcnetdev + password: ${{ secrets.DOCKER_HUB }} + + - name: Push to Docker Hub + run: docker push $DOCKER_USER/$REPO_NAME --all-tags diff --git a/.github/workflows/push-test-image.yml b/.github/workflows/push-test-image.yml index 2bcc042..93cb36f 100644 --- a/.github/workflows/push-test-image.yml +++ b/.github/workflows/push-test-image.yml @@ -1,8 +1,8 @@ -name: Push test image to Docker Hub. +name: Push test image on: push: - branch: - - "main" + branches: + - "test" env: DOCKER_USER: lcnetdev @@ -13,12 +13,16 @@ jobs: push-image-to-docker-hub: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: checkout repo + uses: actions/checkout@v4 with: submodules: recursive - name: Build the Docker image - run: docker build . --tag $DOCKER_USER/$REPO_NAME:test + run: > + docker build -f Dockerfile . + --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }} + --tag $DOCKER_USER/$REPO_NAME:test - name: Login to Docker Hub uses: docker/login-action@v3 diff --git a/.gitmodules b/.gitmodules index 45cbf19..4d50032 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ [submodule "ext/arabic_rom"] path = ext/arabic_rom url = https://github.com/fadhleryani/Arabic_ALA-LC_Romanization.git +[submodule "ext/yiddish"] + path = ext/yiddish + url = https://github.com/scossu/yiddish.git + branch = loc diff --git a/Dockerfile b/Dockerfile index 95b15ad..779cfe4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,15 @@ -FROM python:3.10-slim-bookworm - -RUN apt update -RUN apt install -y build-essential tzdata gfortran libopenblas-dev libboost-all-dev libpcre2-dev - -ENV TZ=America/New_York -ENV _workroot "/usr/local/scriptshifter/src" - -WORKDIR ${_workroot} -COPY requirements.txt ./ -RUN pip install --no-cache-dir -r requirements.txt - -# Remove development packages. -RUN apt remove -y build-essential -RUN apt autoremove -y - -RUN addgroup --system www -RUN adduser --system www -RUN gpasswd -a www www +FROM lcnetdev/scriptshifter-base:latest +ARG WORKROOT "/usr/local/scriptshifter/src" +# Copy core application files. +WORKDIR ${WORKROOT} COPY entrypoint.sh uwsgi.ini wsgi.py ./ -COPY ext ./ext/ COPY scriptshifter ./scriptshifter/ +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt RUN chmod +x ./entrypoint.sh -RUN chown -R www:www ${_workroot} . +#RUN chown -R www:www ${WORKROOT} . EXPOSE 8000 diff --git a/deps.txt b/deps.txt new file mode 100644 index 0000000..67b0a05 --- /dev/null +++ b/deps.txt @@ -0,0 +1,7 @@ +# External dependencies. +aksharamukha>=2.1,<3 +camel-tools>=1.5 +funcy>=1.15,<2 +pymarc>=4.0,<5 +repackage>=0.7.3 +./ext/yiddish diff --git a/doc/rest_api.md b/doc/rest_api.md index 34c9f2b..b4712c3 100644 --- a/doc/rest_api.md +++ b/doc/rest_api.md @@ -69,6 +69,10 @@ Transliterate an input string into a given language. ### POST body +MIME type: `application/json` + +Content: JSON object with the following keys: + - `lang`: Language code as given by the `/languages` endpoint. - `text`: Input text to be transliterated. - `capitalize`: One of `first` (capitalize the first letter of the input), @@ -92,3 +96,22 @@ Content: JSON object containing two keys: `ouput` containing the transliterated string; and `warnings` containing a list of warnings. Characters not found in the mapping are copied verbatim in the transliterated string (see "Configuration files" section for more information). + +## `POST /feedback` + +Send a feedback form about a transliteration result. + +### POST body + +MIME type: `application/json` + +Content: JSON object with the following keys: + + `lang`: language of the transliteration. Mandatory. + `src`: source text. Mandatory. + `t_dir`: transliteration direction. If omitted, it defaults to `s2r`. + `result`: result of the transliteration. Mandatory. + `expected`: expected result. Mandatory. + `options`: options passed to the request, if any. + `notes`: optional user notes. + `contact`: contact email for feedback. Optional. diff --git a/ext/yiddish b/ext/yiddish new file mode 160000 index 0000000..9bf22c5 --- /dev/null +++ b/ext/yiddish @@ -0,0 +1 @@ +Subproject commit 9bf22c55ca76710940e141de5d88922a9f55ed1f diff --git a/requirements.txt b/requirements.txt index 3136aa2..6f1a221 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,5 @@ -aksharamukha>=2.1,<3 -camel-tools>=1.5 +# Core application dependencies. flask>=2.3,<3 -funcy>=1.15,<2 -pymarc>=4.0,<5 python-dotenv>=1.0,<2 pyyaml>=6.0,<7 -repackage>=0.7.3 uwsgi>=2.0,<2.1 diff --git a/scriptshifter/hooks/yiddish_/__init__.py b/scriptshifter/hooks/yiddish_/__init__.py new file mode 100644 index 0000000..22ca3a8 --- /dev/null +++ b/scriptshifter/hooks/yiddish_/__init__.py @@ -0,0 +1,51 @@ +# @package ext + +__doc__ = """ +Yiddish transliteration module. + +Courtesy of Isaac Bleaman and Asher Lewis. + +https://github.com/ibleaman/yiddish.git + +Note the underscore in the module name to disambiguate with the `yiddish` +external package name. +""" + + +from yiddish import detransliterate, transliterate + +from scriptshifter.exceptions import BREAK +from scriptshifter.tools import capitalize + + +def s2r_post_config(ctx): + """ + Script to Roman. + """ + + rom = transliterate( + ctx.src, loc=True, + loshn_koydesh=ctx.options.get("loshn_koydesh")) + + if ctx.options["capitalize"] == "all": + rom = capitalize(rom) + elif ctx.options["capitalize"] == "first": + rom = rom[0].upper() + rom[1:] + + ctx.dest = rom + + return BREAK + + +def r2s_post_config(ctx): + """ + Roman to script. + + NOTE: This doesn't support the `loc` option. + """ + + ctx.dest = detransliterate( + ctx.src, + loshn_koydesh=ctx.options.get("loshn_koydesh")) + + return BREAK diff --git a/scriptshifter/rest_api.py b/scriptshifter/rest_api.py index d7676cd..80b96ef 100644 --- a/scriptshifter/rest_api.py +++ b/scriptshifter/rest_api.py @@ -3,11 +3,12 @@ from base64 import b64encode from copy import deepcopy from email.message import EmailMessage -from json import dumps, loads +from json import dumps from os import environ, urandom from smtplib import SMTP from flask import Flask, jsonify, render_template, request +from werkzeug.exceptions import BadRequest from scriptshifter import EMAIL_FROM, EMAIL_TO, SMTP_HOST, SMTP_PORT from scriptshifter.exceptions import ApiError @@ -46,6 +47,20 @@ def handle_exception(e: ApiError): }, e.status_code) +@app.errorhandler(BadRequest) +def handle_400(e): + if logging.DEBUG >= logging.root.level: + body = { + "debug": { + "form_data": request.json or request.form, + } + } + else: + body = "" + + return body, 400 + + @app.route("/", methods=["GET"]) def index(): return render_template( @@ -91,16 +106,16 @@ def get_options(lang): @app.route("/trans", methods=["POST"]) def transliterate_req(): - lang = request.form["lang"] - in_txt = request.form["text"] - capitalize = request.form.get("capitalize", False) - t_dir = request.form.get("t_dir", "s2r") + lang = request.json["lang"] + in_txt = request.json["text"] + capitalize = request.json.get("capitalize", False) + t_dir = request.json.get("t_dir", "s2r") if t_dir not in ("s2r", "r2s"): return f"Invalid direction: {t_dir}", 400 if not len(in_txt): return ("No input text provided! ", 400) - options = loads(request.form.get("options", "{}")) + options = request.json.get("options", {}) logger.debug(f"Extra options: {options}") try: @@ -116,14 +131,9 @@ def feedback(): """ Allows users to provide feedback to improve a specific result. """ - lang = request.form["lang"] - src = request.form["src"] - t_dir = request.form.get("t_dir", "s2r") - result = request.form["result"] - expected = request.form["expected"] - options = request.form.get("options", {}) - notes = request.form.get("notes") - contact = request.form.get("contact") + t_dir = request.json.get("t_dir", "s2r") + options = request.json.get("options", {}) + contact = request.json.get("contact") msg = EmailMessage() msg["subject"] = "Scriptshifter feedback report" @@ -133,16 +143,16 @@ def feedback(): msg["cc"] = contact msg.set_content(f""" *Scriptshifter feedback report from {contact or 'anonymous'}*\n\n - *Language:* {lang}\n + *Language:* {request.json['lang']}\n *Direction:* { 'Roman to Script' if t_dir == 'r2s' else 'Script to Roman'}\n - *Source:* {src}\n - *Result:* {result}\n - *Expected result:* {expected}\n + *Source:* {request.json['src']}\n + *Result:* {request.json['result']}\n + *Expected result:* {request.json['expected']}\n *Applied options:* {dumps(options)}\n *Notes:*\n - {notes}""") + {request.json['notes']}""") # TODO This uses a test SMTP server: # python -m smtpd -n -c DebuggingServer localhost:1025 diff --git a/scriptshifter/static/ss.js b/scriptshifter/static/ss.js index 5d2fb40..d51daf5 100644 --- a/scriptshifter/static/ss.js +++ b/scriptshifter/static/ss.js @@ -94,33 +94,33 @@ document.getElementById('transliterate').addEventListener('submit',(event)=>{ } document.getElementById('loader_results').classList.remove("hidden"); - const data = new URLSearchParams(); - let t_dir = Array.from(document.getElementsByName("t_dir")).find(r => r.checked).value; let capitalize = Array.from(document.getElementsByName("capitalize")).find(r => r.checked).value; - data.append('text',document.getElementById('text').value) - data.append('lang',document.getElementById('lang').value) - data.append('t_dir',t_dir) - data.append('capitalize',capitalize) + const data = { + 'text': document.getElementById('text').value, + 'lang': document.getElementById('lang').value, + 't_dir': t_dir, + 'capitalize': capitalize, + 'options': {} + } - let options = {}; let option_inputs = document.getElementsByClassName("option_i"); for (i = 0; i < option_inputs.length; i++) { let el = option_inputs[i]; if (el.type == "checkbox") { - options[el.id] = el.checked; + data['options'][el.id] = el.checked; } else { - options[el.id] = el.value; + data['options'][el.id] = el.value; } }; - data.append('options', JSON.stringify(options)); fetch('/trans', { method: 'post', - body: data, + body: JSON.stringify(data), + headers: {"Content-Type": "application/json"} }) .then(response=>response.json()) .then((results)=>{ @@ -133,7 +133,7 @@ document.getElementById('transliterate').addEventListener('submit',(event)=>{ fb_btn.classList.remove("hidden"); } - if (results.warnings.length>0){ + if (results.warnings && results.warnings.length>0){ document.getElementById('warnings-toggle').classList.remove("hidden"); document.getElementById('warnings').innerText = "WARNING:\n" + results.warnings.join("\n") } @@ -167,26 +167,27 @@ if (fb_active) { }) document.getElementById('feedback_form').addEventListener('submit',(event)=>{ - const data = new URLSearchParams(); - data.append('lang', document.getElementById('lang_fb_input').value); - data.append('src', document.getElementById('src_fb_input').value); - data.append('t_dir', document.getElementById('t_dir_fb_input').value); - data.append('result', document.getElementById('result_fb_input').value); - data.append('expected', document.getElementById('expected_fb_input').value); - data.append('contact', document.getElementById('contact_fb_input').value); - data.append('notes', document.getElementById('notes_fb_input').value); - - let options = {}; + const data = { + 'lang': document.getElementById('lang_fb_input').value, + 'src': document.getElementById('src_fb_input').value, + 't_dir': document.getElementById('t_dir_fb_input').value, + 'result': document.getElementById('result_fb_input').value, + 'expected': document.getElementById('expected_fb_input').value, + 'contact': document.getElementById('contact_fb_input').value, + 'notes': document.getElementById('notes_fb_input').value, + 'options': {} + }; + let option_inputs = document.getElementsByClassName("option_i"); for (i = 0; i < option_inputs.length; i++) { let el = option_inputs[i]; - options[el.getAttribute('id')] = el.value; + data['options'][el.getAttribute('id')] = el.value; }; - data.append('options', JSON.stringify(options)); fetch('/feedback', { method: 'post', - body: data, + body: JSON.stringify(data), + headers: {"Content-Type": "application/json"} }) .then(response=>response.json()) .then((results)=>{ diff --git a/scriptshifter/tables/data/index.yml b/scriptshifter/tables/data/index.yml index f6d89ac..889ba8f 100644 --- a/scriptshifter/tables/data/index.yml +++ b/scriptshifter/tables/data/index.yml @@ -154,5 +154,7 @@ uzbek_cyrillic: name: Uzbek (Cyrillic) yakut_cyrillic: name: Yakut (Cyrillic) +yiddish: + name: Yiddish yuit_cyrillic: name: Yuit (Cyrillic) diff --git a/scriptshifter/tables/data/yiddish.yml b/scriptshifter/tables/data/yiddish.yml new file mode 100644 index 0000000..c55c431 --- /dev/null +++ b/scriptshifter/tables/data/yiddish.yml @@ -0,0 +1,21 @@ +general: + name: Yiddish + +options: + - id: loshn_koydesh + label: Loshn Koydesh + description: [TODO] + type: boolean + default: false + +script_to_roman: + hooks: + post_config: + - + - yiddish_.s2r_post_config + +roman_to_script: + hooks: + post_config: + - + - yiddish_.r2s_post_config diff --git a/scriptshifter_base.Dockerfile b/scriptshifter_base.Dockerfile new file mode 100644 index 0000000..9cd06c3 --- /dev/null +++ b/scriptshifter_base.Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.10-slim-bookworm + +RUN apt update +RUN apt install -y build-essential tzdata gfortran libopenblas-dev libboost-all-dev libpcre2-dev + +ENV TZ=America/New_York +ARG WORKROOT "/usr/local/scriptshifter/src" + +RUN addgroup --system www +RUN adduser --system www +RUN gpasswd -a www www + +# Copy external dependencies. +WORKDIR ${WORKROOT} +COPY ext ./ext/ +COPY deps.txt ./ +RUN pip install --no-cache-dir -r deps.txt + +# Remove development packages. +RUN apt remove -y build-essential git +RUN apt autoremove -y diff --git a/test.Dockerfile b/test.Dockerfile new file mode 100644 index 0000000..0f310c4 --- /dev/null +++ b/test.Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.10-slim-bookworm + +RUN apt update +RUN apt install -y build-essential libpcre2-dev + +RUN pip install uwsgi +