Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor of visualizer code structure #39

Merged
merged 14 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,6 @@ tags

# VSCode
.devcontainer
devcontainer.json
devcontainer.json

static/mmif-viz-cache
91 changes: 58 additions & 33 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
import secrets
import sys
from threading import Thread
from shutil import rmtree

from flask import request, render_template, flash, send_from_directory, redirect
from flask import Flask, request, render_template, flash, send_from_directory, redirect
from mmif.serialize import Mmif
from mmif.vocabulary import DocumentTypes

import cache
from cache import set_last_access, cleanup
from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
import traceback
import utils
from utils import app
from render import render_documents, render_annotations, prepare_ocr, render_ocr_page

# these two static folder-related params are important, do not remove
app = Flask(__name__, static_folder='static', static_url_path='')
app.secret_key = 'your_secret_key_here'


@app.route('/')
Expand All @@ -22,24 +26,12 @@ def index():

@app.route('/ocr', methods=['POST'])
def ocr():
try:
data = dict(request.json)
mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read()
mmif = Mmif(mmif_str)
ocr_view = mmif.get_view_by_id(data["view_id"])
return utils.prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"])
except Exception as e:
app.logger.error(f"{e}\n{traceback.format_exc()}")
return f'<p class="error">Error: {e} Check the server log for more information.</h1>'


@app.route('/ocrpage', methods=['POST'])
def ocrpage():
data = request.json
try:
return utils.render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
except Exception as e:
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
if "page_number" not in request.json:
build_ocr_tab(request.json)
request.json["page_number"] = 0
# return serve_first_ocr_page(request.json)
# else:
return serve_ocr_page(request.json)


@app.route('/upload', methods=['GET', 'POST'])
Expand Down Expand Up @@ -93,7 +85,7 @@ def display(viz_id):
return html_file
else:
app.logger.debug(f"Visualization {viz_id} not found in cache.")
os.remove(path)
rmtree(path)
flash("File not found -- please upload again (it may have been deleted to clear up cache space).")
return redirect("/upload")

Expand All @@ -103,6 +95,45 @@ def send_js(path):
return send_from_directory("uv", path)


def render_mmif(mmif_str, viz_id):
mmif = Mmif(mmif_str)
rendered_documents = render_documents(mmif, viz_id)
rendered_annotations = render_annotations(mmif, viz_id)
return render_template('player.html',
docs=rendered_documents,
viz_id=viz_id,
annotations=rendered_annotations)


def build_ocr_tab(data):
"""
Prepares OCR (at load time, due to lazy loading)
"""
try:
data = dict(request.json)
mmif_str = open(cache.get_cache_root() /
data["mmif_id"] / "file.mmif").read()
mmif = Mmif(mmif_str)
ocr_view = mmif.get_view_by_id(data["view_id"])
prepare_ocr(mmif, ocr_view, data["mmif_id"])
request.json["vid_path"] = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[
0].location_path()

except Exception as e:
app.logger.error(f"{e}\n{traceback.format_exc()}")
return f'<p class="error">Error: {e} Check the server log for more information.</h1>'


def serve_ocr_page(data):
"""
Serves subsequent OCR pages
"""
try:
return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
except Exception as e:
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'


def upload_file(in_mmif):
# Save file locally
in_mmif_bytes = in_mmif if isinstance(in_mmif, bytes) else in_mmif.read()
Expand All @@ -117,13 +148,7 @@ def upload_file(in_mmif):
with open(path / 'file.mmif', 'w') as in_mmif_file:
app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}")
in_mmif_file.write(in_mmif_str)
mmif = Mmif(in_mmif_str)
htmlized_docs = utils.documents_to_htmls(mmif, viz_id)
app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
annotations = utils.prep_annotations(mmif, viz_id)
app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
html_page = render_template('player.html',
docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
html_page = render_mmif(in_mmif_str, viz_id)
with open(os.path.join(path, "index.html"), "w") as f:
f.write(html_page)
except FileExistsError:
Expand All @@ -133,7 +158,6 @@ def upload_file(in_mmif):
t = Thread(target=cleanup)
t.daemon = True
t.run()

agent = request.headers.get('User-Agent')
if 'curl' in agent.lower():
return f"Visualization ID is {viz_id}\nYou can access the visualized file at {request.url_root}display/{viz_id}\n"
Expand All @@ -143,7 +167,8 @@ def upload_file(in_mmif):
if __name__ == '__main__':
# Make path for temp files
cache_path = cache.get_cache_root()
cache_symlink_path = os.path.join(app.static_folder, cache._CACHE_DIR_SUFFIX)
cache_symlink_path = os.path.join(
app.static_folder, cache._CACHE_DIR_SUFFIX)
if os.path.islink(cache_symlink_path):
os.unlink(cache_symlink_path)
elif os.path.exists(cache_symlink_path):
Expand All @@ -158,5 +183,5 @@ def upload_file(in_mmif):
port = 5000
if len(sys.argv) > 2 and sys.argv[1] == '-p':
port = int(sys.argv[2])

app.run(port=port, host='0.0.0.0', debug=True, use_reloader=True)
4 changes: 2 additions & 2 deletions displacy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def read_text(textdoc, app_root):
# container, see the comment in html_text() in ../app.py)
if not os.path.isfile(location):
if location.startswith('file:///'):
location = location[8:]
location = location[7:]
else:
# this should not happen anymore, but keeping it anyway
location = location[1:]
location = os.path.join(app_root, 'static', location)
# location = os.path.join(app_root, 'static', location)
with open(location) as fh:
text = fh.read()
else:
Expand Down
6 changes: 3 additions & 3 deletions examples/whisper-spacy.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@
"properties": {
"mime": "video",
"id": "d1",
"location": "file:///data/video/service-mbrs-ntscrm-01181182.mp4"
"location": "file:///data/service-mbrs-ntscrm-01181182.mp4"
}
},
{
"@type": "http://mmif.clams.ai/vocabulary/AudioDocument/v1",
"properties": {
"mime": "audio",
"id": "d2",
"location": "file:///data/audio/service-mbrs-ntscrm-01181182.wav"
"location": "file:///data/service-mbrs-ntscrm-01181182.wav"
}
},
{
"@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
"properties": {
"mime": "text",
"id": "d3",
"location": "file:///data/text/service-mbrs-ntscrm-01181182.txt"
"location": "file:///data/service-mbrs-ntscrm-01181182.txt"
}
}
],
Expand Down
Loading
Loading