Skip to content

Commit

Permalink
Merge pull request #39 from clamsproject/code-refactor
Browse files Browse the repository at this point in the history
Refactor of visualizer code structure
  • Loading branch information
haydenmccormick authored Jun 27, 2024
2 parents 8000d53 + 6e21661 commit 6778625
Show file tree
Hide file tree
Showing 11 changed files with 592 additions and 522 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,6 @@ tags

# VSCode
.devcontainer
devcontainer.json
devcontainer.json

static/mmif-viz-cache
91 changes: 58 additions & 33 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
import secrets
import sys
from threading import Thread
from shutil import rmtree

from flask import request, render_template, flash, send_from_directory, redirect
from flask import Flask, request, render_template, flash, send_from_directory, redirect
from mmif.serialize import Mmif
from mmif.vocabulary import DocumentTypes

import cache
from cache import set_last_access, cleanup
from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
import traceback
import utils
from utils import app
from render import render_documents, render_annotations, prepare_ocr, render_ocr_page

# these two static folder-related params are important, do not remove
app = Flask(__name__, static_folder='static', static_url_path='')
app.secret_key = 'your_secret_key_here'


@app.route('/')
Expand All @@ -22,24 +26,12 @@ def index():

@app.route('/ocr', methods=['POST'])
def ocr():
try:
data = dict(request.json)
mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read()
mmif = Mmif(mmif_str)
ocr_view = mmif.get_view_by_id(data["view_id"])
return utils.prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"])
except Exception as e:
app.logger.error(f"{e}\n{traceback.format_exc()}")
return f'<p class="error">Error: {e} Check the server log for more information.</h1>'


@app.route('/ocrpage', methods=['POST'])
def ocrpage():
data = request.json
try:
return utils.render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
except Exception as e:
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
if "page_number" not in request.json:
build_ocr_tab(request.json)
request.json["page_number"] = 0
# return serve_first_ocr_page(request.json)
# else:
return serve_ocr_page(request.json)


@app.route('/upload', methods=['GET', 'POST'])
Expand Down Expand Up @@ -93,7 +85,7 @@ def display(viz_id):
return html_file
else:
app.logger.debug(f"Visualization {viz_id} not found in cache.")
os.remove(path)
rmtree(path)
flash("File not found -- please upload again (it may have been deleted to clear up cache space).")
return redirect("/upload")

Expand All @@ -103,6 +95,45 @@ def send_js(path):
return send_from_directory("uv", path)


def render_mmif(mmif_str, viz_id):
    """
    Builds the visualization page for a single MMIF file.

    The MMIF string is parsed once, its documents and then its annotations
    are rendered for the given visualization, and the results are handed to
    the ``player.html`` template.

    :param mmif_str: serialized MMIF content to visualize
    :param viz_id: identifier of the visualization, passed through to the
                   document/annotation renderers and to the template
    :return: whatever ``render_template`` produces for ``player.html``
    """
    parsed_mmif = Mmif(mmif_str)
    # Documents are rendered before annotations, same order as the template
    # context is assembled.
    template_context = {
        "docs": render_documents(parsed_mmif, viz_id),
        "viz_id": viz_id,
        "annotations": render_annotations(parsed_mmif, viz_id),
    }
    return render_template('player.html', **template_context)


def build_ocr_tab(data):
    """
    Prepares OCR (at load time, due to lazy loading)

    Reads the cached ``file.mmif`` for ``data["mmif_id"]``, runs
    ``prepare_ocr`` on the view named by ``data["view_id"]``, and records the
    location path of the first video document under ``data["vid_path"]``.

    :param data: JSON payload of the OCR request; must contain ``mmif_id``
                 and ``view_id``. It is updated in place with ``vid_path``.
                 NOTE(review): the visible caller passes ``request.json``
                 itself, so the update is seen by later reads of
                 ``request.json`` -- confirm if other callers are added.
    :return: ``None`` on success, or an HTML error snippet if anything fails
    """
    try:
        mmif_path = cache.get_cache_root() / data["mmif_id"] / "file.mmif"
        # Context manager so the file handle is closed even if parsing fails.
        with open(mmif_path) as mmif_file:
            mmif = Mmif(mmif_file.read())
        ocr_view = mmif.get_view_by_id(data["view_id"])
        prepare_ocr(mmif, ocr_view, data["mmif_id"])
        video_docs = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
        data["vid_path"] = video_docs[0].location_path()

    except Exception as e:
        app.logger.error(f"{e}\n{traceback.format_exc()}")
        # Closing tag now matches the opening <p> (was </h1>).
        return f'<p class="error">Error: {e} Check the server log for more information.</p>'


def serve_ocr_page(data):
    """
    Serves subsequent OCR pages

    :param data: JSON payload of the OCR request; must contain ``mmif_id``,
                 ``vid_path``, ``view_id`` and ``page_number``
    :return: the result of ``render_ocr_page`` for the requested page, or an
             HTML error snippet if rendering raises
    """
    try:
        return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
    except Exception as e:
        # Log with traceback, consistent with build_ocr_tab, so failures are
        # not only visible in the browser.
        app.logger.error(f"{e}\n{traceback.format_exc()}")
        # Use the class name: str(type(e)) is "<class '...'>", which HTML
        # would parse as a tag. Closing tag now matches the opening <p>.
        return f'<p class="error">Unexpected error of type {type(e).__name__}: {e}</p>'


def upload_file(in_mmif):
# Save file locally
in_mmif_bytes = in_mmif if isinstance(in_mmif, bytes) else in_mmif.read()
Expand All @@ -117,13 +148,7 @@ def upload_file(in_mmif):
with open(path / 'file.mmif', 'w') as in_mmif_file:
app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}")
in_mmif_file.write(in_mmif_str)
mmif = Mmif(in_mmif_str)
htmlized_docs = utils.documents_to_htmls(mmif, viz_id)
app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
annotations = utils.prep_annotations(mmif, viz_id)
app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
html_page = render_template('player.html',
docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
html_page = render_mmif(in_mmif_str, viz_id)
with open(os.path.join(path, "index.html"), "w") as f:
f.write(html_page)
except FileExistsError:
Expand All @@ -133,7 +158,6 @@ def upload_file(in_mmif):
t = Thread(target=cleanup)
t.daemon = True
t.run()

agent = request.headers.get('User-Agent')
if 'curl' in agent.lower():
return f"Visualization ID is {viz_id}\nYou can access the visualized file at {request.url_root}display/{viz_id}\n"
Expand All @@ -143,7 +167,8 @@ def upload_file(in_mmif):
if __name__ == '__main__':
# Make path for temp files
cache_path = cache.get_cache_root()
cache_symlink_path = os.path.join(app.static_folder, cache._CACHE_DIR_SUFFIX)
cache_symlink_path = os.path.join(
app.static_folder, cache._CACHE_DIR_SUFFIX)
if os.path.islink(cache_symlink_path):
os.unlink(cache_symlink_path)
elif os.path.exists(cache_symlink_path):
Expand All @@ -158,5 +183,5 @@ def upload_file(in_mmif):
port = 5000
if len(sys.argv) > 2 and sys.argv[1] == '-p':
port = int(sys.argv[2])

app.run(port=port, host='0.0.0.0', debug=True, use_reloader=True)
4 changes: 2 additions & 2 deletions displacy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def read_text(textdoc, app_root):
# container, see the comment in html_text() in ../app.py)
if not os.path.isfile(location):
if location.startswith('file:///'):
location = location[8:]
location = location[7:]
else:
# this should not happen anymore, but keeping it anyway
location = location[1:]
location = os.path.join(app_root, 'static', location)
# location = os.path.join(app_root, 'static', location)
with open(location) as fh:
text = fh.read()
else:
Expand Down
6 changes: 3 additions & 3 deletions examples/whisper-spacy.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@
"properties": {
"mime": "video",
"id": "d1",
"location": "file:///data/video/service-mbrs-ntscrm-01181182.mp4"
"location": "file:///data/service-mbrs-ntscrm-01181182.mp4"
}
},
{
"@type": "http://mmif.clams.ai/vocabulary/AudioDocument/v1",
"properties": {
"mime": "audio",
"id": "d2",
"location": "file:///data/audio/service-mbrs-ntscrm-01181182.wav"
"location": "file:///data/service-mbrs-ntscrm-01181182.wav"
}
},
{
"@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
"properties": {
"mime": "text",
"id": "d3",
"location": "file:///data/text/service-mbrs-ntscrm-01181182.txt"
"location": "file:///data/service-mbrs-ntscrm-01181182.txt"
}
}
],
Expand Down
Loading

0 comments on commit 6778625

Please sign in to comment.