Fastapi #40

Open
wants to merge 12 commits into master
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,4 +1,5 @@
app/ckpts/*
*/ckpts/*
api/demo

**/tags

10 changes: 10 additions & 0 deletions Makefile
@@ -0,0 +1,10 @@
build:
docker-compose build

start:
docker-compose up -d

run: build start

stop:
docker-compose stop
9 changes: 5 additions & 4 deletions README.md
@@ -43,10 +43,10 @@ Download the following files and place them into `ckpts` folder
Clone the repo and navigate to the repo:
```
git clone https://github.com/riven314/TweakStory.git
cd TweakStory/app
cd TweakStory
```

Build and run the docker image locally:
Build and run the Docker Compose images locally:
```
make run
```
@@ -61,8 +61,9 @@ make stop

### Run Unit Test
```
cd TweakStory/app
pytest -s tests
# test the FastAPI app with TestClient
cd TweakStory/api
pytest -s tests/test_requests.py
```

## Model Structure
1 change: 1 addition & 0 deletions api/.python-version
@@ -0,0 +1 @@
3.6.10
18 changes: 18 additions & 0 deletions api/Dockerfile
@@ -0,0 +1,18 @@
FROM python:3.6-slim

EXPOSE 8080

WORKDIR /app/

# Needed to run OpenCV inside the python slim image.
RUN apt-get update && apt-get -y install libgl1-mesa-glx && apt-get clean

COPY requirements.txt .

RUN pip install -r requirements.txt

COPY ./ckpts ./ckpts/

COPY . .

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
24 changes: 24 additions & 0 deletions api/Makefile
@@ -0,0 +1,24 @@
APP_NAME=model-api
DOCKER_CMD=$(shell which docker || which podman || echo "docker")
PORT=8080
MODEL_CHECKPOINT_NAME=BEST_checkpoint_flickr8k_1_cap_per_img_1_min_word_freq.pth
WORD_MAP_CHECKPOINT_NAME=WORDMAP_flickr8k_1_cap_per_img_1_min_word_freq.json
CHECKPOINT_LOCATION=http://tweakstory.stefanmesken.info/checkpoints

.get_checkpoint/%:
test -f ckpts/$* || wget $(CHECKPOINT_LOCATION)/$* -P ckpts/

get_checkpoints: .get_checkpoint/$(MODEL_CHECKPOINT_NAME)\
.get_checkpoint/$(WORD_MAP_CHECKPOINT_NAME)

build: get_checkpoints
$(DOCKER_CMD) build -t $(APP_NAME) .

start:
$(DOCKER_CMD) run -d -p=$(PORT):$(PORT) --rm --name=$(APP_NAME) $(APP_NAME)
@echo $(APP_NAME) running at localhost:$(PORT)

run: build start

stop:
$(DOCKER_CMD) stop $(APP_NAME) || true
4 changes: 4 additions & 0 deletions api/config/api_config.yaml
@@ -0,0 +1,4 @@
is_cuda: false
img_resize: 256
beam_size: 10
model_config: !include ./model.yaml
13 changes: 13 additions & 0 deletions api/config/model.yaml
@@ -0,0 +1,13 @@
checkpoint_file: ./ckpts/BEST_checkpoint_flickr8k_1_cap_per_img_1_min_word_freq.pth
word_map_file: ./ckpts/WORDMAP_flickr8k_1_cap_per_img_1_min_word_freq.json
attention_dim: 512
emb_dim: 512
decoder_dim: 1024
style_dim: 128
dropout: 0.5
regularized_decoder:
embed_p: 0.1
weight_p: 0.4
input_p: 0.3
output_p: 0.3
fine_tune_encoder: false
38 changes: 38 additions & 0 deletions api/main.py
@@ -0,0 +1,38 @@
# uvicorn {script name}:{FastAPI instance name} --port 8080
import time
import logging

from fastapi import FastAPI
from pydantic import BaseModel

from src.utils import decode_b64_image_string
from src.api_inputs import model_inference

logging.basicConfig(level = logging.INFO)
logger = logging.getLogger()


# set up FastAPI server
app = FastAPI()

class UserControl(BaseModel):
sentence_class: int
emoji_class: int
b64_img_str: str


@app.post("/inference")
def get_model_caption(usr_ctrl: UserControl):
start = time.time()

sentence_class = usr_ctrl.sentence_class
emoji_class = usr_ctrl.emoji_class
np_img = decode_b64_image_string(usr_ctrl.b64_img_str)

caption = model_inference(np_img, sentence_class, emoji_class)

t = time.time() - start
logger.info(f'inference complete: {t:.5f} s')
logger.info(f'generated caption: {caption}')
return {'output': caption}
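For reference, a minimal client call against this endpoint could look like the sketch below. It assumes the container from `make run` is listening on localhost:8080 and that `requests` is installed; the image path is only a placeholder.

```
# Hypothetical client sketch, not part of this PR.
# Assumes the API container is up on localhost:8080 and `requests` is installed.
from base64 import b64encode

import requests

with open('demo_img1.jpg', 'rb') as f:  # placeholder image path
    b64_img_str = b64encode(f.read()).decode('ascii')

body = {
    'sentence_class': 0,  # tests use 0, 1, 2
    'emoji_class': 1,     # tests use 0, 1
    'b64_img_str': b64_img_str,
}

res = requests.post('http://127.0.0.1:8080/inference', json=body)
print(res.json()['output'])  # caption, with emoji still in :emoji_name: form
```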

12 changes: 12 additions & 0 deletions api/requirements.txt
@@ -0,0 +1,12 @@
--find-links https://download.pytorch.org/whl/torch_stable.html
easydict==1.9
emoji==0.6.0
numpy==1.19.1
opencv-python-headless
pytest==6.0.1
torch==1.5.0+cpu
torchvision==0.6.0+cpu
transformers==2.1.1
pyyaml==5.3.1
fastapi==0.61.1
uvicorn==0.11.8
File renamed without changes.
30 changes: 30 additions & 0 deletions api/src/api_inputs.py
@@ -0,0 +1,30 @@
import cv2
from easydict import EasyDict as edict

from src.utils import read_yaml, read_json
from src.infer_utils import tfms_image, output_caption, setup_models, setup_tokenizer

CONFIG_FILE = './config/api_config.yaml'


# set up model-specific dependencies
app_cfg = edict(read_yaml(CONFIG_FILE))
model_cfg = edict(app_cfg.model_config)
encoder, decoder = setup_models(model_cfg, is_cuda = False)
word_map = read_json(model_cfg.word_map_file)
rev_word_map = {v: k for k, v in word_map.items()}
tokenizer = setup_tokenizer(word_map)


def model_inference(np_img, sentence_class, emoji_class):
resized_img = cv2.resize(np_img, (app_cfg.img_resize, app_cfg.img_resize))
tensor_img = tfms_image(resized_img)

# caption not yet emojized (e.g. :hugging_face:)
caption, pred_ids, _ = output_caption(
encoder, decoder, tensor_img,
word_map, rev_word_map, tokenizer,
sentence_class, emoji_class,
beam_size = app_cfg.beam_size
)
return caption
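As a rough local sanity check, `model_inference` could be exercised directly along these lines. This is only a sketch: it assumes the checkpoints referenced in `config/model.yaml` are already in `./ckpts` and that it is run from `TweakStory/api`, since importing the module loads the encoder and decoder.

```
# Hypothetical local check, not part of this PR.
# Run from TweakStory/api with the checkpoints present in ./ckpts.
import cv2

from src.api_inputs import model_inference

np_img = cv2.imread('../app/demo/demo_img1.jpg')  # demo image path taken from tests/config.py
np_img = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)  # the API path feeds RGB arrays into model_inference

caption = model_inference(np_img, sentence_class=1, emoji_class=0)
print(caption)  # caption with emoji still as :emoji_name: tokens
```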
35 changes: 35 additions & 0 deletions api/src/common.py
@@ -0,0 +1,35 @@
import os
import json
import yaml


def read_json(json_path):
assert os.path.isfile(json_path), f'{json_path} does not exist'
with open(json_path, 'r') as f:
data = json.load(f)
return data


class Loader(yaml.SafeLoader):
def __init__(self, stream):

self._root = os.path.split(stream.name)[0]

super(Loader, self).__init__(stream)

def include(self, node):

filename = os.path.join(self._root, self.construct_scalar(node))

with open(filename, 'r') as f:
return yaml.load(f, Loader)

# enable PyYAML to handle "!include"
Loader.add_constructor('!include', Loader.include)


def read_yaml(yaml_path):
assert os.path.isfile(yaml_path), f'{yaml_path} does not exist'
with open(yaml_path, 'r') as f:
data = yaml.load(f, Loader = Loader)
return data
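As a quick illustration of the custom `!include` constructor, loading `config/api_config.yaml` through `read_yaml` splices `model.yaml` in as a nested dict. A minimal sketch, assuming it is run from `TweakStory/api`:

```
# Hypothetical usage sketch, not part of this PR.
from easydict import EasyDict as edict

from src.common import read_yaml

cfg = edict(read_yaml('./config/api_config.yaml'))

print(cfg.beam_size)                       # 10, straight from api_config.yaml
print(cfg.model_config['attention_dim'])   # 512, pulled in via "!include ./model.yaml"
```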
File renamed without changes.
56 changes: 2 additions & 54 deletions app/src/app_utils.py → api/src/infer_utils.py
@@ -1,53 +1,15 @@
import os
import time
import json
import yaml

import cv2
import numpy as np
from easydict import EasyDict as edict
from PIL import Image

import torch
import torch.nn.functional as F
import torchvision.transforms as transforms

import emoji
from transformers import AutoTokenizer

from src.models import get_encoder_decoder


def read_json(json_path):
assert json_path, f'{json_path} not exist'
with open(json_path, 'r') as f:
data = json.load(f)
return data


class Loader(yaml.SafeLoader):
def __init__(self, stream):

self._root = os.path.split(stream.name)[0]

super(Loader, self).__init__(stream)

def include(self, node):

filename = os.path.join(self._root, self.construct_scalar(node))

with open(filename, 'r') as f:
return yaml.load(f, Loader)

# enable PyYAML to handle "!include"
Loader.add_constructor('!include', Loader.include)


def read_yaml(yaml_path):
assert yaml_path, f'{yaml_path} not exist'
with open(yaml_path, 'r') as f:
data = yaml.load(f, Loader = Loader)
return data

def setup_models(cfg, is_cuda):
encoder, decoder = get_encoder_decoder(cfg)

@@ -67,20 +29,6 @@ def setup_tokenizer(word_map):
return tokenizer


def open_image(img_fn, demo_flag):
if demo_flag:
img = cv2.imread(img_fn)
else:
img = cv2.imdecode(np.fromstring(img_fn.read(), np.int8), 1)
# from pdb import set_trace
# set_trace()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
if len(img) == 2:
img = img[:, :, np.newaxis]
img = np.concatenate([img, img, img], axis = 2)
return img


def tfms_image(img):
img = img.transpose(2, 0, 1)
img = torch.FloatTensor(img / 255.)
@@ -92,6 +40,7 @@ def tfms_image(img):

def output_caption(encoder, decoder, image, word_map, rev_word_map,
tokenizer, len_class, emoji_class, beam_size):
""" @return: de-emojized caption (e.g. :hugging_face:) """
device = image.device
len_class = torch.as_tensor([len_class]).long().to(device)
emoji_class = torch.as_tensor([emoji_class]).long().to(device)
@@ -106,7 +55,6 @@ def output_caption(encoder, decoder, image, word_map, rev_word_map,

# decode and postprocessing
caption = tokenizer.decode(enc)
caption = emoji.emojize(caption)
caption = caption.replace('[UNK]', '')
return caption, pred_ids, pred_subwords
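With `emoji.emojize` dropped here, captions leave the API with textual placeholders such as `:hugging_face:` (see the comment in `api/src/api_inputs.py`). A consumer that wants rendered emoji could convert them roughly like this, using the pinned `emoji==0.6.0` package:

```
# Hypothetical consumer-side step, not part of this PR.
import emoji

raw_caption = 'sunset by the pier :hugging_face:'  # example shape of an API caption
print(emoji.emojize(raw_caption))                  # replaces :hugging_face: with the emoji character
```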

File renamed without changes.
14 changes: 14 additions & 0 deletions api/src/utils.py
@@ -0,0 +1,14 @@
import os
Collaborator: This entire file can and should be shared across model, app and api.

riven314 (Owner, Oct 10, 2020): Making two Docker images share a common parent utils folder is a bit hard to set up. As an easier workaround, I simply created a common.py to hold the common util functions and duplicated it into app/src and api/src.

from io import BytesIO
from base64 import b64decode

import numpy as np
from PIL import Image

from src.common import read_json, read_yaml


def decode_b64_image_string(b64_img_str):
bytes_img = BytesIO(b64decode(b64_img_str.encode('ascii')))
np_img = np.array(Image.open(bytes_img).convert('RGB'))
return np_img
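A small round-trip sketch for `decode_b64_image_string`, assuming it is run from `TweakStory/api` so that `src.utils` is importable, with a placeholder image path:

```
# Hypothetical round-trip check, not part of this PR.
from base64 import b64encode

from src.utils import decode_b64_image_string

with open('../app/demo/demo_img1.jpg', 'rb') as f:  # placeholder path, mirrors tests/config.py
    b64_img_str = b64encode(f.read()).decode('ascii')

np_img = decode_b64_image_string(b64_img_str)
print(np_img.shape)  # (height, width, 3) RGB array, ready for cv2.resize
```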
Empty file added api/tests/__init__.py
Empty file.
11 changes: 11 additions & 0 deletions api/tests/config.py
@@ -0,0 +1,11 @@
REQUEST_URL = 'http://127.0.0.1'
Collaborator: Please adapt this to the configuration style we've adopted in model.

riven314 (Owner): This config.py is only used by pytest for testing; it will not be used in production.

REQUEST_PORT = 8080
REQUEST_ROUTE = '/inference'

SUCCESS_CODE = 200

IMAGE_PATH = '../app/demo/demo_img1.jpg'
Collaborator: This does not belong in api's config.

riven314 (Owner): Same as above; this config.py is only used by pytest for testing.


SENTENCE_CLASSES = [0, 1, 2]
EMOJI_CLASSES = [0, 1]

10 changes: 10 additions & 0 deletions api/tests/conftest.py
@@ -0,0 +1,10 @@
import pytest

from fastapi.testclient import TestClient
from main import app


@pytest.fixture(scope = 'module')
def test_client():
client = TestClient(app)
return client
29 changes: 29 additions & 0 deletions api/tests/test_requests.py
@@ -0,0 +1,29 @@
# TweakStory/api $ pytest -s tests/test_requests.py
import os
from base64 import b64encode

from tests.config import *


def _stringify_b64_encoded_image(img_path):
assert os.path.isfile(img_path)

bytes_img = b64encode(open(img_path, 'rb').read())
b64_img_str = bytes_img.decode('ascii')
return b64_img_str


def test_fastapi_endpoint(test_client):
b64_img_str = _stringify_b64_encoded_image(IMAGE_PATH)

for sentence_class in SENTENCE_CLASSES:
for emoji_class in EMOJI_CLASSES:
body = dict(
sentence_class = sentence_class,
emoji_class = emoji_class,
b64_img_str = b64_img_str
)

res = test_client.post(REQUEST_ROUTE, json = body)

assert res.status_code == SUCCESS_CODE
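The TestClient above exercises the app in-process. To hit a running container instead, the same request body could be posted to the URL assembled from `tests/config.py`. A sketch only, assuming `requests` is installed (it is not in `api/requirements.txt`) and the container from `make run` is up:

```
# Hypothetical live-server variant, not part of this PR.
import requests

from tests.config import REQUEST_URL, REQUEST_PORT, REQUEST_ROUTE, SUCCESS_CODE


def post_live(body):
    url = f'{REQUEST_URL}:{REQUEST_PORT}{REQUEST_ROUTE}'  # http://127.0.0.1:8080/inference
    res = requests.post(url, json=body)
    assert res.status_code == SUCCESS_CODE
    return res.json()['output']
```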
2 changes: 0 additions & 2 deletions app/Dockerfile
@@ -11,8 +11,6 @@ COPY requirements.txt .

RUN pip install -r requirements.txt

COPY ./ckpts ./ckpts/

COPY . .

CMD ["streamlit", "run", "app.py"]