app.py
from potassium import Potassium, Request, Response
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import time
from jsonformer.format import highlight_values
from jsonformer.main import Jsonformer

app = Potassium("dolly_jsonformer")


# @app.init runs once at startup and initializes the app's context
@app.init
def init():
    MODEL_NAME = "databricks/dolly-v2-3b"

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
    # device_map="auto" places the model on a GPU automatically when one is
    # available, so no manual device index is needed here
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    context = {"model": model, "tokenizer": tokenizer}
    return context


# @app.handler is an HTTP POST handler that runs on every call
@app.handler()
def handler(context: dict, request: Request) -> Response:
    model = context.get("model")
    tokenizer = context.get("tokenizer")

    # Start the inference timer
    t_1 = time.time()

    prompt = request.json.get("prompt")
    json_schema = request.json.get("json_schema")
    if not json_schema:
        return Response(
            json={"error": "No json_schema provided in the request body."},
            status=400,
        )
    if not prompt:
        return Response(
            json={"error": "No prompt provided in the request body."},
            status=400,
        )

    # Jsonformer constrains token generation so the output always conforms
    # to the provided JSON schema
    builder = Jsonformer(
        model=model,
        tokenizer=tokenizer,
        json_schema=json_schema,
        prompt=prompt,
    )
    output = builder()

    # Pretty-print the generated values to the server log
    highlight_values(output)

    t_2 = time.time()
    return Response(
        json={
            "output": output,
            "prompt": prompt,
            "inference_time": t_2 - t_1,
        },
        status=200,
    )


if __name__ == "__main__":
    app.serve()
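

# --- Example client call (a minimal sketch, not part of the served app) ---
# Assumptions: the server was started locally with `python app.py` and
# Potassium is listening on its default port 8000 with the handler mounted
# at "/"; adjust the URL for your deployment. The schema below uses the
# primitive types Jsonformer supports (string, number, boolean, array,
# object), and the prompt text is purely illustrative.
#
# import requests
#
# payload = {
#     "prompt": "Generate a character profile for an RPG:",
#     "json_schema": {
#         "type": "object",
#         "properties": {
#             "name": {"type": "string"},
#             "level": {"type": "number"},
#             "is_hero": {"type": "boolean"},
#             "items": {"type": "array", "items": {"type": "string"}},
#         },
#     },
# }
# resp = requests.post("http://localhost:8000/", json=payload)
# print(resp.json()["output"])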