Skip to content

Commit

Permalink
Port to Quart (#503)
Browse files Browse the repository at this point in the history
* Quart draft

* Fix ask and test

* Quart deploying now

* Use semantic

* Get tests working

* Revert simple

* Typing fixes

* Don't use pipe
  • Loading branch information
pamelafox authored Aug 9, 2023
1 parent df48d8c commit 9da71ef
Show file tree
Hide file tree
Showing 21 changed files with 216 additions and 191 deletions.
17 changes: 8 additions & 9 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,23 @@
"version": "0.2.0",
"configurations": [
{
"name": "Python: Flask",
"name": "Python: Quart",
"type": "python",
"request": "launch",
"module": "flask",
"module": "quart",
"cwd": "${workspaceFolder}/app/backend",
"env": {
"FLASK_APP": "app.py",
"FLASK_ENV": "development",
"FLASK_DEBUG": "0"
"QUART_APP": "main:app",
"QUART_ENV": "development",
"QUART_DEBUG": "0"
},
"args": [
"run",
"--no-debugger",
"--no-reload",
"-p 5000"
"-p 50505"
],
"console": "integratedTerminal",
"justMyCode": true,
"justMyCode": false,
"envFile": "${input:dotEnvFilePath}",
},
{
Expand Down Expand Up @@ -57,4 +56,4 @@
"command": "azure-dev.commands.getDotEnvFilePath"
}
]
}
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,5 +157,5 @@ Here are the most common failure scenarios and solutions:

1. You see `CERTIFICATE_VERIFY_FAILED` when the `prepdocs.py` script runs. That's typically due to an incorrect SSL certificate setup on your machine. Try the suggestions in this [StackOverflow answer](https://stackoverflow.com/questions/35569042/ssl-certificate-verify-failed-with-python3/43855394#43855394).

1. After running `azd up` and visiting the website, you see a '404 Not Found' in the browser. Wait 10 minutes and try again, as it might be still starting up. Then try running `azd deploy` and wait again. If you still encounter errors with the deployed app, consult these [tips for debugging Flask app deployments](http://blog.pamelafox.org/2023/06/tips-for-debugging-flask-deployments-to.html)
1. After running `azd up` and visiting the website, you see a '404 Not Found' in the browser. Wait 10 minutes and try again, as it might still be starting up. Then try running `azd deploy` and wait again. If you still encounter errors with the deployed app, consult these [tips for debugging App Service app deployments](http://blog.pamelafox.org/2023/06/tips-for-debugging-flask-deployments-to.html)
and file an issue if the error logs don't help you resolve the issue.
76 changes: 36 additions & 40 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import time

import openai
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.storage.blob import BlobServiceClient
from flask import (
from azure.identity.aio import DefaultAzureCredential
from azure.search.documents.aio import SearchClient
from azure.storage.blob.aio import BlobServiceClient
from quart import (
Blueprint,
Flask,
Quart,
abort,
current_app,
jsonify,
Expand Down Expand Up @@ -49,75 +49,76 @@
bp = Blueprint("routes", __name__, static_folder='static')

@bp.route("/")
def index():
return bp.send_static_file("index.html")
async def index():
return await bp.send_static_file("index.html")

@bp.route("/favicon.ico")
def favicon():
return bp.send_static_file("favicon.ico")
async def favicon():
return await bp.send_static_file("favicon.ico")

@bp.route("/assets/<path:path>")
def assets(path):
return send_from_directory("static/assets", path)
async def assets(path):
return await send_from_directory("static/assets", path)

# Serve content files from blob storage from within the app to keep the example self-contained.
# *** NOTE *** this assumes that the content files are public, or at least that all users of the app
# can access all the files. This is also slow and memory hungry.
@bp.route("/content/<path>")
def content_file(path):
async def content_file(path):
blob_container = current_app.config[CONFIG_BLOB_CLIENT].get_container_client(AZURE_STORAGE_CONTAINER)
blob = blob_container.get_blob_client(path).download_blob()
blob = await blob_container.get_blob_client(path).download_blob()
if not blob.properties or not blob.properties.has_key("content_settings"):
abort(404)
mime_type = blob.properties["content_settings"]["content_type"]
if mime_type == "application/octet-stream":
mime_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
blob_file = io.BytesIO()
blob.readinto(blob_file)
await blob.readinto(blob_file)
blob_file.seek(0)
return send_file(blob_file, mimetype=mime_type, as_attachment=False, download_name=path)
return await send_file(blob_file, mimetype=mime_type, as_attachment=False, attachment_filename=path)

@bp.route("/ask", methods=["POST"])
def ask():
async def ask():
if not request.is_json:
return jsonify({"error": "request must be json"}), 415
approach = request.json["approach"]
request_json = await request.get_json()
approach = request_json["approach"]
try:
impl = current_app.config[CONFIG_ASK_APPROACHES].get(approach)
if not impl:
return jsonify({"error": "unknown approach"}), 400
r = impl.run(request.json["question"], request.json.get("overrides") or {})
r = await impl.run(request_json["question"], request_json.get("overrides") or {})
return jsonify(r)
except Exception as e:
logging.exception("Exception in /ask")
return jsonify({"error": str(e)}), 500

@bp.route("/chat", methods=["POST"])
def chat():
async def chat():
if not request.is_json:
return jsonify({"error": "request must be json"}), 415
approach = request.json["approach"]
request_json = await request.get_json()
approach = request_json["approach"]
try:
impl = current_app.config[CONFIG_CHAT_APPROACHES].get(approach)
if not impl:
return jsonify({"error": "unknown approach"}), 400
r = impl.run(request.json["history"], request.json.get("overrides") or {})
r = await impl.run(request_json["history"], request_json.get("overrides") or {})
return jsonify(r)
except Exception as e:
logging.exception("Exception in /chat")
return jsonify({"error": str(e)}), 500

@bp.before_request
def ensure_openai_token():
async def ensure_openai_token():
openai_token = current_app.config[CONFIG_OPENAI_TOKEN]
if openai_token.expires_on < time.time() + 60:
openai_token = current_app.config[CONFIG_CREDENTIAL].get_token("https://cognitiveservices.azure.com/.default")
openai_token = await current_app.config[CONFIG_CREDENTIAL].get_token("https://cognitiveservices.azure.com/.default")
current_app.config[CONFIG_OPENAI_TOKEN] = openai_token
openai.api_key = openai_token.token


def create_app():
app = Flask(__name__)
@bp.before_app_serving
async def setup_clients():

# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed,
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the
Expand All @@ -135,24 +136,21 @@ def create_app():
credential=azure_credential)

# Used by the OpenAI SDK
openai.api_type = "azure"
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
openai.api_version = "2023-05-15"

# Comment these two lines out if using keys; set your API key in the OPENAI_API_KEY environment variable instead
openai.api_type = "azure_ad"
openai_token = azure_credential.get_token(
openai_token = await azure_credential.get_token(
"https://cognitiveservices.azure.com/.default"
)
openai.api_key = openai_token.token

# Store on app.config for later use inside requests
app.config[CONFIG_OPENAI_TOKEN] = openai_token
app.config[CONFIG_CREDENTIAL] = azure_credential
app.config[CONFIG_BLOB_CLIENT] = blob_client
current_app.config[CONFIG_OPENAI_TOKEN] = openai_token
current_app.config[CONFIG_CREDENTIAL] = azure_credential
current_app.config[CONFIG_BLOB_CLIENT] = blob_client
# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
# or some derivative, here we include several for exploration purposes
app.config[CONFIG_ASK_APPROACHES] = {
current_app.config[CONFIG_ASK_APPROACHES] = {
"rtr": RetrieveThenReadApproach(
search_client,
AZURE_OPENAI_CHATGPT_DEPLOYMENT,
Expand All @@ -175,7 +173,7 @@ def create_app():
KB_FIELDS_CONTENT
)
}
app.config[CONFIG_CHAT_APPROACHES] = {
current_app.config[CONFIG_CHAT_APPROACHES] = {
"rrr": ChatReadRetrieveReadApproach(
search_client,
AZURE_OPENAI_CHATGPT_DEPLOYMENT,
Expand All @@ -186,10 +184,8 @@ def create_app():
)
}

app.register_blueprint(bp)

def create_app():
app = Quart(__name__)
app.register_blueprint(bp)
return app

if __name__ == "__main__":
app = create_app()
app.run()
Empty file.
14 changes: 11 additions & 3 deletions app/backend/approaches/approach.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from abc import ABC, abstractmethod
from typing import Any


class Approach:
def run(self, q: str, overrides: dict[str, Any]) -> Any:
raise NotImplementedError
class ChatApproach(ABC):
@abstractmethod
async def run(self, history: list[dict], overrides: dict[str, Any]) -> Any:
...


class AskApproach(ABC):
@abstractmethod
async def run(self, q: str, overrides: dict[str, Any]) -> Any:
...
33 changes: 17 additions & 16 deletions app/backend/approaches/chatreadretrieveread.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from typing import Any, Sequence
from typing import Any

import openai
from azure.search.documents import SearchClient
from azure.search.documents.aio import SearchClient
from azure.search.documents.models import QueryType

from approaches.approach import Approach
from approaches.approach import ChatApproach
from core.messagebuilder import MessageBuilder
from core.modelhelper import get_token_limit
from text import nonewlines


class ChatReadRetrieveReadApproach(Approach):
class ChatReadRetrieveReadApproach(ChatApproach):
# Chat roles
SYSTEM = "system"
USER = "user"
Expand Down Expand Up @@ -57,7 +57,7 @@ def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt
self.content_field = content_field
self.chatgpt_token_limit = get_token_limit(chatgpt_model)

def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
async def run(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> Any:
has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
Expand All @@ -77,7 +77,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
self.chatgpt_token_limit - len(user_q)
)

chat_completion = openai.ChatCompletion.create(
chat_completion = await openai.ChatCompletion.acreate(
deployment_id=self.chatgpt_deployment,
model=self.chatgpt_model,
messages=messages,
Expand All @@ -93,7 +93,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A

# If retrieval mode includes vectors, compute an embedding for the query
if has_vector:
query_vector = openai.Embedding.create(engine=self.embedding_deployment, input=query_text)["data"][0]["embedding"]
query_vector = (await openai.Embedding.acreate(engine=self.embedding_deployment, input=query_text))["data"][0]["embedding"]
else:
query_vector = None

Expand All @@ -103,7 +103,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A

# Use semantic L2 reranker if requested and if retrieval mode is text or hybrid (vectors + text)
if overrides.get("semantic_ranker") and has_text:
r = self.search_client.search(query_text,
r = await self.search_client.search(query_text,
filter=filter,
query_type=QueryType.SEMANTIC,
query_language="en-us",
Expand All @@ -115,16 +115,16 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
top_k=50 if query_vector else None,
vector_fields="embedding" if query_vector else None)
else:
r = self.search_client.search(query_text,
r = await self.search_client.search(query_text,
filter=filter,
top=top,
vector=query_vector,
top_k=50 if query_vector else None,
vector_fields="embedding" if query_vector else None)
if use_semantic_captions:
results = [doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc['@search.captions']])) for doc in r]
results = [doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc['@search.captions']])) async for doc in r]
else:
results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) for doc in r]
results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) async for doc in r]
content = "\n".join(results)

follow_up_questions_prompt = self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
Expand All @@ -147,7 +147,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
history[-1]["user"],
max_tokens=self.chatgpt_token_limit)

chat_completion = openai.ChatCompletion.create(
chat_completion = await openai.ChatCompletion.acreate(
deployment_id=self.chatgpt_deployment,
model=self.chatgpt_model,
messages=messages,
Expand All @@ -161,7 +161,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A

return {"data_points": results, "answer": chat_content, "thoughts": f"Searched for:<br>{query_text}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>')}

def get_messages_from_history(self, system_prompt: str, model_id: str, history: Sequence[dict[str, str]], user_conv: str, few_shots = [], max_tokens: int = 4096) -> []:
def get_messages_from_history(self, system_prompt: str, model_id: str, history: list[dict[str, str]], user_conv: str, few_shots = [], max_tokens: int = 4096) -> list:
message_builder = MessageBuilder(system_prompt, model_id)

# Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
Expand All @@ -174,9 +174,10 @@ def get_messages_from_history(self, system_prompt: str, model_id: str, history:
message_builder.append_message(self.USER, user_content, index=append_index)

for h in reversed(history[:-1]):
if h.get("bot"):
message_builder.append_message(self.ASSISTANT, h.get('bot'), index=append_index)
message_builder.append_message(self.USER, h.get('user'), index=append_index)
if bot_msg := h.get("bot"):
message_builder.append_message(self.ASSISTANT, bot_msg, index=append_index)
if user_msg := h.get("user"):
message_builder.append_message(self.USER, user_msg, index=append_index)
if message_builder.token_length > max_tokens:
break

Expand Down
Loading

0 comments on commit 9da71ef

Please sign in to comment.