Port to Quart (#503)

* Quart draft * Fix ask and test * Quart deploying now * Use semantic * Get tests working * Revert simple * Typing fixes * don't use pipe
Azure-Samples · Aug 9, 2023 · 9da71ef · 9da71ef
1 parent df48d8c
commit 9da71ef
Show file tree

Hide file tree

Showing 21 changed files with 216 additions and 191 deletions.
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -5,24 +5,23 @@
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Python: Flask",
+            "name": "Python: Quart",
             "type": "python",
             "request": "launch",
-            "module": "flask",
+            "module": "quart",
             "cwd": "${workspaceFolder}/app/backend",
             "env": {
-                "FLASK_APP": "app.py",
-                "FLASK_ENV": "development",
-                "FLASK_DEBUG": "0"
+                "QUART_APP": "main:app",
+                "QUART_ENV": "development",
+                "QUART_DEBUG": "0"
             },
             "args": [
                 "run",
-                "--no-debugger",
                 "--no-reload",
-                "-p 5000"
+                "-p 50505"
             ],
             "console": "integratedTerminal",
-            "justMyCode": true,
+            "justMyCode": false,
             "envFile": "${input:dotEnvFilePath}",
         },
         {
@@ -57,4 +56,4 @@
             "command": "azure-dev.commands.getDotEnvFilePath"
         }
     ]
-}
+}
diff --git a/README.md b/README.md
@@ -157,5 +157,5 @@ Here are the most common failure scenarios and solutions:
 
 1. You see `CERTIFICATE_VERIFY_FAILED` when the `prepdocs.py` script runs. That's typically due to incorrect SSL certificates setup on your machine. Try the suggestions in this [StackOverflow answer](https://stackoverflow.com/questions/35569042/ssl-certificate-verify-failed-with-python3/43855394#43855394).
 
-1. After running `azd up` and visiting the website, you see a '404 Not Found' in the browser. Wait 10 minutes and try again, as it might be still starting up. Then try running `azd deploy` and wait again. If you still encounter errors with the deployed app, consult these [tips for debugging Flask app deployments](http://blog.pamelafox.org/2023/06/tips-for-debugging-flask-deployments-to.html)
+1. After running `azd up` and visiting the website, you see a '404 Not Found' in the browser. Wait 10 minutes and try again, as it might be still starting up. Then try running `azd deploy` and wait again. If you still encounter errors with the deployed app, consult these [tips for debugging App Service app deployments](http://blog.pamelafox.org/2023/06/tips-for-debugging-flask-deployments-to.html)
 and file an issue if the error logs don't help you resolve the issue.
diff --git a/app/backend/app.py b/app/backend/app.py
@@ -5,12 +5,12 @@
 import time
 
 import openai
-from azure.identity import DefaultAzureCredential
-from azure.search.documents import SearchClient
-from azure.storage.blob import BlobServiceClient
-from flask import (
+from azure.identity.aio import DefaultAzureCredential
+from azure.search.documents.aio import SearchClient
+from azure.storage.blob.aio import BlobServiceClient
+from quart import (
     Blueprint,
-    Flask,
+    Quart,
     abort,
     current_app,
     jsonify,
@@ -49,75 +49,76 @@
 bp = Blueprint("routes", __name__, static_folder='static')
 
 @bp.route("/")
-def index():
-    return bp.send_static_file("index.html")
+async def index():
+    return await bp.send_static_file("index.html")
 
 @bp.route("/favicon.ico")
-def favicon():
-    return bp.send_static_file("favicon.ico")
+async def favicon():
+    return await bp.send_static_file("favicon.ico")
 
 @bp.route("/assets/<path:path>")
-def assets(path):
-    return send_from_directory("static/assets", path)
+async def assets(path):
+    return await send_from_directory("static/assets", path)
 
 # Serve content files from blob storage from within the app to keep the example self-contained.
 # *** NOTE *** this assumes that the content files are public, or at least that all users of the app
 # can access all the files. This is also slow and memory hungry.
 @bp.route("/content/<path>")
-def content_file(path):
+async def content_file(path):
     blob_container = current_app.config[CONFIG_BLOB_CLIENT].get_container_client(AZURE_STORAGE_CONTAINER)
-    blob = blob_container.get_blob_client(path).download_blob()
+    blob = await blob_container.get_blob_client(path).download_blob()
     if not blob.properties or not blob.properties.has_key("content_settings"):
         abort(404)
     mime_type = blob.properties["content_settings"]["content_type"]
     if mime_type == "application/octet-stream":
         mime_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
     blob_file = io.BytesIO()
-    blob.readinto(blob_file)
+    await blob.readinto(blob_file)
     blob_file.seek(0)
-    return send_file(blob_file, mimetype=mime_type, as_attachment=False, download_name=path)
+    return await send_file(blob_file, mimetype=mime_type, as_attachment=False, attachment_filename=path)
 
 @bp.route("/ask", methods=["POST"])
-def ask():
+async def ask():
     if not request.is_json:
         return jsonify({"error": "request must be json"}), 415
-    approach = request.json["approach"]
+    request_json = await request.get_json()
+    approach = request_json["approach"]
     try:
         impl = current_app.config[CONFIG_ASK_APPROACHES].get(approach)
         if not impl:
             return jsonify({"error": "unknown approach"}), 400
-        r = impl.run(request.json["question"], request.json.get("overrides") or {})
+        r = await impl.run(request_json["question"], request_json.get("overrides") or {})
         return jsonify(r)
     except Exception as e:
         logging.exception("Exception in /ask")
         return jsonify({"error": str(e)}), 500
 
 @bp.route("/chat", methods=["POST"])
-def chat():
+async def chat():
     if not request.is_json:
         return jsonify({"error": "request must be json"}), 415
-    approach = request.json["approach"]
+    request_json = await request.get_json()
+    approach = request_json["approach"]
     try:
         impl = current_app.config[CONFIG_CHAT_APPROACHES].get(approach)
         if not impl:
             return jsonify({"error": "unknown approach"}), 400
-        r = impl.run(request.json["history"], request.json.get("overrides") or {})
+        r = await impl.run(request_json["history"], request_json.get("overrides") or {})
         return jsonify(r)
     except Exception as e:
         logging.exception("Exception in /chat")
         return jsonify({"error": str(e)}), 500
 
 @bp.before_request
-def ensure_openai_token():
+async def ensure_openai_token():
     openai_token = current_app.config[CONFIG_OPENAI_TOKEN]
     if openai_token.expires_on < time.time() + 60:
-        openai_token = current_app.config[CONFIG_CREDENTIAL].get_token("https://cognitiveservices.azure.com/.default")
+        openai_token = await current_app.config[CONFIG_CREDENTIAL].get_token("https://cognitiveservices.azure.com/.default")
         current_app.config[CONFIG_OPENAI_TOKEN] = openai_token
         openai.api_key = openai_token.token
 
-
-def create_app():
-    app = Flask(__name__)
+@bp.before_app_serving
+async def setup_clients():
 
     # Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed,
     # just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the
@@ -135,24 +136,21 @@ def create_app():
         credential=azure_credential)
 
     # Used by the OpenAI SDK
-    openai.api_type = "azure"
     openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
     openai.api_version = "2023-05-15"
-
-    # Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead
     openai.api_type = "azure_ad"
-    openai_token = azure_credential.get_token(
+    openai_token = await azure_credential.get_token(
         "https://cognitiveservices.azure.com/.default"
     )
     openai.api_key = openai_token.token
 
     # Store on app.config for later use inside requests
-    app.config[CONFIG_OPENAI_TOKEN] = openai_token
-    app.config[CONFIG_CREDENTIAL] = azure_credential
-    app.config[CONFIG_BLOB_CLIENT] = blob_client
+    current_app.config[CONFIG_OPENAI_TOKEN] = openai_token
+    current_app.config[CONFIG_CREDENTIAL] = azure_credential
+    current_app.config[CONFIG_BLOB_CLIENT] = blob_client
     # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
     # or some derivative, here we include several for exploration purposes
-    app.config[CONFIG_ASK_APPROACHES] = {
+    current_app.config[CONFIG_ASK_APPROACHES] = {
         "rtr": RetrieveThenReadApproach(
             search_client,
             AZURE_OPENAI_CHATGPT_DEPLOYMENT,
@@ -175,7 +173,7 @@ def create_app():
             KB_FIELDS_CONTENT
         )
     }
-    app.config[CONFIG_CHAT_APPROACHES] = {
+    current_app.config[CONFIG_CHAT_APPROACHES] = {
         "rrr": ChatReadRetrieveReadApproach(
             search_client,
             AZURE_OPENAI_CHATGPT_DEPLOYMENT,
@@ -186,10 +184,8 @@ def create_app():
         )
     }
 
-    app.register_blueprint(bp)
 
+def create_app():
+    app = Quart(__name__)
+    app.register_blueprint(bp)
     return app
-
-if __name__ == "__main__":
-    app = create_app()
-    app.run()
diff --git a/app/backend/approaches/__init__.py b/app/backend/approaches/__init__.py
diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
@@ -1,6 +1,14 @@
+from abc import ABC, abstractmethod
 from typing import Any
 
 
-class Approach:
-    def run(self, q: str, overrides: dict[str, Any]) -> Any:
-        raise NotImplementedError
+class ChatApproach(ABC):
+    @abstractmethod
+    async def run(self, history: list[dict], overrides: dict[str, Any]) -> Any:
+        ...
+
+
+class AskApproach(ABC):
+    @abstractmethod
+    async def run(self, q: str, overrides: dict[str, Any]) -> Any:
+        ...
diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py
@@ -1,16 +1,16 @@
-from typing import Any, Sequence
+from typing import Any
 
 import openai
-from azure.search.documents import SearchClient
+from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import QueryType
 
-from approaches.approach import Approach
+from approaches.approach import ChatApproach
 from core.messagebuilder import MessageBuilder
 from core.modelhelper import get_token_limit
 from text import nonewlines
 
 
-class ChatReadRetrieveReadApproach(Approach):
+class ChatReadRetrieveReadApproach(ChatApproach):
     # Chat roles
     SYSTEM = "system"
     USER = "user"
@@ -57,7 +57,7 @@ def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt
         self.content_field = content_field
         self.chatgpt_token_limit = get_token_limit(chatgpt_model)
 
-    def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
+    async def run(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> Any:
         has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
@@ -77,7 +77,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
             self.chatgpt_token_limit - len(user_q)
             )
 
-        chat_completion = openai.ChatCompletion.create(
+        chat_completion = await openai.ChatCompletion.acreate(
             deployment_id=self.chatgpt_deployment,
             model=self.chatgpt_model,
             messages=messages,
@@ -93,7 +93,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
 
         # If retrieval mode includes vectors, compute an embedding for the query
         if has_vector:
-            query_vector = openai.Embedding.create(engine=self.embedding_deployment, input=query_text)["data"][0]["embedding"]
+            query_vector = (await openai.Embedding.acreate(engine=self.embedding_deployment, input=query_text))["data"][0]["embedding"]
         else:
             query_vector = None
 
@@ -103,7 +103,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
 
         # Use semantic L2 reranker if requested and if retrieval mode is text or hybrid (vectors + text)
         if overrides.get("semantic_ranker") and has_text:
-            r = self.search_client.search(query_text,
+            r = await self.search_client.search(query_text,
                                           filter=filter,
                                           query_type=QueryType.SEMANTIC,
                                           query_language="en-us",
@@ -115,16 +115,16 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
                                           top_k=50 if query_vector else None,
                                           vector_fields="embedding" if query_vector else None)
         else:
-            r = self.search_client.search(query_text,
+            r = await self.search_client.search(query_text,
                                           filter=filter,
                                           top=top,
                                           vector=query_vector,
                                           top_k=50 if query_vector else None,
                                           vector_fields="embedding" if query_vector else None)
         if use_semantic_captions:
-            results = [doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc['@search.captions']])) for doc in r]
+            results = [doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc['@search.captions']])) async for doc in r]
         else:
-            results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) for doc in r]
+            results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) async for doc in r]
         content = "\n".join(results)
 
         follow_up_questions_prompt = self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
@@ -147,7 +147,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
             history[-1]["user"],
             max_tokens=self.chatgpt_token_limit)
 
-        chat_completion = openai.ChatCompletion.create(
+        chat_completion = await openai.ChatCompletion.acreate(
             deployment_id=self.chatgpt_deployment,
             model=self.chatgpt_model,
             messages=messages,
@@ -161,7 +161,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
 
         return {"data_points": results, "answer": chat_content, "thoughts": f"Searched for:<br>{query_text}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>')}
 
-    def get_messages_from_history(self, system_prompt: str, model_id: str, history: Sequence[dict[str, str]], user_conv: str, few_shots = [], max_tokens: int = 4096) -> []:
+    def get_messages_from_history(self, system_prompt: str, model_id: str, history: list[dict[str, str]], user_conv: str, few_shots = [], max_tokens: int = 4096) -> list:
         message_builder = MessageBuilder(system_prompt, model_id)
 
         # Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
@@ -174,9 +174,10 @@ def get_messages_from_history(self, system_prompt: str, model_id: str, history:
         message_builder.append_message(self.USER, user_content, index=append_index)
 
         for h in reversed(history[:-1]):
-            if h.get("bot"):
-                message_builder.append_message(self.ASSISTANT, h.get('bot'), index=append_index)
-            message_builder.append_message(self.USER, h.get('user'), index=append_index)
+            if bot_msg := h.get("bot"):
+                message_builder.append_message(self.ASSISTANT, bot_msg, index=append_index)
+            if user_msg := h.get("user"):
+                message_builder.append_message(self.USER, user_msg, index=append_index)
             if message_builder.token_length > max_tokens:
                 break