Skip to content

Commit

Permalink
Use an app factory pattern to enable app testing (#495)
Browse files Browse the repository at this point in the history
* Run ruff, add to precommit

* Blueprint

* Change to blueprint, app factory

* Fixing the imports

* Build frontend

* Update startup

* Update startup

* Gunicorn command update

* Revert yaml change
  • Loading branch information
pamelafox authored Aug 3, 2023
1 parent 07a97f3 commit df48d8c
Show file tree
Hide file tree
Showing 15 changed files with 267 additions and 89 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/python-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,23 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-20.04"]
python_version: ["3.8", "3.9", "3.10", "3.11"]
python_version: ["3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v3
- name: Setup python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python_version }}
architecture: x64
- name: Setup node
uses: actions/setup-node@v2
with:
node-version: 18
- name: Build frontend
run: |
cd ./app/frontend
npm install
npm run build
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ The repo includes sample data so it's ready to try end to end. In this sample ap
#### To Run Locally

* [Azure Developer CLI](https://aka.ms/azure-dev/install)
* [Python 3.8+](https://www.python.org/downloads/)
* [Python 3.9+](https://www.python.org/downloads/)
* **Important**: Python and the pip package manager must be in the path in Windows for the setup scripts to work.
* **Important**: Ensure you can run `python --version` from console. On Ubuntu, you might need to run `sudo apt install python-is-python3` to link `python` to `python3`.
* [Node.js 14+](https://nodejs.org/en/download/)
Expand Down
217 changes: 138 additions & 79 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,85 +5,67 @@
import time

import openai
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.storage.blob import BlobServiceClient
from flask import (
Blueprint,
Flask,
abort,
current_app,
jsonify,
request,
send_file,
send_from_directory,
)

from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
from approaches.readdecomposeask import ReadDecomposeAsk
from approaches.readretrieveread import ReadRetrieveReadApproach
from approaches.retrievethenread import RetrieveThenReadApproach
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.storage.blob import BlobServiceClient
from flask import Flask, abort, jsonify, request, send_file

# Replace these with your own values, either in environment variables or directly here
AZURE_STORAGE_ACCOUNT = os.environ.get("AZURE_STORAGE_ACCOUNT") or "mystorageaccount"
AZURE_STORAGE_CONTAINER = os.environ.get("AZURE_STORAGE_CONTAINER") or "content"
AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE") or "gptkb"
AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") or "gptkbindex"
AZURE_OPENAI_SERVICE = os.environ.get("AZURE_OPENAI_SERVICE") or "myopenai"
AZURE_OPENAI_GPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT_DEPLOYMENT") or "davinci"
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT") or "chat"
AZURE_OPENAI_CHATGPT_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL") or "gpt-35-turbo"
AZURE_OPENAI_EMB_DEPLOYMENT = os.environ.get("AZURE_OPENAI_EMB_DEPLOYMENT") or "embedding"

KB_FIELDS_CONTENT = os.environ.get("KB_FIELDS_CONTENT") or "content"
KB_FIELDS_CATEGORY = os.environ.get("KB_FIELDS_CATEGORY") or "category"
KB_FIELDS_SOURCEPAGE = os.environ.get("KB_FIELDS_SOURCEPAGE") or "sourcepage"

# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed,
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the
# keys for each service
# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True)
azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential = True)

# Used by the OpenAI SDK
openai.api_type = "azure"
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
openai.api_version = "2023-05-15"

# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead
openai.api_type = "azure_ad"
openai_token = azure_credential.get_token("https://cognitiveservices.azure.com/.default")
openai.api_key = openai_token.token

# Set up clients for Cognitive Search and Storage
search_client = SearchClient(
endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net",
index_name=AZURE_SEARCH_INDEX,
credential=azure_credential)
blob_client = BlobServiceClient(
account_url=f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net",
credential=azure_credential)
blob_container = blob_client.get_container_client(AZURE_STORAGE_CONTAINER)

# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
# or some derivative, here we include several for exploration purposes
ask_approaches = {
"rtr": RetrieveThenReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, AZURE_OPENAI_EMB_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
"rrr": ReadRetrieveReadApproach(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, AZURE_OPENAI_EMB_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
"rda": ReadDecomposeAsk(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, AZURE_OPENAI_EMB_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
}

chat_approaches = {
"rrr": ChatReadRetrieveReadApproach(search_client,
AZURE_OPENAI_CHATGPT_DEPLOYMENT,
AZURE_OPENAI_CHATGPT_MODEL,
AZURE_OPENAI_EMB_DEPLOYMENT,
KB_FIELDS_SOURCEPAGE,
KB_FIELDS_CONTENT)
}

app = Flask(__name__)

@app.route("/", defaults={"path": "index.html"})
@app.route("/<path:path>")
def static_file(path):
return app.send_static_file(path)
AZURE_STORAGE_ACCOUNT = os.getenv("AZURE_STORAGE_ACCOUNT", "mystorageaccount")
AZURE_STORAGE_CONTAINER = os.getenv("AZURE_STORAGE_CONTAINER", "content")
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE", "gptkb")
AZURE_SEARCH_INDEX = os.getenv("AZURE_SEARCH_INDEX", "gptkbindex")
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE", "myopenai")
AZURE_OPENAI_GPT_DEPLOYMENT = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT", "davinci")
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "chat")
AZURE_OPENAI_CHATGPT_MODEL = os.getenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-35-turbo")
AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT", "embedding")

KB_FIELDS_CONTENT = os.getenv("KB_FIELDS_CONTENT", "content")
KB_FIELDS_CATEGORY = os.getenv("KB_FIELDS_CATEGORY", "category")
KB_FIELDS_SOURCEPAGE = os.getenv("KB_FIELDS_SOURCEPAGE", "sourcepage")

CONFIG_OPENAI_TOKEN = "openai_token"
CONFIG_CREDENTIAL = "azure_credential"
CONFIG_ASK_APPROACHES = "ask_approaches"
CONFIG_CHAT_APPROACHES = "chat_approaches"
CONFIG_BLOB_CLIENT = "blob_client"


bp = Blueprint("routes", __name__, static_folder='static')

@bp.route("/")
def index():
    """Serve the single-page app's entry point from the blueprint's static folder."""
    entry_point = "index.html"
    return bp.send_static_file(entry_point)

@bp.route("/favicon.ico")
def favicon():
    """Serve the site favicon from the blueprint's static folder."""
    icon_name = "favicon.ico"
    return bp.send_static_file(icon_name)

@bp.route("/assets/<path:path>")
def assets(path):
    """Serve built frontend asset files (JS/CSS bundles) by relative path."""
    asset_directory = "static/assets"
    return send_from_directory(asset_directory, path)

# Serve content files from blob storage from within the app to keep the example self-contained.
# *** NOTE *** this assumes that the content files are public, or at least that all users of the app
# can access all the files. This is also slow and memory hungry.
@app.route("/content/<path>")
@bp.route("/content/<path>")
def content_file(path):
blob_container = current_app.config[CONFIG_BLOB_CLIENT].get_container_client(AZURE_STORAGE_CONTAINER)
blob = blob_container.get_blob_client(path).download_blob()
if not blob.properties or not blob.properties.has_key("content_settings"):
abort(404)
Expand All @@ -95,13 +77,13 @@ def content_file(path):
blob_file.seek(0)
return send_file(blob_file, mimetype=mime_type, as_attachment=False, download_name=path)

@app.route("/ask", methods=["POST"])
@bp.route("/ask", methods=["POST"])
def ask():
if not request.json:
return jsonify({"error": "request must be json"}), 400
if not request.is_json:
return jsonify({"error": "request must be json"}), 415
approach = request.json["approach"]
try:
impl = ask_approaches.get(approach)
impl = current_app.config[CONFIG_ASK_APPROACHES].get(approach)
if not impl:
return jsonify({"error": "unknown approach"}), 400
r = impl.run(request.json["question"], request.json.get("overrides") or {})
Expand All @@ -110,13 +92,13 @@ def ask():
logging.exception("Exception in /ask")
return jsonify({"error": str(e)}), 500

@app.route("/chat", methods=["POST"])
@bp.route("/chat", methods=["POST"])
def chat():
if not request.json:
return jsonify({"error": "request must be json"}), 400
if not request.is_json:
return jsonify({"error": "request must be json"}), 415
approach = request.json["approach"]
try:
impl = chat_approaches.get(approach)
impl = current_app.config[CONFIG_CHAT_APPROACHES].get(approach)
if not impl:
return jsonify({"error": "unknown approach"}), 400
r = impl.run(request.json["history"], request.json.get("overrides") or {})
Expand All @@ -125,12 +107,89 @@ def chat():
logging.exception("Exception in /chat")
return jsonify({"error": str(e)}), 500

@app.before_request
@bp.before_request
def ensure_openai_token():
global openai_token
openai_token = current_app.config[CONFIG_OPENAI_TOKEN]
if openai_token.expires_on < time.time() + 60:
openai_token = azure_credential.get_token("https://cognitiveservices.azure.com/.default")
openai_token = current_app.config[CONFIG_CREDENTIAL].get_token("https://cognitiveservices.azure.com/.default")
current_app.config[CONFIG_OPENAI_TOKEN] = openai_token
openai.api_key = openai_token.token


def create_app():
    """Application factory: build and configure the Flask app.

    Creates the Azure credential and the Search/Blob service clients once,
    configures the OpenAI SDK for Azure AD authentication, stashes the
    long-lived shared objects on ``app.config`` for use inside request
    handlers, and registers the routes blueprint.
    """
    flask_app = Flask(__name__)

    # Authenticate with the current user identity (use 'az login' locally,
    # managed identity when deployed on Azure) — no secrets needed. If you
    # need keys instead, use separate AzureKeyCredential instances per service.
    # If a DefaultAzureCredential resolution blocks on one credential type,
    # exclude it via a parameter (e.g. exclude_shared_token_cache_credential=True).
    credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)

    # Clients for Cognitive Search and Blob Storage.
    search_client = SearchClient(
        endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net",
        index_name=AZURE_SEARCH_INDEX,
        credential=credential,
    )
    blob_client = BlobServiceClient(
        account_url=f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net",
        credential=credential,
    )

    # Module-level settings consumed by the OpenAI SDK.
    openai.api_type = "azure"
    openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
    openai.api_version = "2023-05-15"

    # Comment these two lines out if using keys; set your API key in the
    # OPENAI_API_KEY environment variable instead.
    openai.api_type = "azure_ad"
    token = credential.get_token("https://cognitiveservices.azure.com/.default")
    openai.api_key = token.token

    # Share long-lived objects with request handlers via app.config.
    flask_app.config[CONFIG_OPENAI_TOKEN] = token
    flask_app.config[CONFIG_CREDENTIAL] = credential
    flask_app.config[CONFIG_BLOB_CLIENT] = blob_client

    # Several patterns for integrating GPT with external knowledge are
    # included for exploration purposes; most applications would use a
    # single one of these (or some derivative).
    flask_app.config[CONFIG_ASK_APPROACHES] = {
        "rtr": RetrieveThenReadApproach(
            search_client,
            AZURE_OPENAI_CHATGPT_DEPLOYMENT,
            AZURE_OPENAI_CHATGPT_MODEL,
            AZURE_OPENAI_EMB_DEPLOYMENT,
            KB_FIELDS_SOURCEPAGE,
            KB_FIELDS_CONTENT,
        ),
        "rrr": ReadRetrieveReadApproach(
            search_client,
            AZURE_OPENAI_GPT_DEPLOYMENT,
            AZURE_OPENAI_EMB_DEPLOYMENT,
            KB_FIELDS_SOURCEPAGE,
            KB_FIELDS_CONTENT,
        ),
        "rda": ReadDecomposeAsk(
            search_client,
            AZURE_OPENAI_GPT_DEPLOYMENT,
            AZURE_OPENAI_EMB_DEPLOYMENT,
            KB_FIELDS_SOURCEPAGE,
            KB_FIELDS_CONTENT,
        ),
    }
    flask_app.config[CONFIG_CHAT_APPROACHES] = {
        "rrr": ChatReadRetrieveReadApproach(
            search_client,
            AZURE_OPENAI_CHATGPT_DEPLOYMENT,
            AZURE_OPENAI_CHATGPT_MODEL,
            AZURE_OPENAI_EMB_DEPLOYMENT,
            KB_FIELDS_SOURCEPAGE,
            KB_FIELDS_CONTENT,
        ),
    }

    flask_app.register_blueprint(bp)

    return flask_app

if __name__ == "__main__":
    # Local development entry point; production serves via gunicorn
    # invoking the create_app() factory directly.
    create_app().run()
3 changes: 2 additions & 1 deletion app/backend/approaches/chatreadretrieveread.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import Any, Sequence

import openai
from approaches.approach import Approach
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType

from approaches.approach import Approach
from core.messagebuilder import MessageBuilder
from core.modelhelper import get_token_limit
from text import nonewlines
Expand Down
3 changes: 2 additions & 1 deletion app/backend/approaches/readdecomposeask.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
from typing import Any, List, Optional

import openai
from approaches.approach import Approach
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType
from langchain.agents import AgentExecutor, Tool
from langchain.agents.react.base import ReActDocstoreAgent
from langchain.callbacks.manager import CallbackManager
from langchain.llms.openai import AzureOpenAI
from langchain.prompts import BasePromptTemplate, PromptTemplate

from approaches.approach import Approach
from langchainadapters import HtmlCallbackHandler
from text import nonewlines

Expand Down
3 changes: 2 additions & 1 deletion app/backend/approaches/readretrieveread.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from typing import Any

import openai
from approaches.approach import Approach
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType
from langchain.agents import AgentExecutor, Tool, ZeroShotAgent
from langchain.callbacks.manager import CallbackManager, Callbacks
from langchain.chains import LLMChain
from langchain.llms.openai import AzureOpenAI

from approaches.approach import Approach
from langchainadapters import HtmlCallbackHandler
from lookuptool import CsvLookupTool
from text import nonewlines
Expand Down
3 changes: 2 additions & 1 deletion app/backend/approaches/retrievethenread.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import Any

import openai
from approaches.approach import Approach
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType

from approaches.approach import Approach
from core.messagebuilder import MessageBuilder
from text import nonewlines

Expand Down
1 change: 1 addition & 0 deletions app/backend/gunicorn.conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
workers = (num_cpus * 2) + 1
threads = 1 if num_cpus == 1 else 2
timeout = 600
worker_class = "gthread"
1 change: 0 additions & 1 deletion app/backend/start_appservice.sh

This file was deleted.

2 changes: 1 addition & 1 deletion infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ module backend 'core/host/appservice.bicep' = {
appServicePlanId: appServicePlan.outputs.id
runtimeName: 'python'
runtimeVersion: '3.10'
appCommandLine: 'start_appservice.sh'
appCommandLine: 'python3 -m gunicorn "app:create_app()"'
scmDoBuildDuringDeployment: true
managedIdentity: true
appSettings: {
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
target-version = "py38"
select = ["E", "F", "I", "UP"]
ignore = ["E501", "E701"] # line too long, multiple statements on one line
src = ["app/backend"]

[tool.black]
line-length = 300

[tool.pytest.ini_options]
addopts = "-ra --cov"
pythonpath = ["app/backend"]

[tool.coverage.paths]
source = ["scripts", "app"]
Expand Down
Loading

0 comments on commit df48d8c

Please sign in to comment.