-
Notifications
You must be signed in to change notification settings - Fork 0
/
flask_api.py
135 lines (121 loc) · 6.31 KB
/
flask_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import openai
import PyPDF2
import asyncio
import os
from flask import Flask, jsonify, request
from flask_restful import Api
from utils import MessageBuilder
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (SearchIndex, SearchFieldDataType, SearchField,
ComplexField, SimpleField, SearchableField,
CorsOptions)
from azure.search.documents.aio import SearchClient as SearchClient2
# Flask application object and the flask_restful wrapper around it.
app = Flask(__name__)
api = Api(app)

# Azure OpenAI credentials. They are read from the environment so that the
# secret does not have to be written into this file; each one falls back to
# the empty string that used to be hard-coded here.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
RESOURCE_ENDPOINT = os.environ.get("OPENAI_API_BASE", "")

# Configure the module-level openai client for an Azure-hosted deployment.
openai.api_type = "azure"
openai.api_key = API_KEY
openai.api_base = RESOURCE_ENDPOINT
openai.api_version = "2023-05-15"
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF joined into one string.

    Args:
        pdf_file: Value handed straight to ``PyPDF2.PdfReader``; the caller
            in this file passes an uploaded file object.

    Returns:
        The extracted page texts concatenated in page order, with no
        separator between pages.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # join() assembles the result in a single pass instead of repeated
    # ``+=``; ``or ""`` keeps a page that yields no text from raising
    # TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def _ensure_index(client, index_name):
    """Create the search index ``index_name`` unless it already exists."""
    # Function-scope import: only this helper needs the exception type.
    from azure.core.exceptions import ResourceNotFoundError

    try:
        client.get_index(index_name)
    except ResourceNotFoundError:
        # Only a "not found" answer means the index has to be created; any
        # other failure (bad key, network error) is left to propagate.
        fields = [
            SimpleField(name="id", type=SearchFieldDataType.String, key=True),
            # SearchableField is what makes a field full-text searchable;
            # passing ``SearchableField=True`` to SimpleField does not.
            SearchableField(name="title", type=SearchFieldDataType.String),
            SearchableField(name="content", type=SearchFieldDataType.String),
        ]
        cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
        index = SearchIndex(
            name=index_name,
            fields=fields,
            scoring_profiles=[],
            cors_options=cors_options,
        )
        client.create_index(index)
        print(f'Created index - {index_name}')
    else:
        print(f"The Index {index_name} already exists.")


@app.route("/home", methods=["POST"])
async def home():
    """Index the uploaded PDFs and answer a question using them as sources.

    Form fields read: ``index_name``, ``search_service_name``, ``admin_key``
    and ``question``. Uploaded files are read from ``pdf_files``.

    Returns:
        A ``(json, status)`` pair:

        * 200 with ``{"answer": ...}`` holding the chat completion text.
        * 400 with ``{"error": ...}`` when a required form field is missing,
          when an uploaded file is not a ``.pdf``, or when no file was
          uploaded.
        * 500 with ``{"error": ...}`` carrying the exception text for any
          other failure.
    """
    index_name = request.form.get('index_name')
    search_service_name = request.form.get('search_service_name')
    admin_key = request.form.get('admin_key')
    q = request.form.get('question')

    # Reject incomplete requests before touching any remote service, so a
    # missing field is reported as a client error and nothing is uploaded.
    required = {
        "index_name": index_name,
        "search_service_name": search_service_name,
        "admin_key": admin_key,
        "question": q,
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        return jsonify({"error": f"Missing required form field(s): {', '.join(missing)}"}), 400

    try:
        endpoint = f"https://{search_service_name}.search.windows.net"
        credential = AzureKeyCredential(admin_key)
        _ensure_index(SearchIndexClient(endpoint=endpoint, credential=credential), index_name)

        documents = []
        for uploaded_file in request.files.getlist("pdf_files"):
            title, file_extension = os.path.splitext(uploaded_file.filename)
            if file_extension.lower() != ".pdf":
                return jsonify({"error": "Only PDF files are supported"}), 400
            documents.append({
                "@search.action": "upload",
                # The key is the part of the filename before the first dot.
                "id": uploaded_file.filename.split('.')[0],
                "title": title,
                "content": extract_text_from_pdf(uploaded_file),
            })

        if not documents:
            return jsonify({"error": "No valid PDF files were provided"}), 400

        search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)
        search_client.upload_documents(documents)
        print("Documents are being uploaded....")
        print("Upload of new documents succeeded")

        # Read the stored documents back by key; they become the sources
        # quoted in the prompt.
        content = [search_client.get_document(key=doc["id"]) for doc in documents]

        system_chat_template = (
            "You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions. "
            "Use 'you' to refer to the individual asking the questions even if they ask with 'I'. "
            "Answer the following question using only the data provided in the sources below. "
            "For tabular information return it as an html table. Do not return markdown format. "
            "Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. "
            "If you cannot answer using the sources below, say you don't know. Use below example to answer"
        )
        # Worked example (question plus matching answer) given to the model.
        question = (
            "\n"
            "'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'\n"
            "Sources:\n"
            "info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.\n"
            "info2.pdf: Overlake is in-network for the employee plan.\n"
            "info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.\n"
            "info4.pdf: In-network institutions include Overlake, Swedish and others in the region\n"
        )
        answer = "In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf]."

        message_builder = MessageBuilder(system_content=system_chat_template, chatgpt_model="gpt-35-turbo")
        # A real newline separates the question from its sources (the
        # previous "/n" was sent to the model literally).
        user_content = f"{q}\nSources:\n {content}"
        # NOTE(review): the append order is kept exactly as it was; the final
        # message order depends on where utils.MessageBuilder.append_message
        # places each entry - confirm against that helper.
        message_builder.append_message('user', user_content)
        message_builder.append_message('assistant', answer)
        message_builder.append_message('user', question)
        messages = message_builder.messages

        chat_completion = await openai.ChatCompletion.acreate(
            engine="dep-1",
            deployment_id="gpt-35-turbo",
            messages=messages,
            temperature=0.3,
            max_tokens=1024,
            n=1,
        )
        return jsonify({"answer": chat_completion.choices[0].message.content}), 200
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({"error": str(e)}), 500
# Start the Flask server only when this file is executed directly.
# NOTE(review): debug=True switches on Flask's debug mode - confirm this
# entry point is never used for a production deployment.
if __name__ == "__main__":
    app.run(debug=True)