feat: make-rag-optional-but-default #2123

Open · wants to merge 3 commits into main
15 changes: 11 additions & 4 deletions app/backend/approaches/chatapproach.py
@@ -45,17 +45,24 @@ def system_message_chat_conversation(self) -> str:
     async def run_until_final_call(self, messages, overrides, auth_claims, should_stream) -> tuple:
         pass
 
-    def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str) -> str:
+    def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str, sources_reference_content: str = "") -> str:
         if override_prompt is None:
             return self.system_message_chat_conversation.format(
-                injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt
+                injected_prompt="",
+                follow_up_questions_prompt=follow_up_questions_prompt,
+                sources_reference_content=sources_reference_content
             )
         elif override_prompt.startswith(">>>"):
             return self.system_message_chat_conversation.format(
-                injected_prompt=override_prompt[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt
+                injected_prompt=override_prompt[3:] + "\n",
+                follow_up_questions_prompt=follow_up_questions_prompt,
+                sources_reference_content=sources_reference_content
             )
         else:
-            return override_prompt.format(follow_up_questions_prompt=follow_up_questions_prompt)
+            return override_prompt.format(
+                follow_up_questions_prompt=follow_up_questions_prompt,
+                sources_reference_content=sources_reference_content
+            )
 
     def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
         response_message = chat_completion.choices[0].message
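To show how the new keyword argument behaves, here is a minimal, self-contained sketch that mirrors the logic above; the simplified base template and the SOURCES_REFERENCE text are stand-ins for the real system_message_chat_conversation and are not part of the PR.

from typing import Optional

SOURCES_REFERENCE = (
    "Each source has a name followed by a colon and the actual information; "
    "cite sources in square brackets, for example [info1.txt]."
)

def get_system_prompt(base_template: str, override_prompt: Optional[str],
                      follow_up_questions_prompt: str,
                      sources_reference_content: str = "") -> str:
    # Mirrors the diff above: the new keyword argument defaults to an empty string,
    # so existing callers keep working, and RAG-off callers simply omit the block.
    if override_prompt is None:
        return base_template.format(
            injected_prompt="",
            follow_up_questions_prompt=follow_up_questions_prompt,
            sources_reference_content=sources_reference_content,
        )
    elif override_prompt.startswith(">>>"):
        return base_template.format(
            injected_prompt=override_prompt[3:] + "\n",
            follow_up_questions_prompt=follow_up_questions_prompt,
            sources_reference_content=sources_reference_content,
        )
    return override_prompt.format(
        follow_up_questions_prompt=follow_up_questions_prompt,
        sources_reference_content=sources_reference_content,
    )

# Simplified stand-in template with the same placeholders as the real prompt.
base = "Answer from the sources below.\n{sources_reference_content}\n{injected_prompt}{follow_up_questions_prompt}"
print(get_system_prompt(base, None, "", sources_reference_content=SOURCES_REFERENCE))  # RAG enabled
print(get_system_prompt(base, None, "", sources_reference_content=""))                 # RAG disabled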
200 changes: 105 additions & 95 deletions app/backend/approaches/chatreadretrieveread.py
@@ -58,7 +58,7 @@ def system_message_chat_conversation(self):
return """Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
Comment (bnodir, Contributor):
@jeannotdamoiseaux - Only after overriding the prompt template did I receive an answer that was not based on the sources; otherwise, the response was "I don't know." This might be due to line 59 of the master prompt instruction. Can you confirm whether this is intended?
[image]

Reply (jeannotdamoiseaux, Contributor Author, Nov 10, 2024):
@bnodir This is actually the intended behavior. With this feature, RAG is essentially turned off, transforming the app into a more generic chatbot that relies on the general knowledge of the LLM.
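For illustration, a minimal sketch of the kind of request override that triggers this RAG-off path, assuming the "__NONE__" sentinel that the backend diff below checks for; the field names follow the overrides dict read by the approach, and the sketch is not part of the PR.

# Hypothetical overrides payload sent by the frontend when the user picks no category:
# retrieval is skipped and the model answers from general knowledge, without citations.
request_overrides = {
    "include_category": "__NONE__",   # sentinel checked in run_until_final_call below
    "suggest_followup_questions": False,
    "temperature": 0.3,
}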

If the question is not in English, answer in the language used in the question.
-Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
+{sources_reference_content}
Comment (Collaborator):
We've considered moving the prompts into Jinja files. I'm wondering if we should make that change first, as this could read a bit nicer, like:

{% if should_reference_sources %}
Each source...
{% endif %}

Our current method of storing the templates inside multi-line strings is not easy to work with, so I've been hoping to move them at least into separate files; I just hadn't decided between .txt, Jinja, and prompty files. Jinja is a happy medium. That could be done by you or me in a separate PR. Thoughts?

Reply (Contributor Author):
Agreed. I'll create a separate PR for this first.

Reply (Contributor Author):
Implemented in #2164
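For reference, a small sketch of how that conditional might look once the prompts live in Jinja templates, rendered here with jinja2; the variable name should_reference_sources comes from the comment above and the template text is abbreviated, so treat this as an assumption rather than the planned implementation.

from jinja2 import Template

prompt_template = Template(
    "Answer ONLY with the facts listed in the list of sources below.\n"
    "{% if should_reference_sources %}"
    "Each source has a name followed by a colon and the actual information; "
    "always include the source name for each fact you use.\n"
    "{% endif %}"
    "{{ follow_up_questions_prompt }}"
)

# RAG enabled: the source-reference instructions are included.
print(prompt_template.render(should_reference_sources=True, follow_up_questions_prompt=""))
# RAG disabled: they are omitted.
print(prompt_template.render(should_reference_sources=False, follow_up_questions_prompt=""))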

{follow_up_questions_prompt}
{injected_prompt}
"""
@@ -96,106 +96,89 @@ async def run_until_final_call(
top = overrides.get("top", 3)
minimum_search_score = overrides.get("minimum_search_score", 0.0)
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
include_category = overrides.get("include_category")
filter = self.build_filter(overrides, auth_claims)

original_user_query = messages[-1]["content"]
if not isinstance(original_user_query, str):
raise ValueError("The most recent message content must be a string.")
user_query_request = "Generate search query for: " + original_user_query

tools: List[ChatCompletionToolParam] = [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
}
sources_content = []
extra_info = {"thoughts": [], 'data_points': []}

if include_category != "__NONE__":
Comment (Collaborator):
I think the include_category != "__NONE__" condition should be evaluated only once, stored in a variable, and placed under a comment explaining that "__NONE__" is a value passed down by the frontend categories picker.
I'm nervous about special magical values like "__NONE__", so I want to make it clear where it comes from and minimize the places where it could be used incorrectly.
It could also be a class attribute on Approach, like NO_CATEGORIES.
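A rough sketch of the suggested refactor, assuming a NO_CATEGORIES class attribute and a helper that evaluates the sentinel exactly once; the names should_use_rag and NO_CATEGORIES are illustrative, not part of the PR.

from typing import Any, Optional


class Approach:
    # Sentinel sent by the frontend categories picker when the user selects "no sources".
    # (Name and placement are illustrative; the PR currently compares against the literal.)
    NO_CATEGORIES = "__NONE__"

    def should_use_rag(self, overrides: dict[str, Any]) -> bool:
        # Evaluate the magic value in exactly one place and reuse the boolean elsewhere.
        include_category: Optional[str] = overrides.get("include_category")
        return include_category != self.NO_CATEGORIES


# Example: the flag drives whether the retrieval steps run at all.
approach = Approach()
print(approach.should_use_rag({"include_category": "__NONE__"}))  # False -> skip retrieval
print(approach.should_use_rag({"include_category": None}))        # True  -> run retrieval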

tools: List[ChatCompletionToolParam] = [
Comment (Collaborator):
Hm. It'd be nice to make this change in a way that doesn't cause this large indentation change, since that can make it harder for developers to merge in new changes. But I think that's not possible, right? Just musing out loud.

Reply (Contributor Author):
I thought about that as well. I think the only way to avoid the indentation change is to extract a separate function. What do you think?
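If the retrieval steps were pulled into a separate function as suggested, the calling code could stay at one indentation level; a self-contained sketch under that assumption, with a fake search function standing in for Azure AI Search.

import asyncio
from typing import Any


async def fetch_sources(search_fn, query_text: str, top: int) -> list[str]:
    # Hypothetical helper: isolates the retrieval step so the caller stays at one
    # indentation level whether or not RAG is enabled.
    results = await search_fn(query_text, top)
    return [f"{doc['name']}: {doc['content']}" for doc in results]


async def run(use_rag: bool) -> str:
    async def fake_search(query: str, top: int) -> list[dict[str, Any]]:
        # Stand-in for the real Azure AI Search call.
        return [{"name": "info1.txt", "content": "example fact"}][:top]

    sources = await fetch_sources(fake_search, "health care plan", top=3) if use_rag else []
    return "\n".join(sources) if sources else "(no sources: answering from general knowledge)"


print(asyncio.run(run(use_rag=False)))
print(asyncio.run(run(use_rag=True)))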

{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
}
},
"required": ["search_query"],
},
"required": ["search_query"],
},
},
}
]
}
]

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
query_response_token_limit = 100
query_messages = build_messages(
model=self.chatgpt_model,
system_prompt=self.query_prompt_template,
tools=tools,
few_shots=self.query_prompt_few_shots,
past_messages=messages[:-1],
new_user_content=user_query_request,
max_tokens=self.chatgpt_token_limit - query_response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)
# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
query_response_token_limit = 100
query_messages = build_messages(
model=self.chatgpt_model,
system_prompt=self.query_prompt_template,
tools=tools,
few_shots=self.query_prompt_few_shots,
past_messages=messages[:-1],
new_user_content=user_query_request,
max_tokens=self.chatgpt_token_limit - query_response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)

chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
messages=query_messages, # type: ignore
# Azure OpenAI takes the deployment name as the model name
model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
temperature=0.0, # Minimize creativity for search query generation
max_tokens=query_response_token_limit, # Setting too low risks malformed JSON, setting too high may affect performance
n=1,
tools=tools,
seed=seed,
)
chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
messages=query_messages, # type: ignore
# Azure OpenAI takes the deployment name as the model name
model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
temperature=0.0, # Minimize creativity for search query generation
max_tokens=query_response_token_limit, # Setting too low risks malformed JSON, setting too high may affect performance
n=1,
tools=tools,
seed=seed,
)

query_text = self.get_search_query(chat_completion, original_user_query)

# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query

# If retrieval mode includes vectors, compute an embedding for the query
vectors: list[VectorQuery] = []
if use_vector_search:
vectors.append(await self.compute_text_embedding(query_text))

results = await self.search(
top,
query_text,
filter,
vectors,
use_text_search,
use_vector_search,
use_semantic_ranker,
use_semantic_captions,
minimum_search_score,
minimum_reranker_score,
)

sources_content = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
content = "\n".join(sources_content)
query_text = self.get_search_query(chat_completion, original_user_query)

# STEP 3: Generate a contextual and content specific answer using the search results and chat history
# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query

# Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
system_message = self.get_system_prompt(
overrides.get("prompt_template"),
self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
)
# If retrieval mode includes vectors, compute an embedding for the query
vectors: list[VectorQuery] = []
if use_vector_search:
vectors.append(await self.compute_text_embedding(query_text))

response_token_limit = 1024
messages = build_messages(
model=self.chatgpt_model,
system_prompt=system_message,
past_messages=messages[:-1],
# Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
new_user_content=original_user_query + "\n\nSources:\n" + content,
max_tokens=self.chatgpt_token_limit - response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)
results = await self.search(
top,
query_text,
filter,
vectors,
use_text_search,
use_vector_search,
use_semantic_ranker,
use_semantic_captions,
minimum_search_score,
minimum_reranker_score,
)

data_points = {"text": sources_content}
sources_content = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
if sources_content:
extra_info["data_points"] = {"text": sources_content}

extra_info = {
"data_points": data_points,
Comment (Collaborator):
You seem to have lost the data_points from the thought process; please bring them back.

Reply (Contributor Author):
This behavior is intentional, as there are no data points when RAG is disabled. However, the solution could be more elegant than simply overwriting the extra_info variable.
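One possible middle ground hinted at in this thread, sketched here as an assumption rather than what the PR does: keep a data_points entry in extra_info even when retrieval is skipped, so downstream consumers see an explicit empty list instead of a missing key.

sources_content: list[str] = []  # stays empty when RAG is disabled

extra_info = {
    "data_points": {"text": sources_content},  # present in both modes, possibly empty
    "thoughts": [],
}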

"thoughts": [
extra_info["thoughts"].extend([
ThoughtStep(
"Prompt to generate search query",
query_messages,
@@ -221,20 +204,47 @@ async def run_until_final_call(
"Search results",
[result.serialize_for_results() for result in results],
),
ThoughtStep(
"Prompt to generate answer",
messages,
(
{"model": self.chatgpt_model, "deployment": self.chatgpt_deployment}
if self.chatgpt_deployment
else {"model": self.chatgpt_model}
),
])

# STEP 3: Generate a contextual and content specific answer

# Additional prompt injected into the masterprompt if RAG is enabled
sources_reference_content = """
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
""" if include_category != "__NONE__" else ""

# Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
system_message = self.get_system_prompt(
overrides.get("prompt_template"),
self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
sources_reference_content=sources_reference_content
)

response_token_limit = 1024
messages = build_messages(
model=self.chatgpt_model,
system_prompt=system_message,
past_messages=messages[:-1],
new_user_content=original_user_query + ("\n\nSources:\n" + "\n".join(sources_content) if sources_content else ""),
max_tokens=self.chatgpt_token_limit - response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)

data_points = {"text": sources_content}

extra_info["thoughts"].append(
ThoughtStep(
"Prompt to generate answer",
messages,
(
{"model": self.chatgpt_model, "deployment": self.chatgpt_deployment}
if self.chatgpt_deployment
else {"model": self.chatgpt_model}
),
],
}
)
)

chat_coroutine = self.openai_client.chat.completions.create(
# Azure OpenAI takes the deployment name as the model name
model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
messages=messages,
temperature=overrides.get("temperature", 0.3),