Skip to content

Commit

Permalink
update instruction generation setting
Browse files Browse the repository at this point in the history
  • Loading branch information
DoraDong-2023 committed Jan 18, 2024
1 parent 6276539 commit 08bbd8f
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 42 deletions.
14 changes: 5 additions & 9 deletions src/dataloader/preprocess_retriever_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
from sklearn.utils import shuffle
from models.model import LLM_response, LLM_model
#from configs.model_config import LIB
from prompt.instruction import Task_Description_of_Singletool_oneapi_Instructions, \
Other_Requirements_singletool_oneapi, Task_Description_of_Singletool_oneapi_Instructions_simple, \
Other_Requirements_singletool_oneapi_simple
from prompt.instruction import Task_Description_of_Singletool_oneapi_Instructions_whole, Other_Requirements_singletool_oneapi_whole
from inference.utils import process_retrieval_document, compress_api_str_from_list
parser = argparse.ArgumentParser()
parser.add_argument('--LIB', type=str, help='PyPI tool')
Expand All @@ -22,8 +20,7 @@

semaphore = asyncio.Semaphore(args.concurrency)

prompt_oneapi = f"{Task_Description_of_Singletool_oneapi_Instructions}\n{Other_Requirements_singletool_oneapi}"
prompt_oneapi_simple = f"{Task_Description_of_Singletool_oneapi_Instructions_simple}\n{Other_Requirements_singletool_oneapi_simple}"
prompt_oneapi_whole = f"{Task_Description_of_Singletool_oneapi_Instructions_whole}\n{Other_Requirements_singletool_oneapi_whole}"

def unify_response_format(response):
list_pattern = re.compile(r'\[\{.*?\}\]', re.DOTALL)
Expand Down Expand Up @@ -85,12 +82,11 @@ async def preprocess_instruction_generation(API_composite, QUERY_FILE):
#tasks = [process_api_async(api_name, ori_data[api_name], llm, tokenizer) for api_name in tqdm(ori_data)]
all_tasks = []
print('Start instruction generation ...')
print('Num. of Tasks is twice of the num. of APIs ...')
progress = tqdm_asyncio(total=len(ori_data) * 2)
print('Num. of Tasks is one times of the num. of APIs ...')
progress = tqdm_asyncio(total=len(ori_data))
for api_name in tqdm_asyncio(ori_data):
async with semaphore:
all_tasks.append(process_prompt_async(api_name, ori_data[api_name], llm, tokenizer, prompt_oneapi, progress))
all_tasks.append(process_prompt_async(api_name, ori_data[api_name], llm, tokenizer, prompt_oneapi_simple, progress))
all_tasks.append(process_prompt_async(api_name, ori_data[api_name], llm, tokenizer, prompt_oneapi_whole, progress))
# Run the tasks and collect results
results_from_tasks = await asyncio.gather(*(all_tasks))
# close progress bar
Expand Down
1 change: 0 additions & 1 deletion src/gpt/gpt_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def setup_openai(fname, mode='azure'):
openai.api_key = secrets['MS_KEY']
return secrets


@T.retry(stop=T.stop_after_attempt(5), wait=T.wait_fixed(60), after=lambda s: logging.error(repr(s)))
def query_openai(prompt, mode='azure', model='gpt-35-turbo', **kwargs):
if mode == 'openai':
Expand Down
7 changes: 5 additions & 2 deletions src/inference/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ def compress_api_str_from_list_query_version(api):
req_params = json.dumps(api['required_parameters'])
opt_params = json.dumps(api['optional_parameters'])
return_schema = json.dumps(api['Returns'])
compressed_str = f"{api_name}, {api_desc_truncated}, required_params: {req_params}, optional_params: {opt_params}, return_schema: {return_schema}"
#compressed_str = f"{api_name}, {api_desc_truncated}, required_params: {req_params}, optional_params: {opt_params}, return_schema: {return_schema}"
compressed_str = f"{api_name}, {api_desc_truncated}, required_params: {req_params}, optional_params: {opt_params}"
return compressed_str

def process_retrieval_document_query_version(documents_df):
Expand Down Expand Up @@ -160,7 +161,8 @@ def compress_api_str_from_list(api):
req_params = json.dumps({})
opt_params = json.dumps({})
return_schema = json.dumps(api['Returns'])
compressed_str = f"{api_name}, {api_desc_truncated}, required_params: {req_params}, return_schema: {return_schema}" # optional_params: {opt_params},
#compressed_str = f"{api_name}, {api_desc_truncated}, required_params: {req_params}, return_schema: {return_schema}" # optional_params: {opt_params},
compressed_str = f"API description: {api_desc_truncated}, required_params: {req_params}" # optional_params: {opt_params},
return compressed_str

def process_retrieval_document(documents_df):
Expand Down Expand Up @@ -211,3 +213,4 @@ def fast_get_environment(pre_code):
print(executor.variables)
executor.execute_api_call('print(a)')


3 changes: 2 additions & 1 deletion src/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ def LLM_response(llm,tokenizer,chat_prompt,history=[],kwargs={}):
if llm_model_dict[LLM_MODEL]['platform'] in ['OPENAI']:
from gpt import gpt_interface
gpt_interface.setup_openai('', mode='openai')
response = gpt_interface.query_openai(chat_prompt, mode="openai", model="gpt-3.5-turbo-16k", max_tokens=MAX_NEW_TOKENS)
response = gpt_interface.query_openai(chat_prompt, mode="openai", model="gpt-3.5-turbo-1106", max_tokens=MAX_NEW_TOKENS)
# Modified 240117, changed to gpt-3.5-turbo-1106
#response = gpt_interface.query_openai(chat_prompt, mode="openai", model="gpt-4", max_tokens=MAX_NEW_TOKENS)
history.append([chat_prompt, response])
elif llm_model_dict[LLM_MODEL]['platform'] in ['OPENAI']: # 231201_deprecate
Expand Down
44 changes: 15 additions & 29 deletions src/prompt/instruction.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,24 @@
def Task_Description_of_Singletool_oneapi_Instructions_template(detailed_summarized, could_must, specific_concised):
return f"""
You are provided with one API function, its descriptions, the parameters and returns information required for each API function.
Your task involves creating 5 varied, innovative, and {detailed_summarized} user queries that employ the given API function.
The queries exemplify how to utilize the API call. A query should only use the given API.
Additionally, you {could_must} incorporate the input parameters required for each API call.
To achieve this, generate random information for required parameters according to its type.
The 5 queries should be very {specific_concised}.
Note that you shouldn't ask 'which API to use', rather, simply state your needs that can be addressed by these APIs.
Never explicitly mentioning the specific calling of API functions in your response.
Task_Description_of_Singletool_oneapi_Instructions_whole = f"""
You are provided with the API function, its descriptions, the parameters required for each API function.
Your task involves creating a total of 10 totally differentiate user queries for a given API function, 5 should be detailed and specific and other 5 brief and concise. Each query is innovative.
These queries illustrate only on how to accomplish the exact task that the API is designed for, and the user never intend to use API/function/tool to solve the task.
Incorporate randomly generated values for required parameters, ensuring variation among queries based on their types.
Never explicitly mentioning any keywords of API function names in your response.
You should also avoid asking for the input parameters required by the API call, but instead directly provide the parameter in your query.
"""

Task_Description_of_Singletool_oneapi_Instructions = Task_Description_of_Singletool_oneapi_Instructions_template("detailed", "could", "specific")

Task_Description_of_Singletool_oneapi_Instructions_simple = Task_Description_of_Singletool_oneapi_Instructions_template("summarized", "could", "concised")


def Other_Requirements_singletool_oneapi_template(word_minimum_number):
return f"""
Please produce 5 queries in line with the given requirements and inputs.
These 5 queries should display a diverse range of sentence structures:
Other_Requirements_singletool_oneapi_whole = f"""
Create queries in line with the given requirements and inputs.
These queries should display a diverse range of sentence structures:
some queries should be in the form of imperative sentences, others declarative, and yet others interrogative.
Equally, they should encompass a variety of tones, with some being polite, some being straightforward, some like layman.
Ensure they vary in length.
Aim to include a number of engaging queries as long as they relate to API calls.
Keep in mind that
- Queries should be around {word_minimum_number} words, and avoid explicit mentions of API calls like 'xx.yy.zz(parameters)', or PyPI lib name as 'use API in xx', or reference paper like 'Zhang21 et al.', or API function keywords, or specific parameters type.
- For quotation, use ` instead of '.
- Avoid technical terms like 'the given API' or unmeaningful terms like 'data with all observations' in your inquiry; keep it natural and focus on the user's intention to accomplish a real task.
- Avoiding repeated the same information within each inquiry, like "based on the data object for the given data".
- Queries should be unique in structure and phrasing, and vocabulary should be varied precise, accurate, and diverse.
- Restricted to the response format as a list of effective json: [{"Query": "(query content)"},{"Query": "(query content)"},{"Query": "(query content)"},{"Query": "(query content)"},{"Query": "(query content)"}]
- Queries should contain about 10-20 words, avoiding direct references to API calls (e.g., 'xx.yy.zz(parameters)'), library names (e.g., 'use API in xx'), function names (e.g., 'zz'), academic references (e.g., 'Zhang21 et al.'), or specific parameter types (e.g. 'AnnData data matrix object').
- Use backticks (`) for quotes, not single quotes (').
- Keep language natural and task-focused, avoiding overly technical or vague terms (e.g. 'data with all observations', 'using the given API').
- Ensure each query is distinct without repeating the same information (e.g. `based on the data object for the given data`).
- Queries should be structurally unique, with precise and diverse vocabulary.
- Format responses as a list in JSON: [{{"Query": "(query content)"}}, {{"Query": "(query content)"}}, {{"Query": "(query content)"}},{{"Query": "(query content)"}}, {{"Query": "(query content)"}}, {{"Query": "(query content)"}},{{"Query": "(query content)"}}, {{"Query": "(query content)"}}, {{"Query": "(query content)"}},{{"Query": "(query content)"}}].
"""

Other_Requirements_singletool_oneapi_simple = Other_Requirements_singletool_oneapi_template("fifteen")

Other_Requirements_singletool_oneapi = Other_Requirements_singletool_oneapi_template("twenty")

0 comments on commit 08bbd8f

Please sign in to comment.