Skip to content

Commit

Permalink
minor update for prompt and new libs
Browse files Browse the repository at this point in the history
  • Loading branch information
DoraDong-2023 committed Jan 16, 2024
1 parent 1baeb0a commit 6276539
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 104 deletions.
2 changes: 1 addition & 1 deletion docs/PyPI2APP.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export LIB=scanpy
# download materials according to your provided url links
python dataloader/utils/other_download.py --LIB ${LIB}
# generate code from your downloaded tutorial files; supports either html or ipynb.
python dataloader/utils/tutorial_loader_strategy.py --LIB ${LIB} --file_type 'html'
python dataloader/utils/tutorial_loader_strategy.py --LIB ${LIB} --file_type 'ipynb'
```

NOTE: The above scripts require API_HTML_PATH, READTHEDOC_PATH and TUTORIAL_GITHUB in order to run!
Expand Down
12 changes: 6 additions & 6 deletions src/configs/Lib_cheatsheet.json
Original file line number Diff line number Diff line change
Expand Up @@ -174,18 +174,18 @@
"LIB": "ehrapy",
"LIB_ALIAS": "ehrapy",
"API_HTML_PATH": "ehrapy.readthedocs.io/en/latest/usage/usage.html",
"GITHUB_LINK": null,
"READTHEDOC_LINK": null,
"GITHUB_LINK": "https://github.com/theislab/ehrapy",
"READTHEDOC_LINK": "https://ehrapy.readthedocs.io/",
"TUTORIAL_HTML_PATH": "https://ehrapy.readthedocs.io/en/latest/tutorials/index.html",
"TUTORIAL_GITHUB": null
"TUTORIAL_GITHUB": "https://github.com/theislab/ehrapy-tutorials"
},
"snapatac2": {
"LIB": "snapatac2",
"LIB_ALIAS": "snapatac2",
"API_HTML_PATH": "kzhang.org/SnapATAC2/api/index.html",
"GITHUB_LINK": null,
"READTHEDOC_LINK": null,
"GITHUB_LINK": "https://github.com/kaizhang/SnapATAC2",
"READTHEDOC_LINK": "https://kzhang.org/SnapATAC2",
"TUTORIAL_HTML_PATH": "https://kzhang.org/SnapATAC2/tutorials/index.html",
"TUTORIAL_GITHUB": null
"TUTORIAL_GITHUB": "https://github.com/kaizhang/SnapATAC2"
}
}
38 changes: 13 additions & 25 deletions src/dataloader/get_API_composite_from_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from configs.model_config import ANALYSIS_PATH, get_all_variable_from_cheatsheet #tut, html_dict, code
from dataloader.utils.tutorial_loader_strategy import main_convert_tutorial_to_py
from dataloader.utils.code_analyzer import extract_io_variables
from models.model import LLM_model, LLM_response
from prompt.composite import build_prompt_for_composite_docstring, build_prompt_for_composite_name

parser = argparse.ArgumentParser()
parser.add_argument('--LIB', type=str, required=True, help='PyPI tool')
Expand Down Expand Up @@ -342,33 +344,19 @@ def extract_api_calls(code_block, imports, lib_alias):
except SyntaxError:
return []

def process_docstring_with_LLM(llm, API_description, func_inputs,func_outputs, description_text=""):
def process_docstring_with_LLM(llm, tokenizer, API_description, func_inputs,func_outputs, description_text=""):
# LLM for modifying docstring
prompt = f"""You are an expert in Python programming. Your task is to write the docstring for the given information of an invisible function. Interpret the assigned inputs and return variables in the docstring.
The description of used APIs inside this code is: {API_description}
The input and output parameter information is as below:
- Parameters: {func_inputs}
- Returns: {func_outputs}
- The other description associated with the code is: {description_text}
- Please extract the core information in 1-2 sentences and polish it. Docstring description should only use 1-2 sentences.
Your Response format is detailed docstring. Please do not include other information except for response information, in reStructuredText format. Never include specific API information in description.
"""
response = llm.predict(prompt)
prompt = build_prompt_for_composite_docstring(API_description, func_inputs, func_outputs, description_text)
response, history = LLM_response(llm,tokenizer,prompt,history=[],kwargs={})
print(f'==>GPT docstring response: {response}')
if 'def' in response.split('\n')[0]:
return '\n'.join(response.split('\n')[1:])
else:
return response

def process_name_with_LLM(llm,sub_API_names,llm_docstring):
prompt=f"""Your task is to suggest an appropriate name for the given invisible function:
- Here are the sub API used together with function's docstring, please consider the API name to generate function name. sub API names: {sub_API_names},
function docstring: ```{llm_docstring}```
- Your name should consist of 4-5 keywords that combined with `_`, name should be recognizable and contain as much information as you can in keywords.
Your Response format: {{'func_name': (your designed function name)}}
Please do not include other information except for response format.
"""
response = llm.predict(prompt)
def process_name_with_LLM(llm,tokenizer,sub_API_names,llm_docstring):
prompt = build_prompt_for_composite_name(sub_API_names, llm_docstring)
response, history = LLM_response(llm,tokenizer,prompt,history=[],kwargs={})
print(f'==>GPT name response: {response}')
MAX_trial = 5
count=0
Expand All @@ -381,7 +369,7 @@ def process_name_with_LLM(llm,sub_API_names,llm_docstring):
ans = ast.literal_eval(response)
return list(ans.keys())[0]
except:
response = llm.predict(prompt)
response, history = LLM_response(llm,tokenizer,prompt,history=[],kwargs={})
print(f'==>retry GPT {count}: {response}')
count+=1
return "function"
Expand Down Expand Up @@ -412,7 +400,6 @@ def main_get_API_composite(LIB_ANALYSIS_PATH, output_folder_json):
return unique_code_blocks

def main_get_LLM_docstring(unique_code_blocks):
from models.model import LLM_model
# LLM model
llm, tokenizer = LLM_model()
# load API_init.json
Expand All @@ -439,8 +426,9 @@ def main_get_LLM_docstring(unique_code_blocks):
# drop duplicate
func_inputs = list(set(func_inputs))
# prompt
llm_docstring = process_docstring_with_LLM(llm, '\n'.join(API_description), json.dumps(func_inputs),json.dumps(func_outputs), description_text=code_blocks['text'])
new_name = process_name_with_LLM(llm,','.join(sub_API_names),llm_docstring)
print('llm: ', llm)
llm_docstring = process_docstring_with_LLM(llm, tokenizer, '\n'.join(API_description), json.dumps(func_inputs),json.dumps(func_outputs), description_text=code_blocks['text'])
new_name = process_name_with_LLM(llm, tokenizer, ','.join(sub_API_names),llm_docstring)
if new_name=='function':
new_name = f'function_{idxxxxx}'
idxxxxx+=1
Expand Down Expand Up @@ -471,7 +459,7 @@ def main_get_LLM_docstring(unique_code_blocks):
def generate_api_callings(results, basic_types=['str', 'int', 'float', 'bool', 'list', 'dict', 'tuple', 'set', 'any', 'List', 'Dict']):
updated_results = {}
for api_name, api_info in results.items():
if api_info["api_type"] in ['function', 'method', 'class', 'functools.partial']:
if api_info["api_type"]: # in ['function', 'method', 'class', 'functools.partial']
# Update the optional_value key for each parameter
for param_name, param_details in api_info["Parameters"].items():
param_type = param_details.get('type')
Expand Down
2 changes: 0 additions & 2 deletions src/dataloader/utils/code_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
import pandas as pd
import numpy as np
import seaborn as sns
import cv2
import math
import sklearn.preprocessing
import sklearn
import scipy

def is_variable_in_parentheses(var: str, code: str) -> bool:
"""
Expand Down
10 changes: 5 additions & 5 deletions src/deploy/inference_dialog_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ def run_pipeline_after_doublechecking_API_selection(self, user_input):
print(self.user_states)
#[callback.on_tool_start() for callback in self.callbacks]
#[callback.on_tool_end() for callback in self.callbacks]
[callback.on_agent_action(block_id="log-"+str(self.indexxxx), task="However, there are still some parameters with special type undefined. Please start from uploading data, or input your query from preprocessing dataset.",task_title="Missing Parameters: special type") for callback in self.callbacks]
[callback.on_agent_action(block_id="log-"+str(self.indexxxx), task="However, there are still some parameters with special type undefined. Please start from uploading data, or check your parameter type in json files.",task_title="Missing Parameters: special type") for callback in self.callbacks]
self.indexxxx+=1
self.last_user_states = self.user_states
self.user_states = "initial"
Expand Down Expand Up @@ -1270,7 +1270,7 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input):
code = result['code']
output_list = result['output_list']
self.executor.load_environment("./tmp/tmp_output_run_pipeline_execution_code_variables.pkl")
#print('check:', code, output_list, self.executor.execute_code, self.executor.variables)
print('check:', code, output_list, self.executor.execute_code, self.executor.variables)

if len(execution_code_list)>0:
self.last_execute_code = self.get_last_execute_code(code)
Expand Down Expand Up @@ -1372,12 +1372,12 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input):
pass
else:
pass
logging.info("Show current variables in namespace:")
logging.info(json.dumps(list(self.executor.variables.keys())))
print("Show current variables in namespace:")
print(json.dumps(list(self.executor.variables.keys())))
new_str = []
for i in self.executor.execute_code:
new_str.append({"code":i['code'],"execution_results":i['success']})
logging.info("Currently all executed code: %s", json.dumps(new_str))
print("Currently all executed code: %s", json.dumps(new_str))
filename = f"./tmp/sessions/{str(self.session_id)}_environment.pkl"
self.last_user_states = self.user_states
self.user_states = "initial"
Expand Down
Loading

0 comments on commit 6276539

Please sign in to comment.