From 4cbe301f8f4c13c5221ae5aedeb374b7357d0151 Mon Sep 17 00:00:00 2001
From: DoraDong-2023
Date: Mon, 10 Jun 2024 17:12:09 -0400
Subject: [PATCH] update task planning & execution correction prompt & pipeline

- let it distinguish between the built-in dataset API and the local data-loading API, depending on whether the user uploads files
- let it polish the next subtask using the variable information available after execution, for better parameter prediction
- let the user re-enter the subtask inquiry if execution correction fails 3 times, instead of exiting
- update requirements.txt
- visualize the UI in a more user-friendly way
---
 README.md                                        |   2 +
 .../Chat/ProgressCards/LoggingCard.tsx           |   9 +
 docs/PyPI2APP.md                                 |   7 +-
 requirements.txt                                 |   3 +-
 src/dataloader/download_issues.py                |  10 +-
 src/deploy/model.py                              | 176 ++++++++++++++----
 src/inference/execution_UI.py                    |   9 +-
 src/models/query_issue_corpus.py                 |  84 +++++++--
 src/prompt/promptgenerator.py                    |  65 ++++---
 9 files changed, 279 insertions(+), 86 deletions(-)

diff --git a/README.md b/README.md
index 6fbdac0..2338a9c 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,8 @@ We provide several ways to run the service: terminal CLI, Docker, railway, pytho
 pip install git+https://github.com/batmen-lab/BioMANIA.git --index-url https://pypi.org/simple
 # setup OPENAI_API_KEY
 echo 'OPENAI_API_KEY="sk-proj-xxxx"' >> .env
+# (optional) setup github token
+echo "GITHUB_TOKEN=your_github_token" >> .env
 # download data, retriever, and resources from drive, and put them to the
 # - data/standard_process/{LIB} and
 # - hugging_models/retriever_model_finetuned/{LIB} and
diff --git a/chatbot_ui_biomania/components/Chat/ProgressCards/LoggingCard.tsx b/chatbot_ui_biomania/components/Chat/ProgressCards/LoggingCard.tsx
index 8b12e6c..4a90a40 100644
--- a/chatbot_ui_biomania/components/Chat/ProgressCards/LoggingCard.tsx
+++ b/chatbot_ui_biomania/components/Chat/ProgressCards/LoggingCard.tsx
@@ -30,11 +30,20 @@ const LoggingCard = ({ title, logString, tableData, logColor = 'black', imageDat
   const successMatch = /\[Success\]/;
   const failMatch = /\[Fail\]/;
+  const confirmationMatch = /Enter Parameters|Can you confirm|User Confirmation|Could you confirm whether this API should be called\? 
Please enter y\/n\./; + const planMatch = /Multi step Task Planning|SubTask Execution|Continue to the next subtask|Step \d+: .*/; + const erroranalysisMatch = /Error Analysis/; if (successMatch.test(title)) { titleColor = 'green'; } else if (failMatch.test(title)) { titleColor = 'red'; + } else if (confirmationMatch.test(title)) { + titleColor = 'orange'; + } else if (planMatch.test(title)) { + titleColor = 'blue'; + } else if (erroranalysisMatch.test(title)) { + titleColor = 'blue'; } const theme = useTheme(); diff --git a/docs/PyPI2APP.md b/docs/PyPI2APP.md index 05cbb26..b010025 100644 --- a/docs/PyPI2APP.md +++ b/docs/PyPI2APP.md @@ -57,7 +57,12 @@ python -m src.dataloader.download_issues --LIB ${LIB} --token {GITHUB_TOKEN} # TODO: download prepared corpus `data/github_issues/{LIB}/*` from google drive python -m src.dataloader.prepare_issue_corpus --LIB ${LIB} # query the corpus with command: -python -m src.models.query_issue_corpus --LIB scanpy --example_query "KeyError: 'No "neighbors" in .uns'" --method sentencebert --field issue_description --top_k 3 +python -m src.models.query_issue_corpus --LIB scanpy --example_query "KeyError: \"No \"neighbors\" in .uns\"" --top_k 3 --query_source local +``` + +(Optional) Or you can either choose query issue online +```bash +python -m src.models.query_issue_corpus --query_source online --LIB scanpy --example_query "KeyError: \"No \"neighbors\" in .uns\"" --top_k 10 ``` NOTE it requires API_HTML_PATH, READTHEDOC_PATH and TUTORIAL_GITHUB to run the above script! diff --git a/requirements.txt b/requirements.txt index efd17be..28b21dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -49,7 +49,7 @@ Requests==2.31.0 #scikit_learn scipy==1.9.2 #scvi_tools==1.0.4 -seaborn==0.13.0 +seaborn sentence_transformers==2.2.2 sentencepiece==0.1.99 scanpy==1.9.6 @@ -62,6 +62,7 @@ tensorboard==2.15.1 timm==0.9.12 torch==2.1.0 torchsummary==1.5.1 +trimap==1.1.4 tqdm==4.66.1 transformers==4.35.0 typing_extensions==4.8.0 diff --git a/src/dataloader/download_issues.py b/src/dataloader/download_issues.py index 87c360a..62fa36f 100644 --- a/src/dataloader/download_issues.py +++ b/src/dataloader/download_issues.py @@ -34,13 +34,17 @@ def fetch_issues(repo, threshold): solutions.append((comment.body, reactions)) if solutions: solutions = sorted(solutions, key=lambda x: x[1], reverse=True) - best_solution = solutions[0][0] + #best_solution = solutions[0][0] + top_k = 3 + best_solutions = [sol[0] for sol in solutions[:top_k]] + best_solutions = '\n'.join([f'Rank {i+1}: {solution}' for i, solution in enumerate(best_solutions)]) else: - best_solution = None + #best_solutions = [None, None, None] + best_solutions = "No solutions" pair = { 'issue_title': issue_title, 'issue_body': issue_body, - 'solution': best_solution, + 'solution': best_solutions, 'count': count } issue_solution_pairs.append(pair) diff --git a/src/deploy/model.py b/src/deploy/model.py index 8fdf9ae..737c5b6 100644 --- a/src/deploy/model.py +++ b/src/deploy/model.py @@ -21,6 +21,16 @@ from ..models.dialog_classifier import Dialog_Gaussian_classification from ..inference.param_count_acc import predict_parameters +def color_text(text, color): + color_codes = { + 'red': '\033[91m', + 'green': '\033[92m', + 'blue': '\033[94m', + 'orange': '\033[93m', + 'reset': '\033[0m' + } + return f"{color_codes.get(color, color_codes['reset'])}{text}{color_codes['reset']}" + def remove_duplicates(lst): seen = set() result = [] @@ -33,7 +43,7 @@ def remove_duplicates(lst): basic_types.append('Any') class Model: - def 
__init__(self, logger, device, model_llm_type="gpt-3.5-turbo-0125"): # llama3 + def __init__(self, logger, device, model_llm_type="gpt-4-turbo"): # llama3, # # gpt-3.5-turbo-0125 print('start initialization!') self.user_query_list = [] self.prompt_factory = PromptFactory() @@ -60,6 +70,8 @@ def __init__(self, logger, device, model_llm_type="gpt-3.5-turbo-0125"): # llama #load_dotenv() OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-test') os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY + GITHUB_TOKEN = os.getenv('GITHUB_TOKEN', '') + os.environ["GITHUB_TOKEN"] = GITHUB_TOKEN self.initialize_executor() reset_result = self.reset_lib(self.LIB) if reset_result=='Fail': @@ -368,6 +380,7 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T self.logger.error(e) self.initialize_executor() self.new_task_planning = True + self.user_query_list = [] pass # only reset lib when changing lib if lib!=self.LIB and lib!='GPT': @@ -383,6 +396,7 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T # only clear namespace when starting new conversations if conversation_started in ["True", True]: self.new_task_planning = True + self.user_query_list = [] self.logger.info('==>new conversation_started!') self.user_states="run_pipeline" self.initialize_executor() @@ -444,16 +458,44 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T sub_task = self.get_query() if not sub_task: raise ValueError("sub_task is empty!") + self.callback_func('log', ' → ' + sub_task, "SubTask Execution") self.new_task_planning = False + self.first_task_start = True else: sub_task = user_input else: - self.callback_func('log', "SubTask: "+user_input, "SubTask Planning") sub_task = user_input + self.callback_func('log', ' → ' + sub_task, "SubTask Execution") + if len([i['code'] for i in self.executor.execute_code if i['success']=='True'])>0: # for non-first subtasks + retrieved_apis = self.retriever.retrieving(self.initial_goal_description, top_k=3) + prompt = self.prompt_factory.create_prompt("modify_subtask", + sub_task, + '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='True']), + json.dumps({str(key): str(value) for key, value in self.executor.variables.items()}), + "\n".join([json_to_docstring(api, self.API_composite[api]["description"], self.API_composite[api]['Parameters']) for api in retrieved_apis]) + ) + sub_task, _ = LLM_response(prompt, self.model_llm_type, history=[], kwargs={}) + self.logger.info('modified sub_task: {}', sub_task) + print('-'*10) + print('modified sub_task: ', sub_task) + print('-'*10) + else: + pass self.logger.info("start retrieving names!") # get sub_task after dialog prediction self.user_query = sub_task - retrieved_names = self.retriever.retrieving(self.user_query, top_k=self.args_top_k) + if self.first_task_start: # for the first API, it is assumed to be loading data (not setting), if no files provided, must use builtin dataset + retrieved_names = self.retriever.retrieving(self.user_query, top_k=self.args_top_k+10) + # filter out APIs + if len(files) > 0: + retrieved_names = [api_name for api_name in retrieved_names if all((not any(special_type in str(param['type']) for special_type in special_types)) for param_name, param in self.API_composite[api_name]['Parameters'].items())] + print('there exist files, retrieved_names are: {}', retrieved_names) + else: # need to consider only the builtin dataset + retrieved_names = [api_name for api_name in retrieved_names if all((not any(special_type 
in str(param['type']) for special_type in special_types)) and (str(param['type']) not in io_types) and (param_name not in io_param_names) for param_name, param in self.API_composite[api_name]['Parameters'].items())] + print('there not exist files, retrieved_names are: {}', retrieved_names) + self.first_task_start = False + else: + retrieved_names = self.retriever.retrieving(self.user_query, top_k=self.args_top_k) self.logger.info("retrieved names: {}!", retrieved_names) # start retrieving names # produce prompt @@ -482,7 +524,7 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T instruction_shot_example = similar_queries self.logger.info('start predicting API!') api_predict_init_prompt = get_retrieved_prompt() - print('api_predict_init_prompt:', api_predict_init_prompt) + self.logger.info('api_predict_init_prompt:', api_predict_init_prompt) #print(self.all_apis_json.keys()) retrieved_apis_prepare = "" for idx, api in enumerate(retrieved_names): @@ -494,13 +536,15 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T for _ in range(self.predict_api_llm_retry): try: response, _ = LLM_response(api_predict_prompt, self.model_llm_type, history=[], kwargs={}) # llm - self.logger.info('==>LLM response: {}', api_predict_prompt, response) + self.logger.info('==>LLM response: {}, {}', api_predict_prompt, response) # hack for if LLM answers this or that """response = response.split(',')[0].split("(")[0].split(' or ')[0] response = response.replace('{','').replace('}','').replace('"','').replace("'",'') response = response.split(':')[0]# for robustness, sometimes llm will return api:description""" response = correct_pred(response, self.LIB) + self.logger.info('==>correct response: {}', response) self.logger.info('correct prediction') + response = response.replace('"','').replace("'","") response = response.strip() #self.logger.info('self.all_apis_json keys: {}', self.all_apis_json.keys()) if len(response.split(','))>1: @@ -521,17 +565,26 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T if self.predicted_api_name in self.ambiguous_api: filtered_pairs = [api_pair for api_pair in self.ambiguous_pair if self.predicted_api_name in api_pair] self.filtered_api = list(set(api for api_pair in filtered_pairs for api in api_pair)) - next_str = "" + #next_str = "" + next_str = "We have retrieved an API, but we found that there may be several similar or related APIs. 
Please choose one of the following options:\n" idx_api = 1 for api in self.filtered_api: - if idx_api>1: - next_str+='\n' - next_str+=f"Candidate [{idx_api}]: {api}" + if idx_api == 1: + next_str += f"1---Retrieved: {api}" + else: + next_str += f"{idx_api}---Similar: {api}" + #next_str+=f"Candidate [{idx_api}]: {api}" description_1 = self.API_composite[api]['Docstring'].split("\n")[0] - next_str+='\n'+description_1 + next_str+='\n'+description_1 + '\n' idx_api+=1 - next_str+="\n"+f"Candidate [-1]: No appropriate candidate, restart another inquiry by input -1" - + #next_str+="\n"+f"Candidate [-1]: No appropriate API, input inquiry manually by enter -1" + #next_str+="\n"+f"Candidate [-2]: Skip to next subtask by enter -2" + next_str += "Option [-1]: No appropriate API, input inquiry manually by entering -1\n" + next_str += "Option [-2]: Skip to the next subtask by entering -2\n" + # for ambiguous API, we think that it might be executed more than once as ambiguous API sometimes work together + # user can exit by entering -1 + # so we add it back to the subtask list to execute it again + self.add_query([self.user_query], mode='pre') self.update_user_state("run_pipeline_after_ambiguous") self.initialize_tool() self.callback_func('log', next_str, f"Can you confirm which of the following {len(self.filtered_api)} candidates") @@ -581,7 +634,7 @@ def run_pipeline_asking_GPT(self,user_input): summary_prompt = self.prompt_factory.create_prompt('summary_full', self.user_query, self.predicted_api_name, self.API_composite[self.predicted_api_name]['description'], self.API_composite[self.predicted_api_name]['Parameters'],self.API_composite[self.predicted_api_name]['Returns'], self.execution_code) response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.callback_func('log', response, "Task summary before execution") - self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed? Please enter y/n.\nIf you press n, we will re-generate the code\nIf you press r, we will restart another turn", "Double Check") + self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed? Please enter y/n.\nIf you press n, we will re-generate the code\nIf you press r, we will restart another turn", "User Confirmation") self.update_user_state("run_pipeline_after_doublechecking_execution_code") self.save_state_enviro() return @@ -608,6 +661,12 @@ def run_pipeline_after_ambiguous(self,user_input): self.callback_func('log', "We will start another round. Could you re-enter your inquiry?", "Start another round") self.save_state_enviro() return 'break' + if user_index==-2: + sub_task = self.get_query() + self.update_user_state("run_pipeline") + self.save_state_enviro() + self.run_pipeline(sub_task, self.LIB, top_k=3, files=[],conversation_started=False,session_id=self.session_id) + return try: self.filtered_api[user_index-1] except IndexError: @@ -690,7 +749,7 @@ def run_pipeline_after_fixing_API_selection(self,user_input): response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.logger.info(f'summary_prompt: {summary_prompt}, summary_prompt response: {response}') self.callback_func('log', response, f"Predicted API: {self.predicted_api_name}") - self.callback_func('log', "Could you confirm whether this API should be called? Please enter y/n.", "Double Check") + self.callback_func('log', "Could you confirm whether this API should be called? 
Please enter y/n.", "User Confirmation") self.update_user_state("run_pipeline_after_doublechecking_API_selection") self.save_state_enviro() @@ -710,9 +769,14 @@ def run_pipeline_after_doublechecking_API_selection(self, user_input): #sub_task = self.get_query() # polish and modify the sub_task retrieved_apis = self.retriever.retrieving(self.initial_goal_description, top_k=3) - retrieved_docs = [f"{api}: "+ self.all_apis_json[api] for api in retrieved_apis] - prompt = self.prompt_factory.create_prompt("modify_subtask", self.user_query, "\n".join(retrieved_docs), self.initial_goal_description) + prompt = self.prompt_factory.create_prompt("modify_subtask", + self.user_query, + '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='True']), + json.dumps({str(key): str(value) for key, value in self.executor.variables.items()}), + "\n".join([f"{api}: "+ self.all_apis_json[api] for api in retrieved_apis]) + ) self.user_query, _ = LLM_response(prompt, self.model_llm_type, history=[], kwargs={}) + self.logger.info('modified subtask: {}', self.user_query) self.update_user_state("run_pipeline") self.save_state_enviro() self.run_pipeline(self.user_query, self.LIB, top_k=3, files=[],conversation_started=False,session_id=self.session_id) @@ -1097,7 +1161,7 @@ def run_pipeline_after_entering_params(self, user_input): summary_prompt = self.prompt_factory.create_prompt('summary_full', user_input, self.predicted_api_name, api_data_single['description'], api_data_single['Parameters'],api_data_single['Returns'], self.execution_code) response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.callback_func('log', response, "Task summary before execution") - self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed? Please enter y/n.\nIf you press n, we will re-direct to the parameter input step\nIf you press r, we will restart another turn", "Double Check") + self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed? 
Please enter y/n.\nIf you press n, we will re-direct to the parameter input step\nIf you press r, we will restart another turn", "User Confirmation") self.update_user_state("run_pipeline_after_doublechecking_execution_code") self.save_state_enviro() @@ -1153,7 +1217,6 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): output_list = result['output_list'] self.executor.load_environment("./tmp/tmp_output_run_pipeline_execution_code_variables.pkl") self.logger.info('check: {}, {}, {}, {}', code, output_list, self.executor.execute_code, self.executor.variables) - if len(execution_code_list)>0: self.last_execute_code = self.get_last_execute_code(code) else: @@ -1164,7 +1227,8 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): except: try: content = self.last_execute_code['error'] - except: + except Exception as e: + self.logger.error('error for loading content: {}', e) content = "" self.logger.info('content: {}', content) # show the new variable @@ -1241,64 +1305,98 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): # 240531: add newer execution prompt, which combines information from docstring examples, api_callings, and github issue solutions, and traceback informations # remove tracebackerror because the information is nonsense, only keep the line of 'ValueError: ' - executor_info = "\n".join(list(set(output_list))) + if output_list: + executor_info = "\n".join(list(set([str(iii) for iii in output_list]))) + else: + executor_info = "" """error_index = next((index for index, value in enumerate(output_list) if 'Error:' in value), None) if error_index is not None: filtered_output_list = output_list[error_index:] else: filtered_output_list = [] executor_info = "\n".join(filtered_output_list)""" - from ..models.query_issue_corpus import retrieved_issue_solution + from ..models.query_issue_corpus import retrieved_issue_solution, search_github_issues from ..gpt.get_summarize_tutorial import extract_imports, get_sub_API # use github issue retriever - #possible_solution = retrieved_issue_solution(self.LIB, 2, executor_info, "sentencebert", "issue_description") + #possible_solution = retrieved_issue_solution(self.LIB, 3, executor_info, "sentencebert", "issue_title") # issue_description + possible_solution = search_github_issues(self.LIB, 3, executor_info) + try: + possible_solution = '\n'.join(possible_solution) + except: + possible_solution = str(possible_solution) # do not use github issue retriever - possible_solution = "" + #possible_solution = "" self.logger.info("possible_solution: {}", possible_solution) + print("possible_solution: {}", possible_solution) example_json = {} api_callings = {} - whole_code = '\n'.join([i['code'] for i in self.executor.execute_code]) - error_code = '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='False']) - self.logger.info("whole_code: {}", whole_code) + parameters_json = {} + success_history_code = '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='True']) + last_success_index = max(idx for idx, item in enumerate(self.executor.execute_code) if item['success'] == 'True') + error_code_after_last_success = '\n'.join( + [i['code'] for i in self.executor.execute_code[last_success_index + 1:] if i['success'] == 'False'] + ) + #error_code = '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='False']) + error_code = error_code_after_last_success + self.logger.info("success_history_code: {}", success_history_code) 
self.logger.info("error_code: {}", error_code) + whole_code = success_history_code+'\n' + error_code + # get all previous API in the code, then collect the examples and put into prompt imports = extract_imports(whole_code) - ori_relevant_API, relevant_API = get_sub_API(whole_code, imports, self.LIB) # ALIAS + ori_relevant_API, relevant_API = get_sub_API(error_code, imports, self.LIB) # ALIAS for api in relevant_API: if api in self.API_composite: example = self.API_composite[api]['example'] if example: example_json[api] = example api_callings[api] = self.API_composite[api]['api_calling'] - execution_prompt = self.prompt_factory.create_prompt('executor_correction', executor_info, json.dumps({str(key): str(value) for key, value in self.executor.variables.items()}), error_code, possible_solution, json.dumps(example_json), json.dumps(list(api_callings.values())), whole_code, self.user_query) + parameters_json[api] = self.API_composite[api]['Parameters'] + else: + self.logger.error('there exist error that some APIs are not in API_init.json') + self.logger.info('relevant_API: {}', relevant_API) + # collect parameters and put into prompt + execution_prompt = self.prompt_factory.create_prompt('executor_correction', executor_info, json.dumps(parameters_json,indent=4), json.dumps({str(key): str(value) for key, value in self.executor.variables.items()}), error_code, possible_solution, json.dumps(example_json), json.dumps(list(api_callings.values())), success_history_code, self.user_query) self.logger.info('execution_prompt: {}', execution_prompt) #prompt = self.prompt_factory.create_prompt('subtask_code', [], self.user_query, whole_code, True, execution_prompt) #self.logger.info('prompt: {}', prompt) response, _ = LLM_response(execution_prompt, self.model_llm_type, history=[], kwargs={}) # llm self.logger.info('response: {}', response) tmp_retry_count = 0 - while tmp_retry_count<3: + while tmp_retry_count<5: tmp_retry_count+=1 try: clean_response = response.replace('```json', '').replace('```', '').strip() newer_code = json.loads(clean_response)['code'] - break + newer_analysis = json.loads(clean_response)['analysis'] + if ast.parse(newer_code): # must be valid code + break except Exception as e: print('Error: ', e) newer_code = "" + newer_analysis = "" + self.logger.info('clean_response: {}', clean_response) self.logger.info('newer_code: {}', newer_code) + self.logger.info('newer_analysis: {}', newer_analysis) #newer_code = response.replace('\"\"\"', '') - self.execution_code = newer_code - self.callback_func('code', self.execution_code, "Executed code") + if newer_analysis: + self.callback_func('log', newer_analysis, "Error Analysis") + if newer_code: + self.execution_code = newer_code + self.callback_func('code', self.execution_code, "Executed code") + else: + # TODO: should return to another round + self.callback_func('log', "LLM didn't correct code as we expected.", "Execution correction [Fail]") # LLM response summary_prompt = self.prompt_factory.create_prompt('summary_full', user_input, self.predicted_api_name, self.API_composite[self.predicted_api_name]['description'], self.API_composite[self.predicted_api_name]['Parameters'],self.API_composite[self.predicted_api_name]['Returns'], self.execution_code) response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.callback_func('log', response, "Task summary before execution") - self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed? 
Please enter y/n.\nIf you press n, we will re-direct to the parameter input step\nIf you press r, we will restart another turn", "Double Check") + self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed? Please enter y/n.\nIf you press n, we will re-direct to the parameter input step\nIf you press r, we will restart another turn", "User Confirmation") self.update_user_state("run_pipeline_after_doublechecking_execution_code") self.save_state_enviro() return else: - self.callback_func('log', "The execution failed multiple times. Please re-enter the inquiry and start a new turn.", "Executed results [Fail]") + #self.callback_func('log', "The execution failed multiple times. Please re-enter the inquiry and start a new turn.", "Executed results [Fail]") + self.callback_func('log', "The execution failed multiple times. Please re-enter the inquiry for current task, we will try again and continue the remaining subtasks.", "Executed results [Fail]") self.retry_execution_count = 0 self.update_user_state("run_pipeline") self.save_state_enviro() @@ -1322,7 +1420,7 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): self.logger.info("Currently all executed code: {}", json.dumps(new_str)) # 240604 add logic, if there exist sub task in self.user_query_list, then do not return, go ahead to the next sub task if self.user_query_list: - self.callback_func('log', "As there exist subtask, go ahead to next subtask from the task planning list", "Continue to the next subtask") + self.callback_func('log', "Remaining subtasks: \n → "+ '\n'.join(self.user_query_list), "Continue to the next subtask") sub_task = self.get_query() self.update_user_state("run_pipeline") self.save_state_enviro() @@ -1331,6 +1429,7 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): else: self.callback_func('log', "All subtasks are predicted and executed.", "Start another round.") self.new_task_planning = True + self.user_query_list = [] self.update_user_state("run_pipeline") self.save_state_enviro() def modify_code_add_tmp(self, code, add_tmp = "tmp"): @@ -1397,8 +1496,11 @@ def get_last_execute_code(self, code): pass self.logger.info('Something wrong with getting execution status by code! 
Enter wrong code {}', code) return None - def add_query(self, queries): - self.user_query_list.extend(queries) + def add_query(self, queries, mode='aft'): + if mode=='aft': + self.user_query_list.extend(queries) + else: + self.user_query_list = queries + self.user_query_list def get_query(self): if self.user_query_list: return self.user_query_list.pop(0) diff --git a/src/inference/execution_UI.py b/src/inference/execution_UI.py index b90a349..89b57a4 100644 --- a/src/inference/execution_UI.py +++ b/src/inference/execution_UI.py @@ -382,12 +382,15 @@ def generate_execution_code_for_one_api(self, api_name, selected_params, return_ maybe_instance_name = maybe_class_name.lower() + "_instance" pass if single_class_API: + self.logger.info('single_class_API: {}', single_class_API) if api_type in ['property', 'constant']: api_call = f"{maybe_instance_name} = {maybe_class_name}" else: api_call = f"{maybe_instance_name} = {maybe_class_name}({class_params_formatted})" else: + self.logger.info('no single_class_API') if maybe_instance_name not in self.variables: # not initialized + self.logger.info('==> maybe_instance_name not in self.variables') if api_type in ['property', 'constant']: api_call = f"{maybe_instance_name} = {maybe_class_name}" else: @@ -412,10 +415,10 @@ def generate_execution_code_for_one_api(self, api_name, selected_params, return_ index_parenthesis = tmp_api_call.find("(") comparison_result = index_equal < index_parenthesis if index_equal!=-1 and comparison_result: - self.logger.info('debugging1 for return class API:', api_name, return_type, api_call, '--end') + self.logger.info('debugging1 for return class API: {}, {}, {} --end', api_name, return_type, api_call) return import_code+'\n'+f"{api_call}" else: - self.logger.info('debugging2 for return class API:', api_name, return_type, api_call, '--end') + self.logger.info('debugging2 for return class API: {}, {}, {} --end', api_name, return_type, api_call) self.counter = max(self.counter, self.get_newest_counter_from_namespace()) self.counter += 1 return_var = f"result_{self.counter} = " @@ -426,7 +429,7 @@ def generate_execution_code_for_one_api(self, api_name, selected_params, return_ self.generate_code.append(new_code) return import_code+'\n'+new_code else: - self.logger.info('debugging3 for return class API:', api_name, return_type, api_call, '--end') + self.logger.info('debugging3 for return class API: {}, {}, {} --end', api_name, return_type, api_call) self.generate_code.append(f"{api_call}") return import_code+'\n'+f"{api_call}" def split_tuple_variable(self, last_code_status): diff --git a/src/models/query_issue_corpus.py b/src/models/query_issue_corpus.py index efdd445..16adecc 100644 --- a/src/models/query_issue_corpus.py +++ b/src/models/query_issue_corpus.py @@ -4,19 +4,19 @@ Last Modified: May 29, 2024 Description: Query the issue corpus for the specified library Usage: -python -m src.models.query_issue_corpus --LIB scanpy --example_query "Traceback (most recent call last): \n File "/home/z6dong/BioChat/refer/src/2024_biomania_phase2/./examples/case2.1/output/3.sh.execute.py", line 13, in \n sc.tl.louvain(adata)\nFile "/home/z6dong/anaconda3/envs/biomania2/lib/python3.10/site-packages/scanpy/tools/_louvain.py", line 115, in louvain\n adjacency = _choose_graph(adata, obsp, neighbors_key)\n File "/home/z6dong/anaconda3/envs/biomania2/lib/python3.10/site-packages/scanpy/_utils/__init__.py", line 767, in _choose_graph\n neighbors = NeighborsView(adata, neighbors_key)\n File 
"/home/z6dong/anaconda3/envs/biomania2/lib/python3.10/site-packages/scanpy/_utils/__init__.py", line 711, in __init__\n raise KeyError('No "neighbors" in .uns')\nKeyError: 'No "neighbors" in .uns'" --method sentencebert --field issue_description --top_k 3 +python -m src.models.query_issue_corpus --LIB scanpy --example_query "ValueError: cannot specify integer bins when input data contains infinity" --method sentencebert --field issue_description --top_k 1 +Notice: if we input wrong example_query, the output will be empty. """ -import os -import json -import argparse +import os, json, requests, argparse, ast from typing import Tuple, List, Dict, Any from rank_bm25 import BM25Okapi +from sentence_transformers import SentenceTransformer, util +from dotenv import load_dotenv from ..retrievers import BM25Retriever from ..gpt.utils import load_json -import ast -from sentence_transformers import SentenceTransformer, util from ..dataloader.prepare_issue_corpus import ERROR_KEYWORDS, get_error_type +from ..configs.model_config import get_all_variable_from_cheatsheet def prepare_corpus(queries: List[Dict[str, Any]], field: str) -> Dict[str, Tuple[List[Dict[str, Any]], List[str]]]: """ @@ -38,7 +38,7 @@ def prepare_corpus(queries: List[Dict[str, Any]], field: str) -> Dict[str, Tuple """ corpus_dict = {} for query in queries: - if query['solution'] is None: + if query['solution'] in [None, 'No solutions']: continue error_types = query.get('error_type', {'Other'}) for error_type in error_types: @@ -101,6 +101,57 @@ def sentencebert_retriever(corpus_texts: List[str], query: str, top_k: int) -> L hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)[0] return [hit['corpus_id'] for hit in hits] +def search_github_issues(lib, topk, question): + info_json = get_all_variable_from_cheatsheet(lib) + GITHUB_LINK = info_json['GITHUB_LINK'] + repo_name = GITHUB_LINK.replace('https://github.com/','').replace('.git','') + if repo_name.endswith('/'): + repo_name = repo_name[:-1] + load_dotenv() + github_token = os.getenv('GITHUB_TOKEN', None) + if not github_token: + print("No GitHub token provided. 
Unable to retrieve issues from GitHub.") + search_url = 'https://api.github.com/search/issues' + params = { + 'q': f'repo:{repo_name} "{question}" is:issue', + 'sort': 'comments', + 'order': 'desc', + 'per_page': topk + } + print('params', params) + headers = { + 'Authorization': f'token {github_token}' + } + def fetch_issues(): + response = requests.get(search_url, headers=headers, params=params) + if response.status_code == 200: + issues = response.json()['items'] + return issues if issues else "" + else: + return "" + def fetch_comments(comments_url): + comments_response = requests.get(comments_url, headers=headers) + if comments_response.status_code == 200: + return comments_response.json() + else: + return [] + issues = fetch_issues() + if not issues: + return "" + results = [] + for issue in issues: + issue_title = issue['title'] + comments_url = issue['comments_url'] + comments = fetch_comments(comments_url) + if comments: + sorted_comments = sorted(comments, key=lambda x: x['reactions']['total_count'], reverse=True) + solutions = [f"Solution {idx + 1}: {comment['body']} (Reactions: {comment['reactions']['total_count']})" for idx, comment in enumerate(sorted_comments)] + result = f"issue: {issue_title}, solutions: {'; '.join(solutions)}" + else: + result = f"issue: {issue_title}, solutions: No comments found" + results.append(result) + return "\n".join(results) + def retrieved_issue_solution(LIB: str, top_k: int, example_query: str, method: str, field: str) -> None: """ Main function to prepare data, create a retriever, and evaluate its performance. @@ -149,17 +200,26 @@ def retrieved_issue_solution(LIB: str, top_k: int, example_query: str, method: s print(f"Retrieved titles: {retrieved_titles}") print(f"Retrieved issue descriptions: {retrieved_issue_description}") print(f"Retrieved solutions: {retrieved_solution}") + + return retrieved_solution def main(): parser = argparse.ArgumentParser(description='Query the issue corpus for a library') parser.add_argument('--LIB', type=str, required=True, help='Library name') parser.add_argument('--example_query', type=str, required=True, help='Example query to test') - parser.add_argument('--method', type=str, required=True, choices=['bm25', 'sentencebert'], help='Retrieval method to use') - parser.add_argument('--field', type=str, required=True, choices=['issue_title', 'issue_description'], help='Field to compare') - parser.add_argument('--top_k', type=int, default=3, help='Number of top documents to retrieve') + parser.add_argument('--method', type=str, default="sentencebert", choices=['bm25', 'sentencebert'], help='Retrieval method to use') + parser.add_argument('--field', type=str, default="issue_title", choices=['issue_title', 'issue_description'], help='Field to compare') + parser.add_argument('--top_k', type=int, default=10, help='Number of top documents to retrieve') + parser.add_argument('--query_source', type=str, default="online", help='query issue with solutions online') args = parser.parse_args() - - retrieved_issue_solution(args.LIB, args.top_k, args.example_query, args.method, args.field) + + if args.query_source=='local': + retrieved_issue_solution(args.LIB, args.top_k, args.example_query, args.method, args.field) + elif args.query_source=='online': + solutions = search_github_issues(args.LIB, args.top_k, args.example_query) + print('solutions: ', solutions) + else: + raise NotImplementedError("Unsupported query source. 
Use 'local' or 'online'.") if __name__ == "__main__": main() diff --git a/src/prompt/promptgenerator.py b/src/prompt/promptgenerator.py index 6ad2302..0aaf638 100644 --- a/src/prompt/promptgenerator.py +++ b/src/prompt/promptgenerator.py @@ -148,8 +148,8 @@ def build_prompt(self, LIB, goal_description, data_list=[]): Avoid creating steps that are too coarse or too detailed. If you find that more than one API needs to be used, split the step into two or more subtasks. For example, if for the preprocessing data step, filtering and normalization are both required, then use two subtasks `preprocessing data by filtering` and `preprocessing data by normalization` to describe them separately. Only include keywords in the subtask, avoid including API name in subtask. Each subtask should consists of 15-20 words, should be clear and concise for one single API usage. -The arrangement of tasks should take into account API dependencies (for example, some APIs need to calculate metrics before visualization) and the logical order of tasks (for example, an example flow is to load data first, then preprocess data, then apply methods, and finally visualize the results). -If a file path is provided, use it to load the data. If no file path is provided, use the built-in dataset API to load the default dataset. Only specify the data loading API for the subtask; omit API details from other subtasks. +The arrangement of tasks should take into account API dependencies (for example, some APIs need to calculate metrics before visualization) and the logical order of tasks (for example, an example flow is to load data first, then preprocess data by logarimizing and then filtering, then apply methods, and finally visualize the results). +If a file path is provided, use it to load the data. If no file path is provided, use the built-in dataset API to load the default dataset. Omit API name from all subtasks. Only respond in JSON format strictly enclosed in double quotes, adhering to the Response Format. Exclude any extraneous content from your response. Goal: {goal_description}\n @@ -163,54 +163,61 @@ def build_prompt(self, LIB, goal_description, data_list=[]): return prompt class ExecutorPromptBuilder(PromptBuilder): - def build_prompt(self, executor_info, namespace_variables, script, possible_solution="", api_examples="", api_calling="", history_code="", goal_description=""): + def build_prompt(self, executor_info, parameters_info, namespace_variables, error_code, possible_solution="", api_examples="", api_calling="", success_history_code="", goal_description=""): if possible_solution: possible_solution_info = f"\nPossible solution from similar issues from Github Issue Discussion:\n{possible_solution}" else: possible_solution_info = "" if api_examples and api_examples != "{}": - api_examples_info = f"\nUsage examples of this: {api_examples}." + api_examples_info = f"\nAPI Usage examples: {api_examples}." else: api_examples_info = "" - if api_calling: - api_calling_info = f"\nExample API calling: {api_calling}. You can use only few parameters" prompt = f""" -Task: Review and correct the Python script based on the traceback information. -Rules: -- Conduct minimum correction. -- Import all necessary libraries at the beginning of the script. -- Respond only with the answer in JSON format. -- Include any prerequisite steps required for the task if you feel necessary. 
- -Success execution History: {history_code} -Current Task: {goal_description} -Generated Code Script which contain Bugs: {script} -Traceback error information: {executor_info} +Task: Analyze and correct the Python script based on the traceback information. Here are some information to help you analyze the error in code: +Success execution History: {success_history_code} +Current goal for code generation: {goal_description} +We try below codes for this task several times, but all fails: {error_code} +traceback error information from the last trial: {executor_info} Current Namespace variables: {namespace_variables} -{possible_solution_info}{api_examples_info}{api_calling_info} +{possible_solution_info}{api_examples_info} +API calling template: {api_calling}. +Parameters information for this API calling: {parameters_info} +You only need to keep required parameters from previous trial codes, only keep minimum optional parameters necessary for task. Remove optional parameters from error code which cause the problem. Please ensure that required parameters are passed in their proper positional order, as keyword arguments should only be used for optional parameters. You only need to include the task related correct code in your response, do not repeat other API from the success execution history in your response. For parameters starting with 'result_', use only those that exist in the namespace. Do not generate inexist variables. -Follow these steps to debug and ensure the code is bug-free: -Error Analysis: Check if the error is due to using the wrong API based on the goal description. Replace with the correct API if necessary; otherwise, continue with the same API. -Parameter Check: Examine parameters in the code. Remove unnecessary optional parameters and keep only the essential ones and those explicitly mentioned in the subtasks. -Attribute and Value Verification: Verify variable attributes and API parameters. Correct any incorrect parameter values. Especially consider those attributes saved in AnnData object, only fillin the exist attributes as parameters values. +Below are some common reasons, debug based on error types: Import Verification: Ensure all necessary libraries are imported. -API and Parameter Accuracy: Use the correct API names and parameters with appropriate values. +API: No matter the API inexists or the external lib is uninstalled, replace with the correct or similar API if necessary; otherwise, continue with the same API. +Parameter Names: Remove unnecessary optional parameters and keep only those essentials for successful execution. Remove fake parameters that not belong to target API. +Attribute and Values: Correct any incorrect parameter values. For AnnData object attributes, only fillin existing attributes in namespace variables instead of using hallucinated attributes. +Previous steps needed: Some pre-API are required for API executions due to the API design. If so, ensure these steps are included in the corrected code before target API call. E.g., before visualization, you might need to calculate metrics to store it in anndata object first. E.g., some API require to input logarimize data, you need to logarimize the data by another API first. +If the data needs intermediate processing, address these by setting appropriate parameters or another API if possible. If not, use tools like AnnData, pandas related API to preprocess the data before calling the corresponding API from {api_calling}. 
Sometimes errors are indirect; deduce and locate the real cause based on these steps. +Rules: +- Conduct minimum correction. +- Import all necessary libraries at the beginning of the script. +- Include any prerequisite steps required for the task if you feel it is necessary for API dependency, e.g. in order to use API2, API1 must be executed ahead. +- Respond only with the answer in JSON format. +- Only successful execution history is recorded. Each time, remember to import the targeted API again in correct code, remember to use the exists variable, do not use variable from error trials as they are not recognized as part of the execution history. + Response Format: -{{"analysis": "Provide a detailed error analysis explaining how to correct the bug in the code.", "code": "The corrected bug-free Python script in order to accomplish the task."}} +{{"analysis": "Locate error, explain how to correct the bug.", "code": "Task oriented corrected bug-free Python code based on analysis."}} """ # Response format: {{"info": "Summary and suggestion."}} return prompt class ModifySubtaskPromptBuilder(PromptBuilder): - def build_prompt(self, question, content, totaltask): - query_prompt = ''' -Given the pre-planned subtask for the task planning stage and the retrieval of relevant local API documentation based on this subtask, please adjust the subtask functionality to align it with the PyPI API's capabilities. Ensure clarity and consistency with the original overall task and subtask functionality, but with finer detail. It should be like user inquiry, either in tone of polite, neutral, or formal. + def build_prompt(self, current_subtask, execution_history, namespace_variables, api_docs): + query_prompt = f''' +Code Execution History: {execution_history} +Namespace Variables: {namespace_variables} +Current Subtask: {current_subtask} +API documentation: {api_docs} +Your Task: Based on the Total task planning, current subtask, execution history prior to the current subtasks, namespace variables, and relevant API documentation, please rewrite the subtask description. The rewritten description should correspond to the most specific API and include only the necessary parameters and their values to clearly describe the subtask. Maintain a tone that is polite, neutral, or formal, as if it were a user inquiry. **IMPORTANT** -Just output the query directly. DO NOT add additional explanations or introducement in the answer unless you are asked to. +Just response with the modified subtask description directly. DO NOT add additional explanations or introducement. ''' - return f"##Subtask: {question}\n\n##Content: {content}\n\n##Totaltask:{totaltask}\n\n##Instruction: {query_prompt}" + return query_prompt class SubtaskCodePromptBuilder(PromptBuilder): def build_prompt(self, data_list, goal_description, history_summary, execute_success=False, execute_info=None):
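For readers without the surrounding code, here is a minimal standalone sketch of what the new `--query_source online` path does, following the `search_github_issues` logic added in this patch. The function name `fetch_top_issue_solutions`, the repository string, and the example query are illustrative placeholders; the patch itself resolves the repository from the library cheatsheet and only warns when `GITHUB_TOKEN` is missing, whereas this sketch simply falls back to an unauthenticated (rate-limited) request.

```python
import os
import requests

def fetch_top_issue_solutions(repo: str, question: str, top_k: int = 3) -> str:
    """Search a repository's GitHub issues for `question` and return the most-reacted comments."""
    headers = {}
    token = os.getenv("GITHUB_TOKEN", "")
    if token:
        # Authenticated requests get a much higher rate limit.
        headers["Authorization"] = f"token {token}"
    params = {
        "q": f'repo:{repo} "{question}" is:issue',
        "sort": "comments",
        "order": "desc",
        "per_page": top_k,
    }
    resp = requests.get("https://api.github.com/search/issues", headers=headers, params=params, timeout=30)
    if resp.status_code != 200:
        return ""
    results = []
    for issue in resp.json().get("items", []):
        comments = requests.get(issue["comments_url"], headers=headers, timeout=30).json()
        # Rank comments by total reaction count, the same heuristic the patch uses.
        comments = sorted(comments, key=lambda c: c["reactions"]["total_count"], reverse=True)
        if comments:
            solutions = "; ".join(c["body"] for c in comments[:top_k])
        else:
            solutions = "No comments found"
        results.append(f"issue: {issue['title']}, solutions: {solutions}")
    return "\n".join(results)

if __name__ == "__main__":
    # Repository and query string are illustrative placeholders.
    print(fetch_top_issue_solutions("scverse/scanpy", "ValueError: cannot specify integer bins"))
```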