From 2109a135495a068806fd9a986529869914dd4f0e Mon Sep 17 00:00:00 2001 From: DoraDong-2023 Date: Tue, 25 Jun 2024 14:40:58 -0400 Subject: [PATCH] polish prompts, pipeline --- .github/workflows/python-test-unit.yml | 45 ----- src/deploy/model.py | 223 ++++++++++++++++--------- src/deploy/ollama_demo.py | 2 +- src/gpt/get_summarize_tutorial.py | 3 + src/prompt/promptgenerator.py | 165 +++++++++++------- 5 files changed, 247 insertions(+), 191 deletions(-) delete mode 100644 .github/workflows/python-test-unit.yml diff --git a/.github/workflows/python-test-unit.yml b/.github/workflows/python-test-unit.yml deleted file mode 100644 index 37793c9..0000000 --- a/.github/workflows/python-test-unit.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: Python unit tests - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.10.5' - - - name: List directories - run: | - echo "Current directory:" - pwd - echo "List of files in current directory:" - ls -la - echo "List of files in src directory:" - ls -la src/ - - - name: Cache pip packages - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Install dependencies - run: pip install -r requirements.txt - - - name: Set PYTHONPATH - run: echo "PYTHONPATH=${{ github.workspace }}" >> $GITHUB_ENV - - - name: Run pytest - run: pytest tests/unit diff --git a/src/deploy/model.py b/src/deploy/model.py index 0ada079..a35ff88 100644 --- a/src/deploy/model.py +++ b/src/deploy/model.py @@ -21,6 +21,31 @@ from ..models.dialog_classifier import Dialog_Gaussian_classification from ..inference.param_count_acc import predict_parameters +def generate_function_signature(api_name, parameters_json): + # Load parameters from JSON string if it's not already a dictionary + if isinstance(parameters_json, str): + parameters = json.loads(parameters_json) + else: + parameters = parameters_json + # Start building the function signature + signature = api_name + "(" + params = [] + for param_name, param_info in parameters.items(): + # Extract parameter details + param_type = param_info['type'] + default = param_info['default'] + optional = param_info['optional'] + # Format the parameter string + if optional and default is not None: + param_str = f"{param_name}: {param_type} = {default}" + else: + param_str = f"{param_name}: {param_type}" + # Append to the params list + params.append(param_str) + # Join all parameters with commas and close the function parenthesis + signature += ", ".join(params) + ")" + return signature + def color_text(text, color): color_codes = { 'red': '\033[91m', @@ -47,7 +72,7 @@ def get_color(term): def replace_match(match): term = match.group(0) color = get_color(term) - return f' {term} ' + return f' {term} ' for key, value in parameters_dict.items(): pattern_key = re.compile(r'\b' + re.escape(key) + r'\b') @@ -114,7 +139,7 @@ def __init__(self, logger, device, model_llm_type="gpt-3.5-turbo-0125"): # llama self.session_id = "" self.last_user_states = "" self.user_states = "run_pipeline" - self.retrieve_query_mode = "similar" + self.retrieve_query_mode = "nonsimilar" self.parameters_info_list = None self.initial_goal_description = "" self.new_task_planning = True # decide whether re-plan the task @@ -422,22 +447,18 @@ def run_pipeline_without_files(self, user_input): 
def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=True,session_id=""): self.initialize_tool() self.indexxxx = 2 - if True: - self.session_id = session_id - try: - self.load_state(session_id) - a = str(self.session_id) - self.executor.load_environment(f"./tmp/sessions/{a}_environment.pkl") - except Exception as e: - e = traceback.format_exc() - self.logger.error(e) - self.initialize_executor() - self.new_task_planning = True - self.user_query_list = [] - pass - if len(files)>0: - self.loaded_files = True - #self.logger.info('we set loaded_files as true') + self.session_id = session_id + try: + self.load_state(session_id) + a = str(self.session_id) + self.executor.load_environment(f"./tmp/sessions/{a}_environment.pkl") + except Exception as e: + e = traceback.format_exc() + self.logger.error(e) + self.initialize_executor() + self.new_task_planning = True + self.user_query_list = [] + pass # only reset lib when changing lib if lib!=self.LIB and lib!='GPT': reset_result = self.reset_lib(lib) @@ -452,18 +473,10 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T # only clear namespace when starting new conversations if conversation_started in ["True", True]: self.loaded_files = False - #self.logger.info('we reset loaded_files') if len(files)>0: self.loaded_files = True - #self.logger.info('we set loaded_files as true') else: - # return and ensure if user go on without uploading some files, - # we just ask once!!!!!! - self.callback_func('log', 'No data are uploaded! Would you ensure to go on?\nEnter [y]: Go on please.\nEnter [n]: Restart another turn.', 'User Confirmation') - self.user_query = user_input - self.update_user_state("run_pipeline_without_files") - self.save_state_enviro() - return + pass self.new_task_planning = True self.user_query_list = [] #self.logger.info('==>new conversation_started!') @@ -476,7 +489,8 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T if var_name.startswith('result_') or (var_name.endswith('_instance')): del locals()[var_name] else: - pass + if len(files)>0: + self.loaded_files = True #self.logger.info('==>old conversation_continued!') if self.user_states == "run_pipeline": #self.logger.info('start initial!') @@ -496,7 +510,15 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T self.callback_func('log', response, "Non API chitchat") return else: - pass + if conversation_started in ["True", True] and len(files)==0: + # return and ensure if user go on without uploading some files, + # we just ask once!!!!!! + self.callback_func('log', 'No data are uploaded! 
Would you ensure to go on?\nEnter [y]: Go on please.\nEnter [n]: Restart another turn.', 'User Confirmation') + self.user_query = user_input + self.update_user_state("run_pipeline_without_files") + self.save_state_enviro() + return + #pass # dialog prediction if self.enable_multi_task and self.new_task_planning: pred_class = self.dialog_classifer.single_prediction(user_input, self.retriever, self.args_top_k) @@ -536,16 +558,17 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T sub_task = user_input else: sub_task = user_input + # we correct the subtask description before retrieving API if len([i['code'] for i in self.executor.execute_code if i['success']=='True'])>0: # for non-first subtasks retrieved_apis = self.retriever.retrieving(sub_task, top_k=3) - prompt = self.prompt_factory.create_prompt("modify_subtask", - sub_task, - '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='True']), - json.dumps({str(key): str(value) for key, value in self.executor.variables.items()}), - "\n".join(["def "+self.API_composite[api]["api_calling"][0]+":\n" + self.API_composite[api]["Docstring"] for api in retrieved_apis]) - ) + prompt = self.prompt_factory.create_prompt("modify_subtask_correction", sub_task, + '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='True']), + json.dumps({str(key): str(value) for key, value in self.executor.variables.items() if value['type'] not in ['function', 'module', 'NoneType']}), + "\n".join(['def '+generate_function_signature(api, self.API_composite[api]['Parameters'])+':\n"""'+self.API_composite[api]['Docstring'] + '"""' for api in retrieved_apis]) + ) + self.logger.info('modified sub_task prompt: {}', prompt) sub_task, _ = LLM_response(prompt, self.model_llm_type, history=[], kwargs={}) - #self.logger.info('modified sub_task: {}', sub_task) + self.logger.info('modified sub_task: {}', sub_task) #self.callback_func('log', 'we modify the subtask as '+sub_task, 'Modify subtask description') self.callback_func('log', sub_task, 'Polished subtask') else: @@ -554,11 +577,22 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T # get sub_task after dialog prediction self.user_query = sub_task #self.logger.info('we filter those API with IO parameters!') - retrieved_names = self.retriever.retrieving(self.user_query, top_k=self.args_top_k+10) + retrieved_names = self.retriever.retrieving(self.user_query, top_k=self.args_top_k+20) # filter out APIs #self.logger.info('first_task_start: {}, self.loaded_files: {}', self.first_task_start, self.loaded_files) if self.first_task_start and (not self.loaded_files): # need to consider only the builtin dataset - retrieved_names = [api_name for api_name in retrieved_names if all((not any(special_type in str(param['type']) for special_type in special_types)) and (not any(io_type in str(param['type']) for io_type in io_types)) and (param_name not in io_param_names) for param_name, param in self.API_composite[api_name]['Parameters'].items())] + retrieved_names = [ + api_name for api_name in retrieved_names + if all( + ( + (not param['optional'] and not any(special_type in str(param['type']) for special_type in special_types) and not any(io_type in str(param['type']) for io_type in io_types) and param_name not in io_param_names) + or + param['optional'] + ) + for param_name, param in self.API_composite[api_name]['Parameters'].items() + ) + ] + # TODO: 240623: try smarter way, as there are some saved path instead of loading path parameters, 
e.g. squidpy.datasets.visium_fluo_adata self.logger.info('there not exist files, retrieved_names are: {}', retrieved_names) else: # for the first API, it is assumed to be loading data (not setting), if no files provided, must use builtin dataset, retrieved_names = [api_name for api_name in retrieved_names if all((not any(special_type in str(param['type']) for special_type in special_types)) for param_name, param in self.API_composite[api_name]['Parameters'].items())] @@ -589,20 +623,24 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T # only retain shot_k number of sampled_shuffled tmp_str = "Instruction: " + instruction + "\nFunction: [" + iii['gold'] + "]" new_function_candidates = [f"{api}, description: "+self.all_apis_json[api].replace('\n',' ') for i, api in enumerate(tmp_retrieved_api_list)] # {i}: - similar_queries += "function candidates:\n" + "\n".join(new_function_candidates) + '\n' + tmp_str + "\n---\n" + similar_queries += f"function candidates: {tmp_retrieved_api_list}\n" + "\n".join(new_function_candidates) + '\n' + tmp_str + "\n---\n" instruction_shot_example = similar_queries #self.logger.info('start predicting API!') api_predict_init_prompt = get_retrieved_prompt() #self.logger.info('api_predict_init_prompt: {}', api_predict_init_prompt) #print(self.all_apis_json.keys()) retrieved_apis_prepare = "" + retrieved_apis_prepare += str(retrieved_names) + "\n" for idx, api in enumerate(retrieved_names): - retrieved_apis_prepare+=f"{idx}:" + api+", description: "+self.all_apis_json[api].replace('\n',' ')+"\n" - #print('retrieved_apis_prepare:', retrieved_apis_prepare) + retrieved_apis_prepare+=api+": "+self.all_apis_json[api].replace('\n',' ')+"\n" + self.logger.info('retrieved_apis_prepare: {}', retrieved_apis_prepare) api_predict_prompt = api_predict_init_prompt.format(query=self.user_query, retrieved_apis=retrieved_apis_prepare, similar_queries=instruction_shot_example) + self.logger.info('api_predict_prompt: {}', api_predict_prompt) #self.logger.info('==>start predicting API! Ask LLM: {}', api_predict_prompt) success = False - for _ in range(self.predict_api_llm_retry): + for idxxx_api in range(self.predict_api_llm_retry): + if idxxx_api>0: + api_predict_prompt += "\nInaccurate response:" + response + " Never respond this fake API again. Please select from the provided function candidates." try: ori_response, _ = LLM_response(api_predict_prompt, self.model_llm_type, history=[], kwargs={}) # llm #self.logger.info('==>start predicting API! LLM response: {}, {}', api_predict_prompt, ori_response) @@ -615,21 +653,24 @@ def run_pipeline(self, user_input, lib, top_k=3, files=[],conversation_started=T #self.logger.info('self.all_apis_json keys: {}', self.all_apis_json.keys()) if len(response.split(','))>1: response = response.split(',')[0].strip() - self.logger.info('==>start predicting API! api_predict_prompt, {}, correct response: {}, {}', api_predict_prompt, ori_response, response) + self.logger.info('==>start predicting API! 
api_predict_prompt, {}, correct response: {}, respose: {}', api_predict_prompt, ori_response, response) if response in self.all_apis_json: + self.logger.info('response in self.all_apis_json') self.predicted_api_name = response success = True break else: - # use another way to parse + self.logger.info('use another way to parse') def extract_api_calls(text, library): pattern = rf'\b{library}(?:\.\w+)*\b' matches = re.findall(pattern, text) return [i for i in matches if i not in [library]] from ..configs.model_config import get_all_variable_from_cheatsheet info_json = get_all_variable_from_cheatsheet(self.LIB) - lib_alias = info_json["lib_alias"] + self.logger.info('info_json: {}', info_json) + lib_alias = info_json["LIB_ALIAS"] extracted_api_calls = extract_api_calls(ori_response, lib_alias) + self.logger.info('extracted_api_calls: {}', extracted_api_calls) if extracted_api_calls: response = extracted_api_calls[0] if response in self.all_apis_json: @@ -637,6 +678,7 @@ def extract_api_calls(text, library): success = True break except Exception as e: + print('error during api prediction:', e) e = traceback.format_exc() self.logger.error('error during api prediction: {}', e) if not success: @@ -671,6 +713,7 @@ def extract_api_calls(text, library): self.initialize_tool() self.callback_func('log', next_str, f"Can you confirm which of the following {len(self.filtered_api)} candidates") self.save_state_enviro() + return else: self.update_user_state("run_pipeline_after_fixing_API_selection") self.run_pipeline_after_fixing_API_selection(self.user_query) @@ -701,7 +744,7 @@ def run_pipeline_asking_GPT(self,user_input): 'LLM_code_generation', self.user_query, str(self.executor.execute_code), - str(self.executor.variables), + str({str(key): str(value) for key, value in self.executor.variables.items() if value['type'] not in ['function', 'module', 'NoneType']}), self.LIB ) response, _ = LLM_response(prompt, self.model_llm_type, history=[], kwargs={}) @@ -713,7 +756,8 @@ def run_pipeline_asking_GPT(self,user_input): self.execution_code = newer_code self.callback_func('code', self.execution_code, "Executed code") # LLM response - summary_prompt = self.prompt_factory.create_prompt('summary_full', self.user_query, self.predicted_api_name, self.API_composite[self.predicted_api_name]['description'], self.API_composite[self.predicted_api_name]['Parameters'],self.API_composite[self.predicted_api_name]['Returns'], self.execution_code) + api_docstring = 'def '+generate_function_signature(self.predicted_api_name, self.API_composite[self.predicted_api_name]['Parameters'])+':\n"""'+self.API_composite[self.predicted_api_name]['Docstring'] + '"""' + summary_prompt = self.prompt_factory.create_prompt('summary_full', self.user_query, api_docstring, self.execution_code) response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.callback_func('log', response, "Task summary") self.callback_func('log', "Could you confirm should this task be executed?\nEnter [y]: Go on please.\nEnter [n]: Re-generate the code\nEnter [r], Restart another turn", "User Confirmation") @@ -745,8 +789,10 @@ def run_pipeline_after_ambiguous(self,user_input): return 'break' if user_index==-2: sub_task = self.get_query() - self.callback_func('log', "Ongoing subtask and remaining subtasks: \n → "+ '\n - '.join(self.user_query_list), "Task Planning") - sub_task = self.get_query() + if self.user_query_list: + self.callback_func('log', "Ongoing subtask and remaining subtasks: \n → "+ '\n - '.join(self.user_query_list), 
"Task Planning") + sub_task = self.get_query() + self.user_query = sub_task self.update_user_state("run_pipeline") self.save_state_enviro() self.run_pipeline(sub_task, self.LIB, top_k=3, files=[],conversation_started=False,session_id=self.session_id) @@ -796,40 +842,26 @@ def update_user_state(self, new_state): #print(f"Updated state from {self.last_user_states} to {self.user_states}") def run_pipeline_after_fixing_API_selection(self,user_input): self.initialize_tool() - #self.logger.info('==>run_pipeline_after_fixing_API_selection') # check if composite API/class method API, return the relevant APIs if isinstance(self.predicted_api_name, str): self.relevant_api_list = self.process_api_info(self.API_composite, self.predicted_api_name) - api_description = self.API_composite[self.predicted_api_name]['description'] - parameters_tmp = self.API_composite[self.predicted_api_name]['Parameters'] - returns_tmp = self.API_composite[self.predicted_api_name]['Returns'] elif isinstance(self.predicted_api_name, list) and len(self.predicted_api_name) == 1: self.relevant_api_list = self.process_api_info(self.API_composite, self.predicted_api_name[0]) - api_description = self.API_composite[self.predicted_api_name[0]]['description'] - parameters_tmp = self.API_composite[self.predicted_api_name[0]]['Parameters'] - returns_tmp = self.API_composite[self.predicted_api_name[0]]['Returns'] elif isinstance(self.predicted_api_name, list) and len(self.predicted_api_name) > 1: raise ValueError("Predicting Multiple APIs during one inference is not supported yet") - api_description = [] - parameters_tmp = [] - returns_tmp = [] self.relevant_api_list = [] for api_name in self.predicted_api_name: self.relevant_api_list.extend(self.process_api_info(self.API_composite, api_name)) - api_description.append(self.API_composite[api_name]['description']) - parameters_tmp.append(self.API_composite[api_name]['Parameters']) - returns_tmp.append(self.API_composite[api_name]['Returns']) - api_description = " ".join(api_description) else: self.relevant_api_list = [] self.logger.error('Invalid type or empty list for self.predicted_api_name: {}', type(self.predicted_api_name)) - api_description = "" # TODO: should return self.api_name_json = self.check_and_insert_class_apis(self.API_composite, self.relevant_api_list)# also contains class API self.logger.info('self.api_name_json: {}', self.api_name_json) - self.update_user_state("run_pipeline") + #self.update_user_state("run_pipeline") # summary task - summary_prompt = self.prompt_factory.create_prompt('summary', user_input, self.predicted_api_name, api_description, parameters_tmp,returns_tmp) + api_docstring = 'def '+generate_function_signature(self.predicted_api_name, self.API_composite[self.predicted_api_name]['Parameters'])+':\n"""'+self.API_composite[self.predicted_api_name]['Docstring'] + '"""' + summary_prompt = self.prompt_factory.create_prompt('summary', user_input, api_docstring) response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.logger.info(f'summary_prompt: {summary_prompt}, summary_prompt response: {response}') self.callback_func('log', response, f"Predicted API: {self.predicted_api_name}") @@ -853,12 +885,12 @@ def run_pipeline_after_doublechecking_API_selection(self, user_input): #sub_task = self.get_query() # polish and modify the sub_task retrieved_apis = self.retriever.retrieving(user_input, top_k=3) - prompt = self.prompt_factory.create_prompt("modify_subtask", - self.user_query, - '\n'.join([i['code'] for i in 
self.executor.execute_code if i['success']=='True']), - json.dumps({str(key): str(value) for key, value in self.executor.variables.items()}), - "\n".join(["def "+self.API_composite[api]["api_calling"][0]+":\n" + self.API_composite[api]["Docstring"] for api in retrieved_apis]) - ) + prompt = self.prompt_factory.create_prompt("modify_subtask_correction", + self.user_query, + '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='True']), + json.dumps({str(key): str(value) for key, value in self.executor.variables.items() if value['type'] not in ['function', 'module', 'NoneType']}), + "\n".join(['def '+generate_function_signature(api, self.API_composite[api]['Parameters'])+':\n"""'+self.API_composite[api]['Docstring'] + '"""' for api in retrieved_apis]) + ) self.user_query, _ = LLM_response(prompt, self.model_llm_type, history=[], kwargs={}) #self.logger.info('Polished subtask: {}', self.user_query) self.callback_func('log', self.user_query, 'Polished subtask') @@ -873,6 +905,20 @@ def run_pipeline_after_doublechecking_API_selection(self, user_input): self.save_state_enviro() # user_states didn't change return + self.logger.info('self.predicted_api_name: {}', self.predicted_api_name) + if len([i['code'] for i in self.executor.execute_code if i['success']=='True'])>0: # for non-first subtasks + prompt = self.prompt_factory.create_prompt("modify_subtask_parameters", self.user_query, + '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='True']), + json.dumps({str(key): str(value) for key, value in self.executor.variables.items() if value['type'] not in ['function', 'module', 'NoneType']}), + "\n"+ 'def '+generate_function_signature(self.predicted_api_name, self.API_composite[self.predicted_api_name]['Parameters'])+':\n"""'+self.API_composite[self.predicted_api_name]['Docstring'] + '"""') + self.user_query, _ = LLM_response(prompt, self.model_llm_type, history=[], kwargs={}) + self.logger.info('modified sub_task prompt: {}', prompt) + self.logger.info('modified sub_task: {}', self.user_query) + #self.callback_func('log', 'we modify the subtask as '+sub_task, 'Modify subtask description') + #self.callback_func('log', self.user_query, 'Polished subtask') + # we leave this to the `highlight` card + else: + pass combined_params = {} #self.logger.info('self.api_name_json: {}', self.api_name_json) # if the class API has already been initialized, then skip it @@ -941,8 +987,10 @@ def run_pipeline_after_doublechecking_API_selection(self, user_input): else: predicted_params = {} # filter out the parameters which value is same as their default value - predicted_parameters = {k: v for k, v in predicted_params.items() if k not in param_tmp or v != param_tmp[k].get("default")} - + self.logger.info('now filter out the parameters which value is same as their default value!') + self.logger.info('predicted_parameters: {}', predicted_params) + predicted_parameters = {k: v for k, v in predicted_params.items() if ((k in param_tmp) and (str(v) != param_tmp[k]["default"]) and (str(v) not in [None, "None", "null", 'NoneType']))} + self.logger.info('predicted_parameters after filtering: {}', predicted_parameters) if len(parameters_name_list)==0: #self.logger.info('if there is no required parameters, skip using LLM') response = "[]" @@ -1235,7 +1283,8 @@ def run_pipeline_after_entering_params(self, user_input): self.callback_func('code', self.execution_code, "Executed code") # LLM response api_data_single = self.API_composite[self.predicted_api_name] - summary_prompt = 
self.prompt_factory.create_prompt('summary_full', user_input, self.predicted_api_name, api_data_single['description'], api_data_single['Parameters'],api_data_single['Returns'], self.execution_code) + api_docstring = 'def '+generate_function_signature(self.predicted_api_name, self.API_composite[self.predicted_api_name]['Parameters'])+':\n"""'+self.API_composite[self.predicted_api_name]['Docstring'] + '"""' + summary_prompt = self.prompt_factory.create_prompt('summary_full', user_input, api_docstring, self.execution_code) response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.callback_func('log', response, "Task summary") self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed?\nEnter [y]: Go on please\nEnter [n]: Re-direct to the parameter input step\nEnter [r]: Restart another turn", "User Confirmation") @@ -1293,7 +1342,7 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): code = result['code'] output_list = result['output_list'] self.executor.load_environment("./tmp/tmp_output_run_pipeline_execution_code_variables.pkl") - self.logger.info('check: {}, {}, {}, {}', code, output_list, self.executor.execute_code, self.executor.variables) + self.logger.info('check: {}, {}', code, output_list) if len(execution_code_list)>0: self.last_execute_code = self.get_last_execute_code(code) else: @@ -1390,7 +1439,12 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): filtered_output_list = output_list[error_index:] else: filtered_output_list = [] - executor_info = "\n".join(filtered_output_list) + if filtered_output_list: + executor_info = "\n".join(filtered_output_list) + else: + executor_info = "" + # append executor_info to the self.executor.execute_code + self.executor.execute_code[-1]['traceback'] = executor_info from ..models.query_issue_corpus import retrieved_issue_solution, search_github_issues from ..gpt.get_summarize_tutorial import extract_imports, get_sub_API # use github issue retriever @@ -1415,6 +1469,9 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): error_code_after_last_success = '\n'.join( [i['code'] for i in self.executor.execute_code[last_success_index + 1:] if i['success'] == 'False'] ) + error_code_with_info = '\n'.join( + [i['code'] + ', its traceback:' + i.get('traceback', '') + '\n' for i in self.executor.execute_code[last_success_index + 1:] if i['success'] == 'False'] + ) #error_code = '\n'.join([i['code'] for i in self.executor.execute_code if i['success']=='False']) error_code = error_code_after_last_success self.logger.info("success_history_code: {}, error_code: {}", success_history_code, error_code) @@ -1427,16 +1484,15 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): example = self.API_composite[api]['example'] if example: example_json[api] = example - api_callings[api] = self.API_composite[api]['api_calling'] + api_callings[api] = generate_function_signature(api, self.API_composite[api]['Parameters']) parameters_json[api] = self.API_composite[api]['Parameters'] else: self.logger.error('there exist error that some APIs are not in API_init.json') # collect parameters and put into prompt - execution_prompt = self.prompt_factory.create_prompt('executor_correction', executor_info, json.dumps(parameters_json,indent=4), json.dumps({str(key): str(value) for key, value in self.executor.variables.items()}), error_code, possible_solution, json.dumps(example_json), 
json.dumps(list(api_callings.values())), success_history_code, self.user_query) - self.logger.info('relevant_API: {}, execution_prompt: {}', relevant_API, execution_prompt) - #prompt = self.prompt_factory.create_prompt('subtask_code', [], self.user_query, whole_code, True, execution_prompt) + api_docstring = 'def '+generate_function_signature(self.predicted_api_name, self.API_composite[self.predicted_api_name]['Parameters'])+':\n"""'+self.API_composite[self.predicted_api_name]['Docstring'] + '"""' + execution_prompt = self.prompt_factory.create_prompt('executor_correction', api_docstring, json.dumps({str(key): str(value) for key, value in self.executor.variables.items() if value['type'] not in ['function', 'module', 'NoneType']}), error_code_with_info, possible_solution, json.dumps(example_json), success_history_code, self.user_query) response, _ = LLM_response(execution_prompt, self.model_llm_type, history=[], kwargs={}) # llm - self.logger.info('prompt: {}, response: {}', execution_prompt, response) + self.logger.info('execution_prompt: {}, response: {}', execution_prompt, response) tmp_retry_count = 0 while tmp_retry_count<5: tmp_retry_count+=1 @@ -1461,7 +1517,8 @@ def run_pipeline_after_doublechecking_execution_code(self, user_input): # TODO: should return to another round self.callback_func('log', "LLM didn't correct code as we expected.", "Execution correction [Fail]") # LLM response - summary_prompt = self.prompt_factory.create_prompt('summary_full', user_input, self.predicted_api_name, self.API_composite[self.predicted_api_name]['description'], self.API_composite[self.predicted_api_name]['Parameters'],self.API_composite[self.predicted_api_name]['Returns'], self.execution_code) + api_docstring = 'def '+generate_function_signature(self.predicted_api_name, self.API_composite[self.predicted_api_name]['Parameters'])+':\n"""'+self.API_composite[self.predicted_api_name]['Docstring'] + '"""' + summary_prompt = self.prompt_factory.create_prompt('summary_full', user_input, api_docstring, self.execution_code) response, _ = LLM_response(summary_prompt, self.model_llm_type, history=[], kwargs={}) self.callback_func('log', response, "Task summary") self.callback_func('log', "Could you confirm whether this task is what you aimed for, and the code should be executed?\nEnter [y]: Go on please\nEnter [n]: Re-direct to the parameter input step\nEnter [r]: Restart another turn", "User Confirmation") diff --git a/src/deploy/ollama_demo.py b/src/deploy/ollama_demo.py index 10c9013..86026ea 100644 --- a/src/deploy/ollama_demo.py +++ b/src/deploy/ollama_demo.py @@ -1,4 +1,4 @@ -from ..deploy.model import Model +from ..deploy.model import Model import os, torch from datetime import datetime from colorama import Fore, Style diff --git a/src/gpt/get_summarize_tutorial.py b/src/gpt/get_summarize_tutorial.py index 1f425da..59cad5e 100644 --- a/src/gpt/get_summarize_tutorial.py +++ b/src/gpt/get_summarize_tutorial.py @@ -218,6 +218,9 @@ def get_relevant_API(combined_tutorials, LIB_ALIAS, ask_GPT=False): #text+='Here are some comments for code.'+'\n'.join(comments) imports.update(extract_imports(code)) ori_relevant_API, relevant_API = get_sub_API(code, imports, LIB_ALIAS) + # a quick fix for tutorials written against the old API naming, which uses scanpy.api.xx + ori_relevant_API = [api.replace('scanpy.api.', 'scanpy.') for api in ori_relevant_API] + relevant_API = [api.replace('scanpy.api.', 'scanpy.') for api in relevant_API] if ask_GPT: # ask gpt to polish and summarize the tutorial text prompt = 
build_prompt_for_summarize(code, text) diff --git a/src/prompt/promptgenerator.py b/src/prompt/promptgenerator.py index 341acb9..fc1fd3c 100644 --- a/src/prompt/promptgenerator.py +++ b/src/prompt/promptgenerator.py @@ -113,18 +113,18 @@ def build_prompt(self, user_query, api_docstring, parameters_name_list, param_ty class SummaryPromptBuilder(PromptBuilder): # from prompt/summary.py - def build_prompt(self, user_query, api_function, api_description, parameters_description, return_information): + def build_prompt(self, user_query, api_docstring): return f""" -Help to summarize the task with its solution in layman tone, it should be understandable by non professional programmers. Starts from description of the user query `{user_query}` and includes the selected API function `{api_function}` which functionality is `{api_description}` with its parameters `{parameters_description}`. The returned variable is `{return_information}` -Please use the template as `The task is ..., we solved it by ...`. The response should be in three sentences +Help summarize the task and its solution in a layman's tone that non-professional programmers can understand. Start from the description of the user query `{user_query}` and include the selected API function's docstring `{api_docstring}`. +Please use the template `The task is ..., we solved it by ...`. The response should be no more than two sentences. """ class SummaryPromptFullBuilder(PromptBuilder): # from prompt/summary.py - def build_prompt(self, user_query, api_function, api_description, parameters_description, return_information, execution_code): + def build_prompt(self, user_query, api_docstring, execution_code): return f""" -Help to summarize the task with its solution in layman tone, it should be understandable by non professional programmers. Starts from description of the user query `{user_query}` and includes the selected API function `{api_function}` which functionality is `{api_description}` with its parameters `{parameters_description}`. The returned variable is `{return_information}`. The generated code is `{execution_code}`. -Please use the template as `The task is ..., we solved it by ...`. The response should be in four sentences. Additionally, the interpretation encompasses explanations for the parameters utilized in the generated code. +Help summarize the task and its solution in a layman's tone that non-professional programmers can understand. Start from the description of the user query `{user_query}` and include the selected API function's docstring `{api_docstring}`. The generated code is `{execution_code}`. +Please use the template `The task is ..., we solved it by ...`. The response should be no more than three sentences and should also explain the parameters used in the generated code. """ class ExecutionCorrectionPromptBuilder(PromptBuilder): @@ -142,30 +142,46 @@ def build_prompt(self, user_input, history_record, variables, LIB): class MultiTaskPromptBuilder(PromptBuilder): def build_prompt(self, LIB, goal_description, data_list=[]): prompt = f""" -Based on the provided task description, create a task plan with subtasks. -The tone of Each subtask should vary among queries: polite, straightforward, casual. -Each subtask should correspond to the exact usage scope of one single API from library {LIB}. -Avoid creating steps that are too coarse or too detailed. If you find that more than one API needs to be used, split the step into two or more subtasks. 
For example, if for the preprocessing data step, filtering and normalization are both required, then use two subtasks `preprocessing data by filtering` and `preprocessing data by normalization` to describe them separately. -Only include keywords in the subtask, omit API name from all subtasks. -Each subtask should consists of 15-20 words, should be clear and concise for one single API usage. -The arrangement of tasks should take into account API order and dependencies (for example, some APIs need to calculate metrics before visualization) and the logical order of tasks (for example, an example flow is to load data first, then preprocess data, then apply methods, and finally visualize the results). -To improve task planning and make it more user-friendly, integrate visualization subtasks between specific key subtasks. -If a file path is provided, use it to load the data. If no file path is provided, use the built-in dataset API to load the default dataset. If several filepaths are provided, either use them in one subtask or in different subtasks regarding the task description. -Never include data description in other subtasks except for the data loading subtask. Ensure Goal-Oriented Task Structuring, place the goal description at the beginning of each subtask. +Create a detailed step-by-step task plan with subtasks to achieve the goal. +The tone should vary among queries: polite, straightforward, casual. +Each subtask has 15-20 words, be clear and concise for the scope of one single API's functionality from PyPI library {LIB}. Omit API name from subtask. +Split the subtask into two or more subtasks if it is too complex. Using `Filtering dataset` together with `Normalize dataset` instead of `Preprocess dataset for filtering, and normalization.` +When arranging tasks, consider the logical order and dependencies. +Integrate visualization subtasks among subtasks. +Include Data description only in data loading subtask. +Ensure Goal-Oriented Task Structuring, place the goal description at the beginning of each subtask. Only respond in JSON format strictly enclosed in double quotes, adhering to the Response Format. Exclude any extraneous content from your response. +--- +Examples: +Goal: use Squidpy for spatial analysis of Imaging Mass Cytometry data, focusing on the spatial distribution and interactions of cell types +Response: +{{"plan": [ +"step 1: Load pre-processed Imaging Mass Cytometry data.", +"step 2: Visualize cell type clusters in spatial context to identify distributions of apoptotic and tumor cells among others.", +"step 3: Calculate co-occurrence of cell types across spatial dimensions, focusing on interactions between basal CK tumor cells and T cells.", +"step 4: Visualize co-occurrence results to understand cell type interactions and their spatial patterns.", +"step 5: Compute neighborhood enrichment to assess spatial proximity and interactions of cell clusters.", +"step 6: Visualize neighborhood enrichment results to highlight enriched or depleted interactions among cell clusters.", +"step 7: Construct a spatial neighbors graph to further detail cell-to-cell interactions.", +"step 8: Analyze interaction matrix to count interactions among clusters.", +"step 9: Compute network centrality scores to evaluate the importance of each cell type in the spatial graph." 
+]}} +--- +Now finish the goal with the following information: Goal: {goal_description}\n Response Format: -{{"plan": ["List your detailed step-by-step tasks to achieve your goal, e.g., ['step 1: content', 'step 2: content', 'step 3: content']."]}} -""" # Specify API and function names and avoid including non PyPI functions in your answered code. - if data_list: - prompt+=f"Input: You have the following information in a list with the format `file path: file description`. I provide those files to you, so you don't need to prepare the data. {data_list}" - else: - prompt+="You don't have any local data provided. Please only use API to load builtin dataset. Please avoid any API that need to load local data." +{{"plan": ['step 1: content', 'step 2: content', ... ]}} +""" +# Specify API and function names and avoid including non PyPI functions in your answered code. +# Avoid creating steps that are too coarse or too detailed. +# Only include keywords in the subtask. +# If a file path is provided, use it to load the data. If no file path is provided, use the built-in dataset API to load the default dataset. If several filepaths are provided, either use them in one subtask or in different subtasks regarding the task description. + prompt += f"Input: Your data is provided in the format 'file path: file description' from {data_list}" if data_list else "You have no local data. Please use only APIs that access built-in datasets and avoid those requiring local data loading." return prompt class ExecutorPromptBuilder(PromptBuilder): - def build_prompt(self, executor_info, parameters_info, namespace_variables, error_code, possible_solution="", api_examples="", api_calling="", success_history_code="", goal_description=""): + def build_prompt(self, api_docstring, namespace_variables, error_code, possible_solution="", api_examples="", success_history_code="", goal_description=""): if possible_solution: possible_solution_info = f"\nPossible solution from similar issues from Github Issue Discussion:\n{possible_solution}" else: @@ -175,54 +191,77 @@ def build_prompt(self, executor_info, parameters_info, namespace_variables, erro else: api_examples_info = "" prompt = f""" -Task: Analyze and correct the Python script based on the traceback information. Here are some information to help you analyze the error in code: +Task: Analyze and correct a Python script based on provided traceback information: Success execution History: {success_history_code} -Current goal for code generation: {goal_description} -We try below codes for this task several times, but all fails: {error_code} -traceback error information from the last trial: {executor_info} -Current Namespace variables: {namespace_variables} +Existing Namespace variables: {namespace_variables} +Current Goal: {goal_description} +Failed Attempts with their tracebacks: {error_code} {possible_solution_info}{api_examples_info} -API calling template: {api_calling}. -Parameters information for this API calling: {parameters_info} -You only need to keep required parameters from previous trial codes, only keep minimum optional parameters necessary for task. Remove optional parameters from error code which cause the problem. Please ensure that required parameters are passed in their proper positional order, as keyword arguments should only be used for optional parameters. You only need to include the task related correct code in your response, do not repeat other API from the success execution history in your response. 
For parameters starting with 'result_', use only those that exist in the namespace. Do not generate inexist variables. +API Docstring: {api_docstring}. -Below are some common reasons, debug based on error types: -Import Verification: Ensure all necessary libraries are imported. -API: No matter the API inexists or the external lib is uninstalled, replace with the correct or similar API if necessary; otherwise, continue with the same API. -Parameter Names: Remove unnecessary optional parameters and keep only those essentials for successful execution. Remove fake parameters that not belong to target API. -Attribute and Values: Correct any incorrect parameter values. For AnnData object attributes, only fillin existing attributes in namespace variables instead of using hallucinated attributes. -Previous steps needed: Some pre-API are required for API executions due to the API design. If so, ensure these steps are included in the corrected code before target API call. E.g., before visualization, you might need to calculate metrics to store it in anndata object first. E.g., some API require to input logarimize data, you need to logarimize the data by another API first. -For required parameters, pass the values directly without specifying parameter names, like api(required_param1). For optional parameters, specify them explicitly by name, like api(required_param1, optional_param1=value). -If the data needs intermediate processing, address these by setting appropriate parameters or another API if possible. If not, use tools like AnnData, pandas related API to preprocess the data before calling the corresponding API from {api_calling}. -Sometimes errors are indirect; deduce and locate the real cause then correct it. Present the logic steps in the "analysis" part, while present the code in the "code" part +Guidelines: +Perform minimal corrections necessary. +Begin the script with all necessary library imports. +Include prerequisite steps if required by API dependencies, ensuring correct order of execution. This includes not only the pandas, numpy, AnnData, or other libs, depends on the traceback error and the variables. +Use only existing variables from successful executions and avoid those from failed attempts. +Correct the code by removing unnecessary or incorrect parameters, ensuring required parameters are passed in proper positional order. +Adjust any misused attributes or values, especially for object-specific attributes like those in an AnnData object. +If intermediate processing is necessary, utilize relevant tools or APIs to preprocess the data before the main API call. +Respond only with the task-related correct code in JSON format. -Rules: -- Conduct minimum correction. -- Import all necessary libraries at the beginning of the script. -- Include any prerequisite steps required for the task if you feel it is necessary for API dependency, e.g. in order to use API2, API1 must be executed ahead. -- Respond only with the answer in JSON format. -- Only successful execution history is recorded. Each time, remember to import the targeted API again in correct code, remember to use the exists variable, do not use variable from error trials as they are not recognized as part of the execution history. +Common Errors to Address: +Import Verification: Confirm necessary libraries are imported. +API Usage: Replace or continue with the correct API as needed. +Parameter Handling: Streamline parameters to essentials, removing any incorrect or irrelevant ones. 
+Prerequisite API Calls: Include any necessary pre-API steps. +Identify and address indirect errors by deducing the root cause. Present the logical steps in the 'analysis' section and the corresponding code in the 'code' section. Response Format: {{"analysis": "Locate error, explain how to correct the bug.", "code": "Task oriented corrected bug-free Python code based on analysis."}} -""" - # Response format: {{"info": "Summary and suggestion."}} +""" # You only need to keep required parameters from previous trial codes, only keep minimum optional parameters necessary for task. Remove optional parameters from error code which cause the problem. Please ensure that required parameters are passed in their proper positional order, as keyword arguments should only be used for optional parameters. You only need to include the task related correct code in your response, do not repeat other API from the success execution history in your response. For parameters starting with 'result_', use only those that exist in the namespace. Do not generate inexist variables. return prompt class ModifySubtaskPromptBuilder(PromptBuilder): def build_prompt(self, current_subtask, execution_history, namespace_variables, api_docs): query_prompt = f''' -Code Execution History: {execution_history} -Namespace Variables: {namespace_variables} -Current Subtask: {current_subtask} -API documentation: {api_docs} -Your Task: Based on the Total task planning, current subtask, execution history prior to the current subtasks, namespace variables, and relevant API documentation, please rewrite the subtask description. The rewritten description should correspond to the most specific API and include only the necessary parameters and their values to clearly describe the subtask. Maintain a tone that is polite, neutral, or formal, as if it were a user inquiry. -**IMPORTANT** -Never include data description in other subtasks except for the data loading subtask. Ensure Goal-Oriented Task Structuring, place the goal description at the beginning of each subtask. -Ensure to check docstring requirements for API dependencies, required optional parameters, parameter conflicts, and deprecations. -If there are obvious parameter values in the current subtask, retain them in the polished subtask description and condense the parameter assignments in 1-2 sentences. -Only respond the modified subtask description with assigned parameter values. DO NOT add additional explanations or introducement. DO NOT return any previous subtask. -''' +Refine the subtask description by integrating essential parameters and their values from the docstring, ensuring they are appropriate for the next steps in the code execution. Inherit any clear parameter values from the current subtask, verifying their accuracy and relevance. Check the docstring for API dependencies, required optional parameters, parameter conflicts, duplication, and deprecations. +Provide only the revised subtask description. Avoid including any extraneous information. +--- +Example: +Original Subtask description: Can you scale the data to unit variance and zero mean and clip values? +response: Can you scale the data to unit variance and zero mean and clip values at maximum value as '10.0'? 
+ +Example: +Details to consider +Namespace variables: {{"result_1": "{{'type': 'AnnData', 'value': AnnData object with n_obs \u00d7 n_vars = 3798 \u00d7 36601\n obs: 'in_tissue', 'array_row', 'array_col'\n var: 'gene_ids', 'feature_types', 'genome'\n uns: 'spatial', 'pca'\n obsm: 'spatial', 'X_pca'\n varm: 'PCs'}}"}} +Extract necessary parameter details and constraints from API Docstring: def squidpy.gr.ripley(adata=$, cluster_key=@, mode=$, spatial_key=@, metric=@, n_neigh=@, n_simulations=@, n_observations=@, max_dist=@, n_steps=@, seed=@, copy=@): +Original Subtask description: Can you calculate Ripley's statistics? +response: Can you calculate Ripley's statistics with cluster_key set as 'array_row'? +--- +Details to consider +Understand context and dependencies from past executed code: {execution_history} +Ensure parameter compatibility for existing namespace variables: {namespace_variables} +Extract necessary parameter details and constraints from API Docstring: {api_docs} +Original Subtask description: {current_subtask} +response: +''' # Never include data description in other subtasks except for the data loading subtask. Ensure Goal-Oriented Task Structuring, place the goal description at the beginning of each subtask. + return query_prompt + +class ModifySubtaskCorrectionPromptBuilder(PromptBuilder): + def build_prompt(self, current_subtask, execution_history, namespace_variables, api_docs): + query_prompt = f''' +Refine the subtask description to more closely align with the functionality and intent of a specific API. Review the docstrings of similar API candidates that will be provided, and polish the task description to ensure it encapsulates the API's capabilities and constraints accurately. Refine the interpretation of the existing task based on the most appropriate API's features. Omit API name from subtask. +--- +Example: +Original Subtask description: Can you scale the data to unit variance and zero mean and clip values? +response: Can you scale the data to unit variance and zero mean and clip values at maximum value as '10.0'? +--- +Details to consider: +Extract relevant details from the API docstrings to understand constraints and capabilities: {api_docs} +Review past executed code and namespace variables to ensure compatibility and relevance: {execution_history}, {namespace_variables} +Refine the original subtask description to closely match the intended API functionality: {current_subtask} +response: +''' # Never include data description in other subtasks except for the data loading subtask. Ensure Goal-Oriented Task Structuring, place the goal description at the beginning of each subtask. return query_prompt class SubtaskCodePromptBuilder(PromptBuilder): @@ -296,10 +335,12 @@ def create_prompt(self, prompt_type, *args): return MultiTaskPromptBuilder().build_prompt(*args) elif prompt_type == 'executor_correction': return ExecutorPromptBuilder().build_prompt(*args) - elif prompt_type == 'subtask_code': - return SubtaskCodePromptBuilder().build_prompt(*args) - elif prompt_type == 'modify_subtask': + #elif prompt_type == 'subtask_code': + # return SubtaskCodePromptBuilder().build_prompt(*args) + elif prompt_type == 'modify_subtask_parameters': return ModifySubtaskPromptBuilder().build_prompt(*args) + elif prompt_type == 'modify_subtask_correction': + return ModifySubtaskCorrectionPromptBuilder().build_prompt(*args) else: raise ValueError("Unknown prompt type")
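For reference, below is a minimal sketch of what the new generate_function_signature helper added to src/deploy/model.py produces. It is illustrative only: the helper is restated in simplified form (the patched version also accepts a JSON string and parses it first), and the scanpy API name and parameter metadata are hypothetical examples chosen to mirror the 'type'/'default'/'optional' fields the helper reads from API_composite.

# Simplified restatement of the helper, for illustration only.
def generate_function_signature(api_name, parameters):
    params = []
    for name, info in parameters.items():
        # optional parameters with a non-None default are rendered with "= default"
        if info['optional'] and info['default'] is not None:
            params.append(f"{name}: {info['type']} = {info['default']}")
        else:
            params.append(f"{name}: {info['type']}")
    return api_name + "(" + ", ".join(params) + ")"

# Hypothetical parameter metadata in the API_composite style.
params = {
    "adata": {"type": "AnnData", "default": None, "optional": False},
    "copy": {"type": "bool", "default": False, "optional": True},
}
print(generate_function_signature("scanpy.pp.log1p", params))
# -> scanpy.pp.log1p(adata: AnnData, copy: bool = False)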