Merge pull request #124 from DeepInsight-AI/new_pre

V1.3.0
DeepInsight-AI · Apr 2, 2024 · af8cb0e · af8cb0e
2 parents 6a59e81 + 9c7c194
commit af8cb0e
Show file tree

Hide file tree

Showing 47 changed files with 335 additions and 186 deletions.
diff --git a/Docker_install_CN.md b/Docker_install_CN.md
@@ -201,7 +201,7 @@ git clone http://github.com/DeepInsight-AI/DeepBI.git
 ## 配置DeepBI
 - 下载代码```git clone git@github.com:DeepInsight-AI/DeepBI.git```
 - 运行命令到对应文件夹 ```cd DeepBI ```
-- 修改权限 ```sudo chmod+x ./Install.sh```
+- 修改权限 ```sudo chmod +x ./Install.sh```
 - 运行命令```sudo ./Install_cn.sh ``` 开始安装，安装结束后会有一个网址提示，直接浏览器访问即可
 - 从版本1.1 如果更新代码，直接拉取 新的代码```git pull```,然后重启docker即可 <br>
     停止命令  ```sudo docker-compose stop```<br>

diff --git a/README.md b/README.md
@@ -88,7 +88,7 @@ The database connections supported by DeepBI are:
 To install directly on an Ubuntu system, you need Redis, PostgreSQL and a Python 3.8.17 environment.
 
 - Redis can be accessed directly through the 127.0.0.1 password-free command line.
-- Require python version  3.8+
+- Require python version  3.8.x
 - We recommend using a virtual environment such as pyenv or conda
 - PostgreSQL must be version 16 (install the postgresql-16 package)
 

diff --git a/README_CN.md b/README_CN.md
@@ -73,7 +73,7 @@ DeepBI 支持的数据库连接有:
 - 直接在ubuntu 系统安装,需要将安装redis,postgresql python3.8.17 环境
 - 环境建议
     1. redis 可以直接通过127.0.0.1,无密码命令行访问
-    2. python版本要求3.8+ 建议使用pyenv coda 等虚拟环境
+    2. python版本要求3.8.x 建议使用pyenv、conda 等虚拟环境
     3. postgresql 需要安装postgresql-16 版本
 - 下载我们的代码
  ```

diff --git a/ai/agents/agent_instance_util.py b/ai/agents/agent_instance_util.py
@@ -725,7 +725,7 @@ def get_agent_bi_proxy(self):
             """,
             human_input_mode="NEVER",
             websocket=self.websocket,
-            code_execution_config=False,
+            code_execution_config={"last_n_messages": 1, "work_dir": "paper", "use_docker": False},
             default_auto_reply="TERMINATE",
             user_name=self.user_name,
             function_map={"run_mysql_code": BIProxyAgent.run_mysql_code,
@@ -931,7 +931,7 @@ def get_agent_python_executor(self, report_file_name=None):
         python_executor = PythonProxyAgent(
             name="python_executor",
             system_message="python executor. Execute the python code and report the result.",
-            code_execution_config={"last_n_messages": 1, "work_dir": "paper"},
+            code_execution_config={"last_n_messages": 1, "work_dir": "paper", "use_docker": False},
             human_input_mode="NEVER",
             websocket=self.websocket,
             user_name=self.user_name,
@@ -1074,6 +1074,7 @@ def get_agent_starrocks_echart_assistant(self, use_cache=True, report_file_name=
                                              When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.
                                              Reply "TERMINATE" in the end when everything is done.
                                              When you find an answer,  You are a report analysis, you have the knowledge and skills to turn raw data into information and insight, which can be used to make business decisions.include your analysis in your reply.
+                                             Don't generate html files.
                                              """ + '\n' + self.base_starrocks_info + '\n' + python_base_dependency + '\n' + MYSQL_ECHART_TIPS_MESS,
             human_input_mode="NEVER",
             user_name=self.user_name,
@@ -1265,7 +1266,7 @@ def set_language_mode(self, language_mode):
             self.question_ask = ' 以下是我的问题，请用中文回答: '
             self.quesion_answer_language = '用中文回答问题.'
             self.data_analysis_error = '分析数据失败，请检查相关数据是否充分'
-        
+
         elif self.language_mode == language_japanese:
             self.error_message_timeout = "申し訳ありませんが、今回のAI-GPTインターフェース呼び出しがタイムアウトしました。もう一度お試しください。"
             self.question_ask = ' これが私の質問です。: '

diff --git a/ai/agents/agentchat/bi_proxy_agent.py b/ai/agents/agentchat/bi_proxy_agent.py
@@ -1490,6 +1490,9 @@ async def run_echart_code(self, chart_code_str: str, name: str):
             else:
                 str_obj = ast.literal_eval(chart_code_str)
 
+            if isinstance(str_obj, list):
+                return "Chart ：" + name + " configuration should not be a list."
+
             json_str = json.dumps(str_obj)
 
             result_message = {

diff --git a/ai/agents/agentchat/chart_presenter_agent.py b/ai/agents/agentchat/chart_presenter_agent.py
@@ -192,3 +192,18 @@ def find_extract_code(self, text: str):
                 extracted.append(("", group2.strip()))
 
         return extracted
+    @staticmethod
+    def extract_json_data(text: str):
+        """Return the first JSON object embedded in ``text``, or ``None``.
+
+        Every ``{`` in ``text`` is tried, left to right, as the start of a JSON
+        document; the first one that decodes successfully is returned, so any
+        prose before or after the object is ignored.
+
+        The standard-library ``re`` module has no recursive ``(?R)`` construct
+        (compiling such a pattern raises ``re.error``), so balanced-brace
+        matching is delegated to ``json.JSONDecoder.raw_decode`` rather than a
+        regular expression.
+
+        Declared as a static method because the function takes no ``self``.
+
+        Args:
+            text: Arbitrary text that may contain a JSON object.
+
+        Returns:
+            The decoded object, or ``None`` when no ``{`` starts valid JSON.
+        """
+        decoder = json.JSONDecoder()
+        start = text.find("{")
+        while start != -1:
+            try:
+                # raw_decode parses exactly one document beginning at ``start``
+                # and tolerates whatever text follows it.
+                extracted_json, _ = decoder.raw_decode(text, start)
+            except json.JSONDecodeError:
+                start = text.find("{", start + 1)
+            else:
+                return extracted_json
+
+        return None
diff --git a/ai/agents/agentchat/python_proxy_agent.py b/ai/agents/agentchat/python_proxy_agent.py
@@ -5,6 +5,9 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 from ai.agents import oai
 from .agent import Agent
+import ast
+import re
+from ai.backend.util import base_util
 from ai.agents.code_utils import (
     DEFAULT_MODEL,
     UNKNOWN,
@@ -22,11 +25,19 @@
 try:
     from termcolor import colored
 except ImportError:
-
     def colored(x, *args, **kwargs):
         return x
 
 
+def format_decimal(value):
+    """Round a float to two decimal places; return any other value unchanged."""
+    if not isinstance(value, float):
+        return value
+    return round(value, 2)
+
+
 class PythonProxyAgent(Agent):
     """(In preview) A class for generic conversable agents which can be configured as assistant or user proxy.
 
@@ -66,7 +77,6 @@ def __init__(
         db_id: Optional = None,
         is_log_out: Optional[bool] = True,
         report_file_name: Optional[str] = None,
-
     ):
         """
         Args:
@@ -112,6 +122,7 @@ def __init__(
         """
         super().__init__(name)
         # a dictionary of conversations, default value is list
+        self.delay_messages = None
         self._oai_messages = defaultdict(list)
         self._oai_system_message = [{"content": system_message, "role": "system"}]
         self._is_termination_msg = (
@@ -147,6 +158,7 @@ def __init__(
         self.db_id = db_id
         self.is_log_out = is_log_out
         self.report_file_name = report_file_name
+        delay_messages = self.delay_messages
 
     def register_reply(
         self,
@@ -661,15 +673,16 @@ def generate_oai_reply(
 
         return True, oai.ChatCompletion.extract_text_or_function_call(response)[0]
 
-    def generate_code_execution_reply(
+    async def generate_code_execution_reply(
         self,
         messages: Optional[List[Dict]] = None,
         sender: Optional[Agent] = None,
         config: Optional[Any] = None,
+
     ):
         """Generate a reply using code execution.
         """
-
+        from ai.agents.agent_instance_util import AgentInstanceUtil
         code_execution_config = config if config is not None else self._code_execution_config
         # print('self._code_execution_config :', self._code_execution_config)
 
@@ -678,6 +691,7 @@ def generate_code_execution_reply(
         if messages is None:
             messages = self._oai_messages[sender]
         last_n_messages = code_execution_config.pop("last_n_messages", 1)
+        base_content = []
 
         # iterate through the last n messages reversly
         # if code blocks are found, execute the code blocks and return the output
@@ -693,6 +707,7 @@ def generate_code_execution_reply(
 
             if len(code_blocks) == 1 and code_blocks[0][0] != 'python':
                 continue
+            code_blocks = self.regex_fix_date_format(code_blocks)
 
             if self.db_id is not None:
                 obj = database_util.Main(self.db_id)
@@ -703,28 +718,110 @@ def generate_code_execution_reply(
                                    code_blocks]
 
                     # code_blocks = self.replace_ab_with_ac(code_blocks, db_info)
-                    print('new_code_blocks : ', code_blocks)
+                    # print('new_code_blocks : ', code_blocks)
 
             # found code blocks, execute code and push "last_n_messages" back
             exitcode, logs = self.execute_code_blocks(code_blocks)
             code_execution_config["last_n_messages"] = last_n_messages
             exitcode2str = "execution succeeded" if exitcode == 0 else "execution failed"
-
             length = 10000
-            length1 = 10001
             if not str(logs).__contains__('echart_name'):
                 if len(logs) > length:
                     print(' ++++++++++ Length exceeds 10000 characters limit, cropped  +++++++++++++++++')
                     logs = logs[:length]
-            else:
-                if len(logs) > length1:
-                    print(' ++++++++++ Length exceeds 10001 characters limit, cropped  +++++++++++++++++')
-                    logs = "The echarts code is too long, please simplify the code or data (for example, only keep two decimal places), and ensure that the echarts code length does not exceed 10001"
+                return True, f"exitcode: {exitcode} ({exitcode2str})\nCode output: {logs}"
 
 
-            return True, f"exitcode: {exitcode} ({exitcode2str})\nCode output: {logs}"
+            else:
+                try:
+                    if "'echart_name'" in str(logs):
+                        logs = json.dumps(eval(str(logs)))
+                    logs = json.loads(str(logs))
+                except Exception as e:
+                    return True,f"exitcode:exitcode failed\nCode output: There is an error in the JSON code causing parsing errors,Please modify the JSON code for me:{traceback.format_exc()}"
+                for entry in logs:
+                    if 'echart_name' in entry and 'echart_code' in entry:
+                        if isinstance(entry['echart_code'], str):
+                            entry['echart_code'] = json.loads(entry['entry']['echart_code'])
+                        if "series" in entry['echart_code']:
+                            series_data = entry['echart_code']['series']
+                            formatted_series_list = []
+                            for series_data in series_data:
+                                if series_data['type'] in ["bar", "line"]:
+                                    formatted_series_data = [format_decimal(value) for value in series_data['data']]
+                                elif series_data['type'] in ["pie", "gauge", "funnel"]:
+                                    formatted_series_data = [{"name": d["name"], "value": format_decimal(d["value"])} for
+                                                             d in series_data['data']]
+                                elif series_data['type'] in ['graph']:
+                                    formatted_series_data = [
+                                        {'name': data_point['name'], 'symbolSize': format_decimal(data_point['symbolSize'])}
+                                        for data_point in series_data['data']]
+                                elif series_data['type'] in ["Kline", "radar", "heatmap", "scatter", "themeRiver",
+                                                             'parallel', 'effectScatter']:
+                                    formatted_series_data = [[format_decimal(value) for value in sublist] for sublist in
+                                                             series_data['data']]
+                                else:
+                                    formatted_series_data = series_data['data']
+                                series_data['data'] = formatted_series_data
+                                formatted_series_list.append(series_data)
+                            entry['echart_code']['series'] = formatted_series_list
+                        base_content.append(entry)
+
+                agent_instance_util = AgentInstanceUtil(user_name=str(self.user_name),
+                                                        delay_messages=self.delay_messages,
+                                                        outgoing=self.outgoing,
+                                                        incoming=self.incoming,
+                                                        websocket=self.websocket
+                                                        )
+                bi_proxy = agent_instance_util.get_agent_bi_proxy()
+                is_chart = False
+                # Call the interface to generate pictures
+                for img_str in base_content:
+                    echart_name = img_str.get('echart_name')
+                    echart_code = img_str.get('echart_code')
+
+                    if len(echart_code) > 0 and str(echart_code).__contains__('x'):
+                        is_chart = True
+                        print("echart_name : ", echart_name)
+                        # 格式化echart_code
+                        # if base_util.is_json(str(echart_code)):
+                        #     json_obj = json.loads(str(echart_code))
+                        #     echart_code = json.dumps(json_obj)
+                        re_str = await bi_proxy.run_echart_code(str(echart_code), echart_name)
+                # 初始化一个空列表来保存每个echart的信息
+                echarts_data = []
+                # 遍历echarts_code列表，提取数据并构造字典
+                for echart in base_content:
+                    echart_name = echart['echart_name']
+                    series_data = []
+                    for serie in echart['echart_code']['series']:
+                        try:
+                            seri_info = {
+                                'type': serie['type'],
+                                'name': serie['name'],
+                                'data': serie['data']
+                            }
+                        except Exception as e:
+                            seri_info = {
+                                'type': serie['type'],
+                                'data': serie['data']
+                            }
+                        series_data.append(seri_info)
+                    if "xAxis" in echart["echart_code"]:
+                        xAxis_data = echart['echart_code']['xAxis'][0]['data']
+                        echart_dict = {
+                            'echart_name': echart_name,
+                            'series': series_data,
+                            'xAxis_data': xAxis_data
+                        }
+                    else:
+                        echart_dict = {
+                            'echart_name': echart_name,
+                            'series': series_data,
+                        }
+                    echarts_data.append(echart_dict)
+                return True, f"exitcode: {exitcode} ({exitcode2str})\nCode output: 图像已生成,请直接分析图表数据：{echarts_data}"
 
-        # no code blocks are found, push last_n_messages back and return.
         code_execution_config["last_n_messages"] = last_n_messages
 
         return False, None
@@ -1138,3 +1235,24 @@ async def ask_user(self, q_str):
 
         # return "i have no question."
         return None
+
+    def regex_fix_date_format(self, code_blocks):
+        """Normalise ``%`` escaping of MySQL date formats in generated code.
+
+        Generated code sometimes writes date formats with doubled percent signs
+        (``%%Y-%%m-%%d``) and sometimes with single ones (``%Y-%m-%d``).  One
+        direction is chosen for all blocks together:
+
+        * if any block contains a ``%s`` that is not part of a recognised date
+          format, single-percent formats are doubled;
+        * otherwise doubled formats are collapsed to single percent signs.
+
+        NOTE(review): ``%s`` is presumably the DB-API parameter placeholder,
+        which would require literal percent signs to be doubled -- confirm
+        against how the generated SQL is executed.
+
+        Args:
+            code_blocks: list of ``(language, code)`` tuples.
+
+        Returns:
+            A new list of ``(language, code)`` tuples with the formats rewritten.
+        """
+        # Longest first: each shorter format is a prefix of a longer one and
+        # would otherwise match first, leaving a half-converted tail behind.
+        single_formats = (
+            "%Y-%m-%d %H:%i:%s",
+            "%Y-%m-%d %H:%i",
+            "%Y-%m-%d %H",
+            "%Y-%m-%d",
+            "%Y-%m",
+            "%H",
+            "%Y",
+        )
+        doubled_formats = tuple(fmt.replace("%", "%%") for fmt in single_formats)
+        single_alt = "|".join(re.escape(fmt) for fmt in single_formats)
+        doubled_alt = "|".join(re.escape(fmt) for fmt in doubled_formats)
+
+        # Blank out recognised date formats before probing for ``%s`` so the
+        # seconds specifier is not mistaken for a placeholder; ``(?<!%)`` also
+        # ignores an escaped ``%%s``.
+        any_format = re.compile(doubled_alt + "|" + single_alt)
+        placeholder = re.compile(r"(?<!%)%s")
+        uses_placeholders = any(
+            placeholder.search(any_format.sub(" ", code)) for _, code in code_blocks
+        )
+
+        if uses_placeholders:
+            # ``(?<!%)`` keeps already-doubled specifiers from being doubled
+            # again (which would yield ``%%%Y``).
+            target = re.compile(r"(?<!%)(?:" + single_alt + ")")
+            old, new = "%", "%%"
+        else:
+            target = re.compile(doubled_alt)
+            old, new = "%%", "%"
+
+        return [
+            (language, target.sub(lambda m: m.group(0).replace(old, new), code))
+            for language, code in code_blocks
+        ]
diff --git a/ai/agents/agentchat/task_selector_agent.py b/ai/agents/agentchat/task_selector_agent.py
@@ -3,9 +3,10 @@
 from ai.backend.util.write_log import logger
 from .conversable_agent import ConversableAgent
 from .agent import Agent
-from ai.backend.base_config import agents_functions
+from ai.backend.base_config import CONFIG
 import re
 
+
 class TaskSelectorAgent(ConversableAgent):
     """(In preview) A class for generic conversable agents which can be configured as assistant or user proxy.
 
@@ -144,23 +145,32 @@ async def generate_reply(
                     # {"qustion_message":"\nWhat is the most common house layout in the dataset?"}
                     # **********************************************
                     print('messages[-1][content] :', messages[-1]['content'])
-                    
+
                     # suggest_function = {'role': 'assistant', 'content': None, 'function_call': {'name': 'task_base',
                     #                                                          'arguments': '{"qustion_message":"\\nWhat is the most common house layout in the dataset?"}'}}
                     # Check if reply is in agents_functions
-                    if reply in agents_functions:
-                        suggest_function = {'role': 'assistant', 'content': None, 'function_call': {'name': reply,
-                                                                                                    'arguments': '{"qustion_message":"' + str(
-                                                                                                        messages[-1][
-                                                                                                            'content']) + '"}'}}
-
-                        # {"qustion_message": " """ + str(messages[-1]['content']) + """"}
-
+                    is_func = False
+                    for func in CONFIG.agents_functions:
+                        if len(str(reply)) > 0 and func in str(reply):
+                            is_func = True
+                            suggest_function = {'role': 'assistant', 'content': None, 'function_call': {'name': func,
+                                                                                                        'arguments': '{"qustion_message":"' + str(
+                                                                                                            messages[
+                                                                                                                -1][
+                                                                                                                'content']) + '"}'}}
+                            print('reply : ', reply)
+                            # return reply
+                            return suggest_function
+
+                    if not is_func:
+                        suggest_function = {'role': 'assistant', 'content': None,
+                                            'function_call': {'name': CONFIG.default_agents_functions,
+                                                              'arguments': '{"qustion_message":"' + str(
+                                                                  messages[-1][
+                                                                      'content']) + '"}'}}
                         print('reply : ', reply)
-
                         # return reply
                         return suggest_function
 
-
         # return messages
         return self._default_auto_reply